File Coverage

blib/lib/Regexp/Log/BlueCoat.pm
Criterion Covered Total %
statement 50 50 100.0
branch 14 14 100.0
condition 4 6 66.6
subroutine 10 10 100.0
pod 3 3 100.0
total 81 83 97.5


line stmt bran cond sub pod time code
1             package Regexp::Log::BlueCoat;
2              
3 5     5   157945 use strict;
  5         40  
  5         195  
4 5     5   29 use Carp;
  5         10  
  5         479  
5 5     5   5034 use Regexp::Log 0.01;
  5         8594  
  5         173  
6 5     5   35 use base qw( Regexp::Log );
  5         9  
  5         716  
7 5     5   24 use vars qw( $VERSION %DEFAULT %FORMAT %REGEXP %UFS );
  5         8  
  5         13026  
8              
9             $VERSION = 0.03;
10              
11             =head1 NAME
12              
13             Regexp::Log::BlueCoat - A regexp builder to parse BlueCoat log files
14              
15             =head1 SYNOPSIS
16              
17             my $blue = Regexp::Log::BlueCoat->new(
18             format => '%g %e %a %w/%s %b %m %i %u %H/%d %c',
19             capture => [qw( host code )],
20             );
21              
22             # the format() and capture() methods can be used to set or get
23             $blue->format('%g %e %a %w/%s %b %m %i %u %H/%d %c %f %A');
24             $blue->capture(qw( host code ));
25             $blue->ufs( 'smartfilter' );
26              
27             # this is necessary to know in which order
28             # we will receive the captured fields from the regex
29             my @fields = $blue->capture;
30              
31             # the all-powerful capturing regex :-)
32             my $re = $blue->regex;
33              
34             while (<>) {
35             my %data;
36             @data{@fields} = /$re/;
37              
38             # do something with the fields
39             }
40              
41             =head1 DESCRIPTION
42              
43             Regexp::Log::BlueCoat is a module that computes custom regular
44             expressions to parse log files generated by the BlueCoat Systems
45             I<Port 80 Security Appliance>.
46              
47             See the Regexp::Log documentation for a description of the standard
48             Regexp::Log interface.
49              
50             =head2 Streaming media logs
51              
52             This version of Regexp::Log::BlueCoat does not support streaming
53             related logs. You will have to add the following line at the beginning
54             of the log parsing loop in your scripts, if your BlueCoat appliance
55             is configured to log those events.
56              
57             next if /^(?:Windows_Media|<RealMedia>)/;
58              
59             This may or may not be faster than having the regular expression generated
60             by the regexp() method fail on each streaming log line.
61              
62             =cut
63              
64             my $IP = '\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}'; # dotted quad: four runs of 1-3 digits; octet values are not range-checked
65             my $HOST = '[-.\\S]+'; # NOTE(review): \S already includes '-' and '.', so this is equivalent to \S+ -- confirm whether \w was intended
66              
67             # define the BlueCoat specific stuff
68             %REGEXP = (
                # Maps each supported format escape (e.g. '%a') to the regexp
                # fragment text that replaces it.  Each fragment wraps its pattern
                # in (?#=name) ... (?#!name) marker comments naming the field;
                # presumably Regexp::Log turns these markers into capturing or
                # non-capturing groups -- confirm against the Regexp::Log docs.
                # Escapes listed in a comment only (no entry) are not supported.
                # Keys must be unique: a repeated key silently overrides the
                # earlier entry.
69              
70             # %% - Denotes '%' character -
71             '%%' => '%',
72              
73             # %a c-ip Client IP address. Yes
74             '%a' => "(?#=c-ip)$IP(?#!c-ip)",
75              
76             # %b sc-bytes Number of bytes returned by the server (or the Cache). Yes
77             '%b' => '(?#=sc-bytes)-|\\d+(?#!sc-bytes)',
78              
79             # %c cs (content-type) The type of object. Usually the MIME-type. No
80             '%c' => '(?#=cs-content-type)-|UNKNOWN|\\S+(?:/\\S+)?(?#!cs-content-type)',
81              
82             # %d cs-supplier-name SUPPLIER NAME - Name or IP address of the server/cache from which the object was received. Yes
83             '%d' => "(?#=cs-supplier-name)-|$HOST(?#!cs-supplier-name)",
84              
85             # %e time-taken Number of milliseconds request took to process. Yes
86             '%e' => '(?#=time-taken)\\d+(?#!time-taken)',
87              
88             # %f sc-filter-category Filtering reason. Why it was denied (such as sex or business) No
89             # this is handled in _postprocess()
90             '%f' => '(?#=sc-filter-category)%f(?#!sc-filter-category)',
91              
92             # %g timestamp UNIX type timestamp. Yes
93             '%g' => '(?#=timestamp)\\d+\\.\\d+(?#!timestamp)',
94              
95             # %h c-ip Client Hostname (uses IP to avoid reverse DNS) - same as %a Yes
96             '%h' => "(?#=c-hostname)-|$HOST(?#!c-hostname)",
97              
98             # %i cs-uri The requested URI. Note: Web trends expects this to be only cs-uri-stem + cs-uri-query No
99             '%i' => '(?#=cs-uri)-|\\S+://\\S+|.*?(?#!cs-uri)',
100              
101             # %j - [Not used.] -
102             '%j' => '',
103              
104             # %l - Client Identification string. (User Login name remote). - always '-' Yes
105             # %m cs-method HTTP method. HTTP methods include GET, PUT, POST, and so on. Yes
106             '%m' =>
107             '(?#=cs-method)-|OPTIONS|GET|HEAD|POST|PUT|DELETE|TRACE|CONNECT(?#!cs-method)',
108              
109             # %n - [Not used.] -
110             '%n' => '',
111              
112             # %o - [Not used.] -
113             '%o' => '',
114              
115             # %p r-port Port fetched from on host - origin server port Yes
116             '%p' => '(?#=r-port)\\d+(?#!r-port)',
117              
118             # %q - [Not used.] -
119             '%q' => '',
120              
121             # %r cs-request-line First line of the request No
122             # %s sc-status The code returned by the cache to the client (HTTP code). Yes
123             '%s' => '(?#=sc-status)\\d{1,4}(?#!sc-status)',
124              
125             # %t gmttime GMT date and time of the user request, in the format [DD/MM/YYYY:hh:mm:ss GMT] Yes
                # Fix: the gmtminute and gmtsecond sub-fields were empty, so the
                # pattern could never match a real "hh:mm:ss" time; each now
                # matches two digits like the other sub-fields.
126             '%t' =>
127             '(?#=gmttime)-|\\[(?#=gmtday)\\d\\d(?#!gmtday)/(?#=gmtmonth)\\d\\d(?#!gmtmonth)/(?#=gmtyear)\\d\\d\\d\\d(?#!gmtyear):(?#=gmthour)\\d\\d(?#!gmthour):(?#=gmtminute)\\d\\d(?#!gmtminute):(?#=gmtsecond)\\d\\d(?#!gmtsecond) GMT\\](?#!gmttime)',
128              
129             # %u cs-username Authenticated user ID. Yes
                # Placeholder only: _preprocess() rewrites %u to %u-username or
                # %u-ldap when a login type is set.  The unbalanced ')' after the
                # (?#...) comment looks deliberate, so that using %u without a
                # login type fails at regexp compile time with this message
                # visible -- TODO confirm.
130             '%u' => '(?### You must define \'login\' to use %u in format ###))',
131              
132             # %v cs-host Name of host sourcing the object. Yes
133             # %w s-action What type of action did the CM take to process this request. NOTE: 'cached' is used by ELFF but has int value. Yes
134             '%w' =>
135             '(?#=s-action)(?:TCP_(?:CLIENT_REFRESH|DENIED|ERR_MISS|HIT|M(?:EM_HIT|ISS)|NC_MISS|PARTIAL_MISS|REFRESH_(?:HIT|MISS)|S(?:PLASHED|WAPFAIL)|TUNNELED)?|UDP_(?:DENIED|HIT|INVALID|MISS(?:_NOFETCH)?)?)(?#!s-action)',
136              
137             # %x date Date in YYYY-MM-DD format Yes
138             '%x' =>
139             '(?#=date)(?#=year)\\d\\d\\d\\d(?#!year)-(?#=month)\\d\\d(?#!month)-(?#=day)\\d\\d(?#!day)(?#!date)',
140              
141             # %y time GMT time in HH:MM:SS format No
142             '%y' =>
143             '(?#=time)(?#=hour)\\d\\d(?#!hour):(?#=minute)\\d\\d(?#!minute):(?#=second)\\d\\d(?#!second)(?#!time)',
144              
145             # %z - [Not used.] -
146             '%z' => '',
147              
148             # %A cs (user-agent) User agent No
149             '%A' => '(?#=user-agent).*(?#!user-agent)',
150              
151             # %B cs-bytes The number of bytes received by the server Yes
                # Fix: this entry was keyed '%b', which overrode the sc-bytes
                # definition above and left %B (used by the :iis format) undefined.
152             '%B' => '(?#=cs-bytes)\\d+(?#!cs-bytes)',
153              
154             # %C cs (cookie) Cookie data No
155             # %D s-supplier-ip SUPPLIER IP - IP address of server/cache from which the object was received. Yes
156             # %E s-Policy-Message Policy enforcement message Yes
157             # %F - [Not used.] -
158             '%F' => '',
159              
160             # %G - [Not used.] -
161             '%G' => '',
162              
163             # %H s-hierarchy How and where the object was retrieved from the cache hierarchy (DIRECT from the server, PARENT_HIT = from the parent cache, and so on) No
164             '%H' =>
165             '(?#=s-hierarchy)DIRECT|NONE|(?:PARENT|SIBLING)_HIT|FIRST_PARENT_MISS(?#!s-hierarchy)',
166              
167             # %I s-ip Server IP, the IP address of the server on which the log entry was generated Yes
168             # %J - [Not used.] -
169             '%J' => '',
170              
171             # %K - [Not used.] -
172             '%K' => '',
173              
174             # %L localtime Local date and time of the user request in format: [DD/MMM/YYYY:hh:mm:ss +nnnn] Yes
                # NOTE(review): the format above says DD/MMM/YYYY, yet the pattern
                # requires a two-digit month and a literal '+' before the offset --
                # confirm against real logs before relying on %L.
175             '%L' =>
176             '\\[(?#=localtime)(?#=localday)\\d\\d(?#!localday)/(?#=localmonth)\\d\\d(?#!localmonth)/(?#=localyear)\\d\\d\\d\\d(?#!localyear):(?#=localhour)\\d\\d(?#!localhour):(?#=localminute)\\d\\d(?#!localminute):(?#=localsecond)\\d\\d(?#!localsecond) \\+\\d\\d\\d\\d(?#!localtime)\\]',
177              
178             # %M - [Not used.] -
179             '%M' => '',
180              
181             # %N s-computername Server name, the name of the server on which the log entry was generated Yes
182             '%N' => "(?#=s-computername)$HOST(?#!s-computername)",
183              
184             # %O - [Not used.] -
185             '%O' => '',
186              
187             # %P s-port Server port, the port number the client is connected to. Yes
188             '%P' => '(?#=s-port)\\d+(?#!s-port)',
189              
190             # %Q cs-uri-query The URI query portion of the URL No
191             # %R cs (Referer) Request referrer No
192             # %S s-sitename Internet service and instance number running on client computer Yes
193             # %T duration Elapsed time, seconds Yes
194             '%T' => '(?#=duration)\\d+(?#!duration)',
195              
196             # %U cs-uri-stem Object path from request URL Yes
197             # %V cs-version The protocol (HTTP, FTP) version used by the client. Yes
198             # %W sc-filter-result UFS event (May differ between Websense or SmartFilter or others). No
199             # this is handled in _postprocess() and is unsupported yet
200             '%W' => '',
201              
202             # %X cs (X-Forwarded-For) The IP address of the device which sent the HTTP request. No
203             # %Y - [Not used.] -
204             '%Y' => '',
205              
206             # %Z - [Not used.] -
207             '%Z' => '',
208              
209             # UFS specific
210             # Smartfilter
211              
212             # Login specific
                # _preprocess() selects one of these by rewriting %u to
                # %u-<login type>.
213             '%u-username' => '(?#=cs-username)[-.\\w]+(?#!cs-username)',
                # Fix: the final key=value component only accepted a one-letter
                # attribute name ([A-Za-z]=), so a name ending in e.g. "dc=com"
                # could not match; it now accepts one or more letters like the
                # leading components.
214             '%u-ldap' =>
215             '(?#=cs-username)-|(?:[A-Za-z]+=[^,]*,)*[A-Za-z]+=[^,]*?(?#!cs-username)',
216             );
217              
218             =head1 METHODS
219              
220             Regexp::Log::BlueCoat is a standard Regexp::Log object, and therefore
221             supports all the standard Regexp::Log methods.
222              
223             Regexp::Log::BlueCoat's constructor accepts several BlueCoat specific
224             arguments:
225              
226             ufs - URL Filtering Service
227             login - The type of username information
228              
229             Note: Though BlueCoat supports SmartFilter, Websense and others,
230             Regexp::Log::BlueCoat only supports I<SmartFilter> UFS in this version.
231              
232             The appropriate accessors are defined for them (if used to set, they
233             return the new value for the attribute).
234              
235             =over 4
236              
237             =item ufs( [$ufs] )
238              
239             Get/set the URL Filter System type (C<%f> and C<%W>).
240             Only C<smartfilter> is supported in this version.
241              
242             =cut
243              
244             sub ufs {
                # Accessor for the object's 'ufs' attribute (the URL Filtering
                # System name).  With an argument, stores it in $self->{ufs} first;
                # in both cases returns the current $self->{ufs}.  The value is not
                # validated against the keys of %UFS here.
245 3     3 1 2124 my $self = shift;
246 3 100       12 $self->{ufs} = shift if @_;
247 3         14 return $self->{ufs};
248             }
249              
250             =item ufs_category( category => string, [...] )
251              
252             This method lets you override the default category names in your UFS.
253              
254             For example, I<SmartFilter> lets you configure the names of the
255             categories; Regexp::Log::BlueCoat supports the default category names,
256             but lets you override them if needed.
257              
258             The changes are applied to the object's current C<ufs>.
259              
260             $log->ufs('smartfilter');
261             $log->ufs_category( hm => 'FunStuff' ); # change the Humor category
262              
263             See L</SmartFilter> for details about the category names.
264              
265             When called without arguments, ufs_category() will return the whole
266             category list for the instance.
267              
268             =item ufs_category( ufs_name, category => string, [...] )
269              
270             This method can also be called as a class method.
271              
272             If you'd rather change the UFS category names for every
273             Regexp::Log::BlueCoat that will be created, you can use the
274             method as a class method.
275              
276             You'll need to tell ufs_category() on which UFS to apply these
277             modifications.
278              
279             Regexp::Log::BlueCoat->ufs_category(
280             'smartfilter',
281             hm => 'Fun', # change the Humor category
282             mp => 'Music', # change the MP3 category
283             );
284              
285             These changes will apply to any new Regexp::Log::BlueCoat object you create.
286              
287             When called with a single argument, ufs_category() will return the whole
288             category list for the specified UFS for the class.
289              
290             =cut
291              
292             sub ufs_category {
                # Get or set UFS category names, as an instance or a class method.
                #
                # Instance call ($obj->ufs_category(...)): works on the object's
                # current $self->{ufs}.
                #   - with key => value pairs: stores them as per-object overrides
                #     in $self->{_ufs}{$ufs};
                #   - without arguments: returns the class defaults $UFS{$ufs}
                #     followed by the per-object overrides, as a flat key/value
                #     list (overrides come last, so they win when the list is
                #     assigned to a hash).
                # Class call (Class->ufs_category($ufs, ...)): the first argument
                # names the UFS.
                #   - with key => value pairs: updates the shared %UFS entry;
                #   - with the name only: returns the contents of $UFS{$ufs}.
                #
                # The two "set" paths have no explicit return; the sub then yields
                # the value of the hash-slice assignment.
                # NOTE(review): the "get" paths dereference $UFS{$ufs} directly;
                # for a $ufs that is not a key of %UFS (including the default '')
                # this looks like it dies under "use strict" -- confirm.
293 8     8 1 4330 my $self = shift;
294              
295             # instance method
296 8 100       24 if ( ref $self ) {
297 3         6 my $ufs = $self->{ufs};
298 3 100       8 if (@_) {
299 1         4 my %ufs = @_;
300 1         4 @{ $self->{_ufs}{$ufs} }{ keys %ufs } = values %ufs;
  1         5  
301             }
302 2         3 else { return ( %{ $UFS{$ufs} }, %{ $self->{_ufs}{$ufs} } ) }
  2         22  
  2         55  
303             }
304              
305             # class method
306             else {
307 5         12 my $ufs = shift;
308 5 100       20 if (@_) {
309 3         40 my %ufs = @_;
310 3         20 @{ $UFS{$ufs} }{ keys %ufs } = values %ufs;
  3         50  
311             }
312 2         3 else { return %{ $UFS{$ufs} } }
  2         58  
313             }
314             }
315              
316             =item login()
317              
318             Get/set the user login type (C<%u>).
319              
320             This version supports C<username> (standard bareword) and C<ldap>
321             (standard C<key=value,...> form).
322              
323             =cut
324              
325             sub login {
                # Accessor for the object's 'login' attribute (the user login
                # type used for %u).  With an argument, stores it in
                # $self->{login} first; in both cases returns the current value.
                # The value is not validated here; _preprocess() only acts on
                # 'ldap' and 'username'.
326 3     3 1 5 my $self = shift;
327 3 100       11 $self->{login} = shift if @_;
328 3         12 return $self->{login};
329             }
330              
331             =back
332              
333             =head1 PREDEFINED FORMATS
334              
335             Regexp::Log::BlueCoat supports several standard log formats.
336             These can be set up by using their short name as the format string,
337             with the format() method.
338              
339             Description Name Format String
340             ----------- ---- -------------
341             Squid log format :squid %g %e %a %w/%s %b %m %i %u %H/%d %c
342             NCSA common log format :clf %h %l %u %t "%r" %s %b
343             NCSA extended log format :elf %h %l %u %L "%r" %s %b "%R" "%A"
344             Microsoft IIS format :iis %a, -, %x, %y, %S, %N, %I, %e, %b, %B, %s, 0, %m, %U, -
345              
346             =cut
347              
348             %FORMAT = (
                # Short names for predefined format strings; presumably expanded
                # by Regexp::Log when one is given as the format -- confirm there.
                # NOTE(review): :clf, :elf and :iis use escapes with no entry in
                # %REGEXP (%l, %r, %R, %S, %I, %U), so these formats may not
                # produce a usable regexp -- verify.
349             ':squid' => '%g %e %a %w/%s %b %m %i %u %H/%d %c',
350             ':clf' => '%h %l %u %t "%r" %s %b',
351             ':elf' => '%h %l %u %L "%r" %s %b "%R" "%A"',
352             ':iis' => '%a, -, %x, %y, %S, %N, %I, %e, %b, %B, %s, 0, %m, %U, -',
353             );
354              
355             =head1 FIELDS
356              
357             =head2 Blue Coat custom format
358              
359             Not all C<%>-escapes are supported in this version of Regexp::Log::BlueCoat.
360             ELFF is not supported yet.
361              
362             Multiple consecutive spaces in the format string are compressed to
363             a single space.
364              
365             The following list is straight from Blue Coat's documentation.
366              
367             Name ELFF Description
368             ---- ---- -----------
369             % - Denotes an expansion field.
370             %% - Denotes '%' character.
371             %a c-ip Client IP address.
372             %b sc-bytes Number of bytes returned by the server
373             (or the Cache).
374             %c cs (content-type) The type of object. Usually the MIME-type.
375             %d cs-supplier-name SUPPLIER NAME - Name or IP address of the
376             server/cache from which the object was received.
377             %e time-taken Number of milliseconds request took to process.
378             %f sc-filter-category Filtering reason. Why it was denied (such as
379             sex or business)
380             %g timestamp UNIX type timestamp.
381             %h c-ip Client Hostname (uses IP to avoid reverse DNS)
382             - same as %a
383             %i cs-uri The requested URI. Note: Web trends expects
384             this to be only cs-uri-stem + cs-uri-query
385             %j - [Not used.]
386             %l - Client Identification string.
387             (User Login name remote). - always '-'
388             %m cs-method HTTP method. HTTP methods include GET, PUT,
389             POST, and so on.
390             %n - [Not used.]
391             %o - [Not used.]
392             %p r-port Port fetched from on host - origin server port
393             %q - [Not used.]
394             %r cs-request-line First line of the request
395             %s sc-status The code returned by the cache to the client
396             (HTTP code).
397             %t gmttime GMT date and time of the user request, in
398             the format [DD/MM/YYYY:hh:mm:ss GMT]
399             %u cs-username Authenticated user ID.
400             %v cs-host Name of host sourcing the object.
401             %w s-action What type of action did the CM take to process
402             this request. NOTE: 'cached' is used by ELFF
403             but has int value.
404             %x date Date in YYYY-MM-DD format
405             %y time GMT time in HH:MM:SS format
406             %z - [Not used.]
407             %A cs (user-agent) User agent
408             %B cs-bytes The number of bytes received by the server
409             %C cs (cookie) Cookie data
410             %D s-supplier-ip SUPPLIER IP - IP address of server/cache from
411             which the object was received.
412             %E s-Policy-Message Policy enforcement message
413             %F - [Not used.]
414             %G - [Not used.]
415             %H s-hierarchy How and where the object was retrieved from the
416             cache hierarchy (DIRECT from the server,
417             PARENT_HIT = from the parent cache, and so on)
418             %I s-ip Server IP, the IP address of the server on which
419             the log entry was generated
420             %J - [Not used.]
421             %K - [Not used.]
422             %L localtime Local date and time of the user request in
423             format: [DD/MMM/YYYY:hh:mm:ss +nnnn]
424             %M - [Not used.]
425             %N s-computername Server name, the name of the server on which
426             the log entry was generated
427             %O - [Not used.]
428             %P s-port Server port, the port number the client is
429             connected to.
430             %Q cs-uri-query The URI query portion of the URL
431             %R cs (Referer) Request referrer
432             %S s-sitename Internet service and instance number running
433             on client computer
434             %T duration Elapsed time, seconds
435             %U cs-uri-stem Object path from request URL
436             %V cs-version The protocol (HTTP, FTP) version used by
437             the client.
438             %W sc-filter-result UFS event (May differ between Websense or
439             SmartFilter or others).
440             %X cs (X-Forwarded-For) The IP address of the device which sent
441             the HTTP request.
442             %Y - [Not used.]
443             %Z - [Not used.]
444              
445             =head1 URL FILTERING SYSTEMS
446              
447             The BlueCoat Systems Port 80 Security Appliance supports two URL Filtering
448             Systems (UFS): I<SmartFilter> and I<Websense>.
449              
450             Since I only had access to log files generated with a BlueCoat + SmartFilter
451             combination, this version of Regexp::Log::BlueCoat only supports I<SmartFilter> UFS.
452             Patches welcome!
453              
454             =head2 SmartFilter
455              
456             When C<ufs> is set to C<smartfilter>, the computed regular expression
457             matches the default SmartFilter category names. These can be changed
458             in SmartFilter's configuration (furthermore one can create one's own
459             categories, with user-defined names).
460              
461             So we need to be able to modify the category names, either in an
462             object instance, or in class data (shared by all instances).
463              
464             To compute a regular expression that matches your specific fields, there
465             are several possibilities:
466              
467             =over 4
468              
469             =item Make the changes in your object instance
470              
471             The method ufs_category() lets you replace any standard category by
472             your own, and even add new "categories" (text that will be matched by
473             the C<%f> fields).
474              
475             These changes are valid for the object only.
476              
477             See ufs_category() for details.
478              
479             =item Change the Regexp::Log::BlueCoat class itself
480              
481             ufs_category() can be used as a class method.
482              
483             One can also be adventurous and access %Regexp::Log::BlueCoat::UFS directly,
484             but you'll need to read the source to understand the details.
485             Here's an example:
486              
487             $Regexp::Log::BlueCoat::UFS{smartfilter} = { simple => '[-\\w]+' };
488              
489             =back
490              
491             I<SmartFilter> default categories are:
492              
493             Key Default value Category
494             --- ------------- --------
495             sx "sex" Sex
496             dr "drugs" Drugs
497             hs "hate speech" Hate Speech
498             cs "crim. skills" Criminal Skills
499             nd "nudity" Nudity
500             os "on-line sales" Online Sales
501             gb "gambling" Gambling
502             pp "personal pages" Personal Pages
503             js "job search" Job Search
504             sp "sports" Sports
505             gm "games" Games
506             hm "humor" Humor
507             mp "MP3 sites" MP3 Sites
508             et "entertainment" Entertainment
509             ls "lifestyle" Lifestyle
510             ex "extreme" Extreme
511             ch "chat" Chat
512             in "investing" Investing
513             nw "general news" General News
514             po "politics, opinion, religion" Politics, Opinion, Religion
515             mm "dating" Dating
516             ac "art/culture" Art/Culture
517             na "usenet news access" Usenet News Access
518             oc "cults/occult" Cults/Occult
519             na "Usenet News" Usenet News
520             sh "self help" Self-Help
521             tr "travel" Travel
522             mt "mature" Mature
523             wm "webmail" Webmail
524             ps "portal sites" Portal Sites
525             an "anonymizer/translator" Anonymizer/Translator
526             u0 "user defined category 0" First User-defined Category
527             u1 "user defined category 1" Second User-defined Category
528             u2 "user defined category 2" Third User-defined Category
529             u3 "user defined category 3" Fourth User-defined Category
530             u4 "user defined category 4" Fifth User-defined Category
531             u5 "user defined category 5" Sixth User-defined Category
532             u6 "user defined category 6" Seventh User-defined Category
533             u7 "user defined category 7" Eighth User-defined Category
534             u8 "user defined category 8" Ninth User-defined Category
535             u9 "user defined category 9" Tenth User-defined Category
536              
537             Regexp::Log::BlueCoat adds the following three categories:
538              
539             Key Default value Category
540             --- ------------- --------
541             none "-" None
542             uncategorized "uncategorized" Uncategorized
543             filter_not_applied "content_filter_not_applied" Filter not applied
544              
545             =head2 Websense
546              
547             I<Websense> is not supported yet. Patches and log file excerpts are
548             welcome.
549              
550             =cut
551              
552             %UFS = (
                # Class-level category tables, one per URL Filtering System name.
                # Each inner hash maps a category key to the category text that
                # _postprocess() substitutes for %f.  ufs_category() called as a
                # class method modifies these tables in place, so changes are
                # shared by everything that reads %UFS afterwards.
553             smartfilter => {
554             none => '-',
555             uncategorized => 'uncategorized',
556             filter_not_applied => 'content_filter_not_applied',
557             sx => "sex",
558             dr => "drugs",
559             hs => "hate speech",
560             cs => "crim. skills",
561             nd => "nudity",
562             os => "on-line sales",
563             gb => "gambling",
564             pp => "personal pages",
565             js => "job search",
566             sp => "sports",
567             gm => "games",
568             hm => "humor",
569             mp => "MP3 sites",
570             et => "entertainment",
571             ls => "lifestyle",
572             ex => "extreme",
573             ch => "chat",
574             in => "investing",
575             nw => "general news",
576             po => "politics, opinion, religion",
577             mm => "dating",
578             ac => "art/culture",
579             na => "usenet news access",
580             oc => "cults/occult",
                # NOTE(review): 'na' is already used two entries above; in a hash
                # list the later pair wins, so "usenet news access" is dropped and
                # never reaches the %f alternation.  One of the two keys is
                # presumably wrong -- verify against the SmartFilter documentation.
581             na => "Usenet News",
582             sh => "self help",
583             tr => "travel",
584             mt => "mature",
585             wm => "webmail",
586             ps => "portal sites",
587             an => "anonymizer/translator",
588             u0 => "user defined category 0",
589             u1 => "user defined category 1",
590             u2 => "user defined category 2",
591             u3 => "user defined category 3",
592             u4 => "user defined category 4",
593             u5 => "user defined category 5",
594             u6 => "user defined category 6",
595             u7 => "user defined category 7",
596             u8 => "user defined category 8",
597             u9 => "user defined category 9",
598             },
                # No categories are defined for websense.
599             websense => {},
600             );
601            
602             %DEFAULT = (
                # Default attribute values; presumably read by the Regexp::Log
                # constructor to initialise new objects -- confirm in Regexp::Log.
                # _ufs holds the per-object category overrides: one empty hash
                # for every UFS name that is a key of %UFS when this runs.
603             format => '',
604             capture => [],
605             ufs => '',
606             login => '',
607             _ufs => { map { ($_, {} ) } keys %UFS },
608             );
609              
610             sub _preprocess {
                # Rewrites $self->{_regexp} in place; presumably called by
                # Regexp::Log before the format escapes are expanded -- confirm.
                #   1. When login is 'ldap' or 'username', every %u becomes
                #      %u-ldap or %u-username, matching the login-specific keys
                #      of %REGEXP.  Any other login value leaves %u untouched.
                #   2. Runs of spaces are collapsed to a single space.
611 9     9   964 my $self = shift;
612 9         19 my $login = $self->{login};
613              
614             # Login specific regexps
615 9 100 66     105 $self->{_regexp} =~ s/%u/%u-$login/g
616             if defined $login && $login =~ /^(?:ldap|username)$/;
617              
618             # Multiple consecutive spaces are compressed to a single space
619 9         82 $self->{_regexp} =~ s/ +/ /g;
620             }
621              
622             sub _postprocess {
                # Rewrites $self->{_regexp} in place; presumably called by
                # Regexp::Log after the format escapes are expanded -- confirm.
                # When ufs is defined and non-empty, the class categories
                # $UFS{$ufs} are merged with the object's overrides
                # $self->{_ufs}{$ufs} (overrides win on equal keys); the sorted
                # values are joined with '|' and replace every remaining %f.
                # Values are inserted verbatim, so characters such as '.' in
                # "crim. skills" act as regexp metacharacters if _regexp is later
                # compiled as a pattern.
                # NOTE(review): a ufs name that is not a key of %UFS makes
                # $UFS{$ufs} undef; dereferencing it looks like it dies under
                # "use strict" -- confirm.
623 9     9   1335 my $self = shift;
624 9         24 my $ufs = $self->{ufs};
625              
626             # UFS specific regexps
627 9 100 66     67 if ( defined $ufs and $ufs ne '' ) {
628 8         12 my %categories = ( %{ $UFS{$ufs} }, %{ $self->{_ufs}{$ufs} } );
  8         53  
  8         102  
629 8         188 my $categories = join '|', sort values %categories;
630 8         98 $self->{_regexp} =~ s/%f/$categories/g;
631             }
632             }
633              
634             =head1 TODO
635              
636             Support streaming logs: Windows Media and RealMedia.
637              
638             Support the W3C Extended Log File Format (ELFF), which is a subset of
639             the Blue Coat format where each field is described using a text string.
640              
641             Have a look at the entries that produce multi-line logs.
642              
643             =head1 BUGS
644              
645             Most of the development was done while I was trying to process
646             logs created with the following format:
647             C<%g %e %a %w/%s %b %m %i %u %H/%d %c %f %A>.
648              
649             Which means that the regular expressions that this module produces do not
650             cover every possible format.
651              
652             If Regexp::Log::BlueCoat's regular expressions do not match some of the
653             log that you are trying to munge, please use the F script
654             and send the resulting file to me.
655              
656             =head1 REFERENCES
657              
658             Blue Coat Systems Port 80 Security Appliance, I<Configuration and Management
659             Guide>: http://www.bluecoat.com/downloads/manuals/BC_Config_Mgmt_Guide.pdf
660              
661             Secure Computing Smartfilter, I<Installation and Configuration Guide>,
662             version 3.1.2: http://www.securecomputing.com/pdf/SFConfig312_IC_RevE.pdf
663              
664             =head1 THANKS
665              
666             Thanks to Jarkko Hietaniemi for Regex::PreSuf.
667              
668             =head1 AUTHOR
669              
670             Philippe 'BooK' Bruhat E<lt>book@cpan.orgE<gt>.
671              
672             =head1 LICENCE
673              
674             This module is free software; you can redistribute it or modify it under
675             the same terms as Perl itself.
676              
677             =cut
678              
679             1;