File Coverage

blib/lib/W3C/LogValidator/CSSValidator.pm
Criterion Covered Total %
statement 7 9 77.7
branch n/a
condition n/a
subroutine 3 3 100.0
pod n/a
total 10 12 83.3


line stmt bran cond sub pod time code
1             # Copyright (c) 2004-2005 the World Wide Web Consortium :
2             # Keio University,
3             # European Research Consortium for Informatics and Mathematics
4             # Massachusetts Institute of Technology.
5             # written by olivier Thereaux for W3C
6             #
7             # $Id: CSSValidator.pm,v 1.23 2008/11/18 16:48:56 ot Exp $
8              
9             package W3C::LogValidator::CSSValidator;
10 1     1   594 use strict;
  1         3  
  1         34  
11 1     1   7 use warnings;
  1         3  
  1         37  
12 1     1   925 use DB_File;
  0            
  0            
13              
14              
15              
16             require Exporter;
17             our @ISA = qw(Exporter);
18             our %EXPORT_TAGS = ( 'all' => [ qw() ] );
19             our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } );
20             our @EXPORT = qw();
21             our $VERSION = sprintf "%d.%03d",q$Revision: 1.23 $ =~ /(\d+)\.(\d+)/;
22              
23              
24             ###########################
25             # usual package interface #
26             ###########################
27             our $verbose = 1;
28             our %config;
29              
30             sub new
31             {
32             my $self = {};
33             my $proto = shift;
34             my $class = ref($proto) || $proto;
35             # mandatory vars for the API
36             @{$self->{URIs}} = undef;
37             # internal stuff here
38             # don't change this
39             if (@_) {%config = %{(shift)};}
40             if (exists $config{verbose}) {$verbose = $config{verbose}}
41             if (exists $config{AuthorizedExtensions})
42             {
43             $self->{AUTH_EXT} = $config{AuthorizedExtensions};
44             }
45             else
46             {
47             $self->{AUTH_EXT} = ".css";
48             }
49             $config{ValidatorMethod} = "HEAD" ;
50             $config{ValidatorHost} = "jigsaw.w3.org" if (! exists $config{ValidatorHost});
51             $config{ValidatorPort} = "80" if (!exists $config{ValidatorPort});
52             $config{ValidatorString} = "/css-validator/validator" if (!exists $config{ValidatorString});
53             $config{ValidatorString} .= "?uri=";
54             # by default, report invalid documents
55             $config{ShowInvalid} = "Yes" if (!exists $config{ShowInvalid});
56             $config{ShowAborted} = "No" if (!exists $config{ShowAborted});
57             $config{ShowValid} = "No" if (!exists $config{ShowValid});
58             $config{CheckExtensionlessURIs} = "No" if (!exists $config{CheckExtensionlessURIs});
59             bless($self, $class);
60             return $self;
61             }
62              
63              
64             # internal routines
65              
66             sub new_doc{
67             my $self=shift;
68             $self->{VALID} = undef;
69             $self->{VALID_ERR_NUM} = undef;
70             $self->{VALID_SUCCESS} = undef;
71             }
72              
73             sub auth_ext
74             {
75             my $self=shift;
76             if (@_) { $self->{AUTH_EXT} = shift}
77             return $self->{AUTH_EXT};
78             }
79              
80             sub valid
81             {
82             my $self = shift;
83             if (@_) { $self->{VALID} = shift }
84             return $self->{VALID};
85             }
86              
87             sub valid_err_num
88             {
89             my $self = shift;
90             if (@_) { $self->{VALID_ERR_NUM} = shift }
91             return $self->{VALID_ERR_NUM};
92             }
93              
94             sub valid_success
95             {
96             my $self = shift;
97             if (@_) { $self->{VALID_SUCCESS} = shift }
98             return $self->{VALID_SUCCESS};
99             }
100             sub valid_head
101             {
102             my $self = shift;
103             if (@_) { $self->{VALID_HEAD} = shift }
104             return $self->{VALID_HEAD};
105             }
106              
107              
108             sub HEAD_check {
109             ## Checking whether a document with no extension is actually a CSS file
110             ## causes a lot of requests, but internal - should be OK?
111             my $self = shift;
112             my $check_uri;
113             use LWP::UserAgent;
114             if (@_) { $check_uri = shift }
115             my $ua = new LWP::UserAgent;
116             my $method = "HEAD";
117             my $request = new HTTP::Request("$method", "$check_uri");
118             my $response = new HTTP::Response;
119             $response = $ua->simple_request($request);
120             my $is_css = 0;
121             if ($response->is_success) # not an error, we could contact the server
122             {
123             my $type = $response->header('Content-Type');
124             if ($type =~ /text\/css/)
125             {
126             $is_css = 1;
127             print "URI with no extension $check_uri has content-type $type\n" if ($verbose > 2); # debug
128             }
129             }
130             return $is_css;
131             }
132              
133             sub trim_uris
134             {
135             my $self = shift;
136             my @authorized_extensions = split(" ", $self->auth_ext);
137             my @trimmed_uris;
138             my $exclude_regexp = "";
139             my @excluded_areas;
140             $exclude_regexp = $config{ExcludeAreas};
141             if ($exclude_regexp){
142             $exclude_regexp =~ s/\//\\\//g ;
143             @excluded_areas = split(" ", $exclude_regexp);
144             }
145             else { print "nothing to exclude\n" if ($verbose >2);}
146             my $uri;
147             my %HTTPcodes;
148             if (defined ($config{tmpfile_HTTP_codes}))
149             {
150             my $tmp_file_HTTP_codes = $config{tmpfile_HTTP_codes};
151             tie (%HTTPcodes, 'DB_File', "$tmp_file_HTTP_codes", O_RDONLY) ||
152             die ("Cannot create or open $tmp_file_HTTP_codes");
153             }
154              
155             while ($uri = shift)
156             {
157             my $uri_ext = "";
158             my $match = 0;
159             if ($uri =~ /(\.[0-9a-zA-Z]+)(\?.*)?$/)
160             {
161             $uri_ext = $1;
162             }
163             elsif ($uri =~ /\/$/) { $uri_ext = "/";}
164             elsif (($uri_ext eq "") and $config{CheckExtensionlessURIs}) {$match = 1; }
165             # we keep URIs without extension, if asked to
166             # otherwise, we check their mime type through the wire
167             elsif ( $self->HEAD_check($uri) ) { $match = 1; }
168             foreach my $ext (@authorized_extensions)
169             {
170             if (($ext eq $uri_ext) or ($ext eq "*")) { $match = 1; }
171             }
172             if ($match)
173             {
174             foreach my $area (@excluded_areas)
175             {
176             if ($uri =~ /$area/)
177             {
178             my $slasharea = $area;
179             $slasharea =~ s/\\\//\//g;
180             $slasharea =~ s/\\././g;
181             print "Ignoring $uri matching $slasharea \n" if ($verbose > 2) ;
182             $match = 0;
183             }
184              
185             }
186             }
187              
188             if (defined $HTTPcodes{$uri})
189             {
190             if (($HTTPcodes{$uri} ne "200") and ($HTTPcodes{$uri} =~ /\d+/))
191             {
192             $match = 0;
193             if ($verbose > 2) {
194             print "$uri returned code $HTTPcodes{$uri}, ignoring \n";
195             }
196             }
197             }
198             push @trimmed_uris,$uri if ($match);
199             }
200             return @trimmed_uris;
201             }
202              
203             sub uris
204             {
205             my $self = shift;
206             if (@_) { @{$self->{URIs}} = @_ }
207             return @{$self->{URIs}};
208             }
209              
210             #########################################
211             # Actual subroutine to check the list of uris #
212             #########################################
213              
214              
215             sub process_list
216             {
217             my $self = shift;
218             my $max_invalid = undef;
219             if (exists $config{MaxInvalid}) {$max_invalid = $config{MaxInvalid}}
220             else {$max_invalid = 0}
221             my $max_documents = undef;
222             if (exists $config{MaxDocuments}) {$max_documents = $config{MaxDocuments}}
223             else {$max_documents = 0}
224             print "Now Using the CSS Validation module...\n" if $verbose;
225             use LWP::UserAgent;
226             use URI::Escape;
227             my @uris = undef;
228             my %hits;
229             if (defined ($config{tmpfile}))
230             {
231             my $tmp_file = $config{tmpfile};
232             tie (%hits, 'DB_File', "$tmp_file", O_RDONLY) ||
233             die ("Cannot create or open $tmp_file");
234             @uris = sort { $hits{$b} <=> $hits{$a} } keys %hits;
235             }
236             elsif ($self->uris())
237             {
238             @uris = $self->uris();
239             foreach my $uri (@uris) { $hits{$uri} = 0 }
240             }
241             my $name = "";
242             if (exists $config{ServerName}) {$name = $config{ServerName}}
243             my @result;
244             my @result_head;
245             push @result_head, "Rank";
246             push @result_head, "Hits";
247             push @result_head, "#Error(s)";
248             push @result_head, "Address";
249              
250             my @whatweshow;
251             my $whatweshow_str = "";
252              
253             push @whatweshow, "valid" if ($config{ShowValid} eq "Yes");
254             push @whatweshow, "invalid" if ($config{ShowInvalid} eq "Yes");
255             push @whatweshow, "non-validable" if ($config{ShowAborted} eq "Yes");
256             if (@whatweshow eq 3) {
257             $whatweshow_str = "$whatweshow[0], $whatweshow[1] or $whatweshow[2]";
258             }
259             elsif (@whatweshow eq 2) {
260             $whatweshow_str = "$whatweshow[0] or $whatweshow[1]";
261             }
262             elsif (@whatweshow eq 1) {
263             $whatweshow_str = "$whatweshow[0]";
264             }
265              
266              
267             my $intro="Here are the most popular $whatweshow_str document(s) that I could find in the
268             logs for $name.";
269             my $outro;
270             my $ua = new LWP::UserAgent;
271              
272             @uris = $self->trim_uris(@uris);
273             my $invalid_census = 0; # number of invalid docs
274             my $last_invalid_position = 0; # latest position at which we found an invalid doc
275             my $total_census = 0; # number of documents checked
276              
277             my $uri = undef;
278             # bulk of validation
279             while ( (@uris) and (($invalid_census < $max_invalid) or (!$max_invalid)) and (($total_census < $max_documents) or (!$max_documents)) )
280             {
281             $uri = shift (@uris);
282             my $uri_orig = $uri;
283             $self->new_doc();
284             $total_census++;
285             print " processing #$total_census $uri... " if ($verbose > 1);
286             $uri = uri_escape($uri);
287             # creating the HTTP query string with all parameters
288             my $string=join ("", "http://",$config{ValidatorHost},":",$config{ValidatorPort}, $config{ValidatorString},$uri);
289             my $method = $config{ValidatorMethod};
290             my $request = new HTTP::Request("$method", "$string");
291             my $response = new HTTP::Response;
292             $response = $ua->simple_request($request);
293             if ($response->is_success) {
294             # not an error, we could contact the server
295             # set both valid and error number according to response
296             $self->valid($response->header('X-W3C-Validator-Status'));
297             $self->{VALID_ERR_NUM} = $response->header('X-W3C-Validator-Errors');
298             # we know the validator has been able to (in)validate if $self->valid is not NULL
299             if ( ($self->valid)) # we got an answer about validation (valid, invalid or abort)
300             {
301             if (
302             (($self->valid =~ /Invalid/i) and ($config{ShowInvalid} eq "Yes"))
303             or (($self->valid =~ /Valid/i) and ($config{ShowValid} eq "Yes"))
304             or (($self->valid =~ /Abort/i) and ($config{ShowAborted} eq "Yes"))
305             ) {
306             my @result_tmp;
307             push @result_tmp, $total_census;
308             push @result_tmp, $hits{$uri_orig};
309             if ($self->valid =~ /Abort/i) { push @result_tmp, "Abort"; }
310             else { push @result_tmp, $self->valid_err_num; }
311             push @result_tmp, $uri_orig;
312             push @result, [@result_tmp];
313             $invalid_census++;
314             $last_invalid_position = $total_census;
315             }
316             }
317             printf (" %s!", $self->valid) if ( ($verbose > 1) and (defined ($self->valid)));
318             print " Could not validate (validation failed)!" if (($verbose > 1) and(!defined ($self->valid)));
319              
320             if (($verbose > 1) and ($self->valid_err_num)) # verbose or debug
321             {printf ", %s errors!",$self->valid_err_num}
322             }
323             else {
324             print " Could not validate (no response from validator)!" if ($verbose > 1) ;
325             if ($config{ShowAborted} eq "Yes") {
326             my @result_tmp;
327             push @result_tmp, $total_census;
328             push @result_tmp, $hits{$uri_orig};
329             push @result_tmp, "Abort";
330             push @result_tmp, $uri_orig;
331             push @result, [@result_tmp];
332             $invalid_census++;
333             $last_invalid_position = $total_census;
334             }
335             }
336             print "\n" if ($verbose > 1);
337              
338             $self->valid_head($response->as_string); # for debug
339             if ($verbose > 2) {printf "%s :\n%s", $string, $self->valid_head;} # debug
340             sleep(1); # do not kill the validator
341              
342             }
343             print "Done!\n" if $verbose;
344              
345             # print "invalid_census $invalid_census \n" if ($verbose > 2 );
346             if ($invalid_census) # we found invalid docs
347             {
348             if ($invalid_census eq 1) # let's repect grammar here
349             {
350             $intro=~ s/are/is/;
351             $intro=~ s/ //;
352             $intro=~ s/document\(s\)/document/;
353             }
354             $intro =~s//$invalid_census/;
355             my $ratio = 10000*$invalid_census/$total_census;
356             $ratio = int($ratio)/100;
357             if ($last_invalid_position eq $total_census )
358             # usual case
359             {
360             $outro="Conclusion :
361             I had to check $last_invalid_position document(s) in order to find $invalid_census $whatweshow_str CSS documents or documents with stylesheets.
362             This means that about $ratio\% of your most popular documents were $whatweshow_str.";
363             }
364             else
365             # we didn't find as many invalid docs as requested
366             {
367             $outro="Conclusion :
368             You asked for $max_invalid $whatweshow_str stylesheet document(s) but I could only find $invalid_census
369             by processing (all the) $total_census document(s) in your logs.
370             This means that about $ratio\% of your most popular documents were $whatweshow_str.";
371             }
372             }
373             elsif (!$total_census)
374             {
375             $intro="There was nothing to validate in this log.";
376             $outro="";
377             }
378             else # everything was actually valid!
379             {
380             $intro=~s/ //;
381             $outro="I couldn't find any $whatweshow_str document in this log.";
382             }
383             if (($total_census == $max_documents) and ($total_census)) # we stopped because of max_documents
384             {
385             $outro=$outro."\nNOTE: I stopped after processing $max_documents documents:\n Maybe you could set MaxDocuments to a higher value?";
386             }
387            
388             if (defined ($config{tmpfile}))
389             {
390             untie %hits;
391             }
392             # Here is what the module will return. The hash will be sent to
393             # the output module
394              
395             my %returnhash;
396             # the name of the module
397             $returnhash{"name"}="CSSValidator";
398             #intro
399             $returnhash{"intro"}=$intro;
400             #Headers for the result table
401             @{$returnhash{"thead"}}= @result_head;
402             # data for the results table
403             @{$returnhash{"trows"}}= @result;
404             # #outro
405             $returnhash{"outro"}=$outro;
406             return %returnhash;
407             }
408              
409             package W3C::LogValidator::CSSValidator;
410              
411             1;
412              
413             __END__