File Coverage

blib/lib/WebService/Validator/HTML/W3C/Fast.pm
Criterion Covered Total %
statement 42 171 24.5
branch 4 64 6.2
condition 4 25 16.0
subroutine 13 20 65.0
pod 1 5 20.0
total 64 285 22.4


line stmt bran cond sub pod time code
1             package WebService::Validator::HTML::W3C::Fast;
2              
3 1     1   24215 use strict;
  1         2  
  1         35  
4 1     1   6 use warnings;
  1         1  
  1         28  
5 1     1   1375 use WebService::Validator::HTML::W3C();
  1         81814  
  1         23  
6 1     1   1008 use HTTP::Daemon();
  1         73868  
  1         27  
7 1     1   12 use Fcntl();
  1         2  
  1         16  
8 1     1   1640 use File::Temp();
  1         16637  
  1         25  
9 1     1   23 use Symbol();
  1         2  
  1         17  
10 1     1   5 use LWP::UserAgent();
  1         2  
  1         14  
11 1     1   11324 use MIME::Base64();
  1         946  
  1         22  
12 1     1   1024 use POSIX();
  1         8922  
  1         35  
13 1     1   9 use base qw(WebService::Validator::HTML::W3C);
  1         3  
  1         2475  
14              
15             our $VERSION = '0.02';
16             our ($maximum_fork_attempts) = 4;
17              
18             =head1 NAME
19              
20             WebService::Validator::HTML::W3C::Fast - Access the W3Cs online HTML validator in a local persistent daemon
21              
22             =head1 SYNOPSIS
23              
24             use WebService::Validator::HTML::W3C::Fast;
25              
26             my $v = WebService::Validator::HTML::W3C::Fast->new(
27             validator_path => '/path/to/validator/check',
28             user => $username,
29             password => $password,
30             auto_launder_validator => 1,
31             );
32              
33             if ( $v->validate_markup(<<_HTML_) ) {
34            
35            
36            
37            
38             _HTML_
39             if ( $v->is_valid ) {
40             printf ("%s is valid\n", $v->uri);
41             } else {
42             printf ("%s is not valid\n", $v->uri);
43             foreach my $error ( @{$v->errors} ) {
44             printf("%s at line %d\n", $error->msg,
45             $error->line);
46             }
47             }
48             } else {
49             printf ("Failed to validate the supplied markup: %s\n", $v->validator_error);
50             }
51              
52             =head1 DESCRIPTION
53              
54             WebService::Validator::HTML::W3C::Fast provides access a local version of
55             the W3C's Markup validator, via WebService::Validator::HTML::W3C. It loads up
56             a small HTTP::Daemon daemon, listening on a random high port on 127.0.0.1 and
57             loads the check cgi script into a mod_perl type persistent environment for speedy
58             checking of lots of documents.
59              
60             When running under taint-mode you will need to provide the auto_launder_validator
61             argument, otherwise taint will refuse to allow the module to string eval the cgi script.
62              
63             To discourage denial of service attacks, the local web server is protected via http
64             basic auth. You can specify the desired user name and password for the server, or it
65             will use srand and rand to generate a simple password.
66              
67             if validator_path is not supplied, the validator will attempt to guess at where the script
68             is, first looking at '/usr/share/w3c-markup-validator/cgi-bin/check', which is the location of
69             the cgi script used by fedora's w3c-markup-validator package. If this fails and no-one
70             supplies defaults that other operating systems use, the validator will croak().
71              
72             NOTE for debian. At the moment, debian's version of the check script depends on using the
73             open3 function for /usr/bin/onsgmls. I'm done a quick check on this and am not intending
74             to port to debian, rather, i intend to wait for debian to upgrade their source. If anyone
75             would like to fix this, i would be happy to apply supplied patches.
76              
77             The local HTTP::Daemon will occansionally check that the parent program is still present.
78             If the parent ever exits, the HTTP::Daemon will terminate as well. This is to prevent a
79             build up of HTTP::Daemons listening on high ports b/c a test script was aborted.
80              
81             =head1 SEE ALSO
82              
83             =head1 AUTHOR
84              
85             David Dick, Eddick@cpan.orgE
86              
87             =head1 COPYRIGHT AND LICENSE
88              
89             Copyright (C) 2009 by David Dick
90              
91             This library is free software; you can redistribute it and/or modify
92             it under the same terms as Perl itself, either Perl version 5.8.8 or,
93             at your option, any later version of Perl 5 you may have available.
94              
95             =cut
96              
97             sub default_script_paths {
98 1     1 0 3 return ('/usr/share/w3c-markup-validator/cgi-bin/check');
99             }
100              
101             sub new {
102 1     1 1 19 my ($class, %params) = @_;
103 1 50       6 if ($params{validator_uri}) {
104 0         0 Carp::croak("The validator uri cannot be used here");
105             }
106 1         4 my (@default_script_paths) = default_script_paths();
107 1   33     10 my ($validator_path) = $params{validator_path} || shift @default_script_paths;
108 1         9 my ($handle) = Symbol::gensym();
109 1         62 while (not(sysopen($handle, $validator_path, Fcntl::O_RDONLY()))) {
110 1 50 33     33 if (($^E == POSIX::ENOENT()) && (not($params{validator_path})) && (@default_script_paths)) {
    50 33        
    50 33        
111 0         0 $validator_path = shift @default_script_paths;
112             } elsif ($params{validator_path}) {
113 0         0 Carp::croak("Failed to open '$params{validator_path}' for reading:$^E")
114             } elsif (($^E == POSIX::ENOENT()) && (not($params{validator_path}))) {
115 1         254 Carp::croak("Please specify the location of the w3c markup validator on the filesystem")
116             } else {
117 0           Carp::croak("Failed to open '$validator_path' for reading:$^E");
118             }
119             }
120 0           delete $params{validator_path};
121 0           my ($validator_source);
122 0           my ($result, $buffer);
123 0           while($result = read($handle, $buffer, 4096)) {
124 0           $validator_source .= $buffer;
125             }
126 0 0         unless (defined $result) {
127 0           Carp::croak("Failed to read from '$validator_path':$^E");
128             }
129 0 0         unless (close($handle)) {
130 0           Carp::croak("Failed to close '$validator_path':$^E");
131             }
132 0           $validator_source =~ s/exit/return/g;
133 0 0         if ($params{auto_launder_validator}) {
134 0 0         if ($validator_source =~ /^(.*)$/s) {
135 0           ($validator_source) = ($1);
136             }
137             }
138 0           eval <<_HANDLER_;
139             sub handler {
140             my (\$connection) = \@_;
141             \$connection->send_status_line();
142             $validator_source
143             }
144             _HANDLER_
145 0 0         if ($@) {
146 0           die($@);
147             }
148 0           my ($daemon) = HTTP::Daemon->new( 'LocalAddr' => '127.0.0.1', 'Blocking' => 0 );
149 0           my ($self) = WebService::Validator::HTML::W3C->new( 'validator_uri' => $daemon->url(), %params );
150 0           bless $self, $class;
151 0           my ($user, $password) = $self->_set_user_password(\%params);
152 0           my ($parent_pid) = $$;
153 0           my ($number_of_fork_attempts) = 0;
154             FORK: {
155 0 0 0       if (my $pid = fork()) {
  0 0          
    0          
156 0           $self->{_internal_http_server_pid} = $pid;
157 0           return $self;
158             } elsif (defined $pid) {
159 0           my ($terminated) = 0;
160 0           eval {
161 0           $self = undef;
162 0     0     local $SIG{TERM} = sub { $terminated = 1; die("Caught a terminate signal\n"); };
  0            
  0            
163 0     0     local $SIG{INT} = sub { die("Caught an interrupt signal\n"); };
  0            
164             CHECKING: {
165 0           while(my $connection = $daemon->accept()) {
  0            
166 0           my $request = $connection->get_request(1);
167 0 0         unless (check_authorisation($request, $user, $password)) {
168 0           die("Client did not authorise correctly\n");
169             }
170 0           local %ENV = setup_environment_hash($request);
171 0           local *STDIN = setup_stdin($connection);
172 0           local *STDOUT = *{$connection};
  0            
173 0           CGI::initialize_globals();
174 0           handler($connection);
175 0 0         unless (close($connection)) {
176 0           die("Failed to close client socket:$^E");
177             }
178             }
179 0 0         if ($^E == POSIX::EWOULDBLOCK()) {
180 0 0         if (kill(0, $parent_pid)) {
181 0           my ($rin) = '';
182 0           vec($rin,fileno($daemon),1) = 1;
183 0           select($rin, undef, undef, 5); # wait for 5 seconds before checking parent is still alive or for input
184 0           redo CHECKING;
185             }
186             } else {
187 0           die("Failed to accept a new connection:$^E");
188             }
189             }
190             };
191 0 0         if ($terminated) {
    0          
192 0           CORE::exit(0);
193             } elsif ($@) {
194 0           print STDERR $@;
195 0           CORE::exit(1);
196             } else {
197 0           CORE::exit(0);
198             }
199             } elsif (($^E == POSIX::EAGAIN()) && ($number_of_fork_attempts < $maximum_fork_attempts)) {
200 0           $number_of_fork_attempts += 1;
201 0           sleep 5;
202 0           redo FORK;
203             } else {
204 0           die("Failed to fork:$^E");
205             }
206             }
207             }
208              
209             sub setup_stdin {
210 0     0 0   my ($connection) = @_;
211 0           my $temp_input = File::Temp::tempfile();
212 0 0         unless ($temp_input) {
213 0           die("Failed to open a temporary file for writing:$^E");
214             }
215 0           my ($buffer) = $connection->read_buffer();
216 0           my ($remaining_length) = $ENV{CONTENT_LENGTH} - length($buffer);
217 0 0         unless ($temp_input->print($buffer)) {
218 0           die("Failed to write to temporary file:$^E");
219             }
220 0           while ($remaining_length > 0) {
221 0           my ($result) = $connection->sysread($buffer, 32*1024);
222 0 0         if (defined $result) {
223 0           $remaining_length -= $result;
224 0 0         if ($result == 0) {
225 0           die("Client did not provide enough data");
226             } else {
227 0 0         unless ($temp_input->print($buffer)) {
228 0           die("Failed to write to temporary file:$^E");
229             }
230             }
231             } else {
232 0           die("Client closed connection:$^E");
233             }
234             }
235 0 0         unless ($temp_input->seek(0, Fcntl::SEEK_SET())) {
236 0           die("Failed to seek to start of temporary file:$^E");
237             }
238 0           return *{$temp_input};
  0            
239             }
240              
241             sub check_authorisation {
242 0     0 0   my ($request, $user, $password) = @_;
243 0           my ($authorised) = 0;
244 0           my ($auth) = $request->header('Authorization');
245 0 0         if ($auth =~ s/^Basic //) {
246 0           my ($decoded) = MIME::Base64::decode_base64($auth);
247 0 0         if ($decoded =~ s/^$user://) {
248 0 0         if ($decoded eq $password) {
249 0           $authorised = 1;
250             }
251             }
252             }
253 0           return $authorised;
254             }
255              
256             sub setup_environment_hash {
257 0     0 0   my ($request) = @_;
258 0           my (%local_env);
259 0           $local_env{REQUEST_METHOD} = $request->method();
260 0           $local_env{REQUEST_URI} = $request->uri()->as_string();
261 0           $local_env{HTTP_USER_AGENT} = $request->header('User_Agent');
262 0           my ($length) = $request->header('Content_Length');
263 0 0         if ($length) {
264 0           $local_env{CONTENT_LENGTH} = $length;
265             }
266 0           my ($contentType) = $request->header('Content_Type');
267 0 0         if ($contentType) {
268 0           $local_env{CONTENT_TYPE} = $contentType;
269             }
270 0           return (%local_env);
271             }
272              
273             sub _set_user_password {
274 0     0     my ($self, $params) = @_;
275 0           my ($ua) = $self->ua(); # Allow SUPER to create a ua if desired
276 0 0         unless ($ua) {
277 0           $ua = LWP::UserAgent->new( 'agent' => __PACKAGE__ . "/$VERSION" );
278             }
279 0   0       my ($headers) = $ua->default_headers() || HTTP::Headers->new();
280 0           $headers->header('User_Agent' => $ua->agent());
281 0           my ($header_user, $header_pass) = $headers->authorization_basic();
282 0   0       my ($user) = $params->{user} || $header_user || 'Validator';
283 0   0       my ($password) = $params->{password} || $header_pass || '';
284 0 0         unless ($password) {
285 0           srand();
286 0           while(length($password) < 45) {
287 0           $password .= chr(int(rand(57)) + 65);
288             }
289             }
290 0           $headers->authorization_basic($user, $password);
291 0           $ua->default_headers($headers);
292 0           $self->ua($ua);
293 0           return ($user, $password);
294             }
295              
296             sub DESTROY {
297 0     0     my ($self) = @_;
298 0 0 0       if ((exists $self->{_internal_http_server_pid}) && ($self->{_internal_http_server_pid})) {
299 0           kill("TERM", $self->{_internal_http_server_pid});
300             }
301             }
302              
303             1;