File Coverage

blib/lib/HTML/Lint.pm
Criterion Covered Total %
statement 81 81 100.0
branch 15 16 93.7
condition 3 3 100.0
subroutine 16 16 100.0
pod 10 10 100.0
total 125 126 99.2


line stmt bran cond sub pod time code
1             package HTML::Lint;
2              
3 39     39   1817661 use warnings;
  39         360  
  39         1017  
4 39     39   163 use strict;
  39         49  
  39         697  
5              
6 39     39   12563 use HTML::Lint::Error;
  39         76  
  39         1381  
7 39     39   12512 use HTML::Lint::Parser ();
  39         110  
  39         736  
8              
9 39     39   196 use HTML::Entities ();
  39         55  
  39         21204  
10              
11             =head1 NAME
12              
13             HTML::Lint - check for HTML errors in a string or file
14              
15             =head1 VERSION
16              
17             Version 2.32
18              
19             =cut
20              
21             our $VERSION = '2.32';
22              
23             =head1 SYNOPSIS
24              
25             my $lint = HTML::Lint->new;
26             $lint->only_types( HTML::Lint::Error::STRUCTURE );
27              
28             # Parse lines of data.
29             $lint->newfile( $filename );
30             while ( my $line = <> ) {
31             $lint->parse( $line );
32             }
33             $lint->eof();
34              
35             # Or, parse an entire file at once.
36             $lint->parse_file( $filename );
37              
38             # Fetch the errors that the linter found.
39             my $error_count = $lint->errors;
40              
41             foreach my $error ( $lint->errors ) {
42             print $error->as_string, "\n";
43             }
44              
45             HTML::Lint also comes with a wrapper program called F<weblint> that handles
46             linting from the command line:
47              
48             $ weblint http://www.cnn.com/
49             http://www.cnn.com/ (395:83) <IMG> tag has no HEIGHT and WIDTH attributes.
50             http://www.cnn.com/ (395:83) <IMG> does not have ALT text defined
51             http://www.cnn.com/ (396:217) Unknown element <nobr>
52             http://www.cnn.com/ (396:241) </nobr> with no opening <nobr>
53             http://www.cnn.com/ (842:7) target attribute in <a> is repeated
54              
55             And finally, you can also get L<Apache::HTML::Lint> that passes any
56             mod_perl-generated code through HTML::Lint and get it dumped into your
57             Apache F<error_log>.
58              
59             [Mon Jun 3 14:03:31 2002] [warn] /foo.pl (1:45) </p> with no opening <p>
60             [Mon Jun 3 14:03:31 2002] [warn] /foo.pl (1:49) Unknown element <gronk>
61             [Mon Jun 3 14:03:31 2002] [warn] /foo.pl (1:56) Unknown attribute "x" for tag <table>
62              
63             =cut
64              
65             =head1 METHODS
66              
67             NOTE: Some of these methods mirror L<HTML::Parser>'s methods, but HTML::Lint
68             is not a subclass of HTML::Parser.
69              
70             =head2 new()
71              
72             Create an HTML::Lint object. (HTML::Lint is not a subclass of HTML::Parser.)
73             You may pass the types of errors you want to check for in the
74             C<only_types> parameter.
75              
76             my $lint = HTML::Lint->new( only_types => HTML::Lint::Error::STRUCTURE );
77              
78             If you want more than one, you must pass an arrayref:
79              
80             my $lint = HTML::Lint->new(
81             only_types => [HTML::Lint::Error::STRUCTURE, HTML::Lint::Error::FLUFF] );
82              
83             =cut
84              
85             sub new {
86 43     43 1 23668 my $class = shift;
87 43         115 my %args = @_;
88              
89 43         174 my $self = {
90             _errors => [],
91             _types => [],
92             };
93 43         110 bless $self, $class;
94              
95 43 100       164 if ( my $only = $args{only_types} ) {
96 2 100       9 $self->only_types( ref $only eq 'ARRAY' ? @{$only} : $only );
  1         4  
97 2         4 delete $args{only_types};
98             }
99              
100 43         133 warn "Unknown argument $_\n" for keys %args;
101              
102 43         116 return $self;
103             }
104              
105             =head2 $lint->parser()
106              
107             Returns the parser object for this object, creating one if necessary.
108              
109             =cut
110              
111             sub parser {
112 902     902 1 851 my $self = shift;
113              
114 902 100       1358 if ( not $self->{_parser} ) {
115 47     91   449 $self->{_parser} = HTML::Lint::Parser->new( sub { $self->gripe( @_ ) } );
  91         204  
116 47         293 $self->{_parser}->ignore_elements( qw(script style) );
117             }
118              
119 902         2735 return $self->{_parser};
120             }
121              
122             =head2 $lint->parse( $text )
123              
124             =head2 $lint->parse( $code_ref )
125              
126             Passes in a chunk of HTML to be linted, either as a piece of text,
127             or a code reference.
128             See L<HTML::Parser>'s C<parse> method for details.
129              
130             =cut
131              
132             sub parse {
133 668     668 1 2569 my $self = shift;
134              
135 668         876 my $rc = $self->parser->parse( @_ );
136              
137 668         833 $self->{_parse_called} = 1;
138              
139 668         929 return $rc;
140             }
141              
142             =head2 $lint->parse_file( $file )
143              
144             Analyzes HTML directly from a file. The C<$file> argument can be a filename,
145             an open file handle, or a reference to an open file handle.
146             See L<HTML::Parser>'s C<parse_file> method for details.
147              
148             =cut
149              
150             sub parse_file {
151 1     1 1 5 my $self = shift;
152              
153 1         3 my $rc = $self->parser->parse_file( @_ );
154              
155 1         2 $self->{_parse_called} = 1;
156 1         3 $self->eof;
157              
158 1         31 return $rc;
159             }
160              
161             =head2 $lint->eof()
162              
163             Signals the end of a block of text getting passed in. This must be
164             called to make sure that all parsing is complete before looking at errors.
165              
166             Any parameters (and there shouldn't be any) are passed through to
167             HTML::Parser's eof() method.
168              
169             =cut
170              
171             sub eof { ## no critic ( Subroutines::ProhibitBuiltinHomonyms )
172 43     43 1 163 my $self = shift;
173              
174 43         103 my $rc;
175 43         120 my $parser = $self->parser;
176 43 50       166 if ( $parser ) {
177 43         265 $rc = $parser->eof(@_);
178 43         81 delete $self->{_parser};
179 43         134 $self->{_eof_called} = 1;
180             }
181              
182 43         988 return $rc;
183             }
184              
185             =head2 $lint->errors()
186              
187             In list context, C<errors()> returns all of the errors found in the
188             parsed text. Each error is an object of the type L<HTML::Lint::Error>.
189              
190             In scalar context, it returns the number of errors found.
191              
192             =cut
193              
194             sub errors {
195 46     46 1 207 my $self = shift;
196              
197 46 100       211 if ( !$self->{_parse_called} ) {
    100          
198 1         2 $self->gripe( 'api-parse-not-called' );
199             }
200             elsif ( !$self->{_eof_called} ) {
201 3         10 $self->gripe( 'api-eof-not-called' );
202             }
203              
204 46 100       111 if ( wantarray ) {
205 38         57 return @{$self->{_errors}};
  38         141  
206             }
207             else {
208 8         14 return scalar @{$self->{_errors}};
  8         24  
209             }
210             }
211              
212             =head2 $lint->clear_errors()
213              
214             Clears the list of errors, in case you want to print and clear, print and clear.
215              
216             =cut
217              
218             sub clear_errors {
219 3     3 1 4 my $self = shift;
220              
221 3         9 $self->{_errors} = [];
222              
223 3         5 return;
224             }
225              
226             =head2 $lint->only_types( $type1[, $type2...] )
227              
228             Specifies to only want errors of a certain type.
229              
230             $lint->only_types( HTML::Lint::Error::STRUCTURE );
231              
232             Calling this without parameters makes the object return all possible
233             errors.
234              
235             The error types are C<STRUCTURE>, C<HELPER> and C<FLUFF>.
236             See L<HTML::Lint::Error> for details on these types.
237              
238             =cut
239              
240             sub only_types {
241 5     5 1 11 my $self = shift;
242              
243 5         11 $self->{_types} = [@_];
244              
245 5         8 return;
246             }
247              
248             =head2 $lint->gripe( $errcode, [$key1=>$val1, ...] )
249              
250             Adds an error message, in the form of an L<HTML::Lint::Error> object,
251             to the list of error messages for the current object. The file,
252             line and column are automatically passed to the L<HTML::Lint::Error>
253             constructor, as well as whatever other key value pairs are passed.
254              
255             For example:
256              
257             $lint->gripe( 'attr-repeated', tag => $tag, attr => $attr );
258              
259             Usually, the user of the object won't call this directly, but just
260             in case, here you go.
261              
262             =cut
263              
264             sub gripe {
265 95     95 1 116 my $self = shift;
266              
267             my $error = HTML::Lint::Error->new(
268 95         208 $self->{_file}, $self->parser->{_line}, $self->parser->{_column}, @_ );
269              
270 95         209 my @keeps = @{$self->{_types}};
  95         193  
271 95 100 100     275 if ( !@keeps || $error->is_type(@keeps) ) {
272 87         107 push( @{$self->{_errors}}, $error );
  87         174  
273             }
274              
275 95         209 return;
276             }
277              
278              
279             =head2 $lint->newfile( $filename )
280              
281             Call C<newfile()> whenever you switch to another file in a batch
282             of linting. Otherwise, the object thinks everything is from the
283             same file. Note that the list of errors is NOT cleared.
284              
285             Note that I<$filename> does NOT need to match what's put into C<parse()>
286             or C<parse_file()>. It can be a description, a URL, or whatever.
287              
288             You should call C<newfile()> even if you are only validating one file. If
289             you do not call C<newfile()> then your errors will not have a filename
290             attached to them.
291              
292             =cut
293              
294             sub newfile {
295 36     36 1 10355 my $self = shift;
296 36         70 my $file = shift;
297              
298 36         115 delete $self->{_parser};
299 36         66 delete $self->{_parse_called};
300 36         63 delete $self->{_eof_called};
301 36         85 $self->{_file} = $file;
302 36         73 $self->{_line} = 0;
303 36         69 $self->{_column} = 0;
304 36         77 $self->{_first_seen} = {};
305              
306 36         77 return $self->{_file};
307             } # newfile
308              
309             1;
310              
311             =head1 MODIFYING HTML::LINT'S BEHAVIOR
312              
313             Sometimes you'll have HTML that for some reason cannot conform to
314             HTML::Lint's expectations. For those instances, you can use HTML
315             comments to modify HTML::Lint's behavior.
316              
317             Say you have an image where for whatever reason you can't get
318             dimensions for the image. This HTML snippet:
319              
320             <img src="logo.png" height="120" width="50" alt="Company logo">
321             <img src="that.png">
322              
323             causes this error:
324              
325             foo.html (14:20) <img src="that.png"> tag has no HEIGHT and WIDTH attributes
326              
327             But if for some reason you can't get those dimensions when you build
328             the page, you can at least stop HTML::Lint complaining about it.
329              
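330             <img src="this.png" height="120" width="50" alt="Company logo">
331             <!-- html-lint elem-img-sizes-missing: off, elem-img-alt-missing: off -->
332             <img src="that.png">
333             <!-- html-lint elem-img-sizes-missing: on, elem-img-alt-missing: on -->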
334              
335             If you want to turn off all HTML::Lint warnings for a block of code, use
336              
337             <!-- html-lint all: off -->
338              
339             And turn them back on with
340              
341             <!-- html-lint all: on -->
342              
343             You don't have to use "on" and "off". For "on", you can use "true"
344             or "1". For "off", you can use "0" or "false".
345              
346             For a list of possible errors and their codes, see L<HTML::Lint::Error>,
347             or run F<perldoc HTML::Lint::Error>.
348              
349             =head1 BUGS, WISHES AND CORRESPONDENCE
350              
351             All bugs and requests are now being handled through GitHub.
352              
353             https://github.com/petdance/html-lint/issues
354              
355             DO NOT send bug reports to http://rt.cpan.org/ or http://code.google.com/
356              
357             =head1 TODO
358              
359             =over 4
360              
361             =item * Check for attributes that require values
362              
363             =item * <TABLE>s that have no rows.
364              
365             =item * Form fields that aren't in a FORM
366              
367             =item * DIVs with nothing in them.
368              
369             =item * HEIGHT= that have percents in them.
370              
371             =item * Check for goofy stuff like:
372              
373             <b><li></b><b>Hello Reader - Spanish Level 1 (K-3)</b>
374              
375             =back
    376              
    377             =head1 COPYRIGHT & LICENSE
    378              
    379             Copyright 2005-2018 Andy Lester.
    380              
    381             This program is free software; you can redistribute it and/or modify it
    382             under the terms of the Artistic License v2.0.
    383              
    384             http://www.opensource.org/licenses/Artistic-2.0
    385              
    386             Please note that these modules are not products of or supported by the
    387             employers of the various contributors to the code.
    388              
    389             =head1 AUTHOR
    390              
    391             Andy Lester, andy at petdance.com
    392              
    393             =cut
    394              
    395             1;