File Coverage

blib/lib/HTML/Parser/Simple/Reporter.pm
Criterion Covered Total %
statement 47 47 100.0
branch 8 10 80.0
condition 5 8 62.5
subroutine 6 6 100.0
pod 2 2 100.0
total 68 73 93.1


line stmt bran cond sub pod time code
1             package HTML::Parser::Simple::Reporter;
2              
3 1     1   528 use strict;
  1         1  
  1         41  
4 1     1   5 use warnings;
  1         1  
  1         32  
5              
6 1     1   426 use HTML::Parser::Simple::Attributes;
  1         2  
  1         36  
7              
8 1     1   6 use Moo;
  1         10  
  1         4  
9              
10             extends 'HTML::Parser::Simple';
11              
12             our $VERSION = '2.02';
13              
14             # -----------------------------------------------
15              
16             sub traverse
17             {
18 22     22 1 23 my($self, $node, $output, $depth) = @_;
19 22   100     37 $depth ||= 0;
20 22         42 my(@child) = $node -> getAllChildren;
21 22         142 my($metadata) = $node -> getNodeValue;
22 22         72 my($content) = $$metadata{content};
23 22         24 my($name) = $$metadata{name};
24              
25             # We ignore the root, which means we ignore the DOCTYPE.
26              
27 22 100       50 if ($name ne 'root')
28             {
29 21         48 my($s) = (' ' x ($depth - 1) ) . "$name. Attributes: ";
30 21         380 my($p) = HTML::Parser::Simple::Attributes -> new;
31 21         88 my($a) = $p -> parse($$metadata{attributes});
32 21         44 $s .= $p -> hashref2string($a) . '. Content:';
33 21         23 my($c) = '';
34              
35 21         42 for my $index (0 .. $#child + 1)
36             {
37 41 100 66     142 $c .= $index <= $#$content && defined($$content[$index]) ? $$content[$index] : '';
38             }
39              
40 21         44 $c =~ s/^\s+//;
41 21         30 $c =~ s/\s+$//;
42 21 100       35 $s .= " $c" if (length $c);
43              
44 21         56 push @$output, $s;
45             }
46              
47 22         53 for my $index (0 .. $#child)
48             {
49 21         65 $self -> traverse($child[$index], $output, $depth + 1);
50             }
51              
52             } # End of traverse.
53              
54             # -----------------------------------------------
55              
56             sub traverse_file
57             {
58 1     1 1 12 my($self, $input_file_name) = @_;
59 1   33     13 $input_file_name ||= $self -> input_file;
60              
61 1         3 $self -> input_file($input_file_name);
62 1         7 $self -> log("Reading $input_file_name");
63              
64 1 50       44 open(INX, $input_file_name) || Carp::croak "Can't open($input_file_name): $!";
65 1         2 my($html);
66 1         24 read(INX, $html, -s INX);
67 1         8 close INX;
68              
69 1 50       4 Carp::croak "Can't read($input_file_name): $!" if (! defined $html);
70              
71 1         3 $self -> log('Parsing');
72              
73 1         9 $self -> parse($html);
74              
75 1         32 $self -> log('Traversing');
76              
77 1         2 my($output) = [];
78              
79 1         7 $self -> traverse($self -> root, $output, 0);
80              
81 1         7 return $output;
82              
83             } # End of traverse_file.
84              
85             # -----------------------------------------------
86              
87             1;
88              
89             =head1 NAME
90              
91             HTML::Parser::Simple::Reporter - A sub-class of HTML::Parser::Simple
92              
93             =head1 Synopsis
94              
95             #!/usr/bin/env perl
96              
97             use strict;
98             use warnings;
99              
100             use HTML::Parser::Simple::Reporter;
101              
102             # -------------------------
103              
104             # Method 1:
105              
106             my($p) = HTML::Parser::Simple::Reporter -> new(input_file => 'data/s.1.html');
107             my($s) = $p -> traverse_file;
108              
109             print "$_\n" for @$s;
110              
111             # Method 2:
112              
113             my($p) = HTML::Parser::Simple::Reporter -> new;
114             my($s) = $p -> traverse_file(input_file => 'data/s.1.html');
115              
116             print "$_\n" for @$s;
117              
118             See scripts/traverse.file.pl.
119              
120             =head1 Description
121              
122             C is a pure Perl module.
123              
124             It is a sub-class of L.
125              
126             Specifically, this module overrides the method L, to demonstrate
127             a different way of formatting the output.
128              
129             It parses HTML V 4 files, and generates a tree of nodes, with 1 node per HTML tag.
130              
131             The data associated with each node is documented in the L.
132              
133             See also L and L.
134              
135             =head1 Distributions
136              
137             This module is available as a Unix-style distro (*.tgz).
138              
139             See http://savage.net.au/Perl-modules.html for details.
140              
141             See http://savage.net.au/Perl-modules/html/installing-a-module.html for
142             help on unpacking and installing.
143              
144             =head1 Constructor and initialization
145              
146             new(...) returns an object of type C.
147              
148             This is the class contructor.
149              
150             Usage: C<< HTML::Parser::Simple::Reporter -> new() >>.
151              
152             This method takes a hashref of options.
153              
154             Call C as C<< new({option_1 => value_1, option_2 => value_2, ...}) >>.
155              
156             Available options (each one of which is also a method):
157              
158             =over 4
159              
160             =item o None specific to this class
161              
162             =back
163              
164             But since this class is a sub-class of L, it share all the options to
165             C<< new() >> documented in that class: L.
166              
167             =head1 Methods
168              
169             This module is a sub-class of L, and inherits all its methods.
170              
171             Further, it overrides the L method.
172              
173             =head2 traverse($node, $output, $depth)
174              
175             Returns $output as an arrayref of strings.
176              
177             Traverses the tree built by calling L.
178              
179             Parameters:
180              
181             =over 4
182              
183             =item o $node
184              
185             The node at which to start the traversal. This is normally $self -> root.
186              
187             =item o $output
188              
189             The arrayref in which output is stored. It is normally used like this:
190              
191             my($arrayref) = [];
192              
193             $p -> traverse($p -> root, $arrayref);
194              
195             print "$_\n" for @$arrayref;
196              
197             =item o $depth
198              
199             The depth of $node within the tree. This is normally set to 0.
200              
201             In C<< traverse() >> it is used to indent the output.
202              
203             If not specified, it defaults to 0.
204              
205             =back
206              
207             Lastly note that this method ignores the root of the tree, and hence ignores the DOCTYPE which is stored
208             as an attribute of the root.
209              
210             =head2 traverse_file($input_file_name)
211              
212             Returns an arrayref of formatted text generated from the nodes in the tree built by calling
213             L.
214              
215             Traverses the given file, or the file named in C<< new(input_file => $name) >>, or the file named in
216             C<< input_file($name) >>.
217              
218             Basically it does this (recalling that this class sub-classes L):
219              
220             # Read file and store contents in $html.
221              
222             $self -> parse($html);
223              
224             my($output) = [];
225              
226             $self -> traverse($self -> root, $output, 0);
227              
228             return $output;
229              
230             However, since this class has overridden the L method, the output is
231             not written anywhere, but rather is stored in an arrayref, and returned as the result of this method.
232              
233             Note: The parameter passed in to C<< traverse_file($input_file_name) >>, takes precedence over the
234             I parameter passed in to C<< new() >>, and over the internal value set with
235             C<< input_file($in_file_name) >>.
236              
237             Lastly, the parameter passed in to C<< traverse_file($input_file_name) >> is used to update
238             the internal value set with the I parameter passed in to C<< new() >>,
239             or set with a call to C<< input_file($in_file_name) >>.
240              
241             See the L for sample code. See also scripts/traverse.file.pl.
242              
243             =head1 FAQ
244              
245             See L.
246              
247             =head1 Author
248              
249             C was written by Ron Savage Iron@savage.net.auE> in 2009.
250              
251             Home page: L.
252              
253             =head1 Copyright
254              
255             Australian copyright (c) 2009 Ron Savage.
256              
257             All Programs of mine are 'OSI Certified Open Source Software';
258             you can redistribute them and/or modify them under the terms of
259             The Artistic License, a copy of which is available at:
260             http://www.opensource.org/licenses/index.html
261              
262             =cut