File Coverage

lib/CSS/Inliner/TreeBuilder.pm
Criterion Covered Total %
statement 36 36 100.0
branch 4 4 100.0
condition n/a
subroutine 7 7 100.0
pod 2 2 100.0
total 49 49 100.0


line stmt bran cond sub pod time code
1             package CSS::Inliner::TreeBuilder;
2 26     26   230 use strict;
  26         60  
  26         783  
3 26     26   155 use warnings;
  26         81  
  26         907  
4              
5 26     26   158 use Storable qw(dclone);
  26         72  
  26         1105  
6              
7       26     BEGIN {
8             # $HTML::TreeBuilder::DEBUG = 1;
9             }
10              
11 26     26   154 use base qw(HTML::TreeBuilder);
  26         49  
  26         20541  
12              
13             =pod
14              
15             =head1 NAME
16              
17             CSS::Inliner::TreeBuilder - Parser that builds a HTML syntax tree
18              
19             =head1 SYNOPSIS
20              
21             use CSS::Inliner::TreeBuilder;
22              
23             foreach my $file_name (@ARGV) {
24             my $tree = CSS::Inliner::TreeBuilder->new();
25             $tree->parse_file($file_name);
26              
27             print "Hey, here's a dump of the parse tree of $file_name:\n";
28             $tree->dump(); # a method we inherit from HTML::Element
29             print "And here it is, bizarrely rerendered as HTML:\n", $tree->as_HTML, "\n";
30              
31             $tree = $tree->delete();
32             }
33              
34             =head1 DESCRIPTION
35              
36             Class to handle parsing of generic HTML
37              
38             This sub-module is derived from HTML::TreeBuilder. The aforementioned module is almost completely incapable
39             of handling non-standard HTML4 documents commonly seen in the wild, let alone HTML5 documents. This module
40             basically performs some minor adjustments to the way parsing and printing occur such that an acceptable result
41             can be reached when handling real world documents.
42              
43             =cut
44              
45             sub as_HTML {
46 25     25 1 77 my $self = shift;
47              
48 25         47 my $html;
49 25 100       104 if ($self->implicit_tags() == 0) {
50 1         29 $html = $self->SUPER::as_HTML(@_);
51              
52             #strip trailing and leading whitespace which our relaxed mode may have
53             #inadvertently adds
54 1         2853 $html =~ s/^\s+|\s+$//g;
55              
56             # our indentation is messed up by 1 space, try to clean it up
57 1         40 my @lines = split /\n/, $html;
58 1         8 for (my $count = 0; $count < scalar @lines; $count++) {
59 9         31 $lines[$count] =~ s/^ //;
60             }
61              
62             # put html back together after whitespace processing, probably still indentation
63             # problems, but this is the best we can do without some sort of indentation library
64 1         5 $html = join("\n",@lines);
65             }
66             else {
67 24         518 $html = $self->SUPER::as_HTML(@_);
68             }
69              
70 25         42474 return $html;
71             }
72              
73             sub parse_content {
74 52     52 1 129 my $self = shift;
75              
76 52 100       205 if ($self->implicit_tags() == 0) {
77             # protect doctype declarations... parser is too strict here
78 28         527 $_[0] =~ s/\<!(doctype)\s+([^>]+)\>/\<decl\>$1 $2<\/decl\>/gi;
79              
80 28         180 $self->SUPER::parse_content(@_);
81              
82 28         139384 $self->{_tag} = '~literal';
83 28         89 $self->{text} = '';
84              
85 28         146 my @decls = $self->look_down('_tag','decl','~pi','1');
86 28         5299 foreach my $decl (@decls) {
87 6         38 my $text = '<!' . $decl->as_text() . '>';
88 6         206 my $literal = HTML::Element->new('~literal', 'text' => $text );
89              
90 6         211 $decl->replace_with($literal);
91             }
92             }
93             else {
94 24         1042 $self->SUPER::parse_content(@_);
95             }
96              
97 52         129793 return();
98             }
99              
100             1;