File Coverage

lib/CSS/Inliner/TreeBuilder.pm
Criterion Covered Total %
statement 36 36 100.0
branch 4 4 100.0
condition n/a
subroutine 7 7 100.0
pod 2 2 100.0
total 49 49 100.0


line stmt bran cond sub pod time code
1             package CSS::Inliner::TreeBuilder;
2 26     26   172 use strict;
  26         50  
  26         725  
3 26     26   132 use warnings;
  26         51  
  26         794  
4              
5 26     26   131 use Storable qw(dclone);
  26         53  
  26         972  
6              
7       26     BEGIN {
8             # $HTML::TreeBuilder::DEBUG = 1;
9             }
10              
11 26     26   136 use base qw(HTML::TreeBuilder);
  26         38  
  26         17963  
12              
13             =pod
14              
15             =head1 NAME
16              
17             CSS::Inliner::TreeBuilder - Parser that builds a HTML syntax tree
18              
19             =head1 SYNOPSIS
20              
21             use CSS::Inliner::TreeBuilder;
22              
23             foreach my $file_name (@ARGV) {
24             my $tree = CSS::Inliner::TreeBuilder->new();
25             $tree->parse_file($file_name);
26              
27             print "Hey, here's a dump of the parse tree of $file_name:\n";
28             $tree->dump(); # a method we inherit from HTML::Element
29             print "And here it is, bizarrely rerendered as HTML:\n", $tree->as_HTML, "\n";
30              
31             $tree = $tree->delete();
32             }
33              
34             =head1 DESCRIPTION
35              
36             Class to handle parsing of generic HTML
37              
38             This sub-module is derived from HTML::TreeBuilder. The aforementioned module is almost completely incapable
39             of handling non-standard HTML4 documents commonly seen in the wild, let alone HTML5 documents. This module
40             basically performs some minor adjustments to the way parsing and printing occur such that an acceptable result
41             can be reached when handling real world documents.
42              
43             =cut
44              
45             sub as_HTML {
46 25     25 1 62 my $self = shift;
47              
48 25         38 my $html;
49 25 100       95 if ($self->implicit_tags() == 0) {
50 1         31 $html = $self->SUPER::as_HTML(@_);
51              
52             #strip trailing and leading whitespace which our relaxed mode may have
53             #inadvertently added
54 1         2766 $html =~ s/^\s+|\s+$//g;
55              
56             # our indentation is messed up by 1 space, try to clean it up
57 1         11 my @lines = split /\n/, $html;
58 1         6 for (my $count = 0; $count < scalar @lines; $count++) {
59 9         29 $lines[$count] =~ s/^ //;
60             }
61              
62             # put html back together after whitespace processing, probably still indentation
63             # problems, but this is the best we can do without some sort of indentation library
64 1         6 $html = join("\n",@lines);
65             }
66             else {
67 24         486 $html = $self->SUPER::as_HTML(@_);
68             }
69              
70 25         40130 return $html;
71             }
72              
73             sub parse_content {
74 52     52 1 101 my $self = shift;
75              
76 52 100       206 if ($self->implicit_tags() == 0) {
77             # protect doctype declarations... parser is too strict here
78 28         443 $_[0] =~ s/\<!(doctype)\s+([^\>]+)\>/\<decl ~pi="1"\>$1 $2<\/decl\>/gi;
79              
80 28         144 $self->SUPER::parse_content(@_);
81              
82 28         133989 $self->{_tag} = '~literal';
83 28         80 $self->{text} = '';
84              
85 28         135 my @decls = $self->look_down('_tag','decl','~pi','1');
86 28         4916 foreach my $decl (@decls) {
87 6         25 my $text = '<!' . $decl->as_text() . '>';
88 6         199 my $literal = HTML::Element->new('~literal', 'text' => $text );
89              
90 6         213 $decl->replace_with($literal);
91             }
92             }
93             else {
94 24         373 $self->SUPER::parse_content(@_);
95             }
96              
97 52         122320 return();
98             }
99              
100             1;