File Coverage

blib/lib/HTML/Doctype.pm
Criterion Covered Total %
statement 15 66 22.7
branch 0 30 0.0
condition 0 9 0.0
subroutine 5 14 35.7
pod n/a
total 20 119 16.8


line stmt bran cond sub pod time code
1             package HTML::Doctype;
2 1     1   55324 use 5.008;
  1         4  
  1         40  
3 1     1   6 use strict;
  1         2  
  1         37  
4 1     1   5 use warnings;
  1         7  
  1         63  
5            
6             our $VERSION = '0.02';
7            
8             package HTML::Doctype::Detector;
9 1     1   5 use strict;
  1         1  
  1         44  
10 1     1   6 use warnings;
  1         1  
  1         5810  
11            
12             sub new
13             {
14 0     0     my $class = shift;
15 0           my $p = shift;
16 0           bless { p => $p }, $class;
17             }
18            
19             sub _type
20             {
21 0     0     my $self = shift;
22 0           my $type = shift;
23 0           my $doct = shift;
24            
25 0           my $p = $self->{p};
26            
27 0           $self->{type} = $type;
28 0           $self->{location} = $p->get_location;
29 0           $self->{doctype} = $doct;
30            
31 0           $p->halt;
32 0           return;
33             }
34            
35 0     0     sub public_id { shift->{doctype}{ExternalId}{PublicId} }
36 0     0     sub system_id { shift->{doctype}{ExternalId}{SystemId} }
37            
38 0     0     sub has_doctype { defined $_[0]->{doctype} }
39 0 0   0     sub is_xhtml { defined $_[0]->{type} and $_[0]->{type} eq "XHTML" }
40            
41             # fails if
42             #
43             # * the first [ in the decl does not open the internal subset
44             # * the internal subset contains ]>
45             # * comments outside the internal subset contain >
46             # * the internal subset is not closed with ]>
47             #
48             sub doctype_length
49             {
50 0     0     my $self = shift;
51 0           my $document = shift;
52 0           my $doctyped = $self->{doctype};
53 0           my $location = $self->{location};
54 0           my $gtpos = 0;
55 0           my $gtskip = 0;
56            
57             # no document type declaration
58 0 0         return $gtpos unless defined $doctyped;
59            
60             # < of
61 0           my $ltpos = $location->{EntityOffset} - 9;
62            
63 0           $gtpos = index $document, ">", $ltpos;
64            
65             # malformed doctype, missing >
66 0 0         return if $gtpos < 0;
67            
68 0 0 0       $gtskip += $doctyped->{ExternalId}{PublicId} =~ />/g
69             if defined $doctyped and exists $doctyped->{ExternalId}{PublicId};
70            
71 0 0 0       $gtskip += $doctyped->{ExternalId}{SystemId} =~ />/g
72             if defined $doctyped and exists $doctyped->{ExternalId}{SystemId};
73            
74 0           $gtpos = index $document, ">", $gtpos # +1 ?
75             while $gtskip--;
76            
77             # malformed doctype, missing proper >
78 0 0         return if $gtpos < 0;
79            
80             # extract possible doctype
81 0           my $text = substr $document, $ltpos, $gtpos - $ltpos + 1;
82            
83             # look for ]> if suspected internal subset
84 0 0         if (index($text, "[") >= 0)
85             {
86 0           my $gtpos2 = index $document, "]>", $ltpos;
87 0 0         $gtpos = $gtpos2 + 1 if $gtpos2 >= 0;
88             }
89            
90 0           return $gtpos - $ltpos + 1
91             }
92            
93             sub start_dtd
94             {
95 0     0     my $self = shift;
96 0           my $doct = shift;
97            
98             # ignore specified document type declarations without
99             # public or system identifier and implied document type
100             # declarations (which have just a GeneratedSystemId key)
101 0 0 0       return unless exists $doct->{ExternalId}{PublicId} or
102             exists $doct->{ExternalId}{SystemId};
103            
104 0           my $puid = $doct->{ExternalId}{PublicId};
105            
106             # no public identifier means HTML
107 0 0         return $self->_type("HTML", $doct) unless defined $puid;
108            
109             # split public identifier at //
110 0           my @comp = split(/\/\//, $puid);
111            
112             # malformed public identifiers mean HTML
113 0 0         return $self->_type("HTML", $doct) unless @comp > 2;
114            
115             # we might want something different than \s and \S here
116             # but it is not clear to me what exactly we should expect
117 0 0         return $self->_type("HTML", $doct) unless $comp[2] =~ /^DTD\s+(\S+)/;
118            
119             # the first token of the public text description must include
120             # the string "XHTML", see XHTML M12N section 3.1, and see also
121             # http://w3.org/mid/41584c61.156809450@smtp.bjoern.hoehrmann.de
122 0 0         return $self->_type("HTML", $doct) unless $1 =~ /XHTML/;
123            
124             # otherwise considers this document XHTML
125 0           return $self->_type("XHTML", $doct)
126             }
127            
128             sub start_element
129             {
130 0     0     my $self = shift;
131 0           my $elem = shift;
132            
133             # no xmlns attribute means HTML
134 0 0         return $self->_type("HTML") unless exists $elem->{Attributes}{XMLNS};
135            
136 0           my $xmlns = $elem->{Attributes}{XMLNS};
137            
138             # this should use the corresponding helper function to deal
139             # with some potential edge cases but it is not in CVS yet
140 0 0         return $self->_type("HTML") unless $xmlns->{Defaulted} eq "specified";
141            
142             # see above
143             # return $self->_type("HTML") unless "http://www.w3.org/1999/xhtml" eq
144             # join '', map { $_->{Data} } @{$xmlns->{CdataChunks}};
145            
146 0           return $self->_type("XHTML")
147             }
148            
149             1;
150            
151             __END__