File Coverage

blib/lib/XML/DOM2/Parser.pm
Criterion Covered Total %
statement 56 84 66.6
branch 16 30 53.3
condition 3 6 50.0
subroutine 11 17 64.7
pod 14 14 100.0
total 100 151 66.2


line stmt bran cond sub pod time code
1             package XML::DOM2::Parser;
2              
3             =head1 NAME
4              
5             XML::DOM2::Parser - Sax based xml parser for XML::DOM2
6              
7             =head1 DESCRIPTION
8              
9             This parser was constructed using XML::SAX::PurePerl which
10             Was known at the time to lack a number of calls which where
11             important for dealing with things like document type and
12             text formating and xml decls. hopfully in the future this
13             will be fixed and this method will be able to take advantage
14             of those part of an xml page.
15              
16             =cut
17              
18 3     3   18 use strict;
  3         6  
  3         104  
19 3     3   19 use base qw(XML::SAX::Base);
  3         6  
  3         4320  
20 3     3   90888 use Carp;
  3         10  
  3         4284  
21              
22             =head2 $parser->new( %options )
23              
24             Create a new parser object.
25              
26             =cut
27             sub new
28             {
29 2     2 1 8 my ($proto, %opts) = @_;
30 2         5 $opts{'inline'} = 1;
31 2 50       19 if(not $opts{'document'}) {
32 0         0 croak "Unable to parse xml without document";
33             }
34 2         11 return bless \%opts, $proto;
35             }
36              
37             =head2 $parser->document()
38              
39             Return the document object
40              
41             =cut
42             sub document
43             {
44 28     28 1 44 my ($self) = @_;
45 28         307 return $self->{'document'};
46             }
47              
48             =head2 $parser->start_document( $document )
49              
50             Called at the start of a document.
51              
52             =cut
53             sub start_document {
54 2     2 1 586 my ($self, $doc) = @_;
55 2         9 $self->{'inline'} = 0;
56             }
57              
58             =head2 $parser->end_document()
59              
60             Called at the end of a document.
61              
62             =cut
63             sub end_document {
64 2     2 1 214 my ($self) = @_;
65             }
66              
67             =head2 $parser->start_element( $node )
68              
69             Start a new xml element
70              
71             =cut
72             sub start_element
73             {
74 20     20 1 8871 my ($self, $node) = @_;
75 20         123 $self->text;
76             # ELEMENT
77             # LocalName - The name of the element minus any namespace prefix it may have come with in the document.
78             # NamespaceURI - The URI of the namespace associated with this element, or the empty string for none.
79             # Attributes - A set of attributes as described below.
80             # Name - The name of the element as it was seen in the document (i.e. including any prefix associated with it)
81             # Prefix - The prefix used to qualify this element’s namespace, or the empty string if none.
82              
83 20         21 my $element;
84 20         30 my $parent = $self->{'parent'};
85              
86 20 50 66     145 if(not $parent and not $self->{'inline'}) {
87 2         21 $self->document->doctype->name($node->{'LocalName'});
88             }
89              
90 20 50       57 if( $node->{'LocalName'} ) {
91 20 100       42 if($parent) {
92             # Name spaces
93 18 50       42 my $ns = $self->document->getNamespace( $node->{'Prefix'} ) if $node->{'Prefix'};
94 18 50 33     48 warn "Could not get namespace for node: ".$node->{'Prefix'}."\n" if $node->{'Prefix'} && not defined($ns);
95 18         46 $element = $parent->createChildElement($node->{'LocalName'},
96             document => $self->document,
97             namespace => $ns,
98             );
99             } else {
100             # This would be a root element (document)
101 2         8 $self->{'parents'} = [];
102 2         8 $element = $self->document->createElement( $node->{'LocalName'}, document => $self->document );
103 2         7 $self->document->documentElement($element);
104             # Name spaces, we do this first so later on we don't try adding attributes
105             # into the document element that have namespaces yet to be added in the hash
106             # order (perl!)
107 2         8 my $ns = $self->document->getNamespace( 'xmlns' );
108 2         6 foreach my $a (keys(%{$node->{'Attributes'}})) {
  2         53  
109 0         0 my $attribute = $node->{'Attributes'}->{$a};
110 0 0       0 if($attribute->{'Name'} eq 'xmlns') {
    0          
111             # warn "Namespace ".$attribute->{'Prefix'}.':'.$attribute->{'Name'}.'='.$attribute->{'Value'}." to ".$node->{'Name'}."\n";
112 0         0 $element->setAttribute( $attribute->{'LocalName'}, $attribute->{'Value'} );
113             } elsif($attribute->{'Prefix'} eq 'xmlns') {
114             # warn "NSW ".$attribute->{'Prefix'}.':'.$attribute->{'Name'}.'='.$attribute->{'Value'}." to ".$node->{'Name'}."\n";
115 0         0 $self->document->createNamespace($attribute->{'LocalName'}, $attribute->{'Value'});
116             } else {
117 0         0 next;
118             }
119 0         0 delete($node->{'Attributes'}->{$a});
120             }
121             }
122             }
123              
124             # ATTRIBUTES {}
125             # LocalName - The name of the attribute minus any namespace prefix it may have come with in the document.
126             # NamespaceURI - The URI of the namespace associated with this attribute. If the attribute had no prefix, then this consists of just the empty string.
127             # Name - The attribute’s name as it appeared in the document, including any namespace prefix.
128             # Prefix - The prefix used to qualify this attribute’s namepace, or the empty string if none.
129             # Value - VALUE.
130              
131 20         36 foreach my $attribute (values(%{$node->{'Attributes'}})) {
  20         68  
132 6 50       17 if($attribute->{'Prefix'}) {
133 0         0 my $ns = $self->document->getNamespace( $attribute->{'Prefix'} );
134 0 0       0 if(not $ns) {
135 0         0 warn "Could not get namespace for attribute: ".$attribute->{'Prefix'}." (".$attribute->{'NamespaceURI'}.")\n";
136 0         0 next;
137             }
138 0         0 $element->setAttributeNS( $ns, $attribute->{'LocalName'}, $attribute->{'Value'} );
139             } else {
140 6         51 $element->setAttribute( $attribute->{'LocalName'}, $attribute->{'Value'} );
141             }
142             }
143              
144 20 100       67 push(@{$self->{'parents'}}, $self->{'parent'})if $self->{'parent'};
  18         38  
145 20         82 $self->{'parent'} = $element;
146              
147             }
148              
149             =head2 $parser->end_element( $element )
150              
151             Ends an xml element
152              
153             =cut
154             sub end_element
155             {
156 20     20 1 2956 my ($self, $element) = @_;
157 20         53 $self->text;
158             # ELEMENT
159             # LocalName - The name of the element minus any namespace prefix it may have come with in the document.
160             # NamespaceURI - The URI of the namespace associated with this element, or the empty string for none.
161             # Name - The name of the element as it was seen in the document (i.e. including any prefix associated with it)
162             # Prefix - The prefix used to qualify this element’s namespace, or the empty string if none.
163 20         25 $self->{'parent'} = pop @{$self->{'parents'}};
  20         239  
164             }
165              
166             =head2 $parser->characters()
167              
168             Handle part of a cdata by concatination
169              
170             =cut
171             sub characters
172             {
173 38     38 1 2219 my ($self, $text) = @_;
174              
175 38 50       126 $text = $text->() if ref($text) eq 'CODE';
176             # We wish to keep track of text characters, and
177             # and deal with text once other elements are found
178 38 50       134 $self->{'text'} = '' if not defined($self->{'-text'});
179 38         155 $self->{'text'} .= $text->{'Data'};
180             }
181              
182             =head2 $parser->text()
183              
184             Handle combined text strings as cdata
185              
186             =cut
187             sub text
188             {
189 40     40 1 135 my ($self) = @_;
190 40 100       114 if($self->{'text'}) {
191 38         70 my $text = $self->{'text'};
192 38 100       120 if($text =~ /\S/) {
193 12         128 $self->{'parent'}->cdata($text);
194             }
195 38         106 delete($self->{'text'});
196             }
197             }
198              
199             =head2 $parser->comment()
200              
201             WARNING: Comments are currently removed!
202              
203             =cut
204             sub comment
205             {
206 0     0 1   my ($self, $comment) = @_;
207 0           $self->text;
208             # warn "Comment '".$comment->{'Data'}."'\n";
209             # Data
210             }
211              
212             =head2 $parser->start_cdata()
213              
214             Never used by parser.
215              
216             =cut
217             sub start_cdata
218             {
219 0     0 1   print STDERR "START CDATA\n";
220             }
221              
222             =head2 $parser->end_cdata()
223              
224             Never used by parser.
225              
226             =cut
227             sub end_cdata
228             {
229 0     0 1   print STDERR "END CDATA\n";
230             }
231              
232             =head2 $parser->processing_instruction()
233              
234             Never used by parser.
235              
236             =cut
237             sub processing_instruction
238             {
239 0     0 1   print STDERR "PI\n";
240             }
241              
242             =head2 $parser->doctype_decl( $dtd )
243              
244             We want to store the below details for the document creation
245              
246             =cut
247             sub doctype_decl
248             {
249 0     0 1   my ($self, $dtd) = @_;
250 0           my $doc = $self->document;
251             # Name
252             # SystemId
253             # PublicId
254 0           warn "Setting doctype name to ".$dtd->{'Name'}."\n";
255 0           $doc->doctype->name($dtd->{'Name'});
256 0           $doc->doctype->systemId($dtd->{'SystemId'});
257 0           $doc->doctype->publicId($dtd->{'PublicId'});
258             # $self->{'dtd'} = $dtd;
259             }
260              
261             =head2 $parser->xml_decl( $xml )
262              
263             Decode the xml decleration information.
264              
265             =cut
266             sub xml_decl
267             {
268 0     0 1   my ($self, $xml) = @_;
269 0           my $doc = $self->document;
270             # Version
271             # Encoding
272             # Standalone
273 0           $doc->version($xml->{'Version'});
274 0           $doc->encoding($xml->{'Encoding'});
275 0           $doc->standalone($xml->{'Standalone'});
276             # $self->{'xml'} = $xml;
277             }
278              
279             =head1 COPYRIGHT
280              
281             Martin Owens, doctormo@cpan.org
282              
283             =head1 SEE ALSO
284              
285             L,L
286              
287             =cut
288             1;