File Coverage

blib/lib/XML/Parser/Lite.pm
Criterion Covered Total %
statement 105 108 97.2
branch 26 34 76.4
condition 4 5 80.0
subroutine 26 27 96.3
pod 3 5 60.0
total 164 179 91.6


line stmt bran cond sub pod time code
1             # ======================================================================
2             #
3             # Copyright (C) 2000-2007 Paul Kulchenko (paulclinger@yahoo.com)
4             # Copyright (C) 2008 Martin Kutter (martin.kutter@fen-net.de)
5             # XML::Parser::Lite is free software; you can redistribute it
6             # and/or modify it under the same terms as Perl itself.
7             #
8             # ======================================================================
9              
10             package XML::Parser::Lite;
11              
12 2     2   1374 use 5.006;
  2         6  
  2         71  
13 2     2   7 use strict;
  2         3  
  2         63  
14 2     2   9 use warnings;
  2         2  
  2         258  
15              
16             our $VERSION = '0.720_01';
17              
18             sub new {
19 6     6 1 1312 my $class = shift;
20              
21 6 100       22 return $class if ref $class;
22 5         15 my $self = bless {} => $class;
23              
24 5         11 my %parameters = @_;
25 5         9 $self->setHandlers(); # clear first
26 5 100       4 $self->setHandlers(%{$parameters{Handlers} || {}});
  5         28  
27              
28 5         17 return $self;
29             }
30              
31             sub setHandlers {
32 17     17 1 232 my $self = shift;
33              
34             # allow symbolic refs, avoid "subroutine redefined" warnings
35 2     2   10 no strict 'refs';
  2         2  
  2         61  
36 2     2   8 no warnings qw(redefine);
  2         2  
  2         306  
37             # clear all handlers if called without parameters
38 17 100       38 if (not @_) {
39 11         17 for (qw(Start End Char Final Init Comment Doctype XMLDecl)) {
40 337     337   3608 *$_ = sub {}
41 88         231 }
42             }
43              
44             # we could use each here, too...
45 17         31 while (@_) {
46 16         22 my($name, $func) = splice(@_, 0, 2);
47             *$name = defined $func
48             ? $func
49 2     2   47 : sub {}
50 16 100       57 }
51 17         19 return $self;
52             }
53              
54             sub _regexp {
55 2   50 2   11 my $patch = shift || '';
56 2         3 my $package = __PACKAGE__;
57              
58             # This parser is based on "shallow parser" http://www.cs.sfu.ca/~cameron/REX.html
59              
60             # Robert D. Cameron "REX: XML Shallow Parsing with Regular Expressions",
61             # Technical Report TR 1998-17, School of Computing Science, Simon Fraser University, November, 1998.
62             # Copyright (c) 1998, Robert D. Cameron.
63             # The following code may be freely used and distributed provided that
64             # this copyright and citation notice remains intact and that modifications
65             # or additions are clearly identified.
66              
67             # Modifications may be tracked on SOAP::Lite's SVN at
68             # https://soaplite.svn.sourceforge.net/svnroot/soaplite/
69             #
70 2     2   9 use re 'eval';
  2         3  
  2         117  
71 2         3 my $TextSE = "[^<]+";
72 2         3 my $UntilHyphen = "[^-]*-";
73 2     2   920 my $Until2Hyphens = "([^-]*)-(?:[^-]$[^-]*-)*-";
  2         740  
  2         668  
  2         12  
74             #my $CommentCE = "$Until2Hyphens(?{${package}::comment(\$2)})>?";
75 2         5 my $CommentCE = "(.+)--(?{${package}::comment(\$2)})>?";
76             # my $Until2Hyphens = "$UntilHyphen(?:[^-]$UntilHyphen)*-";
77             # my $CommentCE = "$Until2Hyphens>?";
78 2         3 my $UntilRSBs = "[^\\]]*](?:[^\\]]+])*]+";
79 2         5 my $CDATA_CE = "$UntilRSBs(?:[^\\]>]$UntilRSBs)*>";
80 2         3 my $S = "[ \\n\\t\\r]+";
81 2         2 my $NameStrt = "[A-Za-z_:]|[^\\x00-\\x7F]";
82 2         4 my $NameChar = "[A-Za-z0-9_:.-]|[^\\x00-\\x7F]";
83 2         3 my $Name = "(?:$NameStrt)(?:$NameChar)*";
84 2         3 my $QuoteSE = "\"[^\"]*\"|'[^']*'";
85 2         6 my $DT_IdentSE = "$Name(?:$S(?:$Name|$QuoteSE))*";
86             # my $DT_IdentSE = "$S$Name(?:$S(?:$Name|$QuoteSE))*";
87 2         4 my $MarkupDeclCE = "(?:[^\\]\"'><]+|$QuoteSE)*>";
88 2         2 my $S1 = "[\\n\\r\\t ]";
89 2         27 my $UntilQMs = "[^?]*\\?";
90 2         5 my $PI_Tail = "\\?>|$S1$UntilQMs(?:[^>?]$UntilQMs)*";
91 2         16 my $DT_ItemSE = "<(?:!(?:--$Until2Hyphens>|[^-]$MarkupDeclCE)|\\?$Name(?:$PI_Tail>))|%$Name;|$S";
92 2         7 my $DocTypeCE = "$S($DT_IdentSE(?:$S)?(?:\\[(?:$DT_ItemSE)*](?:$S)?)?)>(?{${package}::_doctype(\$3)})";
93             # my $PI_Tail = "\\?>|$S1$UntilQMs(?:[^>?]$UntilQMs)*>";
94             # my $DT_ItemSE = "<(?:!(?:--$Until2Hyphens>|[^-]$MarkupDeclCE)|\\?$Name(?:$PI_Tail))|%$Name;|$S";
95             # my $DocTypeCE = "$DT_IdentSE(?:$S)?(?:\\[(?:$DT_ItemSE)*](?:$S)?)?>?";
96 2         7 my $DeclCE = "--(?:$CommentCE)?|\\[CDATA\\[(?:$CDATA_CE)?|DOCTYPE(?:$DocTypeCE)?";
97             # my $PI_CE = "$Name(?:$PI_Tail)?";
98 2         6 my $PI_CE = "($Name(?:$PI_Tail))>(?{${package}::_xmldecl(\$5)})";
99             # these expressions were modified for backtracking and events
100             # my $EndTagCE = "($Name)(?{${package}::_end(\$2)})(?:$S)?>";
101 2         4 my $EndTagCE = "($Name)(?{${package}::_end(\$6)})(?:$S)?>";
102 2         3 my $AttValSE = "\"([^<\"]*)\"|'([^<']*)'";
103             # my $ElemTagCE = "($Name)(?:$S($Name)(?:$S)?=(?:$S)?(?:$AttValSE)(?{[\@{\$^R||[]},\$4=>defined\$5?\$5:\$6]}))*(?:$S)?(/)?>(?{${package}::_start( \$3,\@{\$^R||[]})})(?{\${7} and ${package}::_end(\$3)})";
104 2         12 my $ElemTagCE = "($Name)"
105             . "(?:$S($Name)(?:$S)?=(?:$S)?(?:$AttValSE)"
106             . "(?{[\@{\$^R||[]},\$8=>defined\$9?\$9:\$10]}))*(?:$S)?(/)?>"
107             . "(?{${package}::_start(\$7,\@{\$^R||[]})})(?{\$11 and ${package}::_end(\$7)})";
108              
109 2         5 my $MarkupSPE = "<(?:!(?:$DeclCE)?|\\?(?:$PI_CE)?|/(?:$EndTagCE)?|(?:$ElemTagCE)?)";
110              
111             # Next expression is under "black magic".
112             # Ideally it should be '($TextSE)(?{${package}::char(\$1)})|$MarkupSPE',
113             # but it doesn't work under Perl 5.005 and only magic with
114             # (?:....)?? solved the problem.
115             # I would appreciate if someone let me know what is the right thing to do
116             # and what's the reason for all this magic.
117             # Seems like a problem related to (?:....)? rather than to ?{} feature.
118             # Tests are in t/31-xmlparserlite.t if you decide to play with it.
119             #"(?{[]})(?:($TextSE)(?{${package}::_char(\$1)}))$patch|$MarkupSPE";
120 2         11 "(?:($TextSE)(?{${package}::_char(\$1)}))$patch|$MarkupSPE";
121             }
122              
123             setHandlers();
124              
125             # Try 5.6 and 5.10 regex first
126             my $REGEXP = _regexp('??');
127              
128             sub _parse_re {
129 2     2   10 use re "eval";
  2         4  
  2         75  
130 25     25   24 undef $^R;
131 2     2   7 no strict 'refs';
  2         3  
  2         173  
132 25         1639 1 while $_[0] =~ m{$REGEXP}go
133             };
134              
135             # fixup regex if it does not work...
136             {
137             if (not eval { _parse_re('bar'); 1; } ) {
138             $REGEXP = _regexp();
139             local $^W;
140             *_parse_re = sub {
141 2     2   10 use re "eval";
  2         3  
  2         689  
142             undef $^R;
143             1 while $_[0] =~ m{$REGEXP}go
144             };
145             }
146             }
147              
148             sub parse {
149 23     23 1 2598 _init();
150 23         35 _parse_re($_[1]);
151 14         20 _final();
152             }
153              
154             my(@stack, $level);
155              
156             sub _init {
157 23     23   29 @stack = ();
158 23         23 $level = 0;
159 23         37 Init(__PACKAGE__, @_);
160             }
161              
162             sub _final {
163 14 100   14   41 die "not properly closed tag '$stack[-1]'\n" if @stack;
164 11 100       62 die "no element found\n" unless $level;
165 8         39 Final(__PACKAGE__, @_)
166             }
167              
168             sub _start {
169 106 100 100 106   347 die "multiple roots, wrong element '$_[0]'\n" if $level++ && !@stack;
170 104         149 push(@stack, $_[0]);
171 104         133 my $r=Start(__PACKAGE__, @_);
172 104 50       2085 return ref($r) eq 'ARRAY' ? $r : undef;
173             }
174              
175             sub _char {
176 170 100   170   361 Char(__PACKAGE__, $_[0]), return if @stack;
177              
178             # check for junk before or after element
179             # can't use split or regexp due to limitations in ?{} implementation,
180             # will iterate with loop, but we'll do it no more than two times, so
181             # it shouldn't affect performance
182 11         40 for (my $i=0; $i < length $_[0]; $i++) {
183 21 100       215 die "junk '$_[0]' @{[$level ? 'after' : 'before']} XML element\n"
  4 100       57  
184             if index("\n\r\t ", substr($_[0],$i,1)) < 0; # or should '< $[' be there
185             }
186             }
187              
188             sub _end {
189 2     2   9 no warnings qw(uninitialized);
  2         3  
  2         481  
190 100 100   100   235 pop(@stack) eq $_[0] or die "mismatched tag '$_[0]'\n";
191 97         136 my $r=End(__PACKAGE__, $_[0]);
192 97 50       2104 return ref($r) eq 'ARRAY' ? $r : undef;
193             }
194              
195             sub comment {
196 2     2 0 6 my $r=Comment(__PACKAGE__, $_[0]);
197 2 50       47 return ref($r) eq 'ARRAY' ? $r : undef;
198             }
199              
200             sub end {
201 0 0   0 0 0 pop(@stack) eq $_[0] or die "mismatched tag '$_[0]'\n";
202 0         0 my $r=End(__PACKAGE__, $_[0]);
203 0 0       0 return ref($r) eq 'ARRAY' ? $r : undef;
204             }
205              
206             sub _doctype {
207 1     1   3 my $r=Doctype(__PACKAGE__, $_[0]);
208 1 50       26 return ref($r) eq 'ARRAY' ? $r : undef;
209             }
210              
211             sub _xmldecl {
212 2     2   10 XMLDecl(__PACKAGE__, $_[0]);
213             }
214              
215              
216              
217             # ======================================================================
218             1;
219              
220             __END__