File Coverage

blib/lib/XML/Parser/LiteCopy.pm
Criterion Covered Total %
statement 93 96 96.8
branch 25 30 83.3
condition 6 7 85.7
subroutine 22 24 91.6
pod 3 6 50.0
total 149 163 91.4


line stmt bran cond sub pod time code
1             # NOTE: This module originally came from SOAP::Lite, which you probably
2             # don't have. It was first repackaged here just to avoid the huge
3             # dependancy tree, but this version has several features (CDATA
4             # support, better PI and Comment support) that have been added.
5              
6             #
7             # Copyright (C) 2000-2007 Paul Kulchenko (paulclinger@yahoo.com)
8             # Copyright (C) 2008 Martin Kutter (martin.kutter@fen-net.de)
9             # Copyright (C) 2009-2011 Cal Henderson (cal@iamcal.com)
10             #
11             # SOAP::Lite is free software; you can redistribute it
12             # and/or modify it under the same terms as Perl itself.
13             #
14              
15             package XML::Parser::LiteCopy;
16              
17 9     9   241732 use strict;
  9         19  
  9         354  
18 9     9   66 use vars qw($VERSION);
  9         32  
  9         1930  
19             $VERSION = '0.720.00';
20              
21             my $ReturnErrors = 0;
22              
23             sub new {
24 48     48 1 57505 my $class = shift;
25              
26 48 50       162 return $class if ref $class;
27 48         146 my $self = bless {} => $class;
28              
29 48         129 my %parameters = @_;
30 48         138 $self->setHandlers(); # clear first
31 48 100       66 $self->setHandlers(%{$parameters{Handlers} || {}});
  48         316  
32              
33 48   100     247 $ReturnErrors = $parameters{ReturnErrors} || 0;
34              
35 48         163 return $self;
36             }
37              
38             sub setHandlers {
39 106     106 1 442 my $self = shift;
40              
41             # allow symbolic refs, avoid "subroutine redefined" warnings
42 9     9   48 no strict 'refs'; local $^W;
  9         235  
  9         1642  
  106         317  
43             # clear all handlers if called without parameters
44 106 100       281 if (not @_) {
45 64         151 for (qw(Start End Char Final Init CData Comment Doctype PI Error)) {
46 190     190   3280 *$_ = sub {}
47 640         3092 }
48             }
49              
50             # we could use each here, too...
51 106         308 while (@_) {
52 112         223 my($name, $func) = splice(@_, 0, 2);
53             *$name = defined $func
54             ? $func
55 0     0   0 : sub {}
56 112 50       527 }
57 106         245 return $self;
58             }
59              
60             sub _regexp {
61 9   50 9   43 my $patch = shift || '';
62 9         19 my $package = __PACKAGE__;
63              
64             # This parser is based on "shallow parser" http://www.cs.sfu.ca/~cameron/REX.html
65              
66             # Robert D. Cameron "REX: XML Shallow Parsing with Regular Expressions",
67             # Technical Report TR 1998-17, School of Computing Science, Simon Fraser University, November, 1998.
68             # Copyright (c) 1998, Robert D. Cameron.
69             # The following code may be freely used and distributed provided that
70             # this copyright and citation notice remains intact and that modifications
71             # or additions are clearly identified.
72              
73 9     9   112 use re 'eval';
  9         16  
  9         5511  
74 9         15 my $TextSE = "[^<]+";
75              
76             # the following backrefs have been added:
77             # 1 : TextSE
78             # 2 : MarkupSPE / DeclCE / CommentCE
79             # 3 : MarkupSPE / DeclCE / CDATA_CE
80             # 4 : MarkupSPE / DeclCE / DocTypeCE
81             # 5 : MarkupSPE / PI_CE
82             # 6 : MarkupSPE / EndTagCE
83             # 7+: MarkupSPE / ElemTagCE
84              
85 9         16 my $Until2Hyphens = "(?:[^-]*)-(?:[^-]+-)*-";
86 9         31 my $CommentCE = "($Until2Hyphens)(?{${package}::comment(\$2)})>?";
87              
88 9         20 my $UntilRSBs = "[^\\]]*](?:[^\\]]+])*]+";
89 9         27 my $CDATA_CE = "($UntilRSBs(?:[^\\]>]$UntilRSBs)*)(?{${package}::cdata(\$3)})>";
90              
91 9         17 my $S = "[ \\n\\t\\r]+";
92 9         15 my $NameStrt = "[A-Za-z_:]|[^\\x00-\\x7F]";
93 9         14 my $NameChar = "[A-Za-z0-9_:.-]|[^\\x00-\\x7F]";
94 9         31 my $Name = "(?:$NameStrt)(?:$NameChar)*";
95 9         17 my $QuoteSE = "\"[^\"]*\"|'[^']*'";
96 9         33 my $DT_IdentSE = "$Name(?:$S(?:$Name|$QuoteSE))*";
97 9         23 my $MarkupDeclCE = "(?:[^\\]\"'><]+|$QuoteSE)*>";
98 9         12 my $S1 = "[\\n\\r\\t ]";
99 9         16 my $UntilQMs = "[^?]*\\?+";
100              
101 9         28 my $PI_Tail = "\\?|$S1$UntilQMs(?:[^>?]$UntilQMs)*";
102 9         39 my $DT_ItemSE = "<(?:!(?:--$Until2Hyphens>|[^-]$MarkupDeclCE)|\\?$Name(?:$PI_Tail>))|%$Name;|$S";
103 9         37 my $DocTypeCE = "$S($DT_IdentSE(?:$S)?(?:\\[(?:$DT_ItemSE)*](?:$S)?)?)>(?{${package}::_doctype(\$4)})";
104              
105 9         38 my $DeclCE = "--(?:$CommentCE)?|\\[CDATA\\[(?:$CDATA_CE)?|DOCTYPE(?:$DocTypeCE)?";
106              
107 9         74 my $PI_CE = "($Name(?:$PI_Tail))>(?{${package}::_pi(\$5); undef})";
108              
109             # these expressions were modified for backtracking and events
110              
111 9         33 my $EndTagCE = "($Name)(?{${package}::_end(\$6); undef})(?:$S)?>";
112 9         20 my $AttValSE = "\"([^<\"]*)\"|'([^<']*)'";
113              
114 9         121 my $ElemTagCE = "($Name)"
115             . "(?:$S($Name)(?:$S)?=(?:$S)?(?:$AttValSE)"
116             . "(?{[\@{\$^R||[]},\$8=>defined\$9?\$9:\$10]}))*(?:$S)?(/)?>"
117             . "(?{${package}::_start(\$7,\@{\$^R||[]}),\$^R=[]})(?{\$11 and ${package}::_end(\$7); undef})";
118              
119 9         43 my $MarkupSPE = "<(?:!(?:$DeclCE)?|\\?(?:$PI_CE)?|/(?:$EndTagCE)?|(?:$ElemTagCE)?)";
120              
121             # Next expression is under "black magic".
122             # Ideally it should be '($TextSE)(?{${package}::char(\$1)})|$MarkupSPE',
123             # but it doesn't work under Perl 5.005 and only magic with
124             # (?:....)?? solved the problem.
125             # I would appreciate if someone let me know what is the right thing to do
126             # and what's the reason for all this magic.
127             # Seems like a problem related to (?:....)? rather than to ?{} feature.
128             # Tests are in t/31-xmlparserlite.t if you decide to play with it.
129             #"(?{[]})(?:($TextSE)(?{${package}::_char(\$1)}))$patch|$MarkupSPE";
130 9         99 "(?:($TextSE)(?{${package}::_char(\$1)}))$patch|$MarkupSPE";
131             }
132              
133             setHandlers();
134              
135             # Try 5.6 and 5.10 regex first
136             my $REGEXP = _regexp('??');
137              
138             sub _parse_re {
139 9     9   59 use re "eval";
  9         51  
  9         1193  
140 57     57   104 undef $^R;
141 57         10521 1 while $_[0] =~ m{$REGEXP}go
142             };
143              
144             # fixup regex if it does not work...
145             {
146             if (not eval { _parse_re('bar'); 1; } ) {
147             $REGEXP = _regexp();
148             local $^W;
149             *_parse_re = sub {
150 9     9   50 use re "eval";
  9         18  
  9         9936  
151             undef $^R;
152             1 while $_[0] =~ m{$REGEXP}go
153             };
154             }
155             }
156              
157             sub parse {
158 48     48 1 277 _init();
159 48         147 _parse_re($_[1]);
160 44         125 _final();
161             }
162              
163             my(@stack, $level);
164              
165             sub _init {
166 48     48   90 @stack = ();
167 48         89 $level = 0;
168 48         139 Init(__PACKAGE__, @_);
169             }
170              
171             sub _final {
172 44 100   44   125 return _error("not properly closed tag '$stack[-1]'") if @stack;
173 42 100       126 return _error("no element found") unless $level;
174 40         100 Final(__PACKAGE__, @_)
175             }
176              
177             sub _start {
178 80 100 100 80   343 return _error("multiple roots, wrong element '$_[0]'") if $level++ && !@stack;
179 78         194 push(@stack, $_[0]);
180 78         194 Start(__PACKAGE__, @_);
181             }
182              
183             sub _char {
184 65 100   65   237 Char(__PACKAGE__, $_[0]), return if @stack;
185              
186             # check for junk before or after element
187             # can't use split or regexp due to limitations in ?{} implementation,
188             # will iterate with loop, but we'll do it no more than two times, so
189             # it shouldn't affect performance
190 16         101 for (my $i=0; $i < length $_[0]; $i++) {
191 24 100       458 return _error("junk '$_[0]' @{[$level ? 'after' : 'before']} XML element")
  4 100       23  
192             if index("\n\r\t ", substr($_[0],$i,1)) < 0; # or should '< $[' be there
193             }
194             }
195              
196             sub _end {
197 76 100   76   225 return _error("unexpected closing tag '$_[0]'") if !@stack;
198 75 100       272 pop(@stack) eq $_[0] or return _error("mismatched tag '$_[0]'");
199 72         341 End(__PACKAGE__, $_[0]);
200             }
201              
202             sub comment {
203 9     9 0 35 Comment(__PACKAGE__, substr $_[0], 0, -2);
204             }
205              
206             sub end {
207 0 0   0 0 0 pop(@stack) eq $_[0] or return _error("mismatched tag '$_[0]'");
208 0         0 End(__PACKAGE__, $_[0]);
209             }
210              
211             sub cdata {
212 9 50   9 0 22 return _error("CDATA outside of tag stack") unless @stack;
213 9         38 CData(__PACKAGE__, substr $_[0], 0, -2);
214             }
215              
216             sub _doctype {
217 1     1   5 Doctype(__PACKAGE__, $_[0]);
218             }
219              
220             sub _pi {
221 6     6   27 PI(__PACKAGE__, substr $_[0], 0, -1);
222             }
223              
224             sub _error {
225 14 100   14   48 if ($ReturnErrors){
226 8         23 Error(__PACKAGE__, $_[0]);
227 8         225 return;
228             }
229 6         42 die "$_[0]\n";
230             }
231              
232             # ======================================================================
233             1;
234              
235             __END__