| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | # ====================================================================== | 
| 2 |  |  |  |  |  |  | # | 
| 3 |  |  |  |  |  |  | # Copyright (C) 2000-2007 Paul Kulchenko (paulclinger@yahoo.com) | 
| 4 |  |  |  |  |  |  | # Copyright (C) 2008 Martin Kutter (martin.kutter@fen-net.de) | 
| 5 |  |  |  |  |  |  | # XML::Parser::Lite is free software; you can redistribute it | 
| 6 |  |  |  |  |  |  | # and/or modify it under the same terms as Perl itself. | 
| 7 |  |  |  |  |  |  | # | 
| 8 |  |  |  |  |  |  | # ====================================================================== | 
| 9 |  |  |  |  |  |  |  | 
| 10 |  |  |  |  |  |  | package XML::Parser::Lite; | 
| 11 |  |  |  |  |  |  |  | 
| 12 | 2 |  |  | 2 |  | 1374 | use 5.006; | 
|  | 2 |  |  |  |  | 6 |  | 
|  | 2 |  |  |  |  | 71 |  | 
| 13 | 2 |  |  | 2 |  | 7 | use strict; | 
|  | 2 |  |  |  |  | 3 |  | 
|  | 2 |  |  |  |  | 63 |  | 
| 14 | 2 |  |  | 2 |  | 9 | use warnings; | 
|  | 2 |  |  |  |  | 2 |  | 
|  | 2 |  |  |  |  | 258 |  | 
| 15 |  |  |  |  |  |  |  | 
| 16 |  |  |  |  |  |  | our $VERSION = '0.720_01'; | 
| 17 |  |  |  |  |  |  |  | 
| 18 |  |  |  |  |  |  | sub new { | 
| 19 | 6 |  |  | 6 | 1 | 1312 | my $class = shift; | 
| 20 |  |  |  |  |  |  |  | 
| 21 | 6 | 100 |  |  |  | 22 | return $class if ref $class; | 
| 22 | 5 |  |  |  |  | 15 | my $self = bless {} => $class; | 
| 23 |  |  |  |  |  |  |  | 
| 24 | 5 |  |  |  |  | 11 | my %parameters = @_; | 
| 25 | 5 |  |  |  |  | 9 | $self->setHandlers(); # clear first | 
| 26 | 5 | 100 |  |  |  | 4 | $self->setHandlers(%{$parameters{Handlers} || {}}); | 
|  | 5 |  |  |  |  | 28 |  | 
| 27 |  |  |  |  |  |  |  | 
| 28 | 5 |  |  |  |  | 17 | return $self; | 
| 29 |  |  |  |  |  |  | } | 
| 30 |  |  |  |  |  |  |  | 
| 31 |  |  |  |  |  |  | sub setHandlers { | 
| 32 | 17 |  |  | 17 | 1 | 232 | my $self = shift; | 
| 33 |  |  |  |  |  |  |  | 
| 34 |  |  |  |  |  |  | # allow symbolic refs, avoid "subroutine redefined" warnings | 
| 35 | 2 |  |  | 2 |  | 10 | no strict 'refs'; | 
|  | 2 |  |  |  |  | 2 |  | 
|  | 2 |  |  |  |  | 61 |  | 
| 36 | 2 |  |  | 2 |  | 8 | no warnings qw(redefine); | 
|  | 2 |  |  |  |  | 2 |  | 
|  | 2 |  |  |  |  | 306 |  | 
| 37 |  |  |  |  |  |  | # clear all handlers if called without parameters | 
| 38 | 17 | 100 |  |  |  | 38 | if (not @_) { | 
| 39 | 11 |  |  |  |  | 17 | for (qw(Start End Char Final Init Comment Doctype XMLDecl)) { | 
| 40 | 337 |  |  | 337 |  | 3608 | *$_ = sub {} | 
| 41 | 88 |  |  |  |  | 231 | } | 
| 42 |  |  |  |  |  |  | } | 
| 43 |  |  |  |  |  |  |  | 
| 44 |  |  |  |  |  |  | # we could use each here, too... | 
| 45 | 17 |  |  |  |  | 31 | while (@_) { | 
| 46 | 16 |  |  |  |  | 22 | my($name, $func) = splice(@_, 0, 2); | 
| 47 |  |  |  |  |  |  | *$name = defined $func | 
| 48 |  |  |  |  |  |  | ? $func | 
| 49 | 2 |  |  | 2 |  | 47 | : sub {} | 
| 50 | 16 | 100 |  |  |  | 57 | } | 
| 51 | 17 |  |  |  |  | 19 | return $self; | 
| 52 |  |  |  |  |  |  | } | 
| 53 |  |  |  |  |  |  |  | 
| 54 |  |  |  |  |  |  | sub _regexp { | 
| 55 | 2 |  | 50 | 2 |  | 11 | my $patch = shift || ''; | 
| 56 | 2 |  |  |  |  | 3 | my $package = __PACKAGE__; | 
| 57 |  |  |  |  |  |  |  | 
| 58 |  |  |  |  |  |  | # This parser is based on "shallow parser" http://www.cs.sfu.ca/~cameron/REX.html | 
| 59 |  |  |  |  |  |  |  | 
| 60 |  |  |  |  |  |  | # Robert D. Cameron "REX: XML Shallow Parsing with Regular Expressions", | 
| 61 |  |  |  |  |  |  | # Technical Report TR 1998-17, School of Computing Science, Simon Fraser University, November, 1998. | 
| 62 |  |  |  |  |  |  | # Copyright (c) 1998, Robert D. Cameron. | 
| 63 |  |  |  |  |  |  | # The following code may be freely used and distributed provided that | 
| 64 |  |  |  |  |  |  | # this copyright and citation notice remains intact and that modifications | 
| 65 |  |  |  |  |  |  | # or additions are clearly identified. | 
| 66 |  |  |  |  |  |  |  | 
| 67 |  |  |  |  |  |  | # Modifications may be tracked on SOAP::Lite's SVN at | 
| 68 |  |  |  |  |  |  | # https://soaplite.svn.sourceforge.net/svnroot/soaplite/ | 
| 69 |  |  |  |  |  |  | # | 
| 70 | 2 |  |  | 2 |  | 9 | use re 'eval'; | 
|  | 2 |  |  |  |  | 3 |  | 
|  | 2 |  |  |  |  | 117 |  | 
| 71 | 2 |  |  |  |  | 3 | my $TextSE = "[^<]+"; | 
| 72 | 2 |  |  |  |  | 3 | my $UntilHyphen = "[^-]*-"; | 
| 73 | 2 |  |  | 2 |  | 920 | my $Until2Hyphens = "([^-]*)-(?:[^-]$[^-]*-)*-"; | 
|  | 2 |  |  |  |  | 740 |  | 
|  | 2 |  |  |  |  | 668 |  | 
|  | 2 |  |  |  |  | 12 |  | 
| 74 |  |  |  |  |  |  | #my $CommentCE = "$Until2Hyphens(?{${package}::comment(\$2)})>?"; | 
| 75 | 2 |  |  |  |  | 5 | my $CommentCE = "(.+)--(?{${package}::comment(\$2)})>?"; | 
| 76 |  |  |  |  |  |  | #    my $Until2Hyphens = "$UntilHyphen(?:[^-]$UntilHyphen)*-"; | 
| 77 |  |  |  |  |  |  | #    my $CommentCE = "$Until2Hyphens>?"; | 
| 78 | 2 |  |  |  |  | 3 | my $UntilRSBs = "[^\\]]*](?:[^\\]]+])*]+"; | 
| 79 | 2 |  |  |  |  | 5 | my $CDATA_CE = "$UntilRSBs(?:[^\\]>]$UntilRSBs)*>"; | 
| 80 | 2 |  |  |  |  | 3 | my $S = "[ \\n\\t\\r]+"; | 
| 81 | 2 |  |  |  |  | 2 | my $NameStrt = "[A-Za-z_:]|[^\\x00-\\x7F]"; | 
| 82 | 2 |  |  |  |  | 4 | my $NameChar = "[A-Za-z0-9_:.-]|[^\\x00-\\x7F]"; | 
| 83 | 2 |  |  |  |  | 3 | my $Name = "(?:$NameStrt)(?:$NameChar)*"; | 
| 84 | 2 |  |  |  |  | 3 | my $QuoteSE = "\"[^\"]*\"|'[^']*'"; | 
| 85 | 2 |  |  |  |  | 6 | my $DT_IdentSE = "$Name(?:$S(?:$Name|$QuoteSE))*"; | 
| 86 |  |  |  |  |  |  | #    my $DT_IdentSE = "$S$Name(?:$S(?:$Name|$QuoteSE))*"; | 
| 87 | 2 |  |  |  |  | 4 | my $MarkupDeclCE = "(?:[^\\]\"'><]+|$QuoteSE)*>"; | 
| 88 | 2 |  |  |  |  | 2 | my $S1 = "[\\n\\r\\t ]"; | 
| 89 | 2 |  |  |  |  | 27 | my $UntilQMs = "[^?]*\\?"; | 
| 90 | 2 |  |  |  |  | 5 | my $PI_Tail = "\\?>|$S1$UntilQMs(?:[^>?]$UntilQMs)*"; | 
| 91 | 2 |  |  |  |  | 16 | my $DT_ItemSE = "<(?:!(?:--$Until2Hyphens>|[^-]$MarkupDeclCE)|\\?$Name(?:$PI_Tail>))|%$Name;|$S"; | 
| 92 | 2 |  |  |  |  | 7 | my $DocTypeCE = "$S($DT_IdentSE(?:$S)?(?:\\[(?:$DT_ItemSE)*](?:$S)?)?)>(?{${package}::_doctype(\$3)})"; | 
| 93 |  |  |  |  |  |  | #    my $PI_Tail = "\\?>|$S1$UntilQMs(?:[^>?]$UntilQMs)*>"; | 
| 94 |  |  |  |  |  |  | #    my $DT_ItemSE = "<(?:!(?:--$Until2Hyphens>|[^-]$MarkupDeclCE)|\\?$Name(?:$PI_Tail))|%$Name;|$S"; | 
| 95 |  |  |  |  |  |  | #    my $DocTypeCE = "$DT_IdentSE(?:$S)?(?:\\[(?:$DT_ItemSE)*](?:$S)?)?>?"; | 
| 96 | 2 |  |  |  |  | 7 | my $DeclCE = "--(?:$CommentCE)?|\\[CDATA\\[(?:$CDATA_CE)?|DOCTYPE(?:$DocTypeCE)?"; | 
| 97 |  |  |  |  |  |  | #    my $PI_CE = "$Name(?:$PI_Tail)?"; | 
| 98 | 2 |  |  |  |  | 6 | my $PI_CE = "($Name(?:$PI_Tail))>(?{${package}::_xmldecl(\$5)})"; | 
| 99 |  |  |  |  |  |  | # these expressions were modified for backtracking and events | 
| 100 |  |  |  |  |  |  | #    my $EndTagCE = "($Name)(?{${package}::_end(\$2)})(?:$S)?>"; | 
| 101 | 2 |  |  |  |  | 4 | my $EndTagCE = "($Name)(?{${package}::_end(\$6)})(?:$S)?>"; | 
| 102 | 2 |  |  |  |  | 3 | my $AttValSE = "\"([^<\"]*)\"|'([^<']*)'"; | 
| 103 |  |  |  |  |  |  | #    my $ElemTagCE = "($Name)(?:$S($Name)(?:$S)?=(?:$S)?(?:$AttValSE)(?{[\@{\$^R||[]},\$4=>defined\$5?\$5:\$6]}))*(?:$S)?(/)?>(?{${package}::_start( \$3,\@{\$^R||[]})})(?{\${7} and ${package}::_end(\$3)})"; | 
| 104 | 2 |  |  |  |  | 12 | my $ElemTagCE = "($Name)" | 
| 105 |  |  |  |  |  |  | . "(?:$S($Name)(?:$S)?=(?:$S)?(?:$AttValSE)" | 
| 106 |  |  |  |  |  |  | . "(?{[\@{\$^R||[]},\$8=>defined\$9?\$9:\$10]}))*(?:$S)?(/)?>" | 
| 107 |  |  |  |  |  |  | . "(?{${package}::_start(\$7,\@{\$^R||[]})})(?{\$11 and ${package}::_end(\$7)})"; | 
| 108 |  |  |  |  |  |  |  | 
| 109 | 2 |  |  |  |  | 5 | my $MarkupSPE = "<(?:!(?:$DeclCE)?|\\?(?:$PI_CE)?|/(?:$EndTagCE)?|(?:$ElemTagCE)?)"; | 
| 110 |  |  |  |  |  |  |  | 
| 111 |  |  |  |  |  |  | # Next expression is under "black magic". | 
| 112 |  |  |  |  |  |  | # Ideally it should be '($TextSE)(?{${package}::char(\$1)})|$MarkupSPE', | 
| 113 |  |  |  |  |  |  | # but it doesn't work under Perl 5.005 and only magic with | 
| 114 |  |  |  |  |  |  | # (?:....)?? solved the problem. | 
| 115 |  |  |  |  |  |  | # I would appreciate if someone let me know what is the right thing to do | 
| 116 |  |  |  |  |  |  | # and what's the reason for all this magic. | 
| 117 |  |  |  |  |  |  | # Seems like a problem related to (?:....)? rather than to ?{} feature. | 
| 118 |  |  |  |  |  |  | # Tests are in t/31-xmlparserlite.t if you decide to play with it. | 
| 119 |  |  |  |  |  |  | #"(?{[]})(?:($TextSE)(?{${package}::_char(\$1)}))$patch|$MarkupSPE"; | 
| 120 | 2 |  |  |  |  | 11 | "(?:($TextSE)(?{${package}::_char(\$1)}))$patch|$MarkupSPE"; | 
| 121 |  |  |  |  |  |  | } | 
| 122 |  |  |  |  |  |  |  | 
| 123 |  |  |  |  |  |  | setHandlers(); | 
| 124 |  |  |  |  |  |  |  | 
| 125 |  |  |  |  |  |  | # Try 5.6 and 5.10 regex first | 
| 126 |  |  |  |  |  |  | my $REGEXP = _regexp('??'); | 
| 127 |  |  |  |  |  |  |  | 
| 128 |  |  |  |  |  |  | sub _parse_re { | 
| 129 | 2 |  |  | 2 |  | 10 | use re "eval"; | 
|  | 2 |  |  |  |  | 4 |  | 
|  | 2 |  |  |  |  | 75 |  | 
| 130 | 25 |  |  | 25 |  | 24 | undef $^R; | 
| 131 | 2 |  |  | 2 |  | 7 | no strict 'refs'; | 
|  | 2 |  |  |  |  | 3 |  | 
|  | 2 |  |  |  |  | 173 |  | 
| 132 | 25 |  |  |  |  | 1639 | 1 while $_[0] =~ m{$REGEXP}go | 
| 133 |  |  |  |  |  |  | }; | 
| 134 |  |  |  |  |  |  |  | 
| 135 |  |  |  |  |  |  | # fixup regex if it does not work... | 
| 136 |  |  |  |  |  |  | { | 
| 137 |  |  |  |  |  |  | if (not eval { _parse_re('bar'); 1; } ) { | 
| 138 |  |  |  |  |  |  | $REGEXP = _regexp(); | 
| 139 |  |  |  |  |  |  | local $^W; | 
| 140 |  |  |  |  |  |  | *_parse_re = sub { | 
| 141 | 2 |  |  | 2 |  | 10 | use re "eval"; | 
|  | 2 |  |  |  |  | 3 |  | 
|  | 2 |  |  |  |  | 689 |  | 
| 142 |  |  |  |  |  |  | undef $^R; | 
| 143 |  |  |  |  |  |  | 1 while $_[0] =~ m{$REGEXP}go | 
| 144 |  |  |  |  |  |  | }; | 
| 145 |  |  |  |  |  |  | } | 
| 146 |  |  |  |  |  |  | } | 
| 147 |  |  |  |  |  |  |  | 
| 148 |  |  |  |  |  |  | sub parse { | 
| 149 | 23 |  |  | 23 | 1 | 2598 | _init(); | 
| 150 | 23 |  |  |  |  | 35 | _parse_re($_[1]); | 
| 151 | 14 |  |  |  |  | 20 | _final(); | 
| 152 |  |  |  |  |  |  | } | 
| 153 |  |  |  |  |  |  |  | 
| 154 |  |  |  |  |  |  | my(@stack, $level); | 
| 155 |  |  |  |  |  |  |  | 
| 156 |  |  |  |  |  |  | sub _init { | 
| 157 | 23 |  |  | 23 |  | 29 | @stack = (); | 
| 158 | 23 |  |  |  |  | 23 | $level = 0; | 
| 159 | 23 |  |  |  |  | 37 | Init(__PACKAGE__, @_); | 
| 160 |  |  |  |  |  |  | } | 
| 161 |  |  |  |  |  |  |  | 
| 162 |  |  |  |  |  |  | sub _final { | 
| 163 | 14 | 100 |  | 14 |  | 41 | die "not properly closed tag '$stack[-1]'\n" if @stack; | 
| 164 | 11 | 100 |  |  |  | 62 | die "no element found\n" unless $level; | 
| 165 | 8 |  |  |  |  | 39 | Final(__PACKAGE__, @_) | 
| 166 |  |  |  |  |  |  | } | 
| 167 |  |  |  |  |  |  |  | 
| 168 |  |  |  |  |  |  | sub _start { | 
| 169 | 106 | 100 | 100 | 106 |  | 347 | die "multiple roots, wrong element '$_[0]'\n" if $level++ && !@stack; | 
| 170 | 104 |  |  |  |  | 149 | push(@stack, $_[0]); | 
| 171 | 104 |  |  |  |  | 133 | my $r=Start(__PACKAGE__, @_); | 
| 172 | 104 | 50 |  |  |  | 2085 | return ref($r) eq 'ARRAY' ? $r : undef; | 
| 173 |  |  |  |  |  |  | } | 
| 174 |  |  |  |  |  |  |  | 
| 175 |  |  |  |  |  |  | sub _char { | 
| 176 | 170 | 100 |  | 170 |  | 361 | Char(__PACKAGE__, $_[0]), return if @stack; | 
| 177 |  |  |  |  |  |  |  | 
| 178 |  |  |  |  |  |  | # check for junk before or after element | 
| 179 |  |  |  |  |  |  | # can't use split or regexp due to limitations in ?{} implementation, | 
| 180 |  |  |  |  |  |  | # will iterate with loop, but we'll do it no more than two times, so | 
| 181 |  |  |  |  |  |  | # it shouldn't affect performance | 
| 182 | 11 |  |  |  |  | 40 | for (my $i=0; $i < length $_[0]; $i++) { | 
| 183 | 21 | 100 |  |  |  | 215 | die "junk '$_[0]' @{[$level ? 'after' : 'before']} XML element\n" | 
|  | 4 | 100 |  |  |  | 57 |  | 
| 184 |  |  |  |  |  |  | if index("\n\r\t ", substr($_[0],$i,1)) < 0; # or should '< $[' be there | 
| 185 |  |  |  |  |  |  | } | 
| 186 |  |  |  |  |  |  | } | 
| 187 |  |  |  |  |  |  |  | 
| 188 |  |  |  |  |  |  | sub _end { | 
| 189 | 2 |  |  | 2 |  | 9 | no warnings qw(uninitialized); | 
|  | 2 |  |  |  |  | 3 |  | 
|  | 2 |  |  |  |  | 481 |  | 
| 190 | 100 | 100 |  | 100 |  | 235 | pop(@stack) eq $_[0] or die "mismatched tag '$_[0]'\n"; | 
| 191 | 97 |  |  |  |  | 136 | my $r=End(__PACKAGE__, $_[0]); | 
| 192 | 97 | 50 |  |  |  | 2104 | return ref($r) eq 'ARRAY' ? $r : undef; | 
| 193 |  |  |  |  |  |  | } | 
| 194 |  |  |  |  |  |  |  | 
| 195 |  |  |  |  |  |  | sub comment { | 
| 196 | 2 |  |  | 2 | 0 | 6 | my $r=Comment(__PACKAGE__, $_[0]); | 
| 197 | 2 | 50 |  |  |  | 47 | return ref($r) eq 'ARRAY' ? $r : undef; | 
| 198 |  |  |  |  |  |  | } | 
| 199 |  |  |  |  |  |  |  | 
| 200 |  |  |  |  |  |  | sub end { | 
| 201 | 0 | 0 |  | 0 | 0 | 0 | pop(@stack) eq $_[0] or die "mismatched tag '$_[0]'\n"; | 
| 202 | 0 |  |  |  |  | 0 | my $r=End(__PACKAGE__, $_[0]); | 
| 203 | 0 | 0 |  |  |  | 0 | return ref($r) eq 'ARRAY' ? $r : undef; | 
| 204 |  |  |  |  |  |  | } | 
| 205 |  |  |  |  |  |  |  | 
| 206 |  |  |  |  |  |  | sub _doctype { | 
| 207 | 1 |  |  | 1 |  | 3 | my $r=Doctype(__PACKAGE__, $_[0]); | 
| 208 | 1 | 50 |  |  |  | 26 | return ref($r) eq 'ARRAY' ? $r : undef; | 
| 209 |  |  |  |  |  |  | } | 
| 210 |  |  |  |  |  |  |  | 
| 211 |  |  |  |  |  |  | sub _xmldecl { | 
| 212 | 2 |  |  | 2 |  | 10 | XMLDecl(__PACKAGE__, $_[0]); | 
| 213 |  |  |  |  |  |  | } | 
| 214 |  |  |  |  |  |  |  | 
| 215 |  |  |  |  |  |  |  | 
| 216 |  |  |  |  |  |  |  | 
| 217 |  |  |  |  |  |  | # ====================================================================== | 
| 218 |  |  |  |  |  |  | 1; | 
| 219 |  |  |  |  |  |  |  | 
| 220 |  |  |  |  |  |  | __END__ |