| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package YAX::Parser; |
|
2
|
|
|
|
|
|
|
|
|
3
|
3
|
|
|
3
|
|
41108
|
use strict; |
|
|
3
|
|
|
|
|
5
|
|
|
|
3
|
|
|
|
|
98
|
|
|
4
|
|
|
|
|
|
|
|
|
5
|
3
|
|
|
3
|
|
796
|
use YAX::Node; |
|
|
3
|
|
|
|
|
8
|
|
|
|
3
|
|
|
|
|
80
|
|
|
6
|
3
|
|
|
3
|
|
857
|
use YAX::Text; |
|
|
3
|
|
|
|
|
25
|
|
|
|
3
|
|
|
|
|
96
|
|
|
7
|
3
|
|
|
3
|
|
888
|
use YAX::Element; |
|
|
3
|
|
|
|
|
7
|
|
|
|
3
|
|
|
|
|
66
|
|
|
8
|
3
|
|
|
3
|
|
460
|
use YAX::Fragment; |
|
|
3
|
|
|
|
|
6
|
|
|
|
3
|
|
|
|
|
59
|
|
|
9
|
3
|
|
|
3
|
|
1465
|
use YAX::Document; |
|
|
3
|
|
|
|
|
7
|
|
|
|
3
|
|
|
|
|
107
|
|
|
10
|
3
|
|
|
3
|
|
17
|
use YAX::Constants qw/:all/; |
|
|
3
|
|
|
|
|
5
|
|
|
|
3
|
|
|
|
|
7385
|
|
|
11
|
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
#======================================================================== |
|
13
|
|
|
|
|
|
|
# These regular expressions have been gratefully borrowed from: |
|
14
|
|
|
|
|
|
|
# |
|
15
|
|
|
|
|
|
|
# REX/Perl 1.0 |
|
16
|
|
|
|
|
|
|
# Robert D. Cameron "REX: XML Shallow Parsing with Regular Expressions", |
|
17
|
|
|
|
|
|
|
# Technical Report TR 1998-17, School of Computing Science, Simon Fraser |
|
18
|
|
|
|
|
|
|
# University, November, 1998. |
|
19
|
|
|
|
|
|
|
# Copyright (c) 1998, Robert D. Cameron. |
|
20
|
|
|
|
|
|
|
# The following code may be freely used and distributed provided that |
|
21
|
|
|
|
|
|
|
# this copyright and citation notice remains intact and that modifications |
|
22
|
|
|
|
|
|
|
# or additions are clearly identified. |
|
23
|
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
our $TextSE = "[^<]+"; |
|
25
|
|
|
|
|
|
|
our $UntilHyphen = "[^-]*-"; |
|
26
|
|
|
|
|
|
|
our $Until2Hyphens = "$UntilHyphen(?:[^-]$UntilHyphen)*-"; |
|
27
|
|
|
|
|
|
|
our $CommentCE = "$Until2Hyphens>?"; |
|
28
|
|
|
|
|
|
|
our $UntilRSBs = "[^\\]]*](?:[^\\]]+])*]+"; |
|
29
|
|
|
|
|
|
|
our $CDATA_CE = "$UntilRSBs(?:[^\\]>]$UntilRSBs)*>"; |
|
30
|
|
|
|
|
|
|
our $S = "[ \\n\\t\\r]+"; |
|
31
|
|
|
|
|
|
|
our $NameStrt = "[A-Za-z_:]|[^\\x00-\\x7F]"; |
|
32
|
|
|
|
|
|
|
our $NameChar = "[A-Za-z0-9_:.-]|[^\\x00-\\x7F]"; |
|
33
|
|
|
|
|
|
|
our $Name = "(?:$NameStrt)(?:$NameChar)*"; |
|
34
|
|
|
|
|
|
|
our $QuoteSE = "\"[^\"]*\"|'[^']*'"; |
|
35
|
|
|
|
|
|
|
our $DT_IdentSE = "$S$Name(?:$S(?:$Name|$QuoteSE))*"; |
|
36
|
|
|
|
|
|
|
our $MarkupDeclCE = "(?:[^\\]\"'><]+|$QuoteSE)*>"; |
|
37
|
|
|
|
|
|
|
our $S1 = "[\\n\\r\\t ]"; |
|
38
|
|
|
|
|
|
|
our $UntilQMs = "[^?]*\\?+"; |
|
39
|
|
|
|
|
|
|
our $PI_Tail = "\\?>|$S1$UntilQMs(?:[^>?]$UntilQMs)*>"; |
|
40
|
|
|
|
|
|
|
our $DT_ItemSE = "<(?:!(?:--$Until2Hyphens>|[^-]$MarkupDeclCE)|\\?$Name(?:$PI_Tail))|%$Name;|$S"; |
|
41
|
|
|
|
|
|
|
our $DocTypeCE = "$DT_IdentSE(?:$S)?(?:\\[(?:$DT_ItemSE)*](?:$S)?)?>?"; |
|
42
|
|
|
|
|
|
|
our $DeclCE = "--(?:$CommentCE)?|\\[CDATA\\[(?:$CDATA_CE)?|DOCTYPE(?:$DocTypeCE)?"; |
|
43
|
|
|
|
|
|
|
our $PI_CE = "$Name(?:$PI_Tail)?"; |
|
44
|
|
|
|
|
|
|
our $EndTagCE = "$Name(?:$S)?>?"; |
|
45
|
|
|
|
|
|
|
our $AttValSE = "\"[^<\"]*\"|'[^<']*'"; |
|
46
|
|
|
|
|
|
|
our $ElemTagCE = "$Name(?:$S$Name(?:$S)?=(?:$S)?(?:$AttValSE))*(?:$S)?/?>?"; |
|
47
|
|
|
|
|
|
|
our $ElementCE = "/(?:$EndTagCE)?|(?:$ElemTagCE)?"; |
|
48
|
|
|
|
|
|
|
our $MarkupSPE = "<(?:!(?:$DeclCE)?|\\?(?:$PI_CE)?|(?:$ElementCE)?)"; |
|
49
|
|
|
|
|
|
|
our $XML_SPE = "$TextSE|$MarkupSPE"; |
|
50
|
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
#======================================================================== |
|
52
|
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
# these have captures for parsing attributes |
|
54
|
|
|
|
|
|
|
our $AttValSE2 = "\"([^<\"]*)\"|'([^<']*)'"; |
|
55
|
|
|
|
|
|
|
our $ElemTagCE2 = "(?:($Name)(?:$S)?=(?:$S)?(?:$AttValSE2))+(?:$S)?/?>?"; |
|
56
|
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
sub new { |
|
58
|
1
|
|
|
1
|
0
|
10
|
my ( $class ) = @_; |
|
59
|
1
|
|
|
|
|
3
|
my $self = bless { }, $class; |
|
60
|
1
|
|
|
|
|
3
|
return $self; |
|
61
|
|
|
|
|
|
|
} |
|
62
|
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
sub parse { |
|
64
|
2
|
|
|
2
|
1
|
21
|
my ( $self, $xstr ) = ( shift, shift ); |
|
65
|
2
|
50
|
|
|
|
7
|
return unless $xstr; |
|
66
|
2
|
|
|
|
|
11
|
my @nodes = $self->tokenize( $xstr ); |
|
67
|
|
|
|
|
|
|
|
|
68
|
2
|
|
|
|
|
34
|
my $xdoc = YAX::Document->new(); |
|
69
|
2
|
|
|
|
|
6
|
my @stack = ( $xdoc ); |
|
70
|
2
|
|
|
|
|
4
|
my ( $spec, $elmt ); |
|
71
|
2
|
|
|
|
|
6
|
foreach my $node ( $self->tokenize( $xstr ) ) { |
|
72
|
102
|
|
|
|
|
165
|
$spec = substr( $node, 0, 2 ); |
|
73
|
102
|
100
|
|
|
|
219
|
if ( index( $spec, '<' ) != 0 ) { |
|
74
|
48
|
|
|
|
|
111
|
$self->_mk_text( $node, $stack[-1] ); |
|
75
|
48
|
|
|
|
|
71
|
next; |
|
76
|
|
|
|
|
|
|
} |
|
77
|
54
|
100
|
|
|
|
110
|
if ( $spec eq '' ) { |
|
78
|
22
|
|
|
|
|
24
|
pop @stack; |
|
79
|
22
|
|
|
|
|
30
|
next; |
|
80
|
|
|
|
|
|
|
} |
|
81
|
32
|
100
|
|
|
|
57
|
if ( $spec eq '
|
|
82
|
3
|
|
|
|
|
11
|
$self->_mk_decl( $node, $stack[-1] ); |
|
83
|
3
|
|
|
|
|
8
|
next; |
|
84
|
|
|
|
|
|
|
} |
|
85
|
29
|
100
|
|
|
|
53
|
if ( $spec eq '' ) { |
|
86
|
2
|
|
|
|
|
9
|
$self->_mk_proc( $node, $stack[-1] ); |
|
87
|
2
|
|
|
|
|
4
|
next; |
|
88
|
|
|
|
|
|
|
} |
|
89
|
|
|
|
|
|
|
|
|
90
|
27
|
|
|
|
|
63
|
$elmt = $self->_mk_elmt( $node, $stack[-1] ); |
|
91
|
27
|
100
|
|
|
|
85
|
push( @stack, $elmt ) unless ( $node =~ m{/>$} ); |
|
92
|
27
|
100
|
|
|
|
55
|
$xdoc->set( $elmt->{id} => $elmt ) if $elmt->{id} |
|
93
|
|
|
|
|
|
|
} |
|
94
|
|
|
|
|
|
|
|
|
95
|
2
|
|
|
|
|
28
|
return $xdoc; |
|
96
|
|
|
|
|
|
|
} |
|
97
|
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
sub stream { |
|
99
|
0
|
|
|
0
|
1
|
0
|
my ( $self, $xstr, $state ) = ( shift, shift, shift ); |
|
100
|
0
|
|
|
|
|
0
|
my %subs; |
|
101
|
0
|
0
|
0
|
|
|
0
|
if ( @_ == 1 and ref $_[0] eq 'HASH' ) { |
|
102
|
0
|
|
|
|
|
0
|
%subs = %{$_[0]}; |
|
|
0
|
|
|
|
|
0
|
|
|
103
|
|
|
|
|
|
|
} else { |
|
104
|
0
|
|
|
|
|
0
|
%subs = @_; |
|
105
|
|
|
|
|
|
|
} |
|
106
|
|
|
|
|
|
|
|
|
107
|
0
|
|
0
|
|
|
0
|
my $text = delete $subs{text} || $subs{pass}; |
|
108
|
0
|
|
0
|
|
|
0
|
my $decl = delete $subs{decl} || $subs{pass}; |
|
109
|
0
|
|
0
|
|
|
0
|
my $proc = delete $subs{proc} || $subs{pass}; |
|
110
|
0
|
|
0
|
|
|
0
|
my $elmt = delete $subs{elmt} || $subs{pass}; |
|
111
|
0
|
|
0
|
|
|
0
|
my $elcl = delete $subs{elcl} || $subs{pass}; |
|
112
|
|
|
|
|
|
|
|
|
113
|
0
|
|
|
|
|
0
|
my ( $spec, $name, $copy, $atts, %atts ); |
|
114
|
0
|
|
|
|
|
0
|
foreach my $node ( $self->tokenize( $xstr ) ) { |
|
115
|
0
|
|
|
|
|
0
|
$spec = substr( $node, 0, 2 ); |
|
116
|
0
|
0
|
|
|
|
0
|
if ( index( $spec, '<' ) != 0 ) { |
|
117
|
0
|
0
|
|
|
|
0
|
$text && $text->( $state, $node ); |
|
118
|
0
|
|
|
|
|
0
|
next; |
|
119
|
|
|
|
|
|
|
} |
|
120
|
0
|
0
|
|
|
|
0
|
if ( $spec eq '' ) { |
|
121
|
0
|
0
|
|
|
|
0
|
$elcl && $elcl->( $state, substr( $node, 2, -1 ) ); |
|
122
|
0
|
|
|
|
|
0
|
next; |
|
123
|
|
|
|
|
|
|
} |
|
124
|
0
|
0
|
|
|
|
0
|
if ( $spec eq '
|
|
125
|
0
|
0
|
|
|
|
0
|
$decl && $decl->( $state, $node ); |
|
126
|
0
|
|
|
|
|
0
|
next; |
|
127
|
|
|
|
|
|
|
} |
|
128
|
0
|
0
|
|
|
|
0
|
if ( $spec eq '' ) { |
|
129
|
0
|
0
|
|
|
|
0
|
$proc && $proc->( $state, $node ); |
|
130
|
0
|
|
|
|
|
0
|
next; |
|
131
|
|
|
|
|
|
|
} |
|
132
|
|
|
|
|
|
|
|
|
133
|
0
|
0
|
|
|
|
0
|
$elmt && do { |
|
134
|
0
|
|
|
|
|
0
|
$copy = substr( $node, 1, -1 ); |
|
135
|
0
|
|
|
|
|
0
|
( $name, $atts ) = split( /\s+/, $copy, 2 ); |
|
136
|
0
|
|
|
|
|
0
|
$name =~ s{/$}{}; |
|
137
|
0
|
0
|
|
|
|
0
|
%atts = $atts ? $self->parse_attributes( $atts ) : ( ); |
|
138
|
0
|
|
|
|
|
0
|
$elmt->( $state, $name, %atts ); |
|
139
|
|
|
|
|
|
|
}; |
|
140
|
|
|
|
|
|
|
|
|
141
|
0
|
0
|
|
|
|
0
|
if ( substr( $node, -2 ) eq '/>' ) { |
|
142
|
0
|
0
|
|
|
|
0
|
$elcl && $elcl->( $state, $name ); |
|
143
|
|
|
|
|
|
|
} |
|
144
|
|
|
|
|
|
|
} |
|
145
|
|
|
|
|
|
|
} |
|
146
|
|
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
sub read_file { |
|
148
|
0
|
|
|
0
|
0
|
0
|
my ( $self, $file ) = @_; |
|
149
|
0
|
|
|
|
|
0
|
my $xstr; |
|
150
|
|
|
|
|
|
|
{ |
|
151
|
0
|
0
|
|
|
|
0
|
open FH, $file or return; |
|
|
0
|
|
|
|
|
0
|
|
|
152
|
0
|
|
|
|
|
0
|
local $/ = undef; |
|
153
|
0
|
|
|
|
|
0
|
$xstr = ; |
|
154
|
0
|
|
|
|
|
0
|
close FH; |
|
155
|
|
|
|
|
|
|
} |
|
156
|
0
|
|
|
|
|
0
|
return $xstr; |
|
157
|
|
|
|
|
|
|
} |
|
158
|
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
sub parse_file { |
|
160
|
0
|
|
|
0
|
1
|
0
|
my ( $self, $file ) = @_; |
|
161
|
0
|
|
|
|
|
0
|
return $self->parse( $self->read_file( $file ) ); |
|
162
|
|
|
|
|
|
|
} |
|
163
|
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
sub stream_file { |
|
165
|
0
|
|
|
0
|
1
|
0
|
my ( $self, $file, $state, %subs ) = @_; |
|
166
|
0
|
|
|
|
|
0
|
return $self->stream( $self->read_file( $file ), $state, %subs ); |
|
167
|
|
|
|
|
|
|
} |
|
168
|
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
sub parse_as_fragment { |
|
170
|
0
|
|
|
0
|
0
|
0
|
my ( $self, $xstr ) = @_; |
|
171
|
0
|
|
|
|
|
0
|
my $xdoc = $self->parse( ''.$xstr.'' ); |
|
172
|
0
|
|
|
|
|
0
|
my $root = $xdoc->root; |
|
173
|
0
|
|
|
|
|
0
|
my $frag = YAX::Fragment->new; |
|
174
|
0
|
|
|
|
|
0
|
$frag->append( $root->[0] ) while @$root; |
|
175
|
0
|
|
|
|
|
0
|
return $frag; |
|
176
|
|
|
|
|
|
|
} |
|
177
|
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
sub parse_file_as_fragment { |
|
179
|
0
|
|
|
0
|
0
|
0
|
my ( $self, $file ) = @_; |
|
180
|
0
|
|
|
|
|
0
|
my $xstr = $self->read_file( $file ); |
|
181
|
0
|
|
|
|
|
0
|
my $frag = $self->parse_as_fragment( $xstr ); |
|
182
|
0
|
|
|
|
|
0
|
return $frag; |
|
183
|
|
|
|
|
|
|
} |
|
184
|
|
|
|
|
|
|
|
|
185
|
|
|
|
|
|
|
sub tokenize { |
|
186
|
4
|
|
|
4
|
1
|
9
|
my ( $self, $xstr ) = @_; |
|
187
|
4
|
|
|
|
|
1199
|
return $xstr =~ /$XML_SPE/g; |
|
188
|
|
|
|
|
|
|
} |
|
189
|
|
|
|
|
|
|
|
|
190
|
|
|
|
|
|
|
sub _mk_decl { |
|
191
|
3
|
|
|
3
|
|
7
|
my ( $self, $decl, $parent ) = @_; |
|
192
|
3
|
|
|
|
|
4
|
my ( $type, $name ); |
|
193
|
3
|
|
|
|
|
4
|
my $offset = 1; |
|
194
|
3
|
|
|
|
|
5
|
my $length = length( $decl ); |
|
195
|
|
|
|
|
|
|
|
|
196
|
3
|
100
|
|
|
|
8
|
substr( $decl, 0, 4 ) eq ' |