File Coverage

lib/YAX/Parser.pm
Criterion Covered Total %
statement 97 153 63.4
branch 22 52 42.3
condition 0 18 0.0
subroutine 16 22 72.7
pod 5 10 50.0
total 140 255 54.9


line stmt bran cond sub pod time code
1             package YAX::Parser;
2              
3 3     3   41108 use strict;
  3         5  
  3         98  
4              
5 3     3   796 use YAX::Node;
  3         8  
  3         80  
6 3     3   857 use YAX::Text;
  3         25  
  3         96  
7 3     3   888 use YAX::Element;
  3         7  
  3         66  
8 3     3   460 use YAX::Fragment;
  3         6  
  3         59  
9 3     3   1465 use YAX::Document;
  3         7  
  3         107  
10 3     3   17 use YAX::Constants qw/:all/;
  3         5  
  3         7385  
11              
12             #========================================================================
13             # These regular expressions have been gratefully borrowed from:
14             #
15             # REX/Perl 1.0
16             # Robert D. Cameron "REX: XML Shallow Parsing with Regular Expressions",
17             # Technical Report TR 1998-17, School of Computing Science, Simon Fraser
18             # University, November, 1998.
19             # Copyright (c) 1998, Robert D. Cameron.
20             # The following code may be freely used and distributed provided that
21             # this copyright and citation notice remains intact and that modifications
22             # or additions are clearly identified.
23              
24             our $TextSE = "[^<]+";
25             our $UntilHyphen = "[^-]*-";
26             our $Until2Hyphens = "$UntilHyphen(?:[^-]$UntilHyphen)*-";
27             our $CommentCE = "$Until2Hyphens>?";
28             our $UntilRSBs = "[^\\]]*](?:[^\\]]+])*]+";
29             our $CDATA_CE = "$UntilRSBs(?:[^\\]>]$UntilRSBs)*>";
30             our $S = "[ \\n\\t\\r]+";
31             our $NameStrt = "[A-Za-z_:]|[^\\x00-\\x7F]";
32             our $NameChar = "[A-Za-z0-9_:.-]|[^\\x00-\\x7F]";
33             our $Name = "(?:$NameStrt)(?:$NameChar)*";
34             our $QuoteSE = "\"[^\"]*\"|'[^']*'";
35             our $DT_IdentSE = "$S$Name(?:$S(?:$Name|$QuoteSE))*";
36             our $MarkupDeclCE = "(?:[^\\]\"'><]+|$QuoteSE)*>";
37             our $S1 = "[\\n\\r\\t ]";
38             our $UntilQMs = "[^?]*\\?+";
39             our $PI_Tail = "\\?>|$S1$UntilQMs(?:[^>?]$UntilQMs)*>";
40             our $DT_ItemSE = "<(?:!(?:--$Until2Hyphens>|[^-]$MarkupDeclCE)|\\?$Name(?:$PI_Tail))|%$Name;|$S";
41             our $DocTypeCE = "$DT_IdentSE(?:$S)?(?:\\[(?:$DT_ItemSE)*](?:$S)?)?>?";
42             our $DeclCE = "--(?:$CommentCE)?|\\[CDATA\\[(?:$CDATA_CE)?|DOCTYPE(?:$DocTypeCE)?";
43             our $PI_CE = "$Name(?:$PI_Tail)?";
44             our $EndTagCE = "$Name(?:$S)?>?";
45             our $AttValSE = "\"[^<\"]*\"|'[^<']*'";
46             our $ElemTagCE = "$Name(?:$S$Name(?:$S)?=(?:$S)?(?:$AttValSE))*(?:$S)?/?>?";
47             our $ElementCE = "/(?:$EndTagCE)?|(?:$ElemTagCE)?";
48             our $MarkupSPE = "<(?:!(?:$DeclCE)?|\\?(?:$PI_CE)?|(?:$ElementCE)?)";
49             our $XML_SPE = "$TextSE|$MarkupSPE";
50              
51             #========================================================================
52              
53             # these have captures for parsing attributes
54             our $AttValSE2 = "\"([^<\"]*)\"|'([^<']*)'";
55             our $ElemTagCE2 = "(?:($Name)(?:$S)?=(?:$S)?(?:$AttValSE2))+(?:$S)?/?>?";
56              
57             sub new {
58 1     1 0 10 my ( $class ) = @_;
59 1         3 my $self = bless { }, $class;
60 1         3 return $self;
61             }
62              
63             sub parse {
64 2     2 1 21 my ( $self, $xstr ) = ( shift, shift );
65 2 50       7 return unless $xstr;
66 2         11 my @nodes = $self->tokenize( $xstr );
67              
68 2         34 my $xdoc = YAX::Document->new();
69 2         6 my @stack = ( $xdoc );
70 2         4 my ( $spec, $elmt );
71 2         6 foreach my $node ( $self->tokenize( $xstr ) ) {
72 102         165 $spec = substr( $node, 0, 2 );
73 102 100       219 if ( index( $spec, '<' ) != 0 ) {
74 48         111 $self->_mk_text( $node, $stack[-1] );
75 48         71 next;
76             }
77 54 100       110 if ( $spec eq '
78 22         24 pop @stack;
79 22         30 next;
80             }
81 32 100       57 if ( $spec eq '
82 3         11 $self->_mk_decl( $node, $stack[-1] );
83 3         8 next;
84             }
85 29 100       53 if ( $spec eq '
86 2         9 $self->_mk_proc( $node, $stack[-1] );
87 2         4 next;
88             }
89              
90 27         63 $elmt = $self->_mk_elmt( $node, $stack[-1] );
91 27 100       85 push( @stack, $elmt ) unless ( $node =~ m{/>$} );
92 27 100       55 $xdoc->set( $elmt->{id} => $elmt ) if $elmt->{id}
93             }
94              
95 2         28 return $xdoc;
96             }
97              
98             sub stream {
99 0     0 1 0 my ( $self, $xstr, $state ) = ( shift, shift, shift );
100 0         0 my %subs;
101 0 0 0     0 if ( @_ == 1 and ref $_[0] eq 'HASH' ) {
102 0         0 %subs = %{$_[0]};
  0         0  
103             } else {
104 0         0 %subs = @_;
105             }
106              
107 0   0     0 my $text = delete $subs{text} || $subs{pass};
108 0   0     0 my $decl = delete $subs{decl} || $subs{pass};
109 0   0     0 my $proc = delete $subs{proc} || $subs{pass};
110 0   0     0 my $elmt = delete $subs{elmt} || $subs{pass};
111 0   0     0 my $elcl = delete $subs{elcl} || $subs{pass};
112              
113 0         0 my ( $spec, $name, $copy, $atts, %atts );
114 0         0 foreach my $node ( $self->tokenize( $xstr ) ) {
115 0         0 $spec = substr( $node, 0, 2 );
116 0 0       0 if ( index( $spec, '<' ) != 0 ) {
117 0 0       0 $text && $text->( $state, $node );
118 0         0 next;
119             }
120 0 0       0 if ( $spec eq '
121 0 0       0 $elcl && $elcl->( $state, substr( $node, 2, -1 ) );
122 0         0 next;
123             }
124 0 0       0 if ( $spec eq '
125 0 0       0 $decl && $decl->( $state, $node );
126 0         0 next;
127             }
128 0 0       0 if ( $spec eq '
129 0 0       0 $proc && $proc->( $state, $node );
130 0         0 next;
131             }
132              
133 0 0       0 $elmt && do {
134 0         0 $copy = substr( $node, 1, -1 );
135 0         0 ( $name, $atts ) = split( /\s+/, $copy, 2 );
136 0         0 $name =~ s{/$}{};
137 0 0       0 %atts = $atts ? $self->parse_attributes( $atts ) : ( );
138 0         0 $elmt->( $state, $name, %atts );
139             };
140              
141 0 0       0 if ( substr( $node, -2 ) eq '/>' ) {
142 0 0       0 $elcl && $elcl->( $state, $name );
143             }
144             }
145             }
146              
147             sub read_file {
148 0     0 0 0 my ( $self, $file ) = @_;
149 0         0 my $xstr;
150             {
151 0 0       0 open FH, $file or return;
  0         0  
152 0         0 local $/ = undef;
153 0         0 $xstr = ;
154 0         0 close FH;
155             }
156 0         0 return $xstr;
157             }
158              
159             sub parse_file {
160 0     0 1 0 my ( $self, $file ) = @_;
161 0         0 return $self->parse( $self->read_file( $file ) );
162             }
163              
164             sub stream_file {
165 0     0 1 0 my ( $self, $file, $state, %subs ) = @_;
166 0         0 return $self->stream( $self->read_file( $file ), $state, %subs );
167             }
168              
169             sub parse_as_fragment {
170 0     0 0 0 my ( $self, $xstr ) = @_;
171 0         0 my $xdoc = $self->parse( ''.$xstr.'' );
172 0         0 my $root = $xdoc->root;
173 0         0 my $frag = YAX::Fragment->new;
174 0         0 $frag->append( $root->[0] ) while @$root;
175 0         0 return $frag;
176             }
177              
178             sub parse_file_as_fragment {
179 0     0 0 0 my ( $self, $file ) = @_;
180 0         0 my $xstr = $self->read_file( $file );
181 0         0 my $frag = $self->parse_as_fragment( $xstr );
182 0         0 return $frag;
183             }
184              
185             sub tokenize {
186 4     4 1 9 my ( $self, $xstr ) = @_;
187 4         1199 return $xstr =~ /$XML_SPE/g;
188             }
189              
190             sub _mk_decl {
191 3     3   7 my ( $self, $decl, $parent ) = @_;
192 3         4 my ( $type, $name );
193 3         4 my $offset = 1;
194 3         5 my $length = length( $decl );
195              
196 3 100       8 substr( $decl, 0, 4 ) eq '