File Coverage

blib/lib/XML/Filter/Dispatcher/Ops.pm
Criterion Covered Total %
statement 205 983 20.8
branch 55 324 16.9
condition 16 87 18.3
subroutine 43 334 12.8
pod n/a
total 319 1728 18.4


line stmt bran cond sub pod time code
1             package XML::Filter::Dispatcher::Ops;
2              
3             =head1 NAME
4              
5             XML::Filter::Dispatcher::Ops - The Syntax Tree
6              
7             =head1 SYNOPSIS
8              
9             None. Used by XML::Filter::Dispatcher.
10              
11             =cut
12              
13             ## TODO: Replace XFD:: with XML::Filter::Dispatcher
14              
15             ## TODO: "helper" subs. functions? dunno.
16             ## as-structure() EventPath function
17             ## emit XML chunk (well balanced, not wf)
18             ## as_document
19              
20             ## TODO: use context->{PossibleEventTypes} to
21             ## reduce the amount of downstream test code and
22             ## perhaps the amount of currying. No need to test
23             ## for EventType if we know only one node type is
24             ## possible :).
25              
26             package XFD;
27              
28 1     1   5 use Carp qw( confess ); ## NOT croak: this module must die "...\n".
  1         1  
  1         68  
29              
30 1     1   4 use constant is_tracing => defined $Devel::TraceSAX::VERSION;
  1         1  
  1         111  
31              
32             # Devel::TraceSAX does not work with perl5.8.0
33             #use constant is_tracing => 1;
34             #sub emit_trace_SAX_message { warn @_ };
35              
36             ## Some debugging aids
37             *_ev = \&XML::Filter::Dispatcher::_ev;
38             *_po = \&XML::Filter::Dispatcher::_po;
39              
40             BEGIN {
41 1 50   1   74 eval( is_tracing
42             ? 'use Devel::TraceSAX qw( emit_trace_SAX_message ); 1'
43             : 'sub emit_trace_SAX_message; 1'
44             ) or die $@;
45              
46             }
47              
48 1     1   4 use strict;
  1         2  
  1         27  
49              
50             use vars (
51 1         1701 '$dispatcher', ## The X::F::D that we're doing the parse for.
52 1     1   3 );
  1         3  
53              
54             =begin private
55              
56             =head1 Precursors and Postponment
57              
58             NOTE: in this blurb, nodes occur in alphabetical order in the document
59             being processed.
60              
61             level 0 expressions are able to be evaluated using only the current
62             event and it's 'ancestor events' and, when the match succeeds, the
63             action is executed in the current event's context. The path '/a/b[@c]'
64             is level 0 can be evaluated using just the start_document event and the
65             start_element events for , , and . So are the paths
66             '/a[@a]/b[@b]/c' and '/a[b]/b[c]/c'.
67              
68             The paths '/a[b]' is not level 0; it requires some evaluation to occur
69             when the start_element event for '/a' is seen and other evaluation when
70             the start_element event for '/a/b' is seen; and when it does match
71             (which will occur in the start event for the first '/a/b'), the action's
72             execution context must be '/a', not '/a/b'. In this case, the match
73             must proceed in stages and the action is postponed until all stages are
74             satisfied.
75              
76             This is implemented in this code by converting level 1 expressions in to
77             precursors ('/a' and './b' in /a's context in our example) and
78             postponing the main expression (which just fires the action in this
79             case) using an object called a "postponement session" until enough
80             precursors are satisfied. Once enough precursors are satisfied, the
81             action is executed and the postponement is destroyed.
82              
83             The phrase "enough precursors are satisfied" is used rather than "all
84             precursors are satisfied" because expressions like 'a[ b or c ]' does
85             not need both the './b' or './c' precursors; either one will suffice.
86              
87             A postponement session has a value slot for each precursor and a slot for
88             the result context. Each precursor fills is it's slot when it matches,
89             and the primary precursor sets the main expression / action context. As
90             each precursor fires, it checks to see if enough of the slots are filled
91             in and fires the action or main expression if so. Expressions like
92             '//*[b]' and '/a//b[c]' can cause multiple simultaneous postponement
93             sessions for the same expression.
94              
95             For expressions like '/a/b[c]/d', the main expression (or result)
96             context may not be set when some of the precursors match: /a/b/c will
97             match before /a/b/d. The precursor for '/a/b/d' is called the primary
98             precursor and sets the result context.
99              
100             The action (or main expression in an expression line '/a[concat(b,c)]') is
101             evaluated in the result context ('/a' in this case).
102              
103             The main expression (or action) should be executed once per primary
104             precursor match in the document where the entire expression is true.
105             So a rule with a pattern '/a/b[c]/d[e]' would fire once for every /a/b/d
106             node in the document where the entire match is true.
107              
108             //a[b]/c and //a[c]/b
109              
110             The first precursor to match must create a postponement session.
111              
112             Q: How do we associate the precursors with their appropriate
113             postponement sessions?
114              
115             In expressions like 'concat( /a, /b )', the precursors '/a' and '/b' are
116             numbered 0 and 1 and there is a "main expression" of 'concat(
117             PRECURSOR_0, PRECURSOR_1)', where the PRECURSOR_# is the result of the
118             indicated precursor. The main expression which is computed before
119             firing the action. The action context is "/".
120              
121             Expressions like 'concat( a, b )' are similar except that the action
122             context is the parent of the and elements that match.
123              
124             In '/a[concat( b, c )]' the precursors are './b', './c' and 'a[concat(
125             PRECURSOR_0, PRECURSOR_1 )]', and the action can only fire when
126             PRECURSOR_2 becomes defined. The action contexst is '/a'.
127              
128             Each time a context-setting precursor matches, all presursor sessions
129             that it affects become "eligible" for firing. A precursor session fires
130             as soon as enough precursors are satisfied.
131              
132             =head2 Firing Policy
133              
134             This does not apply to level 0 patterns.
135              
136             An application may support multiple policies controlling when a
137             postponed expression is finally completely evaluated (ie matches, fails
138             to match, or returns a result).
139              
140             A "prompt evaluation policy" describes implementations where postponed
141             expressions match the first time enough predicates are
142             satisified (when the start_element() arrives, in this case). This
143             is useful to minimize memory and recognize conditions as rapidly as
144             possible.
145              
146             A "delayed evaluation policy" describes implementations where postponed
147             expressions are evaluated during the end_...() event for the node
148             deepest in the hierarchy for which an un-postponed test was true. For
149             example, rules with these patterns must fire their actions before the
150             if at all: '/a[b]', '/z/a/[b]'. This may make some
151             implementations easier but is discouraged unless necessary.
152              
153             An application must be apply a consistent firing policy policy prompt,
154              
155             An application may also provide for
156              
157             An application must detail whether it supports modes other than "prompt
158             firing" or not and all applications
159              
160             =end private
161              
162             =cut
163              
164             ###############################################################################
165             ##
166             ## Boolean Singletons
167             ##
168              
169             ## These are not used internally; 1 and 0 are. These are used when passing
170             ## boolean values in / out to Perl code, so they may be differentiated from
171             ## numeric ones.
172             sub true() { \"true" }
173             sub false() { \"false" }
174              
175              
176             ###############################################################################
177             ##
178             ## Helpers
179             ##
180 0     0   0 sub _looks_numeric($) { $_[0] =~ /^[ \t\r\n]*-?(?:\d+(?:\.\d+)?|\.\d+)[ \t\r\n]*$/ }
181              
182 0     0   0 sub _looks_literal($) { $_[0] =~ /^(?:'[^']*'|"[^"]*")(?!\n)\Z/ }
183              
184             sub _indentomatic() { 1 } ## Turning this off shaves a little parse time.
185             ## I leave it on for more readable error
186             ## messages, and it's key for debugging since
187             ## is so much more readable; in fact, messed
188             ## up indenting can indicate serious problems
189              
190 10 50   10   23 sub _indent { Carp::confess "undef" unless defined $_[0]; $_[0] =~ s/^/ /mg; }
  10         209  
191              
192             sub _is_rel_path($) {
193 0     0   0 my $path = shift;
194              
195 0 0 0     0 return 0
      0        
196             if $path->isa( "XFD::PathTest" )
197             && $path->isa( "XFD::doc_node" ) ## /... paths
198             && ! $path->isa( "XFD::union" ); ## (a|b), union called this already.
199              
200 0         0 return 1;
201             }
202              
203              
204             ###############################################################################
205             ##
206             ## Postponement Records
207             ##
208             ## When the current location step or operator/function call
209             ## in an expression can't be calculated because it needs some
210             ## future information, it must be postponed. The portions of
211             ## the expression that can't yet be calculated are called
212             ## precursors; only when enough of them are calculated can
213             ## this expression be calculated.
214             ##
215             ## A Postponement record contains:
216             ##
217             ## - A list of contexts for which this postponement eventually
218             ## becomes valid.
219             ## - A pointer to the parent postponement
220             ## - A set of results one for each precursor.
221             ##
222             ## A postponement record is a simple array with a few set data fields
223             ## in the first elements; the remaining elements are used to hold
224             ## precursor results.
225             ##
226             ## This could be an object, but we don't need any inheritence.
227             ##
228             sub _p_parent_postponement() { 0 }
229             sub _p_contexts() { 1 }
230             sub _p_first_precursor () { 2 }
231              
232             ###############################################################################
233             ##
234             ## expr_as_incr_code
235             ##
236              
237             ##
238             ## Precursors
239             ## ==========
240             ##
241             ## A precursor is something (so far always a location path sub-expr) that
242             ## (usually) needs to be dealt with before a pattern can be evaluated.
243             ## A precursor is known as "defined" if it's been evaluated and returned
244             ## some result (number, string, boolean, or node).
245             ##
246             ## The only time a pattern can be fully evaluated in the face of undefined
247             ## precursor is when the precursor is supposed to return a node and the
248             ## precursor result is being turned in to a boolean. Booleans accept
249             ## empty node sets as false values. Right now, all precursors happen to
250             ## return node sets of 0 or 1 nodes.
251             ##
252             ## The precursor values are stored in $ctx because I'm afraid of leaky
253             ## closure in older perl. I haven't tested them in this case, but have been
254             ## bit before.
255              
256             sub _replace_NEXT {
257 0     0   0 my $what_next = "";
258 0 0       0 $what_next = pop if @_ > 2;
259 0         0 my $next_code = pop;
260 0         0 $_[0] =~ s{(^[ \t]*)?$what_next}{
261 0 0       0 if ( _indentomatic && defined $1 ) {
262 0         0 _indent $next_code for 1..(length( $1 ) / 2 );
263             }
264             $next_code
265 0         0 }gme;
266             }
267              
268              
269             ###############################################################################
270             ##
271             ## Parse tree node base class
272             ##
273             ## This is used for all of the pieces of location paths axis tests, name
274             ## and type tests, predicates. It is also used by a few functions
275             ## that act on the result of a location path (which is effectively a
276             ## node set with just the current node in it).
277             ##
278             sub XFD::Op::new {
279 35     35   48 my $class = shift;
280 35         199 return bless [ @_ ], $class;
281             }
282              
283              
284 0     0   0 sub XFD::Op::op_type { ( my $type = ref shift ) =~ s/.*:// ; $type }
  0         0  
285              
286             ## The optimizer combines common expressions in some cases (it should
287             ## do more, right now it only does common leading op codes). To do this
288             ## it needs to know the type of the operator and its arguments, if any.
289             ## By default, the signature is this op's reference id, which makes each
290             ## op look different to the optimizer.
291 0     0   0 sub XFD::Op::optim_signature { int shift }
292              
293              
294             sub XFD::Op::is_constant {
295 0     0   0 my $self = shift;
296 0         0 return ! grep ! $_->is_constant, @$self;
297             }
298            
299             ## fixup is called on a freshly parsed op tree just before it's
300             ## compiled to convert expression like 'A' to be like '//A'.
301             ## TODO: Perhaps move this to an XML::Filter::Dispatcher::_fixup(),
302             ## like _optimize().
303             sub XFD::Op::fixup {
304 32     32   41 my $self = shift;
305 32         40 my ( $context ) = @_;
306              
307 32         62 for ( @$self ) {
308 45 100 66     265 if ( defined && UNIVERSAL::isa( $_, "XFD::Op" ) ) {
309 35 50 33     276 if ( ! $context->{BelowRoot}
      66        
310             && ( $_->isa( "XFD::Axis::child" )
311             || $_->isa( "XFD::Axis::attribute" )
312             || $_->isa( "XFD::Axis::start_element" )
313             || $_->isa( "XFD::Axis::end_element" )
314             # || $_->isa( "XFD::Axis::end" )
315             )
316             ) {
317             ## The miniature version of XPath used in
318             ## XSLT's