File Coverage

blib/lib/XML/TreePP/XMLPath.pm
Criterion Covered Total %
statement 502 855 58.7
branch 228 632 36.0
condition 101 386 26.1
subroutine 30 43 69.7
pod 13 13 100.0
total 874 1929 45.3


line stmt bran cond sub pod time code
1             =pod
2              
3             =head1 NAME
4              
5             XML::TreePP::XMLPath - Similar to XPath, defines a path as an accessor to nodes of an XML::TreePP parsed XML Document.
6              
7             =head1 SYNOPSIS
8              
9             use XML::TreePP;
10             use XML::TreePP::XMLPath;
11            
12             my $tpp = XML::TreePP->new();
13             my $tppx = XML::TreePP::XMLPath->new();
14            
15             my $tree = { rss => { channel => { item => [ {
16             title => "The Perl Directory",
17             link => "http://www.perl.org/",
18             }, {
19             title => "The Comprehensive Perl Archive Network",
20             link => "http://cpan.perl.org/",
21             } ] } } };
22             my $xml = $tpp->write( $tree );
23              
24             Get a subtree of the XMLTree:
25              
26             my $xmlsub = $tppx->filterXMLDoc( $tree , q{rss/channel/item[title="The Comprehensive Perl Archive Network"]} );
27             print $xmlsub->{'link'};
28              
29             Iterate through all attributes and Elements of each XML element:
30              
31             my $xmlsub = $tppx->filterXMLDoc( $tree , q{rss/channel/item} );
32             my $h_attr = $tppx->getAttributes( $xmlsub );
33             my $h_elem = $tppx->getElements( $xmlsub );
34             foreach $attrHash ( @{ $h_attr } ) {
35             while my ( $attrKey, $attrVal ) = each ( %{$attrHash} ) {
36             ...
37             }
38             }
39             foreach $elemHash ( @{ $h_elem } ) {
40             while my ( $elemName, $elemVal ) = each ( %{$elemHash} ) {
41             ...
42             }
43             }
44              
45             EXAMPLE for using XML::TreePP::XMLPath to access a non-XML compliant tree of
46             PERL referenced data.
47              
48             use XML::TreePP::XMLPath;
49            
50             my $tppx = new XML::TreePP::XMLPath;
51             my $hashtree = {
52             config => {
53             nodes => {
54             "10.0.10.5" => {
55             options => [ 'option1', 'option2' ],
56             alerts => {
57             email => 'someone@nowhere.org'
58             }
59             }
60             }
61             }
62             };
63             print $tppx->filterXMLDoc($hashtree, '/config/nodes/10.0.10.5/alerts/email');
64             print "\n";
65             print $tppx->filterXMLDoc($hashtree, '/config/nodes/10.0.10.5/options[2]');
66             print "\n";
67              
68             Result
69            
70             someone@nowhere.org
71             option2
72              
73             =head1 DESCRIPTION
74              
75             A pure PERL module to compliment the pure PERL XML::TreePP module. XMLPath may
76             be similar to XPath, and it does attempt to conform to the XPath standard when
77             possible, but it is far from being fully XPath compliant.
78              
79             Its purpose is to implement an XPath-like accessor methodology to nodes in a
80             XML::TreePP parsed XML Document. In contrast, XPath is an accessor methodology
81             to nodes in an unparsed (or raw) XML Document.
82              
83             The advantage of using XML::TreePP::XMLPath over any other PERL implementation
84             of XPath is that XML::TreePP::XMLPath is an accessor to XML::TreePP parsed
85             XML Documents. If you are already using XML::TreePP to parse XML, you can use
86             XML::TreePP::XMLPath to access nodes inside that parsed XML Document without
87             having to convert it into a raw XML Document.
88              
89             As an additional side-benefit, any PERL HASH/ARRY reference data structure can
90             be accessible via the XPath accessor method provided by this module. It does
91             not have to a parsed XML structure. The last example in the SYNOPSIS illustrates
92             this.
93              
94             =head1 REQUIREMENTS
95              
96             The following perl modules are depended on by this module:
97             ( I )
98              
99             =over 4
100              
101             =item * XML::TreePP
102              
103             =item * Data::Dumper
104              
105             =back
106              
107             =head1 IMPORTABLE METHODS
108              
109             When the calling application invokes this module in a use clause, the following
110             methods can be imported into its space.
111              
112             =over 4
113              
114             =item * C
115              
116             =item * C
117              
118             =item * C
119              
120             =item * C
121              
122             =item * C
123              
124             =item * C
125              
126             =back
127              
128             Example:
129              
130             use XML::TreePP::XMLPath qw(parseXMLPath filterXMLDoc getValues getAttributes getElements);
131              
132             =head1 REMOVED METHODS
133              
134             The following methods are removed in the current release.
135              
136             =over 4
137              
138             =item * C
139              
140             =item * C
141              
142             =back
143              
144             =head1 XMLPath PHILOSOPHY
145              
146             =head2 General Illustration of XMLPath
147              
148             Referring to the following XML Data.
149              
150            
151            
152             Do red cats eat yellow food
153             ?
154            
155            
156             Brown cows eat green grass
157             .
158            
159            
160              
161             Where the path "C" has two matches:
162             "C" and "C".
163              
164             Where the path "C" has the same previous two
165             matches.
166              
167             Where the path "C" has one
168             match: "C".
169              
170             And where the path "C" matches
171             "C"
172              
173             So that "C<[@attr=val]>" is identified as an attribute inside the
174             ""
175              
176             And "C<[attr=val]>" is identified as a nested attribute inside the
177             "val"
178              
179             And "C<[2]>" is a positional argument identifying the second node in a list
180             "value-1value-2".
181              
182             And "C<@attr>" identifies all nodes containing the C<@attr> attribute.
183             "value-1value-2".
184              
185             After XML::TreePP parses the above XML, it looks like this:
186              
187             {
188             paragraph => {
189             sentence => [
190             {
191             "-language" => "english",
192             punctuation => "?",
193             words => "Do red cats eat yellow food",
194             },
195             {
196             "-language" => "english",
197             punctuation => ".",
198             words => "Brown cows eat green grass",
199             },
200             ],
201             },
202             }
203              
204             =head2 Noting Attribute Identification in Parsed XML
205              
206             Note that attributes are specified in the XMLPath as C<@attribute_name>, but
207             after C parses the XML Document, the attribute name is
208             identified as C<-attribute_name> in the resulting parsed document.
209             This can be changed in Object Oriented mode using the
210             C<$tppx->tpp->set(attr_prefix=>'@')> method to set the attr_prefix attribute in
211             the XML::TreePP object referenced internally. It should only be changed if the
212             XML Document is provided as already parsed, and the attributes are represented
213             with a value other than the default.
214             This document uses the default value of C<-> in its examples.
215              
216             XMLPath requires attributes to be specified as C<@attribute_name> and takes care
217             of the conversion from C<@> to C<-> behind the scenes when accessing the
218             XML::TreePP parsed XML document.
219              
220             Child elements on the next level of a parent element are accessible as
221             attributes as C. This is the same format as C<@attribute_name>
222             except without the C<@> symbol. Specifying the attribute without an C<@> symbol
223             identifies the attribute as a child element of the parent element being
224             evaluated.
225              
226             =head2 Noting Text (CDATA) Identification in Parsed XML
227              
228             Additionally, the values of child elements are identified in XML parsed by
229             C with the C<#> pound/hash symbol. This can be changed
230             via the C property in the C object referenced by
231             Ctpp()>. C derives the value to
232             use from this.
233              
234             =head2 Accessing Child Element Values in XMLPath
235              
236             Child element values are only accessible as C. That is when the
237             element being evaluated is C, the attribute (or child element) is
238             C, and the value of the attribute is C, it is presented as this:
239              
240            
241            
242             tiger
243            
244            
245              
246             The XMLPath used to access the key=value pair of C for element
247             C would be as follows:
248              
249             jungle/animal[cat='tiger']
250              
251             And in version 0.52, in this second case, the above XMLPath is still valid:
252              
253            
254            
255             tiger
256            
257            
258              
259             In version 0.52, the period (.) is supported as it is in XPath to represent
260             the current context node. As such, the following XMLPaths would also be valid:
261              
262             jungle/animal/cat[.='tiger']
263             jungle/animal/cat[@color='black'][.='tiger']
264              
265             One should realize that in these previous two XMLPaths, the element C is
266             being evaluated, and not the element C as in the first case. And will
267             be undesirable if you want to evaluate C for results.
268              
269             To perform the same evaluation, but return the matching C node, the
270             following XMLPath can be used:
271              
272             jungle/animal[cat='tiger']
273              
274             To evaluate C and C, but return the matching C node, the
275             following XMLPaths can be used:
276              
277             jungle/animal[cat='tiger']/cat
278             jungle/animal/cat[.='tiger']
279              
280             The first path analyzes C, and the second path analyzes C. But
281             both matches the same node "
299              
300             The result set of this XMLPath would be "C".
301              
302             =head1 METHODS
303              
304             =cut
305              
306             package XML::TreePP::XMLPath;
307              
308 1     1   43334 use 5.005;
  1         4  
  1         41  
309 1     1   5 use strict;
  1         2  
  1         63  
310 1     1   5 use warnings;
  1         7  
  1         37  
311 1     1   5 use Exporter;
  1         2  
  1         48  
312 1     1   5 use Carp;
  1         2  
  1         87  
313 1     1   1076 use XML::TreePP;
  1         10950  
  1         45  
314 1     1   1355 use Data::Dumper;
  1         11610  
  1         86  
315              
316             BEGIN {
317 1     1   9 use vars qw(@ISA @EXPORT @EXPORT_OK);
  1         2  
  1         102  
318 1     1   18 @ISA = qw(Exporter);
319 1         3 @EXPORT = qw();
320 1         4 @EXPORT_OK = qw(&charlexsplit &getAttributes &getElements &getSubtree &parseXMLPath &assembleXMLPath &filterXMLDoc &getValues);
321              
322 1     1   5 use vars qw($REF_NAME);
  1         2  
  1         39  
323 1         2 $REF_NAME = "XML::TreePP::XMLPath"; # package name
324              
325 1     1   5 use vars qw( $VERSION $TPPKEYS );
  1         2  
  1         59  
326 1         2 $VERSION = '0.72';
327 1         2 $TPPKEYS = "force_array force_hash cdata_scalar_ref user_agent http_lite lwp_useragent base_class elem_class xml_deref first_out last_out indent xml_decl output_encoding utf8_flag attr_prefix text_node_key ignore_error use_ixhash";
328              
329 1     1   4 use vars qw($DEBUG $DEBUGMETHOD $DEBUGNODE $DEBUGPATH $DEBUGFILTER $DEBUGDUMP);
  1         1  
  1         144  
330 1         14 $DEBUG = 0;
331 1         2 $DEBUGMETHOD = 1;
332 1         2 $DEBUGNODE = 2;
333 1         1 $DEBUGPATH = 3;
334 1         2 $DEBUGFILTER = 4;
335 1         11364 $DEBUGDUMP = 7;
336             }
337              
338              
339             =pod
340              
341             =head2 tpp
342              
343             =over
344              
345             This module is an extension of the XML::TreePP module. As such, it uses the
346             module in many different methods to parse XML Documents, and to get the value
347             of C properties like C and C.
348              
349             The C module, however, is only loaded into C
350             when it becomes necessary to perform the previously described requests. For the
351             aformentioned properties C and C, default values
352             are used if a C object has not been loaded.
353              
354             To avoid having this module load the XML::TreePP module,
355             do not pass in unparsed XML documents. The caller would instead want to
356             parse the XML document with C before passing it in.
357             Passing in an unparsed XML document causes this module to load C
358             in order to parse it for processing.
359              
360             Alternately, If the caller has loaded a copy of C, that object
361             instance can be assigned to be used by the instance of this module using this
362             method. In doing so, when XML::TreePP is needed, the instance provided is used
363             instead of loading another copy.
364              
365             If this module has loaded an instance of , this instance can be
366             directly accessed or retrieved through this method. For example, the
367             aformentioned properties can be set.
368              
369             $tppx->tpp->set('attr_prefix','@'); # default is (-) dash
370             $tppx->tpp->set('text_node_key','#'); # default is (#) pound
371              
372             If you want to only get the internally loaded instance of C, but
373             do not want to load a new instance and instead have undef returned if an
374             instance is not already loaded, then use the C method.
375              
376             my $tppobj = $tppx->get( 'tpp' );
377             warn "XML::TreePP is not loaded in XML::TreePP::XMLPath.\n" if !defined $tppobj;
378              
379             This method was added in version 0.52
380              
381             =over 4
382              
383             =item * B
384              
385             An instance of XML::TreePP that this object should use instead of, when needed,
386             loading its own copy. If not provided, the currently loaded instance is
387             returned. If an instance is not loaded, an instance is loaded and then returned.
388              
389             =item * I
390              
391             Returns the result of setting an instance of XML::TreePP in this object.
392             Or returns the internally loaded instance of XML::TreePP.
393             Or loads a new instance of XML::TreePP and returns it.
394              
395             =back
396              
397             $tppx->tpp( new XML::TreePP ); # Sets the XML::TreePP instance to be used by this object
398             $tppx->tpp(); # Retrieve the currently loaded XML::TreePP instance
399              
400             =back
401              
402             =cut
403              
404             sub tpp(@) {
405 0 0 0 0 1 0 my $self = shift if ref($_[0]) eq $REF_NAME || undef;
406 0 0       0 if (!defined $self) {
407 0         0 return new XML::TreePP;
408             } else {
409 0 0 0     0 return $self->{'tpp'} = shift if @_ >= 1 && ref($_[0]) eq "XML::TreePP";
410 0 0 0     0 return $self->{'tpp'} if defined $self->{'tpp'} && ref($self->{'tpp'}) eq "XML::TreePP";
411 0         0 $self->{'tpp'} = new XML::TreePP;
412 0         0 return $self->{'tpp'};
413             }
414             }
415              
416              
417             =pod
418              
419             =head2 set
420              
421             =over
422              
423             Set the value for a property in this object instance.
424             This method can only be accessed in object oriented style.
425              
426             This method was added in version 0.52
427              
428             =over 4
429              
430             =item * B
431              
432             The property to set the value for.
433              
434             =item * B
435              
436             The value of the property to set.
437             If no value is given, the property is deleted.
438              
439             =item * I
440              
441             Returns the result of setting the value of the property, or the result of
442             deleting the property.
443              
444             =back
445              
446             $tppx->set( 'property_name' ); # deletes the property property_name
447             $tppx->set( 'property_name' => 'val' ); # sets the value of property_name
448              
449             =back
450              
451             =cut
452              
453             sub set(@) {
454 0 0 0 0 1 0 my $self = shift if ref($_[0]) eq $REF_NAME || return undef;
455 0         0 my %args = @_;
456 0         0 while (my ($key,$val) = each %args) {
457 0 0       0 if ( defined $val ) {
458 0         0 $self->{$key} = $val;
459             }
460             else {
461 0         0 delete $self->{$key};
462             }
463             }
464             }
465              
466              
467             =pod
468              
469             =head2 get
470              
471             =over
472              
473             Retrieve the value set for a property in this object instance.
474             This method can only be accessed in object oriented style.
475              
476             This method was added in version 0.52
477              
478             =over 4
479              
480             =item * B
481              
482             The property to get the value for
483              
484             =item * I
485              
486             Returns the value of the property requested
487              
488             =back
489              
490             $tppx->get( 'property_name' );
491              
492             =back
493              
494             =cut
495              
496             sub get(@) {
497 7 50 50 7 1 23 my $self = shift if ref($_[0]) eq $REF_NAME || return undef;
498 7         7 my $key = shift;
499 7 50       21 return $self->{$key} if exists $self->{$key};
500 7         26 return undef;
501             }
502              
503              
504             =pod
505              
506             =head2 new
507              
508             =over
509              
510             Create a new object instances of this module.
511              
512             =over 4
513              
514             =item * B
515              
516             An instance of XML::TreePP to be used instead of letting this module load its
517             own.
518              
519             =item * I
520              
521             An object instance of this module.
522              
523             =back
524              
525             $tppx = new XML::TreePP::XMLPath();
526              
527             =back
528              
529             =cut
530              
531             # new
532             #
533             # It is not necessary to create an object of this module.
534             # However, if you choose to do so any way, here is how you do it.
535             #
536             # my $obj = new XML::TreePP::XMLPath;
537             #
538             # This module supports being called by two methods.
539             # 1. By importing the functions you wish to use, as in:
540             # use XML::TreePP::XMLPath qw( function1 function2 );
541             # function1( args )
542             # 2. Or by calling the functions in an object oriented manor, as in:
543             # my $tppx = new XML::TreePP::XMLPath()
544             # $tppx->function1( args )
545             # Using either method works the same and returns the same output.
546             #
547             sub new {
548 1     1 1 816 my $pkg = shift;
549 1   33     6 my $class = ref($pkg) || $pkg;
550 1         3 my $self = bless {}, $class;
551              
552 1         2 my %args = @_;
553 1 50       4 $self->tpp($args{'tpp'}) if exists $args{'tpp'};
554              
555 1         3 return $self;
556             }
557              
558              
559             =pod
560              
561             =head2 charlexsplit
562              
563             =over
564              
565             An analysis method for single character boundary and start/stop tokens
566              
567             =over 4
568              
569             =item * B
570              
571             The string to analyze
572              
573             =item * B
574              
575             The single character starting boundary separating wanted elements
576              
577             =item * B
578              
579             The single character stopping boundary separating wanted elements
580              
581             =item * B
582              
583             A { start_char => stop_char } hash reference of start/stop tokens.
584             The characters in C contained within a start_char and stop_char are not
585             evaluated to match boundaries.
586              
587             =item * B
588              
589             Provide "1" if the beginning of the string should be treated as a
590             C character.
591              
592             =item * B
593              
594             Provide "1" if the ending of the string should be treated as a C
595             character.
596              
597             =item * B
598              
599             The character that indicates the next character in the string is to be escaped.
600             The default value is the backward slash (\). And example is used in the
601             following string:
602              
603             'The Cat\'s Meow'
604              
605             Without a recognized escape character, the previous string would fail to be
606             recognized properly.
607              
608             This optional parameter was introduced in version 0.70.
609              
610             =item * I
611              
612             An array reference of elements
613              
614             =back
615              
616             $elements = charlexsplit (
617             string => $string,
618             boundry_start => $charA, boundry_stop => $charB,
619             tokens => \@tokens,
620             boundry_begin => $char1, boundry_end => $char2 );
621              
622             =back
623              
624             =cut
625              
626             # charlexsplit
627             # @brief A lexical analysis function for single character boundary and start/stop tokens
628             # @param string the string to analyze
629             # @param boundry_start the single character starting boundary separating wanted elements
630             # @param boundry_stop the single character stopping boundary separating wanted elements
631             # @param tokens a { start_char => stop_char } hash reference of start/stop tokens
632             # @param boundry_begin set to "1" if the beginning of the string should be treated as a 'boundry_start' character
633             # @param boundry_end set to "1" if the ending of the string should be treated as a 'boundry_stop' character
634             # @param escape_char the character that indicates the next character in the string is to be escaped. default is '\'
635             # @return an array reference of the resulting parsed elements
636             #
637             # Example:
638             # {
639             # my @el = charlexsplit (
640             # string => q{abcdefg/xyz/path[@key='val'][@key2='val2']/last},
641             # boundry_start => '/',
642             # boundry_stop => '/',
643             # tokens => [qw( [ ] ' ' " " )],
644             # boundry_begin => 1,
645             # boundry_end => 1
646             # );
647             # print join(', ',@el),"\n";
648             # my @el2 = charlexsplit (
649             # string => $el[2],
650             # boundry_start => '[',
651             # boundry_stop => ']',
652             # tokens => [qw( ' ' " " )],
653             # boundry_begin => 0,
654             # boundry_end => 0
655             # );
656             # print join(', ',@el2),"\n";
657             # my @el3 = charlexsplit (
658             # string => $el2[0],
659             # boundry_start => '=',
660             # boundry_stop => '=',
661             # tokens => [qw( ' ' " " )],
662             # boundry_begin => 1,
663             # boundry_end => 1
664             # );
665             # print join(', ',@el3),"\n";
666             #
667             # OUTPUT:
668             # abcdefg, xyz, path[@key='val'][@key2='val2'], last
669             # @key='val', @key2='val2'
670             # @key, 'val'
671             #
672             sub charlexsplit (@) {
673 22 50 50 22 1 84 my $self = shift if ref($_[0]) eq $REF_NAME || undef;
674 22         75 my %args = @_;
675 22         23 my @warns;
676 22 50       40 push(@warns,'string') if !defined $args{'string'};
677 22 50       37 push(@warns,'boundry_start') if !exists $args{'boundry_start'};
678 22 50       43 push(@warns,'boundry_stop') if !exists $args{'boundry_stop'};
679 22 50       33 push(@warns,'tokens') if !exists $args{'tokens'};
680 22 50       34 if (@warns) { carp ('method charlexsplit(@) requires the arguments: '.join(', ',@warns).'.'); return undef; }
  0         0  
  0         0  
681              
682 22         33 my $string = $args{'string'}; # The string to parse
683 22         21 my $boundry_start = $args{'boundry_start'}; # The boundary character separating wanted elements
684 22         21 my $boundry_stop = $args{'boundry_stop'}; # The boundary character separating wanted elements
685 22         15 my %tokens = @{$args{'tokens'}}; # The start=>stop characters that must be paired inside an element
  22         53  
686 22   100     74 my $boundry_begin = $args{'boundry_begin'} || 0;
687 22   100     53 my $boundry_end = $args{'boundry_end'} || 0;
688 22   50     254 my $escape_char = $args{'escape_char'} || "\\";
689              
690              
691             # split the string into individual characters
692 22         96 my @string = split(//,$string);
693              
694             # initialize variables
695 22         31 my $next = undef;
696 22         21 my $current_element = undef;
697 22         15 my @elements;
698 22         20 my $collect = 0;
699 22         18 my $escape_char_flag = 0;
700              
701 22 100       35 if ($boundry_begin == 1) {
702 6         6 $collect = 1;
703             }
704 22         27 CHAR: foreach my $c (@string) {
705 236 100       337 if ($c eq $escape_char) {
706 2         3 $current_element .= $c;
707 2         3 $escape_char_flag = 1;
708 2         2 next CHAR;
709             }
710 234 100       324 if ($escape_char_flag) {
711 2         2 $current_element .= $c;
712 2         2 $escape_char_flag = 0;
713 2         3 next CHAR;
714             }
715 232 100       344 if (!defined $next) { # If not looking for the 'stop' matching token
716 180 100       255 if ($c eq $boundry_stop) { # If this character matches the boundry_stop character...
717 20 100       31 if (defined $current_element) { # -and the current_element is defined...
718 14         20 push(@elements,$current_element); # -put the current element in the elements array...
719 14         12 $current_element = undef; # -stop collecting elements.
720             }
721 20 100       25 if ($boundry_start ne $boundry_stop) { # -and the start and stop boundaries are different
722 4         4 $collect = 0; # -turn off collection
723             } else {
724 16         17 $collect = 1; # -but keep collection on if the boundaries are the same
725             }
726 20         28 next CHAR; # Process the next character if this character matches the boundry_stop character.
727             }
728 160 100       236 if ($c eq $boundry_start) { # If this character matches the boundry_start character...
729 4         4 $collect = 1; # -turn on collection
730 4         8 next CHAR; # Process the next character if this character matches the boundry_start character.
731             }
732             } # continue if the current character does not match stop|start boundry, or if we are looking for the 'stop' matching token (do not turn off collection)
733 208         284 TKEY: foreach my $tkey (keys %tokens) {
734 509 100       664 if (! defined $next) { # If not looking for the 'stop' matching token
    50          
735 376 100       659 if ($c eq $tkey) { # If this character matches the 'start' matching token...
736 5         6 $next = $tokens{$tkey}; # -start looking for the 'stop' matching token
737 5         7 last TKEY;
738             }
739             } elsif
740             (defined $next) { # If I am looking for the 'stop' matching token
741 133 100       216 if ($c eq $next) { # If this character matches the 'stop' matching token...
742 5         5 $next = undef; # -then I am no longer looking for the 'stop' matching token.
743 5         6 last TKEY;
744             }
745             }
746             }
747 208 100       373 if ($collect == 1) {
748 142         155 $current_element .= $c;
749             }
750             }
751 22 100       41 if ($boundry_end == 1) {
752 6 50       10 if (defined $current_element) {
753 6         7 push(@elements,$current_element);
754 6         7 $current_element = undef;
755             }
756             }
757              
758 22 100       86 return \@elements if @elements >= 1;
759 12         66 return undef;
760             }
761              
762              
763             =pod
764              
765             =head2 parseXMLPath
766              
767             =over
768              
769             Parse a string that represents the XMLPath to a XML element or attribute in a
770             XML::TreePP parsed XML Document.
771              
772             Note that the XML attributes, known as "@attr" are transformed into "-attr".
773             The preceding (-) minus in place of the (@) at is the recognized format of
774             attributes in the XML::TreePP module.
775              
776             Being that this is intended to be a submodule of XML::TreePP, the format of
777             '@attr' is converted to '-attr' to conform with how XML::TreePP handles
778             attributes.
779              
780             See: Cset( attr_prefix => '@' )> for more information.
781             This module supports the default format, '-attr', of attributes. But this can
782             be changed by setting the 'attr_prefix' property in the internally referenced
783             XML::TreePP object using the C method in object oriented programming.
784             Example:
785              
786             my $tppx = new XML::TreePP::XMLPath();
787             $tppx->tpp->set( attr_prefix => '@' );
788              
789             B
790             Also, as of version 0.52, there are two additional types of XMLPaths understood.
791              
792             I
793              
794             $path = '/books/book[5]';
795              
796             This defines the fifth book in a list of book elements under the books root.
797             When using this to get the value, the 5th book is returned.
798             When using this to test an element, there must be 5 or more books to return true.
799              
800             I
801              
802             $path = '/books/book[author]';
803              
804             This XMLPath represents all book elements under the books root which have 1 or
805             more author child element. It does not evaluate if the element or attribute to
806             evaluate has a value. So it is a test for existence of the element or attribute.
807              
808             =over 4
809              
810             =item * B
811              
812             The XML path to be parsed.
813              
814             =item * I
815              
816             An array reference of array referenced elements of the XMLPath.
817              
818             =back
819              
820             $parsedXMLPath = parseXMLPath( $XMLPath );
821              
822             =back
823              
824             =cut
825              
826             # parseXMLPath
827             # something like XPath parsing, but it is not
828             # @param xmlpath the XML path to be parsed
829             # @return an array reference of hash reference elements of the path
830             #
831             # Example:
832             # use Data::Dumper;
833             # print Dumper (parseXMLPath(q{abcdefg/xyz/path[@key='val'][key2=val2]/last}));
834             #
835             # OUTPUT:
836             # $VAR1 = [
837             # [ 'abcdefg', undef ],
838             # [ 'xyz', undef ],
839             # [ 'path',
840             # [
841             # [ '-key', 'val' ],
842             # [ 'key2', 'val2' ]
843             # ]
844             # ],
845             # [ 'last', undef ]
846             # ];
847             #
848             # Philosophy:
849             #
850             #
851             # Do red cats eat yellow food
852             # ?
853             #
854             #
855             # Brown cows eat green grass
856             # .
857             #
858             #
859             # Where the path 'paragraph/sentence[@language=english]/words' matches 'Do red cats eat yellow food'
860             # (Note this is because it is the first element of a multi element match)
861             # And the path 'paragraph/sentence[punctuation=.]/words' matches 'Brown cows eat green grass'
862             # So that '@attr=val' is identified as an attribute inside the
863             # And 'attr=val' is identified as a nested attribute inside the val
864             #
865             # Note the format of '@attr' is converted to '-attr' to conform with how XML::TreePP handles this
866             #
867             sub parseXMLPath ($) {
868 6 100 100 6 1 261 my $self = shift if ref($_[0]) eq $REF_NAME || undef;
869 6 50       12 unless (@_ == 1) { carp 'method parseXMLPath($) requires one argument.'; return undef; }
  0         0  
  0         0  
870 6         6 my $path = shift;
871 6         8 my $hpath = [];
872 6         6 my ($tpp,$xml_text_id,$xml_attr_id);
873              
874 6 50 66     15 if ((defined $self) && (defined $self->get('tpp'))) {
875 0 0       0 $tpp = $self ? $self->tpp() : tpp();
876 0   0     0 $xml_text_id = $tpp->get( 'text_node_key' ) || '#text';
877 0   0     0 $xml_attr_id = $tpp->get( 'attr_prefix' ) || '-';
878             } else {
879 6         8 $xml_text_id = '#text';
880 6         7 $xml_attr_id = '-';
881             }
882              
883 6         23 my $h_el = charlexsplit (
884             string => $path,
885             boundry_start => '/',
886             boundry_stop => '/',
887             tokens => [qw( [ ] ' ' " " )],
888             boundry_begin => 1,
889             boundry_end => 1
890             );
891 6         9 foreach my $el (@{$h_el}) {
  6         12  
892             # See: XML::TreePP->set( attr_prefix => '@' );, where default is '-'
893 16         23 $el =~ s/^\@/$xml_attr_id/;
894 16   100     42 my $h_param = charlexsplit (
895             string => $el,
896             boundry_start => '[',
897             boundry_stop => ']',
898             tokens => [qw( ' ' " " )],
899             boundry_begin => 0,
900             boundry_end => 0
901             ) || undef;
902 16 100       35 if (defined $h_param) {
903 4         17 my ($el2) = $el =~ /^([^\[]*)/;
904 4         7 my $ha_param = [];
905 4         4 foreach my $param (@{$h_param}) {
  4         8  
906 4         2 my ($attr,$val);
907             #
908             # define string values here
909             # defined first, as string is recognized as the default
910 4         11 ($attr,$val) = $param =~ /([^\=]*)\=[\'\"]?(.*[^\'\"])[\'\"]?/;
911 4 50 66     15 if ((! defined $attr) && (! defined $val)) {
912 3         5 ($attr) = $param =~ /([^\=]*)\=[\'\"]?[\'\"]?/;
913 3         5 $val = '';
914             }
915 4 50 66     15 if ((! defined $attr) && (! defined $val)) {
916 0         0 ($attr) = $param =~ /^([^\=]*)$/;
917 0         0 $val = undef;
918             }
919             #
920             # define literal values here, which are not string-values
921             # defined second, as literals are strictly defined
922 4 100       19 if ($param =~ /^(\d*)$/) {
    100          
923             # It is a positional argument, ex: /books/book[3]
924 2         6 $attr = $1;
925 2         2 $val = undef;
926             } elsif ($param =~ /^([^\=]*)$/) {
927             # Only the element/attribute is defined, ex: /path[@attr]
928 1         2 $attr = $1;
929 1         2 $val = undef;
930             }
931             #
932             # Internal - convert the attribute identifier
933             # See: XML::TreePP->set( attr_prefix => '@' );, where default is '-'
934 4         6 $attr =~ s/^\@/$xml_attr_id/;
935             #
936             # push the result
937 4         3 push (@{$ha_param},[$attr, $val]);
  4         15  
938             }
939 4         7 push (@{$hpath},[$el2, $ha_param]);
  4         12  
940             } else {
941 12         10 push (@{$hpath},[$el, undef]);
  12         43  
942             }
943              
944             }
945 6         17 return $hpath;
946             }
947              
948             =pod
949              
950             =head2 assembleXMLPath
951              
952             =over
953              
954             Assemble an ARRAY or HASH ref structure representing an XMLPath. This method
955             can be used to construct an XMLPath array ref that has been parsed by the
956             parseXMLPath method.
957              
958             Note that the XML attributes can be identified as "-attribute" or "@attribute".
959             When identified as "-attribute', they are transformed into "@attribute" upon
960             assembly. The preceding minus (-) in place of the at (@) is the recognized
961             format of attributes in the C module, though can be changed. See
962             the C method for further information.
963              
964             This method was added in version 0.70.
965              
966              
967             =over 4
968              
969             =item * B
970              
971             The XML path to be assembled, represented as either an ARRAY or HASH reference.
972              
973             =item * I
974              
975             An XMLPath.
976              
977             =back
978              
979             $XMLPath = assembleXMLPath( $parsedXMLPath );
980              
981             or
982              
983             my $xmlpath = q{/books/book[5]/cats[@author="The Cat's Meow"]/tigers[meateater]};
984            
985             my $ppath = $tppx->parseXMLPath($xpath);
986             ## $ppath == [['books',undef],['book',[['5',undef]]],['cats',[['-author','The Cat\'s Meow']]],['tigers',[['meateater',undef]]]]
987              
988             my $apath = [ 'books', ['book', 5], ['cats',[['@author' => "The Cat's Meow"]]], ['tigers',['meateater']] ];
989             my $hpath = { books => { book => { -attrs => [5], cats => { -attrs => [['-author' => "The Cat's Meow"]], tigers => { -attrs => ["meateater"] } } } } };
990            
991             print "original: ",$xmlpath,"\n";
992             print " re: ",$tppx->assembleXMLPath($ppath),"\n";
993             print " array: ",$tppx->assembleXMLPath($apath),"\n";
994             print " hash: ",$tppx->assembleXMLPath($hpath),"\n";
995              
996             output
997              
998             original: /books/book[5]/cats[@author="The Cat's Meow"]/tigers[meateater]
999             re: /books/book[5]/cats[@author="The Cat's Meow"]/tigers[meateater]
1000             array: /books/book[5]/cats[@author="The Cat's Meow"]/tigers[meateater]
1001             hash: /books/book[5]/cats[@author="The Cat's Meow"]/tigers[meateater]
1002              
1003             =back
1004              
1005             =cut
1006              
1007             sub assembleXMLPath ($) {
1008 1 50 50 1 1 8 my $self = shift if ref($_[0]) eq $REF_NAME || undef;
1009 1 50       4 unless (@_ == 1) { carp 'method assembleXMLPath($) requires one argument.'; return undef; }
  0         0  
  0         0  
1010 1         2 my $ref_path = shift;
1011 1         1 my $path = undef;
1012 1         1 my ($tpp,$xml_text_id,$xml_attr_id);
1013              
1014 1 50 33     10 if ((defined $self) && (defined $self->get('tpp'))) {
1015 0 0       0 $tpp = $self ? $self->tpp() : tpp();
1016 0   0     0 $xml_text_id = $tpp->get( 'text_node_key' ) || '#text';
1017 0   0     0 $xml_attr_id = $tpp->get( 'attr_prefix' ) || '-';
1018             } else {
1019 1         2 $xml_text_id = '#text';
1020 1         2 $xml_attr_id = '-';
1021             }
1022              
1023             my $assemble_attributes = sub ($) {
1024 4   100 4   12 my $attrs = shift || return undef;
1025 3 50 33     16 if ((defined $attrs) && (! ref $attrs)) {
    50          
1026 0         0 return ('['.$attrs.']');
1027             }
1028             elsif (ref $attrs eq "SCALAR") {
1029 0         0 return ('['.${$attrs}.']');
  0         0  
1030             }
1031 3 50       6 return undef unless ref $attrs eq "ARRAY";
1032 3         3 my $path;
1033 3         4 foreach my $itemattr (@{$attrs}) {
  3         5  
1034 3         2 my ($key,$val);
1035 3 50       5 if (ref $itemattr eq "ARRAY") {
1036 3         4 ($key,$val) = @{$itemattr};
  3         5  
1037             }
1038             else {
1039 0         0 $key = $itemattr;
1040             }
1041 3 50       7 next unless defined $key;
1042            
1043 3 100 66     36 if (($key =~ /^\d+$/) && (! defined $val)) {
    100 66        
1044 1         4 $path .= ('['.$key.']');
1045             }
1046             elsif (($key =~ /^\-(.*)/) || ($key =~ /^\@(.*)/)) {
1047 1         3 my $keystring = $1;
1048 1 50       3 if (defined $val) {
1049 1         3 $val =~ s/\"/\\\"/g;
1050 1         4 $path .= ('[@'.$keystring.'="'.$val.'"]');
1051             }
1052             else {
1053 0         0 $path .= ('[@'.$keystring.']');
1054             }
1055             }
1056             else {
1057 1 50       3 if (defined $val) {
1058 0         0 $val =~ s/\"/\\\"/g;
1059 0         0 $path .= ('['.$key.'="'.$val.'"]');
1060             }
1061             else {
1062 1         4 $path .= ('['.$key.']');
1063             }
1064             }
1065             }
1066 3         10 return $path;
1067 1         16 };
1068              
1069             # Reassemble a path parsed by parseXMLPath()
1070 1 50       4 if (ref $ref_path eq "ARRAY") {
    0          
1071 1         1 foreach my $pathitem (@{$ref_path}) {
  1         3  
1072 4         4 $path .= "/";
1073 4         9 my ($param,$attrs);
1074 4 50       13 if (ref $pathitem eq "ARRAY") {
1075 4         5 ($param,$attrs) = @{$pathitem};
  4         7  
1076             }
1077             else {
1078 0         0 $param = $pathitem;
1079             }
1080 4         5 $path .= $param;
1081 4 100       7 if (my $param_attrs = $assemble_attributes->($attrs)) {
1082 3         7 $path .= $param_attrs;
1083             }
1084             }
1085             }
1086             # Assemble a path represented by a hash
1087             elsif (ref $ref_path eq "HASH") {
1088 0     0   0 my $recurse = sub ($) {};
  0         0  
1089             $recurse = sub ($) {
1090 0     0   0 my $this_path = shift;
1091 0         0 my $path;
1092 0         0 foreach my $pathitem (keys %{$this_path}) {
  0         0  
1093 0 0       0 next if $pathitem eq "-attrs";
1094 0         0 $path .= "/";
1095 0         0 $path .= $pathitem;
1096 0         0 my $attrs = $this_path->{$pathitem}->{'-attrs'};
1097 0 0       0 if (my $pathitem_attrs = $assemble_attributes->($attrs)) {
1098 0         0 $path .= $pathitem_attrs;
1099             }
1100 0 0       0 if (my $recursed_path = $recurse->($this_path->{$pathitem})) {
1101 0         0 $path .= $recursed_path;
1102             }
1103 0         0 last;
1104             }
1105 0         0 return $path;
1106 0         0 };
1107 0         0 $path = $recurse->($ref_path);
1108             }
1109              
1110 1         11 return $path;
1111             }
1112              
1113             =pod
1114              
1115             =head2 filterXMLDoc
1116              
1117             =over
1118              
1119             To filter down to a subtree or set of subtrees of an XML document based on a
1120             given XMLPath
1121              
1122             This method can also be used to determine if a node within an XML tree is valid
1123             based on the given filters in an XML path.
1124              
1125             This method replaces the two methods C and C.
1126              
1127             This method was added in version 0.52
1128              
1129             =over 4
1130              
1131             =item * B
1132              
1133             The XML document tree, or subtree node to validate.
1134             This is an XML document either given as plain text string, or as parsed by the
1135             Cparse()> method.
1136              
1137             The XMLDocument, when parsed, can be an ARRAY of multiple elements to evaluate,
1138             which would be validated as follows:
1139              
1140             # when path is: context[@attribute]
1141             # returning: $subtree[item] if valid (returns all validated [item])
1142             $subtree[item]->{'-attribute'} exists
1143             # when path is: context[@attribute="value"]
1144             # returning: $subtree[item] if valid (returns all validated [item])
1145             $subtree[item]->{'-attribute'} eq "value"
1146             $subtree[item]->{'-attribute'}->{'value'} exists
1147             # when path is: context[5]
1148             # returning: $subtree[5] if exists (returns the fifth item if validated)
1149             $subtree['itemnumber']
1150             # when path is: context[5][element="value"]
1151             # returning: $subtree[5] if exists (returns the fifth item if validated)
1152             $subtree['itemnumber']->{'element'} eq "value"
1153             $subtree['itemnumber']->{'element'}->{'value'} exists
1154              
1155             Or the XMLDocument can be a HASH which would be a single element to evaluate.
1156             The XMLSubTree would be validated as follows:
1157              
1158             # when path is: context[element]
1159             # returning: $subtree if validated
1160             $subtree{'element'} exists
1161             # when path is: context[@attribute]
1162             # returning: $subtree if validated
1163             $subtree{'-attribute'} eq "value"
1164             $subtree{'-attribute'}->{'value'} exists
1165              
1166             =item * B
1167              
1168             The path within the XML Tree to retrieve. See C
1169              
1170             =item * B => C | C | C (optional)
1171              
1172             This optional argument defines the format of the search results to be returned.
1173             The default structure is C
1174              
1175             TargetRaw - Return references to xml document fragments matching the XMLPath
1176             filter. If the matching xml document fragment is a string, then the string is
1177             returned as a non-reference.
1178              
1179             RootMap - Return a Map of the entire xml document, a result set (list) of the
1180             definitive XMLPath (mapped from the root) to the found targets, which includes:
1181             (1) a reference map from root (/) to all matching child nodes
1182             (2) a reference to the xml document from root (/)
1183             (3) a list of targets as absolute XMLPath strings for the matching child nodes
1184              
1185             { root => HASHREF,
1186             path => '/',
1187             target => [ "/nodename[#]/nodename[#]/nodename[#]/targetname" ],
1188             child =>
1189             [{ name => nodename, position => #, child => [{
1190             [{ name => nodename, position => #, child => [{
1191             [{ name => nodename, position => #, target => targetname }]
1192             }] }]
1193             }] }]
1194             }
1195              
1196             ParentMap - Return a Map of the parent nodes to found target nodes in the xml
1197             document, which includes:
1198             (1) a reference map from each parent node to all matching child nodes
1199             (2) a reference to xml document fragments from the parent nodes
1200              
1201             [
1202             { root => HASHREF,
1203             path => '/nodename[#]/nodename[6]/targetname',
1204             child => [{ name => nodename, position => 6, target => targetname }]
1205             },
1206             { root => HASHREF,
1207             path => '/nodename[#]/nodename[7]/targetname',
1208             child => [{ name => nodename, position => 7, target => targetname }]
1209             },
1210             ]
1211              
1212             =item * I
1213              
1214             The parsed XML Document subtrees that are validated, or undef if not validated
1215              
1216             You can retrieve the result set in one of two formats.
1217              
1218             # Option 1 - An ARRAY reference to a list
1219             my $result = filterXMLDoc( $xmldoc, '/books' );
1220             # $result is:
1221             # [ { book => { title => "PERL", subject => "programming" } },
1222             # { book => { title => "All About Backpacks", subject => "hiking" } } ]
1223            
1224             # Option 2 - A list, or normal array
1225             my @result = filterXMLDoc( $xmldoc, '/books/book[subject="camping"]' );
1226             # $result is:
1227             # ( { title => "campfires", subject => "camping" },
1228             # { title => "tents", subject => "camping" } )
1229              
1230             =back
1231              
1232             my $result = filterXMLDoc( $XMLDocument , $XMLPath );
1233             my @result = filterXMLDoc( $XMLDocument , $XMLPath );
1234              
1235             =back
1236              
1237             =cut
1238              
1239             sub filterXMLDoc (@) {
1240 5 100 100 5 1 12175 my $self = shift if ref($_[0]) eq $REF_NAME || undef;
1241 5 50       13 unless (@_ >= 2) { carp 'method filterXMLDoc($$) requires two arguments.'; return undef; }
  0         0  
  0         0  
1242 5   33     11 my $tree = shift || (carp 'filterXMLDoc($$) requires two arguments.' && return undef);
1243 5   33     11 my $path = shift || (carp 'filterXMLDoc($$) requires two arguments.' && return undef);
1244 5         7 my %options = @_; # Additional optional options:
1245             # structure => TargetRaw | RootMAP | ParentMAP
1246 5 50       13 my $o_structure = $options{'structure'} ? $options{'structure'} : "TargetRaw";
1247 5         5 my ($tpp,$xtree,$xpath,$xml_text_id,$xml_attr_id);
1248              
1249 5         5 local $Data::Dumper::Indent = 0;
1250 5         7 local $Data::Dumper::Purity = 1;
1251 5         6 local $Data::Dumper::Terse = 1;
1252              
1253 5 50 66     45 if ((defined $self) && (defined $self->get('tpp'))) {
1254 0 0       0 $tpp = $self ? $self->tpp() : tpp();
1255 0   0     0 $xml_text_id = $tpp->get( 'text_node_key' ) || '#text';
1256 0   0     0 $xml_attr_id = $tpp->get( 'attr_prefix' ) || '-';
1257             } else {
1258 5         4 $xml_text_id = '#text';
1259 5         6 $xml_attr_id = '-';
1260             }
1261 5 50       11 if (ref $tree) { $xtree = $tree;
  5 0       24  
1262             }
1263             elsif (!defined $tree)
1264 0         0 { $xtree = undef
1265             }
1266 0 0       0 else { if (!defined $tpp) { $tpp = $self ? $self->tpp() : tpp(); }
  0 0       0  
1267 0 0       0 $xtree = $tpp->parse($tree) if defined $tree;
1268             }
1269 5 100       7 if (ref $path) { $xpath = eval (Dumper($path)); # make a copy of inputted parsed XMLPath
  1         5  
1270             }
1271 4         7 else { $xpath = parseXMLPath($path);
1272             }
1273              
1274             # This is used on the lowest level of an element, and is the
1275             # execution of our rules for matching or validating a value
1276             my $validateFilter = sub (@) {
1277 0     0   0 my %args = @_;
1278 0 0       0 print ("="x8,"sub:filterXMLDoc|validateFilter->()\n") if $DEBUG >= $DEBUGMETHOD;
1279 0 0       0 print (" "x8,"= attempting to validate filter, with: ", Dumper(\%args) ,"\n") if $DEBUG >= $DEBUGDUMP;
1280             # we accept:
1281             # - required: node,param,comparevalue ; optional: operand(=) (= is default)
1282             # not accepted: - required: node,param,operand(exists)
1283 0 0 0     0 return 0 if !exists $args{'node'} || !exists $args{'comparevalue'};
1284             # Node possibilities this method is expecting to see:
1285             # VALUE: 'Henry' -asin-> { people => { person => 'Henry' } }
1286             # VALUE: [ 'Henry', 'Sally' ] -asin-> { people => { person => [ 'Henry', 'Sally' ] } }
1287             # VALUE: { id => 45, #text => 'Henry' } -asin-> { people => { person => { id => 45, #text => 'Henry' } } }
1288             # Also, comparevalue could be '', an empty string
1289             # comparevalue of undef is attempted to be matched here, because operand defaults to "eq" or "="
1290 0 0       0 if (ref $args{'node'} eq "HASH") {
    0          
    0          
1291 0 0       0 if (exists $args{'node'}->{$xml_text_id}) {
1292 0 0 0     0 return 1 if defined $args{'node'}->{$xml_text_id} && defined $args{'comparevalue'} && $args{'node'}->{$xml_text_id} eq "" && $args{'comparevalue'} eq "";
      0        
      0        
1293 0 0 0     0 return 1 if !defined $args{'node'}->{$xml_text_id} && !defined $args{'comparevalue'};
1294 0 0       0 return 1 if $args{'node'}->{$xml_text_id} eq $args{'comparevalue'};
1295             }
1296             } elsif (ref $args{'node'} eq "ARRAY") {
1297 0         0 foreach my $value (@{$args{'node'}}) {
  0         0  
1298 0 0       0 if (ref $value eq "HASH") {
1299 0 0       0 if (exists $value->{$xml_text_id}) {
1300 0 0 0     0 return 1 if defined $value->{$xml_text_id} && defined $args{'comparevalue'} && $value->{$xml_text_id} eq "" && $args{'comparevalue'} eq "";
      0        
      0        
1301 0 0 0     0 return 1 if !defined $value->{$xml_text_id} && !defined $args{'comparevalue'};
1302 0 0       0 return 1 if $value->{$xml_text_id} eq $args{'comparevalue'};
1303             }
1304             } else {
1305 0 0 0     0 return 1 if defined $value && defined $args{'comparevalue'} && $value eq "" && $args{'comparevalue'} eq "";
      0        
      0        
1306 0 0 0     0 return 1 if !defined $value && !defined $args{'comparevalue'};
1307 0 0       0 return 1 if $value eq $args{'comparevalue'};
1308             }
1309             }
1310             } elsif (ref $args{'node'} eq "SCALAR") { # not likely -asin-> { people => { person => \$value } }
1311 0 0 0     0 return 1 if defined ${$args{'node'}} && defined $args{'comparevalue'} && ${$args{'node'}} eq "" && $args{'comparevalue'} eq "";
  0   0     0  
  0   0     0  
1312 0 0 0     0 return 1 if !defined ${$args{'node'}} && !defined $args{'comparevalue'};
  0         0  
1313 0 0       0 return 1 if ${$args{'node'}} eq $args{'comparevalue'};
  0         0  
1314             } else { # $node =~ /\w/
1315 0 0 0     0 return 1 if defined $args{'node'} && defined $args{'comparevalue'} && $args{'node'} eq "" && $args{'comparevalue'} eq "";
      0        
      0        
1316 0 0 0     0 return 1 if !defined $args{'node'} && !defined $args{'comparevalue'};
1317 0 0       0 return 1 if $args{'node'} eq $args{'comparevalue'};
1318             }
1319 0         0 return 0;
1320 5         32 }; #end validateFilter->();
1321              
1322             my $extractFilterPosition = sub (@) {
1323 16     16   12 my $filters = shift;
1324 16         16 my $position = undef;
1325             # Process the first filter, if it exists, for positional testing
1326             # If a positional argument is given, shift to the item located at
1327             # that position
1328             # Yes, this does mean the positional argument must be the first filter.
1329             # But then again, this would not make clear sense: /books/book[author="smith"][5]
1330             # And this path makes more clear sense: /books/book[5][author="smith"]
1331 16 50 66     62 if ( (defined $filters) &&
      66        
      33        
      33        
1332             (defined $filters->[0]) &&
1333             ($filters->[0]->[0] =~ /^\d*$/) &&
1334             (! defined $filters->[0]->[1]) &&
1335             ($filters->[0]->[0] >= 1) ) {
1336 1 50       8 print (" "x12,"= processing list position filter. Extracting first filter.\n") if $DEBUG >= $DEBUGFILTER;
1337 1         2 my $lpos = shift @{$filters}; # This also deletes the positional filter from passed in filter REF
  1         2  
1338 1         2 $position = $lpos->[0]; # if $lpos >= 1;
1339             }
1340 16 100 66     34 return $position if defined $position && $position >= 1;
1341 15         18 return undef;
1342 5         25 };
1343              
1344             # So what do we support as filters
1345             # /books/book[@id="value"] # attribute eq value
1346             # /books/book[title="value"] # element eq value
1347             # /books/book[@type] # Attribute exists
1348             # /books/book[author] # element exists
1349             # Not yet: /books/book[publisher/address/city="value"] # sub/child element eq value
1350             # And what are some of the things we do not support
1351             # /books/book[publisher/address[country="US"]/city="value"] # sub/child element eq value based on another filter
1352             # /books/book[5][./title=../book[4]/title] # comparing the values of two elements
1353             my $processFilters = sub ($$) {
1354 31 50   31   53 print ("="x8,"sub:filterXMLDoc|processFilters->()\n") if $DEBUG >= $DEBUGMETHOD;
1355 31         33 my $xmltree_child = shift;
1356 31         30 my $filters = shift;
1357 31 50       52 print ("++++ELEMENT:".Dumper($xmltree_child)."\n") if $DEBUG >= $DEBUGDUMP;
1358 31 50       42 print ("++++ FILTER:".Dumper($filters)."\n") if $DEBUG >= $DEBUGDUMP;
1359 31         26 my $filters_processed_count = 0; # Will catch a filters error of [[][][]] or something
1360 31         26 my $param_match_flag = 0;
1361 31 50 66     67 if ((!defined $filters) || (@{$filters} == 0)) {
  1         4  
1362             # If !defined $filters or if $filters = []
1363 31         68 return $xmltree_child;
1364             }
1365 0         0 FILTER: foreach my $filter (@{$filters}) {
  0         0  
1366 0 0       0 next if !defined $filter; # if we get empty filters;
1367 0         0 $filters_processed_count++;
1368              
1369 0         0 my $param = $filter->[0];
1370 0         0 my $value = $filter->[1];
1371 0 0       0 print (" "x8,"= processing filter: " . $param) if $DEBUG >= $DEBUGFILTER;
1372 0 0 0     0 print (" , " . $value) if defined $value && $DEBUG >= $DEBUGFILTER;
1373 0 0       0 print ("\n") if $DEBUG >= $DEBUGFILTER;
1374              
1375             # attribute/element exists filter
1376             # deal with special #text/$xml_text_id element
1377 0 0 0     0 if (ref $xmltree_child eq "HASH") {
    0 0        
1378 0 0 0     0 if (($param ne ".") && (! exists $xmltree_child->{$param})) {
    0 0        
      0        
1379 0         0 $param_match_flag = 0;
1380 0         0 last FILTER;
1381             } elsif ((($param eq ".") || (exists $xmltree_child->{$param})) && (! defined $value)) {
1382             # NOTE, maybe filter needs to be [['attr'],['attr','val']] for this one
1383 0         0 $param_match_flag = 1;
1384 0         0 next FILTER;
1385             }
1386             } elsif ( ($param eq $xml_text_id)
1387             && (($xmltree_child =~ /\w+/) || ((ref $xmltree_child eq "SCALAR") && (${$xmltree_child} =~ /\w+/)))) {
1388 0         0 $param_match_flag = 1;
1389 0         0 next FILTER;
1390             } else {
1391             # else ref $xmltree_child eq "ARRAY" or "BLOB" or something
1392 0         0 $param_match_flag = 0;
1393 0         0 last FILTER;
1394             }
1395              
1396 0 0       0 print (" "x12,"= about to validate filter.\n") if $DEBUG >= $DEBUGFILTER;
1397 0 0 0     0 if ( ($param ne ".") &&
    0 0        
1398             ($validateFilter->( node => $xmltree_child->{$param},
1399             operand => '=',
1400             comparevalue => $value))
1401             ) {
1402 0 0       0 print (" "x12,"= validated filter.\n") if $DEBUG >= $DEBUGFILTER;
1403 0         0 $param_match_flag = 1;
1404 0         0 next FILTER;
1405             } elsif (($param eq ".") &&
1406             ($validateFilter->( node => $xmltree_child,
1407             operand => '=',
1408             comparevalue => $value))
1409             ) {
1410 0 0       0 print (" "x12,"= validated filter.\n") if $DEBUG >= $DEBUGFILTER;
1411 0         0 $param_match_flag = 1;
1412 0         0 next FILTER;
1413             } else {
1414 0 0       0 print (" "x12,"= unvalidated filter.\n") if $DEBUG >= $DEBUGFILTER;
1415 0         0 $param_match_flag = 0;
1416 0         0 last FILTER;
1417             }
1418              
1419             # Examples of what $xmltree_child->{$param} can be
1420             # (Perhaps this info should be bundled with $validateFilter->() method)
1421             # 1. A SCALAR ref will probably never occur
1422             # 2a. An ARRAY ref of strings
1423             # PATH: /people[person='Henry']
1424             # XML: HenrySally
1425             # PARSED: { people => { person => [ 'Henry', 'Sally' ] } }
1426             # 2b. or ARRAY ref or HASH refs
1427             # XML: HenrySally
1428             # PARSED: { people => { person => [ { id => 1, #text => 'Henry' }, { id => 2, #text => 'Sally' } ] } }
1429             # 3. A HASH when in cases like this:
1430             # PATH: /people/person[@id=45]
1431             # XML: Henry
1432             # PARSED: { people => { person => { id => 45, #text => 'Henry' } } }
1433             # 4. The most likely encounter of plain old text/string values
1434             # PATH: /people/person
1435             # XML: Henry
1436             # PARSED: { people => { person => 'Henry' } }
1437              
1438             } #end FILTER
1439 0 0       0 if ($filters_processed_count == 0) {
    0          
1440             # there was some unusual error which caused a lot of undef filters
1441             # And as such, $param_match_flag will be 0
1442             # we return the entire tree as valid
1443 0         0 return $xmltree_child;
1444             } elsif ($param_match_flag == 0) {
1445             # filters were processed, but there was no matches
1446             # we return undef because nothing validated
1447 0         0 return undef;
1448             } else {
1449 0         0 return $xmltree_child;
1450             }
1451 5         27 }; #end processFilters->()
1452              
1453             # mapAssemble(), mapChildExists() and mapTran() are utilized for the ParentMap and RootMap options
1454 5     0   17 my $mapAssemble = sub (@) {};
  0         0  
1455             $mapAssemble = sub (@) {
1456 0     0   0 my $mapObj = shift;
1457 0   0     0 my $rootpath = $_[0] || $mapObj->{'path'} || '/';
1458 0 0       0 $rootpath .= '/' if $rootpath !~ /\/$/;
1459 0         0 my @paths;
1460 0         0 foreach my $child (@{$mapObj->{'child'}}) {
  0         0  
1461 0         0 my $tmppath .= ($rootpath.$child->{'name'}."[".$child->{'position'}."]");
1462 0 0       0 if (exists $child->{'child'}) {
    0          
1463 0         0 my $rpaths = $mapAssemble->($child,$tmppath);
1464 0 0       0 if (ref($rpaths) eq "ARRAY") {
1465 0         0 push(@paths,@{$rpaths});
  0         0  
1466             } else {
1467 0         0 push(@paths,$rpaths);
1468             }
1469             } elsif (exists $child->{'target'}) {
1470 0 0       0 if (defined $child->{'target'}) {
1471 0         0 $tmppath .= ("/".$child->{'target'});
1472             }
1473 0         0 push(@paths,$tmppath);
1474             }
1475             }
1476 0         0 return \@paths;
1477 5         18 };
1478             # mapAssemble(), mapChildExists() and mapTran() are utilized for the ParentMap and RootMap options
1479             my $mapChildExists = sub (@) {
1480 21     21   20 my $mapObj = shift;
1481 21         17 my $child = shift;
1482 21         20 foreach my $cmap (@{$mapObj->{'child'}}) {
  21         33  
1483 36 50 33     154 if ( ($cmap->{'name'} eq $child->{'name'})
1484             && ($cmap->{'position'} eq $child->{'position'})) {
1485 0         0 return $cmap;
1486             }
1487             }
1488 21         53 return 0;
1489 5         18 };
1490             # mapAssemble(), mapChildExists() and mapTran() are utilized for the ParentMap and RootMap options
1491 5     0   14 my $mapTran = sub (@) {};
  0         0  
1492             $mapTran = sub (@) {
1493 26     26   29 my $mapObj = shift;
1494 26         47 my %args = @_; # action => new | assemble | child => { name => S, position => #, target => S }
1495 26 100       41 if (! defined $mapObj) {
1496 5 50 33     16 if ((exists $args{'action'}) && ($args{'action'} eq "new")) {
1497 0         0 $mapObj = {};
1498 0         0 return ($mapObj);
1499             } else {
1500 5         22 return undef;
1501             }
1502             }
1503 21 50 33     101 if ((exists $args{'action'}) && ($args{'action'} eq "childcount")) {
1504 0 0       0 return (ref($mapObj->{'child'}) eq "ARRAY") ? @{$mapObj->{'child'}} : 0;
  0         0  
1505             }
1506 21 50       32 if (exists $args{'child'}) { # && (exists $args{'child'}->{'name'}) && (exists $args{'child'}->{'position'})) {
1507 21 100       48 $mapObj->{'child'} = [] if ref($mapObj->{'child'}) ne "ARRAY";
1508 21         23 my $newchild = $args{'child'};
1509 21 50       30 if (my $cmap = $mapChildExists->($mapObj,$newchild)) {
1510             # If the child already exists, try to merge the two childs.
1511             # merging will attempt to add the child's child(s) to the mapObj's child if the child's child(s) do not already exist.
1512 0 0       0 if (ref($newchild->{'child'}) eq "ARRAY") {
1513 0         0 foreach my $nc_child (@{$newchild->{'child'}}) {
  0         0  
1514 0 0       0 if ($mapTran->($cmap, child => $nc_child)) {
1515 0         0 return $newchild;
1516             }
1517             }
1518             } else {
1519 0         0 return undef;
1520             }
1521             } else {
1522             # Add the child if it does not already exist
1523 21         17 push (@{$mapObj->{'child'}}, $newchild);
  21         31  
1524 21         70 return $newchild;
1525             }
1526             }
1527 0 0 0     0 if ((exists $args{'action'}) && ($args{'action'} eq "assemble")) {
1528 0         0 return $mapAssemble->("",$mapObj);
1529             }
1530 5         23 };
1531              
1532             # whatisnode() looks at the nodename to determine what it is
1533             my $whatisnode = sub ($) {
1534 16     16   17 my $nodename = shift;
1535 16 50       21 return undef if ref($nodename);
1536 16 50       27 return "text" if $nodename eq $xml_text_id;
1537 16 100       73 return "attribute" if $nodename =~ /^$xml_attr_id\w+$/;
1538 11 50       19 return "parent" if $nodename eq '..';
1539 11 50       18 return "current" if $nodename eq '.';
1540 11         18 return "element";
1541 5         17 };
1542              
1543             # bctrail() is the breadcrumb trail, so we can find our way back to the root node
1544             my $bctrail = sub (@) {
1545 42   50 42   74 my $bcobj = shift || return undef;
1546 42   50     67 my $action = shift || return undef;
1547 42 100       91 if ($action eq "addnode") {
    100          
    50          
    0          
1548 11         7 push(@{$bcobj},@_);
  11         32  
1549 11         16 return 1;
1550             } elsif ($action eq "poplast") {
1551 11         9 my $j = pop(@{$bcobj});
  11         13  
1552 11         19 return $j;
1553             } elsif ($action eq "clone") {
1554 20         17 my @clone;
1555 20         19 foreach my $noderef (@{$bcobj}) {
  20         29  
1556 40         57 push(@clone,$noderef);
1557             }
1558 20         68 return \@clone;
1559             } elsif ($action eq "length") {
1560 0         0 my $num = @{$bcobj};
  0         0  
1561 0         0 return $num;
1562             }
1563 0         0 return undef;
1564 5         16 };
1565              
1566             # find() is the primary searching function
1567 5     0   16 my $find = sub (@) {};
  0         0  
1568             $find = sub (@) {
1569 32     32   34 my $xmltree = shift; # The parsed XML::TreePP tree
1570 32         28 my $xmlpath = shift; # The parsed XML::TreePP::XMLPath path
1571 32   100     55 my $thisnodemap = shift || undef;
1572 32   100     66 my $breadcrumb = shift || [];
1573 32 50       48 print ("="x8,"sub::filterXMLDoc|_find()\n") if $DEBUG >= $DEBUGMETHOD;
1574 32 50       48 print (" "x7,"=attempting to find path: ", Dumper($xmlpath) ,"\n") if $DEBUG >= $DEBUGDUMP;
1575 32 50       47 print (" "x7,"=attempting to search in: ", Dumper($xmltree) ,"\n") if $DEBUG >= $DEBUGDUMP;
1576 32 50 33     51 if (($DEBUG >= 1) && ($DEBUG <= 5)) {
1577 0         0 print ( "-"x11 . "# Descending in search with criteria: " . "\n");
1578 0         0 print ( Dumper({ nodemap => $thisnodemap }) . "\n");
1579 0         0 print ( Dumper({ xmlpath => $xmlpath }) . "\n");
1580 0         0 print ( Dumper({ xmlfragment => $xmltree }) . "\n");
1581             }
1582              
1583 32         25 my (@found,@maps);
1584             #print (" "x8, "searching begins on node with nodemap:", Dumper ($thisnodemap) if $DEBUG > 5;
1585             # If there are no more path to analyze, return
1586 32 100 66     74 if ((ref($xmlpath) ne "ARRAY") || (! @{$xmlpath} >= 1)) {
  32         97  
1587 16 50       23 print (" "x12,"= end of path reached\n") if $DEBUG >= $DEBUGPATH;
1588             # FOUND: XMLPath is satisfied, Return $xmltree as a found target
1589 16         22 $thisnodemap->{'target'} = undef;
1590 16         27 push(@found, $xmltree);
1591             }
1592              
1593             # Otherwise, we have more path to analyze - @{$xmlpath} is >= 1
1594              
1595 32 100       64 if (@found == 0) {
1596 16 100       52 if (! ref($xmltree)) {
    50          
    50          
1597 1 50       3 print ("-"x12,"= search tree is TEXT (non-REF)\n") if $DEBUG >= $DEBUGPATH;
1598             # This should almost always return undef
1599             # The only exception is if $element eq '.', as in "/path/to/element/."
1600              
1601 1         2 my $path_element = shift @{$xmlpath};
  1         2  
1602 1         2 my $element = shift @{$path_element};
  1         2  
1603 1         1 my $filters = shift @{$path_element};
  1         2  
1604              
1605 1         3 my $elementposition = $extractFilterPosition->($filters);
1606 1 50 33     8 if ( (($element =~ /\w+/) && ($element ne '.')) || ((defined $elementposition) && (! $elementposition >= 2)) ) {
      0        
      33        
1607 1         3 return undef;
1608             }
1609 0 0       0 if (@{$xmlpath} >= 1) {
  0         0  
1610 0         0 return undef;
1611             }
1612              
1613 0 0 0     0 if ( ((!defined $filters) || (@{$filters} < 1))
  0   0     0  
1614             || ( defined $processFilters->($xmltree,$filters) ) ) {
1615 0         0 push(@found,$xmltree);
1616             }
1617             } elsif (ref $xmltree eq "ARRAY") {
1618 0 0       0 print ("-"x12,"= search tree is ARRAY\n") if $DEBUG >= $DEBUGPATH;
1619             # If $xmltree is an array, and not a HASH, then we are not searching
1620             # an XML::TreePP parsed XML Document, so we just keep descending
1621             # Instead, this tree might look something like:
1622             # { parent=>[ {child1=> CDATA},{child2=>[["v1","v2","v3"],["vA","vB","vC"]]} ] }
1623             # A normal expected XML::TreePP tree will not have arrays of arrays
1624 0         0 foreach my $singlexmltree (@{$xmltree}) {
  0         0  
1625 0         0 my $bc_clone = $bctrail->($breadcrumb,"clone"); # do not addnode $xmltree, because ref $xmltree eq ARRAY
1626 0         0 my $result = $find->($singlexmltree,$xmlpath,$thisnodemap,$bc_clone);
1627 0 0       0 next unless defined $result;
1628 0 0       0 push(@found,@{$result}) if ref($result) eq "ARRAY";
  0         0  
1629 0 0       0 push(@found,$result) if ref($result) ne "ARRAY";
1630             }
1631             } elsif (ref $xmltree eq "HASH") {
1632 15 50       35 print ("-"x12,"= search tree is HASH\n") if $DEBUG >= $DEBUGPATH;
1633             # Pretty much all the searching is done here
1634              
1635 15         9 my $path_element = shift @{$xmlpath};
  15         27  
1636 15         15 my $element = shift @{$path_element};
  15         26  
1637 15         49 my $filters = shift @{$path_element};
  15         19  
1638              
1639 15         25 my $elementposition = $extractFilterPosition->($filters);
1640              
1641 15         13 my $result;
1642 15         24 my $nodetype = $whatisnode->($element);
1643 15 50 33     96 if ($nodetype eq "text") {
    100          
    50          
    50          
    50          
    50          
1644 0 0       0 print ("-"x12,"= search tree node (".$element.") is text\n") if $DEBUG >= $DEBUGNODE;
1645             # Filters are not allowed in text elements directly
1646             # Alt is to give: '/path/to/sub[#text="my value"]/#text
1647             # However, perhaps we should allow: '/path/to/sub/#text[.="my value"]
1648 0 0       0 return undef if (@{$xmlpath} >= 1); # Cannot descend as path dictates, so no match
  0         0  
1649 0 0 0     0 return undef if defined $elementposition && $elementposition >= 2; # There is only one child node
1650 0 0       0 print (" "x8,"= end of path reached with text CDATA\n") if $DEBUG > 1;
1651 0 0       0 if ( defined $processFilters->($xmltree->{$element},$filters) ) {
1652 0         0 $thisnodemap->{'target'} = $element; # $element eq '#text'
1653 0         0 $result = $xmltree->{$element};
1654             } else {
1655 0 0       0 print ("-"x12,"= node (text) did not pass filters.\n") if $DEBUG >= $DEBUGNODE;
1656 0         0 return undef;
1657             }
1658             } elsif ($nodetype eq "attribute") {
1659 4 50       9 print ("-"x12,"= search tree node (".$element.") is sttribute\n") if $DEBUG >= $DEBUGNODE;
1660             # Filters are not allowed on attribute elements directly
1661             # Alt is to give: '/path/to/sub[@attrname="my value"]/@attrname
1662             # However, perhaps we should allow: '/path/to/sub/@attrname[.="my value"]
1663 4 50       3 return undef if (@{$xmlpath} >= 1); # Cannot descend as path dictates, so no match
  4         10  
1664 4 50 33     10 return undef if defined $elementposition && $elementposition >= 2; # There is only one child node
1665 4 50       9 print (" "x8,"= end of path reached with attribute\n") if $DEBUG >= $DEBUGPATH;
1666 4 50       9 if ( defined $processFilters->($xmltree->{$element},$filters) ) {
1667 4         6 $thisnodemap->{'target'} = $element;
1668 4         6 $result = $xmltree->{$element};
1669             } else {
1670 0 0       0 print ("-"x12,"= node (attribute) did not pass filters.\n") if $DEBUG >= $DEBUGNODE;
1671 0         0 return undef;
1672             }
1673             } elsif (($nodetype eq "element") && (! ref($xmltree->{$element})) ) {
1674 0 0       0 print ("-"x12,"= search tree node (".$element.") is element with text CDATA\n") if $DEBUG >= $DEBUGNODE;
1675             # Here must take care of matching the abscence of #text
1676             # eg: /path/to/element == /path/to/element/#text if element =~ /\w+/
1677 0 0 0     0 unless ( (defined $xmltree->{$element}) && ($xmltree->{$element} =~ /\w+/)
  0   0     0  
      0        
      0        
      0        
1678             && ( ((ref($xmlpath) eq "ARRAY") && (@{$xmlpath} == 1))
1679             && ($whatisnode->($xmlpath->[0]->[0]) eq "text")
1680             && (defined $processFilters->($xmltree->{$element},$xmlpath->[0]->[1])) ) ) {
1681 0 0       0 return undef if (@{$xmlpath} >= 1); # Cannot descend as path dictates, so no match
  0         0  
1682 0 0 0     0 return undef if defined $elementposition && $elementposition >= 2; # There is only one child node
1683             }
1684 0 0       0 print ("-"x16,"60= nodetype is element with text on final path\n") if $DEBUG >= $DEBUGNODE;
1685 0 0       0 if ( defined $processFilters->($xmltree->{$element},$filters) ) {
1686 0         0 my $childmap = { name => $element, position => 1, target => undef };
1687 0         0 $mapTran->($thisnodemap, child => $childmap );
1688 0         0 $result = $xmltree->{$element};
1689             } else {
1690 0 0       0 print ("-"x16,"= node (element) did not pass filters.\n") if $DEBUG >= $DEBUGNODE;
1691 0         0 return undef;
1692             }
1693             } elsif ($nodetype eq "parent") {
1694 0 0       0 print ("-"x12,"= search tree node (".$element.") is parent node\n") if $DEBUG >= $DEBUGNODE;
1695 0 0 0     0 return undef if defined $elementposition && $elementposition >= 2; # This is not supported, as parent (..) is the parent hash, not array
1696 0         0 my $crumb = $bctrail->($breadcrumb,"poplast"); # get the parent from the end of the breadcrumb trail
1697 0         0 my $parentnode = $crumb->[0];
1698 0         0 my $parentmap = $crumb->[1];
1699 0 0       0 return undef unless defined $parentnode;
1700 0 0       0 if ( defined $processFilters->($parentnode,$filters) ) {
1701             # If there were no filters, path was something like '/path/to/../element'
1702             # If there were filters, path was something like '/path/to/..[filter]/element'
1703 0         0 $result = $find->($parentnode,$xmlpath,$parentmap,$breadcrumb);
1704             } else {
1705 0 0       0 print ("-"x16,"= node (parent) did not pass filters.\n") if $DEBUG >= $DEBUGNODE;
1706 0         0 return undef;
1707             }
1708             } elsif ($nodetype eq "current") {
1709 0 0       0 print ("-"x12,"= search tree node (".$element.") is current node\n") if $DEBUG >= $DEBUGNODE;
1710 0 0 0     0 return undef if defined $elementposition && $elementposition >= 2; # The current node is always a hash
1711 0 0       0 if ( defined $processFilters->($xmltree,$filters) ) {
1712             # If there were no filters, path was something like '/path/to/./element'
1713             # If there were filters, path was something like '/path/to/.[filter]/element'
1714 0         0 $result = $find->($xmltree,$xmlpath,$thisnodemap,$breadcrumb);
1715             } else {
1716 0 0       0 print ("-"x16,"= node (current) did not pass filters.\n") if $DEBUG >= $DEBUGNODE;
1717 0         0 return undef;
1718             }
1719             } elsif ($nodetype eq "element") {
1720 11 50       18 print ("-"x12,"= search tree node (".$element.") is element with REF\n") if $DEBUG >= $DEBUGNODE;
1721 11 100 66     51 if ( ref($xmltree->{$element}) eq "HASH" ) {
    100 66        
    50          
1722 6 50       10 print ("-"x16,"= search tree node (".$element.") is element with REF HASH\n") if $DEBUG >= $DEBUGNODE;
1723 6 50 33     14 return undef if defined $elementposition && $elementposition >= 2; # There is only one child node, as a hash
1724 6 50       12 if ( defined $processFilters->($xmltree->{$element},$filters) ) {
1725 6         17 my $childmap = { name => $element, position => 1 };
1726 6         14 $bctrail->($breadcrumb,"addnode",[$xmltree,$thisnodemap]);
1727 6         92 $result = $find->($xmltree->{$element},$xmlpath,$childmap,$breadcrumb);
1728 6         8 $bctrail->($breadcrumb,"poplast");
1729 6 50       10 if (defined $result) {
1730 6         9 $mapTran->($thisnodemap, child => $childmap );
1731             }
1732             } else {
1733 0 0       0 print ("-"x16,"= node (element[hash]) did not pass filters.\n") if $DEBUG >= $DEBUGNODE;
1734 0         0 return undef;
1735             }
1736             } elsif (( ref($xmltree->{$element}) eq "ARRAY" ) && (defined $elementposition) && ($elementposition >= 1)) {
1737 1 50       3 print ("-"x16,"= search tree node (".$element.") is element with REF ARRAY position $elementposition\n") if $DEBUG >= $DEBUGNODE;
1738 1 50       5 if ( defined $processFilters->($xmltree->{$element},$filters) ) {
1739 1         9 my $childmap = { name => $element, position => $elementposition };
1740 1         3 $bctrail->($breadcrumb,"addnode",[$xmltree,$thisnodemap]);
1741 1         19 $result = $find->($xmltree->{$element}->[($elementposition - 1)],$xmlpath,$childmap,$breadcrumb);
1742 1         3 $bctrail->($breadcrumb,"poplast");
1743 1 50       3 if (defined $result) {
1744 1         2 $mapTran->($thisnodemap, child => $childmap );
1745             }
1746             } else {
1747 0 0       0 print ("-"x16,"= node (element[array]) did not pass filters.\n") if $DEBUG >= $DEBUGNODE;
1748 0         0 return undef;
1749             }
1750             } elsif ( ref($xmltree->{$element}) eq "ARRAY" ) {
1751 4 50       8 print ("-"x16,"= search tree node (".$element.") is element with REF ARRAY\n") if $DEBUG >= $DEBUGNODE;
1752 4         5 my $xmlpos = 0;
1753 4         10 $bctrail->($breadcrumb,"addnode",[$xmltree,$thisnodemap]);
1754 4         3 foreach my $sub (@{$xmltree->{$element}}) {
  4         14  
1755             # print (" "x20, "filtering child node:", Dumper({ sub => $sub, target => $xmlpath->[0]->[0] }) if $DEBUG > 5;
1756 20         17 $xmlpos++;
1757 20         19 my ($mresult,$childmap);
1758 20         43 my $tmpfilters = eval( Dumper($filters) );
1759 20         126 my $tmpxmlpath = eval( Dumper($xmlpath) );
1760 20         104 my ($bc_clone);
1761 20 50 66     109 if ( ((!ref($sub)) && ($sub =~ /\w+/))
  4 50 66     27  
      66        
      33        
      66        
1762             && ( ((ref($xmlpath) eq "ARRAY") && (@{$xmlpath} == 1))
1763             && ($whatisnode->($xmlpath->[0]->[0]) eq "text")
1764             && (defined $processFilters->($sub,$tmpxmlpath->[0]->[1])) ) ) {
1765 0         0 $childmap = { name => $element, position => $xmlpos, target => undef };
1766 0         0 $mresult = $xmltree->{$element}->[($xmlpos - 1)];
1767             } elsif ( defined $processFilters->($sub,$tmpfilters) ) {
1768 20 50       34 print ("-"x16,"= node at position ".$xmlpos." passed filters.\n") if $DEBUG >= $DEBUGNODE;
1769 20         48 $childmap = { name => $element, position => $xmlpos };
1770 20         30 $bc_clone = $bctrail->($breadcrumb,"clone");
1771 20         75 $mresult = $find->($sub,$tmpxmlpath,$childmap,$bc_clone);
1772             } else {
1773 0 0       0 print ("-"x16,"= node (element) at position ".$xmlpos." did not pass filters.\n") if $DEBUG >= $DEBUGNODE;
1774 0         0 next;
1775             }
1776 20 100       40 if (defined $mresult) {
1777 19 50       34 push(@{$result},@{$mresult}) if ref($mresult) eq "ARRAY";
  19         23  
  19         26  
1778 19 50       32 push(@{$result},$mresult) if ref($mresult) ne "ARRAY";
  0         0  
1779 19         31 $mapTran->($thisnodemap, child => $childmap );
1780             }
1781             }
1782 4         10 $bctrail->($breadcrumb,"poplast");
1783             } else {
1784 0 0       0 print ("-"x12,"= search tree node (".$element.") is element with REF but is not REF ARRAY or HASH\n") if $DEBUG >= $DEBUGNODE;
1785             }
1786             }
1787              
1788 15 100       35 if (ref($result) eq "ARRAY") {
1789 11 50       9 push(@found,@{$result}) unless @{$result} == 0;
  11         29  
  11         24  
1790             } else {
1791 4 50       13 push(@found,$result) unless !defined $result;
1792             }
1793             }
1794             }
1795             #print (" "x8, "searching ended on node with nodemap:", Dumper ($thisnodemap) if $DEBUG > 5;
1796 31 50       69 return undef if @found == 0;
1797 31         65 return \@found;
1798 5         115 }; # end find->()
1799              
1800             # pathsplit() takes a parsed XML::TreePP::XMLPath, and splits it into two
1801             # XML::TreePP::XMLPath paths. The path to the parent node and the path to
1802             # the child node. The child XML::TreePP::XMLPath is the path to the child
1803             # node plus the target, and including any filters.
1804             # ( $parent, ($child ."/". $target) ) = $pathsplit->(parsed XML::TreePP::XMLPath)
1805             my $pathsplit = sub ($) {
1806 0     0   0 my $parent_path = shift;
1807 0         0 $parent_path = eval(Dumper($parent_path));
1808 0         0 my ($child_path,$string_element); # string_element is #text or @attribute if exists in path
1809 0 0 0     0 if ( ($whatisnode->($parent_path->[ (@{$parent_path}-1) ]->[0]) eq "text")
  0         0  
  0         0  
1810             || ($whatisnode->($parent_path->[ (@{$parent_path}-1) ]->[0]) eq "attribute") ) {
1811 0         0 unshift( @{$child_path}, pop @{$parent_path} ); # $parent_path becomes just the without #text/@attr
  0         0  
  0         0  
1812 0         0 unshift( @{$child_path}, pop @{$parent_path} ); # $parent_path becomes the parent
  0         0  
  0         0  
1813             } else {
1814             # whatis eq element
1815 0         0 unshift( @{$child_path}, pop @{$parent_path} ); # $parent_path becomes the parent
  0         0  
  0         0  
1816             }
1817 0         0 return ($parent_path,$child_path);
1818 5         28 };
1819              
1820             # structure => TargetRaw | RootMAP | ParentMAP
1821 5         6 my ($found,$thismap);
1822 5 50       20 if ($o_structure =~ /^RootMap$/i) {
    50          
1823 0         0 $thismap = { root => $xtree, path => '/' }; # the root map
1824 0 0 0     0 if (($DEBUG >= 1) && ($DEBUG <= 5)) {
1825 0         0 print ("-"x11,"# Searching for the path within the root node." . "\n");
1826             }
1827 0         0 $found = $find->($xtree,$xpath,$thismap); # results from searching root
1828 0         0 $thismap->{'target'} = $mapAssemble->($thismap); # assemble the absolute XMLPaths to all targets
1829 0 0       0 return undef if ! defined $thismap;
1830             } elsif ($o_structure =~ /^ParentMap$/i) {
1831 0         0 my ($p_xpath,$c_xpath) = $pathsplit->($xpath); # split XMLPath into parent node and child node paths
1832 0         0 my $rootmap = { root => $xtree, path => '/' }; # the root map
1833 0 0 0     0 if (($DEBUG >= 1) && ($DEBUG <= 5)) {
1834 0         0 print ("-"x11,"# Searching for the parent path within the root node." . "\n");
1835             }
1836 0         0 my $p_found = $find->($xtree,$p_xpath,$rootmap); # results from searching root
1837 0         0 my $p_path = $mapAssemble->($rootmap); # assemble the absolute XMLPaths to all targets to parent nodes
1838 0         0 foreach my $p_xtree (@{$p_found}) { # search each parent xml document fragment for its child nodes
  0         0  
1839 0         0 my $parentmap = { root => $p_xtree, path => (shift(@{$p_path})) }; # create the map for the parent node
  0         0  
1840 0         0 my $tmpc_xpath = eval(Dumper($c_xpath)); # make a copy of the child XMLPath
1841 0 0 0     0 if (($DEBUG >= 1) && ($DEBUG <= 5)) {
1842 0         0 print ("-"x11,"# Searching for the child path within the parent node." . "\n");
1843             }
1844 0         0 my $c_found = $find->($p_xtree,$tmpc_xpath,$parentmap); # results from searching parent
1845 0 0       0 next if !defined $c_found;
1846 0 0       0 push(@{$found},@{$c_found}) if ref($c_found) eq "ARRAY";
  0         0  
  0         0  
1847 0 0       0 push(@{$found},$c_found) if ref($c_found) ne "ARRAY";
  0         0  
1848 0         0 push(@{$thismap},$parentmap);
  0         0  
1849             }
1850             } else {
1851 5         5 $thismap = undef;
1852 5         9 $found = $find->($xtree,$xpath);
1853             }
1854              
1855 5 50 33     12 if (($DEBUG >= 1) && ($DEBUG <= 5)) {
1856 0 0 0     0 print ("-"x11,"# Search yielded results." . "\n") if defined $thismap || defined $found;
1857             }
1858 5 50 33     23 if (($DEBUG) && (defined $thismap)) {
    50 33        
1859 0         0 print Dumper({ structure => $o_structure, thismap => $thismap });
1860             } elsif (($DEBUG) && (defined $found)) {
1861 0         0 print Dumper({ structure => $o_structure, results => $found });
1862             }
1863              
1864 5 50 33     28 if (($o_structure =~ /^RootMap$/i) || ($o_structure =~ /^ParentMap$/i)) {
1865 0 0       0 $thismap = [$thismap] if ref $thismap ne "ARRAY";
1866 0 0 0     0 return undef if (! defined $thismap || @{$thismap} == 0) && !defined wantarray;
      0        
1867 0 0       0 return (@{$thismap}) if !defined wantarray;
  0         0  
1868 0 0       0 return wantarray ? @{$thismap} : $thismap;
  0         0  
1869             }
1870 5 50       10 return undef if ! defined $found;
1871 5 50       11 $found = [$found] if ref $found ne "ARRAY";
1872 5 50 33     15 return undef if (! defined $found || @{$found} == 0) && !defined wantarray;
      33        
1873 5 50       11 return (@{$found}) if !defined wantarray;
  0         0  
1874 5 50       52 return wantarray ? @{$found} : $found;
  0         0  
1875             }
1876              
1877              
1878             =pod
1879              
1880             =head2 getValues
1881              
1882             =over
1883              
1884             Retrieve the values found in the given XML Document at the given XMLPath.
1885              
1886             This method was added in version 0.53 as getValue, and changed to getValues in 0.54
1887              
1888             =over 4
1889              
1890             =item * B
1891              
1892             The XML Document to search and return values from.
1893              
1894             =item * B
1895              
1896             The XMLPath to retrieve the values from.
1897              
1898             =item * B => C<1> | C<0>
1899              
1900             Return values that are strings. (default is 1)
1901              
1902             =item * B => C<1> | C<0>
1903              
1904             Return values that are xml, as raw xml. (default is 0)
1905              
1906             =item * B => C<1> | C<0>
1907              
1908             Return values that are xml, as parsed xml. (default is 0)
1909              
1910             =item * B => C<1> | C<0>
1911              
1912             Trim off the white space at the beginning and end of each value in the result
1913             set before returning the result set. (default is 0)
1914              
1915             =item * I
1916              
1917             Returns the values from the XML Document found at the XMLPath.
1918              
1919             =back
1920              
1921             # return the value of @author from all book elements
1922             $vals = $tppx->getValues( $xmldoc, '/books/book/@author' );
1923             # return the values of the current node, or XML Subtree
1924             $vals = $tppx->getValues( $xmldoc_node, "." );
1925             # return only XML data from the 5th book node
1926             $vals = $tppx->getValues( $xmldoc, '/books/book[5]', valstring => 0, valxml => 1 );
1927             # return only XML::TreePP parsed XML from the all book nodes having an id attribute
1928             $vals = $tppx->getValues( $xmldoc, '/books/book[@id]', valstring => 0, valxmlparsed => 1 );
1929             # return both unparsed XML data and text content from the 3rd book excerpt,
1930             # and trim off the white space at the beginning and end of each value
1931             $vals = $tppx->getValues( $xmldoc, '/books/book[3]/excerpt', valstring => 1, valxml => 1, valtrim => 1 );
1932              
1933             =back
1934              
1935             =cut
1936              
1937             sub getValues (@) {
1938 1 50 50 1 1 249 my $self = shift if ref($_[0]) eq $REF_NAME || undef;
1939 1 50       4 unless (@_ >= 2) { carp 'method getValues(@) requires at least two arguments.'; return undef; }
  0         0  
  0         0  
1940 1         2 my $tree = shift;
1941 1         2 my $path = shift;
1942              
1943 1         13 local $Data::Dumper::Indent = 0;
1944 1         2 local $Data::Dumper::Purity = 1;
1945 1         1 local $Data::Dumper::Terse = 1;
1946              
1947             # Supported arguments:
1948             # valstring = 1|0 ; default = 1; 1 = return values that are strings
1949             # valxml = 1|0 ; default = 0; 1 = return values that are xml, as raw xml
1950             # valxmlparsed = 1|0 ; default = 0; 1 = return values that are xml, as parsed xml
1951 1         2 my %args = @_;
1952 1 50       4 my $v_string = exists $args{'valstring'} ? $args{'valstring'} : 1;
1953 1 50       3 my $v_xml = exists $args{'valxml'} ? $args{'valxml'} : 0;
1954 1 50       3 my $v_xmlparsed = exists $args{'valxmlparsed'} ? $args{'valxmlparsed'} : 0;
1955 1 50       4 my $v_trim = exists $args{'valtrim'} ? $args{'valtrim'} : 0;
1956             # Make up this code to dictate allowed combinations of return types
1957 1 50 33     7 my $v_ret_type = "sp" if $v_string && $v_xmlparsed;
1958 1 50 33     5 $v_ret_type = "sx" if $v_string && $v_xml;
1959 1 50 33     16 $v_ret_type = "s" if $v_string && ! $v_xml && ! $v_xmlparsed;
      33        
1960 1 50 33     10 $v_ret_type = "p" if ! $v_string && $v_xmlparsed;
1961 1 50 33     8 $v_ret_type = "x" if ! $v_string && $v_xml;
1962              
1963 1         2 my ($tpp,$xtree,$xpath,$xml_text_id,$xml_attr_id,$old_prop_xml_decl);
1964              
1965 1 50 33     5 if ((defined $self) && (defined $self->get('tpp'))) {
1966 0 0       0 $tpp = $self ? $self->tpp() : tpp();
1967 0   0     0 $xml_text_id = $tpp->get( 'text_node_key' ) || '#text';
1968 0   0     0 $xml_attr_id = $tpp->get( 'attr_prefix' ) || '-';
1969             } else {
1970 1         1 $xml_text_id = '#text';
1971 1         2 $xml_attr_id = '-';
1972             }
1973 1 50       8 if (ref $tree) { $xtree = $tree;
  1 0       2  
1974             }
1975             elsif (!defined $tree)
1976 0         0 { $xtree = undef
1977             }
1978 0 0       0 else { if (!defined $tpp) { $tpp = $self ? $self->tpp() : tpp(); }
  0 0       0  
1979 0 0       0 $xtree = $tpp->parse($tree) if defined $tree;
1980             }
1981 1 50       2 if (ref $path) { $xpath = eval (Dumper($path)); # make a copy of inputted parsed XMLPath
  0         0  
1982             }
1983 1         2 else { $xpath = parseXMLPath($path);
1984             }
1985              
1986 1 50       4 if ($v_ret_type =~ /x/) {
1987 0 0       0 if (ref($tpp) ne "XML::TreePP") {
1988 0 0       0 $tpp = $self ? $self->tpp() : tpp();
1989             }
1990             # $tpp->set( indent => 2 );
1991 0         0 $old_prop_xml_decl = $tpp->get( "xml_decl" );
1992 0         0 $tpp->set( xml_decl => '' );
1993             }
1994              
1995 1 50       2 print ("="x8,"sub::getValues()\n") if $DEBUG >= $DEBUGMETHOD;
1996 1 50       3 print (" "x8, "=called with return type: ",$v_ret_type,"\n") if $DEBUG >= $DEBUGMETHOD;
1997 1 50       4 print (" "x8, "=called with path: ",Dumper($xpath),"\n") if $DEBUG >= $DEBUGPATH;
1998              
1999             # Retrieve the sub tree of the XML document at path
2000 1         2 my $results = filterXMLDoc($xtree, $xpath);
2001              
2002             # for debugging purposes
2003 1 50       5 print (" "x8, "=Found at var's path: ", Dumper( $results ),"\n") if $DEBUG >= $DEBUGDUMP;
2004              
2005 1     0   4 my $getVal = sub ($$) {};
  0         0  
2006             $getVal = sub ($$) {
2007 5 50   5   11 print ("="x8,"sub::getValues|getVal->()\n") if $DEBUG >= $DEBUGMETHOD;
2008 5         6 my $v_ret_type = shift;
2009 5         31 my $treeNodes = shift;
2010 5 50       10 print (" "x8,"getVal->():from> ",Dumper($treeNodes)) if $DEBUG >= $DEBUGDUMP;
2011 5 50 0     9 print (" - '",ref($treeNodes)||'string',"'\n") if $DEBUG >= $DEBUGDUMP;
2012 5         4 my @results;
2013 5 50       16 if (ref($treeNodes) eq "HASH") {
    100          
    50          
2014 0         0 my $utreeNodes = eval ( Dumper($treeNodes) ); # make a copy for the result set
2015 0 0 0     0 push (@results, $utreeNodes->{$xml_text_id}) if exists $utreeNodes->{$xml_text_id} && $v_ret_type =~ /s/;
2016 0 0 0     0 delete $utreeNodes->{$xml_text_id} if exists $utreeNodes->{$xml_text_id} && $v_ret_type =~ /[x,p]/;
2017 0 0       0 push (@results, $utreeNodes) if $v_ret_type =~ /p/;
2018 0 0       0 push (@results, $tpp->write($utreeNodes)) if $v_ret_type =~ /x/;
2019             } elsif (ref($treeNodes) eq "ARRAY") {
2020 1         2 foreach my $item (@{$treeNodes}) {
  1         2  
2021 4         10 my $r1 = $getVal->($v_ret_type,$item);
2022 4         5 foreach my $r2 (@{$r1}) {
  4         6  
2023 4 50       16 push(@results,$r2) if defined $r2;
2024             }
2025             }
2026             } elsif (! ref($treeNodes)) {
2027 4 50       14 push(@results,$treeNodes) if $v_ret_type =~ /s/;
2028             }
2029 5         9 return \@results;
2030 1         7 };
2031              
2032 1 50       4 if ($v_ret_type =~ /x/) {
2033 0         0 $tpp->set( xml_decl => $old_prop_xml_decl );
2034             }
2035              
2036 1         4 my $found = $getVal->($v_ret_type,$results);
2037 1 50       6 $found = [$found] if ref $found ne "ARRAY";
2038              
2039 1 50       4 if ($v_trim) {
2040 0         0 my $i=0;
2041 0         0 while($i < @{$found}) {
  0         0  
2042 0 0       0 print (" =trimmimg result (".$i."): '",$found->[$i],"'") if $DEBUG >= $DEBUGDUMP;
2043 0         0 $found->[$i] =~ s/\s*$//g;
2044 0         0 $found->[$i] =~ s/^\s*//g;
2045 0 0       0 print (" to '",$found->[$i],"'\n") if $DEBUG >= $DEBUGDUMP;
2046 0         0 $i++;
2047             }
2048             }
2049              
2050 1 50 33     4 return undef if (! defined $found || @{$found} == 0) && !defined wantarray;
      33        
2051 1 50       3 return (@{$found}) if !defined wantarray;
  0         0  
2052 1 50       7 return wantarray ? @{$found} : $found;
  0         0  
2053             }
2054              
2055             # validateAttrValue
2056             # Wrapper around filterXMLDoc for backwards compatibility only.
2057             sub validateAttrValue ($$) {
2058 0     0 1 0 carp 'Method validateAttrValue($$) is deprecated, use filterXMLDoc() instead.';
2059 0 0 0     0 my $self = shift if ref($_[0]) eq $REF_NAME || undef;
2060 0 0       0 unless (@_ == 2) { carp 'method validateAttrValue($$) requires two arguments.'; return undef; }
  0         0  
  0         0  
2061 0         0 my $subtree = shift;
2062 0         0 my $params = shift;
2063              
2064 0 0       0 if ($self) {
2065 0         0 return $self->filterXMLDoc( $subtree , [ "." , $params ]);
2066             }
2067             else {
2068 0         0 return filterXMLDoc( $subtree , [ "." , $params ]);
2069             }
2070             }
2071              
2072             # getSubtree
2073             # Wrapper around filterXMLDoc for backwards compatibility only.
2074             sub getSubtree ($$) {
2075 0     0 1 0 carp 'Method getSubtree($$) is deprecated, use filterXMLDoc() instead.';
2076 0 0 0     0 my $self = shift if ref($_[0]) eq $REF_NAME || undef;
2077 0 0       0 unless (@_ == 2) { carp 'method getSubtree($$) requires two arguments.'; return undef; }
  0         0  
  0         0  
2078 0         0 my $tree = shift;
2079 0         0 my $path = shift;
2080 0         0 my $result;
2081              
2082 0 0       0 if ($self) {
2083 0         0 $result = $self->filterXMLDoc($tree,$path);
2084             }
2085             else {
2086 0         0 $result = filterXMLDoc($tree,$path);
2087             }
2088 0 0       0 return undef unless defined $result;
2089 0 0       0 return wantarray ? @{$result} : $result->[0];
  0         0  
2090             }
2091              
2092             =pod
2093              
2094             =head2 getAttributes
2095              
2096             =over
2097              
2098             Retrieve the attributes found in the given XML Document at the given XMLPath.
2099              
2100             =over 4
2101              
2102             =item * B
2103              
2104             An XML::TreePP parsed XML document.
2105              
2106             =item * B
2107              
2108             The path within the XML Tree to retrieve. See parseXMLPath()
2109              
2110             =item * I
2111              
2112             An array reference of [{attribute=>value}], or undef if none found
2113              
2114             In the case where the XML Path points at a multi-same-name element, the return
2115             value is a ref array of ref hashes, one hash ref for each element.
2116              
2117             Example Returned Data:
2118              
2119             XML Path points at a single named element
2120             [ {attr1=>val,attr2=>val} ]
2121              
2122             XML Path points at a multi-same-name element
2123             [ {attr1A=>val,attr1B=>val}, {attr2A=>val,attr2B=>val} ]
2124              
2125             =back
2126              
2127             $attributes = getAttributes ( $XMLTree , $XMLPath );
2128              
2129             =back
2130              
2131             =cut
2132              
2133             # getAttributes
2134             # @param xmltree the XML::TreePP parsed xml document
2135             # @param xmlpath the XML path (See parseXMLPath)
2136             # @return an array ref of [{attr=>val, attr=>val}], or undef if none found
2137             #
2138             # In the case where the XML Path points at a multi-same-name element, the
2139             # return value is a ref array of ref arrays, one for each element.
2140             # Example:
2141             # XML Path points at a single named element
2142             # [{attr1=>val, attr2=>val}]
2143             # XML Path points at a multi-same-name element
2144             # [ {attr1A=>val,attr1B=>val}, {attr2A=>val,attr2B=val} ]
2145             #
2146             sub getAttributes (@);
2147             sub getAttributes (@) {
2148 6 100 100 6 1 281 my $self = shift if ref($_[0]) eq $REF_NAME || undef;
2149 6 50       13 unless (@_ >= 1) { carp 'method getAttributes($$) requires one argument, and optionally a second argument.'; return undef; }
  0         0  
  0         0  
2150 6         7 my $tree = shift;
2151 6   100     14 my $path = shift || undef;
2152              
2153 6         7 my ($tpp,$xml_text_id,$xml_attr_id);
2154 6 50 66     24 if ((defined $self) && (defined $self->get('tpp'))) {
2155 0 0       0 my $tpp = $self ? $self->tpp() : tpp();
2156 0   0     0 $xml_text_id = $tpp->get( 'text_node_key' ) || '#text';
2157 0   0     0 $xml_attr_id = $tpp->get( 'attr_prefix' ) || '-';
2158             } else {
2159 6         7 $xml_text_id = '#text';
2160 6         5 $xml_attr_id = '-';
2161             }
2162              
2163 6         5 my $subtree;
2164 6 100       7 if (defined $path) {
2165 1         2 $subtree = filterXMLDoc($tree,$path);
2166             } else {
2167 5         6 $subtree = $tree;
2168             }
2169 6         6 my @attributes;
2170 6 100       17 if (ref $subtree eq "ARRAY") {
    100          
2171 1         2 foreach my $element (@{$subtree}) {
  1         2  
2172 5         13 my $e_attr = getAttributes($element);
2173 5         6 foreach my $a (@{$e_attr}) {
  5         7  
2174 4         10 push(@attributes,$a);
2175             }
2176             }
2177             } elsif (ref $subtree eq "HASH") {
2178 4         5 my $e_elem;
2179 4         6 while (my ($k,$v) = each(%{$subtree})) {
  12         29  
2180 8 100       39 if ($k =~ /^$xml_attr_id/) {
2181 4         18 $k =~ s/^$xml_attr_id//;
2182 4         14 $e_elem->{$k} = $v;
2183             }
2184             }
2185 4         9 push(@attributes,$e_elem);
2186             } else {
2187 1         3 return undef;
2188             }
2189 5         10 return \@attributes;
2190             }
2191              
2192             =pod
2193              
2194             =head2 getElements
2195              
2196             =over
2197              
2198             Gets the child elements found at a specified XMLPath
2199              
2200             =over 4
2201              
2202             =item * B
2203              
2204             An XML::TreePP parsed XML document.
2205              
2206             =item * B
2207              
2208             The path within the XML Tree to retrieve. See parseXMLPath()
2209              
2210             =item * I
2211              
2212             An array reference of [{element=>value}], or undef if none found
2213              
2214             An array reference of a hash reference of elements (not attributes) and each
2215             elements XMLSubTree, or undef if none found. If the XMLPath points at a
2216             multi-valued element, then the subelements of each element at the XMLPath are
2217             returned as separate hash references in the returning array reference.
2218              
2219             The format of the returning data is the same as the getAttributes() method.
2220              
2221             The XMLSubTree is fetched based on the provided XMLPath. Then all elements
2222             found under that XMLPath are placed into a referenced hash table to be
2223             returned. If an element found has additional XML data under it, it is all
2224             returned just as it was provided.
2225              
2226             Simply, this strips all XML attributes found at the XMLPath, returning the
2227             remaining elements found at that path.
2228              
2229             If the XMLPath has no elements under it, then undef is returned instead.
2230              
2231             =back
2232              
2233             $elements = getElements ( $XMLTree , $XMLPath );
2234              
2235             =back
2236              
2237             =cut
2238              
2239             # getElements
2240             # @param xmltree the XML::TreePP parsed xml document
2241             # @param xmlpath the XML path (See parseXMLPath)
2242             # @return an array ref of [[element,{val}]] where val can be a scalar or a subtree, or undef if none found
2243             #
2244             # See also getAttributes function for further details of the return type
2245             #
2246             sub getElements (@);
2247             sub getElements (@) {
2248 6 100 100 6 1 262 my $self = shift if ref($_[0]) eq $REF_NAME || undef;
2249 6 50       13 unless (@_ >= 1) { carp 'method getElements($$) requires one argument, and optionally a second argument.'; return undef; }
  0         0  
  0         0  
2250 6         8 my $tree = shift;
2251 6   100     16 my $path = shift || undef;
2252              
2253 6         5 my ($tpp,$xml_text_id,$xml_attr_id);
2254 6 50 66     19 if ((defined $self) && (defined $self->get('tpp'))) {
2255 0 0       0 my $tpp = $self ? $self->tpp() : tpp();
2256 0   0     0 $xml_text_id = $tpp->get( 'text_node_key' ) || '#text';
2257 0   0     0 $xml_attr_id = $tpp->get( 'attr_prefix' ) || '-';
2258             } else {
2259 6         8 $xml_text_id = '#text';
2260 6         6 $xml_attr_id = '-';
2261             }
2262              
2263 6         7 my $subtree;
2264 6 100       8 if (defined $path) {
2265 1         3 $subtree = filterXMLDoc($tree,$path);
2266             } else {
2267 5         5 $subtree = $tree;
2268             }
2269 6         7 my @elements;
2270 6 100       16 if (ref $subtree eq "ARRAY") {
    100          
2271 1         1 foreach my $element (@{$subtree}) {
  1         2  
2272 5         13 my $e_elem = getElements($element);
2273 5         5 foreach my $a (@{$e_elem}) {
  5         9  
2274 4         13 push(@elements,$a);
2275             }
2276             }
2277             } elsif (ref $subtree eq "HASH") {
2278 4         6 my $e_elem;
2279 4         5 while (my ($k,$v) = each(%{$subtree})) {
  12         63  
2280 8 100       42 if ($k !~ /^$xml_attr_id/) {
2281 4         15 $e_elem->{$k} = $v;
2282             }
2283             }
2284 4         6 push(@elements,$e_elem);
2285             } else {
2286 1         3 return undef;
2287             }
2288 5         15 return \@elements;
2289             }
2290              
2291              
2292             1;
2293             __END__