File Coverage

blib/lib/XML/EasyOBJ.pm
Criterion Covered Total %
statement 4 6 66.6
branch n/a
condition n/a
subroutine 2 2 100.0
pod n/a
total 6 8 75.0


line stmt bran cond sub pod time code
1              
2             =head1 NAME
3              
4             XML::EasyOBJ - Easy XML object navigation
5              
6             =head1 VERSION
7              
8             Version 1.12
9              
10             =head1 SYNOPSIS
11              
12             # open exisiting file
13             my $doc = new XML::EasyOBJ('my_xml_document.xml');
14             my $doc = new XML::EasyOBJ(-type => 'file', -param => 'my_xml_document.xml');
15              
16             # create object from XML string
17             my $doc = new XML::EasyOBJ(-type => 'string', -param => $xml_source);
18              
19             # create new file
20             my $doc = new XML::EasyOBJ(-type => 'new', -param => 'root_tag');
21            
22             # read from document
23             my $text = $doc->some_element($index)->getString;
24             my $attr = $doc->some_element($index)->getAttr('foo');
25             my $element = $doc->some_element($index);
26             my @elements = $doc->some_element;
27              
28             # first "some_element" element
29             my $elements = $doc->some_element;
30             # list of "some_element" elements
31             my @elements = $doc->some_element;
32              
33             # write to document
34             $doc->an_element->setString('some string')
35             $doc->an_element->addString('some string')
36             $doc->an_element->setAttr('attrname', 'val')
37             $doc->an_element->setAttr('attr1' => 'val', 'attr2' => 'val2')
38              
39             # access elements with non-name chars and the underlying DOM
40             my $element = $doc->getElement('foo-bar')->getElement('bar-none');
41             my $dom = $doc->foobar->getDomObj;
42              
43             # get elements without specifying the element name
44             my @elements = $doc->getElement();
45             my $sixth_element = $doc->getElement('', 5);
46              
47             # remove elements/attrs
48             $doc->remElement('tagname', $index);
49             $doc->tag_name->remAttr($attr);
50              
51             # remap builtin methods
52             $doc->remapMethod('getString', 's');
53             my $text = $doc->some_element->s;
54              
55              
56             =head1 DESCRIPTION
57              
58             I wrote XML::EasyOBJ a couple of years ago because it seemed to me
59             that the DOM wasn't very "perlish" and the DOM is difficult for us
60             mere mortals that don't use it on a regular basis. As I only need
61             to process XML on an occasionally I wanted an easy way to do what
62             I needed to do without having to refer back to DOM documentation
63             each time.
64              
65             A quick fact list about XML::EasyOBJ:
66              
67             * Runs on top of XML::DOM
68             * Allows access to the DOM as needed
69             * Simple routines to reading and writing elements/attributes
70              
71             =head1 REQUIREMENTS
72              
73             XML::EasyOBJ uses XML::DOM. XML::DOM is available from CPAN (www.cpan.org).
74              
75             =head1 METHODS
76              
77             Below is a description of the methods avialable.
78              
79             =cut
80              
81             package XML::EasyOBJ;
82              
83 5     5   29760 use strict;
  5         11  
  5         164  
84 5     5   7868 use XML::DOM;
  0            
  0            
85             use vars qw/$VERSION/;
86              
87             $VERSION = '1.12';
88              
89             =head2 new
90              
91             You can create a new object from an XML file, a string of XML, or
92             a new document. The constructor takes a set of key value pairs as
93             follows:
94              
95             =item -type
96              
97             The type is either "file", "string" or "new". "file" will create
98             the object from a file source, "string" will create the object from
99             a string of XML code, and "new" will create a new document object.
100              
101             =item -param
102              
103             This value depends on the -type that is passed to the constructor.
104             If the -type is "file" this will be the filename to open and parse.
105             If -type is "string", this is a string of XML code. If -type is
106             "new", this is the name of the root element.
107              
108             Creating an object from an XML file:
109              
110             my $doc = new XML::EasyOBJ(-type => 'file', -param => 'my_xml_document.xml');
111              
112             Creating an object from a string containing the XML source:
113              
114             my $doc = new XML::EasyOBJ(-type => 'string', -param => $xml_source);
115              
116             Creating a new XML document by passing the root tag name:
117              
118             my $doc = new XML::EasyOBJ(-type => 'new', -param => 'root_tag');
119              
120             =item -expref
121              
122             Passing a value of 1 will force the expansion of references when
123             grabbing string data from the XML file. The default value is 0,
124             not to expand references.
125              
126              
127             Obtionally you may also pass the filename to open as the first
128             argument instead of passing the -type and -param parameters.
129             This is backwards compatable with early version of XML::EasyOBJ
130             which did not handle -type and -param parameters.
131              
132             my $doc = new XML::EasyOBJ('my_xml_document.xml');
133              
134             =cut
135              
136             sub new {
137             my $class = shift;
138              
139             # container for DOM object
140             my $doc = '';
141              
142             # expand references flag. true to expand.
143             my $expref = 0;
144              
145             # if there are an odd number of parameters, take the first
146             # argument as a filename.
147             if ( scalar(@_) % 2 ) {
148             my $file = shift;
149             my $parser = new XML::DOM::Parser;
150             $doc = $parser->parsefile( $file ) || return;
151             }
152             # if there are an even number of arguments, treat them as
153             # hash name/value pairs.
154             else {
155             my %args = @_;
156              
157             # check for "expand references" flag, and set $expref
158             $expref = 1 if ( exists $args{-expref} and $args{-expref} == 1 );
159              
160             # create DOM from file, param is filename
161             if ( $args{-type} eq 'file' ) {
162             my $parser = new XML::DOM::Parser;
163             $doc = $parser->parsefile( $args{-param} ) || return;
164             }
165             # create a new DOM object, param is root element name
166             elsif ( $args{-type} eq 'new' ) {
167             $doc = new XML::DOM::Document();
168             $doc->appendChild( $doc->createElement( $args{-param} ) );
169             }
170             # create DOM from string
171             elsif ( $args{-type} eq 'string' ) {
172             my $parser = new XML::DOM::Parser;
173             $doc = $parser->parse( $args{-param} ) || return;
174             }
175             else {
176             return;
177             }
178             }
179              
180             # set method mappings, may be changed by remapMethod method
181             my %map = ( getString => 'getString',
182             setString => 'setString',
183             addString => 'addString',
184             getAttr => 'getAttr',
185             setAttr => 'setAttr',
186             remAttr => 'remAttr',
187             remElement => 'remElement',
188             getElement => 'getElement',
189             getDomObj => 'getDomObj',
190             remapMethod => 'remapMethod',
191             getTagName => 'getTagName',
192             );
193              
194             return bless( { 'map' => \%map,
195             'doc' => $doc,
196             'ptr' => $doc->getDocumentElement(),
197             'expref' => $expref,
198             }, 'XML::EasyOBJ::Object' );
199             }
200              
201             package XML::EasyOBJ::Object;
202              
203             use strict;
204             use XML::DOM;
205             use vars qw/%SUBLIST %INTSUBLIST $AUTOLOAD/;
206              
207             $AUTOLOAD = '';
208             %SUBLIST = ();
209             %INTSUBLIST = ();
210              
211             sub DESTROY {
212             local $^W = 0;
213             my $self = $_[0];
214             $_[0] = '';
215             unless ( $_[0] ) {
216             $_[0] = $self;
217             $AUTOLOAD = 'DESTROY';
218             return AUTOLOAD( @_ );
219             }
220             }
221              
222             sub AUTOLOAD {
223             my $funcname = $AUTOLOAD || 'AUTOLOAD';
224             $funcname =~ s/^XML::EasyOBJ::Object:://;
225             $AUTOLOAD = '';
226              
227             if ( exists $_[0]->{map}->{$funcname} ) {
228             return &{$SUBLIST{$_[0]->{map}->{$funcname}}}( @_ );
229             }
230              
231             my $self = shift;
232             my $index = shift;
233             my @nodes = ();
234              
235             die "Fatal error: lost pointer!" unless ( exists $self->{ptr} );
236              
237             for my $kid ( $self->{ptr}->getChildNodes ) {
238             if ( ( $kid->getNodeType == ELEMENT_NODE ) && ( $kid->getTagName eq $funcname ) ) {
239             push @nodes, bless(
240             { map => $self->{map},
241             doc => $self->{doc},
242             ptr => $kid,
243             expref => $self->{expref},
244             }, 'XML::EasyOBJ::Object' );
245             }
246             }
247              
248             if ( wantarray ) {
249             return @nodes;
250             }
251             else {
252             if ( defined $index ) {
253             unless ( defined $nodes[$index] ) {
254             for ( my $i = scalar(@nodes); $i <= $index; $i++ ) {
255             $nodes[$i] = bless(
256             { map => $self->{map},
257             doc => $self->{doc},
258             ptr => &{$INTSUBLIST{'makeNewNode'}}( $self, $funcname ),
259             expref => $self->{expref},
260             }, 'XML::EasyOBJ::Object' )
261             }
262             }
263             return $nodes[$index];
264             }
265             else {
266             return bless(
267             { map => $self->{map},
268             doc => $self->{doc},
269             ptr => &{$INTSUBLIST{'makeNewNode'}}( $self, $funcname ),
270             expref => $self->{expref},
271             }, 'XML::EasyOBJ::Object' ) unless ( defined $nodes[0] );
272             return $nodes[0];
273             }
274             }
275             }
276              
277             =head2 makeNewNode( NEW_TAG )
278              
279             Append a new element node to the current node. Takes the tag name
280             as the parameter and returns the created node as a convienence.
281              
282             my $p_element = $doc->body->makeNewNode('p');
283              
284             =cut
285              
286             $INTSUBLIST{'makeNewNode'} =
287             sub {
288             my $self = shift;
289             my $element_name = shift;
290             return $self->{ptr}->appendChild( $self->{doc}->createElement($element_name) );
291             };
292              
293             =head2 remapMethod( CUR_METHOD, NEW_METHOD )
294              
295             Allows you to change the name of any of the object methods. You
296             might want to do this for convienience or to avoid a naming
297             collision with an element in the document.
298              
299             Two parameters need to be passed; the current name of the method
300             and the new name. Returns 1 on a successful mapping and undef
301             on failure. A failure can result if you don't pass two parameters
302             if if the "copy from" method name does not exist.
303              
304             $doc->remapMethod('getString', 's');
305             $doc->s();
306              
307             After remapping you must use the new name if you with to remap
308             the method again. You can call the remapMethod method from
309             any place in the XML tree and it will always change the method
310             globally.
311              
312             In the following example $val1 and $val2 are equal:
313              
314             $doc->some_element->another_element->('getString', 's');
315             my $val1 = $doc->s();
316             $doc->remapMethod('s', 'getString');
317             my $val2 = $doc->getString();
318              
319             =cut
320              
321             $SUBLIST{remapMethod} =
322             sub {
323             my $self = shift;
324             my ( $from, $to ) = @_;
325              
326             die "Fatal error: lost the pointer!" unless ( exists $self->{ptr} );
327              
328             return unless ( ( $from ) && ( $to ) );
329             return unless ( exists $self->{map}->{$from} );
330              
331             my $tmp = $self->{map}->{$from};
332             delete $self->{map}->{$from};
333             $self->{map}->{$to} = $tmp;
334             return 1;
335             };
336              
337             =head2 getString( )
338              
339             Recursively extracts text from the current node and all children
340             element nodes. Returns the extracted text as a single scalar value.
341             Expands entities based on if the -expref flag was supplied during
342             object creation.
343              
344             =cut
345              
346             $SUBLIST{getString} =
347             sub {
348             my $self = shift;
349             die "Fatal error: lost the pointer!" unless ( exists $self->{ptr} );
350             my $string = &{$INTSUBLIST{extractText}}( $self->{ptr} );
351             return ( $self->{expref} ) ? $self->{doc}->expandEntityRefs($string) : $string;
352             };
353              
354             =head2 extractText( )
355              
356             Same as getString() but does not check the -expref flag. Included for
357             compatability with inital version of interface.
358              
359             =cut
360              
361             $INTSUBLIST{extractText} =
362             sub {
363             my $n = shift;
364             my $text;
365              
366             if ( $n->getNodeType == TEXT_NODE ) {
367             $text = $n->toString;
368             }
369             elsif ( $n->getNodeType == ELEMENT_NODE ) {
370             foreach my $c ( $n->getChildNodes ) {
371             $text .= &{$INTSUBLIST{extractText}}( $c );
372             }
373             }
374             return $text;
375             };
376              
377             =head2 setString( STRING )
378              
379             Sets the text value of the specified element. This is done by
380             first removing all text node children of the current element
381             and then appending the supplied text as a new child element.
382              
383             Take this XML fragment and code for example:
384              
385            

This elment has text and child elements

386              
387             $doc->p->setString('This is the new text');
388              
389             This will change the fragment to this:
390              
391            

textchildThis is the new text

392              
393             Because the and tags are not text nodes they are left
394             unchanged, and the new text is added at the end of the specified
395             element.
396              
397             If you need more specific control on the change you should
398             either use the getDomObj() method and use the DOM methods
399             directly or remove all of the child nodes and rebuild the
400            

element from scratch. Also see the addString() method.

401              
402             =cut
403              
404             $SUBLIST{setString} =
405             sub {
406             my $self = shift;
407             my $text = shift;
408              
409             die "Fatal error: lost the pointer!" unless ( exists $self->{ptr} );
410              
411             foreach my $n ( $self->{ptr}->getChildNodes ) {
412             if ( $n->getNodeType == TEXT_NODE ) {
413             $self->{ptr}->removeChild( $n );
414             }
415             }
416              
417             $self->{ptr}->appendChild( $self->{doc}->createTextNode( $text ) );
418             return &{$INTSUBLIST{extractText}}( $self->{ptr} );
419             };
420              
421             =head2 addString( STRING )
422              
423             Adds to the the text value of the specified element. This
424             is done by appending the supplied text as a new child element.
425              
426             Take this XML fragment and code for example:
427              
428            

This elment has text

429              
430             $doc->p->addString(' and elements');
431              
432             This will change the fragment to this:
433              
434            

This elment has text and elements

435              
436             =cut
437              
438             $SUBLIST{addString} =
439             sub {
440             my $self = shift;
441             my $text = shift;
442              
443             die "Fatal error: lost the pointer!" unless ( exists $self->{ptr} );
444              
445             $self->{ptr}->appendChild( $self->{doc}->createTextNode( $text ) );
446             return &{$INTSUBLIST{extractText}}( $self->{ptr} );
447             };
448              
449             =head2 getAttr( ATTR_NAME )
450              
451             Returns the value of the named attribute.
452              
453             my $val = $doc->body->img->getAttr('src');
454              
455             =cut
456              
457             $SUBLIST{getAttr} =
458             sub {
459             my $self = shift;
460             my $attr = shift;
461              
462             die "Fatal error: lost the pointer!" unless( exists $self->{ptr} );
463             if ( $self->{ptr}->getNodeType == ELEMENT_NODE ) {
464             return $self->{ptr}->getAttribute($attr);
465             }
466             return '';
467             };
468              
469             =head2 getTagName( )
470              
471             Returns the tag name of the specified element. This method is
472             useful when you are enumerating child elements and do not
473             know their element names.
474              
475             foreach my $element ( $doc->getElement() ) {
476             print $element->getTagName();
477             }
478              
479             =cut
480              
481             $SUBLIST{getTagName} =
482             sub {
483             my $self = shift;
484            
485             die "Fatal error: lost the pointer!" unless( exists $self->{ptr} );
486             if ( $self->{ptr}->getNodeType == ELEMENT_NODE ) {
487             return $self->{ptr}->getTagName;
488             }
489             return '';
490             };
491              
492             =head2 setAttr( ATTR_NAME, ATTR_VALUE, [ATTR_NAME, ATTR_VALUE]... )
493              
494             For each name/value pair passed the attribute name and value will
495             be set for the specified element.
496              
497             =cut
498              
499             $SUBLIST{setAttr} =
500             sub {
501             my $self = shift;
502             my %attr = @_;
503              
504             die "Fatal error: lost the pointer!" unless( exists $self->{ptr} );
505             if ( $self->{ptr}->getNodeType == ELEMENT_NODE ) {
506             if ( scalar(keys %attr) == 1 ) {
507             for ( keys %attr ) {
508             return $self->{ptr}->setAttribute($_, $attr{$_});
509             }
510             }
511             else {
512             for ( keys %attr ) {
513             $self->{ptr}->setAttribute($_, $attr{$_});
514             }
515             return 1;
516             }
517             }
518             return '';
519             };
520              
521             =head2 remAttr( ATTR_NAME )
522              
523             Removes the specified attribute from the current element.
524              
525             =cut
526              
527             $SUBLIST{remAttr} =
528             sub {
529             my $self = shift;
530             my $attr = shift;
531            
532             die "Fatal error: lost the pointer!" unless( exists $self->{ptr} );
533             if ( $self->{ptr}->getNodeType == ELEMENT_NODE ) {
534             if ( $self->{ptr}->getAttributes->getNamedItem( $attr ) ) {
535             $self->{ptr}->getAttributes->removeNamedItem( $attr );
536             return 1;
537             }
538             }
539             return 0;
540             };
541              
542             =head2 remElement( TAG_NAME, INDEX )
543              
544             Removes a child element of the current element. The name of the
545             child element and the index must be supplied. An index of 0
546             will remove the first occurance of the named element, 1 the second,
547             2 the third, etc.
548              
549             =cut
550              
551             $SUBLIST{remElement} =
552             sub {
553             my $self = shift;
554             my $name = shift;
555             my $index = shift;
556            
557             my $node = ( $index ) ? $self->$name($index) : $self->$name();
558             $self->{ptr}->removeChild( $node->{ptr} );
559             };
560              
561             =head2 getElement( TAG_NAME, INDEX )
562              
563             Returns the node from the tag name and index. If no index is
564             given the first child with that name is returned. Use this
565             method when you have element names that include characters that
566             are not legal as a perl method name. For example:
567              
568            
569            
570             test
571            
572            
573              
574             # "foo-bar" is not a legal method name
575             print $doc->bar->getElement('foo-bar')->getString();
576              
577             =cut
578              
579             $SUBLIST{getElement} =
580             sub {
581             my $self = shift;
582             my $funcname = shift;
583             my $index = shift;
584             my @nodes = ();
585              
586             die "Fatal error: lost pointer!" unless ( exists $self->{ptr} );
587              
588             foreach my $kid ( $self->{ptr}->getChildNodes ) {
589             if ( $funcname ) {
590             if ( ( $kid->getNodeType == ELEMENT_NODE ) && ( $kid->getTagName eq $funcname ) ) {
591             push @nodes, bless(
592             { map => $self->{map},
593             doc => $self->{doc},
594             ptr => $kid,
595             expref => $self->{expref},
596             }, 'XML::EasyOBJ::Object' );
597             }
598             }
599             else {
600             if ( $kid->getNodeType == ELEMENT_NODE ) {
601             push @nodes, bless(
602             { map => $self->{map},
603             doc => $self->{doc},
604             ptr => $kid,
605             expref => $self->{expref},
606             }, 'XML::EasyOBJ::Object' );
607             }
608             }
609             }
610            
611             if ( wantarray ) {
612             return @nodes;
613             }
614             else {
615             $index = 0 unless ( defined $index );
616              
617             if ( defined $nodes[$index] ) {
618             return $nodes[$index];
619             }
620             else {
621             # fail if no tag name given
622             return undef unless ( $funcname );
623              
624             for ( my $i = scalar(@nodes); $i <= $index; $i++ ) {
625             $nodes[$i] = bless(
626             { map => $self->{map},
627             doc => $self->{doc},
628             ptr => &{$INTSUBLIST{'makeNewNode'}}( $self, $funcname ),
629             expref => $self->{expref},
630             }, 'XML::EasyOBJ::Object' )
631             }
632              
633             return $nodes[$index];
634             }
635             }
636             };
637              
638             =head1 getDomObj( )
639              
640             Returns the DOM object associated with the current node. This
641             is useful when you need fine access via the DOM to perform
642             a specific function.
643              
644             =cut
645              
646             $SUBLIST{getDomObj} =
647             sub {
648             my $self = shift;
649             return $self->{ptr};
650             };
651              
652             1;
653              
654             =head1 BEGINNER QUICK START GUIDE
655              
656             =head2 Introduction
657              
658             You too can write XML applications, just as long as you understand
659             the basics of XML (elements and attributes). You can learn to write
660             your first program that can read data from an XML file in a mere
661             10 minutes.
662              
663             =head2 Assumptions
664              
665             It is assumed that you are familiar with the structure of the document that
666             you are reading. Next, you must know the basics of perl lists, loops, and
667             how to call a function. You must also have an XML document to read.
668              
669             Simple eh?
670              
671             =head2 Loading the XML document
672              
673             use XML::EasyOBJ;
674             my $doc = new XML::EasyOBJ('my_xml_document.xml') || die "Can't make object";
675              
676             Replace the string "my_xml_document.xml" with the name of your XML document.
677             If the document is in another directory you will need to specify the path
678             to it as well.
679              
680             The variable $doc is an object, and represents our root XML element in the document.
681              
682             =head2 Reading text with getString
683              
684             Each element becomes an object. So lets assume that the XML page looks like
685             this:
686              
687            
688            
689            
690             field1a
691             field2b
692             field3c
693            
694            
695             field1d
696             field2e
697             field3f
698            
699            
700            
701              
702             As mentioned in he last step, the $doc object is the root
703             element of the XML page. In this case the root element is the "table"
704             element.
705              
706             To read the text of any field is as easy as navigating the XML elements.
707             For example, lets say that we want to retrieve the text "field2e". This
708             text is in the "field2" element of the SECOND "rec2" element, which is
709             in the FIRST "record" element.
710              
711             So the code to print that value it looks like this:
712              
713             print $doc->record(0)->rec2(1)->field2->getString;
714              
715             The "getString" method returns the text within an element.
716              
717             We can also break it down like this:
718              
719             # grab the FIRST "record" element (index starts at 0)
720             my $record = $doc->record(0);
721            
722             # grab the SECOND "rec2" element within $record
723             my $rec2 = $record->rec2(1);
724            
725             # grab the "field2" element from $rec2
726             # NOTE: If you don't specify an index, the first item
727             # is returned and in this case there is only 1.
728             my $field2 = $rec2->field2;
729              
730             # print the text
731             print $field2->getString;
732              
733             =head2 Reading XML attributes with getAttr
734              
735             Looking at the example in the previous step, can you guess what
736             this code will print?
737              
738             print $doc->record(0)->rec2(0)->getAttr('foo');
739             print $doc->record(0)->rec2(1)->getAttr('foo');
740              
741             If you couldn't guess, they will print out the value of the "foo"
742             attribute of the first and second rec2 elements.
743              
744             =head2 Looping through elements
745              
746             Lets take our example in the previous step where we printed the
747             attribute values and rewrite it to use a loop. This will allow
748             it to print all of the "foo" attributes no matter how many "rec2"
749             elements we have.
750              
751             foreach my $rec2 ( $doc->record(0)->rec2 ) {
752             print $rec2->getAttr('foo');
753             }
754              
755             When we call $doc->record(0)->rec2 this way (i.e. in list context),
756             the module will return a list of "rec2" elements.
757              
758             =head2 That's it!
759              
760             You are now an XML programmer! *start rejoicing now*
761              
762             =head1 PROGRAMMING NOTES
763              
764             When creating a new instance of XML::EasyOBJ it will return an
765             object reference on success, or undef on failure. Besides that,
766             ALL methods will always return a value. This means that if you
767             specify an element that does not exist, it will still return an
768             object reference (and create that element automagically). This
769             is just another way to lower the bar, and make this module easier
770             to use.
771              
772             You will run into problems if you have XML tags which are named
773             after perl's special subroutine names (i.e. "DESTROY", "AUTOLOAD"),
774             or if they are named after subroutines used in the module
775             ( "getString", "getAttr", etc ). You can get around this by using
776             the getElement() method of using the remapMethod() method which can
777             be used on every object method (except AUTOLOAD and DESTROY).
778              
779             =head1 AUTHOR/COPYRIGHT
780              
781             Copyright (C) 2000-2002 Robert Hanson
782              
783             This library is free software; you can redistribute it and/or modify
784             it under the same terms as Perl itself.
785              
786             =head1 SEE ALSO
787              
788             XML::DOM
789              
790             =cut
791              
792              
793