File Coverage

blib/lib/XML/Easy/NodeBasics.pm
Criterion Covered Total %
statement 101 102 99.0
branch 33 34 97.0
condition 3 3 100.0
subroutine 26 26 100.0
pod 14 14 100.0
total 177 179 98.8


line stmt bran cond sub pod time code
1             =head1 NAME
2              
3             XML::Easy::NodeBasics - basic manipulation of XML data nodes
4              
5             =head1 SYNOPSIS
6              
7             use XML::Easy::NodeBasics qw(xml_content_object xml_element);
8              
9             $content = xml_content_object("this", "&", "that");
10             $content = xml_content_object(@sublems);
11              
12             $element = xml_element("a", { href => "there" }, "there");
13             $element = xml_element("div", @subelems);
14              
15             use XML::Easy::NodeBasics
16             qw(xml_c_content_object xml_c_content_twine);
17              
18             $content = xml_c_content_object($content);
19             $twine = xml_c_content_twine($content);
20              
21             use XML::Easy::NodeBasics qw(
22             xml_e_type_name
23             xml_e_attributes xml_e_attribute
24             xml_e_content_object
25             );
26              
27             $type_name = xml_e_type_name($element);
28             $attributes = xml_e_attributes($element);
29             $href = xml_e_attribute($element, "href");
30             $content = xml_e_content_object($element);
31              
32             use XML::Easy::NodeBasics qw(
33             xml_c_equal xml_e_equal
34             xml_c_unequal xml_e_unequal
35             );
36              
37             if(xml_c_equal($content0, $content1)) { ...
38             if(xml_e_equal($element0, $element1)) { ...
39             if(xml_c_unequal($content0, $content1)) { ...
40             if(xml_e_unequal($element0, $element1)) { ...
41              
42             =head1 DESCRIPTION
43              
44             This module supplies functions concerned with the creation, examination,
45             and other manipulation of XML data nodes (content chunks and elements).
46             The nodes are dumb data objects, best manipulated using plain functions
47             such as the ones in this module.
48              
49             The nodes are objects of the classes L and
50             L. The data contained within an existing node
51             cannot be modified. This means that references to nodes can be copied
52             and passed around arbitrarily, without worrying about who might write to
53             them, or deep versus shallow copying. As a result, tasks that you might
54             think of as "modifying an XML node" actually involve creating a new node.
55              
56             The node classes do not have any interesting object-oriented behaviour,
57             and their minimalistic methods are not meant to be called directly.
58             Instead, node creation and examination should be performed using the
59             functions of this module.
60              
61             =head2 Twine
62              
63             For the purposes of examining what is contained within a chunk of
64             content, there is a standard representation of content known as "twine".
65             (It's stronger than a string, and has an alternating structure as will
66             be described.)
67              
68             A piece of twine is a reference to an array with an odd number of members.
69             The first and last members, and all members in between with an even index,
70             are strings giving the chunk's character data. Each member with an odd
71             index is a reference to an L object, representing
72             an XML element contained directly within the chunk. Any of the strings
73             may be empty, if the chunk has no character data between subelements or
74             at the start or end of the chunk.
75              
76             When not looking inside a content chunk, it is preferred to represent
77             it in encapsulated form as an L object.
78              
79             =cut
80              
81             package XML::Easy::NodeBasics;
82              
83 3     3   35446 { use 5.008; }
  3         13  
84 3     3   17 use warnings;
  3         7  
  3         96  
85 3     3   14 use strict;
  3         8  
  3         103  
86              
87 3     3   18 use Params::Classify 0.000 qw(is_string is_ref);
  3         52  
  3         205  
88 3         215 use XML::Easy::Classify 0.001 qw(
89             is_xml_name check_xml_chardata check_xml_attributes
90             is_xml_content_object check_xml_content_object
91             is_xml_element check_xml_element
92 3     3   1689 );
  3         55  
93 3     3   18 use XML::Easy::Content 0.007 ();
  3         37  
  3         56  
94 3     3   14 use XML::Easy::Element 0.007 ();
  3         42  
  3         198  
95              
96             BEGIN {
97 3 50   3   9 if(eval { local $SIG{__DIE__};
  3         13  
98 3         874 require Internals;
99 3         1075 exists &Internals::SetReadOnly;
100             }) {
101 3         108 *_set_readonly = \&Internals::SetReadOnly;
102             } else {
103 0         0 *_set_readonly = sub { };
104             }
105             }
106              
107             our $VERSION = "0.010";
108              
109 3     3   17 use parent "Exporter";
  3         6  
  3         20  
110             our @EXPORT_OK = qw(
111             xml_content_object xc xml_content_twine xct xml_content xml_element xe
112             xml_c_content_object xc_cont xml_c_content_twine xc_twine xml_c_content
113             xml_e_type_name xe_type
114             xml_e_attributes xe_attrs xml_e_attribute xe_attr
115             xml_e_content_object xe_cont xml_e_content_twine xe_twine xml_e_content
116             xml_c_equal xc_eq xml_e_equal xe_eq
117             xml_c_unequal xc_ne xml_e_unequal xe_ne
118             );
119              
120             sub _throw_data_error($) {
121 218     218   583 my($msg) = @_;
122 218         1615 die "invalid XML data: $msg\n";
123             }
124              
125             =head1 FUNCTIONS
126              
127             Each function has two names. There is a longer descriptive name, and
128             a shorter name to spare screen space and the programmer's fingers.
129              
130             =head2 Construction
131              
132             The construction functions each accept any number of items of XML content.
133             These items may be supplied in any of several forms. Content item
134             types may be mixed arbitrarily, in any sequence. The permitted forms
135             of content item are:
136              
137             =over
138              
139             =item character data
140              
141             A plain string of characters that are acceptable to XML.
142              
143             =item element
144              
145             A reference to an L object representing an XML
146             element.
147              
148             =item content object
149              
150             A reference to an L object representing a chunk of
151             XML content.
152              
153             =item twine array
154              
155             A reference to a L array listing a chunk of XML content.
156              
157             =back
158              
159             The construction functions are:
160              
161             =over
162              
163             =item xml_content_object(ITEM ...)
164              
165             =item xc(ITEM ...)
166              
167             Constructs and returns a XML content object based on a list of
168             constituents. Any number of Is (zero or more) may be supplied; each
169             one must be a content item of a permitted type. All the constituents
170             are checked for validity, against the XML 1.0 specification, and the
171             function Cs if any are invalid.
172              
173             All the supplied content items are concatenated to form a single chunk.
174             The function returns a reference to an L object.
175              
176             =cut
177              
178             sub xml_content_twine(@);
179              
180 11736     11736 1 4463678 sub xml_content_object(@) { XML::Easy::Content->new(&xml_content_twine) }
181              
182             *xc = \&xml_content_object;
183              
184             =item xml_content_twine(ITEM ...)
185              
186             =item xct(ITEM ...)
187              
188             Performs the same construction job as L, but returns
189             the resulting content chunk in the form of L rather than
190             a content object.
191              
192             The returned array must not be subsequently modified. If possible,
193             it will be marked as read-only in order to prevent modification.
194              
195             =cut
196              
197             sub xml_content_twine(@) {
198 17326     17326 1 2437815 my @content = ("");
199 17326         43553 foreach(@_) {
200 24242 100       83825 if(is_string($_)) {
    100          
    100          
    100          
201 11420         40520 check_xml_chardata($_);
202 8540         45313 $content[-1] .= $_;
203             } elsif(is_xml_element($_)) {
204 2138         7024 push @content, $_, "";
205             } elsif(is_xml_content_object($_)) {
206 18         73 my $twine = $_->twine;
207 18         51 $content[-1] .= $twine->[0];
208 18         51 push @content, @{$twine}[1 .. $#$twine];
  18         66  
209             } elsif(is_ref($_, "ARRAY")) {
210 10458         463374 my $twine = XML::Easy::Content->new($_)->twine;
211 3210         25344 $content[-1] .= $twine->[0];
212 3210         10135 push @content, @{$twine}[1 .. $#$twine];
  3210         17909  
213             } else {
214 208         543 _throw_data_error("invalid content item");
215             }
216             }
217 6990         37126 _set_readonly(\$_) foreach @content;
218 6990         20632 _set_readonly(\@content);
219 6990         440868 return \@content;
220             }
221              
222             *xct = \&xml_content_twine;
223              
224             =item xml_content(ITEM ...)
225              
226             Deprecated alias for L.
227              
228             =cut
229              
230             *xml_content = \&xml_content_twine;
231              
232             =item xml_element(TYPE_NAME, ITEM ...)
233              
234             =item xe(TYPE_NAME, ITEM ...)
235              
236             Constructs and returns an L object, representing an
237             XML element, based on a list of consitutents. I must be a
238             string, and gives the name of the element's type. Any number of Is
239             (zero or more) may be supplied; each one must be either a content item
240             of a permitted type or a reference to a hash of attributes. All the
241             constituents are checked for validity, against the XML 1.0 specification,
242             and the function Cs if any are invalid.
243              
244             All the attributes supplied are gathered together to form the element's
245             attribute set. It is an error if an attribute name has been used more
246             than once (even if the same value was given each time). All the supplied
247             content items are concatenated to form the element's content.
248             The function returns a reference to an L object.
249              
250             =cut
251              
252             sub xml_element($@) {
253 16724     16724 1 15521851 my $type_name = shift(@_);
254 16724 100       67371 XML::Easy::Element->new($type_name, {}, [""])
255             unless is_xml_name($type_name);
256 14844         40238 my %attrs;
257 14844         50329 for(my $i = 0; $i != @_; ) {
258 31646         63963 my $item = $_[$i];
259 31646 100       78002 if(is_ref($item, "HASH")) {
260 12772         58656 while(my($k, $v) = each(%$item)) {
261             _throw_data_error("duplicate attribute name")
262 304900 100       711519 if exists $attrs{$k};
263 304890         983602 $attrs{$k} = $v;
264             }
265 12762         42143 splice @_, $i, 1, ();
266             } else {
267 18874         52727 $i++;
268             }
269             }
270 14834         63489 check_xml_attributes(\%attrs);
271 6144         18925 return XML::Easy::Element->new($type_name, \%attrs,
272             &xml_content_object);
273             }
274              
275             *xe = \&xml_element;
276              
277             =back
278              
279             =head2 Examination of content chunks
280              
281             =over
282              
283             =item xml_c_content_object(CONTENT)
284              
285             =item xc_cont(CONTENT)
286              
287             I must be a reference to either an L
288             object or a L array.
289             Returns a reference to an L object encapsulating
290             the content.
291              
292             =cut
293              
294             sub xml_c_content_object($) {
295 13856 100   13856 1 493747 if(is_ref($_[0], "ARRAY")) {
296 8040         432068 return XML::Easy::Content->new($_[0]);
297             } else {
298 5816         22514 &check_xml_content_object;
299 5648         25492 return $_[0];
300             }
301             }
302              
303             *xc_cont = \&xml_c_content_object;
304              
305             =item xml_c_content_twine(CONTENT)
306              
307             =item xc_twine(CONTENT)
308              
309             I must be a reference to either an L
310             object or a L array.
311             Returns a reference to a L array listing the content.
312              
313             The returned array must not be subsequently modified. If possible,
314             it will be marked as read-only in order to prevent modification.
315              
316             =cut
317              
318 13216     13216 1 499847 sub xml_c_content_twine($) { xml_c_content_object($_[0])->twine }
319              
320             *xc_twine = \&xml_c_content_twine;
321              
322             =item xml_c_content(CONTENT)
323              
324             Deprecated alias for L.
325              
326             =cut
327              
328             *xml_c_content = \&xml_c_content_twine;
329              
330             =back
331              
332             =head2 Examination of elements
333              
334             =over
335              
336             =item xml_e_type_name(ELEMENT)
337              
338             =item xe_type(ELEMENT)
339              
340             I must be a reference to an L object.
341             Returns the element's type's name, as a string.
342              
343             =cut
344              
345             sub xml_e_type_name($) {
346 36     36 1 21755 &check_xml_element;
347 4         30 return $_[0]->type_name;
348             }
349              
350             *xe_type = \&xml_e_type_name;
351              
352             =item xml_e_attributes(ELEMENT)
353              
354             =item xe_attrs(ELEMENT)
355              
356             I must be a reference to an L object.
357             Returns a reference to a hash encapsulating
358             the element's attributes. In the hash, each key is an attribute name,
359             and the corresponding value is the attribute's value as a string.
360              
361             The returned hash must not be subsequently modified. If possible, it
362             will be marked as read-only in order to prevent modification. As a side
363             effect, the read-only-ness may make lookup of any non-existent attribute
364             generate an exception rather than returning C.
365              
366             =cut
367              
368             sub xml_e_attributes($) {
369 36     36 1 20399 &check_xml_element;
370 4         43 return $_[0]->attributes;
371             }
372              
373             *xe_attrs = \&xml_e_attributes;
374              
375             =item xml_e_attribute(ELEMENT, NAME)
376              
377             =item xe_attr(ELEMENT, NAME)
378              
379             I must be a reference to an L object.
380             Looks up a specific attribute of the
381             element, by a name supplied as a string. If there is an attribute by
382             that name then its value is returned, as a string. If there is no such
383             attribute then C is returned.
384              
385             =cut
386              
387             sub xml_e_attribute($$) {
388 546     546 1 331000 check_xml_element($_[0]);
389 482         12215 return $_[0]->attribute($_[1]);
390             }
391              
392             *xe_attr = \&xml_e_attribute;
393              
394             =item xml_e_content_object(ELEMENT)
395              
396             =item xe_cont(ELEMENT)
397              
398             I must be a reference to an L object.
399             Returns a reference to an L object encapsulating
400             the element's content.
401              
402             =cut
403              
404             sub xml_e_content_object($) {
405 36     36 1 18586 &check_xml_element;
406 4         31 return $_[0]->content_object;
407             }
408              
409             *xe_cont = \&xml_e_content_object;
410              
411             =item xml_e_content_twine(ELEMENT)
412              
413             =item xe_twine(ELEMENT)
414              
415             I must be a reference to an L object.
416             Returns a reference to a L array listing the element's content.
417              
418             The returned array must not be subsequently modified. If possible,
419             it will be marked as read-only in order to prevent modification.
420              
421             =cut
422              
423             sub xml_e_content_twine($) {
424 36     36 1 16858 &check_xml_element;
425 4         121 return $_[0]->content_twine;
426             }
427              
428             *xe_twine = \&xml_e_content_twine;
429              
430             =item xml_e_content(ELEMENT)
431              
432             Deprecated alias for L.
433              
434             =cut
435              
436             *xml_e_content = \&xml_e_content_twine;
437              
438             =back
439              
440             =head2 Comparison
441              
442             =over
443              
444             =item xml_c_equal(A, B)
445              
446             =item xc_eq(A, B)
447              
448             I and I must each be a reference to either an L
449             object or a L array.
450             Returns true if they represent exactly the same content,
451             and false if they do not.
452              
453             =cut
454              
455             sub _xe_eq($$);
456              
457             sub _xct_eq($$) {
458 4424     4424   10869 my($a, $b) = @_;
459 4424 100       12812 return !!1 if $a == $b;
460 4408 100       14876 return !!0 unless @$a == @$b;
461 2960         9513 for(my $i = $#$a; $i >= 0; $i -= 2) {
462 3032 100       14669 return !!0 unless $a->[$i] eq $b->[$i];
463             }
464 104         281 for(my $i = $#$a-1; $i >= 0; $i -= 2) {
465 56 100       172 return !!0 unless _xe_eq($a->[$i], $b->[$i]);
466             }
467 96         499 return !!1;
468             }
469              
470             sub xml_c_equal($$) {
471 6920     6920 1 2303658 return _xct_eq(xml_c_content_twine($_[0]), xml_c_content_twine($_[1]));
472             }
473              
474             *xc_eq = \&xml_c_equal;
475              
476             =item xml_e_equal(A, B)
477              
478             =item xe_eq(A, B)
479              
480             I and I must each be a reference to an L object.
481             Returns true if they represent exactly the same element,
482             and false if they do not.
483              
484             =cut
485              
486             sub _xe_eq($$) {
487 192     192   438 my($a, $b) = @_;
488 192 100       1066 return !!1 if $a == $b;
489 128 100       631 return !!0 unless $a->type_name eq $b->type_name;
490 88         254 my $aattr = $a->attributes;
491 88         215 my $battr = $b->attributes;
492 88         321 foreach(keys %$aattr) {
493             return !!0 unless exists($battr->{$_}) &&
494 100 100 100     673 $aattr->{$_} eq $battr->{$_};
495             }
496 52         145 foreach(keys %$battr) {
497 60 100       254 return !!0 unless exists $aattr->{$_};
498             }
499 32         122 return _xct_eq($a->content_twine, $b->content_twine);
500             }
501              
502             sub xml_e_equal($$) {
503 264     264 1 61745 check_xml_element($_[0]);
504 200         711 check_xml_element($_[1]);
505 136         339 return &_xe_eq;
506             }
507              
508             *xe_eq = \&xml_e_equal;
509              
510             =item xml_c_unequal(A, B)
511              
512             =item xc_ne(A, B)
513              
514             I and I must each be a reference to either an L
515             object or a L array.
516             Returns true if they do not represent exactly the same content,
517             and false if they do.
518              
519             =cut
520              
521 3460     3460 1 2189525 sub xml_c_unequal($$) { !&xml_c_equal }
522              
523             *xc_ne = \&xml_c_unequal;
524              
525             =item xml_e_unequal(A, B)
526              
527             =item xe_ne(A, B)
528              
529             I and I must each be a reference to an L object.
530             Returns true if they do not represent exactly the same element,
531             and false if they do.
532              
533             =cut
534              
535 132     132 1 49623 sub xml_e_unequal($$) { !&xml_e_equal }
536              
537             *xe_ne = \&xml_e_unequal;
538              
539             =back
540              
541             =head1 SEE ALSO
542              
543             L,
544             L,
545             L,
546             L,
547             L,
548             L
549              
550             =head1 AUTHOR
551              
552             Andrew Main (Zefram)
553              
554             =head1 COPYRIGHT
555              
556             Copyright (C) 2009, 2010, 2011, 2017
557             Andrew Main (Zefram)
558              
559             =head1 LICENSE
560              
561             This module is free software; you can redistribute it and/or modify it
562             under the same terms as Perl itself.
563              
564             =cut
565              
566             1;