File Coverage

blib/lib/XML/Easy/NodeBasics.pm
Criterion Covered Total %
statement 101 102 99.0
branch 33 34 97.0
condition 3 3 100.0
subroutine 26 26 100.0
pod 14 14 100.0
total 177 179 98.8


line stmt bran cond sub pod time code
1             =head1 NAME
2              
3             XML::Easy::NodeBasics - basic manipulation of XML data nodes
4              
5             =head1 SYNOPSIS
6              
7             use XML::Easy::NodeBasics qw(xml_content_object xml_element);
8              
9             $content = xml_content_object("this", "&", "that");
10             $content = xml_content_object(@sublems);
11              
12             $element = xml_element("a", { href => "there" }, "there");
13             $element = xml_element("div", @subelems);
14              
15             use XML::Easy::NodeBasics qw(
16             xml_c_content_object xml_c_content_twine);
17              
18             $content = xml_c_content_object($content);
19             $twine = xml_c_content_twine($content);
20              
21             use XML::Easy::NodeBasics qw(
22             xml_e_type_name
23             xml_e_attributes xml_e_attribute
24             xml_e_content_object);
25              
26             $type_name = xml_e_type_name($element);
27             $attributes = xml_e_attributes($element);
28             $href = xml_e_attribute($element, "href");
29             $content = xml_e_content_object($element);
30              
31             use XML::Easy::NodeBasics qw(
32             xml_c_equal xml_e_equal xml_c_unequal xml_e_unequal);
33              
34             if(xml_c_equal($content0, $content1)) { ...
35             if(xml_e_equal($element0, $element1)) { ...
36             if(xml_c_unequal($content0, $content1)) { ...
37             if(xml_e_unequal($element0, $element1)) { ...
38              
39             =head1 DESCRIPTION
40              
41             This module supplies functions concerned with the creation, examination,
42             and other manipulation of XML data nodes (content chunks and elements).
43             The nodes are dumb data objects, best manipulated using plain functions
44             such as the ones in this module.
45              
46             The nodes are objects of the classes L and
47             L. The data contained within an existing node
48             cannot be modified. This means that references to nodes can be copied
49             and passed around arbitrarily, without worrying about who might write to
50             them, or deep versus shallow copying. As a result, tasks that you might
51             think of as "modifying an XML node" actually involve creating a new node.
52              
53             The node classes do not have any interesting object-oriented behaviour,
54             and their minimalistic methods are not meant to be called directly.
55             Instead, node creation and examination should be performed using the
56             functions of this module.
57              
58             =head2 Twine
59              
60             For the purposes of examining what is contained within a chunk of
61             content, there is a standard representation of content known as "twine".
62             (It's stronger than a string, and has an alternating structure as will
63             be described.)
64              
65             A piece of twine is a reference to an array with an odd number of members.
66             The first and last members, and all members in between with an even index,
67             are strings giving the chunk's character data. Each member with an odd
68             index is a reference to an L object, representing
69             an XML element contained directly within the chunk. Any of the strings
70             may be empty, if the chunk has no character data between subelements or
71             at the start or end of the chunk.
72              
73             When not looking inside a content chunk, it is preferred to represent
74             it in encapsulated form as an L object.
75              
76             =cut
77              
78             package XML::Easy::NodeBasics;
79              
80 3     3   77206 { use 5.008; }
  3         11  
81 3     3   15 use warnings;
  3         6  
  3         84  
82 3     3   15 use strict;
  3         7  
  3         88  
83              
84 3     3   15 use Params::Classify 0.000 qw(is_string is_ref);
  3         45  
  3         173  
85 3         226 use XML::Easy::Classify 0.001 qw(
86             is_xml_name check_xml_chardata check_xml_attributes
87             is_xml_content_object check_xml_content_object
88             is_xml_element check_xml_element
89 3     3   347 );
  3         53  
90 3     3   17 use XML::Easy::Content 0.007 ();
  3         40  
  3         54  
91 3     3   13 use XML::Easy::Element 0.007 ();
  3         39  
  3         218  
92              
93             BEGIN {
94 3 50   3   6 if(eval { local $SIG{__DIE__};
  3         15  
95 3         945 require Internals;
96 3         1268 exists &Internals::SetReadOnly;
97             }) {
98 3         114 *_set_readonly = \&Internals::SetReadOnly;
99             } else {
100 0         0 *_set_readonly = sub { };
101             }
102             }
103              
104             our $VERSION = "0.011";
105              
106 3     3   16 use parent "Exporter";
  3         7  
  3         20  
107             our @EXPORT_OK = qw(
108             xml_content_object xc xml_content_twine xct xml_content xml_element xe
109             xml_c_content_object xc_cont xml_c_content_twine xc_twine xml_c_content
110             xml_e_type_name xe_type
111             xml_e_attributes xe_attrs xml_e_attribute xe_attr
112             xml_e_content_object xe_cont xml_e_content_twine xe_twine xml_e_content
113             xml_c_equal xc_eq xml_e_equal xe_eq
114             xml_c_unequal xc_ne xml_e_unequal xe_ne
115             );
116              
117             sub _throw_data_error($) {
118 218     218   488 my($msg) = @_;
119 218         1410 die "invalid XML data: $msg\n";
120             }
121              
122             =head1 FUNCTIONS
123              
124             Each function has two names. There is a longer descriptive name, and
125             a shorter name to spare screen space and the programmer's fingers.
126              
127             =head2 Construction
128              
129             The construction functions each accept any number of items of XML content.
130             These items may be supplied in any of several forms. Content item
131             types may be mixed arbitrarily, in any sequence. The permitted forms
132             of content item are:
133              
134             =over
135              
136             =item character data
137              
138             A plain string of characters that are acceptable to XML.
139              
140             =item element
141              
142             A reference to an L object representing an XML
143             element.
144              
145             =item content object
146              
147             A reference to an L object representing a chunk of
148             XML content.
149              
150             =item twine array
151              
152             A reference to a L array listing a chunk of XML content.
153              
154             =back
155              
156             The construction functions are:
157              
158             =over
159              
160             =item xml_content_object(ITEM ...)
161              
162             =item xc(ITEM ...)
163              
164             Constructs and returns a XML content object based on a list of
165             constituents. Any number of Is (zero or more) may be supplied; each
166             one must be a content item of a permitted type. All the constituents
167             are checked for validity, against the XML 1.0 specification, and the
168             function Cs if any are invalid.
169              
170             All the supplied content items are concatenated to form a single chunk.
171             The function returns a reference to an L object.
172              
173             =cut
174              
175             sub xml_content_twine(@);
176              
177 12120     12120 1 4732834 sub xml_content_object(@) { XML::Easy::Content->new(&xml_content_twine) }
178              
179             *xc = \&xml_content_object;
180              
181             =item xml_content_twine(ITEM ...)
182              
183             =item xct(ITEM ...)
184              
185             Performs the same construction job as L, but returns
186             the resulting content chunk in the form of L rather than
187             a content object.
188              
189             The returned array must not be subsequently modified. If possible,
190             it will be marked as read-only in order to prevent modification.
191              
192             =cut
193              
194             sub xml_content_twine(@) {
195 17902     17902 1 3008824 my @content = ("");
196 17902         44529 foreach(@_) {
197 24962 100       75218 if(is_string($_)) {
    100          
    100          
    100          
198 11756         36149 check_xml_chardata($_);
199 8684         42678 $content[-1] .= $_;
200             } elsif(is_xml_element($_)) {
201 2138         6594 push @content, $_, "";
202             } elsif(is_xml_content_object($_)) {
203 18         64 my $twine = $_->twine;
204 18         46 $content[-1] .= $twine->[0];
205 18         48 push @content, @{$twine}[1 .. $#$twine];
  18         55  
206             } elsif(is_ref($_, "ARRAY")) {
207 10842         443796 my $twine = XML::Easy::Content->new($_)->twine;
208 3210         23493 $content[-1] .= $twine->[0];
209 3210         8818 push @content, @{$twine}[1 .. $#$twine];
  3210         15950  
210             } else {
211 208         487 _throw_data_error("invalid content item");
212             }
213             }
214 6990         27408 _set_readonly(\$_) foreach @content;
215 6990         17215 _set_readonly(\@content);
216 6990         375674 return \@content;
217             }
218              
219             *xct = \&xml_content_twine;
220              
221             =item xml_content(ITEM ...)
222              
223             Deprecated alias for L.
224              
225             =cut
226              
227             *xml_content = \&xml_content_twine;
228              
229             =item xml_element(TYPE_NAME, ITEM ...)
230              
231             =item xe(TYPE_NAME, ITEM ...)
232              
233             Constructs and returns an L object, representing an
234             XML element, based on a list of consitutents. I must be a
235             string, and gives the name of the element's type. Any number of Is
236             (zero or more) may be supplied; each one must be either a content item
237             of a permitted type or a reference to a hash of attributes. All the
238             constituents are checked for validity, against the XML 1.0 specification,
239             and the function Cs if any are invalid.
240              
241             All the attributes supplied are gathered together to form the element's
242             attribute set. It is an error if an attribute name has been used more
243             than once (even if the same value was given each time). All the supplied
244             content items are concatenated to form the element's content.
245             The function returns a reference to an L object.
246              
247             =cut
248              
249             sub xml_element($@) {
250 17236     17236 1 16221320 my $type_name = shift(@_);
251 17236 100       60635 XML::Easy::Element->new($type_name, {}, [""])
252             unless is_xml_name($type_name);
253 15316         39475 my %attrs;
254 15316         65351 for(my $i = 0; $i != @_; ) {
255 32654         58821 my $item = $_[$i];
256 32654 100       70206 if(is_ref($item, "HASH")) {
257 13164         57401 while(my($k, $v) = each(%$item)) {
258             _throw_data_error("duplicate attribute name")
259 315292 100       600246 if exists $attrs{$k};
260 315282         878747 $attrs{$k} = $v;
261             }
262 13154         37992 splice @_, $i, 1, ();
263             } else {
264 19490         50931 $i++;
265             }
266             }
267 15306         58999 check_xml_attributes(\%attrs);
268 6336         19400 return XML::Easy::Element->new($type_name, \%attrs,
269             &xml_content_object);
270             }
271              
272             *xe = \&xml_element;
273              
274             =back
275              
276             =head2 Examination of content chunks
277              
278             =over
279              
280             =item xml_c_content_object(CONTENT)
281              
282             =item xc_cont(CONTENT)
283              
284             I must be a reference to either an L
285             object or a L array.
286             Returns a reference to an L object encapsulating
287             the content.
288              
289             =cut
290              
291             sub xml_c_content_object($) {
292 14112 100   14112 1 559561 if(is_ref($_[0], "ARRAY")) {
293 8232         428023 return XML::Easy::Content->new($_[0]);
294             } else {
295 5880         18749 &check_xml_content_object;
296 5712         23472 return $_[0];
297             }
298             }
299              
300             *xc_cont = \&xml_c_content_object;
301              
302             =item xml_c_content_twine(CONTENT)
303              
304             =item xc_twine(CONTENT)
305              
306             I must be a reference to either an L
307             object or a L array.
308             Returns a reference to a L array listing the content.
309              
310             The returned array must not be subsequently modified. If possible,
311             it will be marked as read-only in order to prevent modification.
312              
313             =cut
314              
315 13440     13440 1 517723 sub xml_c_content_twine($) { xml_c_content_object($_[0])->twine }
316              
317             *xc_twine = \&xml_c_content_twine;
318              
319             =item xml_c_content(CONTENT)
320              
321             Deprecated alias for L.
322              
323             =cut
324              
325             *xml_c_content = \&xml_c_content_twine;
326              
327             =back
328              
329             =head2 Examination of elements
330              
331             =over
332              
333             =item xml_e_type_name(ELEMENT)
334              
335             =item xe_type(ELEMENT)
336              
337             I must be a reference to an L object.
338             Returns the element's type's name, as a string.
339              
340             =cut
341              
342             sub xml_e_type_name($) {
343 36     36 1 24377 &check_xml_element;
344 4         62 return $_[0]->type_name;
345             }
346              
347             *xe_type = \&xml_e_type_name;
348              
349             =item xml_e_attributes(ELEMENT)
350              
351             =item xe_attrs(ELEMENT)
352              
353             I must be a reference to an L object.
354             Returns a reference to a hash encapsulating
355             the element's attributes. In the hash, each key is an attribute name,
356             and the corresponding value is the attribute's value as a string.
357              
358             The returned hash must not be subsequently modified. If possible, it
359             will be marked as read-only in order to prevent modification. As a side
360             effect, the read-only-ness may make lookup of any non-existent attribute
361             generate an exception rather than returning C.
362              
363             =cut
364              
365             sub xml_e_attributes($) {
366 36     36 1 20475 &check_xml_element;
367 4         31 return $_[0]->attributes;
368             }
369              
370             *xe_attrs = \&xml_e_attributes;
371              
372             =item xml_e_attribute(ELEMENT, NAME)
373              
374             =item xe_attr(ELEMENT, NAME)
375              
376             I must be a reference to an L object.
377             Looks up a specific attribute of the
378             element, by a name supplied as a string. If there is an attribute by
379             that name then its value is returned, as a string. If there is no such
380             attribute then C is returned.
381              
382             =cut
383              
384             sub xml_e_attribute($$) {
385 556     556 1 480027 check_xml_element($_[0]);
386 492         16170 return $_[0]->attribute($_[1]);
387             }
388              
389             *xe_attr = \&xml_e_attribute;
390              
391             =item xml_e_content_object(ELEMENT)
392              
393             =item xe_cont(ELEMENT)
394              
395             I must be a reference to an L object.
396             Returns a reference to an L object encapsulating
397             the element's content.
398              
399             =cut
400              
401             sub xml_e_content_object($) {
402 36     36 1 27148 &check_xml_element;
403 4         29 return $_[0]->content_object;
404             }
405              
406             *xe_cont = \&xml_e_content_object;
407              
408             =item xml_e_content_twine(ELEMENT)
409              
410             =item xe_twine(ELEMENT)
411              
412             I must be a reference to an L object.
413             Returns a reference to a L array listing the element's content.
414              
415             The returned array must not be subsequently modified. If possible,
416             it will be marked as read-only in order to prevent modification.
417              
418             =cut
419              
420             sub xml_e_content_twine($) {
421 36     36 1 25991 &check_xml_element;
422 4         34 return $_[0]->content_twine;
423             }
424              
425             *xe_twine = \&xml_e_content_twine;
426              
427             =item xml_e_content(ELEMENT)
428              
429             Deprecated alias for L.
430              
431             =cut
432              
433             *xml_e_content = \&xml_e_content_twine;
434              
435             =back
436              
437             =head2 Comparison
438              
439             =over
440              
441             =item xml_c_equal(A, B)
442              
443             =item xc_eq(A, B)
444              
445             I and I must each be a reference to either an L
446             object or a L array.
447             Returns true if they represent exactly the same content,
448             and false if they do not.
449              
450             =cut
451              
452             sub _xe_eq($$);
453              
454             sub _xct_eq($$) {
455 4424     4424   11182 my($a, $b) = @_;
456 4424 100       12231 return !!1 if $a == $b;
457 4408 100       14996 return !!0 unless @$a == @$b;
458 2960         10285 for(my $i = $#$a; $i >= 0; $i -= 2) {
459 3032 100       16570 return !!0 unless $a->[$i] eq $b->[$i];
460             }
461 104         346 for(my $i = $#$a-1; $i >= 0; $i -= 2) {
462 56 100       163 return !!0 unless _xe_eq($a->[$i], $b->[$i]);
463             }
464 96         579 return !!1;
465             }
466              
467             sub xml_c_equal($$) {
468 7048     7048 1 2679175 return _xct_eq(xml_c_content_twine($_[0]), xml_c_content_twine($_[1]));
469             }
470              
471             *xc_eq = \&xml_c_equal;
472              
473             =item xml_e_equal(A, B)
474              
475             =item xe_eq(A, B)
476              
477             I and I must each be a reference to an L object.
478             Returns true if they represent exactly the same element,
479             and false if they do not.
480              
481             =cut
482              
483             sub _xe_eq($$) {
484 192     192   473 my($a, $b) = @_;
485 192 100       752 return !!1 if $a == $b;
486 128 100       676 return !!0 unless $a->type_name eq $b->type_name;
487 88         290 my $aattr = $a->attributes;
488 88         229 my $battr = $b->attributes;
489 88         314 foreach(keys %$aattr) {
490             return !!0 unless exists($battr->{$_}) &&
491 98 100 100     727 $aattr->{$_} eq $battr->{$_};
492             }
493 52         164 foreach(keys %$battr) {
494 60 100       251 return !!0 unless exists $aattr->{$_};
495             }
496 32         138 return _xct_eq($a->content_twine, $b->content_twine);
497             }
498              
499             sub xml_e_equal($$) {
500 264     264 1 54607 check_xml_element($_[0]);
501 200         735 check_xml_element($_[1]);
502 136         338 return &_xe_eq;
503             }
504              
505             *xe_eq = \&xml_e_equal;
506              
507             =item xml_c_unequal(A, B)
508              
509             =item xc_ne(A, B)
510              
511             I and I must each be a reference to either an L
512             object or a L array.
513             Returns true if they do not represent exactly the same content,
514             and false if they do.
515              
516             =cut
517              
518 3524     3524 1 2589020 sub xml_c_unequal($$) { !&xml_c_equal }
519              
520             *xc_ne = \&xml_c_unequal;
521              
522             =item xml_e_unequal(A, B)
523              
524             =item xe_ne(A, B)
525              
526             I and I must each be a reference to an L object.
527             Returns true if they do not represent exactly the same element,
528             and false if they do.
529              
530             =cut
531              
532 132     132 1 52273 sub xml_e_unequal($$) { !&xml_e_equal }
533              
534             *xe_ne = \&xml_e_unequal;
535              
536             =back
537              
538             =head1 SEE ALSO
539              
540             L,
541             L,
542             L,
543             L,
544             L,
545             L
546              
547             =head1 AUTHOR
548              
549             Andrew Main (Zefram)
550              
551             =head1 COPYRIGHT
552              
553             Copyright (C) 2009, 2010, 2011, 2017
554             Andrew Main (Zefram)
555              
556             =head1 LICENSE
557              
558             This module is free software; you can redistribute it and/or modify it
559             under the same terms as Perl itself.
560              
561             =cut
562              
563             1;