File Coverage

blib/lib/Test/XML/Easy.pm
Criterion Covered Total %
statement 208 221 94.1
branch 85 94 90.4
condition 25 28 89.2
subroutine 16 16 100.0
pod 4 4 100.0
total 338 363 93.1


line stmt bran cond sub pod time code
1             package Test::XML::Easy;
2              
3 11     11   699329 use strict;
  11         33  
  11         542  
4 11     11   69 use warnings;
  11         25  
  11         514  
5              
6 11     11   63 use vars qw(@EXPORT @ISA);
  11         25  
  11         933  
7 11     11   75 use Exporter;
  11         36  
  11         964  
8             @ISA = qw(Exporter);
9              
10             our $VERSION = '0.01';
11              
12 11     11   68 use Carp qw(croak);
  11         21  
  11         875  
13              
14 11     11   22998 use XML::Easy::Text qw(xml10_read_document xml10_write_document);
  11         119297  
  11         1348  
15 11     11   20470 use XML::Easy::Classify qw(is_xml_element);
  11         409732  
  11         1859  
16 11     11   138 use XML::Easy::Syntax qw($xml10_s_rx);
  11         25  
  11         1216  
17              
18 11     11   70 use Test::Builder;
  11         29  
  11         35164  
19             my $tester = Test::Builder->new();
20              
21             =head1 NAME
22              
23             Test::XML::Easy - test XML with XML::Easy
24              
25             =head1 SYNOPSIS
26              
27             use Test::More tests => 2;
28             use Test::XML::Easy;
29              
30             is_xml $some_xml, <<'ENDOFXML', "a test";
31            
32            
33            
34             fuzz
35            
36             ENDOFXML
37              
38             is_xml $some_xml, <<'ENDOFXML', { ignore_whitespace => 1, description => "my test" };
39            
40            
41             fuzz
42            
43             ENDOFXML
44              
45             isnt_xml $some_xml, $some_xml_it_must_not_be;
46              
47             is_well_formed_xml $some_xml;
48              
49             =head1 DESCRIPTION
50              
51             A simple testing tool, with only pure Perl dependancies, that checks if
52             two XML documents are "the same". In particular this module will check if
53             the documents schemantically equal as defined by the XML 1.0 specification
54             (i.e. that the two documents would construct the same DOM
55             model when parsed, so things like character sets and if you've used two tags
56             or a self closing tags aren't important.)
57              
58             This modules is a strict superset of B's interface, meaning if you
59             were using that module to check if two identical documents were the same then
60             this module should function as a drop in replacement. Be warned, however,
61             that this module by default is a lot stricter about how the XML documents
62             are allowed to differ.
63              
64             =head2 Functions
65              
66             This module, by default, exports a number of functions into your namespace.
67              
68             =over
69              
70             =item is_xml($xml_to_test, $expected_xml[, $options_hashref])
71              
72             Tests that the passed XML is "the same" as the expected XML.
73              
74             XML can be passed into this function in one of two ways; Either you can
75             provide a string (which the function will parse for you) or you can pass in
76             B objects that you've constructed yourself somehow.
77              
78             This funtion takes several options as the third argument. These can be
79             passed in as a hashref:
80              
81             =over
82              
83             =item description
84              
85             The name of the test that will be used in constructing the C / C
86             test output.
87              
88             =item ignore_whitespace
89              
90             Ignore many whitespace differences in text nodes. Currently
91             this has the same effect as turning on C
92             and C.
93              
94             =item ignore_surrounding_whitespace
95              
96             Ignore differences in leading and trailing whitespace
97             between elements. This means that
98              
99            

foo bar baz

100              
101             Is considered the same as
102              
103            

104             foo bar baz
105            

106              
107             And even
108              
109            

110             this is my cat:
111            

112              
113             Is considered the same as:
114              
115            

116             this is my cat:
117            

118              
119             Even though, to a web-browser, that extra space is significant whitespace
120             and the two documents would be renderd differently.
121              
122             However, as comments are completely ignored (we treat them as if they were
123             never even in the document) the following:
124              
125            

foobar

126              
127             would be considered different to
128              
129            

130             foo
131            
132             bar
133            

134              
135             As it's the same as comparing the string
136              
137             "foobar"
138              
139             And:
140              
141             "foo
142            
143             bar"
144              
145             The same is true for processing instructions and DTD declarations.
146              
147             =item ignore_leading_whitespace
148              
149             The same as C but only ignore
150             the whitespace immediately after an element start or end tag not
151             immedately before.
152              
153             =item ignore_trailing_whitespace
154              
155             The same as C but only ignore
156             the whitespace immediately before an element start or end tag not
157             immedately after.
158              
159             =item ignore_different_whitespace
160              
161             If set to a true value ignores differences in what characters
162             make up whitespace in text nodes. In other words, this option
163             makes the comparison only care that wherever there's whitespace
164             in the expected XML there's any whitespace in the actual XML
165             at all, not what that whitespace is made up of.
166              
167             It means the following
168              
169            

170             foo bar baz
171            

172              
173             Is the same as
174              
175            

176             foo
177             bar
178             baz
179            

180              
181             But not the same as
182              
183            

184             foobarbaz
185            

186              
187             This setting has no effect on attribute comparisons.
188              
189             =item verbose
190              
191             If true, print obsessive amounts of debug info out while
192             checking things
193              
194             =item show_xml
195              
196             This prints out in the diagnostic messages the expected and
197             actual XML on failure.
198              
199             =back
200              
201             If a third argument is passed to this function and that argument
202             is not a hashref then it will be assumed that this argument is
203             the the description as passed above. i.e.
204              
205             is_xml $xml, $expected, "my test";
206              
207             is the same as
208              
209             is_xml $xml, $expected, { description => "my test" };
210              
211             =cut
212              
213             sub is_xml($$;$) {
214 57     57 1 57678 my $got = shift;
215 57         102 my $expected = shift;
216              
217 57 100       180 unless (defined $expected) {
218 2         535 croak("expected argument must be defined");
219             }
220              
221             # munge the options
222              
223 55         85 my $got_original = $got;
224 55         74 my $expected_original = $expected;
225              
226 55         80 my $options = shift;
227 55 100       424 $options = { description => $options } unless ref $options eq "HASH";
228 55 100       210 $options = { %{$options}, description => "xml test" } unless defined $options->{description};
  42         195  
229 55 100       244 unless (is_xml_element($expected)) {
230             # throws an exception if there isn't a problem.
231 54         298 $expected = eval { xml10_read_document($expected) };
  54         844  
232 54 100       181 if ($@) {
233 2         528 croak "Couldn't parse expected XML document: $@";
234             }
235             }
236              
237             # convert into something useful if needed
238 53 100       174 unless (is_xml_element($got)) {
239 51         270 my $parsed = eval { xml10_read_document($got) };
  51         582  
240 51 100       126 if ($@) {
241 4         15 $tester->ok(0, $options->{description});
242 4         11 $tester->diag("Couldn't parse submitted XML document:");
243 4         14 $tester->diag(" $@");
244 4         28 return;
245             }
246              
247 47         96 $got = $parsed;
248             }
249              
250 49 100       164 if(_is_xml($got,$expected,$options,"", {})) {
251 23         109 $tester->ok(1,$options->{description});
252 23         6219 return 1;
253             }
254              
255 26 100       272 if ($options->{show_xml}) {
256 3         10 $tester->diag("The XML that we expected was:");
257 3 100       173 if (is_xml_element($expected_original))
258 1         14 { $tester->diag(xml10_write_document($expected_original)) }
259             else
260 2         15 { $tester->diag($expected_original) }
261              
262 3         216 $tester->diag("The XML that we received was:");
263 3 100       153 if (is_xml_element($got_original))
264 1         12 { $tester->diag(xml10_write_document($got_original)) }
265             else
266 2         12 { $tester->diag($got_original) }
267             }
268              
269 26         421 return;
270             }
271             push @EXPORT, "is_xml";
272              
273             sub _is_xml {
274 75     75   102 my $got = shift;
275 75         99 my $expected = shift;
276 75         87 my $options = shift;
277              
278             # this is the path
279 75         295 my $path = shift;
280              
281             # the index is used to keep track of how many of a particular
282             # typename of a particular element we've seen as previous siblings
283             # of the node that just got in. It's a hashref with type_name and
284             # the index.
285 75         119 my $index = shift;
286              
287             # change where the errors are reported from
288 75         118 local $Test::Builder::Level = $Test::Builder::Level + 1;
289              
290             # work out the details of the node we're looking at
291             # nb add one to the index because xpath is weirdly 1-index
292             # not 0-indexed like most other modern languages
293 75         249 my $got_name = $got->type_name();
294 75   100     359 my $got_index = ($index->{ $got_name } || 0) + 1;
295              
296             ### check if we've got a node to compare to
297              
298 75 50       199 unless ($expected) {
299 0         0 $tester->ok(0, $options->{description});
300 0         0 $tester->diag("Element '$path/$got_name\[$got_index]' was not expected");
301 0         0 return;
302             }
303              
304             ### check the node name
305              
306             # work out the details of the node we're comparing with
307 75         195 my $expected_name = $expected->type_name();
308 75   100     279 my $expected_index = ($index->{ $expected_name } || 0) + 1;
309              
310             # alter the index hashref to record we've seen another node
311             # of this name
312 75         247 $index->{$got_name}++;
313              
314 75 50       190 $tester->diag("comparing '$path/$got_name\[$expected_index]' to '$path/$expected_name\[$expected_index]'...") if $options->{verbose};
315              
316 75 100       184 if ($got_name ne $expected_name) {
317 4         20 $tester->ok(0, $options->{description});
318 4         1180 $tester->diag("Element '$path/$got_name\[$got_index]' does not match '$path/$expected_name\[$expected_index]'");
319 4         197 return;
320             }
321 71 50       174 $tester->diag("...matched name") if $options->{verbose};
322              
323             ### check the attributes
324              
325             # we're not looking at decendents, so burn the path of
326             # this node into the path we got passed in
327 71         265 $path .= "/$got_name\[$got_index]";
328              
329             # XML::Easy returns read only data structures
330             # we want to modify these to keep track of what
331             # we've processed, so we need to copy them
332 71         130 my %got_attr = %{ $got->attributes };
  71         542  
333 71         178 my $expected_attr = $expected->attributes;
334              
335 71         89 foreach my $attr (keys %{ $expected_attr }) {
  71         244  
336 6 50       16 $tester->diag("checking attribute '$path/\@$attr'...") if $options->{verbose};
337              
338 6 100       19 if (!exists($got_attr{$attr})) {
339 1         8 $tester->ok(0, $options->{description});
340 1         676 $tester->diag("expected attribute '$path/\@$attr' not found");
341 1         113 return;
342             }
343 5 50       13 $tester->diag("...found attribute") if $options->{verbose};
344              
345 5         12 my $expected_string = $expected_attr->{$attr};
346 5         10 my $got_string = delete $got_attr{$attr};
347              
348 5 100       208 if ($expected_string ne $got_string) {
349 1         12 $tester->ok(0, $options->{description});
350 1         3189 $tester->diag("attribute value for '$path/\@$attr' didn't match");
351 1         258 $tester->diag("found value:\n");
352 1         230 $tester->diag(" '$got_string'\n");
353 1         67 $tester->diag("expected value:\n");
354 1         228 $tester->diag(" '$expected_string'\n");
355 1         241 return;
356             }
357 4 50       13 $tester->diag("...the attribute contents matched") if $options->{verbose};
358             }
359 69 100       669 if (keys %got_attr) {
360 2         9 $tester->ok(0, $options->{description});
361 2 100       1132 $tester->diag("found extra unexpected attribute".(keys %got_attr>1 ? "s":"").":");
362 2         383 $tester->diag(" '$path/\@$_'") foreach sort keys %got_attr;
363 2         264 return;
364             }
365 67 50       154 $tester->diag("the attributes all matched") if $options->{verbose};
366              
367             ### check the child nodes
368              
369             # create a new index to pass to our children distint from
370             # the index that was passed in to us (as that one was created
371             # by our parent for me and my siblings)
372 67         102 my $child_index = {};
373              
374             # grab the child text...element...text...element...text...
375 67         181 my $got_content = $got->content;
376 67         135 my $expected_content = $expected->content;
377              
378             # step though the text/elements
379             # nb this loop works in steps of two; The other $i++
380             # is half way through the loop below
381 67         117 for (my $i = 0; $i < @{$got_content}; $i++) {
  86         209  
382              
383             ### check the text node
384              
385             # extract the text from the object
386 86         132 my $got_text = $got_content->[ $i ];
387 86         115 my $expected_text = $expected_content->[ $i ];
388 86         122 my $comp_got_text = $got_text;
389 86         97 my $comp_expected_text = $expected_text;
390              
391 86 100 100     559 if ($options->{ignore_whitespace} || $options->{ignore_leading_whitespace} || $options->{ignore_surrounding_whitespace}) {
      66        
392 10         133 $comp_got_text =~ s/ \A (?:$xml10_s_rx)* //x;
393 10         89 $comp_expected_text =~ s/ \A (?:$xml10_s_rx)* //x;
394             }
395              
396 86 100 100     628 if ($options->{ignore_whitespace} || $options->{ignore_trailing_whitespace} || $options->{ignore_surrounding_whitespace}) {
      66        
397 10         184 $comp_got_text =~ s/ (?:$xml10_s_rx)* \z//x;
398 10         173 $comp_expected_text =~ s/ (?:$xml10_s_rx)* \z//x;
399             }
400              
401 86 100 100     570 if ($options->{ignore_whitespace} || $options->{ignore_different_whitespace}) {
402 8         86 $comp_got_text =~ s/ (?:$xml10_s_rx)+ / /gx;
403 8         66 $comp_expected_text =~ s/ (?:$xml10_s_rx)+ / /gx;
404             }
405              
406 86 100       277 if ($comp_got_text ne $comp_expected_text) {
407              
408 18         71 $tester->ok(0, $options->{description});
409              
410             # I don't like these error message not being specific with xpath but as
411             # far as I know there's no easy way to express in xpath the text immediatly following
412             # a particular element. The best I could come up with was this mouthful:
413             # "$path/following-sibling::text()[ previous-sibling::*[1] == $path ]"
414              
415 18 100 100     14597 if ($i == 0) {
  5 100       18  
416 13 100 100     34 if (@{ $got_content } == 1 && @{ $expected_content } == 1) {
  13         48  
  11         43  
417 10         52 $tester->diag("text inside '$path' didn't match");
418             } else {
419 3         11 $tester->diag("text immediately inside opening tag of '$path' didn't match");
420             }
421 2         8 } elsif ($i == @{ $got_content} - 1 && $i == @{ $expected_content } - 1 ) {
422 1         6 $tester->diag("text immediately before closing tag of '$path' didn't match");
423             } else {
424 4         14 my $name = $got_content->[ $i - 1 ]->type_name;
425 4         7 my $ind = $child_index->{ $name };
426 4         18 $tester->diag("text immediately after '$path/$name\[$ind]' didn't match");
427             }
428              
429 18         1259 $tester->diag("found:\n");
430 18         1208 $tester->diag(" '$got_text'\n");
431 18         1255 $tester->diag("expected:\n");
432 18         1214 $tester->diag(" '$expected_text'\n");
433              
434 18 50       1233 if ($options->{verbose}) {
435 0         0 $tester->diag("compared found text:\n");
436 0         0 $tester->diag(" '$comp_got_text'\n");
437 0         0 $tester->diag("against text:\n");
438 0         0 $tester->diag(" '$comp_expected_text'\n");
439             }
440              
441 18         115 return;
442             }
443              
444             # move onto the next (elemnent) node if we didn't reach the end
445 68         312 $i++;
446 68 100       83 last if $i >= @{$got_content};
  68         210  
447              
448             ### check the element node
449              
450             # simply recurse for that node
451             # (don't bother checking if the expected node is defined or not, the case
452             # where it isn't is handled at the start of _is_xml)
453 26 100       157 return unless _is_xml(
454             $got_content->[$i],
455             $expected_content->[$i],
456             $options,
457             $path,
458             $child_index
459             );
460             }
461              
462             # check if we expected more nodes
463 42 50       58 if (@{ $expected_content } > @{ $got_content }) {
  42         71  
  42         113  
464 0         0 my $expected_nom = $expected_content->[ scalar @{ $got_content } ]->type_name;
  0         0  
465 0         0 my $expected_ind = $child_index->{ $expected_nom } + 1;
466 0         0 $tester->diag("Couldn't find expected node '$path/$expected_nom\[$expected_ind]'");
467 0         0 $tester->ok(0, $options->{description});
468 0         0 return;
469             }
470              
471 42         292 return 1;
472             }
473              
474             =item isnt_xml($xml_to_test, $not_expected_xml[, $options_hashref])
475              
476             Exactly the same as C (taking exactly the same options) but passes
477             if and only if what is passed is different to the not expected XML.
478              
479             By different, of course, we mean schematically different according to the
480             XML 1.0 specification. For example, this will fail:
481              
482             isnt_xml "", "";
483              
484             as those are schematically the same XML documents.
485              
486             However, it's worth noting that the first argument doesn't even have to be
487             valid XML for the test to pass. Both these pass as they're not schemantically
488             identical to the not expected XML:
489              
490             isnt_xml undef, $not_expecteded_xml;
491             isnt_xml "", $not_expected_xml;
492              
493             as invalid XML is not ever schemanitcally identical to a valid XML document.
494              
495             If you want to insist what you pass in is valid XML, but just not the
496             same as the other xml document you pass in then you can use two tests:
497              
498             is_well_formed_xml $xml;
499             isnt_xml $xml, $not_expected_xml;
500              
501             This function accepts the C option (just as C does) but
502             turning it on doesn't actually output anything extra - there's not useful this
503             function can output that would help you diagnose the failure case.
504              
505             =cut
506              
507             sub isnt_xml($$;$) {
508 11     11 1 9510 my $got = shift;
509 11         21 my $expected = shift;
510 11         19 my $options = shift;
511              
512 11 100       53 $options = { description => $options } unless ref $options eq "HASH";
513 11 100       35 $options = { %{$options}, description => "not xml test" }
  9         47  
514             unless defined $options->{description};
515              
516             # temporarly ignore test output and just get the result of running
517             # the is_xml function as normal
518 11         57 $tester = bless {}, "Test::XML::Easy::Ignore";
519 11 100       18 my $result = eval { is_xml($got, $expected, $options) ? 0 : 1 };
  11         42  
520 11         61 $tester = Test::Builder->new();
521              
522             # did we get an error? Note we don't check $@ directly incase
523             # it's been reset by a weird DESTROY() eval...
524 11 100 66     145 unless (defined($result) && length $result) { croak $@; }
  2         365  
525              
526 9 100       21 if ($result) {
527 6         35 $tester->ok(1, $options->{description});
528 6         2798 return 1;
529             }
530              
531 3         15 $tester->ok(0, $options->{description});
532 3         1499 $tester->diag("Unexpectedly matched the XML we didn't expect");
533 3 100       211 if ($options->{show_xml}) {
534 2         8 $tester->diag("The XML that we received was:");
535 2 100       131 if (is_xml_element($got))
536 1         10 { $tester->diag(xml10_write_document($got)) }
537             else
538 1         9 { $tester->diag($got) }
539             }
540 3         143 return;
541             }
542             push @EXPORT, "isnt_xml";
543              
544             =item is_well_formed_xml($string_containing_xml[, $description])
545              
546             Passes if and only if the string passed contains well formed XML.
547              
548             =cut
549              
550             sub is_well_formed_xml($;$) {
551 4     4 1 1989 my $xml_string = shift;
552 4         7 my $options = shift;
553              
554 4 100       19 $options = { description => $options } unless ref $options eq "HASH";
555 4 100       13 $options = { %{$options}, description => "xml well formed test" }
  2         11  
556             unless defined $options->{description};
557              
558 4 100       8 if(eval { xml10_read_document($xml_string); 1 }) {
  4         39  
  3         22  
559 3         14 $tester->ok(1, $options->{description});
560 3         941 return 1;
561             }
562              
563 1         6 $tester->ok(0, $options->{description});
564 1         570 $tester->diag($@);
565 1         69 return;
566             }
567             push @EXPORT, "is_well_formed_xml";
568              
569             =item isnt_well_formed_xml($string_not_containing_xml[, $description])
570              
571             Passes if and only if the string passed does not contain well formed XML.
572              
573             =cut
574              
575             sub isnt_well_formed_xml($;$) {
576 4     4 1 2271 my $xml_string = shift;
577 4         6 my $options = shift;
578              
579 4 100       19 $options = { description => $options } unless ref $options eq "HASH";
580 4 100       12 $options = { %{$options}, description => "xml not well formed test" }
  2         10  
581             unless defined $options->{description};
582              
583 4 100       8 unless (eval { xml10_read_document($xml_string); 1 }) {
  4         27  
  1         30  
584 3         12 $tester->ok(1, $options->{description});
585 3         723 return 1;
586             }
587              
588 1         5 $tester->ok(0, $options->{description});
589 1         472 $tester->diag("Unexpectedly well formed XML");
590 1         242 return;
591             }
592             push @EXPORT, "isnt_well_formed_xml";
593              
594             =back
595              
596             =head2 A note on Character Handling
597              
598             If you do not pass it an XML::Easy::Element object then these functions will happly parse
599             XML from the characters contained in whatever scalars you passed in. They will not
600             (and cannot) correctly parse data from a scalar that contains binary data (e.g. that
601             you've sucked in from a raw file handle) as they would have no idea what characters
602             those octlets would represent
603              
604             As long as your XML document contains legal characters from the ASCII range (i.e.
605             chr(1) to chr(127)) this distintion will not matter to you.
606              
607             However, if you use characters above codepoint 127 then you will probably need to
608             convert any bytes you have read in into characters. This is usually done by using
609             C, or by using a PerlIO layer on the filehandle as you read the data
610             in.
611              
612             If you don't know what any of this means I suggest you read the Encode::encode manpage
613             very carefully. Tom Insam's slides at L
614             may or may not help you understand this more (they at the very least contain a
615             cheatsheet for conversion.)
616              
617             The author highly recommends those of you using latin-1 characters from a utf-8 source
618             to use B to check the string for common mistakes before handing it C.
619              
620             =head1 AUTHOR
621              
622             Mark Fowler, C<< >>
623              
624             Copyright 2009 PhotoBox, All Rights Reserved.
625              
626             This program is free software; you can redistribute it and/or modify it
627             under the same terms as Perl itself.
628              
629             =head1 BUGS
630              
631             There's a few cavets when using this module:
632              
633             =over
634              
635             =item Not a validating parser
636              
637             Infact, we don't process (or compare) DTDs at all. These nodes are completely
638             ignored (it's as if you didn't include them in the string at all.)
639              
640             =item Comments and processing instructions are ignored
641              
642             We totally ignore comments and processing instructions, and it's as
643             if you didn't include them in the string at all either.
644              
645             =item Limited entity handling
646              
647             We only support the five "core" named entities (i.e. C<&>,
648             C<<>, C<>>, C<'> and C<">) and numerical character references
649             (in decimal or hex form.) It is not possible to declare further named
650             entities and the precence of undeclared named entities will either cause
651             an exception to be thrown (in the case of the expected string) or the test to
652             fail (in the case of the string you are testing)
653              
654             =item No namespace support
655              
656             Currently this is only an XML 1.0 parser, and not XML Namespaces aware (further
657             options may be added to later version of this module to enable namespace support)
658              
659             This means the following document:
660              
661            
662              
663             Is considered to be different to
664              
665            
666              
667             =item XML whitespace handling
668              
669             This module considers "whitespace" to be what the XML specification considers
670             to be whitespace. This is subtily different to what Perl considers to be
671             whitespace.
672              
673             =item No node reordering support
674              
675             Unlike B this module considers the order of sibling nodes to be
676             significant, and you cannot tell it to ignore the differring order of nodes
677             when comparing the expected and actual output.
678              
679             =back
680              
681             Please see L for
682             details of how to submit bugs, access the source control for
683             this project, and contact the author.
684              
685             =head1 SEE ALSO
686              
687             L (for instructions on how to test), L (for info
688             on the underlying xml parser) and L (for a similar module that
689             tests using XML::SchemanticDiff)
690              
691             =cut
692              
693             1; # End of Test::XML::Easy
694              
695             package Test::XML::Easy::Ignore;
696              
697             # a handy class you can bless your tester into so we ignore all
698             # calls and don't actually produce any test output
699              
700 9     9   16 sub ok { return }
701 14     14   20 sub diag { return }
702              
703             1; # End of Test::XML::Easy::Ignore;