File Coverage

blib/lib/XML/Validator/Schema.pm
Criterion Covered Total %
statement 76 129 58.9
branch 5 24 20.8
condition 2 8 25.0
subroutine 24 28 85.7
pod 3 5 60.0
total 110 194 56.7


line stmt bran cond sub pod time code
1             package XML::Validator::Schema;
2              
3 5     5   93350 use 5.006;
  5         19  
  5         199  
4 5     5   27 use strict;
  5         8  
  5         169  
5 5     5   34 use warnings;
  5         9  
  5         721  
6              
7             our $VERSION = '1.10';
8              
9             =head1 NAME
10              
11             XML::Validator::Schema - validate XML against a subset of W3C XML Schema
12              
13             =head1 SYNOPSIS
14              
15             use XML::SAX::ParserFactory;
16             use XML::Validator::Schema;
17              
18             #
19             # create a new validator object, using foo.xsd
20             #
21             $validator = XML::Validator::Schema->new(file => 'foo.xsd');
22              
23             #
24             # create a SAX parser and assign the validator as a Handler
25             #
26             $parser = XML::SAX::ParserFactory->parser(Handler => $validator);
27              
28             #
29             # validate foo.xml against foo.xsd
30             #
31             eval { $parser->parse_uri('foo.xml') };
32             die "File failed validation: $@" if $@;
33              
34             =head1 DESCRIPTION
35              
36             This module allows you to validate XML documents against a W3C XML
37             Schema. This module does not implement the full W3C XML Schema
38             recommendation (http://www.w3.org/XML/Schema), but a useful subset.
39             See the L</"SCHEMA SUPPORT"> section below.
40              
41             B<IMPORTANT NOTE>: To get line and column numbers in the error
42             messages generated by this module you must install
43             L<XML::Filter::ExceptionLocator> and use
44             L<XML::SAX::ExpatXS> as your SAX parser. This
45             module is much more useful if you can tell where your errors are, so
46             using these modules is highly recommended!
47              
48             =head1 INTERFACE
49              
50             =over 4
51              
52             =item *
53              
54             C<< XML::Validator::Schema->new(file => 'file.xsd', cache => 1) >>
55              
56             Call this method to create a new XML::Validator::Schema object. The
57             only required option is C<file> which must provide a path to an XML
58             Schema document.
59              
60             Setting the optional C<cache> parameter to 1 causes
61             XML::Validator::Schema to keep a copy of the schema parse tree in
62             memory. The tree will be reused on subsequent calls with the same
63             C<file> parameter, as long as the mtime on the schema file hasn't
64             changed. This can save a lot of time if you're validating many
65             documents against a single schema.
66              
67             Since XML::Validator::Schema is a SAX filter you will normally pass
68             this object to a SAX parser:
69              
70             $validator = XML::Validator::Schema->new(file => 'foo.xsd');
71             $parser = XML::SAX::ParserFactory->parser(Handler => $validator);
72              
73             Then you can proceed to validate files using the parser:
74              
75             eval { $parser->parse_uri('foo.xml') };
76             die "File failed validation: $@" if $@;
77              
78             Setting the optional C<debug> parameter to 1 causes
79             XML::Validator::Schema to output elements and associated attributes
80             while parsing and validating the XML document. This provides useful
81             information on the position where the validation failed (although not
82             as useful as the line and column numbers included when
83             XML::Filter::ExceptionLocator and XML::SAX::ExpatXS are used).
84              
85             =back
86              
87             =head1 RATIONALE
88              
89             I'm writing a piece of software which uses Xerces/C++
90             ( http://xml.apache.org/xerces-c/ ) to validate documents against XML
91             Schema schemas. This works very well, but I'd like to release my
92             project to the world. Requiring users to install Xerces is simply too
93             onerous a requirement; few will have it already and the Xerces
94             installation system leaves much to be desired.
95              
96             On CPAN, the only available XML Schema validator is XML::Schema.
97             Unfortunately, this module isn't ready for use as it lacks the ability
98             to actually parse the XML Schema document format! I looked into
99             enhancing XML::Schema but I must admit that I'm not smart enough to
100             understand the code... One day, when XML::Schema is completed I will
101             replace this module with a wrapper around it.
102              
103             This module represents my attempt to support enough XML Schema syntax
104             to be useful without attempting to tackle the full standard. I'm sure
105             this will mean that it can't be used in all situations, but hopefully
106             that won't prevent it from being used at all.
107              
108             =head1 SCHEMA SUPPORT
109              
110             =head2 Supported Elements
111              
112             The following elements are supported by the XML Schema parser. If you
113             don't see an element or an attribute here then you definitely can't
114             use it in a schema document.
115              
116             You can expect that the schema document parser will produce an error
117             if you include elements which are not supported. However, unsupported
118             attributes I<may> be silently ignored. This should not be
119             misconstrued as a feature and will eventually be fixed.
120              
121             All of these elements must be in the http://www.w3.org/2001/XMLSchema
122             namespace, either using a default namespace or a prefix.
123              
124            
125              
126             Supported attributes: targetNamespace, elementFormDefault,
127             attributeFormDefault
128              
129             Notes: the only supported values for elementFormDefault and
130             attributeFormDefault are "unqualified." As such, targetNamespace
131             is essentially ignored.
132            
133            
134              
135             Supported attributes: name, type, minOccurs, maxOccurs, ref
136              
137            
138              
139             Supported attributes: name, type, use, ref
140              
141            
142              
143             Supported attributes: minOccurs, maxOccurs
144              
145            
146              
147             Supported attributes: minOccurs, maxOccurs
148              
149            
150              
151             Supported attributes: minOccurs, maxOccurs
152              
153            
154              
155             Supported attributes: name
156              
157            
158              
159             The only supported sub-element is <extension>.
160              
161            
162              
163             Supported attributes: base
164              
165             Notes: only allowed inside <simpleContent>
166              
167            
168              
169             Supported attributes: name
170              
171            
172              
173             Supported attributes: base
174              
175             Notes: only allowed inside <simpleType>
176              
177            
178              
179             Supported attributes: value
180              
181            
182              
183             Supported attributes: value
184              
185            
186              
187             Supported attributes: value
188              
189            
190              
191             Supported attributes: value
192              
193            
194              
195             Supported attributes: value
196              
197            
198              
199             Supported attributes: value
200              
201            
202              
203             Supported attributes: value
204              
205            
206              
207             Supported attributes: value
208              
209            
210              
211             Supported attributes: value
212              
213            
214              
215             Supported attributes: value
216              
217            
218              
219             Supported attributes: value
220              
221            
222              
223             Supported attributes: value
224              
225            
226              
227            
228              
229             Supported attributes: name
230              
231            
232             Supported attributes: MemberTypes
233              
234             =head2 Simple Type Support
235              
236             Supported built-in types are:
237              
238             string
239              
240             normalizedString
241              
242             token
243              
244             NMTOKEN
245              
246             Notes: the spec says NMTOKEN should only be used for attributes,
247             but this rule is not enforced.
248              
249             boolean
250              
251             decimal
252              
253             Notes: the enumeration facet is not supported on decimal or any
254             types derived from decimal.
255              
256             integer
257              
258             int
259              
260             short
261              
262             byte
263              
264             unsignedInt
265              
266             unsignedShort
267              
268             unsignedByte
269              
270             positiveInteger
271              
272             negativeInteger
273              
274             nonPositiveInteger
275              
276             nonNegativeInteger
277              
278             dateTime
279              
280             Notes: Although dateTime correctly validates the lexical format it does not
281             offer comparison facets (min*, max*, enumeration).
282              
283             double
284              
285             Notes: Although double correctly validates the lexical format it
286             does not offer comparison facets (min*, max*, enumeration). Also,
287             minimum and maximum constraints as described in the spec are not
288             checked.
289              
290             float
291              
292             Notes: The restrictions on double support apply to float as well.
293              
294             duration
295              
296             time
297              
298             date
299              
300             gYearMonth
301              
302             gYear
303              
304             gMonthDay
305              
306             gDay
307              
308             gMonth
309              
310             hexBinary
311              
312             base64Binary
313              
314             anyURI
315              
316             QName
317              
318             NOTATION
319              
320             =head2 Miscellaneous Details
321              
322             Other known deviations from the specification:
323              
324             =over
325              
326             =item *
327              
328             Patterns specified in pattern simpleType restrictions are Perl regexes
329             with none of the XML Schema extensions available.
330              
331             =item *
332              
333             No effort is made to prevent the declaration of facets which "loosen"
334             the restrictions on a type. This is a bug and will be fixed in a
335             future release. Until then types which attempt to loosen restrictions
336             on their base class will behave unpredictably.
337              
338             =item *
339              
340             No attempt has been made to exclude content models which are
341             ambiguous, as the spec demands. In fact, I don't see any compelling
342             reason to do so, aside from strict compliance to the spec. The
343             content model implementation uses regular expressions which should be
344             able to handle loads of ambiguity without significant performance
345             problems.
346              
347             =item *
348              
349             Marking a facet "fixed" has no effect.
350              
351             =item *
352              
353             SimpleTypes must come after their base types in the schema body. For
354             example, this is ok:
355              
356            
357            
358            
359            
360            
361            
362            
363            
364            
365            
366              
367             But this is not:
368              
369            
370            
371            
372            
373            
374            
375            
376            
377            
378            
379              
380             =back
381              
382             =head1 CAVEATS
383              
384             Here are a few gotchas that you should know about:
385              
386             =over
387              
388             =item *
389              
390             No Unicode testing has been performed, although it seems possible that
391             the module will handle Unicode data correctly.
392              
393             =item *
394              
395             Namespace processing is almost entirely missing from the module.
396              
397             =item *
398              
399             Little work has been done to ensure that invalid schemas fail
400             gracefully. Until that is done you may want to develop your schemas
401             using a more mature validator (like Xerces or XML Spy) before using
402             them with this module.
403              
404             =back
405              
406             =head1 BUGS
407              
408             Please use C<rt.cpan.org> to report bugs in this module:
409              
410             http://rt.cpan.org
411              
412             Please note that I will delete bugs which merely point out the lack of
413             support for a particular feature of XML Schema. Those are feature
414             requests, and believe me, I know we've got a long way to go.
415              
416             =head1 SUPPORT
417              
418             This module is supported on the perl-xml mailing-list. Please join
419             the list if you have questions, suggestions or patches:
420              
421             http://listserv.activestate.com/mailman/listinfo/perl-xml
422              
423             =head1 CVS
424              
425             If you'd like to help develop XML::Validator::Schema you'll want to
426             check out a copy of the CVS tree:
427              
428             http://sourceforge.net/cvs/?group_id=89764
429              
430             =head1 CREDITS
431              
432             The following people have contributed bug reports, test cases and/or
433             code:
434              
435             Russell B Cecala (aka Plankton)
436             David Wheeler
437             Toby Long-Leather
438             Mathieu
439             h.bridge@fasol.fujitsu.com
440             michael.jacob@schering.de
441             josef@clubphoto.com
442             adamk@ali.as
443             Jean Flouret
444              
445             =head1 AUTHOR
446              
447             Sam Tregar
448              
449             =head1 COPYRIGHT AND LICENSE
450              
451             Copyright (C) 2002-2003 Sam Tregar
452              
453             This program is free software; you can redistribute it and/or modify
454             it under the same terms as Perl 5 itself.
455              
456             =head1 A NOTE ON DEVELOPMENT METHODOLOGY
457              
458             This module isn't just an XML Schema validator, it's also a test of
459             the Test Driven Development methodology. I've been writing tests
460             while I develop code for a while now, but TDD goes further by
461             requiring tests to be written I<before> code. One consequence of this
462             is that the module code may seem naive; it really is I<just enough>
463             code to pass the current test suite. If I'm doing it right then there
464             shouldn't be a single line of code that isn't directly related to
465             passing a test. As I add functionality (by way of writing tests) I'll
466             refactor the code a great deal, but I won't add code only to support
467             future development.
468              
469             For more information I recommend "Test Driven Development: By Example"
470             by Kent Beck.
471              
472             =head1 SEE ALSO
473              
474             L<XML::Schema>
475              
476             http://www.w3.org/XML/Schema
477              
478             http://xml.apache.org/xerces-c/
479              
480             =cut
481              
482 5     5   26 use base qw(XML::SAX::Base); # this module is a SAX filter
  5         9  
  5         7566  
483 5     5   123211 use Carp qw(croak); # make some noise
  5         13  
  5         412  
484 5     5   34 use XML::SAX::Exception; # for real
  5         10  
  5         753  
485 5     5   5633 use XML::Filter::BufferText; # keep text together
  5         2379  
  5         135  
486 5     5   1917 use XML::SAX::ParserFactory; # needed to parse the schema documents
  5         10626  
  5         135  
487              
488 5     5   3425 use XML::Validator::Schema::Parser;
  5         15  
  5         152  
489 5     5   3399 use XML::Validator::Schema::ElementNode;
  5         22  
  5         175  
490 5     5   3656 use XML::Validator::Schema::ElementRefNode;
  5         13  
  5         141  
491 5     5   2880 use XML::Validator::Schema::RootNode;
  5         12  
  5         163  
492 5     5   3014 use XML::Validator::Schema::ComplexTypeNode;
  5         13  
  5         128  
493 5     5   2984 use XML::Validator::Schema::SimpleTypeNode;
  5         13  
  5         132  
494 5     5   3253 use XML::Validator::Schema::SimpleType;
  5         17  
  5         192  
495 5     5   3530 use XML::Validator::Schema::TypeLibrary;
  5         14  
  5         134  
496 5     5   2703 use XML::Validator::Schema::ElementLibrary;
  5         14  
  5         119  
497 5     5   2570 use XML::Validator::Schema::AttributeLibrary;
  5         12  
  5         115  
498 5     5   2965 use XML::Validator::Schema::ModelNode;
  5         16  
  5         154  
499 5     5   2925 use XML::Validator::Schema::Attribute;
  5         15  
  5         172  
500 5     5   2569 use XML::Validator::Schema::AttributeNode;
  5         15  
  5         166  
501              
502 5     5   29 use XML::Validator::Schema::Util qw(_err);
  5         11  
  5         4373  
503             our %CACHE;
504              
505             our $DEBUG = 0;
506              
507             # create a new validation filter
                #
                # Class-method constructor.  Options are accepted either as a
                # single hash reference (its contents are shallow-copied into a
                # new hash) or as a flat key/value list.  Options read here:
                #   file  - required; croaks when missing or false
                #   cache - when true, the parsed node stack is kept in the
                #           package-level %CACHE, keyed by the 'file' value
                #   debug - defaults to the package-level $DEBUG when the key
                #           is absent
                #
                # Returns the outermost filter wrapped around the validator, NOT
                # the blessed validator itself: an XML::Filter::BufferText, or
                # an XML::Filter::ExceptionLocator placed above that BufferText
                # when that module can be loaded.
508             sub new {
509 1     1 0 465 my $pkg = shift;
510 1 50       6 my $opt = (@_ == 1) ? { %{shift()} } : {@_};
  0         0  
511 1         3 my $self = bless $opt, $pkg;
512              
513 1 50       10 $self->{debug} = exists $self->{debug} ? $self->{debug} : $DEBUG;
514              
515             # check options
516 1 50       5 croak("Missing required 'file' option.") unless $self->{file};
517              
518             # if caching is on, check the cache
                # (a cached entry is reused only while the file's current mtime,
                # element 9 of stat(), equals the mtime recorded with the entry)
519 1 50 33     10 if ($self->{cache} and
      33        
520             exists $CACHE{$self->{file}} and
521             $CACHE{$self->{file}}{mtime} == (stat($self->{file}))[9]) {
522              
523             # load cached object
                # NOTE: this is the same array reference that %CACHE holds, so
                # the truncation below and the push/pop done by the element
                # handlers act on the cached array as well
524 0         0 $self->{node_stack} = $CACHE{$self->{file}}{node_stack};
525              
526             # might have nodes on it leftover from failed validation,
527             # truncate to root
528 0         0 $#{$self->{node_stack}} = 0;
  0         0  
529              
530             # clean up any lingering state from the last use of this tree
531             $self->{node_stack}[0]->walk_down(
532 0     0   0 { callback => sub { shift->clear_memory; 1; } });
  0         0  
  0         0  
533              
534             } else {
535             # create an empty element stack
536 1         3 $self->{node_stack} = [];
537              
538             # load the schema, filling in the element tree
539 1         4 $self->parse_schema();
540              
541             # store to cache
542 0 0       0 if ($self->{cache}) {
543 0         0 $CACHE{$self->{file}}{mtime} = (stat($self->{file}))[9];
544 0         0 $CACHE{$self->{file}}{node_stack} = $self->{node_stack};
545             }
546             }
547              
548             # buffer text for convenience
                # (the validator becomes the downstream Handler of the buffer)
549 0         0 my $bf = XML::Filter::BufferText->new( Handler => $self );
550              
551             # add line-numbers and column-numbers to errors if
552             # XML::Filter::ExceptionLocator is available
553 0         0 eval { require XML::Filter::ExceptionLocator; };
  0         0  
554 0 0       0 if ($@) {
555             # no luck, just return the buffer-text handler
556 0         0 return $bf;
557             } else {
558             # create a new exception-locator and return it
559 0         0 my $el = XML::Filter::ExceptionLocator->new( Handler => $bf );
560 0         0 return $el;
561             }
562             }
563              
564             # parse an XML schema document, filling $self->{node_stack}
                #
                # Called by new() when no usable cached tree exists.  Reads only
                # $self->{file}.  There is no explicit return statement, and
                # new() ignores whatever the final parse_uri() call yields.
565             sub parse_schema {
566 1     1 0 2 my $self = shift;
567              
                # _err is imported from XML::Validator::Schema::Util; presumably
                # it throws rather than returns -- TODO confirm against Util
568 1 50       21 _err("Specified schema file '$self->{file}' does not exist.")
569             unless -e $self->{file};
570            
571             # initialize the schema parser
                # (the validator itself is handed over as the 'schema' argument)
572 0           my $parser = XML::Validator::Schema::Parser->new(schema => $self);
573              
574             # add line-numbers and column-numbers to errors if
575             # XML::Filter::ExceptionLocator is available
576 0           eval { require XML::Filter::ExceptionLocator; };
  0            
577 0 0         unless ($@) {
578             # create a new exception-locator and set it up above the parser
579 0           $parser = XML::Filter::ExceptionLocator->new( Handler => $parser );
580             }
581              
582             # parse the schema file
                # ($parser is reused: from here on it holds the SAX parser whose
                # Handler is the schema parser / exception-locator chain above)
583 0           $parser = XML::SAX::ParserFactory->parser(Handler => $parser);
584 0           $parser->parse_uri($self->{file});
585             }
586              
587             # check element start
                #
                # SAX start_element handler.  $data is the element event hash;
                # its LocalName and Attributes entries are read here.  The node
                # on top of the node stack is asked for the daughter node that
                # matches the element name, the daughter checks the attributes,
                # and the daughter is then pushed as the new top of the stack
                # before the event is forwarded to the parent class.
                # check_daughter/check_attributes presumably raise an error on
                # invalid input -- TODO confirm in the node classes.
588             sub start_element {
589 0     0 1   my ($self, $data) = @_;
590 0           my $name = $data->{LocalName};
591 0           my $node_stack = $self->{node_stack};
592 0           my $element = $node_stack->[-1];
593              
                # debug trace of the element name, indented by current stack
                # depth; printed before any validation is attempted
594 0 0         print STDERR " " x scalar(@{$node_stack}), " o ", $name, "\n"
  0            
595             if $self->{debug};
596              
597             # check that this alright
598 0           my $daughter = $element->check_daughter($name);
599              
600             # check attributes
601 0           $daughter->check_attributes($data->{Attributes});
602            
                # debug trace of each attribute's Name and Value, printed only
                # after the attribute check above has returned
603 0 0         if ($self->{debug}) {
604 0           foreach my $att ( keys %{ $data->{Attributes} } ) {
  0            
605 0           print STDERR " " x (scalar(@{$node_stack}) + 2), " - ",
  0            
606             $data->{Attributes}->{$att}->{Name}, " = ",
607             $data->{Attributes}->{$att}->{Value}, "\n"
608             }
609             }
610              
611             # enter daughter node
612 0           push(@$node_stack, $daughter);
613              
                # pass the event on to the next handler in the chain
614 0           $self->SUPER::start_element($data);
615             }
616              
617             # check character content
                #
                # SAX characters handler.  Hands $data->{Data} to the node on top
                # of the node stack for checking, then marks that node so that
                # end_element() skips its empty-content check, and finally
                # forwards the event to the parent class.
618             sub characters {
619 0     0 1   my ($self, $data) = @_;
620 0           my $element = $self->{node_stack}[-1];
621 0           $element->check_contents($data->{Data});
                # remember that content was seen for this element
622 0           $element->{checked_content} = 1;
623              
624 0           $self->SUPER::characters($data);
625             }
626              
627             # finish element checking
                #
                # SAX end_element handler.  Works on the node on top of the node
                # stack: validates empty content if characters() never ran for
                # it, runs the final content-model check when the node has a
                # model, clears the node's memory, pops it off the stack and
                # forwards the event to the parent class.
628             sub end_element {
629 0     0 1   my ($self, $data) = @_;
630 0           my $node_stack = $self->{node_stack};
631 0           my $element = $node_stack->[-1];
632              
633             # check empty content if haven't checked yet
                # (the flag is reset afterwards so the node starts clean the
                # next time it is entered)
634 0 0         $element->check_contents('')
635             unless $element->{checked_content};
636 0           $element->{checked_content} = 0;
637              
638             # final model check
                # (an empty array ref is passed when the node's memory is false)
639 0 0 0       $element->{model}->check_final_model($data->{LocalName},
640             $element->{memory} || [])
641             if $element->{model};
642              
643             # done
644 0           $element->clear_memory();
645 0           pop(@$node_stack);
646              
647 0           $self->SUPER::end_element($data);
648             }
649              
650             1;