File Coverage

blib/lib/XML/QL.pm
Criterion Covered Total %
statement 7 9 77.7
branch n/a
condition n/a
subroutine 3 3 100.0
pod n/a
total 10 12 83.3


line stmt bran cond sub pod time code
1              
2             =pod
3              
4             =head1 NAME
5              
6             XML::QL - An XML query language
7              
8             =head1 VERSION
9              
10             0.07
11              
12             =head1 SYNOPSIS
13              
14             $ql = 'WHERE
15             $head
16             ORDER-BY
17             $head
18             IN
19             "file:REC-xml-19980210.xml"
20             CONSTRUCT
21             $head';
22              
23             print XML::QL->query($sql);
24              
25             =head1 DESCRIPTION
26              
27             This module is an early implementation of a note published by the W3C called
28             "XML-QL: A Query Language for XML". XML-QL allows the user to query an XML
29             document much like a database, and describe a construct for output. Currently
30             this module only offers partial functionality as described in the specification,
31             and even some of that has been changed for ease of use. This documentation
32             will describe the fuctionality of this module as well as differences from the
33             XML-QL specification.
34              
35             =head1 METHODS
36              
37             =over 4
38              
39             =item query( "query" )
40              
41             This is the only method required to use this module. This one method allows
42             the user to pass a valid XML-QL query to the module, and the return value is
43             the output.
44              
45             =back
46              
47             =head1 XML-QL: The Query Language
48              
49             The basic syntax consists of two parts, a WHERE clause to describe the data
50             to search for, and a CONSTRUCT clause to describe how to return the data that
51             is found.
52              
53             =over 4
54              
55             =item WHERE
56              
57             WHERE XML-searchstring [ORDER-BY variable [DESCENDING] [, variable [DESCENDING]] ] IN 'filename'
58              
59             The WHERE clause can be separated into several parts. The first is the search string,
60             the second is an optional ORDER-BY clause much like ORDER BY in SQL, and last is
61             the required XML document file name. Each of these parts is described below.
62              
63             =over 4
64              
65             =item XML-searchstring
66              
67             The search string MUST be a valid XML snippet. This is one are where this module
68             differs from the specification. It has been implemented in this way so that the
69             search string may be parsed by the XML::Parser module.
70              
71             The first step in building a query is to list the tags to search for in the document.
72             For example, consider the following search string:
73              
74            
75            
76            
77              
78             This search string will search for the AUTHOR tag nested within a BOOK tag. Note
79             however that no information has been selected for retrieval. In the following
80             example, we actually grab some information:
81              
82            
83             $author
84            
85              
86             The variable name $author will grab the information that it finds withing this tag,
87             and makes this information avalable to us for use in the CONSTRUCT section of the
88             query. You will notice that variable names start with a dollar sign ($), as this
89             is called for by the specification. In Perl, this means that if the query is enclosed
90             in double quotes, this dollar sign must be escaped.
91              
92             In the following example we take it a step further by searching for books of that are
93             non-fiction:
94              
95            
96             $author
97            
98              
99             We can also express this as a regular expression:
100              
101            
102             $author
103            
104              
105             This is another area where this module differs from the specification. The regular
106             expesssion ability as defined in the specification only allows for a subset of
107             the ability available in a Perl regular expression. With this module, the full range
108             of regular expression syntax has been made available. This also means that you
109             must also escape things such as periods(.), parenthesis (), and brackets ([]). All
110             non tag matched are case insensitive.
111              
112             Now lets say that besides matching the TYPE, we also wanted to grab the value.
113             Consider this example:
114              
115            
116             $author
117            
118              
119             The AS_ELEMENT keyword allows you to save the matched value for later use in the
120             CONSTRUCT portion of the query.
121              
122             =item ORDER-BY
123              
124             The ORDER-BY clause allows to sort the data retrieved in the variables. You may
125             specify multiple variables, and specify DESCENDING for a reverse sort. This clause
126             is not required. For example:
127              
128             ORDER-BY $type, $author DESCENDING
129              
130             =item IN
131              
132             The IN clause is a required clause that specifies the file name of the XML file.
133             This can be any URI that is supported by LWP, or it can be
134             a single file name enclosed in quotes. In later versions of this module there will
135             be support for multiple files, directories. The following will work:
136              
137             IN 'REC-xml-19980210.xml'
138              
139             IN 'file://othermachine/share/filename.xml'
140              
141             IN 'http://www.example.com/file.xml'
142              
143             =back
144              
145             =item CONSTRUCT
146              
147             The CONSTRUCT construct allows you to specify a template for output. The template
148             will match character for character from the first space after the word CONSTRUCT
149             to the end of the XML-QL query. For example:
150              
151             $ql = '(where clause...)
152             CONSTRUCT
153             Type: $type
154             Author: $author';
155              
156             The ouput of this will then be a carriage return, a tab, "Type: ", the contents
157             of $type, a carriage return, a tab, "Author: ", and the contents of $author. This
158             construct will be repeated for every match found and returned as a single string.
159              
160             =back
161              
162             =head1 AUTHOR
163              
164             Robert Hanson - Initial Version
165             rhanson@blast.net
166              
167             Matt Sergeant - Only minor fixes so far
168             msergeant@ndirect.co.uk, sergeant@geocities.com
169              
170             =head1 COPYRIGHT
171              
172             Robert's Original licence B:
173             I hereby reserve NO rights to this module, except for maybe a little recognition
174             if you decide to rewrite it and redistribute as your own. Beyond that, you can
175             do whatever you want with this. I would just appreciate a copy of any improvements
176             to this module.
177              
178             However that only stands for version 0.01 of the module. All versions above that
179             are released under the same terms as perl itself.
180              
181             =cut
182              
183             package XML::QL;
184              
185 1     1   827 use strict;
  1         2  
  1         39  
186 1     1   5 use vars qw/$VERSION $construct $uri @orderby @match @found/;
  1         1  
  1         95  
187 1     1   1739 use XML::Parser;
  0            
  0            
188             # use Data::Dumper;
189              
190             $VERSION = "0.07";
191             my $VARNAME='\$([_a-zA-Z0-9]+)';
192             my $AS_ELEMENT="^(.*?)\\s+AS_ELEMENT\\s+$VARNAME\\s*\$";
193              
194              
195             # sub new {
196             # my $proto = shift;
197             # my $class = ref($proto) || $proto;
198             # my $self = {};
199             # my %params = @_;
200             # $self->{parser} = new XML::Parser(
201             # ErrorContext => ($params{ErrorContext} || 0),
202             # Style => 'Stream');
203             # bless $self, $class;
204             # return $self;
205             # }
206              
207             sub query {
208             my $self = shift;
209             my $sql = shift;
210              
211             local @match;
212             local @found;
213             local $construct;
214             local $uri;
215             local @orderby;
216              
217              
218             # $self->{match} = ();
219             # $self->{context} = ();
220             # $self->{currmatch} = ();
221             # $self->{found} = ();
222              
223             # $self->buildMatchData($sql) || die "Unable to parse query string!\n";
224             # $self->searchXMLfile($uri) || die "Unable to open file $uri\n";
225             # if (!defined wantarray) {
226             # return;
227             # }
228              
229             buildMatchData($sql) || die "Unable to parse query string!\n";
230             searchXMLfile($uri) || die "Unable to open and search file $uri\n";
231             return createConstruct($construct);
232             }
233              
234             sub searchXMLfile {
235             my ($uri) = @_;
236              
237             my $ql = new XML::Parser(
238             Style => 'Stream',
239             Pkg => 'XML::QL::Search',
240             ErrorContext => 2,
241             _matches => \@match,
242             );
243              
244             if ($uri =~ /^(file:|https?:|ftp:|gopher:)/) {
245             my $doc;
246              
247             eval <<'EOLWP';
248              
249             use LWP::UserAgent;
250             my $ua = LWP::UserAgent->new;
251             $ua->env_proxy;
252              
253             my $req = new HTTP::Request 'GET',$uri;
254             $doc = $ua->request($req)->content;
255             EOLWP
256              
257             $ql->parsestring($doc);
258             }
259             else {
260             # Assume it's a file
261             $ql->parsefile($uri);
262             }
263              
264             #open OUT, ">debug.txt";
265             #print OUT Data::Dumper->Dump([\@match, \@context, \@curmat, \@found],['match', 'context', 'curmat', 'found']);
266             #close OUT;
267             }
268              
269             sub createConstruct {
270             my ($construct) = @_;
271             my @ret_val;
272             @found = sort { orderBy($a,$b) } (@found) if ( @orderby > 0 );
273             foreach my $match (@found) {
274             my $tmp = $construct;
275             foreach my $key ( keys(%{$match})) {
276             $tmp =~ s/\$$key/$match->{$key}/eg;
277             }
278             push @ret_val, $tmp;
279             }
280             return join '', @ret_val;
281             }
282              
283             sub orderBy {
284             my $self = shift;
285              
286             my ($aval, $bval) = @_;
287             my $numeric = 0;
288             foreach (@{$self->{orderby}}) {
289             my $sortby = $_->{field};
290             my $order = $_->{order};
291             if ( ($aval->{$sortby} =~ /^\d*\.?\d*$/) && ($bval->{$sortby} =~ /^\d*\.?\d*$/)) {
292             $numeric = 1;
293             }
294             if ($numeric) {
295             if ($order eq 'DESCENDING') {
296             return ($bval->{$sortby} <=> $aval->{$sortby}) if ($bval->{$sortby} != $aval->{$sortby});
297             }
298             else {
299             return ($aval->{$sortby} <=> $bval->{$sortby}) if ($aval->{$sortby} != $bval->{$sortby});
300             }
301             }
302             else {
303             if ($order eq 'DESCENDING') {
304             return ($bval->{$sortby} cmp $aval->{$sortby}) if ($bval->{$sortby} ne $aval->{$sortby});
305             }
306             else {
307             return ($aval->{$sortby} cmp $bval->{$sortby}) if ($aval->{$sortby} ne $bval->{$sortby});
308             }
309             }
310             }
311             return 0;
312             }
313              
314             sub buildMatchData {
315             my $sql = shift;
316             my ($where, $orderby, @sqlparms);
317             return 0 unless (@sqlparms= ($sql =~
318             /^\s*
319             WHERE\s+(.*?)\s+
320             (?:ORDER-BY\s+(.*?)\s+)?
321             IN\s+(.*?)\s+
322             CONSTRUCT\s+(.*)$
323             /isx ));
324             $where = shift @sqlparms;
325             $construct = pop @sqlparms;
326             $uri = pop @sqlparms;
327             $orderby = shift @sqlparms;
328              
329             # check URI syntax
330             return 0 unless $uri =~ s/^(['"])(.*)\1$/$2/;
331              
332             # handle order-by
333             if ($orderby) {
334             foreach my $tmp (split(/\s*,\s*/, $orderby)) {
335             return 0 unless $tmp =~ /^$VARNAME(?:\s+(DESCENDING))?$/io;
336             push @orderby, { 'field' => $1, 'order' => (defined($2))? 'DESCENDING': 'ASCENDING'};
337             }
338             }
339              
340             my $ql = new XML::Parser(
341             Style => 'Stream',
342             Pkg => 'XML::QL::Where',
343             _matches => \@match,
344             );
345             eval {
346             $ql->parse("<__query>$where");
347             };
348             if ($@) {
349             die "Parsing where clause '$where' failed: $@\n";
350             }
351             return 1;
352             }
353              
354             package XML::QL::Where;
355              
356             sub StartTag {
357             my ($expat,$element)=@_;
358             my %attributes = %_;
359             return if $element eq '__query';
360             push @{$expat->{_matches}}, {'type' => 'starttag', 'element' => $element, 'char' => '', 'attrib' => \%attributes };
361             }
362              
363             sub EndTag {
364             my ($expat,$element)=@_;
365             return if $element eq '__query';
366             push @{$expat->{_matches}}, {'type' => 'endtag', 'element' => $element, 'char' => '', 'attrib' => {}};
367             }
368              
369             sub Text {
370             my ($expat)=@_;
371             my $string = $_;
372             $string =~ s/^\s+//s; # strip leading white space
373             $string =~ s/\s+$//s; # strip following white space
374             push @{$expat->{_matches}}, {'type' => 'char', 'element' => '', 'char' => $string, 'attrib' => {}} if ($string ne '');
375             }
376              
377             package XML::QL::Search;
378              
379             use vars qw($lastcall @curmat);
380              
381             my $found = 0;
382             #my $VARNAME = $XML::QL::VARNAME;
383             #my $AS_ELEMENT = $XML::QL::AS_ELEMENT;
384              
385             sub StartTag {
386             my ($expat,$element)=@_;
387             # warn "Start: $element\n";
388             my %attributes;
389             %attributes = %_;
390             $lastcall = "open$element";
391             my $limit=scalar(@curmat);
392             for (my $i = 0; $i < $limit; $i++ ) {
393             if ( ! $curmat[$i]->{done} and $expat->{_matches}->[$curmat[$i]->{ptr}]->{type} eq 'starttag') {
394             if ( $expat->{_matches}->[$curmat[$i]->{ptr}]->{element} eq $element ) {
395             # If the target tag equals the current element...
396             # Advance match
397              
398             my %tmphash = %{$curmat[$i]};
399             push @curmat, \%tmphash;
400              
401             $curmat[$i]->{ptr}++ if ( matchAttributes($expat, $curmat[$i], %attributes) );
402             }
403             }
404             }
405              
406             if ( $expat->{_matches}->[0]->{type} eq 'starttag' and $expat->{_matches}->[0]->{element} eq $element) {
407             # If the start of the match is a starttag and the element matches the target element
408             push @curmat, {'ptr' => 0, 'done' => 0, 'fail' => $expat->context};
409             matchAttributes($expat, $curmat[-1], %attributes);
410             $curmat[-1]->{ptr}++;
411             }
412             }
413              
414             sub matchAttributes {
415             my ($expat, $cm, %attributes) = @_;
416             my %match_attribs = %{$expat->{_matches}->[$cm->{ptr}]->{attrib}};
417             foreach my $key ( keys(%match_attribs) ) {
418             if ( $match_attribs{$key} =~ /$AS_ELEMENT/io ) {
419             my $tmpfind = $1;
420             my $tmpvar = $2;
421             if ( $attributes{$key} =~ /^$tmpfind$/i ) {
422             $cm->{vars}->{$tmpvar} = $attributes{$key};
423             }
424             else {
425             $cm->{done} = 1;
426             return 0;
427             }
428             }
429             elsif ( $match_attribs{$key} =~ /^\s*$VARNAME\s*$/o ) {
430             $cm->{vars}->{$1} = $attributes{$key};
431             }
432             elsif ( $attributes{$key} !~ /^$match_attribs{$key}$/i ) {
433             $cm->{done} = 1;
434             return 0;
435             }
436             }
437             return 1;
438             }
439              
440             sub EndTag {
441             my ($expat,$element)=@_;
442             # warn "End: $element\n";
443             if ($lastcall eq "open$element") {
444             # To fix Char handler not being called on an empty string
445             $_ = '';
446             Text($expat);
447             }
448             $lastcall = "close$element";
449             foreach my $cm (@curmat) {
450             if ( ! $cm->{done} and $expat->{_matches}->[$cm->{ptr}]->{type} eq 'endtag') {
451             # If type of cur match equals endtag...
452             # warn "Found end tag. it is: '", $expat->{_matches}->[$cm->{ptr}]->{element}, "'\n";
453             # warn "Current element is '$element'\n";
454             if ( $expat->{_matches}->[$cm->{ptr}]->{element} eq $element ) {
455             # If the target tag equals the current element...
456             # Advance match
457             $cm->{ptr}++;
458             if ($cm->{ptr} == scalar(@{$expat->{_matches}})) {
459             # if the match pointer has been advanced to the end of the match...
460             # Match is done!
461             my %tmp = %{$cm->{vars}};
462             push @XML::QL::found, \%tmp;
463             @curmat = ();
464             $cm->{done} = 1;
465             $cm->{reason} = 'matched query';
466             }
467             }
468             }
469             if ( ( ! $cm->{done} ) && ( $expat->context < $cm->{fail} ) ) {
470             $cm->{done} = 1;
471             $cm->{reason} = "out of context on $element";
472             }
473             }
474             }
475              
476             sub Text {
477             my ($expat)=@_;
478             my $string = $_;
479             $lastcall = "char";
480             $string =~ s/^\s+//s; # strip leading whitespace
481             $string =~ s/\s+$//s; # strip following white space
482             foreach my $cm (@curmat) {
483             if ( ! $cm->{done} and $expat->{_matches}->[$cm->{ptr}]->{type} eq 'char' ) {
484             if ( $expat->{_matches}->[$cm->{ptr}]->{char} =~ /$AS_ELEMENT/io ) {
485             my $tmpfind = $1;
486             my $tmpvar = $2;
487             if ( $string =~ /^$tmpfind$/i ) {
488             $cm->{vars}->{$tmpvar} = $string;
489             $cm->{ptr}++;
490             }
491             else {
492             # warn "Doesn't match '$AS_ELEMENT'\n";
493             $cm->{done} = 1;
494             $cm->{reason} = "Does not match string $string";
495             }
496             }
497             elsif ( $string =~ /^$expat->{_matches}->[$cm->{ptr}]->{char}$/i ) {
498             # warn "Matches current element!\n";
499             # If the target tag equals the current element...
500             # Advance match
501             $cm->{ptr}++;
502             }
503             elsif ( $expat->{_matches}->[$cm->{ptr}]->{char} =~ /^$VARNAME$/ ) {
504             # warn "Matches varname\n";
505             $cm->{vars}->{$1} = $string;
506             $cm->{ptr}++;
507             }
508             else {
509             # warn "Doesn't match '$VARNAME':(\n";
510             $cm->{done} = 1;
511             $cm->{reason} = "Does not match string $string";
512             }
513             }
514             }
515             }
516              
517             # use Data::Dumper;
518             sub EndDocument {
519             # print Dumper(\@XML::QL::found);
520            
521             1;
522             }
523              
524             1;