File Coverage

blib/lib/Net/Z3950/PQF.pm
Criterion Covered Total %
statement 59 69 85.5
branch 22 28 78.5
condition 5 6 83.3
subroutine 9 11 81.8
pod 3 3 100.0
total 98 117 83.7


line stmt bran cond sub pod time code
1             # $Id: PQF.pm,v 1.8 2007/10/05 12:12:34 mike Exp $
2              
3             package Net::Z3950::PQF;
4              
5 3     3   85255 use 5.006;
  3         10  
  3         125  
6 3     3   17 use strict;
  3         8  
  3         143  
7 3     3   16 use warnings;
  3         11  
  3         93  
8              
9 3     3   1809 use Net::Z3950::PQF::Node;
  3         8  
  3         2804  
10              
11             our $VERSION = '0.04';
12              
13              
14             =head1 NAME
15              
16             Net::Z3950::PQF - Perl extension for parsing PQF (Prefix Query Format)
17              
18             =head1 SYNOPSIS
19              
20             use Net::Z3950::PQF;
21             $parser = new Net::Z3950::PQF();
22             $node = $parser->parse('@and @attr 1=1003 kernighan @attr 1=4 unix');
23             print $node->render(0);
24              
25             =head1 DESCRIPTION
26              
27             This library provides a parser for PQF (Prefix Query Format), an ugly
28             but precise string format for expressing Z39.50 Type-1 queries. This
29             format is widely used behind the scenes of Z39.50 applications, and is
30             also used extensively with test-harness programs such as the YAZ
31             command-line client, C. A few particularly misguided
32             souls have been known to type it by hand.
33              
34             Unlike PQF itself, this module
35             is simple to use. Create a parser object, then pass PQF strings
36             into its C method to yield parse-trees. The trees are made
37             up of nodes whose types are subclasses of
38             C.
39             and have names of the form
40             C. You may find it helpful to use
41             C to visualise the structure of the returned
42             parse-trees.
43              
44             What is a PQF parse-tree good for? Not much. You can render a
45             human-readable version by invoking the top node's C method,
46             which is probably useful only for debugging. Or you can turn it into
47             tree of nodes like those passed into SimpleServer search handlers
48             using C. If you want to do anything useful, such as
49             implementing an actual query server that understands PQF, you'll have
50             to walk the tree.
51              
52             =head1 METHODS
53              
54             =head2 new()
55              
56             $parser = new Net::Z3950::PQF();
57              
58             Creates a new parser object.
59              
60             =cut
61              
62             sub new {
63 2     2 1 27 my $class = shift();
64              
65 2         13 return bless {
66             text => undef,
67             errmsg => undef,
68             }, $class;
69             }
70              
71              
72             =head2 parse()
73              
74             $query = '@and @attr 1=1003 kernighan @attr 1=4 unix';
75             $node = $parser->parse($query);
76             if (!defined $node) {
77             die "parse($query) failed: " . $parser->errmsg();
78             }
79              
80             Parses the PQF string provided as its argument. If an error occurs,
81             then an undefined value is returned, and the error message can be
82             obtained by calling the C method. Otherwise, the top node
83             of the parse tree is returned.
84              
85             $node2 = $parser->parse($query, "zthes");
86             $node3 = $parser->parse($query, "1.2.840.10003.3.13");
87              
88             A second argument may be provided after the query itself. If it is
89             provided, then it is taken to be either the name or the OID of a
90             default attribute set, which attributes specified in the query belong
91             to if no alternative attribute set is explicitly specified within the
92             query. When this second argument is absent, the default attribute set
93             is BIB-1.
94              
95             =cut
96              
97             sub parse {
98 21     21 1 12162 my $this = shift();
99 21         41 my($text, $attrset) = @_;
100 21 50       60 $attrset = "bib-1" if !defined $attrset;
101              
102 21         344 $this->{text} = $text;
103 21         65 return $this->_parse($attrset, {});
104             }
105              
106              
107             # PRIVATE to parse();
108             #
109             # Underlying parse function. $attrset is the default attribute-set to
110             # use for attributes that are not specified with an explicit set, and
111             # $attrhash is hash of attributes (at most one per type per
112             # attribute-set) to be applied to all nodes below this point. The
113             # keys of this hash are of the form ":" and the values
114             # are the corresponding attribute values.
115             #
116             sub _parse {
117 67     67   96 my $this = shift();
118 67         90 my($attrset, $attrhash) = @_;
119              
120 67         377 $this->{text} =~ s/^\s+//;
121              
122             ### This rather nasty hack for quoted terms doesn't recognised
123             # backslash-quoted embedded double quotes.
124 67 100       251 if ($this->{text} =~ s/^"(.*?)"//) {
125 3         10 return $this->_leaf('term', $1, $attrhash);
126             }
127              
128             # Also recognise multi-word terms enclosed in {curly braces}
129 64 100       512 if ($this->{text} =~ s/^{(.*?)}//) {
130 1         3 return $this->_leaf('term', $1, $attrhash);
131             }
132              
133 63         134 my $word = $this->_word();
134 63 100 100     671 if ($word eq '@attrset') {
    100 66        
    100          
    50          
    100          
135 2         7 $attrset = $this->_word();
136 2         6 return $this->_parse($attrset, $attrhash);
137              
138             } elsif ($word eq '@attr') {
139 20         507 $word = $this->_word();
140 20 100       66 if ($word !~ /=/) {
141 4         8 $attrset = $word;
142 4         10 $word = $this->_word();
143             }
144 20         88 my($type, $val) = ($word =~ /(.*)=(.*)/);
145 20         56 my %h = %$attrhash;
146 20         54 $h{"$attrset:$type"} = $val;
147 20         160 return $this->_parse($attrset, \%h);
148              
149             } elsif ($word eq '@and' || $word eq '@or' || $word eq '@not') {
150 12         148 my $sub1 = $this->_parse($attrset, $attrhash);
151 12         126 my $sub2 = $this->_parse($attrset, $attrhash);
152 12 100       38 if ($word eq '@and') {
    50          
    0          
153 4         38 return new Net::Z3950::PQF::AndNode($sub1, $sub2);
154             } elsif ($word eq '@or') {
155 8         46 return new Net::Z3950::PQF::OrNode($sub1, $sub2);
156             } elsif ($word eq '@not') {
157 0         0 return new Net::Z3950::PQF::NotNode($sub1, $sub2);
158             } else {
159 0         0 die "Houston, we have a problem";
160             }
161              
162             } elsif ($word eq '@prox') {
163 0         0 return $this->_error("proximity not yet implemented");
164              
165             } elsif ($word eq '@set') {
166 3         17 $word = $this->_word();
167 3         10 return $this->_leaf('rset', $word, $attrhash);
168             }
169              
170             # It must be a bareword
171 26         60 return $this->_leaf('term', $word, $attrhash);
172             }
173              
174              
175             # PRIVATE to _parse();
176             sub _word {
177 92     92   172 my $this = shift();
178              
179 92         227 $this->{text} =~ s/^\s+//;
180 92         352 $this->{text} =~ s/^(\S+)//;
181 92         267 return $1;
182             }
183              
184              
185             # PRIVATE to _parse();
186             sub _error {
187 0     0   0 my $this = shift();
188 0         0 my (@msg) = @_;
189              
190 0         0 $this->{errmsg} = join("", @msg);
191 0         0 return undef;
192             }
193              
194              
195             # PRIVATE to _parse();
196             sub _leaf {
197 33     33   126 my $this = shift();
198 33         57 my($type, $word, $attrhash) = @_;
199              
200 33         41 my @attrs;
201 33         136 foreach my $key (sort keys %$attrhash) {
202 24         144 my($attrset, $type) = split /:/, $key;
203 24         138 push @attrs, [ $attrset, $type, $attrhash->{$key} ];
204             }
205              
206 33 100       95 if ($type eq 'term') {
    50          
207 30         122 return new Net::Z3950::PQF::TermNode($word, @attrs);
208             } elsif ($type eq 'rset') {
209 3         39 return new Net::Z3950::PQF::RsetNode($word, @attrs);
210             } else {
211 0           die "_leaf() called with type='$type' (should be 'term' or 'rset')";
212             }
213             }
214              
215              
216             =head2 errmsg()
217              
218             print $parser->errmsg();
219              
220             Returns the last error-message generated by a failed attempt to parse
221             a query.
222              
223             =cut
224              
225             sub errmsg {
226 0     0 1   my $this = shift();
227 0           return $this->{errmsg};
228             }
229              
230              
231             =head1 SEE ALSO
232              
233             The C module.
234              
235             The definition of the Type-1 query in the Z39.50 standard, the
236             relevant section of which is on-line at
237             http://www.loc.gov/z3950/agency/markup/09.html#3.7
238              
239             The documentation of Prefix Query Format in the YAZ Manual, the
240             relevant section of which is on-line at
241             http://indexdata.com/yaz/doc/tools.tkl#PQF
242              
243             =head1 AUTHOR
244              
245             Mike Taylor, Emike@indexdata.comE
246              
247             =head1 COPYRIGHT AND LICENSE
248              
249             Copyright 2004 by Index Data ApS.
250              
251             This library is free software; you can redistribute it and/or modify
252             it under the same terms as Perl itself.
253              
254             =cut
255              
256              
257             1;