File Coverage

blib/lib/Lucene/QueryParser.pm
Criterion Covered Total %
statement 109 117 93.1
branch 48 64 75.0
condition 11 15 73.3
subroutine 12 13 92.3
pod 0 2 0.0
total 180 211 85.3


line stmt bran cond sub pod time code
1             package Lucene::QueryParser;
2              
3 2     2   9463 use 5.00503;
  2         8  
  2         86  
4 2     2   13 use strict;
  2         5  
  2         67  
5 2     2   21 use Carp;
  2         5  
  2         206  
6              
7             require Exporter;
8 2     2   16510 use Text::Balanced qw(extract_bracketed extract_delimited);
  2         64639  
  2         239  
9              
10 2     2   21 use vars qw($VERSION @ISA @EXPORT @EXPORT_OK);
  2         4  
  2         3395  
11             @ISA = qw(Exporter);
12             @EXPORT_OK = qw( parse_query deparse_query );
13             @EXPORT = qw( parse_query deparse_query );
14             $VERSION = '1.04';
15              
16             sub parse_query {
17 17     17 0 15122 local $_ = shift;
18 17         29 my @rv;
19 17         55 while ($_) {
20 41 100       160 s/^\s+// and next;
21 29         31 my $item;
22 29         65 s/^(AND|OR|\|\|)\s+//;
23 29 100       87 if ($1) { $item->{conj} = $1; }
  7         18  
24 29 100       136 if (s/^\+//) { $item->{type} = "REQUIRED"; }
  1 100       3  
25 4         11 elsif (s/^(-|!|NOT)\s*//i){ $item->{type} = "PROHIBITED"; }
26 24         60 else { $item->{type} = "NORMAL"; }
27              
28 29 100       97 if (s/^([^\s(":]+)://) { $item->{field} = $1 }
  5         17  
29              
30             # Subquery
31 29 100       120 if (/^\(/) {
    100          
    50          
32 1         6 my ($extracted, $remainer) = extract_bracketed($_,"(");
33 1 50       197 if (!$extracted) { croak "Unbalanced subquery" }
  0         0  
34 1         2 $_ = $remainer;
35 1         4 $extracted =~ s/^\(//;
36 1         4 $extracted =~ s/\)$//;
37 1         3 $item->{query} = "SUBQUERY";
38 1         7 $item->{subquery} = parse_query($extracted);
39             } elsif (/^"/) {
40 3         22 my ($extracted, $remainer) = extract_delimited($_, '"');
41 3 50       385 if (!$extracted) { croak "Unbalanced phrase" }
  0         0  
42 3         8 $_ = $remainer;
43 3         13 $extracted =~ s/^"//;
44 3         16 $extracted =~ s/"$//;
45 3         8 $item->{query} = "PHRASE";
46 3         11 $item->{term} = $extracted;
47             } elsif (s/^(\S+)\*//) {
48 0         0 $item->{query} = "PREFIX";
49 0         0 $item->{term} = $1;
50             } else {
51 25 50       115 s/([^\s\^]+)// or croak "Malformed query";
52 25         55 $item->{query} = "TERM";
53 25         60 $item->{term} = $1;
54             }
55              
56 29 100       189 if (s/^\^(\d+(?:.\d+)?)//) { $item->{boost} = $1 }
  2         7  
57              
58 29         210 push @rv, bless $item, "Lucene::QueryParser::".ucfirst lc $item->{query};
59             }
60 17         85 return bless \@rv, "Lucene::QueryParser::TopLevel";
61             }
62              
63             sub deparse_query {
64 10     10 0 16839 my $ds = shift;
65 10         20 my @out;
66 10         22 for my $elem (@$ds) {
67 20         26 my $thing = "";
68 20 100       46 if ($elem->{conj}) { $thing .= "$elem->{conj} "; }
  6         11  
69 20 100       68 if ($elem->{type} eq "REQUIRED") {
    100          
70 1         3 $thing .= "+";
71             } elsif ($elem->{type} eq "PROHIBITED") {
72 3         4 $thing .= "-";
73             }
74 20 100       48 if (exists $elem->{field}) {
75 2         6 $thing .= $elem->{field}.":"
76             }
77 20 100       44 if ($elem->{query} eq "TERM") {
    50          
    0          
78 19         32 $thing .= $elem->{term};
79             } elsif ($elem->{query} eq "SUBQUERY") {
80 1         7 $thing .= "(".deparse_query($elem->{subquery}).")";
81             } elsif ($elem->{query} eq "PHRASE") {
82 0         0 $thing .= '"'.$elem->{term}.'"';
83             }
84 20 100       47 if (exists $elem->{boost}) { $thing .= "^".$elem->{boost} }
  1         3  
85 20         54 push @out, $thing;
86             }
87 10         54 return join " ", @out;
88             }
89              
90             package Lucene::QueryParser::TopLevel;
91              
92             sub to_plucene {
93 6     6   12 my ($self, $field) = @_;
94 6 50       17 Carp::croak("You need to specify a default field for your query")
95             unless $field;
96 6 100 100     72 return $self->[0]->to_plucene($field)
97             if @$self ==1 and $self->[0]->{type} eq "NORMAL";
98              
99 2         5 my @clauses;
100 2         12 $self->add_clause(\@clauses, $_, $field) for @$self;
101 2         955 require Plucene::Search::BooleanQuery;
102 2         4546 my $query = new Plucene::Search::BooleanQuery;
103 2         40 $query->add_clause($_) for @clauses;
104            
105 2         31 $query;
106             }
107              
108             sub add_clause {
109 3     3   17 my ($self, $clauses, $term, $field) = @_;
110 3         57 my $q = $term->to_plucene($field);
111 3 50 66     45 if (exists $term->{conj} and $term->{conj} eq "AND" and @$clauses) {
      66        
112             # The previous term needs to become required
113 1 50       5 $clauses->[-1]->required(1) unless $clauses->[-1]->prohibited;
114             }
115              
116 3 50       22 return unless $q; # Shouldn't happen yet
117 3         8 my $prohibited = $term->{type} eq "PROHIBITED";
118 3         5 my $required = $term->{type} eq "REQUIRED";
119 3 50 66     17 $required = 1 if exists $term->{conj} and $term->{conj} eq "AND"
      66        
120             and !$prohibited;
121 3         1237 require Plucene::Search::BooleanClause;
122 3         432 push @$clauses, Plucene::Search::BooleanClause->new({
123             prohibited => $prohibited,
124             required => $required,
125             query => $q
126             });
127             }
128              
129             # Oh, I really like abstraction
130              
131             package Lucene::QueryParser::Term;
132              
133             sub to_plucene {
134 6     6   1114 require Plucene::Search::TermQuery;
135 6         107050 require Plucene::Index::Term;
136 6         14 my ($self, $field) = @_;
137 6 100       64 $self->{pl_term} = Plucene::Index::Term->new({
138             field => (exists $self->{field} ? $self->{field} : $field),
139             text => $self->{term}
140             });
141 6         116 my $q = Plucene::Search::TermQuery->new({ term => $self->{pl_term} });
142 6         120 $self->set_boost($q);
143 6         38 return $q;
144             }
145              
146             sub set_boost {
147 7     7   12 my ($self, $q) = @_;
148 7 100       38 $q->boost($self->{boost}) if exists $self->{boost};
149             }
150              
151             package Lucene::QueryParser::Phrase;
152             our @ISA = qw(Lucene::QueryParser::Term);
153             # This corresponds to the rules for "PHRASE" in the Plucene grammar
154              
155             sub to_plucene {
156 2     2   1120 require Plucene::Search::PhraseQuery;
157 2         7422 require Plucene::Index::Term;
158 2         6 my ($self, $field) = @_;
159 2         11 my @words = split /\s+/, $self->{term};
160 2 100       22 return $self->SUPER::to_plucene($field) if @words == 1;
161              
162 1         6 my $phrase = Plucene::Search::PhraseQuery->new;
163 1         52 for my $word (@words) {
164 2 50       38 my $term = Plucene::Index::Term->new({
165             field => (exists $self->{field} ? $self->{field} : $field),
166             text => $word
167             });
168 2         24 $phrase->add($term);
169             }
170 1 50       34 if (exists $self->{slop}) { # Future extension
171 0         0 $phrase->slop($self->{slop});
172             }
173 1         5 $self->set_boost($phrase);
174 1         3 return $phrase;
175             }
176              
177             package Lucene::QueryParser::Subquery;
178              
179             sub to_plucene {
180 0     0     my ($self, $field) = @_;
181 0 0         $self->{subquery}->to_plucene(
182             exists $self->{field} ? $self->{field} : $field
183             )
184             }
185              
186             1;
187             __END__