File Coverage

blib/lib/Catalyst/Model/Xapian.pm
Criterion Covered Total %
statement 4 6 66.6
branch n/a
condition n/a
subroutine 2 2 100.0
pod n/a
total 6 8 75.0


line stmt bran cond sub pod time code
1             package Catalyst::Model::Xapian;
2              
3 1     1   788 use base qw/Catalyst::Model/;
  1         3  
  1         546  
4 1     1   1402 use Moose;
  0            
  0            
5              
6             use strict;
7              
8             use Catalyst::Model::Xapian::Result;
9             use Encode qw/from_to/;
10             use Search::Xapian qw/:all/;
11             use Storable;
12             use MRO::Compat;
13             use Time::HiRes qw/gettimeofday tv_interval/;
14              
15             our $VERSION='0.06';
16              
# Read/write attributes holding the Xapian database handle and the query
# parser built on top of it. Each is declared exactly once, with a type
# constraint, so there is a single definition of every accessor.
has 'db' => ( isa => 'Search::Xapian::Database',    is => 'rw' );
has 'qp' => ( isa => 'Search::Xapian::QueryParser', is => 'rw' );
21              
22              
23             =head1 NAME
24              
25             Catalyst::Model::Xapian - Catalyst model for Search::Xapian.
26              
27             =head1 SYNOPSIS
28              
29             my ($it,$res)= $c->comp('MyApp::M::Xapian')->search(
30             $c->req->param('q'),
31             $c->req->param('page') ||0 ,
32             $c->req->param('itemsperpage')||0
33             );
34             $c->stash->{searchresults}=$res;
35             $c->stash->{iterator}=$it;
36            
37              
38             =head1 DESCRIPTION
39              
40             This model class wraps L<Search::Xapian> to provide a friendly, paged
41             interface to Xapian (www.xapian.org) indexes. This class adds a little
42             extra convenience on top of the Search::Xapian class. It expects you to
43             use the QueryParser, and sets up some keywords based on the standard
44             omega keywords (id, site, date, month, year, title), so that you can
45             do searches like
46              
47             'fubar site:microsoft.com'
48              
49             =head1 CONFIG OPTIONS
50              
51             =over 4
52              
53             =item db
54              
55             Path to the index directory. Defaults to <MyApp>/index.
56              
57             =item language
58              
59             Language to use for stemming. Defaults to english.
60              
61             =item page_size
62              
63             Default page sizes for L<Data::Page>. Defaults to 10.
64              
65             =item utf8_query
66              
67             Queries are passed as utf8 strings. Defaults to 1.
68              
69             =item order_by_date
70              
71             Sets weighting to order by docid descending rather than the usual BM25
72             weighting. Off by default.
73              
74             =back
75              
76             =head1 METHODS
77              
78             =over 4
79              
80             =item new
81              
82             Constructor. Sets up the db and qp accessors. It is called automatically by
83             Catalyst at startup.
84              
85             =cut
86              
# Constructor, called by Catalyst at startup. Builds the component through
# the parent class, then opens the Xapian database and attaches a configured
# query parser to it.
#
# Arguments: the class name, the application/context object ($c), and any
#            further constructor arguments, which are forwarded to the parent.
# Returns:   the new model instance with db and qp populated; the merged
#            configuration is stored back through config().
sub new {
    my ( $class, $c, @args ) = @_;

    # This file loads MRO::Compat (which provides next::method), not NEXT.
    # Every argument is forwarded so nothing handed to the constructor is lost.
    my $self = $class->next::method( $c, @args );

    # Built-in defaults come first; the component's own config overrides them.
    my %config = (
        db         => $c->config->{home} . '/index',
        language   => 'english',
        page_size  => 10,
        utf8_query => 1,
        %{ $self->config() },
    );

    $self->db( Search::Xapian::Database->new( $config{db} ) );
    $self->qp( Search::Xapian::QueryParser->new( $self->db ) );
    my $qp = $self->qp;

    # Stemming is skipped when the configuration sets language to undef.
    if ( defined $config{language} ) {
        $qp->set_stemmer( Search::Xapian::Stem->new( $config{language} ) );
    }
    $qp->set_default_op(OP_AND);

    # Query keywords mapped onto term prefixes. All but "title" are boolean
    # filters; "title" is registered with add_prefix instead.
    $qp->add_boolean_prefix( 'site',  'H' );
    $qp->add_boolean_prefix( 'year',  'Y' );
    $qp->add_boolean_prefix( 'month', 'M' );
    $qp->add_boolean_prefix( 'date',  'D' );
    $qp->add_boolean_prefix( 'id',    'Q' );
    $qp->add_prefix( 'title', 'T' );

    $self->config( \%config );
    return $self;
}
116              
117              
118             =item search <q>,[<page>],[<page_size>]
119              
120             Perform a search using the Xapian QueryParser. The document data is expanded
121             using extract_data. You can override the page size per query by passing
122             page size as a final argument to the function. Returns a
123             L<Catalyst::Model::Xapian::Result> object.
124              
125             =cut
126              
127              
# Run a paged query against the index.
#
# Arguments: the query string, an optional 1-based page number (defaults to
#            1; values below 1 are treated as 1), and an optional page size
#            (defaults to the configured page_size).
# Returns:   a Catalyst::Model::Xapian::Result built from the match set, the
#            query string, the parsed query object, the elapsed time and the
#            paging values.
sub search {
    my ( $self, $q, $page, $page_size ) = @_;
    my $started = [gettimeofday];

    $page ||= 1;
    $page = 1 if $page < 1;    # a negative offset must never reach get_mset
    $page_size ||= $self->config->{page_size};

    $self->db->reopen();

    # NOTE(review): 23 is a QueryParser flag bitmask, presumably
    # FLAG_BOOLEAN|FLAG_PHRASE|FLAG_LOVEHATE|FLAG_WILDCARD - confirm against
    # the Search::Xapian constants before replacing the literal.
    my $query = $self->qp->parse_query( $q, 23 );
    my $enq   = $self->db->enquire($query);

    # Subclass hook: lets a subclass adjust the enquire object first.
    $self->prepare_enq($enq);

    if ( $self->config->{order_by_date} ) {
        $enq->set_docid_order(ENQ_DESCENDING);
        $enq->set_weighting_scheme( Search::Xapian::BoolWeight->new() );
    }

    my $mset = $enq->get_mset( ( $page - 1 ) * $page_size, $page_size );

    # Format with sprintf rather than matching the stringified float: tiny
    # intervals stringify in exponent form (e.g. 4.9e-05) and whole seconds
    # have no decimal point. The decimal point is then shown as a comma.
    ( my $time = sprintf '%.2f', tv_interval($started) ) =~ s/\./,/;

    # The query string handed to the result object is converted in place from
    # UTF-8 to ISO-8859-1 when utf8_query is set; the search has already run.
    from_to( $q, 'utf-8', 'iso-8859-1' ) if $self->config->{utf8_query};

    return Catalyst::Model::Xapian::Result->new(
        {
            mset      => $mset,
            search    => $self,
            query     => $q,
            query_obj => $query,
            querytime => $time,
            page      => $page,
            page_size => $page_size,
        }
    );
}
150            
151             =item prepare_enq <enq>
152              
153             Prepare enquire object before getting mset. Allows you to modify
154             ordering and such in your subclass.
155              
156             =cut
157            
# Hook called by search() with the enquire object before the match set is
# fetched. Does nothing here; subclasses override it to adjust ordering etc.
sub prepare_enq {
    return;
}
159            
160             =item extract_data <item> <query>
161              
162             Extract data from a L<Search::Xapian::Document>. Defaults to
163             using Storable::thaw.
164              
165             =cut
166              
# Turn a document from the index back into the data structure stored in it.
#
# Arguments: the invocant, the document object (anything with a get_data
#            method), and the query, which this default implementation ignores.
# Returns:   whatever Storable::thaw produces from the document's data.
sub extract_data {
    my ( $self, $document, $query ) = @_;
    return Storable::thaw( $document->get_data );
}
172              
173             1;
174              
175             =item qp
176              
177             Query Parser. The L<Search::Xapian::QueryParser> object used to parse the query.
178              
179             =back
180              
181             =head1 AUTHOR
182              
183             Marcus Ramberg <mramberg@cpan.org>
184              
185             =head1 LICENSE
186              
187             This library is free software. You can redistribute it and/or modify it under
188             the same terms as Perl itself.