File Coverage

blib/lib/Catmandu/Store/Solr.pm
Criterion Covered Total %
statement 21 49 42.8
branch 0 4 0.0
condition n/a
subroutine 7 10 70.0
pod 1 1 100.0
total 29 64 45.3


line stmt bran cond sub pod time code
1             package Catmandu::Store::Solr;
2              
3 3     3   298830 use Catmandu::Sane;
  3         175750  
  3         20  
4 3     3   595 use Catmandu::Util qw(:is :array);
  3         3  
  3         1035  
5 3     3   33 use Moo;
  3         4  
  3         16  
6 3     3   360547 use MooX::Aliases;
  3         5303  
  3         19  
7 3     3   2164 use WebService::Solr;
  3         391330  
  3         104  
8 3     3   1368 use Catmandu::Store::Solr::Bag;
  3         11  
  3         91  
9 3     3   19 use Catmandu::Error;
  3         3  
  3         1648  
10              
11             with 'Catmandu::Store';
12             with 'Catmandu::Transactional';
13              
14             =head1 NAME
15              
16             Catmandu::Store::Solr - A searchable store backed by Solr
17              
18             =cut
19              
20             our $VERSION = '0.0302';
21              
22             =head1 SYNOPSIS
23              
24             # From the command line
25              
26             # Import data into Solr
27             $ catmandu import JSON to Solr < data.json
28              
29             # Export data from Solr
30             $ catmandu export Solr to JSON > data.json
31              
32             # Export only one record
33             $ catmandu export Solr --id 1234
34              
35             # Export using a Solr query
36             $ catmandu export Solr --query "name:Recruitment OR name:college"
37              
38             # Export using a CQL query (needs a CQL mapping)
39             $ catmandu export Solr --q "name any college"
40              
41             # From Perl
42             use Catmandu::Store::Solr;
43              
44             my $store = Catmandu::Store::Solr->new(url => 'http://localhost:8983/solr' );
45              
46             my $obj1 = $store->bag->add({ name => 'Patrick' });
47              
48             printf "obj1 stored as %s\n" , $obj1->{_id};
49              
50             # Force an id in the store
51             my $obj2 = $store->bag->add({ _id => 'test123' , name => 'Nicolas' });
52              
53             # send all changes to solr (committed automatically)
54             $store->bag->commit;
55              
56             #transaction: rollback issued after 'die'
57             $store->transaction(sub{
58             $store->bag->delete_all();
59             die("oops, didn't want to do that!");
60             });
61              
62             my $obj3 = $store->bag->get('test123');
63              
64             $store->bag->delete('test123');
65              
66             $store->bag->delete_all;
67              
68             # All bags are iterators
69             $store->bag->each(sub { ... });
70             $store->bag->take(10)->each(sub { ... });
71              
72             # Some stores can be searched
73             my $hits = $store->bag->search(query => 'name:Patrick');
74              
75             =cut
76              
77             has url => (is => 'ro', default => sub { 'http://localhost:8983/solr' });
78              
79             has solr => (
80             is => 'ro',
81             lazy => 1,
82             builder => '_build_solr',
83             );
84              
85             has bag_key => (is => 'lazy', alias => 'bag_field');
86              
87             has on_error => (
88             is => 'ro',
89             isa => sub {
90             array_includes([qw(throw ignore)],$_[0]) or die("on_error must be 'throw' or 'ignore'");
91             },
92             lazy => 1,
93             default => sub { "throw" }
94             );
95              
96             has _bags_used => (
97             is => 'ro',
98             lazy => 1,
99             default => sub { []; }
100             );
101             around 'bag' => sub {
102              
103             my $orig = shift;
104             my $self = shift;
105              
106             my $bags_used = $self->_bags_used;
107             unless(array_includes($bags_used,$_[0])){
108             push @$bags_used,$_[0];
109             }
110              
111             $orig->($self,@_);
112             };
113              
114             sub _build_solr {
115 0     0     WebService::Solr->new($_[0]->url, {autocommit => 0, default_params => {wt => 'json'}});
116             }
117              
118             sub _build_bag_key {
119 0     0     $_[0]->key_for('bag');
120             }
121              
122             sub transaction {
123 0     0 1   my($self,$sub)=@_;
124              
125 0 0         if($self->{_tx}){
126 0           return $sub->();
127             }
128 0           my $solr = $self->solr;
129 0           my @res;
130              
131             eval {
132             #flush buffers of all known bags ( with commit=true ), to ensure correct state
133 0           for my $bag_name(@{ $self->_bags_used() }){
  0            
134 0           $self->bag($bag_name)->commit();
135             }
136              
137             #mark store as 'in transaction'. All subsequent calls to commit only flushes buffers without setting 'commit' to 'true' in solr
138 0           $self->{_tx} = 1;
139              
140             #transaction
141 0           @res = $sub->();
142              
143             #flushing buffers of all known bags (with commit=false)
144 0           for my $bag_name(@{ $self->_bags_used() }){
  0            
145 0           $self->bag($bag_name)->commit();
146             }
147              
148             #commit in solr
149 0           $solr->commit;
150              
151             #remove mark 'in transaction'
152 0           $self->{_tx} = 0;
153 0           1;
154 0 0         } or do {
155 0           my $err = $@;
156             #remove remaining documents from all buffers, because they were added during the transaction
157 0           for my $bag_name(@{ $self->_bags_used() }){
  0            
158 0           $self->bag($bag_name)->clear_buffer();
159             }
160             #rollback in solr
161 0           eval { $solr->rollback };
  0            
162             #remove mark 'in transaction'
163 0           $self->{_tx} = 0;
164 0           Catmandu::Error->throw($err);
165             };
166              
167 0           @res;
168             }
169              
170             =head1 SOLR SCHEMA
171              
172             The Solr schema needs to support at least the identifier field (C<_id> by default) and a bag
173             field (C<_bag> by default) to be able to store Catmandu items:
174              
175             # In schema.xml
176             <field name="_id" type="string" indexed="true" stored="true" required="true" />
177             <field name="_bag" type="string" indexed="true" stored="true" required="true" />
178              
179             The names of these fields can optionally be changed using the C<id_field> and C<bag_field>
180             configuration parameters of L<Catmandu::Store::Solr>.
181              
182             The C<_id> will contain the record identifier. The C<_bag> field will contain a string
183             to support L<Catmandu::Bag>-s in Solr.
184              
185             =head1 CONFIGURATION
186              
187             =over
188              
189             =item url
190              
191             URL of Solr core
192              
193             Default: C<http://localhost:8983/solr>
194              
195             =item id_field
196              
197             Name of unique field in Solr core.
198              
199             Default: C<_id>
200              
201             This Solr field is mapped to C<_id> when retrieved
202              
203             =item bag_field
204              
205             Name of field in Solr we can use to split the core into 'bags'.
206              
207             Default: C<_bag>
208              
209             This Solr field is mapped to C<_bag> when retrieved
210              
211             =item on_error
212              
213             Action to take when records cannot be saved to Solr. Default: throw. Available: ignore.
214              
215             =back
216              
217             =head1 METHODS
218              
219             =head2 new( url => $url )
220              
221             =head2 new( url => $url, id_field => '_id', bag_field => '_bag' )
222              
223             =head2 new( url => $url, bags => { data => { cql_mapping => \%mapping } } )
224              
225             Creates a new Catmandu::Store::Solr store connected to a Solr core, specified by $url.
226              
227             The store supports CQL searches when a cql_mapping is provided. This hash
228             contains a translation of CQL fields into Solr searchable fields.
229              
230             # Example mapping
231             $cql_mapping = {
232             title => {
233             op => {
234             'any' => 1 ,
235             'all' => 1 ,
236             '=' => 1 ,
237             '<>' => 1 ,
238             'exact' => {field => 'mytitle.exact' }
239             } ,
240             sort => 1,
241             field => 'mytitle',
242             cb => ['Biblio::Search', 'normalize_title']
243             }
244             }
245              
246             The CQL mapping above supports the CQL operators any, all, =, <> and exact for the 'title' field.
247              
248             For all the operators the 'title' field will be mapped to the Solr field 'mytitle', except
249             for the 'exact' operator. In case of 'exact' we will search the field 'mytitle.exact'.
250              
251             The CQL mapping has an optional callback field 'cb' which names a subroutine used to rewrite or
252             augment the search query. In this case, in the Biblio::Search package there is a normalize_title
253             subroutine which returns a string or an ARRAY of strings with augmented title(s). E.g.
254              
255             package Biblio::Search;
256              
257             sub normalize_title {
258             my ($self,$title) = @_;
259             (my $new_title = $title) =~ s{[^A-Z0-9]+}{}g;
260             $new_title;
261             }
262              
263             1;
264              
265             =head2 transaction
266              
267             When you issue $bag->commit, all changes made in the buffer are sent to solr, along with a commit.
268             So committing in Catmandu merely means flushing changes;-).
269              
270             When you wrap your subroutine within 'transaction', this behaviour is disabled temporarily.
271             When you call 'die' within the subroutine, a rollback is sent to solr.
272              
273             Remember that transactions happen at store level: after the transaction, all buffers of all bags are flushed to solr,
274             and a commit is issued in solr.
275              
276             # Record 'test' added
277             $bag->add({ _id => "test" });
278              
279             # Buffer flushed, and 'commit' sent to solr
280             $bag->commit();
281              
282             $bag->store->transaction(sub{
283             $bag->add({ _id => "test",title => "test" });
284             # Call to die: rollback sent to solr
285             die("oops, didn't want to do that!");
286             });
287              
288             # Record is still { _id => "test" }
289              
290             =head1 SEE ALSO
291              
292             L<Catmandu::Store>, L<WebService::Solr>
293              
294             =head1 AUTHOR
295              
296             Nicolas Steenlant, C<< nicolas.steenlant at ugent.be >>
297              
298             Patrick Hochstenbach, C<< patrick.hochstenbach at ugent.be >>
299              
300             Nicolas Franck, C<< nicolas.franck at ugent.be >>
301              
302             Pieter De Praetere
303              
304             =head1 LICENSE AND COPYRIGHT
305              
306             This program is free software; you can redistribute it and/or modify it
307             under the terms of either: the GNU General Public License as published
308             by the Free Software Foundation; or the Artistic License.
309              
310             See http://dev.perl.org/licenses/ for more information.
311              
312             =cut
313              
314             1;