File Coverage

blib/lib/Catmandu/Store/ElasticSearch.pm
Criterion Covered Total %
statement 18 24 75.0
branch 0 2 0.0
condition n/a
subroutine 6 9 66.6
pod 0 1 0.0
total 24 36 66.6


line stmt bran cond sub pod time code
1             package Catmandu::Store::ElasticSearch;
2              
3 1     1   1158 use Catmandu::Sane;
  1         182327  
  1         8  
4              
5             our $VERSION = '1.01';
6              
7 1     1   804 use Search::Elasticsearch;
  1         5229  
  1         47  
8 1     1   9 use Catmandu::Util qw(is_instance);
  1         2  
  1         54  
9 1     1   503 use Catmandu::Store::ElasticSearch::Bag;
  1         4  
  1         36  
10 1     1   9 use Moo;
  1         3  
  1         8  
11 1     1   310 use namespace::clean;
  1         3  
  1         5  
12              
13             with 'Catmandu::Store';
14              
15             has _es_args => (is => 'rw', lazy => 1, default => sub {+{}});
16             has es => (is => 'lazy');
17             has is_es_1_or_2 => (is => 'lazy', init_arg => undef);
18              
19             sub BUILD {
20 0     0 0   my ($self, $args) = @_;
21 0           $self->_es_args($args);
22             }
23              
24             sub _build_es {
25 0     0     my ($self) = @_;
26 0           Search::Elasticsearch->new($self->_es_args);
27             }
28              
29             sub _build_is_es_1_or_2 {
30 0     0     my ($self) = @_;
31 0 0         is_instance($self->es, 'Search::Elasticsearch::Client::1_0::Direct')
32             || is_instance($self->es,
33             'Search::Elasticsearch::Client::2_0::Direct');
34             }
35              
36             1;
37              
38             __END__
39              
40             =pod
41              
42             =head1 NAME
43              
44             Catmandu::Store::ElasticSearch - A searchable store backed by Elasticsearch
45              
46             =head1 SYNOPSIS
47              
48             # From the command line
49              
50             # Import data into ElasticSearch
51             $ catmandu import JSON to ElasticSearch --bag catmandu < data.json
52              
53             # Export data from ElasticSearch
54             $ catmandu export ElasticSearch --bag catmandu to JSON > data.json
55              
56             # Export only one record
57             $ catmandu export ElasticSearch --bag catmandu --id 1234
58              
59             # Export using an ElasticSearch query
60             $ catmandu export ElasticSearch --bag catmandu --query "name:Recruitment OR name:college"
61              
62             # Export using a CQL query (needs a CQL mapping)
63             $ catmandu export ElasticSearch --bag catmandu --cql-query "name any college"
64              
65             # You need to specify the client version if your Elasticsearch server version is
66             # not the same as your default Search::Elasticsearch client version
67             $ catmandu import JSON to ElasticSearch --bag catmandu --client '5_0::Direct' < data.json
68              
69             # From Perl
70              
71             use Catmandu;
72              
73             my $store = Catmandu->store('ElasticSearch');
74              
75             my $obj1 = $store->bag('catmandu')->add({ name => 'Patrick' });
76              
77             printf "obj1 stored as %s\n" , $obj1->{_id};
78              
79             # Force an id in the store
80             my $obj2 = $store->bag('catmandu')->add({ _id => 'test123' , name => 'Nicolas' });
81              
82             # Commit all changes
83             $store->bag('catmandu')->commit;
84              
85             $store->bag('catmandu')->delete('test123');
86              
87             $store->bag('catmandu')->delete_all;
88              
89             # All bags are iterators
90             $store->bag->each(sub { ... });
91             $store->bag->take(10)->each(sub { ... });
92              
93             # Query the store using a simple ElasticSearch query
94             my $hits = $store->bag->search(query => '(content:this OR name:this) AND (content:that OR name:that)');
95              
96             # Native queries are also supported by providing a hash of terms
97             # See the ElasticSearch manual for more examples
98             my $hits = $store->bag->search(
99             query => {
100             # All name.exact fields that start with 'test'
101             prefix => {
102             'name.exact' => 'test'
103             }
104             } ,
105             limit => 1000);
106              
107             # Catmandu::Store::ElasticSearch supports CQL...
108             my $hits = $store->bag->search(cql_query => 'name any "Patrick"');
109              
110             =head1 METHODS
111              
112             =head2 new(%params)
113              
114             =head2 new(%params, bags => { mybag => { index => 'myindex', mapping => \%map, cql_mapping => \%map } })
115              
116             Create a new Catmandu::Store::ElasticSearch store. ElasticSearch connection
117             parameters will be passed on to the underlying client.
118              
119             Optionally provide for each bag a C<index> to indicate which index to use.
120             This defaults to the bag's name.
121              
122             Optionally provide for each bag a C<type> to indicate the name of the mapping.
123             This defaults to the bag's name.
124              
125             Optionally provide for each bag a C<mapping> which contains an Elasticsearch schema
126             for each field in the index (See below).
127              
128             Optionally provide for each bag a C<cql_mapping> to map fields to CQL indexes.
129              
130             Optionally provide for each bag an C<on_error> error handler (See below).
131              
132             =head1 INHERITED METHODS
133              
134             This Catmandu::Store implements:
135              
136             =over 3
137              
138             =item L<Catmandu::Store>
139              
140             =back
141              
142             Each Catmandu::Bag in this Catmandu::Store implements:
143              
144             =over 3
145              
146             =item L<Catmandu::Bag>
147              
148             =item L<Catmandu::Droppable>
149              
150             =item L<Catmandu::Searchable>
151              
152             =item L<Catmandu::CQLSearchable>
153              
154             =back
155              
156             =head1 INDEX MAPPING
157              
158             The mapping contains an Elasticsearch schema mapping for each
159             bag defined in the index. E.g.
160              
161             {
162             properties => {
163             title => {
164             type => 'text'
165             }
166             }
167             }
168              
169             See L<https://www.elastic.co/guide/en/elasticsearch/reference/current/mapping.html>
170             for more information on mappings.
171              
172             These mappings can be passed inside a Perl program, or be written into a
173             Catmandu 'catmandu.yml' configuration file. E.g.
174              
175             # catmandu.yml
176             store:
177             search:
178             package: ElasticSearch
179             options:
180             bags:
181             mybag:
182             mapping:
183             properties:
184             title:
185             type: text
186              
187             Via the command line these configuration parameters can be read in by using the
188             name of the store, C<search> in this case:
189              
190             $ catmandu import JSON to search --bag mybag < data.json
191             $ catmandu export search --bag mybag to JSON > data.json
192              
193             =head1 CQL MAPPING
194              
195             Catmandu::Store::ElasticSearch supports CQL searches when a cql_mapping is provided
196             for each bag. This hash contains a translation of CQL fields into Elasticsearch
197             searchable fields.
198              
199             # Example mapping
200             {
201             indexes => {
202             title => {
203             op => {
204             'any' => 1 ,
205             'all' => 1 ,
206             '=' => 1 ,
207             '<>' => 1 ,
208             'exact' => {field => [qw(mytitle.exact myalttitle.exact)]}
209             } ,
210             field => 'mytitle',
211             sort => 1,
212             cb => ['Biblio::Search', 'normalize_title']
213             }
214             }
215             }
216              
217             The CQL mapping above will support for the 'title' field the CQL operators:
218             any, all, =, <> and exact.
219              
220             The 'title' field will be mapped to the Elasticsearch field 'mytitle', except
221             for the 'exact' operator. In case of 'exact' we will search both the
222             'mytitle.exact' and 'myalttitle.exact' fields.
223              
224             The CQL mapping allows for sorting on the 'title' field. If, for instance, we
225             would like to use a special ElasticSearch field for sorting we could
226             have written "sort => { field => 'mytitle.sort' }".
227              
228             The callback field C<cb> contains a reference to subroutines to rewrite or
229             augment a search query. In this case, the Biblio::Search package contains a
230             normalize_title subroutine which returns a string or an ARRAY of strings
231             with augmented title(s). E.g.
232              
233             package Biblio::Search;
234              
235             sub normalize_title {
236             my ($self,$title) = @_;
237             (my $new_title = $title) =~ s{[^A-Z0-9]+}{}g;
238             $new_title;
239             }
240              
241             1;
242              
243             Also this configuration can be added to a catmandu.yml configuration file like:
244              
245             # catmandu.yml
246             store:
247             search:
248             package: ElasticSearch
249             options:
250             client: 6_0::Direct
251             bags:
252             book:
253             mapping:
254             properties:
255             title:
256             type: text
257             cql_mapping:
258             indexes:
259             title:
260             op:
261             'any': true
262             'all': true
263             '=': true
264             '<>': true
265             'exact':
266             field: [ 'mytitle.exact' , 'myalttitle.exact' ]
267             field: mytitle
268             sort: true
269             cb: [ 'Biblio::Search' , 'normalize_title' ]
270              
271             Via the command line these configuration parameters can be read in by using the
272             name of the store, C<search> in this case:
273              
274             $ catmandu export search --bag book -q 'title any blablabla' to JSON > data.json
275              
276             =head1 COMPATIBILITY
277              
278             The appropriate client should be installed:
279              
280             # Elasticsearch 6.x
281             cpanm Search::Elasticsearch::Client::6_0::Direct
282             # Elasticsearch 1.x
283             cpanm Search::Elasticsearch::Client::1_0::Direct
284              
285             And specified in the options:
286              
287             Catmandu::Store::ElasticSearch->new(client => '1_0::Direct')
288              
289             If you want to use the C<delete_by_query> method with Elasticsearch 2.0 you
290             have to L<install the delete by query plugin|https://www.elastic.co/guide/en/elasticsearch/plugins/current/plugins-delete-by-query.html>.
291              
292             =head1 ERROR HANDLING
293              
294             Error handling can be activated by specifying an error handling callback for a bag's index when creating
295             a store. E.g. to create an error handler for the bag 'data' index use:
296              
297             my $error_handler = sub {
298             my ($action, $response, $i) = @_;
299             do_something_with_error($response);
300             };
301              
302             my $store = Catmandu::Store::ElasticSearch->new(
303             bags => { data => { on_error => $error_handler } }
304             );
305              
306             Instead of a callback, the following shortcuts are also accepted for on_error:
307              
308             log: log the response
309              
310             throw: throw the response as an error
311              
312             ignore: do nothing
313              
314             my $store = Catmandu::Store::ElasticSearch->new(
315             bags => { data => { on_error => 'log' } }
316             );
317              
318             =head1 UPGRADING FROM A PRE 1.0 VERSION
319              
320             Versions of this store < 1.0 used Elasticsearch types to map bags to a single
321             index. Support for multiple types in one index has since been removed from
322             Elasticsearch and since 1.0 each bag is mapped to an index.
323              
324             You need to export your data before upgrading, update the configuration and then
325             import your data again.
326              
327             =head1 SEE ALSO
328              
329             L<Catmandu::Store>
330              
331             =head1 AUTHOR
332              
333             =over 4
334              
335             =item Nicolas Steenlant, C<< <nicolas.steenlant at ugent.be> >>
336              
337             =back
338              
339             =head1 CONTRIBUTORS
340              
341             =over 4
342              
343             =item Dave Sherohman, C<< dave.sherohman at ub.lu.se >>
344              
345             =item Robin Sheat, C<< robin at kallisti.net.nz >>
346              
347             =item Patrick Hochstenbach, C<< patrick.hochstenbach at ugent.be >>
348              
349             =back
350              
351             =head1 LICENSE AND COPYRIGHT
352              
353             This program is free software; you can redistribute it and/or modify it
354             under the terms of either: the GNU General Public License as published
355             by the Free Software Foundation; or the Artistic License.
356              
357             See http://dev.perl.org/licenses/ for more information.
358              
359             =cut