File Coverage

blib/lib/Catmandu/Store/ElasticSearch.pm
Criterion Covered Total %
statement 18 24 75.0
branch 0 2 0.0
condition n/a
subroutine 6 9 66.6
pod 0 1 0.0
total 24 36 66.6


line stmt bran cond sub pod time code
1             package Catmandu::Store::ElasticSearch;
2              
3 1     1   1091 use Catmandu::Sane;
  1         191322  
  1         8  
4              
5             our $VERSION = '1.0202';
6              
7 1     1   810 use Search::Elasticsearch;
  1         5198  
  1         34  
8 1     1   8 use Catmandu::Util qw(is_instance);
  1         2  
  1         48  
9 1     1   552 use Catmandu::Store::ElasticSearch::Bag;
  1         4  
  1         37  
10 1     1   11 use Moo;
  1         2  
  1         9  
11 1     1   368 use namespace::clean;
  1         2  
  1         4  
12              
13             with 'Catmandu::Store';
14              
15             has _es_args => (is => 'rw', lazy => 1, default => sub {+{}});
16             has es => (is => 'lazy');
17             has is_es_1_or_2 => (is => 'lazy', init_arg => undef);
18              
19             sub BUILD {
20 0     0 0   my ($self, $args) = @_;
21 0           $self->_es_args($args);
22             }
23              
24             sub _build_es {
25 0     0     my ($self) = @_;
26 0           Search::Elasticsearch->new($self->_es_args);
27             }
28              
29             sub _build_is_es_1_or_2 {
30 0     0     my ($self) = @_;
31 0 0         is_instance($self->es, 'Search::Elasticsearch::Client::1_0::Direct')
32             || is_instance($self->es,
33             'Search::Elasticsearch::Client::2_0::Direct');
34             }
35              
36             1;
37              
38             __END__
39              
40             =pod
41              
42             =head1 NAME
43              
44             Catmandu::Store::ElasticSearch - A searchable store backed by Elasticsearch
45              
46             =head1 SYNOPSIS
47              
48             # From the command line
49              
50             # Import data into ElasticSearch
51             $ catmandu import JSON to ElasticSearch --bag catmandu < data.json
52              
53             # Export data from ElasticSearch
54             $ catmandu export ElasticSearch --bag catmandu to JSON > data.json
55              
56             # Export only one record
57             $ catmandu export ElasticSearch --bag catmandu --id 1234
58              
59             # Export using an ElasticSearch query
60             $ catmandu export ElasticSearch --bag catmandu --query "name:Recruitment OR name:college"
61              
62             # Export using a CQL query (needs a CQL mapping)
63             $ catmandu export ElasticSearch --bag catmandu --cql-query "name any college"
64              
65             # You need to specify the client version if your Elasticsearch server version is
66             # not the same as your default Search::Elasticsearch client version
67             $ catmandu import JSON to ElasticSearch --bag catmandu --client '5_0::Direct' < data.json
68              
69             # From Perl
70              
71             use Catmandu;
72              
73             my $store = Catmandu->store('ElasticSearch');
74             # options will be passed to the underlying Search::Elasticsearch client
75             my $store = Catmandu->store('ElasticSearch', nodes => ['server.example.com:9200']);
76              
77             my $obj1 = $store->bag('catmandu')->add({ name => 'Patrick' });
78              
79             printf "obj1 stored as %s\n" , $obj1->{_id};
80              
81             # Force an id in the store
82             my $obj2 = $store->bag('catmandu')->add({ _id => 'test123' , name => 'Nicolas' });
83              
84             # Commit all changes
85             $store->bag('catmandu')->commit;
86              
87             $store->bag('catmandu')->delete('test123');
88              
89             $store->bag('catmandu')->delete_all;
90              
91             # All bags are iterators
92             $store->bag->each(sub { ... });
93             $store->bag->take(10)->each(sub { ... });
94              
95             # Query the store using a simple ElasticSearch query
96             my $hits = $store->bag->search(query => '(content:this OR name:this) AND (content:that OR name:that)');
97              
98             # Native queries are also supported by providing a hash of terms
99             # See the ElasticSearch manual for more examples
100             my $hits = $store->bag->search(
101             query => {
102             # All name.exact fields that start with 'test'
103             prefix => {
104             'name.exact' => 'test'
105             }
106             } ,
107             limit => 1000);
108              
109             # Catmandu::Store::ElasticSearch supports CQL...
110             my $hits = $store->bag->search(cql_query => 'name any "Patrick"');
111              
112             =head1 METHODS
113              
114             =head2 new(%params)
115              
116             =head2 new(%params, bags => { mybag => { index => 'myindex', mapping => \%map, cql_mapping => \%map } })
117              
118             Create a new Catmandu::Store::ElasticSearch store. ElasticSearch connection
119             parameters will be passed on to the underlying client.
120              
121             Optionally provide for each bag a C<index> to indicate which index to use.
122             This defaults to the bag's name.
123              
124             Optionally provide for each bag a C<type> to indicate the name of the mapping.
125             This defaults to the bag's name.
126              
127             Optionally provide for each bag a C<mapping> which contains an ElasticSearch schema
128             for each field in the index (See below).
129              
130             Optionally provide for each bag a C<cql_mapping> to map fields to CQL indexes.
131              
132             Optionally provide for each bag an C<on_error> error handler (See below).
133              
134             =head1 INHERITED METHODS
135              
136             This Catmandu::Store implements:
137              
138             =over 3
139              
140             =item L<Catmandu::Store>
141              
142             =back
143              
144             Each Catmandu::Bag in this Catmandu::Store implements:
145              
146             =over 3
147              
148             =item L<Catmandu::Bag>
149              
150             =item L<Catmandu::Droppable>
151              
152             =item L<Catmandu::Searchable>
153              
154             =item L<Catmandu::CQLSearchable>
155              
156             =back
157              
158             =head1 INDEX MAPPING
159              
160             The mapping contains an Elasticsearch schema mapping for each
161             bag defined in the index. E.g.
162              
163             {
164             properties => {
165             title => {
166             type => 'text'
167             }
168             }
169             }
170              
171             See L<https://www.elastic.co/guide/en/elasticsearch/reference/current/mapping.html>
172             for more information on mappings.
173              
174             These mappings can be passed inside a Perl program, or be written into a
175             Catmandu 'catmandu.yml' configuration file. E.g.
176              
177             # catmandu.yml
178             store:
179             search:
180             package: ElasticSearch
181             options:
182             bags:
183             mybag:
184             mapping:
185             properties:
186             title:
187             type: text
188              
189             Via the command line these configuration parameters can be read in by using the
190             name of the store, C<search> in this case:
191              
192             $ catmandu import JSON to search --bag mybag < data.json
193             $ catmandu export search --bag mybag to JSON > data.json
194              
195             =head1 CQL MAPPING
196              
197             Catmandu::Store::ElasticSearch supports CQL searches when a cql_mapping is provided
198             for each bag. This hash contains a translation of CQL fields into Elasticsearch
199             searchable fields.
200              
201             # Example mapping
202             {
203             indexes => {
204             title => {
205             op => {
206             'any' => 1 ,
207             'all' => 1 ,
208             '=' => 1 ,
209             '<>' => 1 ,
210             'exact' => {field => [qw(mytitle.exact myalttitle.exact)]}
211             } ,
212             field => 'mytitle',
213             sort => 1,
214             cb => ['Biblio::Search', 'normalize_title']
215             }
216             }
217             }
218              
219             The CQL mapping above will support for the 'title' field the CQL operators:
220             any, all, =, <> and exact.
221              
222             The 'title' field will be mapped to the Elasticsearch field 'mytitle', except
223             for the 'exact' operator. In case of 'exact' we will search both the
224             'mytitle.exact' and 'myalttitle.exact' fields.
225              
226             The CQL mapping allows for sorting on the 'title' field. If, for instance, we
227             would like to use a special ElasticSearch field for sorting we could
228             have written "sort => { field => 'mytitle.sort' }".
229              
230             The callback field C<cb> contains a reference to subroutines to rewrite or
231             augment a search query. In this case, the Biblio::Search package contains a
232             normalize_title subroutine which returns a string or an ARRAY of strings
233             with augmented title(s). E.g.
234              
235             package Biblio::Search;
236              
237             sub normalize_title {
238             my ($self,$title) = @_;
239             (my $new_title = $title) =~ s{[^A-Z0-9]+}{}g;
240             $new_title;
241             }
242              
243             1;
244              
245             Also this configuration can be added to a catmandu.yml configuration file like:
246              
247             # catmandu.yml
248             store:
249             search:
250             package: ElasticSearch
251             options:
252             client: 6_0::Direct
253             bags:
254             book:
255             mapping:
256             properties:
257             title:
258             type: text
259             cql_mapping:
260             indexes:
261             title:
262             op:
263             'any': true
264             'all': true
265             '=': true
266             '<>': true
267             'exact':
268             field: [ 'mytitle.exact' , 'myalttitle.exact' ]
269             field: mytitle
270             sort: true
271             cb: [ 'Biblio::Search' , 'normalize_title' ]
272              
273             Via the command line these configuration parameters can be read in by using the
274             name of the store, C<search> in this case:
275              
276             $ catmandu export search --bag book -q 'title any blablabla' to JSON > data.json
277              
278             =head1 COMPATIBILITY
279              
280             The appropriate client should be installed:
281              
282             # Elasticsearch 6.x
283             cpanm Search::Elasticsearch::Client::6_0::Direct
284             # Elasticsearch 1.x
285             cpanm Search::Elasticsearch::Client::1_0::Direct
286              
287             And specified in the options:
288              
289             Catmandu::Store::ElasticSearch->new(client => '1_0::Direct')
290              
291             If you want to use the C<delete_by_query> method with Elasticsearch 2.0 you
292             have to L<install the delete by query plugin|https://www.elastic.co/guide/en/elasticsearch/plugins/current/plugins-delete-by-query.html>.
293              
294             =head1 ERROR HANDLING
295              
296             Error handling can be activated by specifying an error handling callback for index when creating
297             a store. E.g. to create an error handler for the bag 'data' index use:
298              
299             my $error_handler = sub {
300             my ($action, $response, $i) = @_;
301             do_something_with_error($response);
302             };
303              
304             my $store = Catmandu::Store::ElasticSearch->new(
305             bags => { data => { on_error => $error_handler } }
306             );
307              
308             Instead of a callback, the following shortcuts are also accepted for on_error:
309              
310             log: log the response
311              
312             throw: throw the response as an error
313              
314             ignore: do nothing
315              
316             my $store = Catmandu::Store::ElasticSearch->new(
317             bags => { data => { on_error => 'log' } }
318             );
319              
320             =head1 UPGRADING FROM A PRE 1.0 VERSION
321              
322             Versions of this store < 1.0 used Elasticsearch types to map bags to a single
323             index. Support for multiple types in one index has since been removed from
324             Elasticsearch and since 1.0 each bag is mapped to an index.
325              
326             You need to export your data before upgrading, update the configuration and then
327             import your data again.
328              
329             =head1 SEE ALSO
330              
331             L<Catmandu::Store>
332              
333             =head1 AUTHOR
334              
335             =over 4
336              
337             =item Nicolas Steenlant, C<< <nicolas.steenlant at ugent.be> >>
338              
339             =back
340              
341             =head1 CONTRIBUTORS
342              
343             =over 4
344              
345             =item Dave Sherohman, C<< dave.sherohman at ub.lu.se >>
346              
347             =item Robin Sheat, C<< robin at kallisti.net.nz >>
348              
349             =item Patrick Hochstenbach, C<< patrick.hochstenbach at ugent.be >>
350              
351             =back
352              
353             =head1 LICENSE AND COPYRIGHT
354              
355             This program is free software; you can redistribute it and/or modify it
356             under the terms of either: the GNU General Public License as published
357             by the Free Software Foundation; or the Artistic License.
358              
359             See http://dev.perl.org/licenses/ for more information.
360              
361             =cut