File Coverage

blib/lib/ElasticSearch.pm
Criterion Covered Total %
statement 18 83 21.6
branch 0 40 0.0
condition 0 17 0.0
subroutine 6 19 31.5
pod 11 12 91.6
total 35 171 20.4


line stmt bran cond sub pod time code
1             package ElasticSearch;
2              
3 1     1   33123 use strict;
  1         3  
  1         34  
4 1     1   6 use warnings FATAL => 'all';
  1         2  
  1         41  
5 1     1   803 use ElasticSearch::Transport();
  1         3  
  1         22  
6 1     1   6 use ElasticSearch::Error();
  1         2  
  1         15  
7 1     1   1142 use ElasticSearch::RequestParser;
  1         4  
  1         47  
8 1     1   13 use ElasticSearch::Util qw(throw parse_params);
  1         3  
  1         2159  
9              
10             our $VERSION = '0.68';
11             our $DEBUG = 0;
12              
13             #===================================
14             sub new {
15             #===================================
16 0     0 1   my ( $proto, $params ) = parse_params(@_);
17 0           my $self = {
18             _base_qs => {},
19             _default => {},
20             _builder_class => 'ElasticSearch::SearchBuilder'
21             };
22              
23 0   0       bless $self, ref $proto || $proto;
24 0           $self->{_transport} = ElasticSearch::Transport->new($params);
25 0           $self->$_( $params->{$_} ) for keys %$params;
26 0           return $self;
27             }
28              
29             #===================================
30             sub builder_class {
31             #===================================
32 0     0 1   my $self = shift;
33 0 0         if (@_) {
34 0           $self->{_builder_class} = shift;
35 0           delete $self->{_builder};
36             }
37 0           return $self->{_builder_class};
38             }
39              
40             #===================================
41             sub builder {
42             #===================================
43 0     0 1   my $self = shift;
44 0 0         unless ( $self->{_builder} ) {
45 0 0         my $class = $self->{_builder_class}
46             or $self->throw( 'Param', "No builder_class specified" );
47 0 0 0       eval "require $class; 1"
48             or $self->throw( 'Internal',
49             "Couldn't load class $class: " . ( $@ || 'Unknown error' ) );
50 0           $self->{_builder} = $class->new(@_);
51             }
52 0           return $self->{_builder};
53             }
54              
55             #===================================
56             sub request {
57             #===================================
58 0     0 0   my ( $self, $params ) = parse_params(@_);
59 0           return $self->transport->request($params);
60             }
61              
62             #===================================
63             sub use_index {
64             #===================================
65 0     0 1   my $self = shift;
66 0 0         if (@_) {
67 0           $self->{_default}{index} = shift;
68             }
69 0           return $self->{_default}{index};
70             }
71              
72             #===================================
73             sub use_type {
74             #===================================
75 0     0 1   my $self = shift;
76 0 0         if (@_) {
77 0           $self->{_default}{type} = shift;
78             }
79 0           return $self->{_default}{type};
80             }
81              
82             #===================================
83             sub reindex {
84             #===================================
85 0     0 1   my ( $self, $params ) = parse_params(@_);
86              
87 0 0         my $source = $params->{source}
88             or $self->throw( 'Param', 'Missing source param' );
89              
90 0   0 0     my $transform = $params->{transform} || sub { shift() };
  0            
91 0           my $verbose = !$params->{quiet};
92 0           my $dest_index = $params->{dest_index};
93 0   0       my $bulk_size = $params->{bulk_size} || 1000;
94 0   0       my $method = $params->{_method_name} || 'next';
95              
96 0           local $| = $verbose;
97 0 0         printf( "Reindexing %d docs\n", $source->total )
98             if $verbose;
99              
100 0           my @docs;
101 0           while (1) {
102 0           my $doc = $source->$method();
103 0 0 0       if ( !$doc or @docs == $bulk_size ) {
104 0           my $results = $self->bulk_index(
105             docs => \@docs,
106 0           map { $_ => $params->{$_} } qw(on_conflict on_error),
107             );
108 0 0 0       $results = $results->recv
109             if ref $results ne 'HASH'
110             && $results->isa('AnyEvent::CondVar');
111 0 0         if ( my $err = $results->{errors} ) {
112 0           my @errors = splice @$err, 0, 5;
113 0 0         push @errors, sprintf "...and %d more", scalar @$err
114             if @$err;
115 0           $self->throw( 'Request', "Errors occurred while reindexing:",
116             \@errors );
117             }
118 0           @docs = ();
119 0 0         print "." if $verbose;
120             }
121 0 0         last unless $doc;
122              
123 0 0         $doc = $transform->($doc) or next;
124 0 0         $doc->{version_type} = 'external'
125             if defined $doc->{_version};
126 0 0         if ( my $fields = delete $doc->{fields} ) {
127 0 0         $doc->{parent} = $fields->{_parent}
128             if defined $fields->{_parent};
129             }
130 0 0         $doc->{_index} = $dest_index
131             if $dest_index;
132 0           push @docs, $doc;
133             }
134              
135 0 0         print "\nDone\n" if $verbose;
136             }
137              
138             #===================================
139 0     0 1   sub transport { shift()->{_transport} }
140 0     0 1   sub trace_calls { shift->transport->trace_calls(@_) }
141 0     0 1   sub timeout { shift->transport->timeout(@_) }
142 0     0 1   sub refresh_servers { shift->transport->refresh_servers(@_) }
143             #===================================
144              
145             #===================================
146             sub query_parser {
147             #===================================
148 0     0 1   require ElasticSearch::QueryParser;
149 0           shift; # drop class/$self
150 0           ElasticSearch::QueryParser->new(@_);
151             }
152              
153             =head1 NAME
154              
155             ElasticSearch - DEPRECATED: An API for communicating with ElasticSearch
156              
157             =head1 VERSION
158              
159             Version 0.68, tested against ElasticSearch server version 0.90.0.
160              
161             =head1 DEPRECATION
162              
163             This module is being deprecated in favour of the new official client
164             L<Search::Elasticsearch>
165             and will be removed from CPAN in 2015.
166              
167             =head1 DESCRIPTION
168              
169             ElasticSearch is an Open Source (Apache 2 license), distributed, RESTful
170             Search Engine based on Lucene, and built for the cloud, with a JSON API.
171              
172             Check out its features: L<http://www.elasticsearch.org/>
173              
174             This module is a thin API which makes it easy to communicate with an
175             ElasticSearch cluster.
176              
177             It maintains a list of all servers/nodes in the ElasticSearch cluster, and
178             spreads the load across these nodes in round-robin fashion.
179             If the current active node disappears, then it attempts to connect to another
180             node in the list.
181              
182             Forking a process triggers a server list refresh, and a new connection to
183             a randomly chosen node in the list.
184              
185             =cut
186              
187             =head1 SYNOPSIS
188              
189              
190             use ElasticSearch;
191             my $es = ElasticSearch->new(
192             servers => 'search.foo.com:9200', # default '127.0.0.1:9200'
193             transport => 'http' # default 'http'
194             | 'httplite'
195             | 'httptiny'
196             | 'curl'
197             | 'aehttp'
198             | 'aecurl'
199             | 'thrift',
200             max_requests => 10_000, # default 10_000
201             trace_calls => 'log_file',
202             no_refresh => 0 | 1,
203             );
204              
205             $es->index(
206             index => 'twitter',
207             type => 'tweet',
208             id => 1,
209             data => {
210             user => 'kimchy',
211             post_date => '2009-11-15T14:12:12',
212             message => 'trying out Elastic Search'
213             }
214             );
215              
216             $data = $es->get(
217             index => 'twitter',
218             type => 'tweet',
219             id => 1
220             );
221              
222             # native elasticsearch query language
223             $results = $es->search(
224             index => 'twitter',
225             type => 'tweet',
226             query => {
227             text => { user => 'kimchy' }
228             }
229             );
230              
231             # ElasticSearch::SearchBuilder Perlish query language
232             $results = $es->search(
233             index => 'twitter',
234             type => 'tweet',
235             queryb => {
236             message => 'Perl API',
237             user => 'kimchy',
238             post_date => {
239             '>' => '2010-01-01',
240             '<=' => '2011-01-01',
241             }
242             }
243             );
244              
245              
246             $dodgy_qs = "foo AND AND bar";
247             $results = $es->search(
248             index => 'twitter',
249             type => 'tweet',
250             query => {
251             query_string => {
252             query => $es->query_parser->filter($dodgy_qs)
253             },
254             }
255             );
256              
257             See the C<examples/> directory for a simple working example.
258              
259             =cut
260              
261             =head1 GETTING ElasticSearch
262              
263             You can download the latest released version of ElasticSearch from
264             L<http://www.elasticsearch.org/download/>.
265              
266             See here for setup instructions:
267             L
268              
269             =cut
270              
271             =head1 CALLING CONVENTIONS
272              
273             I've tried to follow the same terminology as used in the ElasticSearch docs
274             when naming methods, so it should be easy to tie the two together.
275              
276             Some methods require a specific C<index> and a specific C<type>, while others
277             allow a list of indices or types, or allow you to specify all indices or
278             types. I distinguish between them as follows:
279              
280             $es->method( index => multi, type => single, ...)
281              
282             C<single> values must be a scalar, and are required parameters
283              
284             type => 'tweet'
285              
286             C<multi> values can be:
287              
288             index => 'twitter' # specific index
289             index => ['twitter','user'] # list of indices
290             index => undef # (or not specified) = all indices
291              
292             C<multi_req> values work like C<multi> values, but at least one value is
293             required, so:
294              
295             index => 'twitter' # specific index
296             index => ['twitter','user'] # list of indices
297             index => '_all' # all indices
298              
299             index => [] # error
300             index => undef # error
301              
302              
303             Also, see L</"use_index()/use_type()">.
304              
305             =head2 as_json
306              
307             If you pass C<< as_json => 1 >> to any request to the ElasticSearch server,
308             it will return the raw UTF8-decoded JSON response, rather than a Perl
309             datastructure.
310              
311             =cut
312              
313             =head1 RETURN VALUES AND EXCEPTIONS
314              
315             Methods that query the ElasticSearch cluster return the raw data structure
316             that the cluster returns. This may change in the future, but as these
317             data structures are still in flux, I thought it safer not to try to interpret.
318              
319             Anything that is known to be an error throws an exception, eg trying to delete
320             a non-existent index.
321              
322             =cut
323              
324             =head1 INTEGRATION WITH ElasticSearch::SearchBuilder
325              
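326             L<ElasticSearch::SearchBuilder> provides a concise Perlish
327             L<SQL::Abstract>-style query language, which gets translated into the native
328             L<Query DSL|http://www.elasticsearch.org/guide/reference/query-dsl> that
329             ElasticSearch uses.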
330              
331             For instance:
332              
333             {
334             content => 'search keywords',
335             -filter => {
336             tags => ['perl','ruby'],
337             date => {
338             '>' => '2010-01-01',
339             '<=' => '2011-01-01'
340             },
341             }
342             }
343              
344             Would be translated to:
345              
346             { query => {
347             filtered => {
348             query => { text => { content => "search keywords" } },
349             filter => {
350             and => [
351             { terms => { tags => ["perl", "ruby"] } },
352             { numeric_range => {
353             date => {
354             gt => "2010-01-01",
355             lte => "2011-01-01"
356             }}},
357             ],
358             }
359             }}}
360              
361             All you have to do to start using L<ElasticSearch::SearchBuilder> is to change
362             your C<query> or C<filter> parameter to C<queryb> or C<filterb> (where the
363             extra C<b> stands for C<builder>):
364              
365             $es->search(
366             queryb => { content => 'keywords' }
367             )
368              
369             If you want to see what your SearchBuilder-style query is being converted into,
370             you can either use L</"trace_calls()"> or access it directly with:
371              
372             $native_query = $es->builder->query( $query )
373             $native_filter = $es->builder->filter( $filter )
374              
375             See the L<ElasticSearch::SearchBuilder> docs for more information about
376             the syntax.
377              
378             =head1 METHODS
379              
380             =head2 Creating a new ElasticSearch instance
381              
382             =head3 new()
383              
384             $es = ElasticSearch->new(
385             transport => 'http',
386             servers => '127.0.0.1:9200' # single server
387             | ['es1.foo.com:9200',
388             'es2.foo.com:9200'], # multiple servers
389             trace_calls => 1 | '/path/to/log/file' | $fh,
390             timeout => 30,
391             max_requests => 10_000, # refresh server list
392             # after max_requests
393              
394             no_refresh => 0 | 1 # don't retrieve the live
395             # server list. Instead, use
396             # just the servers specified
397             );
398              
399             C<servers> can be either a single server or an ARRAY ref with a list of servers.
400             If not specified, then it defaults to C<localhost> and the port for the
401             specified transport (eg C<9200> for C<http*> or C<9500> for C<thrift>).
402              
403             These servers are used in a round-robin fashion. If any server fails to
404             connect, then the other servers in the list are tried, and if any
405             succeeds, then a list of all servers/nodes currently known to the
406             ElasticSearch cluster are retrieved and stored.
407              
408             Every C<max_requests> (default 10,000) this list of known nodes is refreshed
409             automatically. To disable this automatic refresh, you can set C<max_requests>
410             to C<0>.
411              
412             To force a lookup of live nodes, you can do:
413              
414             $es->refresh_servers();
415              
416             =head4 no_refresh()
417              
418             Regardless of the C<max_requests> setting, a list of live nodes will still be
419             retrieved on the first request. This may not be desirable behaviour
420             if, for instance, you are connecting to remote servers which use internal
421             IP addresses, or which don't allow remote C<nodes()> requests.
422              
423             If you want to disable this behaviour completely, set C<no_refresh> to C<1>,
424             in which case the transport module will round robin through the
425             C<servers> list only. Failed nodes will be removed from the list
426             (but added back in every C<max_requests> or when all nodes have failed).
427              
428             =head4 Transport Backends
429              
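430             There are various C<transport> backends that ElasticSearch can use:
431             C<http> (the default, based on LWP), C<httplite> (based on L<HTTP::Lite>),
432             C<httptiny> (based on L<HTTP::Tiny>), C<curl> (based on L<WWW::Curl>),
433             C<aehttp> (based on L<AnyEvent::HTTP>), C<aecurl> (based on
434             L<AnyEvent::Curl::Multi>) and C<thrift> (which uses the Thrift protocol).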
435              
436             Although the C<thrift> interface has the right buzzwords (binary, compact,
437             sockets), the generated Perl code is very slow. Until that is improved, I
438             recommend one of the C<http> backends instead.
439              
440             The C<httplite> backend is about 30% faster than the default C<http> backend,
441             and will probably become the default after more testing in production.
442              
443             The C<httptiny> backend is 1% faster again than C<httplite>.
444              
445             See also: L, L, L,
446             L
447             and L
448              
449             =cut
450              
451             =head2 Document-indexing methods
452              
453             =head3 index()
454              
455             $result = $es->index(
456             index => single,
457             type => single,
458             id => $document_id, # optional, otherwise auto-generated
459             data => {
460             key => value,
461             ...
462             },
463              
464             # optional
465             consistency => 'quorum' | 'one' | 'all',
466             create => 0 | 1,
467             parent => $parent,
468             percolate => $percolate,
469             refresh => 0 | 1,
470             replication => 'sync' | 'async',
471             routing => $routing,
472             timeout => eg '1m' or '10s',
473             version => int,
474             version_type => 'internal' | 'external',
475             );
476              
477             eg:
478              
479             $result = $es->index(
480             index => 'twitter',
481             type => 'tweet',
482             id => 1,
483             data => {
484             user => 'kimchy',
485             post_date => '2009-11-15T14:12:12',
486             message => 'trying out Elastic Search'
487             },
488             );
489              
490             Used to add a document to a specific C<index> as a specific C<type> with
491             a specific C<id>. If the C<index/type/id> combination already exists,
492             then that document is updated, otherwise it is created.
493              
494             Note:
495              
496             =over
497              
498             =item *
499              
500             If the C<id> is not specified, then ElasticSearch autogenerates a unique
501             ID and a new document is always created.
502              
503             =item *
504              
505             If C<version> is passed, and the current version in ElasticSearch is
506             different, then a C<Conflict> error will be thrown.
507              
508             =item *
509              
510             C<data> can also be a raw JSON encoded string (but ensure that it is correctly
511             encoded, otherwise you will see errors when trying to retrieve it from ElasticSearch).
512              
513             $es->index(
514             index => 'foo',
515             type => 'bar',
516             id => 1,
517             data => '{"foo":"bar"}'
518             );
519              
520             =item *
521              
522             C<timeout> for all CRUD methods and L</"search()"> is a query timeout,
523             specifying the amount of time ElasticSearch will spend (roughly) processing a
524             query. Units can be concatenated with the integer value, e.g., C<500ms> or
525             C<1s>.
526              
527             See also: L
528              
529             Note: this is distinct from the transport timeout, see L</"timeout()">.
530              
531             =back
532              
533             See also: L,
534             L and L
535              
536             =head3 set()
537              
538             C<set()> is a synonym for L</"index()">
539              
540              
541             =head3 create()
542              
543             $result = $es->create(
544             index => single,
545             type => single,
546             id => $document_id, # optional, otherwise auto-generated
547             data => {
548             key => value,
549             ...
550             },
551              
552             # optional
553             consistency => 'quorum' | 'one' | 'all',
554             parent => $parent,
555             percolate => $percolate,
556             refresh => 0 | 1,
557             replication => 'sync' | 'async',
558             routing => $routing,
559             timeout => eg '1m' or '10s',
560             version => int,
561             version_type => 'internal' | 'external',
562             );
563              
564             eg:
565              
566             $result = $es->create(
567             index => 'twitter',
568             type => 'tweet',
569             id => 1,
570             data => {
571             user => 'kimchy',
572             post_date => '2009-11-15T14:12:12',
573             message => 'trying out Elastic Search'
574             },
575             );
576              
577             Used to add a NEW document to a specific C<index> as a specific C<type> with
578             a specific C<id>. If the C<index/type/id> combination already exists,
579             then a C<Conflict> error is thrown.
580              
581             If the C<id> is not specified, then ElasticSearch autogenerates a unique
582             ID.
583              
584             If you pass a C<version> parameter to C<create>, then it must be C<0> unless
585             you also set C<version_type> to C<external>.
586              
587             See also: L</"index()">
588              
589             =head3 update()
590              
591             $result = $es->update(
592             index => single,
593             type => single,
594             id => single,
595              
596             # required
597             script => $script,
598             | doc => $doc
599              
600             # optional
601             params => { params },
602             upsert => { new_doc },
603             consistency => 'quorum' | 'one' | 'all',
604             fields => ['_source'],
605             ignore_missing => 0 | 1,
606             parent => $parent,
607             percolate => $percolate,
608             retry_on_conflict => 2,
609             routing => $routing,
610             timeout => '10s',
611             replication => 'sync' | 'async'
612             )
613              
614             The C method accepts a C