File Coverage

blib/lib/Interchange/Search/Solr.pm
Criterion Covered Total %
statement 74 501 14.7
branch 6 180 3.3
condition 0 31 0.0
subroutine 18 64 28.1
pod 24 24 100.0
total 122 800 15.2


line stmt bran cond sub pod time code
1             package Interchange::Search::Solr;
2              
3 13     13   104680 use strict;
  13         92  
  13         360  
4 13     13   67 use warnings;
  13         26  
  13         314  
5              
6 13     13   6940 use Moo;
  13         179941  
  13         59  
7 13     13   24945 use WebService::Solr;
  13         2634204  
  13         515  
8 13     13   6310 use WebService::Solr::Query;
  13         51794  
  13         507  
9 13     13   8011 use Data::Dumper;
  13         80296  
  13         816  
10 13     13   5967 use POSIX qw//;
  13         75535  
  13         366  
11 13     13   141 use Encode qw//;
  13         28  
  13         230  
12 13     13   8471 use XML::LibXML;
  13         496696  
  13         87  
13 13     13   7749 use Interchange::Search::Solr::Response;
  13         40  
  13         418  
14 13     13   5650 use Interchange::Search::Solr::Builder;
  13         39  
  13         433  
15 13     13   6086 use Lingua::StopWords;
  13         4290  
  13         691  
16 13     13   176 use Types::Standard qw/ArrayRef HashRef Int Bool/;
  13         27  
  13         79  
17 13     13   17896 use namespace::clean;
  13         124229  
  13         87  
18 13     13   8373 use HTTP::Response;
  13         34  
  13         379  
19 13     13   69 use Scalar::Util;
  13         26  
  13         529  
20 13     13   71 use constant { DEBUG => 0 };
  13         25  
  13         78405  
21              
22             =head1 NAME
23              
24             Interchange::Search::Solr -- Solr query encapsulation
25              
26             =head1 VERSION
27              
28             Version 0.20
29              
30             =cut
31              
32             our $VERSION = '0.20';
33              
34             =head1 DESCRIPTION
35              
36             Exposes Solr search API in a programmer-friendly way.
37              
38             =head1 SYNOPSIS
39              
40             Quick summary of what the module does.
41              
42             Perhaps a little code snippet.
43              
44             use Interchange::Search::Solr;
45             my $solr = Interchange::Search::Solr->new(solr_url => $url);
46             $solr->rows(10);
47             $solr->start(0);
48             $solr->search('shirts');
49             $results = $solr->results;
50              
51             =head2 INDEX HANDLERS
52              
53             # Clear the index
54             $solr->delete( ['*:*'] );
55              
56             # Add a document
57             $solr->add( [
58             { sku => 'foo', title => 'My Foo' },
59             { sku => 'bar', title => 'My Bar' },
60             ] )
61              
62             # Commit (only needed if autocommit is disabled)
63             $solr->commit;
64              
65             =head1 ACCESSORS
66              
67             =head2 solr_url
68              
69             Url of the solr instance. Read-only.
70              
71             =head2 input_encoding [DEPRECATED]
72              
73             Assume the urls to be in this encoding, so decode it before parsing
74             it. This is basically a (probably bugged) workaround when you have all
75             the shop in latin1. If the search keep crashing on non-ascii
76             characters, try to set this to iso-8859-1.
77              
78             =head2 rows
79              
80             Number of results to return. Read-write (so you can reuse the object).
81              
82             =head2 page_scope
83              
84             The number of paging items for the paginator
85              
86             =head2 search_fields
87              
88             An arrayref with the indexed fields to search. Defaults to:
89              
90             [qw/sku name description/]
91              
92             You can add boost to fields appending a float with a caret.
93              
94             [qw/ sku^5.0 name^1.0 description^0.4 /]
95              
96              
97             =head2 return_fields
98              
99             An arrayref of indexed fields to return. All by default.
100              
101             =head2 facets
102              
103             A string or an arrayref with the fields which will generate a facet.
104             Defaults to
105              
106             [qw/suchbegriffe manufacturer/]
107              
108             =head2 start
109              
110             Start of pagination. Read-write.
111              
112             =head2 page
113              
114             Current page. Read-write.
115              
116             =head2 filters
117              
118             An hashref with the filters. E.g.
119              
120             {
121             suchbegriffe => [qw/xxxxx yyyy/],
122             manufacturer => [qw/pikeur/],
123             }
124              
125             The keys of the hashref, to have any effect, must be one of the facets.
126              
127             =head2 response
128              
129             Read-only accessor to the response object of the current search.
130              
131             =head2 facets_found
132              
133             After a search, the structure with the facets can be retrieved with
134             this accessor, with this structure:
135              
136             {
137             field => [ { name => "name", count => 11 }, { name => "name", count => 9 }, ... ],
138             field2 => [ { name => "name", count => 7 }, { name => "name", count => 6 }, ... ],
139             ...
140             }
141              
142             Each hashref in each field's arrayref has the following keys:
143              
144             =over 4
145              
146             =item name
147              
148             The name to display
149              
150             =item count
151              
152             The count of item
153              
154             =item query_url
155              
156             The url fragment to toggle this filter.
157              
158             =item active
159              
160             True if currently in use (to be used for, e.g., checkboxes)
161              
162             =back
163              
164             =head2 search_string
165              
166             Debug only. The search string produced by the query.
167              
168             =head2 search_terms
169              
170             The terms used for the current search.
171              
172             =head2 search_structure
173              
174             The perl data structure used for the current search. It's passed to
175             L<Webservice::Solr::Query> for stringification.
176              
177             =head2 sorting
178              
179             The field used to sort the result (optional and defaults to score, as
180             per Solr doc).
181              
182             You can set it to a scalar with a field name or instead you can use
183             the L<SQL::Abstract> syntax (all the cases documented there are
184             supported). E.g.
185              
186             $solr->sorting([{ -asc => 'created_date' }, {-desc => [qw/updated_date sku/] }]);
187              
188             If you pass a reference, the C<sorting_direction> setting is ignored.
189              
190             =head2 sorting_direction
191              
192             The direction used by the sorting, when C<sorting> is specified and is a plain scalar.
193             Default to 'desc'.
194              
195             =cut
196              
197             has solr_url => (is => 'ro',
198             required => 1);
199              
200             has input_encoding => (is => 'ro');
201              
202              
203             =head2 wild_matching
204              
205             By default, a search term produce a query with a wildcard appended. So
206             searching for 1234 will query 1234*. With this option set to true, a
207             wildcard is prepended as well, querying for *1234* instead).
208              
209             =cut
210              
211              
212             has wild_matching => (is => 'ro',
213             default => sub { 0 });
214              
215             =head2 stop_words_langs
216              
217             The languages for which we should build the stop word list. It
218             defaults to:
219              
220             [ 'en' ]
221              
222             New in 0.10. To revert to the old behaviour (no filtering of
223             stopwords), pass an empty arrayref.
224              
225             =cut
226              
227             has stop_words => (is => 'lazy', isa => HashRef);
228              
229             has stop_words_langs => (is => 'ro', default => sub { [qw/en/ ] }, isa => ArrayRef);
230              
231             sub _build_stop_words {
232 0     0   0 my $self = shift;
233 0         0 my @stopwords;
234 0         0 foreach my $lang (@{ $self->stop_words_langs }) {
  0         0  
235 0 0       0 if (my $stops = Lingua::StopWords::getStopWords($lang, 'UTF-8')) {
236 0         0 push @stopwords, keys %$stops;
237             }
238             }
239 0         0 my %out = map { $_ => 1 } @stopwords;
  0         0  
240 0         0 return \%out;
241             }
242              
243             =head2 min_chars
244              
245             Minimum characters for filtering the search terms. Default to 3.
246              
247             New in 0.10. To revert to the old behaviour, set it to 0.
248              
249             =head2 permit_empty_search
250              
251             By default, empty searches are not executed. You can permit them
252             setting this accessor to 1. The module will reset it to 0 when the
253             search is executed.
254              
255             =cut
256              
257             has min_chars => (is => 'ro', isa => Int, default => sub { 3 });
258              
259             has permit_empty_search => (is => 'rw', isa => Bool, default => sub { 0 });
260              
261             has search_fields => (is => 'ro',
262             default => sub {
263             return [
264             qw/sku
265             name
266             description/
267             ]
268             },
269             isa => sub { die unless ref($_[0]) eq 'ARRAY' });
270              
271             has facets => (is => 'rw',
272             isa => sub { die "not an arrayref" unless ref($_[0]) eq 'ARRAY' },
273             default => sub {
274             return [qw/suchbegriffe manufacturer/];
275             });
276              
277             has rows => (is => 'rw',
278             default => sub { 10 });
279              
280             has page_scope => (is => 'rw',
281             default => sub { 5 });
282              
283             has start => (is => 'rw',
284             default => sub { 0 });
285              
286             has page => (is => 'rw',
287             default => sub { 1 });
288              
289             has filters => (is => 'rw',
290             isa => HashRef,
291             default => sub { return {} },
292             );
293              
294             has response => (is => 'rwp');
295              
296             has search_string => (is => 'rwp');
297              
298             has search_terms => (is => 'rw',
299             isa => sub { die unless ref($_[0]) eq 'ARRAY' },
300             default => sub { return [] },
301             );
302              
303             has search_structure => (is => 'rw');
304              
305             has sorting => (is => 'rw');
306             has sorting_direction => (is => 'rw',
307             isa => sub { die unless $_[0] =~ m/\A(asc|desc)\z/ },
308             default => sub { 'desc' },
309             );
310              
311             has return_fields => (is => 'rw',
312             isa => sub { die unless ref($_[0]) eq 'ARRAY' },
313             );
314              
315             has global_conditions => (is => 'rw',
316             isa => sub { die unless ref($_[0]) eq 'HASH' }
317             );
318              
319             sub results {
320 0     0 1 0 my $self = shift;
321 0         0 my @matches;
322 0 0       0 if ($self->response->ok) {
323 0         0 for my $doc ( $self->response->docs ) {
324 0         0 my (%record, $name);
325              
326 0         0 for my $fld ($doc->fields) {
327 0         0 $name = $fld->name;
328 0 0       0 next if $name =~ /^_/;
329              
330 0         0 $record{$name} = $fld->value;
331             }
332              
333 0         0 push @matches, \%record;
334             }
335             }
336 0         0 return \@matches;
337             }
338              
339             =head1 INTERNAL ACCESSORS
340              
341             =head2 solr_object
342              
343             The L<WebService::Solr> instance.
344              
345             =cut
346              
347             has solr_object => (is => 'lazy');
348              
349             sub _build_solr_object {
350 0     0   0 my $self = shift;
351 0         0 my @args = $self->solr_url;
352             # if (my $enc = $self->solr_encoding) {
353             # my %options = (
354             # default_params => {
355             # ie => $enc,
356             # },
357             # );
358             # push @args, \%options;
359             # }
360 0         0 return WebService::Solr->new(@args);
361             }
362              
363             =head2 builder_object(\@terms, \%filters, $page)
364              
365             Creates Interchange::Search::Solr::Builder instance.
366              
367             =cut
368              
369             sub builder_object {
370 0     0 1 0 my ($self, $terms, $filters, $page) = @_;
371 0         0 return Interchange::Search::Solr::Builder->new(
372             terms => $terms,
373             filters => $filters,
374             facets => $self->facets,
375             page => $page
376             );
377             }
378              
379             =head1 METHODS
380              
381             =head2 search( [ $string_or$structure ] ])
382              
383             Run a search and return a L<WebService::Solr::Response> object.
384              
385             The method accept zero or one argument.
386              
387             With no arguments, run a full wildcard search.
388              
389             With one argument, if it's a string, run the search against all the
390             indexed fields. If it's a structure, build a query for it. The syntax
391             of the structure is described at L<WebService::Solr::Query>.
392              
393             After calling this method you can inspect the response using the
394             following methods:
395              
396             =head2 results
397              
398             Returns reference to list of results, each result is a hash
399             reference.
400              
401             =head2 skus_found
402              
403             Returns just a plain list of skus.
404              
405             =head2 num_found
406              
407             Return the number of items found
408              
409             =head2 has_more
410              
411             Return true if there are more pages
412              
413             =cut
414              
415             sub search {
416 0     0 1 0 my ($self, $string) = (shift, shift);
417 0 0       0 die "Extra parameter found" if (@_);
418 0         0 my $structure;
419             # here we just split the terms, set C<search_terms>, and call
420             # _do_search.
421 0 0 0     0 if ($string && ref($string)) {
422 0         0 $structure = $string;
423 0         0 $string = undef;
424             }
425 0         0 my @terms;
426 0 0       0 if ($string) {
427 0         0 @terms = grep { $self->_term_is_good($_) } split(/\s+/, $string);
  0         0  
428             }
429 0         0 $self->search_terms(\@terms);
430 0         0 $self->search_structure($structure);
431 0         0 return $self->_do_search;
432             }
433              
434             sub _do_search {
435 0     0   0 my $self = shift;
436              
437 0         0 my @terms = grep { $self->_term_is_good($_) } @{ $self->search_terms };
  0         0  
  0         0  
438              
439 0         0 my $query = '';
440 0         0 my $wild_match = '';
441 0 0       0 if ($self->wild_matching) {
442 0         0 $wild_match = '*';
443             }
444              
445 0 0       0 if (@terms) {
446 0         0 my @escaped = map { $wild_match . WebService::Solr::Query->escape($_) . '*' } @terms;
  0         0  
447 0         0 $query = '(' . join(' AND ', @escaped) . ')';
448             # even if the structure looks correct, the query isn't build properly
449             # print Dumper($query);
450             }
451             else {
452             # catch all
453 0 0       0 if (my $structure = $self->search_structure) {
454 0         0 $query = WebService::Solr::Query->new($structure);
455             }
456              
457             }
458 0         0 return $self->execute_query($query);
459             }
460              
461             =head2 execute_query($query)
462              
463             Accept either a raw string with the query or a WebService::Solr::Query
464             object and run the query against the Solr service.
465              
466             If no query is provided, a wildcard search is performed.
467              
468             =cut
469              
470             sub _search_is_empty {
471 0     0   0 my $self = shift;
472 0         0 my @terms = @{ $self->search_terms };
  0         0  
473 0         0 my %filters = %{ $self->filters };
  0         0  
474 0         0 my $structure = $self->search_structure;
475 0 0 0     0 if (@terms || %filters || $structure) {
      0        
476 0         0 return 0;
477             }
478             else {
479 0         0 return 1;
480             }
481             }
482              
483             sub execute_query {
484 0     0 1 0 my ($self, $query) = @_;
485 0         0 my $querystring = '*';
486 0 0       0 if (ref($query)) {
    0          
487 0         0 $querystring = $query->stringify;
488             }
489             elsif ($query) {
490 0         0 $querystring = $query;
491             }
492              
493 0 0       0 if (my $global = $self->global_conditions) {
494 0         0 my @conditions = ($querystring);
495 0         0 foreach my $condition (keys %$global) {
496 0         0 my $string;
497 0 0       0 if (ref($global->{$condition}) eq 'SCALAR') {
498 0         0 $string = ${$global->{$condition}};
  0         0  
499             }
500             else {
501             $string = '"' .
502 0         0 WebService::Solr::Query->escape($global->{$condition}) . '"';
503             }
504 0         0 push @conditions, qq{($condition:$string)};
505             }
506 0         0 $querystring = '(' . join(' AND ', @conditions) . ')';
507             }
508             # save the debug info
509 0         0 $self->_set_search_string($querystring);
510              
511 0         0 my $our_res;
512 0 0       0 unless ($self->permit_empty_search) {
513 0 0       0 if ($self->_search_is_empty) {
514 0         0 $our_res = Interchange::Search::Solr::Response->new(HTTP::Response->new(404));
515 0         0 $our_res->error('empty_search');
516             }
517             }
518 0 0       0 unless ($our_res) {
519 0         0 my %params = $self->construct_params;
520             # print Dumper(\%params) if DEBUG;
521 0         0 my $res = $self->solr_object->search($querystring, \%params);
522 0         0 $our_res = Interchange::Search::Solr::Response->new($res->raw_response);
523              
524 0 0       0 if ($our_res->solr_status != 0) {
525 0         0 die "Solr failure: ".$our_res->raw_response->message;
526             }
527             }
528 0         0 $self->_set_response($our_res);
529 0         0 $self->permit_empty_search(0);
530 0         0 return $our_res;
531             }
532              
533             =head2 construct_params
534              
535             Constructs parameters for the search.
536              
537             =cut
538              
539             sub construct_params {
540             # set start and rows
541 0     0 1 0 my $self = shift;
542 0         0 my %params = (
543             start => $self->_start_row,
544             rows => $self->_rows
545             );
546              
547              
548 0 0       0 if (my $facet_field = $self->facets) {
549 0         0 $params{facet} = 'true';
550 0         0 $params{'facet.field'} = $facet_field;
551 0         0 $params{'facet.mincount'} = 1;
552              
553             # see if we have filters set
554 0 0       0 if (my $filters = $self->filters) {
555 0         0 my @fq;
556 0         0 foreach my $facet (@{ $self->facets }) {
  0         0  
557 0 0       0 if (my $condition = $filters->{$facet}) {
558 0         0 push @fq,
559             WebService::Solr::Query->new({
560             $facet => $condition,
561             });
562             }
563             }
564 0 0       0 if (@fq) {
565 0         0 $params{fq} = \@fq;
566             }
567             }
568             }
569 0 0       0 if (my $sort_by = $self->sorting) {
570 0         0 my $sort_by_struct;
571 0 0       0 if (ref($sort_by)) {
572 0         0 $sort_by_struct = $sort_by;
573             }
574             else {
575 0         0 $sort_by_struct = { '-' . $self->sorting_direction => $sort_by };
576             }
577 0         0 $params{sort} = join(', ', $self->_build_sort_field($sort_by_struct));
578             }
579 0 0       0 if (my $fl = $self->return_fields) {
580 0         0 $params{fl} = join(',', @$fl);
581             }
582             # if using edifmax
583 0         0 $params{qf} = join(' ', @{ $self->search_fields });
  0         0  
584 0         0 $params{defType} = 'edismax';
585 0         0 return %params;
586             }
587              
588             sub _start_row {
589 0     0   0 my $self = shift;
590 0   0     0 return $self->_convert_to_int($self->start) || 0;
591             }
592              
593             sub _rows {
594 0     0   0 my $self = shift;
595 0   0     0 return $self->_convert_to_int($self->rows) || 10;
596             }
597              
598             sub _convert_to_int {
599 0     0   0 my ($self, $maybe_num) = @_;
600 0 0       0 return 0 unless $maybe_num;
601 0 0       0 if ($maybe_num =~ m/([1-9][0-9]*)/) {
602 0         0 return $1;
603             }
604             else {
605 0         0 return 0;
606             }
607             }
608              
609             sub num_found {
610 0     0 1 0 my $self = shift;
611 0 0       0 if (my $res = $self->response) {
612 0   0     0 return $res->content->{response}->{numFound} || 0;
613             }
614             else {
615 0         0 return 0;
616             }
617             }
618              
619             sub skus_found {
620 0     0 1 0 my $self = shift;
621 0         0 my @skus;
622 0 0       0 if ($self->response->ok) {
623 0         0 foreach my $item ($self->response->docs) {
624 0         0 push @skus, $item->value_for('sku');
625             }
626             }
627 0         0 return @skus;
628             }
629              
630             sub facets_found {
631 0     0 1 0 my $self = shift;
632 0         0 my $res = $self->response;
633 0         0 my $facets = $res->content->{facet_counts}->{facet_fields};
634 0         0 my %out;
635 0         0 foreach my $field (keys %$facets) {
636 0         0 my @list = @{$facets->{$field}};
  0         0  
637 0         0 my @items;
638 0         0 while (@list > 1) {
639 0         0 my $name = shift @list;
640 0         0 my $count = shift @list;
641 0         0 push @items, {
642             name => $name,
643             count => $count,
644             query_url => $self->_build_facet_url($field, $name),
645             active => $self->_filter_is_active($field, $name),
646             };
647             }
648 0         0 $out{$field} = \@items;
649             }
650 0         0 return \%out;
651             }
652              
653              
654             sub has_more {
655 0     0 1 0 my $self = shift;
656 0 0       0 if ($self->num_found > ($self->_start_row + $self->_rows)) {
657 0         0 return 1;
658             }
659             else {
660 0         0 return 0;
661             }
662             }
663              
664             =head2 num_docs
665              
666             Returns the number of documents in the index.
667              
668             =cut
669              
670             sub num_docs {
671 0     0 1 0 my $self = shift;
672 0         0 my $response;
673              
674 0         0 $self->permit_empty_search(1);
675 0         0 $response = $self->search('*:*');
676              
677 0         0 return $self->num_found;
678             }
679              
680             =head2 add $data
681              
682             Adds the documents in $data to the index. $data is a reference to an array
683             of hash references, e.g.:
684              
685             [ { sku => 'foo', title => 'My Foo' }, { sku => 'bar', title => 'My Bar' } ]
686              
687             =cut
688              
689             sub add {
690 0     0 1 0 my ($self, $data) = @_;
691 0         0 my ($res, $our_res, $xml);
692              
693 0         0 $xml = $self->_build_xml_add_op($data);
694 0         0 $res = $self->solr_object->_send_update($xml);
695 0         0 $our_res = Interchange::Search::Solr::Response->new($res->raw_response);
696              
697 0         0 return $our_res;
698             }
699              
700             =head2 delete $data
701              
702             Deletes documents by queries in $data.
703              
704             =cut
705              
706             sub delete {
707 0     0 1 0 my ($self, $data) = @_;
708 0         0 my ($res, $our_res, $xml);
709              
710 0         0 $xml = $self->_build_xml_del_op($data);
711 0         0 $res = $self->solr_object->_send_update($xml);
712 0         0 $our_res = Interchange::Search::Solr::Response->new($res->raw_response);
713              
714 0         0 return $our_res;
715             }
716              
717             =head2 commit
718              
719             Commits recently indexed content. This is necessary to make these changes visible
720             for searches unless autocommit is enabled in the configuration.
721              
722             =cut
723              
724             sub commit {
725 0     0 1 0 my $self = shift;
726 0         0 my ($res, $our_res);
727 0         0 my $xml = '<commit/>';
728              
729 0         0 $res = $self->solr_object->_send_update($xml);
730 0         0 $our_res = Interchange::Search::Solr::Response->new($res->raw_response);
731              
732 0         0 return $our_res;
733             }
734              
735             =head2 maintainer_update($mode)
736              
737             Perform a maintainer update and return a L<WebService::Solr::Response>
738             object.
739              
740             =cut
741              
742             sub maintainer_update {
743 0     0 1 0 my ($self, $mode, $data) = @_;
744 0 0       0 die "Missing argument" unless $mode;
745 0         0 my (@query, %params, $xml, $wss_res, $res);
746              
747 0 0       0 if ($mode eq 'add') {
    0          
    0          
    0          
748 0         0 $res = $self->add($data);
749             }
750             elsif ($mode eq 'clear') {
751 0         0 $res = $self->delete(['*:*']);
752             }
753             elsif ($mode eq 'full') {
754 0         0 @query = ('dataimport', { command => 'full-import' });
755             }
756             elsif ($mode eq 'delta') {
757 0         0 @query = ('dataimport', { command => 'delta-import' });
758             }
759             else {
760 0         0 die "Unrecognized mode $mode!";
761             }
762              
763 0 0       0 unless ( $res ) {
764 0         0 $wss_res = $self->solr_object->generic_solr_request(@query);
765 0         0 $res = Interchange::Search::Solr::Response->new($wss_res->raw_response);
766             }
767              
768 0 0       0 if ( $res->success ) {
769             # commit the changes
770 0         0 $res = $self->commit;
771             }
772              
773 0         0 return $res;
774             }
775              
776             # builds XML for add index handler
777              
778             sub _build_xml_add_op {
779 3     3   1658 my ($self, $input) = @_;
780 3         24 my $doc = XML::LibXML::Document->new;
781 3         27 my $el_add = $doc->createElement('add');
782 3         6 my $list;
783 3         23 $doc->addChild($el_add);
784              
785 3 100       10 if (ref($input) eq 'ARRAY') {
    50          
786 1         16 $list = $input;
787             }
788             elsif (ref($input) eq 'HASH') {
789 2         83 $list = [ $input ];
790             }
791             else {
792 0         0 die "Bad usage: input should be an arrayref or an hashref";
793             }
794              
795 3         7 foreach my $data (@$list) {
796 4         42 my $el_doc = $doc->createElement('doc');
797 4         19 $el_add->addChild($el_doc);
798 4         9 while (my ($name, $value) = each %$data) {
799 4 50       60 if (defined $value) {
800 4         6 my @values;
801 4 100       13 if (ref($value) eq 'ARRAY') {
802 2         5 @values = @$value;
803             } else {
804 2         5 @values = ($value);
805             }
806 4         9 foreach my $v (@values) {
807 6         56 my $el_field = $doc->createElement('field');
808 6         19 $el_field->setAttribute(name => $name);
809 6         68 $el_field->appendText($v);
810 6         23 $el_doc->addChild($el_field);
811             }
812             }
813             }
814             }
815 3         196 return $doc->firstChild->toString;
816             }
817              
818             # builds XML for delete index handler
819              
820             sub _build_xml_del_op {
821 0     0     my ($self, $input) = @_;
822 0           my $doc = XML::LibXML::Document->new;
823 0           my $el_del = $doc->createElement('delete');
824 0           my $list;
825 0           $doc->addChild($el_del);
826              
827 0 0         if (ref($input) eq 'ARRAY') {
828 0           $list = $input;
829             }
830             else {
831 0           die "Bad usage: input should be an arrayref";
832             }
833              
834 0           foreach my $query (@$list) {
835 0           my $el_q = $doc->createElement('query');
836 0           $el_q->appendText($query);
837 0           $el_del->addChild($el_q);
838             }
839              
840 0           return $doc->firstChild->toString;
841             }
842              
843             =head2 reset_object
844              
845             Reset the leftovers of a possible previous search.
846              
847             =head2 search_from_url($url)
848              
849             Parse the url provided and do the search.
850              
851             =cut
852              
853             sub reset_object {
854 0     0 1   my $self = shift;
855 0           $self->start(0);
856 0           $self->page(1);
857 0           $self->_set_response(undef);
858 0           $self->_set_search_string(undef);
859 0           $self->filters({});
860 0           $self->search_terms([]);
861 0           $self->search_structure(undef);
862             }
863              
864             sub search_from_url {
865 0     0 1   my ($self, $url) = @_;
866 0 0         if (my $enc = $self->input_encoding) {
867 0           $url = Encode::decode($enc, $url);
868             }
869 0           $self->_parse_url($url);
870             # at this point, all the parameters are set after the url parsing
871 0           return $self->_do_search;
872             }
873              
874             =head2 add_terms_to_url($url, $string)
875              
876             Parse the url, and return a new one with the additional words added.
877             The page is discarded, while the filters are retained.
878              
879             =cut
880              
881             sub add_terms_to_url {
882 0     0 1   my ($self, $url, @other_terms) = @_;
883 0 0         die "Bad usage" unless defined $url;
884 0           $self->_parse_url($url);
885 0 0         return $url unless @other_terms;
886 0           my @additional_terms = grep { $self->_term_is_good($_) } @other_terms;
  0            
887 0           my @terms = @{ $self->search_terms };
  0            
888 0           push @terms, @additional_terms;
889 0           $self->search_terms(\@terms);
890 0           my $builder = $self->builder_object(
891             $self->search_terms,
892             $self->filters
893             );
894 0           return $builder->url_builder;
895            
896             }
897              
898              
899             sub _parse_url {
900 0     0     my ($self, $url) = @_;
901 0           $self->reset_object;
902 0 0         return unless $url;
903 0           my @fragments = grep { $_ } split('/', $url);
  0            
904              
905             # nothing to do if there are no fragments
906 0 0         return unless @fragments;
907              
908 0           my (@terms, %filters);
909             # the first keyword we need is the optional "words"
910 0 0         if ($fragments[0] eq 'words') {
911             # just discards and check if we have something. This could
912             # also mean that the next word is not a keyword.
913 0           shift @fragments;
914 0 0         if (@fragments) {
915 0           push @terms, shift @fragments;
916             }
917             }
918              
919             # the page is the last fragment, so check that
920 0 0         if (@fragments > 1) {
921 0           my $page = $#fragments;
922 0 0         if ($fragments[$page - 1] eq 'page') {
923 0           $page = pop @fragments;
924             # and remove the page
925 0           pop @fragments;
926             # but assert it is a number, 1 otherwise
927 0 0         if ($page =~ s/^([1-9][0-9]*)$/)/) {
928 0           $self->page($1);
929             }
930             else {
931 0           $self->page(1);
932             }
933 0           $self->_set_start_from_page;
934             }
935             }
936 0           my $current_filter;
937 0           while (@fragments) {
938              
939 0           my $chunk = shift @fragments;
940              
941             # we lookup until the first keyword, but only if there is
942             # non-keywords after that.
943 0 0 0       if ($self->_fragment_is_keyword($chunk) and
      0        
944             @fragments and
945             !$self->_fragment_is_keyword($fragments[0])) {
946             # chunk is actually a keyword. Set the flag, prepare the
947             # array and move on.
948 0           $current_filter = $chunk;
949 0           $filters{$current_filter} = [];
950 0           next;
951             }
952              
953             # are we inside a filter?
954 0 0         if ($current_filter) {
955 0           push @{ $filters{$current_filter} }, $chunk;
  0            
956             }
957             # if not, it's a term
958             else {
959 0           push @terms, $chunk;
960             }
961             }
962             # filter the terms
963 0           $self->search_terms([ grep { $self->_term_is_good($_) } @terms ]);
  0            
964 0           $self->filters(\%filters);
965             }
966              
967             sub _fragment_is_keyword {
968 0     0     my ($self, $fragment) = @_;
969 0 0         return unless defined $fragment;
970 0           return grep { $_ eq $fragment } @{ $self->facets };
  0            
  0            
971             }
972              
973              
974             sub _set_start_from_page {
975 0     0     my $self = shift;
976 0           $self->start($self->rows * ($self->page - 1));
977             }
978              
979              
980             =head2 current_search_to_url
981              
982             Return the url for the current search.
983              
984             =cut
985              
986             sub current_search_to_url {
987 0     0 1   my ($self, %args) = @_;
988 0           my $page;
989              
990 0 0         if (! $args{hide_page}) {
991 0           $page = $self->page;
992             }
993              
994 0           my $builder = $self->builder_object($self->search_terms,
995             $self->filters,
996             $page);
997              
998 0           return $builder->url_builder;
999             }
1000              
1001             sub _build_facet_url {
1002 0     0     my ($self, $field, $name) = @_;
1003             # get the current filters
1004             # print "Building $field $name\n";
1005 0           my @terms = @{ $self->search_terms };
  0            
1006             # page is not needed
1007              
1008             # the hash for the url builder
1009 0           my %toggled_filters;
1010              
1011             # the current filters
1012 0           my $filters = $self->filters;
1013              
1014             # loop over the facets we defined
1015 0           foreach my $facet (@{ $self->facets }) {
  0            
1016              
1017             # copy of the active filters
1018 0 0         my @active = @{ $filters->{$facet} || [] };
  0            
1019              
1020             # filter is active: remove
1021 0 0         if ($self->_filter_is_active($facet, $name)) {
    0          
1022 0           @active = grep { $_ ne $name } @active;
  0            
1023             }
1024             # it's not active, but we're building an url for this facet
1025             elsif ($facet eq $field) {
1026 0           push @active, $name;
1027             }
1028             # and store
1029 0 0         $toggled_filters{$facet} = \@active if @active;
1030             }
1031             # print Dumper(\@terms, \%toggled_filters);
1032 0           my $builder = $self->builder_object(\@terms, \%toggled_filters);
1033 0           return $builder->url_builder;
1034             }
1035              
1036             sub _filter_is_active {
1037 0     0     my ($self, $field, $name) = @_;
1038 0           my $filters = $self->filters;
1039 0 0         if (my $list = $self->filters->{$field}) {
1040 0 0         if (my @active = @$list) {
1041 0 0         if (grep { $_ eq $name } @active) {
  0            
1042 0           return 1;
1043             }
1044             }
1045             }
1046 0           return 0 ;
1047             }
1048              
1049             =head2 paginator
1050              
1051             Return an hashref suitable to be turned into a paginator, undef if
1052             there is no need for a paginator.
1053              
1054             Be careful that a defined empty string in the first/last/next/previous
1055             keys is perfectly legit and points to an unfiltered search which will
1056             return all the items, so concatenating it to the prefix is perfectly
1057             fine (e.g. "products/" . ''). When rendering this structure to HTML,
1058             just check if the value is defined, not if it's true.
1059              
1060             The structure looks like this:
1061              
1062             {
1063             first => 'words/bla' || undef,
1064             first_page => 1 || undef,
1065             last => 'words/bla/page/5' || undef,
1066             last_page => 5 || undef,
1067             next => 'words/bla/page/5' || undef,
1068             next_page => 5 || undef
1069             previous => 'words/bla/page/3' || undef,
1070             previous_page => 3 || undef,
1071             pages => [
1072             {
1073             name => 1,
1074             url => 'words/bla/page/1',
1075             },
1076             {
1077             name => 2,
1078             url => 'words/bla/page/2',
1079             },
1080             {
1081             name => 3,
1082             url => 'words/bla/page/3',
1083             },
1084             {
1085             name => 4,
1086             url => 'words/bla/page/4',
1087             current => 1,
1088             },
1089             {
1090             name => 5,
1091             url => 'words/bla/page/5',
1092             },
1093             ]
1094             total_pages => 5
1095             }
1096              
1097             =cut
1098              
1099             sub paginator {
1100 0     0 1   my $self = shift;
1101 0   0       my $page = $self->page || 1;
1102 0           my $page_size = $self->rows;
1103 0           my $page_scope = $self->page_scope;
1104 0           my $total = $self->num_found;
1105 0 0         return undef unless $total;
1106 0           my $total_pages = POSIX::ceil($total / $page_size);
1107 0 0         return undef if $total_pages < 2;
1108              
1109             # compute the scope
1110 0 0         my $start = ($page - $page_scope > 0) ? ($page - $page_scope) : 1;
1111 0 0         my $end = ($page + $page_scope < $total_pages) ? ($page + $page_scope) : $total_pages;
1112              
1113 0           my %pager = (items => []);
1114 0           my $builder = $self->builder_object($self->search_terms, $self->filters);
1115              
1116 0           for (my $count = $start; $count <= $end ; $count++) {
1117             # create the link
1118 0           $builder->page($count);
1119 0           my $url = $builder->url_builder;
1120 0           my $item = {
1121             url => $url,
1122             name => $count,
1123             };
1124 0           my $position = $count - $page;
1125 0 0         if ($position == 0) {
    0          
    0          
1126 0           $item->{current} = 1;
1127             }
1128             elsif ($position == 1) {
1129 0           $pager{next} = $url;
1130 0           $pager{next_page} = $count;
1131             }
1132             elsif ($position == -1) {
1133 0           $pager{previous} = $url;
1134 0           $pager{previous_page} = $count;
1135             }
1136 0           push @{$pager{items}}, $item;
  0            
1137             }
1138 0 0         if ($page != $total_pages) {
1139 0           $builder->page($total_pages);
1140 0           $pager{last} = $builder->url_builder;
1141 0           $pager{last_page} = $total_pages;
1142             }
1143 0 0         if ($page != 1) {
1144 0           $builder->page(1);
1145 0           $pager{first} = $builder->url_builder;
1146 0           $pager{first_page} = 1;
1147             }
1148 0           $pager{total_pages} = $total_pages;
1149 0           return \%pager;
1150             }
1151              
1152             =head2 terms_found
1153              
1154             Returns an hashref suitable to build a widget with the terms used and
1155             the links to toggle them. Return undef if no terms were used in the search.
1156              
1157             The structure looks like this:
1158              
1159             {
1160             reset => '',
1161             terms => [
1162             { term => 'first', url => 'words/second' },
1163             { term => 'second', url => 'words/first' },
1164             ],
1165             }
1166              
1167             See also:
1168              
1169             =over 4
1170              
1171             =item clear_words_link
1172              
1173             Which return the reset link
1174              
1175             =item remove_word_links
1176              
1177             Which returns a list of hashrefs with C<uri> and C<label> for each
1178             word to remove.
1179              
1180             =back
1181              
1182             =cut
1183              
1184             sub terms_found {
1185 0     0 1   my $self = shift;
1186 0           my @terms = @{ $self->search_terms };
  0            
1187 0 0         return unless @terms;
1188 0           my %out = (
1189             reset => $self->builder_object([], $self->filters)->url_builder,
1190             terms => [],
1191             );
1192 0           my @toggled;
1193 0           my $builder = $self->builder_object(\@toggled, $self->filters);
1194 0           foreach my $term (@terms) {
1195 0           @toggled = grep { $_ ne $term } @terms;
  0            
1196 0           $builder->terms(\@toggled);
1197 0           push @{ $out{terms} }, {
  0            
1198             term => $term,
1199             url => $builder->url_builder,
1200             };
1201             }
1202 0           return \%out;
1203            
1204             }
1205              
1206             =head2 version
1207              
1208             Return the version of this module.
1209              
1210             =cut
1211              
1212             sub version {
1213 0     0 1   return $VERSION;
1214             }
1215              
1216              
1217             =head2 breadcrumbs
1218              
1219             Return a list of hashrefs with C<uri> and C<label> suitable to compose
1220             a breadcrumb for the current search.
1221              
1222             If the breadcrumb points to a facet, the facet name is stored in the
1223             C<facet> key.
1224              
1225             =cut
1226              
1227             sub breadcrumbs {
1228 0     0 1   my $self = shift;
1229 0           my $words = $self->search_terms;
1230 0           my $filters = $self->filters;
1231             # always add the root
1232 0           my @pieces;
1233 0           my $current_uri = 'words';
1234 0           foreach my $word (@$words) {
1235 0           $current_uri .= "/$word";
1236 0           push @pieces, {
1237             uri => $current_uri,
1238             label => $word,
1239             };
1240             }
1241 0 0         if (%$filters) {
1242 0           foreach my $facet (@{ $self->facets }) {
  0            
1243 0 0         if (my $terms = $filters->{$facet}) {
1244 0           $current_uri .= "/$facet";
1245 0           foreach my $term (@$terms) {
1246 0           $current_uri .= "/$term";
1247 0           push @pieces, {
1248             uri => $current_uri,
1249             label => $term,
1250             facet => $facet,
1251             };
1252             }
1253             }
1254             }
1255             }
1256 0           return @pieces;
1257             }
1258              
1259             sub clear_words_link {
1260 0     0 1   my $self = shift;
1261 0 0         if (my $struct = $self->terms_found) {
1262 0           return $struct->{reset};
1263             }
1264             else {
1265 0           return;
1266             }
1267             }
1268              
1269             sub remove_word_links {
1270 0     0 1   my $self = shift;
1271 0           my @out;
1272 0 0         if (my $struct = $self->terms_found) {
1273 0 0         if (my $terms = $struct->{terms}) {
1274 0           foreach my $term (@$terms) {
1275             push @out, {
1276             uri => $term->{url},
1277             label => $term->{term},
1278 0           };
1279             }
1280             }
1281             }
1282 0           return @out;
1283             }
1284              
1285             sub _term_is_good {
1286 0     0     my ($self, $term) = @_;
1287 0 0 0       if ($term && $term =~ /\w/) {
1288 0 0         if ($self->stop_words->{lc($term)}) {
1289 0           return 0;
1290             }
1291 0 0         if ($self->min_chars > 1) {
1292 0           my $re = "\\w.*" x $self->min_chars;
1293 0 0         if ($term =~ m/$re/) {
1294 0           return 1;
1295             }
1296             else {
1297 0           return 0;
1298             }
1299             }
1300 0           return 1;
1301             }
1302 0           return 0;
1303             }
1304              
1305             # stolen from SQL::Abstract
1306              
1307             sub _build_sort_field {
1308 0     0     my ($self, $arg) = @_;
1309             return $self->_SWITCH_refkind($arg,
1310             {
1311             ARRAYREF => sub {
1312 0     0     map { $self->_build_sort_field($_) } @$arg;
  0            
1313             },
1314             SCALAR => sub {
1315 0     0     return "$arg";
1316             },
1317             UNDEF => sub {
1318 0     0     return;
1319             },
1320             SCALARREF => sub {
1321 0     0     $$arg;
1322             },
1323             HASHREF => sub {
1324 0     0     my ($key, $val, @rest) = %$arg;
1325 0 0         return () unless $key;
1326 0 0 0       if ( @rest or not $key =~ /^-(desc|asc)/i ) {
1327 0           die "hash passed to sorting must have exactly ".
1328             "one key (-desc or -asc)";
1329             }
1330 0           my $direction = $1;
1331 0           my @ret;
1332 0           for my $c ($self->_build_sort_field($val)) {
1333 0           my $query;
1334             $self->_SWITCH_refkind ($c,
1335             {
1336             SCALAR => sub {
1337 0           $query = $c;
1338             },
1339 0           });
1340 0           $query = $query . ' ' . lc($direction);
1341 0           push @ret, $query;
1342             }
1343 0           return @ret;
1344             },
1345 0           });
1346             }
1347              
1348              
1349             sub _refkind {
1350 0     0     my ($self, $data) = @_;
1351 0 0         return 'UNDEF' unless defined $data;
1352             # blessed objects are treated like scalars
1353 0 0         my $ref = (Scalar::Util::blessed $data) ? '' : ref $data;
1354 0 0         return 'SCALAR' unless $ref;
1355 0           my $n_steps = 1;
1356 0           while ($ref eq 'REF') {
1357 0           $data = $$data;
1358 0 0         $ref = (Scalar::Util::blessed $data) ? '' : ref $data;
1359 0 0         $n_steps++ if $ref;
1360             }
1361 0   0       return ($ref||'SCALAR') . ('REF' x $n_steps);
1362             }
1363              
1364             sub _SWITCH_refkind {
1365 0     0     my ($self, $data, $dispatch_table) = @_;
1366 0           my $type = $self->_refkind($data);
1367 0           my $coderef = $dispatch_table->{$self->_refkind($data)};
1368 0 0         die "Unsupported structure $type" unless $coderef;
1369 0           $coderef->();
1370             }
1371              
1372              
1373             =head1 AUTHORS
1374              
1375             Marco Pessotto, C<< <melmothx at gmail.com> >>
1376              
1377             Stefan Hornburg (Racke), C<< <racke at linuxia.de> >>
1378              
1379             =head1 BUGS
1380              
1381             Please report any bugs or feature requests to
1382             L<https://github.com/interchange/Interchange-Search-Solr/issues>.
1383              
1384             =head1 SUPPORT
1385              
1386             You can find documentation for this module with the perldoc command.
1387              
1388             perldoc Interchange::Search::Solr
1389              
1390              
1391             You can also look for information at:
1392              
1393             =over 4
1394              
1395             =item * Github
1396              
1397             L<https://github.com/interchange/Interchange-Search-Solr>
1398              
1399             =item * AnnoCPAN: Annotated CPAN documentation
1400              
1401             L<http://annocpan.org/dist/Interchange-Search-Solr>
1402              
1403             =item * CPAN Ratings
1404              
1405             L<http://cpanratings.perl.org/d/Interchange-Search-Solr>
1406              
1407             =item * META CPAN
1408              
1409             L<https://metacpan.org/pod/Interchange::Search::Solr>
1410              
1411             =back
1412              
1413              
1414             =head1 ACKNOWLEDGEMENTS
1415              
1416             Mohammad S Anwar (GH #14).
1417              
1418             =head1 LICENSE AND COPYRIGHT
1419              
1420             Copyright 2014-2019 Marco Pessotto, Stefan Hornburg (Racke).
1421              
1422             This program is free software; you can redistribute it and/or modify it
1423             under the terms of the the Artistic License (2.0). You may obtain a
1424             copy of the full license at:
1425              
1426             L<http://www.perlfoundation.org/artistic_license_2_0>
1427              
1428             Any use, modification, and distribution of the Standard or Modified
1429             Versions is governed by this Artistic License. By using, modifying or
1430             distributing the Package, you accept this license. Do not use, modify,
1431             or distribute the Package, if you do not accept this license.
1432              
1433             If your Modified Version has been derived from a Modified Version made
1434             by someone other than you, you are nevertheless required to ensure that
1435             your Modified Version complies with the requirements of this license.
1436              
1437             This license does not grant you the right to use any trademark, service
1438             mark, tradename, or logo of the Copyright Holder.
1439              
1440             This license includes the non-exclusive, worldwide, free-of-charge
1441             patent license to make, have made, use, offer to sell, sell, import and
1442             otherwise transfer the Package with respect to any patent claims
1443             licensable by the Copyright Holder that are necessarily infringed by the
1444             Package. If you institute patent litigation (including a cross-claim or
1445             counterclaim) against any party alleging that the Package constitutes
1446             direct or contributory patent infringement, then this Artistic License
1447             to you shall terminate on the date that such litigation is filed.
1448              
1449             Disclaimer of Warranty: THE PACKAGE IS PROVIDED BY THE COPYRIGHT HOLDER
1450             AND CONTRIBUTORS "AS IS' AND WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES.
1451             THE IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
1452             PURPOSE, OR NON-INFRINGEMENT ARE DISCLAIMED TO THE EXTENT PERMITTED BY
1453             YOUR LOCAL LAW. UNLESS REQUIRED BY LAW, NO COPYRIGHT HOLDER OR
1454             CONTRIBUTOR WILL BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, OR
1455             CONSEQUENTIAL DAMAGES ARISING IN ANY WAY OUT OF THE USE OF THE PACKAGE,
1456             EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
1457              
1458              
1459             =cut
1460              
1461             1; # End of Interchange::Search::Solr