File Coverage

blib/lib/Plucene/SearchEngine/Index/Base.pm
Criterion Covered Total %
statement 63 65 96.9
branch 10 14 71.4
condition n/a
subroutine 12 12 100.0
pod 5 5 100.0
total 90 96 93.7


line stmt bran cond sub pod time code
1             package Plucene::SearchEngine::Index::Base;
2 1     1   9 use Plucene::Document;
  1         4  
  1         17  
3 1     1   1047 use Plucene::Document::DateSerializer;
  1         29763  
  1         56  
4 1     1   11 use Plucene::Document::Field;
  1         3  
  1         17  
5 1     1   27 use Time::Piece;
  1         1  
  1         4  
6 1     1   885 use UNIVERSAL::moniker;
  1         8  
  1         10  
7 1     1   21 use strict;
  1         1  
  1         38  
8              
9             =head1 NAME
10              
11             Plucene::SearchEngine::Index::Base - The definitive indexer base class
12              
13             =head1 DESCRIPTION
14              
15             This module is the base class from which both frontend and backend
16             indexing modules should inherit. It makes it easier for modules to
17             create C<Plucene::Document> objects through the intermediary of a nested
18             hash.
19              
20             =head1 METHODS
21              
22             =head2 register_handler
23              
24             __PACKAGE__->register_handler($ext, $mime_type, $ext2, ...);
25              
26             This registers the module to handle each given extension or MIME type.
27             C<register_handler> works out whether a parameter is a file extension or a MIME
28             type.
29              
30             =head2 handler_for
31              
32             $self->handler_for($filename, $mime_type)
33              
34             This finds the relevant handler which has been registered for the given
35             mime type or file name extension.
36              
37             =cut
38              
39 1     1   7 use constant DEFAULT_HANDLER => "Plucene::SearchEngine::Index::Text";
  1         2  
  1         559  
40             {
41             my %mime_handlers;
42             my %extension_handlers;
43             sub register_handler {
44 2     2 1 7 my ($package, @specs) = @_;
45 2         5 for my $spec (@specs) {
46 4 100       16 if ($spec =~ m{/}) {
47 2         8 $mime_handlers{$spec} = $package;
48             } else {
49 2         10 $extension_handlers{$spec} = $package;
50             }
51             }
52             }
53             sub handler_for {
54 1     1 1 3 my ($self, $filename, $mime) = @_;
55 1 50       6 if (exists $mime_handlers{$mime}) { return $mime_handlers{$mime} }
  0         0  
56 1         5 for my $spec (keys %extension_handlers) {
57 2 50       19 if ($filename =~ /$spec$/) { return $extension_handlers{$spec} }
  0         0  
58             }
59 1         16 return DEFAULT_HANDLER;
60             }
61             }
62              
63             =head2 new
64              
65             This creates a new backend object, which knows about the C<handler>,
66             C<type> and C<indexed> date for the data.
67              
68             =cut
69              
70             sub new {
71 1     1 1 3 my ($handler) = @_;
72 1         4 my $self = bless {}, $handler;
73 1         9 $self->add_data("handler", "Keyword", $handler);
74 1         11 $self->add_data("type", "Keyword", $handler->moniker);
75 1         11 $self->add_data("indexed", "Date", Time::Piece->new());
76 1         4 $self;
77             }
78              
79             =head2 add_data
80              
81             $self->add_data($field, $type, $data);
82              
83             This adds data to a backend object. A backend object represents a
84             C<Plucene::Document>, a hash which will later be turned into a
85             C<Plucene::Document> object.
86              
87             The C<$field> element should be the field name that's stored in Plucene.
88             The C<$type> should be one of the methods that
89             C<Plucene::Document::Field> can cope with - Keyword, Text, UnIndexed,
90             UnStored - or C<Date>, which takes a C<Time::Piece> object as its
91             C<$data>.
92              
93             =cut
94              
95             sub add_data {
96 22     22 1 492 my ($self, $field, $type, $data) = @_;
97 22         52 $self->{$field}{type} = $type;
98 22         22 push @{$self->{$field}{data}}, $data;
  22         101  
99             }
100              
101             =head2 document
102              
103             This turns the backend's hash into a C<Plucene::Document>.
104              
105             =cut
106              
107              
108             sub document {
109 1     1 1 1229 my $self = shift;
110 1         23 my $doc = Plucene::Document->new;
111 1         12 my $text;
112 1         4 for my $field_name (keys %{$self}) {
  1         6  
113 8 100       187 next if $field_name eq "text";
114 7         11 my $field = $self->{$field_name};
115 7         12 my $type = $field->{type};
116 7 50       15 warn "No type for field $field_name!" unless $type;
117 7 100       18 if ($field->{type} eq "Date") {
118 2         4 $type = "Keyword";
119 2         3 for (@{$field->{data}}) { $_ = freeze_date($_) }
  2         4  
  2         10  
120             }
121 7         215 for (@{$field->{data}}) {
  7         12  
122 7         12 $text .= " ". $_;
123 7         34 $doc->add(Plucene::Document::Field->$type( $field_name => $_));
124             }
125             }
126 1 50       25 $text .= " ". join " ", @{$self->{text}{data}||[]};
  1         8  
127 1         5 $doc->add(Plucene::Document::Field->UnStored(text => $text));
128 1         25 return $doc;
129             }
130              
131             1;