File Coverage

blib/lib/Plucene/Index/TermInfosWriter.pm
Criterion Covered Total %
statement 61 61 100.0
branch 12 16 75.0
condition 7 8 87.5
subroutine 14 14 100.0
pod 4 4 100.0
total 98 103 95.1


line stmt bran cond sub pod time code
1             package Plucene::Index::TermInfosWriter;
2              
3             =head1 NAME
4              
5             Plucene::Index::TermInfosWriter - write to the term infos file
6              
7             =head1 SYNOPSIS
8              
9             my $writer = Plucene::Index::TermInfosWriter->new(
10             $dir_name, $segment, $field_infos);
11              
12             $writer->add(Plucene::Index::Term $term,
13             Plucene::Index::TermInfo $term_info);
14              
15             $writer->write_term(Plucene::Index::Term $term);
16              
17             =head1 DESCRIPTION
18              
19             This will allow for the writing and adding to a term infos file for a
20             particular segment. It also writes the term infos index.
21              
22             =head1 METHODS
23              
24             =cut
25              
26 19     19   1197 use strict;
  19         36  
  19         659  
27 19     19   176 use warnings;
  19         39  
  19         670  
28              
29 19     19   99 use constant INDEX_INTERVAL => 128;
  19         36  
  19         1378  
30              
31 19     19   98 use Carp qw(confess carp);
  19         37  
  19         1112  
32              
33 19     19   10040 use Plucene::Store::OutputStream;
  19         50  
  19         791  
34 19     19   15241 use Plucene::Index::Term;
  19         43  
  19         148  
35 19     19   1297 use Plucene::Index::TermInfo;
  19         39  
  19         128  
36              
37             =head2 new
38              
39             my $writer = Plucene::Index::TermInfosWriter->new(
40             $dir_name, $segment, $field_infos);
41              
42             This will create a new Plucene::Index::TermInfosWriter object.
43            
44             =cut
45              
46             sub new {
47 594     594 1 1538 my ($class, $d, $segment, $fis, $is_i) = @_;
48              
49 594 100       4495 my $self = bless {
50             field_infos => $fis,
51             is_index => $is_i,
52             size => 0,
53             last_term => Plucene::Index::Term->new({ field => "", text => "" }),
54             last_ti => Plucene::Index::TermInfo->new,
55             last_index_ptr => 0,
56             output => Plucene::Store::OutputStream->new(
57             "$d/$segment.ti" . ($is_i ? "i" : "s")
58             ),
59             }, $class;
60 594 50       4748 confess("No field_infos!") unless $self->{field_infos};
61 594         2997 $self->{output}->write_int(0); # Will be filled in when DESTROYed
62 594 100       1572 if (!$is_i) {
63 297         1901 $self->{other} = $class->new($d, $segment, $fis, 1);
64 297         1116 $self->{other}->{other} = $self; # My enemy's enemy is my friend
65             }
66 594         2386 return $self;
67             }
68              
69             =head2 break_ref
70              
71             This will break a circular reference.
72              
73             =cut
74              
75             # Damned circular references.
76 297     297 1 155285 sub break_ref { undef shift->{other} }
77              
78             =head2 add
79              
80             $writer->add(Plucene::Index::Term $term,
81             Plucene::Index::TermInfo $term_info);
82              
83             This will add the term and term info to the term infos file.
84            
85             =cut
86              
87             sub add {
88 46707     46707 1 342625 my ($self, $term, $ti) = @_;
89 19     19   5050 no warnings 'uninitialized';
  19         40  
  19         7457  
90 46707 50 66     218126 carp sprintf "Can't add out-of-order term %s lt %s (%s lt %s)", $term->text,
91             $self->{last_term}->text, $term->field, $self->{last_term}->{field}
92             if !$self->{is_index} && $term->lt($self->{last_term});
93 46707 50       129938 carp "Frequency pointer out of order"
94             if $ti->freq_pointer < $self->{last_ti}->freq_pointer;
95 46707 50       416927 carp "Proximity pointer out of order"
96             if $ti->prox_pointer < $self->{last_ti}->prox_pointer;
97              
98 46707 100 100     484002 $self->{other}->add($self->{last_term}, $self->{last_ti})
99             if !$self->{is_index}
100             and (($self->{size} % INDEX_INTERVAL) == 0);
101              
102 46707         98660 $self->write_term($term);
103 46707         162920 $self->{output}->write_vint($ti->doc_freq);
104 46707         154744 $self->{output}
105             ->write_vlong($ti->freq_pointer - $self->{last_ti}->freq_pointer);
106 46707         146039 $self->{output}
107             ->write_vlong($ti->prox_pointer - $self->{last_ti}->prox_pointer);
108              
109 46707 100       130309 if ($self->{is_index}) { # I bet Tony will think about subclassing
110             # at this point
111 635         4290 $self->{output}->write_vlong(
112             $self->{other}->{output}->tell - $self->{last_index_pointer});
113 635         3085 $self->{last_index_pointer} = $self->{other}->{output}->tell;
114             }
115              
116 46707         118648 $self->{last_ti} = $ti->clone;
117 46707         221689 $self->{size}++;
118             }
119              
120             =head2 write_term
121              
122             $writer->write_term(Plucene::Index::Term $term);
123              
124             This will write the term to the term infos file.
125            
126             =cut
127              
128             sub write_term {
129 46707     46707 1 69802 my ($self, $term) = @_;
130 46707   100     121512 my $text = $term->text || "";
131 19     19   110 no warnings 'uninitialized';
  19         43  
  19         3740  
132              
133             # Find longest common prefix
134 46707         328933 ($text ^ $self->{last_term}->text) =~ /^(\0*)/;
135 46707         347786 my $start = length $1;
136              
137 46707         150688 $self->{output}->write_vint($start);
138 46707         179174 $self->{output}->write_string(substr($text, $start));
139 46707         176850 $self->{output}
140             ->write_vint($self->{field_infos}->field_number($term->field));
141 46707         129801 $self->{last_term} = $term;
142             }
143              
144             sub DESTROY {
145 594     594   1257 my $self = shift;
146 594         2719 $self->{output}->seek(0, 0);
147 594         3599 $self->{output}->write_int($self->{size});
148             }
149              
150             1;