line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Plucene::Index::TermInfosReader; |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
=head1 NAME |
4
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
Plucene::Index::TermInfosReader - read the term infos file |
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
=head1 SYNOPSIS |
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
my $reader = Plucene::Index::TermInfosReader->new( |
10
|
|
|
|
|
|
|
$dir_name, $segment, $fis); |
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
my Plucene::Index::TermInfo $term_info = |
13
|
|
|
|
|
|
|
$reader->get(Plucene::Index::Term $term); |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
my Plucene::Index::SegmentTermEnum $enum = |
16
|
|
|
|
|
|
|
$reader->terms(Plucene::Index::Term $term); |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
=head1 DESCRIPTION |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
This reads a term infos file. |
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
=head1 METHODS |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
=cut |
25
|
|
|
|
|
|
|
|
26
|
19
|
|
|
19
|
|
10204
|
use strict; |
|
19
|
|
|
|
|
31
|
|
|
19
|
|
|
|
|
728
|
|
27
|
19
|
|
|
19
|
|
96
|
use warnings; |
|
19
|
|
|
|
|
37
|
|
|
19
|
|
|
|
|
512
|
|
28
|
|
|
|
|
|
|
|
29
|
19
|
|
|
19
|
|
26536
|
use Memoize; |
|
19
|
|
|
|
|
83285
|
|
|
19
|
|
|
|
|
1244
|
|
30
|
|
|
|
|
|
|
|
31
|
19
|
|
|
19
|
|
166
|
use Carp qw/confess/; |
|
19
|
|
|
|
|
44
|
|
|
19
|
|
|
|
|
961
|
|
32
|
|
|
|
|
|
|
|
33
|
19
|
|
|
19
|
|
13241
|
use Plucene::Index::SegmentTermEnum; |
|
19
|
|
|
|
|
51
|
|
|
19
|
|
|
|
|
138
|
|
34
|
19
|
|
|
19
|
|
13800
|
use Plucene::Index::TermInfosWriter; |
|
19
|
|
|
|
|
53
|
|
|
19
|
|
|
|
|
548
|
|
35
|
19
|
|
|
19
|
|
854
|
use Plucene::Store::InputStream; |
|
19
|
|
|
|
|
40
|
|
|
19
|
|
|
|
|
14914
|
|
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
=head2 new |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
my $reader = Plucene::Index::TermInfosReader->new( |
40
|
|
|
|
|
|
|
$dir_name, $segment, $fis); |
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
This will create a new Plucene::Index::TermInfosReader object with |
43
|
|
|
|
|
|
|
the passed directory name, segment name and field infos. |
44
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
=cut |
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
# Build a reader for the term infos of one index segment.
#
# Arguments:
#   $class - class the new object is blessed into
#   $dir   - directory name; combined with $seg to build the file paths
#   $seg   - segment name; term infos are read from "$dir/$seg.tis"
#   $fis   - field infos, passed through to the SegmentTermEnum constructor
#
# Returns the new reader object. Dies with a stack trace (Carp::confess)
# when "$dir/$seg.tis" is missing or has zero size.
sub new {
    my ($class, $dir, $seg, $fis) = @_;
    my $file = "$dir/$seg.tis";

    # -s is only true for an existing file with non-zero size, so a failure
    # here means the file is absent or empty, not that it is open elsewhere.
    confess("$file does not exist or is empty!") unless -s $file;

    my $self = bless {
        directory   => $dir,
        segment     => $seg,
        field_infos => $fis,

        # NOTE(review): the trailing 0 is presumably an "is index file" flag
        # (_read_index passes 1 for the .tii file) - confirm against
        # Plucene::Index::SegmentTermEnum.
        enum => Plucene::Index::SegmentTermEnum->new(
            Plucene::Store::InputStream->new($file),
            $fis, 0
        ),
    }, $class;

    # Cache the enum's size; get() returns early when it is false.
    $self->{size} = $self->{enum}->size;

    # Load the companion .tii index into memory.
    $self->_read_index;
    return $self;
}
65
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
# Load the segment's .tii index into three parallel array refs on $self:
# index_terms, index_infos and index_pointers. Entry N of each array comes
# from the Nth successful next() call on the index enumerator.
sub _read_index {
    my $self = shift;

    my $index_path = "$self->{directory}/$self->{segment}.tii";
    my $index_enum = Plucene::Index::SegmentTermEnum->new(
        Plucene::Store::InputStream->new($index_path),
        $self->{field_infos},
        1
    );

    # NOTE(review): $size is never read in this sub; the call is kept so the
    # sequence of calls made on the enumerator stays exactly as before.
    my $size = $index_enum->size;

    $self->{$_} = [] for qw(index_terms index_infos index_pointers);

    while ($index_enum->next) {
        my $term = $index_enum->term;
        push @{ $self->{index_terms} }, $term;

        # Store a shallow copy of the term info hash rather than the object
        # handed back by the enumerator.
        my $info =
            Plucene::Index::TermInfo->new({ %{ $index_enum->term_info } });
        push @{ $self->{index_infos} }, $info;

        my $pointer = $index_enum->index_pointer;
        push @{ $self->{index_pointers} }, $pointer;
    }
}
87
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
# Deliberately NOT memoized. Memoize's default cache key is the stringified
# argument list, which for plain objects is their memory address (unless the
# classes overload stringification - not visible from this file). The result
# here depends on the *contents* of $self->{index_terms} and of $term, so a
# key built from addresses can hand back a stale offset once an address is
# reused by a different reader or term, and the cache also grows without
# bound. The binary search below is cheap enough to run every time.

# Binary-search $self->{index_terms} for $term.
#
# Arguments:
#   $self - reader whose index_terms array ref is searched
#   $term - object providing _cmp($other), used as a three-way comparison
#
# Returns the position of the entry that compares equal to $term, otherwise
# the position of the last entry that $term compares greater than. That is
# -1 when the array is empty or $term compares less than every entry.
# Assumes index_terms is ordered consistently with _cmp - TODO confirm.
sub _get_index_offset {
    my ($self, $term) = @_;
    my $lo = 0;
    my $hi = $#{ $self->{index_terms} };

    while ($hi >= $lo) {
        my $mid = ($lo + $hi) >> 1;

        # Negative: $term is before the probe; positive: after it.
        my $delta = $term->_cmp($self->{index_terms}->[$mid]);
        if ($delta < 0) {
            $hi = $mid - 1;
        }
        elsif ($delta > 0) {
            $lo = $mid + 1;
        }
        else {
            return $mid;
        }
    }

    # No exact match: $hi now sits just below where $term would be inserted.
    return $hi;
}
106
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
=head2 get |
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
my Plucene::Index::TermInfo $term_info = |
110
|
|
|
|
|
|
|
$reader->get(Plucene::Index::Term $term); |
111
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
=cut |
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
# Look up $term: find its index offset, seek the enumerator there, then scan.
# Returns whatever _scan_enum returns for $term, or nothing (bare return)
# when $self->{size} is false.
sub get {
    my ($self, $term) = @_;

    if (!$self->{size}) {
        return;
    }

    my $offset = $self->_get_index_offset($term);
    $self->_seek_enum($offset);

    return $self->_scan_enum($term);
}
120
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
# Reposition $self->{enum} using index entry number $offset.
# seek() receives, in order: the stored pointer for that entry,
# $offset * INDEX_INTERVAL - 1, the stored term and the stored term info.
# Returns whatever seek() returns.
sub _seek_enum {
    my ($self, $offset) = @_;

    my $interval = Plucene::Index::TermInfosWriter::INDEX_INTERVAL();
    my $position = $offset * $interval - 1;

    return $self->{enum}->seek(
        $self->{index_pointers}->[$offset],
        $position,
        $self->{index_terms}->[$offset],
        $self->{index_infos}->[$offset],
    );
}
129
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
# Advance $self->{enum} while $term is greater than the enum's current term
# and next() keeps succeeding, then report on where it stopped.
# Returns the enum's term_info when its current term is true and eq($term);
# otherwise returns nothing (bare return).
sub _scan_enum {
    my ($self, $term) = @_;
    my $enum = $self->{enum};

    while ($term->gt($enum->term)) {
        last unless $enum->next;
    }

    return unless $enum->term;
    return unless $enum->term->eq($term);
    return $enum->term_info;
}
138
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
=head2 get_int / get_position |
140
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
These are never called. |
142
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
=cut |
144
|
|
|
|
|
|
|
|
145
|
0
|
|
|
0
|
1
|
0
|
# Placeholder: accepts any arguments, does nothing and returns nothing.
sub get_int {
    return;
}
146
|
0
|
|
|
0
|
1
|
0
|
# Placeholder: accepts any arguments, does nothing and returns nothing.
sub get_position {
    return;
}
147
|
|
|
|
|
|
|
|
148
|
|
|
|
|
|
|
=head2 terms |
149
|
|
|
|
|
|
|
|
150
|
|
|
|
|
|
|
my Plucene::Index::SegmentTermEnum $enum = |
151
|
|
|
|
|
|
|
$reader->terms(Plucene::Index::Term $term); |
152
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
This will return the Plucene::Index::SegmentTermEnum for the passed-in |
154
|
|
|
|
|
|
|
Plucene::Index::Term. |
155
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
=cut |
157
|
|
|
|
|
|
|
|
158
|
|
|
|
|
|
|
# Position the shared enumerator and hand back a clone of it.
# With a true $term the enumerator is positioned through get($term);
# without one it is sought to index entry 0. The result of that positioning
# call is ignored. Returns $self->{enum}->clone.
sub terms {
    my ($self, $term) = @_;

    if ($term) {
        $self->get($term);
    }
    else {
        $self->_seek_enum(0);
    }

    return $self->{enum}->clone;
}
163
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
1; |