| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package KinoSearch1::Index::TermInfosReader; |
|
2
|
34
|
|
|
34
|
|
31965
|
use strict; |
|
|
34
|
|
|
|
|
73
|
|
|
|
34
|
|
|
|
|
1134
|
|
|
3
|
34
|
|
|
34
|
|
181
|
use warnings; |
|
|
34
|
|
|
|
|
75
|
|
|
|
34
|
|
|
|
|
808
|
|
|
4
|
34
|
|
|
34
|
|
781
|
use KinoSearch1::Util::ToolSet; |
|
|
34
|
|
|
|
|
68
|
|
|
|
34
|
|
|
|
|
5007
|
|
|
5
|
34
|
|
|
34
|
|
203
|
use base qw( KinoSearch1::Util::Class ); |
|
|
34
|
|
|
|
|
69
|
|
|
|
34
|
|
|
|
|
3949
|
|
|
6
|
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
BEGIN { |
|
8
|
34
|
|
|
34
|
|
354
|
__PACKAGE__->init_instance_vars( |
|
9
|
|
|
|
|
|
|
# constructor params / members |
|
10
|
|
|
|
|
|
|
invindex => undef, |
|
11
|
|
|
|
|
|
|
seg_name => undef, |
|
12
|
|
|
|
|
|
|
finfos => undef, |
|
13
|
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
# members |
|
15
|
|
|
|
|
|
|
orig_enum => undef, |
|
16
|
|
|
|
|
|
|
index_enum => undef, |
|
17
|
|
|
|
|
|
|
); |
|
18
|
|
|
|
|
|
|
} |
|
19
|
|
|
|
|
|
|
|
|
20
|
34
|
|
|
34
|
|
21525
|
use KinoSearch1::Index::SegTermEnum; |
|
|
34
|
|
|
|
|
166
|
|
|
|
34
|
|
|
|
|
17685
|
|
|
21
|
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
sub init_instance { |
|
23
|
95
|
|
|
95
|
1
|
175
|
my $self = shift; |
|
24
|
95
|
|
|
|
|
290
|
my $invindex = $self->{invindex}; |
|
25
|
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
# prepare a main Enum which can access all terms |
|
27
|
95
|
|
|
|
|
490
|
$self->{orig_enum} = KinoSearch1::Index::SegTermEnum->new( |
|
28
|
|
|
|
|
|
|
finfos => $self->{finfos}, |
|
29
|
|
|
|
|
|
|
instream => $invindex->open_instream("$self->{seg_name}.tis"), |
|
30
|
|
|
|
|
|
|
); |
|
31
|
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
# load an index Enum into memory which can point to places in main |
|
33
|
95
|
|
|
|
|
550
|
$self->{index_enum} = KinoSearch1::Index::SegTermEnum->new( |
|
34
|
|
|
|
|
|
|
finfos => $self->{finfos}, |
|
35
|
|
|
|
|
|
|
instream => $invindex->open_instream("$self->{seg_name}.tii"), |
|
36
|
|
|
|
|
|
|
is_index => 1, |
|
37
|
|
|
|
|
|
|
); |
|
38
|
95
|
|
|
|
|
958
|
$self->{index_enum}->fill_cache; |
|
39
|
|
|
|
|
|
|
} |
|
40
|
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
# Return a SegTermEnum, pre-located at the right spot if a Term is supplied. |
|
42
|
|
|
|
|
|
|
sub terms { |
|
43
|
17
|
|
|
17
|
0
|
31
|
my ( $self, $term ) = @_; |
|
44
|
17
|
100
|
|
|
|
46
|
if ( defined $term ) { |
|
45
|
1
|
|
|
|
|
4
|
$self->fetch_term_info($term); |
|
46
|
|
|
|
|
|
|
} |
|
47
|
|
|
|
|
|
|
else { |
|
48
|
16
|
|
|
|
|
120
|
$self->{orig_enum}->reset; |
|
49
|
|
|
|
|
|
|
} |
|
50
|
17
|
|
|
|
|
90
|
return $self->{orig_enum}->clone_enum; |
|
51
|
|
|
|
|
|
|
} |
|
52
|
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
# Given a Term, return a TermInfo if the Term is present in the segment, or |
|
54
|
|
|
|
|
|
|
# undef if it's not. |
|
55
|
|
|
|
|
|
|
sub fetch_term_info { |
|
56
|
2068
|
|
|
2068
|
0
|
2775
|
my ( $self, $term ) = @_; |
|
57
|
2068
|
|
|
|
|
6100
|
my $termstring = $term->get_termstring( $self->{finfos} ); |
|
58
|
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
# termstring will be undefined if field doesn't exist |
|
60
|
2068
|
100
|
|
|
|
5044
|
return unless defined $termstring; |
|
61
|
|
|
|
|
|
|
|
|
62
|
2022
|
|
|
|
|
4268
|
$self->_seek_enum($termstring); |
|
63
|
|
|
|
|
|
|
|
|
64
|
2022
|
|
|
|
|
7954
|
return $self->_scan_enum($termstring); |
|
65
|
|
|
|
|
|
|
} |
|
66
|
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
# Locate the main Enum as close as possible to where the term might be found. |
|
68
|
|
|
|
|
|
|
sub _seek_enum { |
|
69
|
2022
|
|
|
2022
|
|
2736
|
my ( $self, $termstring ) = @_; |
|
70
|
2022
|
|
|
|
|
3174
|
my $index_enum = $self->{index_enum}; |
|
71
|
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
# get the approximate possible location of the term in the main Enum |
|
73
|
2022
|
|
|
|
|
8059
|
my $tii_position = $index_enum->scan_cache($termstring); |
|
74
|
2022
|
|
|
|
|
6850
|
my $ballpark_termstring = $index_enum->get_termstring; |
|
75
|
2022
|
|
|
|
|
9337
|
my $ballpark_tinfo = $index_enum->get_term_info; |
|
76
|
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
# point the main Enum just before the term |
|
78
|
2022
|
|
|
|
|
16492
|
$self->{orig_enum}->seek( |
|
79
|
|
|
|
|
|
|
$ballpark_tinfo->get_index_fileptr, |
|
80
|
|
|
|
|
|
|
( ( $tii_position * $self->{orig_enum}->get_index_interval ) - 1 ), |
|
81
|
|
|
|
|
|
|
$ballpark_termstring, |
|
82
|
|
|
|
|
|
|
$ballpark_tinfo, |
|
83
|
|
|
|
|
|
|
); |
|
84
|
|
|
|
|
|
|
} |
|
85
|
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
# One-by-one targeted iteration through TermEnum. |
|
87
|
|
|
|
|
|
|
sub _scan_enum { |
|
88
|
2022
|
|
|
2022
|
|
3205
|
my ( $self, $target_termstring ) = @_; |
|
89
|
2022
|
|
|
|
|
2878
|
my $orig_enum = $self->{orig_enum}; |
|
90
|
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
# iterate through the Enum until the result is ge the term |
|
92
|
2022
|
|
|
|
|
30038
|
$orig_enum->scan_to($target_termstring); |
|
93
|
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
# if the stopping point matches the target, return info; otherwise, undef |
|
95
|
2022
|
|
|
|
|
7084
|
my $found_termstring = $orig_enum->get_termstring; |
|
96
|
2022
|
100
|
100
|
|
|
9512
|
if ( defined $found_termstring |
|
97
|
|
|
|
|
|
|
and $found_termstring eq $target_termstring ) |
|
98
|
|
|
|
|
|
|
{ |
|
99
|
1825
|
|
|
|
|
11471
|
return $orig_enum->get_term_info; |
|
100
|
|
|
|
|
|
|
} |
|
101
|
197
|
|
|
|
|
698
|
return; |
|
102
|
|
|
|
|
|
|
} |
|
103
|
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
sub get_skip_interval { |
|
105
|
654
|
|
|
654
|
0
|
4380
|
shift->{orig_enum}->get_skip_interval; |
|
106
|
|
|
|
|
|
|
} |
|
107
|
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
sub close { |
|
109
|
39
|
|
|
39
|
0
|
61
|
my $self = shift; |
|
110
|
39
|
|
|
|
|
163
|
$self->{orig_enum}->close; |
|
111
|
39
|
|
|
|
|
139
|
$self->{index_enum}->close; |
|
112
|
|
|
|
|
|
|
} |
|
113
|
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
1; |
|
115
|
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
__END__ |