| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package KinoSearch1::Index::FieldsWriter; |
|
2
|
34
|
|
|
34
|
|
188
|
use strict; |
|
|
34
|
|
|
|
|
72
|
|
|
|
34
|
|
|
|
|
1509
|
|
|
3
|
34
|
|
|
34
|
|
184
|
use warnings; |
|
|
34
|
|
|
|
|
80
|
|
|
|
34
|
|
|
|
|
1146
|
|
|
4
|
34
|
|
|
34
|
|
196
|
use KinoSearch1::Util::ToolSet; |
|
|
34
|
|
|
|
|
91
|
|
|
|
34
|
|
|
|
|
4787
|
|
|
5
|
34
|
|
|
34
|
|
260
|
use base qw( KinoSearch1::Util::Class ); |
|
|
34
|
|
|
|
|
78
|
|
|
|
34
|
|
|
|
|
3282
|
|
|
6
|
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
BEGIN { |
|
8
|
34
|
|
|
34
|
|
425
|
__PACKAGE__->init_instance_vars( |
|
9
|
|
|
|
|
|
|
# constructor params / members |
|
10
|
|
|
|
|
|
|
invindex => undef, |
|
11
|
|
|
|
|
|
|
seg_name => undef, |
|
12
|
|
|
|
|
|
|
# members |
|
13
|
|
|
|
|
|
|
fdata_stream => undef, |
|
14
|
|
|
|
|
|
|
findex_stream => undef, |
|
15
|
|
|
|
|
|
|
); |
|
16
|
|
|
|
|
|
|
} |
|
17
|
34
|
|
|
34
|
|
384
|
use Compress::Zlib qw( compress ); |
|
|
34
|
|
|
|
|
321
|
|
|
|
34
|
|
|
|
|
23169
|
|
|
18
|
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
sub init_instance { |
|
20
|
62
|
|
|
62
|
1
|
131
|
my $self = shift; |
|
21
|
62
|
|
|
|
|
269
|
my $invindex = $self->{invindex}; |
|
22
|
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
# open an index stream and a data stream. |
|
24
|
62
|
|
|
|
|
176
|
my $fdx_file = "$self->{seg_name}.fdx"; |
|
25
|
62
|
|
|
|
|
156
|
my $fdt_file = "$self->{seg_name}.fdt"; |
|
26
|
62
|
|
|
|
|
163
|
for ( $fdx_file, $fdt_file, ) { |
|
27
|
124
|
50
|
|
|
|
419
|
$invindex->delete_file($_) if $invindex->file_exists($_); |
|
28
|
|
|
|
|
|
|
} |
|
29
|
62
|
|
|
|
|
275
|
$self->{findex_stream} = $invindex->open_outstream($fdx_file); |
|
30
|
62
|
|
|
|
|
256
|
$self->{fdata_stream} = $invindex->open_outstream($fdt_file); |
|
31
|
|
|
|
|
|
|
} |
|
32
|
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
sub add_doc { |
|
34
|
14017
|
|
|
14017
|
0
|
17342
|
my ( $self, $doc ) = @_; |
|
35
|
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
# record the data stream's current file pointer in the index. |
|
37
|
14017
|
|
|
|
|
130775
|
$self->{findex_stream}->lu_write( 'Q', $self->{fdata_stream}->tell ); |
|
38
|
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
# only store fields marked as "stored" |
|
40
|
14017
|
|
|
|
|
50169
|
my @stored = sort { $a->get_field_num <=> $b->get_field_num } |
|
|
2890
|
|
|
|
|
7331
|
|
|
41
|
|
|
|
|
|
|
grep $_->get_stored, $doc->get_fields; |
|
42
|
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
# add the number of stored fields in the Doc |
|
44
|
14017
|
|
|
|
|
24282
|
my @to_write = ( scalar @stored ); |
|
45
|
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
# add flag bits and value for each stored field |
|
47
|
14017
|
|
|
|
|
22503
|
for (@stored) { |
|
48
|
15390
|
|
|
|
|
35986
|
push @to_write, ( $_->get_field_num, $_->get_fdt_bits ); |
|
49
|
15390
|
100
|
|
|
|
37440
|
push @to_write, $_->get_compressed |
|
50
|
|
|
|
|
|
|
? compress( $_->get_value ) |
|
51
|
|
|
|
|
|
|
: $_->get_value; |
|
52
|
15390
|
|
|
|
|
38567
|
push @to_write, $_->get_tv_string; |
|
53
|
|
|
|
|
|
|
} |
|
54
|
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
# write out data |
|
56
|
14017
|
|
|
|
|
32166
|
my $lu_template = 'V' . ( 'VaTT' x scalar @stored ); |
|
57
|
14017
|
|
|
|
|
91010
|
$self->{fdata_stream}->lu_write( $lu_template, @to_write ); |
|
58
|
|
|
|
|
|
|
} |
|
59
|
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
sub add_segment { |
|
61
|
16
|
|
|
16
|
0
|
32
|
my ( $self, $seg_reader, $doc_map, $field_num_map ) = @_; |
|
62
|
16
|
|
|
|
|
45
|
my ( $findex_stream, $fdata_stream ) |
|
63
|
16
|
|
|
|
|
31
|
= @{$self}{qw( findex_stream fdata_stream )}; |
|
64
|
16
|
|
|
|
|
60
|
my $fields_reader = $seg_reader->get_fields_reader; |
|
65
|
|
|
|
|
|
|
|
|
66
|
16
|
|
|
|
|
60
|
my $max = $seg_reader->max_doc; |
|
67
|
16
|
50
|
|
|
|
51
|
return unless $max; |
|
68
|
16
|
|
|
|
|
28
|
$max -= 1; |
|
69
|
16
|
|
|
|
|
39
|
for my $orig ( 0 .. $max ) { |
|
70
|
|
|
|
|
|
|
# if the doc isn't deleted, copy it to the new seg |
|
71
|
10138
|
50
|
|
|
|
34728
|
next unless defined $doc_map->get($orig); |
|
72
|
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
# write pointer |
|
74
|
10138
|
|
|
|
|
36671
|
$findex_stream->lu_write( 'Q', $fdata_stream->tell ); |
|
75
|
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
# retrieve all fields |
|
77
|
10138
|
|
|
|
|
32521
|
my ( $num_fields, $all_data ) = $fields_reader->fetch_raw($orig); |
|
78
|
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
# write number of fields |
|
80
|
10138
|
|
|
|
|
26367
|
$fdata_stream->lu_write( 'V', $num_fields ); |
|
81
|
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
# write data for each field |
|
83
|
10138
|
|
|
|
|
16629
|
for ( 1 .. $num_fields ) { |
|
84
|
10138
|
|
|
|
|
22947
|
my ( $field_num, @some_data ) = splice( @$all_data, 0, 4 ); |
|
85
|
10138
|
|
|
|
|
63747
|
$fdata_stream->lu_write( 'VaTT', $field_num_map->get($field_num), |
|
86
|
|
|
|
|
|
|
@some_data ); |
|
87
|
|
|
|
|
|
|
} |
|
88
|
|
|
|
|
|
|
} |
|
89
|
|
|
|
|
|
|
} |
|
90
|
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
sub finish { |
|
92
|
62
|
|
|
62
|
0
|
126
|
my $self = shift; |
|
93
|
62
|
|
|
|
|
737
|
$self->{fdata_stream}->close; |
|
94
|
62
|
|
|
|
|
1028
|
$self->{findex_stream}->close; |
|
95
|
|
|
|
|
|
|
} |
|
96
|
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
1; |
|
98
|
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
__END__ |