File Coverage

blib/lib/KinoSearch1/Index/FieldsReader.pm
Criterion Covered Total %
statement 68 72 94.4
branch 14 14 100.0
condition n/a
subroutine 17 18 94.4
pod 1 7 14.2
total 100 111 90.0


line stmt bran cond sub pod time code
1             package KinoSearch1::Index::FieldsReader;
2 34     34   187 use strict;
  34         143  
  34         1262  
3 34     34   191 use warnings;
  34         91  
  34         879  
4 34     34   176 use KinoSearch1::Util::ToolSet;
  34         1844  
  34         4906  
5 34     34   237 use base qw( KinoSearch1::Util::Class Exporter );
  34         1764  
  34         4014  
6              
7 34     34   182 use constant ANALYZED => "\x01";
  34         63  
  34         2594  
8 34     34   349 use constant BINARY => "\x02";
  34         79  
  34         1746  
9 34     34   175 use constant COMPRESSED => "\x04";
  34         174  
  34         3143  
10              
11             our @EXPORT_OK;
12              
13             BEGIN {
14 34     34   120 @EXPORT_OK = qw( ANALYZED BINARY COMPRESSED );
15 34         299 __PACKAGE__->init_instance_vars(
16             # constructor params / members
17             finfos => undef,
18             fdata_stream => undef,
19             findex_stream => undef,
20             # members
21             size => undef,
22             );
23              
24             }
25              
26 34     34   43518 use Compress::Zlib qw( uncompress );
  34         3133959  
  34         3826  
27 34     34   8591 use KinoSearch1::Document::Field;
  34         104  
  34         1313  
28 34     34   10781 use KinoSearch1::Document::Doc;
  34         96  
  34         25500  
29              
30             sub init_instance {
31 95     95 1 164 my $self = shift;
32              
33             # derive the number of documents in the segment
34 95         769 $self->{size} = $self->{findex_stream}->length / 8;
35             }
36              
37             # Return number of documents in segment.
38 1080     1080 0 20189 sub get_size { $_[0]->{size} }
39              
40             # Retrieve raw field data from files. Either the data will be turned into
41             # full-on Field and Doc objects by fetch_doc, or it will be passed on mostly
42             # intact when merging segments (field numbers will be modified).
43             sub fetch_raw {
44 10187     10187 0 13996 my ( $self, $doc_num ) = @_;
45 10187         17431 my ( $findex_stream, $fdata_stream )
46 10187         11130 = @{$self}{ 'findex_stream', 'fdata_stream' };
47              
48             # get data file pointer from index
49 10187         50192 $findex_stream->seek( $doc_num * 8 );
50 10187         133997 my $start = $findex_stream->lu_read('Q');
51              
52             # retrieve one doc's worth of field data
53 10187         46691 $fdata_stream->seek($start);
54 10187         91656 my $num_fields = $fdata_stream->lu_read('V');
55 10187         21048 my $template = 'VaTT' x $num_fields;
56 10187         43396 my @raw = $fdata_stream->lu_read($template);
57 10187         35004 return ( $num_fields, \@raw );
58             }
59              
60             # Given a doc_num, rebuild a Doc object from the fields that were
61             # stored.
62             sub fetch_doc {
63 49     49 0 89 my ( $self, $doc_num ) = @_;
64 49         100 my $finfos = $self->{finfos};
65              
66             # start a new Doc object, read in data
67 49         326 my $doc = KinoSearch1::Document::Doc->new;
68 49         173 my ( $num_fields, $data ) = $self->fetch_raw($doc_num);
69              
70             # decode stored data and build up the Doc object Field by Field.
71 49         147 for ( 1 .. $num_fields ) {
72 126         316 my ( $field_num, $bits, $string, $tv_string )
73             = splice( @$data, 0, 4 );
74              
75             # decode fnm bits
76 126 100       521 my $analyzed = ( $bits & ANALYZED ) eq ANALYZED ? 1 : 0;
77 126 100       268 my $binary = ( $bits & BINARY ) eq BINARY ? 1 : 0;
78 126 100       261 my $compressed = ( $bits & COMPRESSED ) eq COMPRESSED ? 1 : 0;
79              
80             # create a field object, merging in the FieldInfo data, and add it
81 126         435 my $finfo = $finfos->info_by_num($field_num);
82 126 100       1171 my $field = KinoSearch1::Document::Field->new(
83             %$finfo,
84             field_num => $field_num,
85             analyzed => $analyzed,
86             binary => $binary,
87             compressed => $compressed,
88             fdt_bits => $bits,
89             value => $compressed ? uncompress($string) : $string,
90             tv_string => $tv_string,
91             );
92 126         694 $doc->add_field($field);
93             }
94              
95 49         275 return $doc;
96             }
97              
98             sub decode_fdt_bits {
99 0     0 0 0 my ( undef, $field, $bits ) = @_;
100 0         0 $field->set_analyzed( ( $bits & ANALYZED ) eq ANALYZED );
101 0         0 $field->set_binary( ( $bits & BINARY ) eq BINARY );
102 0         0 $field->set_compressed( ( $bits & COMPRESSED ) eq COMPRESSED );
103             }
104              
105             sub encode_fdt_bits {
106 128     128 0 254 my ( undef, $field ) = @_;
107 128         236 my $bits = "\0";
108 128         262 for ($bits) {
109 128 100       497 $_ |= ANALYZED if $field->get_analyzed;
110 128 100       421 $_ |= BINARY if $field->get_binary;
111 128 100       435 $_ |= COMPRESSED if $field->get_compressed;
112             }
113 128         774 return $bits;
114             }
115              
116             sub close {
117 39     39 0 61 my $self = shift;
118 39         154 $self->{findex_stream}->close;
119 39         131 $self->{fdata_stream}->close;
120             }
121              
122             1;
123              
124             __END__