File Coverage

blib/lib/KinoSearch1/Index/FieldInfos.pm
Criterion Covered Total %
statement 114 118 96.6
branch 17 22 77.2
condition n/a
subroutine 25 26 96.1
pod 1 16 6.2
total 157 182 86.2


line stmt bran cond sub pod time code
1             package KinoSearch1::Index::FieldInfos;
2 34     34   6028 use strict;
  34         72  
  34         1276  
3 34     34   192 use warnings;
  34         71  
  34         1005  
4 34     34   179 use KinoSearch1::Util::ToolSet;
  34         67  
  34         5272  
5 34     34   200 use base qw( KinoSearch1::Util::Class Exporter );
  34         69  
  34         4415  
6              
7 34     34   299 use constant INDEXED => "\x01";
  34         69  
  34         2465  
8 34     34   182 use constant VECTORIZED => "\x02";
  34         86  
  34         2127  
9 34     34   211 use constant OMIT_NORMS => "\x10";
  34         150  
  34         3528  
10              
11             our @EXPORT_OK;
12              
13             BEGIN {
14 34     34   355 __PACKAGE__->init_instance_vars(
15             # members
16             by_name => undef,
17             by_num => undef,
18             from_file => 0,
19             );
20 34         277 __PACKAGE__->ready_get_set(qw( from_file ));
21              
22 34         773 @EXPORT_OK = qw(
23             INDEXED
24             VECTORIZED
25             OMIT_NORMS
26             );
27             }
28              
29 34     34   1259 use KinoSearch1::Document::Field;
  34         68  
  34         55828  
30              
31             sub init_instance {
32 260     260 1 388 my $self = shift;
33 260         772 $self->{by_name} = {};
34 260         789 $self->{by_num} = [];
35             }
36              
37             sub clone {
38 100     100 0 183 my $self = shift;
39 100         414 my $evil_twin = __PACKAGE__->new;
40 100         246 $evil_twin->{from_file} = $self->{from_file};
41 100         199 my @by_num;
42             my %by_name;
43 100         162 for my $finfo ( @{ $self->{by_num} } ) {
  100         273  
44 182         608 my $dupe = $finfo->clone;
45 182         346 push @by_num, $dupe;
46 182         627 $by_name{ $finfo->get_name } = $dupe;
47             }
48 100         255 $evil_twin->{by_num} = \@by_num;
49 100         226 $evil_twin->{by_name} = \%by_name;
50 100         781 return $evil_twin;
51             }
52              
53             # Add a user-supplied Field object to the collection.
54             sub add_field {
55 158     158 0 262 my ( $self, $field ) = @_;
56 158 50       541 croak("Not a KinoSearch1::Document::Field")
57             unless a_isa_b( $field, 'KinoSearch1::Document::Field' );
58              
59             # don't mod Field objects for segments that are read back in
60 158 50       502 croak("Can't update FieldInfos that were read in from file")
61             if $self->{from_file};
62              
63             # add the field
64 158         445 my $fieldname = $field->get_name;
65 158         435 $self->{by_name}{$fieldname} = $field;
66 158         428 $self->_assign_field_nums;
67             }
68              
69             # Return the number of fields in the segment.
70 208     208 0 274 sub size { scalar @{ $_[0]->{by_num} } }
  208         1281  
71              
72             # Return a list of the Field objects.
73 341     341 0 514 sub get_infos { @{ $_[0]->{by_num} } }
  341         1381  
74              
75             # Given a fieldname, return its number.
76             sub get_field_num {
77 2199     2199 0 3294 my ( $self, $name ) = @_;
78             return undef
79 2199 100       5909 unless exists $self->{by_name}{$name};
80 2153         6629 my $num = $self->{by_name}{$name}->get_field_num;
81 2153         5985 return $num;
82             }
83              
84             # Given a fieldname, return its FieldInfo.
85 1     1 0 7 sub info_by_name { $_[0]->{by_name}{ $_[1] } }
86              
87             # Given a field number, return its FieldInfo.
88 152     152 0 539 sub info_by_num { $_[0]->{by_num}[ $_[1] ] }
89              
90             # Given the field number (new, not original), return the name of the field.
91             sub field_name {
92 10     10 0 12 my ( $self, $num ) = @_;
93 10         25 my $name = $self->{by_num}[$num]->get_name;
94 10 50       21 croak("Don't know about field number $num")
95             unless defined $name;
96 10         20 return $name;
97             }
98              
99             # Sort all the fields lexically by name and assign ascending numbers.
100             sub _assign_field_nums {
101 173     173   244 my $self = shift;
102 173 50       479 confess("Can't _assign_field_nums when from_file") if $self->{from_file};
103              
104             # assign field nums according to lexical order of field names
105 173         796 @{ $self->{by_num} }
  1410         3204  
106 173         241 = sort { $a->get_name cmp $b->get_name } values %{ $self->{by_name} };
  173         662  
107 173         343 my $inc = 0;
108 173         240 $_->set_field_num( $inc++ ) for @{ $self->{by_num} };
  173         799  
109             }
110              
111             # Decode an existing .fnm file.
112             sub read_infos {
113 100     100 0 302 my ( $self, $instream ) = @_;
114 100         178 my ( $by_name, $by_num ) = @{$self}{qw( by_name by_num )};
  100         256  
115              
116             # set flag indicating that this FieldInfos object has been read in
117 100         225 $self->{from_file} = 1;
118              
119             # read in infos from stream
120 100         1170 my $num_fields = $instream->lu_read('V');
121 100         892 my @names_and_bits = $instream->lu_read( 'Ta' x $num_fields );
122 100         228 my $field_num = 0;
123 100         343 while ( $field_num < $num_fields ) {
124 247         811 my ( $name, $bits ) = splice( @names_and_bits, 0, 2 );
125 247 100       2000 my $info = KinoSearch1::Document::Field->new(
    100          
126             field_num => $field_num,
127             name => $name,
128             indexed => ( "$bits" & INDEXED ) eq INDEXED ? 1 : 0,
129             vectorized => ( "$bits" & VECTORIZED ) eq VECTORIZED ? 1 : 0,
130             fnm_bits => $bits,
131             );
132 247         590 $by_name->{$name} = $info;
133             # order of storage implies lexical order by name and field number
134 247         440 push @$by_num, $info;
135 247         778 $field_num++;
136             }
137             }
138              
139             # Write .fnm file.
140             sub write_infos {
141 64     64 0 152 my ( $self, $outstream ) = @_;
142              
143 64         133 $outstream->lu_write( 'V', scalar @{ $self->{by_num} } );
  64         346  
144 64         106 for my $finfo ( @{ $self->{by_num} } ) {
  64         191  
145 162         545 $outstream->lu_write( 'Ta', $finfo->get_name, $finfo->get_fnm_bits, );
146             }
147             }
148              
149             # Merge one or more other FieldInfos objects into this one, redefining fields
150             # as necessary and generating new field numbers.
151             sub consolidate {
152 15     15 0 48 my ( $self, @others ) = @_;
153 15         50 my $infos = $self->{by_name};
154              
155             # Make *this* finfos the master FieldInfos object
156 15         35 for my $other (@others) {
157 36         53 while ( my ( $name, $other_finfo ) = each %{ $other->{by_name} } ) {
  84         363  
158 48 100       133 if ( exists $infos->{$name} ) {
159 28         100 $infos->{$name} = $other_finfo->breed_with( $infos->{$name} );
160             }
161             else {
162 20         68 $infos->{$name} = $other_finfo->clone;
163             }
164             }
165             }
166              
167 15         60 $self->_assign_field_nums;
168             }
169              
170             # Generate a map from another FieldInfos object's field numbers to the matching
171             # (by name) field numbers in this one. Should be called by the superset.
172             sub generate_field_num_map {
173 16     16 0 42 my ( $self, $other ) = @_;
174 16         33 my $map = '';
175 16         23 for my $other_finfo ( @{ $other->{by_num} } ) {
  16         48  
176 18         67 my $orig_finfo = $self->{by_name}{ $other_finfo->get_name };
177 18         64 $map .= pack( 'I', $orig_finfo->get_field_num );
178             }
179 16         67 return KinoSearch1::Util::IntMap->new( \$map );
180             }
181              
182             sub encode_fnm_bits {
183 158     158 0 279 my ( undef, $field ) = @_;
184 158         243 my $bits = "\0";
185 158         309 for ($bits) {
186 158 100       483 $_ |= INDEXED if $field->get_indexed;
187 158 100       509 $_ |= VECTORIZED if $field->get_vectorized;
188 158 50       505 $_ |= OMIT_NORMS if $field->get_omit_norms;
189             }
190 158         727 return $bits;
191             }
192              
193             sub decode_fnm_bits {
194 0     0 0 0 my ( undef, $field, $bits ) = @_;
195 0         0 $field->set_indexed( ( $bits & INDEXED ) eq INDEXED );
196 0         0 $field->set_vectorized( ( $bits & VECTORIZED ) eq VECTORIZED );
197 0         0 $field->set_omit_norms( ( $bits & OMIT_NORMS ) eq OMIT_NORMS );
198             }
199              
200 39     39 0 83 sub close { }
201              
202             1;
203              
204             __END__