File Coverage

blib/lib/LucyX/Index/ZlibDocReader.pm
Criterion Covered Total %
statement 75 76 98.6
branch 9 16 56.2
condition n/a
subroutine 11 11 100.0
pod 2 4 50.0
total 97 107 90.6


line stmt bran cond sub pod time code
1             # Licensed to the Apache Software Foundation (ASF) under one or more
2             # contributor license agreements. See the NOTICE file distributed with
3             # this work for additional information regarding copyright ownership.
4             # The ASF licenses this file to You under the Apache License, Version 2.0
5             # (the "License"); you may not use this file except in compliance with
6             # the License. You may obtain a copy of the License at
7             #
8             # http://www.apache.org/licenses/LICENSE-2.0
9             #
10             # Unless required by applicable law or agreed to in writing, software
11             # distributed under the License is distributed on an "AS IS" BASIS,
12             # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13             # See the License for the specific language governing permissions and
14             # limitations under the License.
15              
16 2     2   1178 use strict;
  2         2  
  2         55  
17 2     2   8 use warnings;
  2         2  
  2         65  
18              
19             package LucyX::Index::ZlibDocReader;
20 2     2   7 use base qw( Lucy::Index::DocReader );
  2         2  
  2         664  
21             our $VERSION = '0.006000_002';
22             $VERSION = eval $VERSION;
23 2     2   8 use Lucy::Util::StringHelper qw( utf8_valid utf8_flag_on );
  2         2  
  2         75  
24 2     2   6 use Compress::Zlib qw( uncompress );
  2         3  
  2         84  
25 2     2   9 use Carp;
  2         2  
  2         1115  
26              
27             # Inside-out member vars.
28             our %ix_in;
29             our %dat_in;
30             our %binary_fields;
31              
32             sub new {
33 6     6 1 509 my ( $either, %args ) = @_;
34 6         58 my $self = $either->SUPER::new(%args);
35              
36             # Validate metadata. Only open streams if the segment has data we
37             # recognize.
38 6         153 my $segment = $self->get_segment;
39 6         30 my $metadata = $segment->fetch_metadata("zdocs");
40 6 50       14 if ($metadata) {
41 6 50       16 if ( $metadata->{format} != 1 ) {
42 0         0 confess("Unrecognized format: '$metadata->{format}'");
43             }
44              
45             # Open InStreams.
46 6         18 my $dat_filename = $segment->get_name . "/zdocs.dat";
47 6         13 my $ix_filename = $segment->get_name . "/zdocs.ix";
48 6         13 my $folder = $self->get_folder;
49 6 50       69 $ix_in{$$self} = $folder->open_in($ix_filename)
50             or confess Clownfish->error;
51 6 50       39 $dat_in{$$self} = $folder->open_in($dat_filename)
52             or confess Clownfish->error;
53              
54             # Remember which fields are binary.
55 6         10 my $schema = $self->get_schema;
56 6         8 my $bin_fields = $binary_fields{$$self} = {};
57             $bin_fields->{$_} = 1
58 6         6 for grep { $schema->fetch_type($_)->binary }
  24         91  
59 6         29 @{ $schema->all_fields };
60             }
61              
62 6         22 return $self;
63             }
64              
65             sub fetch_doc {
66 3     3 1 204 my ( $self, $doc_id ) = @_;
67 3         5 my $dat_in = $dat_in{$$self};
68 3         6 my $ix_in = $ix_in{$$self};
69 3         4 my $bin_fields = $binary_fields{$$self};
70              
71             # Bail if no data in segment.
72 3 50       9 return unless $ix_in;
73              
74             # Read index information.
75 3         13 $ix_in->seek( $doc_id * 8 );
76 3         13 my $start = $ix_in->read_i64;
77 3         8 my $len = $ix_in->read_i64 - $start;
78 3         5 my $compressed;
79              
80             # Read main data.
81 3         7 $dat_in->seek($start);
82 3         11 $dat_in->read( $compressed, $len );
83 3         8 my $inflated = uncompress($compressed);
84 3         144 my $num_fields = unpack( "w", $inflated );
85 3         4 my $pack_template = "w ";
86 3         7 $pack_template .= "w/a*" x ( $num_fields * 2 );
87 3         16 my ( undef, %fields ) = unpack( $pack_template, $inflated );
88              
89             # Turn on UTF-8 flag for string fields.
90 3         10 for my $field ( keys %fields ) {
91 9 100       18 next if $bin_fields->{$field};
92 6         7 utf8_flag_on( $fields{$field} );
93             confess("Invalid UTF-8 read for doc $doc_id field '$field'")
94 6 50       15 unless utf8_valid( $fields{$field} );
95             }
96              
97 3         34 return Lucy::Document::HitDoc->new(
98             fields => \%fields,
99             doc_id => $doc_id,
100             );
101             }
102              
103             sub read_record {
104 21     21 0 14 my ( $self, $doc_id, $buf ) = @_;
105 21         18 my $dat_in = $dat_in{$$self};
106 21         15 my $ix_in = $ix_in{$$self};
107 21         15 my $bin_fields = $binary_fields{$$self};
108              
109 21 50       28 if ($ix_in) {
110 21         32 $ix_in->seek( $doc_id * 8 );
111 21         28 my $start = $ix_in->read_i64;
112 21         27 my $len = $ix_in->read_i64 - $start;
113 21         27 $dat_in->seek($start);
114 21         43 $dat_in->read( $$buf, $len );
115             }
116             }
117              
118             sub close {
119 6     6 0 7 my $self = shift;
120 6         15 delete $ix_in{$$self};
121 6         8 delete $dat_in{$$self};
122 6         32 delete $binary_fields{$$self};
123             }
124              
125             sub DESTROY {
126 6     6   263 my $self = shift;
127 6         13 delete $ix_in{$$self};
128 6         7 delete $dat_in{$$self};
129 6         6 delete $binary_fields{$$self};
130 6         645 $self->SUPER::DESTROY;
131             }
132              
133             1;
134              
135             __END__