File Coverage

blib/lib/LucyX/Index/ZlibDocReader.pm
Criterion Covered Total %
statement 75 76 98.6
branch 9 16 56.2
condition n/a
subroutine 11 11 100.0
pod 2 4 50.0
total 97 107 90.6


line stmt bran cond sub pod time code
1             # Licensed to the Apache Software Foundation (ASF) under one or more
2             # contributor license agreements. See the NOTICE file distributed with
3             # this work for additional information regarding copyright ownership.
4             # The ASF licenses this file to You under the Apache License, Version 2.0
5             # (the "License"); you may not use this file except in compliance with
6             # the License. You may obtain a copy of the License at
7             #
8             # http://www.apache.org/licenses/LICENSE-2.0
9             #
10             # Unless required by applicable law or agreed to in writing, software
11             # distributed under the License is distributed on an "AS IS" BASIS,
12             # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13             # See the License for the specific language governing permissions and
14             # limitations under the License.
15              
16 2     2   1178 use strict;
  2         2  
  2         47  
17 2     2   6 use warnings;
  2         2  
  2         61  
18              
19             package LucyX::Index::ZlibDocReader;
20 2     2   7 use base qw( Lucy::Index::DocReader );
  2         2  
  2         666  
21             our $VERSION = '0.006000_001';
22             $VERSION = eval $VERSION;
23 2     2   8 use Lucy::Util::StringHelper qw( utf8_valid utf8_flag_on );
  2         2  
  2         80  
24 2     2   38 use Compress::Zlib qw( uncompress );
  2         2  
  2         74  
25 2     2   7 use Carp;
  2         3  
  2         1109  
26              
27             # Inside-out member vars.
28             our %ix_in;
29             our %dat_in;
30             our %binary_fields;
31              
32             sub new {
33 6     6 1 495 my ( $either, %args ) = @_;
34 6         58 my $self = $either->SUPER::new(%args);
35              
36             # Validate metadata. Only open streams if the segment has data we
37             # recognize.
38 6         176 my $segment = $self->get_segment;
39 6         33 my $metadata = $segment->fetch_metadata("zdocs");
40 6 50       13 if ($metadata) {
41 6 50       18 if ( $metadata->{format} != 1 ) {
42 0         0 confess("Unrecognized format: '$metadata->{format}'");
43             }
44              
45             # Open InStreams.
46 6         19 my $dat_filename = $segment->get_name . "/zdocs.dat";
47 6         13 my $ix_filename = $segment->get_name . "/zdocs.ix";
48 6         15 my $folder = $self->get_folder;
49 6 50       69 $ix_in{$$self} = $folder->open_in($ix_filename)
50             or confess Clownfish->error;
51 6 50       39 $dat_in{$$self} = $folder->open_in($dat_filename)
52             or confess Clownfish->error;
53              
54             # Remember which fields are binary.
55 6         17 my $schema = $self->get_schema;
56 6         9 my $bin_fields = $binary_fields{$$self} = {};
57             $bin_fields->{$_} = 1
58 6         6 for grep { $schema->fetch_type($_)->binary }
  24         102  
59 6         32 @{ $schema->all_fields };
60             }
61              
62 6         22 return $self;
63             }
64              
65             sub fetch_doc {
66 3     3 1 204 my ( $self, $doc_id ) = @_;
67 3         5 my $dat_in = $dat_in{$$self};
68 3         4 my $ix_in = $ix_in{$$self};
69 3         3 my $bin_fields = $binary_fields{$$self};
70              
71             # Bail if no data in segment.
72 3 50       7 return unless $ix_in;
73              
74             # Read index information.
75 3         12 $ix_in->seek( $doc_id * 8 );
76 3         9 my $start = $ix_in->read_i64;
77 3         7 my $len = $ix_in->read_i64 - $start;
78 3         4 my $compressed;
79              
80             # Read main data.
81 3         4 $dat_in->seek($start);
82 3         7 $dat_in->read( $compressed, $len );
83 3         7 my $inflated = uncompress($compressed);
84 3         136 my $num_fields = unpack( "w", $inflated );
85 3         3 my $pack_template = "w ";
86 3         8 $pack_template .= "w/a*" x ( $num_fields * 2 );
87 3         16 my ( undef, %fields ) = unpack( $pack_template, $inflated );
88              
89             # Turn on UTF-8 flag for string fields.
90 3         10 for my $field ( keys %fields ) {
91 9 100       17 next if $bin_fields->{$field};
92 6         9 utf8_flag_on( $fields{$field} );
93             confess("Invalid UTF-8 read for doc $doc_id field '$field'")
94 6 50       14 unless utf8_valid( $fields{$field} );
95             }
96              
97 3         36 return Lucy::Document::HitDoc->new(
98             fields => \%fields,
99             doc_id => $doc_id,
100             );
101             }
102              
103             sub read_record {
104 21     21 0 17 my ( $self, $doc_id, $buf ) = @_;
105 21         17 my $dat_in = $dat_in{$$self};
106 21         14 my $ix_in = $ix_in{$$self};
107 21         18 my $bin_fields = $binary_fields{$$self};
108              
109 21 50       24 if ($ix_in) {
110 21         34 $ix_in->seek( $doc_id * 8 );
111 21         32 my $start = $ix_in->read_i64;
112 21         33 my $len = $ix_in->read_i64 - $start;
113 21         23 $dat_in->seek($start);
114 21         45 $dat_in->read( $$buf, $len );
115             }
116             }
117              
118             sub close {
119 6     6 0 7 my $self = shift;
120 6         14 delete $ix_in{$$self};
121 6         9 delete $dat_in{$$self};
122 6         31 delete $binary_fields{$$self};
123             }
124              
125             sub DESTROY {
126 6     6   267 my $self = shift;
127 6         13 delete $ix_in{$$self};
128 6         8 delete $dat_in{$$self};
129 6         6 delete $binary_fields{$$self};
130 6         690 $self->SUPER::DESTROY;
131             }
132              
133             1;
134              
135             __END__