File Coverage

blib/lib/Data/Embed/Reader.pm
Criterion Covered Total %
statement 120 129 93.0
branch 23 36 63.8
condition 3 5 60.0
subroutine 16 20 80.0
pod 3 3 100.0
total 165 193 85.4


line stmt bran cond sub pod time code
1             package Data::Embed::Reader;
2             {
3             $Data::Embed::Reader::VERSION = '0.2_03';
4             }
5              
6             # ABSTRACT: embed arbitrary data in a file - reader class
7              
8 8     8   2910 use strict;
  8         10  
  8         242  
9 8     8   28 use warnings;
  8         9  
  8         180  
10 8     8   27 use English qw< -no_match_vars >;
  8         9  
  8         32  
11 8     8   2785 use Fcntl qw< :seek >;
  8         12  
  8         916  
12 8     8   39 use Log::Log4perl::Tiny qw< :easy :dead_if_first >;
  8         11  
  8         64  
13 8     8   7091 use Storable qw< dclone >;
  8         21692  
  8         587  
14 8     8   4803 use Data::Embed::File;
  8         21  
  8         242  
15 8     8   4557 use Data::Embed::Util qw< :constants unescape >;
  8         16  
  8         9676  
16              
17              
18             sub new {
19 5     5 1 7 my $package = shift;
20 5         8 my $input = shift;
21              
22             # Undocumented, keep additional parameters around...
23 5 50 33     19 my %args = (scalar(@_) && ref($_[0])) ? %{$_[0]} : @_;
  0         0  
24 5         21 my $self = bless {args => \%args}, $package;
25              
26             # If a GLOB, just assign a default filename for logs and set
27             # binary mode :raw
28 5 100       12 if (ref($input) eq 'GLOB') {
29 4         9 DEBUG $package, ': input is a GLOB';
30 4         44 $self->{filename} = '';
31 4 50       21 binmode $input, ":raw"
32             or LOGCROAK "binmode() to ':raw' failed";
33 4         7 $self->{fh} = $input;
34             }
35             else { # otherwise... it's a filename
36 1         3 DEBUG $package, ': input is a file or other thing that can be open-ed';
37 1         13 $self->{filename} = $input;
38 1 50       26 open $self->{fh}, '<:raw', $input
39             or LOGCROAK "open('$input'): $OS_ERROR";
40             }
41              
42 5         13 return $self;
43             } ## end sub new
44              
45              
46             sub files {
47 1     1 1 3 my $files = shift->_ensure_index()->{files};
48 1 50       12 return wantarray() ? @$files : $files;
49             }
50              
51              
52             sub reset {
53 0     0 1 0 my $self = shift;
54 0         0 delete $self->{$_} for qw< files index >;
55 0         0 return $self;
56             } ## end sub reset
57              
58              
59             ######## PRIVATE METHODS ##############################################
60              
61             # Get the index of the embedded files as a L object.
62             # You should normally not need this index, because it is parsed
63 4     4   14 sub _index { return shift->_ensure_index()->{_index}; }
64              
65             sub _ensure_index {
66 5     5   6 my $self = shift;
67              
68             # rebuild cache if not in place
69 5 50       13 if (!exists $self->{files}) {
70 5   100     9 my $index = $self->_load_index()
71             || {
72             files => [],
73             _index => Data::Embed::File->new(
74             fh => $self->{fh},
75             filename => $self->{filename},
76             name => 'Data::Embed index',
77             length => 0,
78             offset => scalar(__size($self->{fh})),
79             ),
80             };
81 5         41 %$self = (%$self, %$index);
82             } ## end if (!exists $self->{files...})
83              
84             # return a reference to $self, for easy chaining
85 5         15 return $self;
86             } ## end sub _ensure_index
87              
88             sub _load_index {
89 5     5   5 my $self = shift;
90              
91             # read the index section from the end of the file, or bail out
92 5 100       14 defined(my $index_text = $self->_read_index())
93             or return;
94 2         3 my $index_length = length($index_text);
95              
96             # trim to isolate the data in the section
97 2         2 my $terminator_length = length TERMINATOR();
98 2         2 substr $index_text, 0, length(STARTER()), '';
99 2         6 substr $index_text, -$terminator_length, $terminator_length, '';
100 2         6 DEBUG "index contents is '$index_text'";
101              
102             # iterate over the index that has been read. Each line in this
103             # index is assumed to contain a pair length/name
104 2         13 my $data_length = 0;
105 2         2 my ($fh, $filename) = @{$self}{qw< fh filename >};
  2         4  
106 3 50       13 my @files = map {
107 2         12 my ($length, $name) = m{\A \s* (\d+) \s+ (\S*) \s*\z}mxs
108             or LOGCROAK "index line is not compliant: >$_<";
109 3         7 $name = Data::Embed::Util::unescape($name);
110              
111             # the offset at which "this" file lives is equal to the length
112             # of all data considered so far
113 3         4 my $offset = $data_length;
114              
115             # the addition of this file increases the data length with the
116             # size of the section, plus two bytes for separating newlines
117 3         4 $data_length += $length + 2;
118             {
119 3         11 fh => $fh,
120             filename => $filename,
121             name => $name,
122             length => $length,
123             offset => $offset, # to be adjusted further
124             };
125             } split /\n+/, $index_text;
126              
127             # Now we established the full length of the data section, so it's
128             # possible to adjust all offsets for all files (remember that the
129             # files are assumed to be at the end of the embedding file)
130 2         5 my $full_length = __size($fh);
131 2         3 my $offset_correction = $full_length - $index_length - $data_length;
132 2         3 for my $file (@files) {
133 3         17 $file =
134             Data::Embed::File->new(%$file,
135             offset => ($file->{offset} + $offset_correction),);
136             }
137              
138             # return the files in the index and the index itself, all as
139             # Data::Embed::File objects for consistency
140             return {
141 2         7 files => \@files,
142             _index => Data::Embed::File->new(
143             fh => $fh,
144             filename => $filename,
145             name => 'Data::Embed index',
146             length => $index_length,
147             offset => $data_length + $offset_correction,
148             ),
149             };
150             } ## end sub _load_index
151              
152             sub __size {
153 10     10   12 my $fh = shift;
154 10         48 my $size = -s $fh;
155 10 100       24 if (! defined $size) {
156 2         4 DEBUG "getting size via seek";
157 2         15 my $current = tell $fh;
158 2         2 seek $fh, 0, SEEK_END;
159 2         3 $size = tell $fh;
160 2         4 DEBUG "size: $size";
161 2         13 seek $fh, $current, SEEK_SET;
162             }
163 10         29 return $size;
164             }
165              
166             # read the last section of the file, looking for the index
167             sub _read_index {
168 5     5   5 my $self = shift;
169 5         6 my ($fh, $filename) = @{$self}{qw< fh filename >};
  5         11  
170 5         21 DEBUG "_read_index(): fh[$fh] filename[$filename]";
171 5         68 my $full_length = __size($fh); # length of the whole stream/file
172              
173             # look for TERMINATOR at the very end of the file
174 5         13 my $terminator = TERMINATOR;
175              
176             # is there enough data?
177 5         9 my $terminator_length = length $terminator;
178 5 100       30 return unless $full_length > $terminator_length;
179              
180             # read exactly that number of bytes from the end of the file
181             # and compare with the TERMINATOR
182 2         5 my $ending = $self->_read(($terminator_length) x 2);
183 2 50       5 return unless $ending eq $terminator;
184 2         3 DEBUG "found terminator";
185              
186             # TERMINATOR is in place, this is promising. Now let's look for
187             # STARTER going backwards in the file
188 2         13 my $starter = STARTER;
189 2         3 my $readable = $full_length - $terminator_length;
190 2         1 my $chunk_size = 80; # we'll read this number of bytes per time
191 2         2 my $starter_position; # this will tell us where the STARTER begins
192 2         4 while ($readable) { # loop until there's stuff to read backwards
193              
194             # how many bytes to read? $chunk_size if possible, what remains
195             # otherwise
196 2 100       4 my $n = ($readable > $chunk_size) ? $chunk_size : $readable;
197 2         5 my $chunk = $self->_read($n, $n + length $ending);
198              
199             # we're reading backwards, so the new $chunk as to be pre-pended
200 2         3 $ending = $chunk . $ending;
201 2     0   7 TRACE sub { "ENDING: >$ending<" };
  0         0  
202              
203             # Look for the STARTER. We have to work on the full $ending
204             # instead of the shorter last $chunk because the STARTER might
205             # have been split across two reads
206 2         16 $starter_position = CORE::index $ending, $starter;
207              
208             # finding the STARTER is a good exit condition
209 2 50       5 last if $starter_position >= 0;
210              
211             # otherwise note that we already read some bytes and go on
212 0         0 $readable -= $n;
213             } ## end while ($readable)
214              
215             # if $starter_position is not valid (i.e. -1) then we did not find
216             # the STARTER and we exit with a failure (not an exception, the whole
217             # thing might not be in place at all, although the presence of the
218             # TERMINATOR is suspect anyway...)
219 2 50       3 return unless $starter_position >= 0;
220 2         4 DEBUG "found starter";
221              
222             # trim the available buffer $ending to isolate the index and return it
223 2         14 substr $ending, 0, $starter_position, '';
224 2         6 return $ending;
225             } ## end sub _read_index
226              
227             # read data from the underlying stream, using offsets from the end
228             # of the stream
229             sub _read {
230 4     4   5 my $self = shift;
231 4         6 my @args = my ($count, $offset_from_end) = @_;
232 4         4 my ($fh, $filename) = @{$self}{qw< fh filename >};
  4         9  
233             DEBUG
234 4     0   16 sub { my $args = join ', ', @args; "_read($args) [file: $filename]" };
  0         0  
  0         0  
235              
236 4 50       38 LOGDIE '_read(): offset from end cannot be less than count'
237             if $offset_from_end < $count;
238 4         10 DEBUG "seeking $offset_from_end to the end";
239 4 50       34 seek $fh, -$offset_from_end, SEEK_END
240             or LOGCROAK "seek('$filename'): $OS_ERROR";
241              
242 4         4 my $buffer = '';
243 4         6 while ($count) {
244 4         3 my $chunk;
245 4 50       29 defined(my $nread = read $fh, $chunk, $count)
246             or LOGCROAK "read('$filename'): $OS_ERROR";
247 4     0   14 TRACE sub { "read $nread bytes, '$chunk'" };
  0         0  
248 4         42 DEBUG "read $nread out of $count bytes needed";
249 4 50       32 LOGCROAK "unexpectedly reached end of file"
250             unless $nread;
251 4         5 $buffer .= $chunk;
252 4         7 $count -= $nread;
253             } ## end while ($count)
254 4         7 return $buffer;
255             } ## end sub _read
256              
257             1;
258              
259             __END__