File Coverage

blib/lib/File/Properties/Compressed.pm
Criterion Covered Total %
statement 79 107 73.8
branch 17 40 42.5
condition 7 24 29.1
subroutine 17 20 85.0
pod 5 5 100.0
total 125 196 63.7


line stmt bran cond sub pod time code
1             # ----------------------------------------------------------------------------
2             #
3             # This module supports computing and caching of file SHA-2 digests and
4             # mime types. Files compressed using bzip2 and gzip are uncompressed,
5             # and their content SHA digests and mime types are also accessible.
6             #
7             # Copyright © 2010,2011 Brendt Wohlberg
8             # See distribution LICENSE file for license details.
9             #
10             # Most recent modification: 18 December 2011
11             #
12             # ----------------------------------------------------------------------------
13              
14             package File::Properties::Compressed;
15             our $VERSION = 0.02;
16              
17 3     3   3994 use File::Properties::Regular;
  3         11  
  3         126  
18 3     3   21 use base qw(File::Properties::Regular);
  3         6  
  3         351  
19              
20             require 5.005;
21 3     3   18 use strict;
  3         17  
  3         109  
22 3     3   17 use warnings;
  3         5  
  3         134  
23 3     3   18 use Error qw(:try);
  3         6  
  3         28  
24 3     3   794 use IO::File;
  3         7  
  3         571  
25 3     3   4591 use File::Temp;
  3         60105  
  3         390  
26 3     3   3101 use Compress::Bzip2 qw(bzopen BZ_OK BZ_STREAM_END);
  3         49300  
  3         622  
27 3     3   18309 use Compress::Zlib qw(gzopen Z_OK Z_STREAM_END);
  3         549145  
  3         8463  
28              
29             our $UncompressBufferSize = 1048576;
30             our $CacheTableName = 'CompFileCache';
31             our $CacheTableCols = ['FileDigest TEXT','ContentMimeType TEXT',
32             'ContentDigest TEXT'];
33              
34              
35             # ----------------------------------------------------------------------------
36             # Initialiser
37             # ----------------------------------------------------------------------------
38             sub _init {
39 2     2   7 my $self = shift;
40 2         9 my $path = shift; # File path
41 2         8 my $fpcr = shift; # File::Properties::Cache reference
42              
43             # Initialisation for base
44 2         20 $self->SUPER::_init($path, $fpcr);
45             # Remainder of initialisation only required if file is a regular
46             # file that is compressed
47 2 50 33     34 if ($self->isreg and $self->iscompressed) {
48 2         4 my $cent;
49             ## If File::Properties::Cache reference has been specified, try to
50             ## retrieve compressed file cache entry for this file. If
51             ## retrieval fails, compute the relevant properties for this file
52             ## and store them in the cache.
53 2 100 66     241 if (defined $fpcr and ($cent = $fpcr->cretrieve($CacheTableName,
54             {'FileDigest' => $self->SUPER::digest}))) {
55 1         7 $self->cmimetype($cent->{'ContentMimeType'});
56 1         5 $self->cdigest($cent->{'ContentDigest'});
57             # Set flag indicating that this entry was obtained from the cache
58 1         4 $self->_fromcache($CacheTableName, 1);
59             } else {
60 1         7 my $fhnd = $self->cfilehandle;
61 1         10 $self->cmimetype(File::Properties::Regular::_mimetype($fhnd->filename));
62 1         8 $self->cdigest(File::Properties::Regular::_digest($fhnd->filename));
63             # Set flag indicating that this entry was not obtained from the cache
64 1         12 $self->_fromcache($CacheTableName, 0);
65 1 50       6 if (defined $fpcr) {
66 1         11 my $row = {'FileDigest' => $self->SUPER::digest,
67             'ContentMimeType' => $self->cmimetype,
68             'ContentDigest' => $self->cdigest};
69 1         10 $fpcr->cinsert($CacheTableName, $row);
70             }
71             }
72             }
73             }
74              
75              
76             # ----------------------------------------------------------------------------
77             # Get (or set) mime type of compressed file content
78             # ----------------------------------------------------------------------------
79             sub cmimetype {
80 3     3 1 1186 my $self = shift;
81              
82 3 100       18 $self->{'cmtp'} = shift if (@_);
83 3 50       18 return (defined $self->{'cmtp'})?$self->{'cmtp'}:$self->mimetype;
84             }
85              
86              
87             # ----------------------------------------------------------------------------
88             # Get (or set) digest string of compressed file content
89             # ----------------------------------------------------------------------------
90             sub cdigest {
91 3     3 1 28 my $self = shift;
92              
93 3 100       17 $self->{'cdgs'} = shift if (@_);
94 3 50       26 return (defined $self->{'cdgs'})?$self->{'cdgs'}:$self->digest;
95             }
96              
97              
98             # ----------------------------------------------------------------------------
99             # Get file handle to file, or to file containing uncompressed content
100             # if file is compressed.
101             # ----------------------------------------------------------------------------
102             sub cfilehandle {
103 1     1 1 3 my $self = shift;
104              
105             ## If a file handle for the file (or temporary file containing
106             ## uncompressed file data, if it is a compressed file) is not
107             ## stored, create one. The assumption is that this file handle will
108             ## only need to be created and accessed on the initial pass through
109             ## the class hierarchy for a specific file, before it has been
110             ## cached. Any class derived from File::Properties::Compressed
111             ## should cache all information that has to be computed from the
112             ## file so that subsequent object constructions for the file can be
113             ## done purely from the cache, without need for additional
114             ## uncompressing of the data.
115 1 50       5 if (not defined $self->{'cfhn'}) {
116             # Open and store a file handle for the file, to the file itself if
117             # it is not compressed, or to a temporary file containing
118             # uncompressed file data if it is
119 1 50       142 $self->{'cfhn'} = ($self->iscompressed)?
120             $self->_tmpunzip:
121             IO::File->new($self->path, 'r');
122 1 50 33     18 throw File::Properties::Error("Error opening file handle")
123             if (not (defined $self->{'cfhn'} and $self->{'cfhn'}->opened));
124             }
125 1         17 return $self->{'cfhn'};
126             }
127              
128              
129             # ----------------------------------------------------------------------------
130             # Determine whether file properties represent a compressed file
131             # ----------------------------------------------------------------------------
132             sub iscompressed {
133 3     3 1 7 my $self = shift;
134              
135 3   33     37 return ((defined $self->SUPER::mimetype) and
136             ($self->SUPER::mimetype eq 'application/x-bzip2' or
137             $self->SUPER::mimetype eq 'application/x-gzip'));
138             }
139              
140              
141             # ----------------------------------------------------------------------------
142             # Construct string representing properties hash
143             # ----------------------------------------------------------------------------
144             sub string {
145 0     0 1 0 my $self = shift;
146 0         0 my $levl = shift;
147              
148 0 0       0 $levl = 0 if (!defined $levl);
149 0         0 my $lpfx = ' ' x (2*$levl);
150 0         0 my $s = $self->SUPER::string($levl);
151 0 0       0 if ($self->iscompressed) {
152 0         0 $s .= $lpfx . " Content Mime Type: ".$self->cmimetype."\n";
153 0         0 $s .= $lpfx . " Content Digest: ".substr($self->cdigest,0,40)."...\n";
154             }
155 0         0 return $s;
156             }
157              
158              
159             # ----------------------------------------------------------------------------
160             # Initialise cache table for File::Properties::Compressed objects
161             # ----------------------------------------------------------------------------
162             sub _cacheinit {
163 1     1   4 my $self = shift;
164 1         3 my $fpcr = shift; # File::Properties::Cache reference
165 1         3 my $opts = shift; # Options hash
166              
167 1         20 $self->SUPER::_cacheinit($fpcr, $opts);
168 1         12 $fpcr->define($CacheTableName, $CacheTableCols,
169             {'TableVersion' => [__PACKAGE__.'::Version', $VERSION]});
170             }
171              
172              
173             # ----------------------------------------------------------------------------
174             # Clear invalid entries in cache table for File::Properties::Compressed data
175             # ----------------------------------------------------------------------------
176             sub _cacheclean {
177 0     0   0 my $self = shift;
178 0         0 my $fpcr = shift; # File::Properties::Cache reference
179              
180 0         0 my $ctbl = $CacheTableName;
181 0         0 my $rtbl = $File::Properties::Regular::CacheTableName;
182             # Remove any entries in the File::Properties::Compressed cache table
183             # for which there is not a corresponding entry with the same file
184             # digest in the File::Properties::Regular cache table
185 0         0 $fpcr->remove($ctbl, {'Where' => "NOT EXISTS (SELECT * FROM $rtbl " .
186             "WHERE Digest = $ctbl.FileDigest)"});
187             }
188              
189              
190             # ----------------------------------------------------------------------------
191             # Return file handle and path to temporary file containing unzipped data
192             # ----------------------------------------------------------------------------
193             sub _tmpunzip {
194 1     1   4 my $self = shift;
195              
196 1         6 my $fmtp = $self->SUPER::mimetype;
197             # Path is specified in constructor argument, so it should be
198             # available independent of whether base object was retrieved from
199             # the cache
200 1         10 my $path = $self->path;
201 1 50       6 if ($fmtp eq 'application/x-bzip2') {
    0          
202 1         6 return _tmpbunzip($path);
203             } elsif ($fmtp eq 'application/x-gzip') {
204 0         0 return _tmpgunzip($path);
205             } else {
206 0         0 return undef;
207             }
208             }
209              
210              
211             # ----------------------------------------------------------------------------
212             # Return file handle to temporary file containing bunzipped data
213             # ----------------------------------------------------------------------------
214             sub _tmpbunzip {
215 1     1   3 my $path = shift; # File path
216              
217 1         3 my $buf;
218 1         3 my $bsz = $UncompressBufferSize;
219 1         17 my $tpfh = File::Temp->new();
220 1 50 33     742 throw File::Properties::Error("Error creating temporary file")
221             if (not (defined $tpfh and $tpfh->opened));
222 1 50       333 my $bz = bzopen($path, 'r') or
223             throw File::Properties::Error("Error opening bzip2 file $path");
224 1         182046 while ($bz->bzread($buf, $bsz)) {
225 1         5096 print $tpfh $buf;
226             }
227 1 50 33     28 throw File::Properties::Error("Error reading bzip2 file $path: ".$bz->bzerror)
228             if $bz->bzerror != BZ_OK and $bz->bzerror != BZ_STREAM_END;
229 1         88 $bz->bzclose();
230 1         11 $tpfh->seek(0,0);
231              
232 1         32 return $tpfh;
233             }
234              
235              
236             # ----------------------------------------------------------------------------
237             # Return file handle to temporary file containing gunzipped data
238             # ----------------------------------------------------------------------------
239             sub _tmpgunzip {
240 0     0     my $path = shift; # File path
241              
242 0           my $buf;
243 0           my $bsz = $UncompressBufferSize;
244 0           my $tpfh = File::Temp->new();
245 0 0 0       throw File::Properties::Error("Error creating temporary file")
246             if (not (defined $tpfh and $tpfh->opened));
247 0 0         my $gz = gzopen($path, 'r') or
248             throw File::Properties::Error("Error opening gzip file $path");
249 0           while ($gz->gzread($buf, $bsz)) {
250 0           print $tpfh $buf;
251             }
252 0 0 0       throw File::Properties::Error("Error reading gzip file $path: ".$gz->gzerror)
253             if $gz->gzerror != Z_OK and $gz->gzerror != Z_STREAM_END;
254 0           $gz->gzclose();
255 0           $tpfh->seek(0,0);
256              
257 0           return $tpfh;
258             }
259              
260              
261             # ----------------------------------------------------------------------------
262             # End of method definitions
263             # ----------------------------------------------------------------------------
264              
265              
266             1;
267             __END__