File Coverage

blib/lib/Image/ExifTool/ZIP.pm
Criterion Covered Total %
statement 140 255 54.9
branch 46 144 31.9
condition 17 86 19.7
subroutine 6 8 75.0
pod 0 5 0.0
total 209 498 41.9


line stmt bran cond sub pod time code
1             #------------------------------------------------------------------------------
2             # File: ZIP.pm
3             #
4             # Description: Read ZIP archive meta information
5             #
6             # Revisions: 10/28/2007 - P. Harvey Created
7             #
8             # References: 1) http://www.pkware.com/documents/casestudies/APPNOTE.TXT
9             # 2) http://www.cpanforum.com/threads/9046
10             # 3) http://www.gzip.org/zlib/rfc-gzip.html
11             # 4) http://DataCompression.info/ArchiveFormats/RAR202.txt
12             # 5) https://jira.atlassian.com/browse/CONF-21706
13             # 6) http://wwwimages.adobe.com/www.adobe.com/content/dam/Adobe/en/devnet/indesign/cs55-docs/IDML/idml-specification.pdf
14             #------------------------------------------------------------------------------
15              
16             package Image::ExifTool::ZIP;
17              
18 1     1   4027 use strict;
  1         3  
  1         39  
19 1     1   5 use vars qw($VERSION $warnString);
  1         2  
  1         55  
20 1     1   7 use Image::ExifTool qw(:DataAccess :Utils);
  1         1  
  1         3129  
21              
22             $VERSION = '1.27';
23              
24 0     0 0 0 sub WarnProc($) { $warnString = $_[0]; }
25              
26             # file types for recognized Open Document "mimetype" values
27             my %openDocType = (
28             'application/vnd.oasis.opendocument.database' => 'ODB', #5
29             'application/vnd.oasis.opendocument.chart' => 'ODC', #5
30             'application/vnd.oasis.opendocument.formula' => 'ODF', #5
31             'application/vnd.oasis.opendocument.graphics' => 'ODG', #5
32             'application/vnd.oasis.opendocument.image' => 'ODI', #5
33             'application/vnd.oasis.opendocument.presentation' => 'ODP',
34             'application/vnd.oasis.opendocument.spreadsheet' => 'ODS',
35             'application/vnd.oasis.opendocument.text' => 'ODT',
36             'application/vnd.adobe.indesign-idml-package' => 'IDML', #6 (not open doc)
37             'application/epub+zip' => 'EPUB', #PH (not open doc)
38             );
39              
40             # iWork file types based on names of files found in the zip archive
41             my %iWorkFile = (
42             'Index/Slide.iwa' => 'KEY',
43             'Index/Tables/DataList.iwa' => 'NUMBERS',
44             );
45              
46             my %iWorkType = (
47             NUMBERS => 'NUMBERS',
48             PAGES => 'PAGES',
49             KEY => 'KEY',
50             KTH => 'KTH',
51             NMBTEMPLATE => 'NMBTEMPLATE',
52             );
53              
54             # ZIP metadata blocks
55             %Image::ExifTool::ZIP::Main = (
56             PROCESS_PROC => \&Image::ExifTool::ProcessBinaryData,
57             GROUPS => { 2 => 'Other' },
58             FORMAT => 'int16u',
59             NOTES => q{
60             The following tags are extracted from ZIP archives. ExifTool also extracts
61             additional meta information from compressed documents inside some ZIP-based
62             files such Office Open XML (DOCX, PPTX and XLSX), Open Document (ODB, ODC,
63             ODF, ODG, ODI, ODP, ODS and ODT), iWork (KEY, PAGES, NUMBERS), Capture One
64             Enhanced Image Package (EIP), Adobe InDesign Markup Language (IDML),
65             Electronic Publication (EPUB), and Sketch design files (SKETCH). The
66             ExifTool family 3 groups may be used to organize ZIP tags by embedded
67             document number (ie. the exiftool C<-g3> option).
68             },
69             2 => 'ZipRequiredVersion',
70             3 => {
71             Name => 'ZipBitFlag',
72             PrintConv => '$val ? sprintf("0x%.4x",$val) : $val',
73             },
74             4 => {
75             Name => 'ZipCompression',
76             PrintConv => {
77             0 => 'None',
78             1 => 'Shrunk',
79             2 => 'Reduced with compression factor 1',
80             3 => 'Reduced with compression factor 2',
81             4 => 'Reduced with compression factor 3',
82             5 => 'Reduced with compression factor 4',
83             6 => 'Imploded',
84             7 => 'Tokenized',
85             8 => 'Deflated',
86             9 => 'Enhanced Deflate using Deflate64(tm)',
87             10 => 'Imploded (old IBM TERSE)',
88             12 => 'BZIP2',
89             14 => 'LZMA (EFS)',
90             18 => 'IBM TERSE (new)',
91             19 => 'IBM LZ77 z Architecture (PFS)',
92             96 => 'JPEG recompressed', #2
93             97 => 'WavPack compressed', #2
94             98 => 'PPMd version I, Rev 1',
95             },
96             },
97             5 => {
98             Name => 'ZipModifyDate',
99             Format => 'int32u',
100             Groups => { 2 => 'Time' },
101             ValueConv => sub {
102             my $val = shift;
103             return sprintf('%.4d:%.2d:%.2d %.2d:%.2d:%.2d',
104             ($val >> 25) + 1980, # year
105             ($val >> 21) & 0x0f, # month
106             ($val >> 16) & 0x1f, # day
107             ($val >> 11) & 0x1f, # hour
108             ($val >> 5) & 0x3f, # minute
109             ($val & 0x1f) * 2 # second
110             );
111             },
112             PrintConv => '$self->ConvertDateTime($val)',
113             },
114             7 => { Name => 'ZipCRC', Format => 'int32u', PrintConv => 'sprintf("0x%.8x",$val)' },
115             9 => { Name => 'ZipCompressedSize', Format => 'int32u' },
116             11 => { Name => 'ZipUncompressedSize', Format => 'int32u' },
117             13 => {
118             Name => 'ZipFileNameLength',
119             # don't store a tag -- just extract the value for use with ZipFileName
120             Hidden => 1,
121             RawConv => '$$self{ZipFileNameLength} = $val; undef',
122             },
123             # 14 => 'ZipExtraFieldLength',
124             15 => {
125             Name => 'ZipFileName',
126             Format => 'string[$$self{ZipFileNameLength}]',
127             },
128             _com => 'ZipFileComment',
129             );
130              
131             # GNU ZIP tags (ref 3)
132             %Image::ExifTool::ZIP::GZIP = (
133             PROCESS_PROC => \&Image::ExifTool::ProcessBinaryData,
134             GROUPS => { 2 => 'Other' },
135             NOTES => q{
136             These tags are extracted from GZIP (GNU ZIP) archives, but currently only
137             for the first file in the archive.
138             },
139             2 => {
140             Name => 'Compression',
141             PrintConv => {
142             8 => 'Deflated',
143             },
144             },
145             3 => {
146             Name => 'Flags',
147             PrintConv => { BITMASK => {
148             0 => 'Text',
149             1 => 'CRC16',
150             2 => 'ExtraFields',
151             3 => 'FileName',
152             4 => 'Comment',
153             }},
154             },
155             4 => {
156             Name => 'ModifyDate',
157             Format => 'int32u',
158             Groups => { 2 => 'Time' },
159             ValueConv => 'ConvertUnixTime($val,1)',
160             PrintConv => '$self->ConvertDateTime($val)',
161             },
162             8 => {
163             Name => 'ExtraFlags',
164             PrintConv => {
165             0 => '(none)',
166             2 => 'Maximum Compression',
167             4 => 'Fastest Algorithm',
168             },
169             },
170             9 => {
171             Name => 'OperatingSystem',
172             PrintConv => {
173             0 => 'FAT filesystem (MS-DOS, OS/2, NT/Win32)',
174             1 => 'Amiga',
175             2 => 'VMS (or OpenVMS)',
176             3 => 'Unix',
177             4 => 'VM/CMS',
178             5 => 'Atari TOS',
179             6 => 'HPFS filesystem (OS/2, NT)',
180             7 => 'Macintosh',
181             8 => 'Z-System',
182             9 => 'CP/M',
183             10 => 'TOPS-20',
184             11 => 'NTFS filesystem (NT)',
185             12 => 'QDOS',
186             13 => 'Acorn RISCOS',
187             255 => 'unknown',
188             },
189             },
190             10 => 'ArchivedFileName',
191             11 => 'Comment',
192             );
193              
194             # RAR tags (ref 4)
195             %Image::ExifTool::ZIP::RAR = (
196             PROCESS_PROC => \&Image::ExifTool::ProcessBinaryData,
197             GROUPS => { 2 => 'Other' },
198             NOTES => 'These tags are extracted from RAR archive files.',
199             0 => {
200             Name => 'CompressedSize',
201             Format => 'int32u',
202             },
203             4 => {
204             Name => 'UncompressedSize',
205             Format => 'int32u',
206             },
207             8 => {
208             Name => 'OperatingSystem',
209             PrintConv => {
210             0 => 'MS-DOS',
211             1 => 'OS/2',
212             2 => 'Win32',
213             3 => 'Unix',
214             },
215             },
216             13 => {
217             Name => 'ModifyDate',
218             Format => 'int32u',
219             Groups => { 2 => 'Time' },
220             ValueConv => sub {
221             my $val = shift;
222             return sprintf('%.4d:%.2d:%.2d %.2d:%.2d:%.2d',
223             ($val >> 25) + 1980, # year
224             ($val >> 21) & 0x0f, # month
225             ($val >> 16) & 0x1f, # day
226             ($val >> 11) & 0x1f, # hour
227             ($val >> 5) & 0x3f, # minute
228             ($val & 0x1f) * 2 # second
229             );
230             },
231             PrintConv => '$self->ConvertDateTime($val)',
232             },
233             18 => {
234             Name => 'PackingMethod',
235             PrintHex => 1,
236             PrintConv => {
237             0x30 => 'Stored',
238             0x31 => 'Fastest',
239             0x32 => 'Fast',
240             0x33 => 'Normal',
241             0x34 => 'Good Compression',
242             0x35 => 'Best Compression',
243             },
244             },
245             19 => {
246             Name => 'FileNameLength',
247             Format => 'int16u',
248             Hidden => 1,
249             RawConv => '$$self{FileNameLength} = $val; undef',
250             },
251             25 => {
252             Name => 'ArchivedFileName',
253             Format => 'string[$$self{FileNameLength}]',
254             },
255             );
256              
257             #------------------------------------------------------------------------------
258             # Extract information from a RAR file (ref 4)
259             # Inputs: 0) ExifTool object reference, 1) dirInfo reference
260             # Returns: 1 on success, 0 if this wasn't a valid RAR file
261             sub ProcessRAR($$)
262             {
263 0     0 0 0 my ($et, $dirInfo) = @_;
264 0         0 my $raf = $$dirInfo{RAF};
265 0         0 my ($flags, $buff);
266              
267 0 0 0     0 return 0 unless $raf->Read($buff, 7) and $buff eq "Rar!\x1a\x07\0";
268              
269 0         0 $et->SetFileType();
270 0         0 SetByteOrder('II');
271 0         0 my $tagTablePtr = GetTagTable('Image::ExifTool::ZIP::RAR');
272 0         0 my $docNum = 0;
273              
274 0         0 for (;;) {
275             # read block header
276 0 0       0 $raf->Read($buff, 7) == 7 or last;
277 0         0 my ($type, $flags, $size) = unpack('xxCvv', $buff);
278 0         0 $size -= 7;
279 0 0       0 if ($flags & 0x8000) {
280 0 0       0 $raf->Read($buff, 4) == 4 or last;
281 0         0 $size += unpack('V',$buff) - 4;
282             }
283 0 0       0 last if $size < 0;
284 0 0       0 next unless $size; # ignore blocks with no data
285             # don't try to read very large blocks unless LargeFileSupport is enabled
286 0 0 0     0 if ($size >= 0x80000000 and not $et->Options('LargeFileSupport')) {
287 0         0 $et->Warn('Large block encountered. Aborting.');
288 0         0 last;
289             }
290             # process the block
291 0 0 0     0 if ($type == 0x74) { # file block
    0          
292             # read maximum 4 KB from a file block
293 0 0       0 my $n = $size > 4096 ? 4096 : $size;
294 0 0       0 $raf->Read($buff, $n) == $n or last;
295             # add compressed size to start of data so we can extract it with the other tags
296 0         0 $buff = pack('V',$size) . $buff;
297 0         0 $$et{DOC_NUM} = ++$docNum;
298 0         0 $et->ProcessDirectory({ DataPt => \$buff }, $tagTablePtr);
299 0         0 $size -= $n;
300             } elsif ($type == 0x75 and $size > 6) { # comment block
301 0 0       0 $raf->Read($buff, $size) == $size or last;
302             # save comment, only if "Stored" (this is untested)
303 0 0       0 if (Get8u(\$buff, 3) == 0x30) {
304 0         0 $et->FoundTag('Comment', substr($buff, 6));
305             }
306 0         0 next;
307             }
308             # seek to the start of the next block
309 0 0 0     0 $raf->Seek($size, 1) or last if $size;
310             }
311 0         0 $$et{DOC_NUM} = 0;
312 0 0 0     0 if ($docNum > 1 and not $et->Options('Duplicates')) {
313 0         0 $et->Warn("Use the Duplicates option to extract tags for all $docNum files", 1);
314             }
315              
316 0         0 return 1;
317             }
318              
319             #------------------------------------------------------------------------------
320             # Extract information from a GNU ZIP file (ref 3)
321             # Inputs: 0) ExifTool object reference, 1) dirInfo reference
322             # Returns: 1 on success, 0 if this wasn't a valid GZIP file
323             sub ProcessGZIP($$)
324             {
325 1     1 0 5 my ($et, $dirInfo) = @_;
326 1         3 my $raf = $$dirInfo{RAF};
327 1         2 my ($flags, $buff);
328              
329 1 50 33     4 return 0 unless $raf->Read($buff, 10) and $buff =~ /^\x1f\x8b\x08/;
330              
331 1         4 $et->SetFileType();
332 1         5 SetByteOrder('II');
333              
334 1         3 my $tagTablePtr = GetTagTable('Image::ExifTool::ZIP::GZIP');
335 1         5 $et->HandleTag($tagTablePtr, 2, Get8u(\$buff, 2));
336 1         4 $et->HandleTag($tagTablePtr, 3, $flags = Get8u(\$buff, 3));
337 1         6 $et->HandleTag($tagTablePtr, 4, Get32u(\$buff, 4));
338 1         16 $et->HandleTag($tagTablePtr, 8, Get8u(\$buff, 8));
339 1         11 $et->HandleTag($tagTablePtr, 9, Get8u(\$buff, 9));
340              
341             # extract file name and comment if they exist
342 1 50       4 if ($flags & 0x18) {
343 1 50       3 if ($flags & 0x04) {
344             # skip extra field
345 0 0       0 $raf->Read($buff, 2) == 2 or return 1;
346 0         0 my $len = Get16u(\$buff, 0);
347 0 0       0 $raf->Read($buff, $len) == $len or return 1;
348             }
349 1 50       3 $raf->Read($buff, 4096) or return 1;
350 1         3 my $pos = 0;
351 1         1 my $tagID;
352             # loop for ArchivedFileName (10) and Comment (11) tags
353 1         3 foreach $tagID (10, 11) {
354 2 100       5 my $mask = $tagID == 10 ? 0x08 : 0x10;
355 2 50       5 next unless $flags & $mask;
356 2 50       14 my $end = $buff =~ /\0/g ? pos($buff) - 1 : length($buff);
357             # (the doc specifies the string should be ISO 8859-1,
358             # but in OS X it seems to be UTF-8, so don't translate
359             # it because I could just as easily screw it up)
360 2         6 my $str = substr($buff, $pos, $end - $pos);
361 2         7 $et->HandleTag($tagTablePtr, $tagID, $str);
362 2 50       5 last if $end >= length $buff;
363 2         5 $pos = $end + 1;
364             }
365             }
366 1         2 return 1;
367             }
368              
369             #------------------------------------------------------------------------------
370             # Call HandleTags for attributes of an Archive::Zip member
371             # Inputs: 0) ExifTool object ref, 1) member ref, 2) optional tag table ref
372             sub HandleMember($$;$)
373             {
374 28     28 0 49 my ($et, $member, $tagTablePtr) = @_;
375 28 100       66 $tagTablePtr or $tagTablePtr = GetTagTable('Image::ExifTool::ZIP::Main');
376 28         70 $et->HandleTag($tagTablePtr, 2, $member->versionNeededToExtract());
377 28         73 $et->HandleTag($tagTablePtr, 3, $member->bitFlag());
378 28         71 $et->HandleTag($tagTablePtr, 4, $member->compressionMethod());
379 28         84 $et->HandleTag($tagTablePtr, 5, $member->lastModFileDateTime());
380 28         74 $et->HandleTag($tagTablePtr, 7, $member->crc32());
381 28         73 $et->HandleTag($tagTablePtr, 9, $member->compressedSize());
382 28         71 $et->HandleTag($tagTablePtr, 11, $member->uncompressedSize());
383 28         67 $et->HandleTag($tagTablePtr, 15, $member->fileName());
384 28         77 my $com = $member->fileComment();
385 28 50 33     220 $et->HandleTag($tagTablePtr, '_com', $com) if defined $com and length $com;
386             }
387              
388             #------------------------------------------------------------------------------
389             # Extract information from a ZIP file
390             # Inputs: 0) ExifTool object reference, 1) dirInfo reference
391             # Returns: 1 on success, 0 if this wasn't a valid ZIP file
392             sub ProcessZIP($$)
393             {
394 5     5 0 15 my ($et, $dirInfo) = @_;
395 5         11 my $raf = $$dirInfo{RAF};
396 5         10 my ($buff, $buf2, $zip);
397              
398 5 50 33     13 return 0 unless $raf->Read($buff, 30) == 30 and $buff =~ /^PK\x03\x04/;
399              
400 5         14 my $tagTablePtr = GetTagTable('Image::ExifTool::ZIP::Main');
401 5         10 my $docNum = 0;
402              
403             # use Archive::Zip if available
404 5         6 for (;;) {
405 5 50 33     9 unless (eval { require Archive::Zip } and eval { require IO::File }) {
  5         683  
  5         66773  
406 0 0 0     0 if ($$et{FILE_EXT} and $$et{FILE_EXT} ne 'ZIP') {
407 0         0 $et->Warn("Install Archive::Zip to decode compressed ZIP information");
408             }
409 0         0 last;
410             }
411             # Archive::Zip requires a seekable IO::File object
412 5         9 my $fh;
413 5 50       15 if ($raf->{TESTED} >= 0) {
    0          
414 5 50       9 unless (eval { require IO::File }) {
  5         19  
415             # (this shouldn't happen because IO::File is a prerequisite of Archive::Zip)
416 0         0 $et->Warn("Install IO::File to decode compressed ZIP information");
417 0         0 last;
418             }
419 5         22 $raf->Seek(0,0);
420 5         11 $fh = $raf->{FILE_PT};
421 5         19 bless $fh, 'IO::File'; # Archive::Zip expects an IO::File object
422 0         0 } elsif (eval { require IO::String }) {
423             # read the whole file into memory (what else can I do?)
424 0         0 $raf->Slurp();
425 0         0 $fh = new IO::String ${$raf->{BUFF_PT}};
  0         0  
426             } else {
427 0 0       0 my $type = $raf->{FILE_PT} ? 'pipe or socket' : 'scalar reference';
428 0         0 $et->Warn("Install IO::String to decode compressed ZIP information from a $type");
429 0         0 last;
430             }
431 5         23 $et->VPrint(1, " --- using Archive::Zip ---\n");
432 5         22 $zip = new Archive::Zip;
433             # catch all warnings! (Archive::Zip is bad for this)
434 5         162 local $SIG{'__WARN__'} = \&WarnProc;
435 5         19 my $status = $zip->readFromFileHandle($fh);
436 5 0 33     14310 if ($status eq '4' and $raf->{TESTED} >= 0 and eval { require IO::String } and
  0   33     0  
      0        
      0        
437             $raf->Seek(0,2) and $raf->Tell() < 100000000)
438             {
439             # try again, reading it ourself this time in an attempt to avoid
440             # a failed test with Perl 5.6.2 GNU/Linux 2.6.32-5-686 i686-linux-64int-ld
441 0         0 $raf->Seek(0,0);
442 0         0 $raf->Slurp();
443 0         0 $fh = new IO::String ${$raf->{BUFF_PT}};
  0         0  
444 0         0 $zip = new Archive::Zip;
445 0         0 $status = $zip->readFromFileHandle($fh);
446             }
447 5 50       14 if ($status) {
448 0         0 undef $zip;
449 0         0 my %err = ( 1=>'Stream end error', 3=>'Format error', 4=>'IO error' );
450 0   0     0 my $err = $err{$status} || "Error $status";
451 0         0 $et->Warn("$err reading ZIP file");
452 0         0 last;
453             }
454             # extract zip file comment
455 5         16 my $comment = $zip->zipfileComment();
456 5 50 33     54 $et->FoundTag(Comment => $comment) if defined $comment and length $comment;
457              
458 5         13 $$dirInfo{ZIP} = $zip;
459              
460             # check for an Office Open file (DOCX, etc)
461             # --> read '[Content_Types].xml' to determine the file type
462 5         8 my ($mime, @members);
463 5         17 my $cType = $zip->memberNamed('[Content_Types].xml');
464 5 100       244 if ($cType) {
465 1         4 ($buff, $status) = $zip->contents($cType);
466 1 0 33     1154 if (not $status and (
      33        
467             # first look for the main document with the expected name
468             $buff =~ m{\sPartName\s*=\s*['"](?:/ppt/presentation.xml|/word/document.xml|/xl/workbook.xml)['"][^>]*\sContentType\s*=\s*(['"])([^"']+)\.main(\+xml)?\1} or
469             # then look for the main part
470             $buff =~ /]*\sPartName[^<]+\sContentType\s*=\s*(['"])([^"']+)\.main(\+xml)?\1/ or
471             # and if all else fails, use the default main
472             $buff =~ /ContentType\s*=\s*(['"])([^"']+)\.main(\+xml)?\1/))
473             {
474 1         4 $mime = $2;
475             }
476             }
477             # check for docProps if we couldn't find a MIME type
478 5 100       24 $mime or @members = $zip->membersMatching('^docProps/.*\.(xml|XML)$');
479 5 100 66     328 if ($mime or @members) {
480 1         3 $$dirInfo{MIME} = $mime;
481 1         459 require Image::ExifTool::OOXML;
482 1         7 Image::ExifTool::OOXML::ProcessDOCX($et, $dirInfo);
483 1         3 delete $$dirInfo{MIME};
484 1         5 last;
485             }
486              
487             # check for an EIP file
488 4         14 @members = $zip->membersMatching('^CaptureOne/.*\.(cos|COS)$');
489 4 100       266 if (@members) {
490 1         458 require Image::ExifTool::CaptureOne;
491 1         6 Image::ExifTool::CaptureOne::ProcessEIP($et, $dirInfo);
492 1         6 last;
493             }
494              
495             # check for an iWork file
496 3         9 @members = $zip->membersMatching('(?i)^(index\.(xml|apxl)|QuickLook/Thumbnail\.jpg|[^/]+\.(pages|numbers|key)/Index.(zip|xml|apxl))$');
497 3 100       406 if (@members) {
498 1         463 require Image::ExifTool::iWork;
499 1         5 Image::ExifTool::iWork::Process_iWork($et, $dirInfo);
500 1         4 last;
501             }
502              
503             # check for an Open Document, IDML or EPUB file
504 2         7 my $mType = $zip->memberNamed('mimetype');
505 2 100       38 if ($mType) {
506 1         5 ($mime, $status) = $zip->contents($mType);
507 1 50 33     571 if (not $status and $mime =~ /([\x21-\xfe]+)/s) {
508             # clean up MIME type just in case (note that MIME is case insensitive)
509 1         4 $mime = lc $1;
510 1   50     8 $et->SetFileType($openDocType{$mime} || 'ZIP', $mime);
511 1 50       4 $et->Warn("Unrecognized MIMEType $mime") unless $openDocType{$mime};
512             # extract Open Document metadata from "meta.xml"
513 1         4 my $meta = $zip->memberNamed('meta.xml');
514             # IDML files have metadata in a different place (ref 6)
515 1 50       132 $meta or $meta = $zip->memberNamed('META-INF/metadata.xml');
516 1 50       5 if ($meta) {
517 1         4 ($buff, $status) = $zip->contents($meta);
518 1 50       523 unless ($status) {
519 1         7 my %dirInfo = (
520             DirName => 'XML',
521             DataPt => \$buff,
522             DirLen => length $buff,
523             DataLen => length $buff,
524             );
525             # (avoid structure warnings when copying from XML)
526 1         4 my $oldWarn = $$et{NO_STRUCT_WARN};
527 1         2 $$et{NO_STRUCT_WARN} = 1;
528 1         4 $et->ProcessDirectory(\%dirInfo, GetTagTable('Image::ExifTool::XMP::Main'));
529 1         4 $$et{NO_STRUCT_WARN} = $oldWarn;
530             }
531             }
532             # process rootfile of EPUB container if applicable
533 1         2 for (;;) {
534 1 50 33     7 last if $meta and $mime ne 'application/epub+zip';
535 0         0 my $container = $zip->memberNamed('META-INF/container.xml');
536 0         0 ($buff, $status) = $zip->contents($container);
537 0 0       0 last if $status;
538 0 0       0 $buff =~ /]*?\bfull-path=(['"])(.*?)\1/s or last;
539             # load the rootfile data (OPF extension; contains XML metadata)
540 0 0       0 my $meta2 = $zip->memberNamed($2) or last;
541 0         0 $meta = $meta2;
542 0         0 ($buff, $status) = $zip->contents($meta);
543 0 0       0 last if $status;
544             # use opf:event to generate more meaningful tag names for dc:date
545 0         0 while ($buff =~ s{([^<]+)}{$2}s) {
546 0         0 my $dcTable = GetTagTable('Image::ExifTool::XMP::dc');
547 0         0 my $tag = "${1}Date";
548             AddTagToTable($dcTable, $tag, {
549             Name => ucfirst $tag,
550             Groups => { 2 => 'Time' },
551             List => 'Seq',
552             %Image::ExifTool::XMP::dateTimeInfo
553 0 0       0 }) unless $$dcTable{$tag};
554             }
555 0         0 my %dirInfo = (
556             DataPt => \$buff,
557             DirLen => length $buff,
558             DataLen => length $buff,
559             IgnoreProp => { 'package' => 1, metadata => 1 },
560             );
561             # (avoid structure warnings when copying from XML)
562 0         0 my $oldWarn = $$et{NO_STRUCT_WARN};
563 0         0 $$et{NO_STRUCT_WARN} = 1;
564 0         0 $et->ProcessDirectory(\%dirInfo, GetTagTable('Image::ExifTool::XMP::XML'));
565 0         0 $$et{NO_STRUCT_WARN} = $oldWarn;
566 0         0 last;
567             }
568 1 50 33     5 if ($openDocType{$mime} or $meta) {
569             # extract preview image(s) from "Thumbnails" directory if they exist
570 1         2 my $type;
571 1         4 my %tag = ( jpg => 'PreviewImage', png => 'PreviewPNG' );
572 1         3 foreach $type ('jpg', 'png') {
573 2         8 my $thumb = $zip->memberNamed("Thumbnails/thumbnail.$type");
574 2 100       219 next unless $thumb;
575 1         4 ($buff, $status) = $zip->contents($thumb);
576 1 50       1064 $et->FoundTag($tag{$type}, $buff) unless $status;
577             }
578 1         6 last; # all done since we recognized the MIME type or found metadata
579             }
580             # continue on to list ZIP contents...
581             }
582             }
583              
584             # otherwise just extract general ZIP information
585 1         7 $et->SetFileType();
586 1         4 @members = $zip->members();
587 1         6 my ($member, $iWorkType);
588             # special files to extract
589 1         7 my %extract = (
590             'meta.json' => 1,
591             'previews/preview.png' => 'PreviewPNG',
592             'preview.jpg' => 'PreviewImage', # (iWork 2013 files)
593             'preview-web.jpg' => 'OtherImage', # (iWork 2013 files)
594             'preview-micro.jpg' => 'ThumbnailImage', # (iWork 2013 files)
595             'QuickLook/Thumbnail.jpg' => 'ThumbnailImage', # (iWork 2009 files)
596             'QuickLook/Preview.pdf' => 'PreviewPDF', # (iWork 2009 files)
597             );
598 1         3 foreach $member (@members) {
599 1         3 $$et{DOC_NUM} = ++$docNum;
600 1         4 HandleMember($et, $member, $tagTablePtr);
601 1         17 my $file = $member->fileName();
602             # extract things from Sketch files
603 1 50 33     15 if ($extract{$file}) {
    50          
    50          
604 0         0 ($buff, $status) = $zip->contents($member);
605 0 0       0 $status and $et->Warn("Error extracting $file"), next;
606 0 0       0 if ($file eq 'meta.json') {
607 0         0 $et->ExtractInfo(\$buff, { ReEntry => 1 });
608 0 0 0     0 if ($$et{VALUE}{App} and $$et{VALUE}{App} =~ /sketch/i) {
609 0         0 $et->OverrideFileType('SKETCH');
610             }
611             } else {
612 0         0 $et->FoundTag($extract{$file} => $buff);
613             }
614             } elsif ($file eq 'Index/Document.iwa' and not $iWorkType) {
615 0   0     0 my $type = $iWorkType{$$et{FILE_EXT} || ''};
616 0   0     0 $iWorkType = $type || 'PAGES';
617             } elsif ($iWorkFile{$file}) {
618 0         0 $iWorkType = $iWorkFile{$file};
619             }
620             }
621 1 50       4 $et->OverrideFileType($iWorkType) if $iWorkType;
622 1         5 last;
623             }
624             # all done if we processed this using Archive::Zip
625 5 50       17 if ($zip) {
626 5         9 delete $$dirInfo{ZIP};
627 5         10 delete $$et{DOC_NUM};
628 5 50 33     14 if ($docNum > 1 and not $et->Options('Duplicates')) {
629 0         0 $et->Warn("Use the Duplicates option to extract tags for all $docNum files", 1);
630             }
631 5         116 return 1;
632             }
633             #
634             # process the ZIP file by hand (funny, but this seems easier than using Archive::Zip)
635             #
636 0           $et->VPrint(1, " -- processing as binary data --\n");
637 0           $raf->Seek(30, 0);
638 0           $et->SetFileType();
639 0           SetByteOrder('II');
640              
641             # A. Local file header:
642             # local file header signature 0) 4 bytes (0x04034b50)
643             # version needed to extract 4) 2 bytes
644             # general purpose bit flag 6) 2 bytes
645             # compression method 8) 2 bytes
646             # last mod file time 10) 2 bytes
647             # last mod file date 12) 2 bytes
648             # crc-32 14) 4 bytes
649             # compressed size 18) 4 bytes
650             # uncompressed size 22) 4 bytes
651             # file name length 26) 2 bytes
652             # extra field length 28) 2 bytes
653 0           for (;;) {
654 0           my $len = Get16u(\$buff, 26) + Get16u(\$buff, 28);
655 0 0         $raf->Read($buf2, $len) == $len or last;
656              
657 0           $$et{DOC_NUM} = ++$docNum;
658 0           $buff .= $buf2;
659 0           my %dirInfo = (
660             DataPt => \$buff,
661             DataPos => $raf->Tell() - 30 - $len,
662             DataLen => 30 + $len,
663             DirStart => 0,
664             DirLen => 30 + $len,
665             MixedTags => 1, # (to ignore FileComment tag)
666             );
667 0           $et->ProcessDirectory(\%dirInfo, $tagTablePtr);
668 0           my $flags = Get16u(\$buff, 6);
669 0 0         if ($flags & 0x08) {
670             # we don't yet support skipping stream mode data
671             # (when this happens, the CRC, compressed size and uncompressed
672             # sizes are set to 0 in the header. Instead, they are stored
673             # after the compressed data with an optional header of 0x08074b50)
674 0           $et->Warn('Stream mode data encountered, file list may be incomplete');
675 0           last;
676             }
677 0           $len = Get32u(\$buff, 18); # file data length
678 0 0         $raf->Seek($len, 1) or last; # skip file data
679 0 0 0       $raf->Read($buff, 30) == 30 and $buff =~ /^PK\x03\x04/ or last;
680             }
681 0           delete $$et{DOC_NUM};
682 0 0 0       if ($docNum > 1 and not $et->Options('Duplicates')) {
683 0           $et->Warn("Use the Duplicates option to extract tags for all $docNum files", 1);
684             }
685 0           return 1;
686             }
687              
688             1; # end
689              
690             __END__