File Coverage

blib/lib/Image/ExifTool/ZIP.pm
Criterion Covered Total %
statement 144 259 55.6
branch 46 144 31.9
condition 17 86 19.7
subroutine 7 9 77.7
pod 0 6 0.0
total 214 504 42.4


line stmt bran cond sub pod time code
1             #------------------------------------------------------------------------------
2             # File: ZIP.pm
3             #
4             # Description: Read ZIP archive meta information
5             #
6             # Revisions: 10/28/2007 - P. Harvey Created
7             #
8             # References: 1) http://www.pkware.com/documents/casestudies/APPNOTE.TXT
9             # 2) http://www.cpanforum.com/threads/9046
10             # 3) http://www.gzip.org/zlib/rfc-gzip.html
11             # 4) http://DataCompression.info/ArchiveFormats/RAR202.txt
12             # 5) https://jira.atlassian.com/browse/CONF-21706
13             # 6) http://wwwimages.adobe.com/www.adobe.com/content/dam/Adobe/en/devnet/indesign/cs55-docs/IDML/idml-specification.pdf
14             #------------------------------------------------------------------------------
15              
16             package Image::ExifTool::ZIP;
17              
18 1     1   4390 use strict;
  1         3  
  1         33  
19 1     1   6 use vars qw($VERSION $warnString);
  1         2  
  1         46  
20 1     1   6 use Image::ExifTool qw(:DataAccess :Utils);
  1         2  
  1         4052  
21              
22             $VERSION = '1.28';
23              
24 0     0 0 0 sub WarnProc($) { $warnString = $_[0]; }
25              
26             # file types for recognized Open Document "mimetype" values
27             my %openDocType = (
28             'application/vnd.oasis.opendocument.database' => 'ODB', #5
29             'application/vnd.oasis.opendocument.chart' => 'ODC', #5
30             'application/vnd.oasis.opendocument.formula' => 'ODF', #5
31             'application/vnd.oasis.opendocument.graphics' => 'ODG', #5
32             'application/vnd.oasis.opendocument.image' => 'ODI', #5
33             'application/vnd.oasis.opendocument.presentation' => 'ODP',
34             'application/vnd.oasis.opendocument.spreadsheet' => 'ODS',
35             'application/vnd.oasis.opendocument.text' => 'ODT',
36             'application/vnd.adobe.indesign-idml-package' => 'IDML', #6 (not open doc)
37             'application/epub+zip' => 'EPUB', #PH (not open doc)
38             );
39              
40             # iWork file types based on names of files found in the zip archive
41             my %iWorkFile = (
42             'Index/Slide.iwa' => 'KEY',
43             'Index/Tables/DataList.iwa' => 'NUMBERS',
44             );
45              
46             my %iWorkType = (
47             NUMBERS => 'NUMBERS',
48             PAGES => 'PAGES',
49             KEY => 'KEY',
50             KTH => 'KTH',
51             NMBTEMPLATE => 'NMBTEMPLATE',
52             );
53              
54             # ZIP metadata blocks
55             %Image::ExifTool::ZIP::Main = (
56             PROCESS_PROC => \&Image::ExifTool::ProcessBinaryData,
57             GROUPS => { 2 => 'Other' },
58             FORMAT => 'int16u',
59             NOTES => q{
60             The following tags are extracted from ZIP archives. ExifTool also extracts
61             additional meta information from compressed documents inside some ZIP-based
62             files such Office Open XML (DOCX, PPTX and XLSX), Open Document (ODB, ODC,
63             ODF, ODG, ODI, ODP, ODS and ODT), iWork (KEY, PAGES, NUMBERS), Capture One
64             Enhanced Image Package (EIP), Adobe InDesign Markup Language (IDML),
65             Electronic Publication (EPUB), and Sketch design files (SKETCH). The
66             ExifTool family 3 groups may be used to organize ZIP tags by embedded
67             document number (ie. the exiftool C<-g3> option).
68             },
69             2 => 'ZipRequiredVersion',
70             3 => {
71             Name => 'ZipBitFlag',
72             PrintConv => '$val ? sprintf("0x%.4x",$val) : $val',
73             },
74             4 => {
75             Name => 'ZipCompression',
76             PrintConv => {
77             0 => 'None',
78             1 => 'Shrunk',
79             2 => 'Reduced with compression factor 1',
80             3 => 'Reduced with compression factor 2',
81             4 => 'Reduced with compression factor 3',
82             5 => 'Reduced with compression factor 4',
83             6 => 'Imploded',
84             7 => 'Tokenized',
85             8 => 'Deflated',
86             9 => 'Enhanced Deflate using Deflate64(tm)',
87             10 => 'Imploded (old IBM TERSE)',
88             12 => 'BZIP2',
89             14 => 'LZMA (EFS)',
90             18 => 'IBM TERSE (new)',
91             19 => 'IBM LZ77 z Architecture (PFS)',
92             96 => 'JPEG recompressed', #2
93             97 => 'WavPack compressed', #2
94             98 => 'PPMd version I, Rev 1',
95             },
96             },
97             5 => {
98             Name => 'ZipModifyDate',
99             Format => 'int32u',
100             Groups => { 2 => 'Time' },
101             ValueConv => sub {
102             my $val = shift;
103             return sprintf('%.4d:%.2d:%.2d %.2d:%.2d:%.2d',
104             ($val >> 25) + 1980, # year
105             ($val >> 21) & 0x0f, # month
106             ($val >> 16) & 0x1f, # day
107             ($val >> 11) & 0x1f, # hour
108             ($val >> 5) & 0x3f, # minute
109             ($val & 0x1f) * 2 # second
110             );
111             },
112             PrintConv => '$self->ConvertDateTime($val)',
113             },
114             7 => { Name => 'ZipCRC', Format => 'int32u', PrintConv => 'sprintf("0x%.8x",$val)' },
115             9 => { Name => 'ZipCompressedSize', Format => 'int32u' },
116             11 => { Name => 'ZipUncompressedSize', Format => 'int32u' },
117             13 => {
118             Name => 'ZipFileNameLength',
119             # don't store a tag -- just extract the value for use with ZipFileName
120             Hidden => 1,
121             RawConv => '$$self{ZipFileNameLength} = $val; undef',
122             },
123             # 14 => 'ZipExtraFieldLength',
124             15 => {
125             Name => 'ZipFileName',
126             Format => 'string[$$self{ZipFileNameLength}]',
127             },
128             _com => 'ZipFileComment',
129             );
130              
131             # GNU ZIP tags (ref 3)
132             %Image::ExifTool::ZIP::GZIP = (
133             PROCESS_PROC => \&Image::ExifTool::ProcessBinaryData,
134             GROUPS => { 2 => 'Other' },
135             NOTES => q{
136             These tags are extracted from GZIP (GNU ZIP) archives, but currently only
137             for the first file in the archive.
138             },
139             2 => {
140             Name => 'Compression',
141             PrintConv => {
142             8 => 'Deflated',
143             },
144             },
145             3 => {
146             Name => 'Flags',
147             PrintConv => { BITMASK => {
148             0 => 'Text',
149             1 => 'CRC16',
150             2 => 'ExtraFields',
151             3 => 'FileName',
152             4 => 'Comment',
153             }},
154             },
155             4 => {
156             Name => 'ModifyDate',
157             Format => 'int32u',
158             Groups => { 2 => 'Time' },
159             ValueConv => 'ConvertUnixTime($val,1)',
160             PrintConv => '$self->ConvertDateTime($val)',
161             },
162             8 => {
163             Name => 'ExtraFlags',
164             PrintConv => {
165             0 => '(none)',
166             2 => 'Maximum Compression',
167             4 => 'Fastest Algorithm',
168             },
169             },
170             9 => {
171             Name => 'OperatingSystem',
172             PrintConv => {
173             0 => 'FAT filesystem (MS-DOS, OS/2, NT/Win32)',
174             1 => 'Amiga',
175             2 => 'VMS (or OpenVMS)',
176             3 => 'Unix',
177             4 => 'VM/CMS',
178             5 => 'Atari TOS',
179             6 => 'HPFS filesystem (OS/2, NT)',
180             7 => 'Macintosh',
181             8 => 'Z-System',
182             9 => 'CP/M',
183             10 => 'TOPS-20',
184             11 => 'NTFS filesystem (NT)',
185             12 => 'QDOS',
186             13 => 'Acorn RISCOS',
187             255 => 'unknown',
188             },
189             },
190             10 => 'ArchivedFileName',
191             11 => 'Comment',
192             );
193              
194             # RAR tags (ref 4)
195             %Image::ExifTool::ZIP::RAR = (
196             PROCESS_PROC => \&Image::ExifTool::ProcessBinaryData,
197             GROUPS => { 2 => 'Other' },
198             NOTES => 'These tags are extracted from RAR archive files.',
199             0 => {
200             Name => 'CompressedSize',
201             Format => 'int32u',
202             },
203             4 => {
204             Name => 'UncompressedSize',
205             Format => 'int32u',
206             },
207             8 => {
208             Name => 'OperatingSystem',
209             PrintConv => {
210             0 => 'MS-DOS',
211             1 => 'OS/2',
212             2 => 'Win32',
213             3 => 'Unix',
214             },
215             },
216             13 => {
217             Name => 'ModifyDate',
218             Format => 'int32u',
219             Groups => { 2 => 'Time' },
220             ValueConv => sub {
221             my $val = shift;
222             return sprintf('%.4d:%.2d:%.2d %.2d:%.2d:%.2d',
223             ($val >> 25) + 1980, # year
224             ($val >> 21) & 0x0f, # month
225             ($val >> 16) & 0x1f, # day
226             ($val >> 11) & 0x1f, # hour
227             ($val >> 5) & 0x3f, # minute
228             ($val & 0x1f) * 2 # second
229             );
230             },
231             PrintConv => '$self->ConvertDateTime($val)',
232             },
233             18 => {
234             Name => 'PackingMethod',
235             PrintHex => 1,
236             PrintConv => {
237             0x30 => 'Stored',
238             0x31 => 'Fastest',
239             0x32 => 'Fast',
240             0x33 => 'Normal',
241             0x34 => 'Good Compression',
242             0x35 => 'Best Compression',
243             },
244             },
245             19 => {
246             Name => 'FileNameLength',
247             Format => 'int16u',
248             Hidden => 1,
249             RawConv => '$$self{FileNameLength} = $val; undef',
250             },
251             25 => {
252             Name => 'ArchivedFileName',
253             Format => 'string[$$self{FileNameLength}]',
254             },
255             );
256              
257             #------------------------------------------------------------------------------
258             # Extract information from a RAR file (ref 4)
259             # Inputs: 0) ExifTool object reference, 1) dirInfo reference
260             # Returns: 1 on success, 0 if this wasn't a valid RAR file
261             sub ProcessRAR($$)
262             {
263 0     0 0 0 my ($et, $dirInfo) = @_;
264 0         0 my $raf = $$dirInfo{RAF};
265 0         0 my ($flags, $buff);
266              
267 0 0 0     0 return 0 unless $raf->Read($buff, 7) and $buff eq "Rar!\x1a\x07\0";
268              
269 0         0 $et->SetFileType();
270 0         0 SetByteOrder('II');
271 0         0 my $tagTablePtr = GetTagTable('Image::ExifTool::ZIP::RAR');
272 0         0 my $docNum = 0;
273              
274 0         0 for (;;) {
275             # read block header
276 0 0       0 $raf->Read($buff, 7) == 7 or last;
277 0         0 my ($type, $flags, $size) = unpack('xxCvv', $buff);
278 0         0 $size -= 7;
279 0 0       0 if ($flags & 0x8000) {
280 0 0       0 $raf->Read($buff, 4) == 4 or last;
281 0         0 $size += unpack('V',$buff) - 4;
282             }
283 0 0       0 last if $size < 0;
284 0 0       0 next unless $size; # ignore blocks with no data
285             # don't try to read very large blocks unless LargeFileSupport is enabled
286 0 0 0     0 if ($size >= 0x80000000 and not $et->Options('LargeFileSupport')) {
287 0         0 $et->Warn('Large block encountered. Aborting.');
288 0         0 last;
289             }
290             # process the block
291 0 0 0     0 if ($type == 0x74) { # file block
    0          
292             # read maximum 4 KB from a file block
293 0 0       0 my $n = $size > 4096 ? 4096 : $size;
294 0 0       0 $raf->Read($buff, $n) == $n or last;
295             # add compressed size to start of data so we can extract it with the other tags
296 0         0 $buff = pack('V',$size) . $buff;
297 0         0 $$et{DOC_NUM} = ++$docNum;
298 0         0 $et->ProcessDirectory({ DataPt => \$buff }, $tagTablePtr);
299 0         0 $size -= $n;
300             } elsif ($type == 0x75 and $size > 6) { # comment block
301 0 0       0 $raf->Read($buff, $size) == $size or last;
302             # save comment, only if "Stored" (this is untested)
303 0 0       0 if (Get8u(\$buff, 3) == 0x30) {
304 0         0 $et->FoundTag('Comment', substr($buff, 6));
305             }
306 0         0 next;
307             }
308             # seek to the start of the next block
309 0 0 0     0 $raf->Seek($size, 1) or last if $size;
310             }
311 0         0 $$et{DOC_NUM} = 0;
312 0 0 0     0 if ($docNum > 1 and not $et->Options('Duplicates')) {
313 0         0 $et->Warn("Use the Duplicates option to extract tags for all $docNum files", 1);
314             }
315              
316 0         0 return 1;
317             }
318              
319             #------------------------------------------------------------------------------
320             # Extract information from a GNU ZIP file (ref 3)
321             # Inputs: 0) ExifTool object reference, 1) dirInfo reference
322             # Returns: 1 on success, 0 if this wasn't a valid GZIP file
323             sub ProcessGZIP($$)
324             {
325 1     1 0 4 my ($et, $dirInfo) = @_;
326 1         4 my $raf = $$dirInfo{RAF};
327 1         2 my ($flags, $buff);
328              
329 1 50 33     6 return 0 unless $raf->Read($buff, 10) and $buff =~ /^\x1f\x8b\x08/;
330              
331 1         8 $et->SetFileType();
332 1         7 SetByteOrder('II');
333              
334 1         5 my $tagTablePtr = GetTagTable('Image::ExifTool::ZIP::GZIP');
335 1         6 $et->HandleTag($tagTablePtr, 2, Get8u(\$buff, 2));
336 1         14 $et->HandleTag($tagTablePtr, 3, $flags = Get8u(\$buff, 3));
337 1         19 $et->HandleTag($tagTablePtr, 4, Get32u(\$buff, 4));
338 1         4 $et->HandleTag($tagTablePtr, 8, Get8u(\$buff, 8));
339 1         30 $et->HandleTag($tagTablePtr, 9, Get8u(\$buff, 9));
340              
341             # extract file name and comment if they exist
342 1 50       4 if ($flags & 0x18) {
343 1 50       13 if ($flags & 0x04) {
344             # skip extra field
345 0 0       0 $raf->Read($buff, 2) == 2 or return 1;
346 0         0 my $len = Get16u(\$buff, 0);
347 0 0       0 $raf->Read($buff, $len) == $len or return 1;
348             }
349 1 50       6 $raf->Read($buff, 4096) or return 1;
350 1         2 my $pos = 0;
351 1         2 my $tagID;
352             # loop for ArchivedFileName (10) and Comment (11) tags
353 1         14 foreach $tagID (10, 11) {
354 2 100       6 my $mask = $tagID == 10 ? 0x08 : 0x10;
355 2 50       7 next unless $flags & $mask;
356 2 50       18 my $end = $buff =~ /\0/g ? pos($buff) - 1 : length($buff);
357             # (the doc specifies the string should be ISO 8859-1,
358             # but in OS X it seems to be UTF-8, so don't translate
359             # it because I could just as easily screw it up)
360 2         7 my $str = substr($buff, $pos, $end - $pos);
361 2         8 $et->HandleTag($tagTablePtr, $tagID, $str);
362 2 50       7 last if $end >= length $buff;
363 2         13 $pos = $end + 1;
364             }
365             }
366 1         14 return 1;
367             }
368              
369             #------------------------------------------------------------------------------
370             # Call HandleTags for attributes of an Archive::Zip member
371             # Inputs: 0) ExifTool object ref, 1) member ref, 2) optional tag table ref
372             sub HandleMember($$;$)
373             {
374 28     28 0 64 my ($et, $member, $tagTablePtr) = @_;
375 28 100       85 $tagTablePtr or $tagTablePtr = GetTagTable('Image::ExifTool::ZIP::Main');
376 28         92 $et->HandleTag($tagTablePtr, 2, $member->versionNeededToExtract());
377 28         91 $et->HandleTag($tagTablePtr, 3, $member->bitFlag());
378 28         93 $et->HandleTag($tagTablePtr, 4, $member->compressionMethod());
379 28         93 $et->HandleTag($tagTablePtr, 5, $member->lastModFileDateTime());
380 28         108 $et->HandleTag($tagTablePtr, 7, $member->crc32());
381 28         93 $et->HandleTag($tagTablePtr, 9, $member->compressedSize());
382 28         103 $et->HandleTag($tagTablePtr, 11, $member->uncompressedSize());
383 28         93 $et->HandleTag($tagTablePtr, 15, $member->fileName());
384 28         99 my $com = $member->fileComment();
385 28 50 33     297 $et->HandleTag($tagTablePtr, '_com', $com) if defined $com and length $com;
386             }
387              
388             #------------------------------------------------------------------------------
389             # Extract file from ZIP archive
390             # Inputs: 0) ExifTool ref, 1) Zip object ref, 2) file name
391             # Returns: zip member or undef it it didn't exist
392             sub ExtractFile($$$)
393             {
394 10     10 0 30 my ($et, $zip, $file) = @_;
395 10         44 my $result = $zip->memberNamed($file);
396 10         767 $et->VPrint(1, " (Extracting '${file}' from zip archive)\n");
397 10         35 return $result;
398             }
399              
400             #------------------------------------------------------------------------------
401             # Extract information from a ZIP file
402             # Inputs: 0) ExifTool object reference, 1) dirInfo reference
403             # Returns: 1 on success, 0 if this wasn't a valid ZIP file
404             sub ProcessZIP($$)
405             {
406 5     5 0 16 my ($et, $dirInfo) = @_;
407 5         16 my $raf = $$dirInfo{RAF};
408 5         11 my ($buff, $buf2, $zip);
409              
410 5 50 33     18 return 0 unless $raf->Read($buff, 30) == 30 and $buff =~ /^PK\x03\x04/;
411              
412 5         20 my $tagTablePtr = GetTagTable('Image::ExifTool::ZIP::Main');
413 5         19 my $docNum = 0;
414              
415             # use Archive::Zip if available
416 5         10 for (;;) {
417 5 50 33     13 unless (eval { require Archive::Zip } and eval { require IO::File }) {
  5         875  
  5         83014  
418 0 0 0     0 if ($$et{FILE_EXT} and $$et{FILE_EXT} ne 'ZIP') {
419 0         0 $et->Warn("Install Archive::Zip to decode compressed ZIP information");
420             }
421 0         0 last;
422             }
423             # Archive::Zip requires a seekable IO::File object
424 5         12 my $fh;
425 5 50       27 if ($raf->{TESTED} >= 0) {
    0          
426 5 50       18 unless (eval { require IO::File }) {
  5         28  
427             # (this shouldn't happen because IO::File is a prerequisite of Archive::Zip)
428 0         0 $et->Warn("Install IO::File to decode compressed ZIP information");
429 0         0 last;
430             }
431 5         29 $raf->Seek(0,0);
432 5         16 $fh = $raf->{FILE_PT};
433 5         46 bless $fh, 'IO::File'; # Archive::Zip expects an IO::File object
434 0         0 } elsif (eval { require IO::String }) {
435             # read the whole file into memory (what else can I do?)
436 0         0 $raf->Slurp();
437 0         0 $fh = new IO::String ${$raf->{BUFF_PT}};
  0         0  
438             } else {
439 0 0       0 my $type = $raf->{FILE_PT} ? 'pipe or socket' : 'scalar reference';
440 0         0 $et->Warn("Install IO::String to decode compressed ZIP information from a $type");
441 0         0 last;
442             }
443 5         35 $et->VPrint(1, " --- using Archive::Zip ---\n");
444 5         36 $zip = new Archive::Zip;
445             # catch all warnings! (Archive::Zip is bad for this)
446 5         252 local $SIG{'__WARN__'} = \&WarnProc;
447 5         34 my $status = $zip->readFromFileHandle($fh);
448 5 0 33     22527 if ($status eq '4' and $raf->{TESTED} >= 0 and eval { require IO::String } and
  0   33     0  
      0        
      0        
449             $raf->Seek(0,2) and $raf->Tell() < 100000000)
450             {
451             # try again, reading it ourself this time in an attempt to avoid
452             # a failed test with Perl 5.6.2 GNU/Linux 2.6.32-5-686 i686-linux-64int-ld
453 0         0 $raf->Seek(0,0);
454 0         0 $raf->Slurp();
455 0         0 $fh = new IO::String ${$raf->{BUFF_PT}};
  0         0  
456 0         0 $zip = new Archive::Zip;
457 0         0 $status = $zip->readFromFileHandle($fh);
458             }
459 5 50       17 if ($status) {
460 0         0 undef $zip;
461 0         0 my %err = ( 1=>'Stream end error', 3=>'Format error', 4=>'IO error' );
462 0   0     0 my $err = $err{$status} || "Error $status";
463 0         0 $et->Warn("$err reading ZIP file");
464 0         0 last;
465             }
466             # extract zip file comment
467 5         24 my $comment = $zip->zipfileComment();
468 5 50 33     71 $et->FoundTag(Comment => $comment) if defined $comment and length $comment;
469              
470 5         21 $$dirInfo{ZIP} = $zip;
471              
472             # check for an Office Open file (DOCX, etc)
473             # --> read '[Content_Types].xml' to determine the file type
474 5         10 my ($mime, @members);
475 5         25 my $cType = ExtractFile($et, $zip, '[Content_Types].xml');
476 5 100       19 if ($cType) {
477 1         8 ($buff, $status) = $zip->contents($cType);
478 1 0 33     2100 if (not $status and (
      33        
479             # first look for the main document with the expected name
480             $buff =~ m{\sPartName\s*=\s*['"](?:/ppt/presentation.xml|/word/document.xml|/xl/workbook.xml)['"][^>]*\sContentType\s*=\s*(['"])([^"']+)\.main(\+xml)?\1} or
481             # then look for the main part
482             $buff =~ /]*\sPartName[^<]+\sContentType\s*=\s*(['"])([^"']+)\.main(\+xml)?\1/ or
483             # and if all else fails, use the default main
484             $buff =~ /ContentType\s*=\s*(['"])([^"']+)\.main(\+xml)?\1/))
485             {
486 1         8 $mime = $2;
487             }
488             }
489             # check for docProps if we couldn't find a MIME type
490 5 100       26 $mime or @members = $zip->membersMatching('^docProps/.*\.(xml|XML)$');
491 5 100 66     420 if ($mime or @members) {
492 1         5 $$dirInfo{MIME} = $mime;
493 1         937 require Image::ExifTool::OOXML;
494 1         6 Image::ExifTool::OOXML::ProcessDOCX($et, $dirInfo);
495 1         4 delete $$dirInfo{MIME};
496 1         6 last;
497             }
498              
499             # check for an EIP file
500 4         14 @members = $zip->membersMatching('^CaptureOne/.*\.(cos|COS)$');
501 4 100       320 if (@members) {
502 1         622 require Image::ExifTool::CaptureOne;
503 1         7 Image::ExifTool::CaptureOne::ProcessEIP($et, $dirInfo);
504 1         5 last;
505             }
506              
507             # check for an iWork file
508 3         11 @members = $zip->membersMatching('(?i)^(index\.(xml|apxl)|QuickLook/Thumbnail\.jpg|[^/]+\.(pages|numbers|key)/Index.(zip|xml|apxl))$');
509 3 100       581 if (@members) {
510 1         662 require Image::ExifTool::iWork;
511 1         11 Image::ExifTool::iWork::Process_iWork($et, $dirInfo);
512 1         4 last;
513             }
514              
515             # check for an Open Document, IDML or EPUB file
516 2         10 my $mType = ExtractFile($et, $zip, 'mimetype');
517 2 100       18 if ($mType) {
518 1         6 ($mime, $status) = $zip->contents($mType);
519 1 50 33     693 if (not $status and $mime =~ /([\x21-\xfe]+)/s) {
520             # clean up MIME type just in case (note that MIME is case insensitive)
521 1         7 $mime = lc $1;
522 1   50     14 $et->SetFileType($openDocType{$mime} || 'ZIP', $mime);
523 1 50       11 $et->Warn("Unrecognized MIMEType $mime") unless $openDocType{$mime};
524             # extract Open Document metadata from "meta.xml"
525 1         4 my $meta = ExtractFile($et, $zip, 'meta.xml');
526             # IDML files have metadata in a different place (ref 6)
527 1 50       6 $meta or $meta = ExtractFile($et, $zip, 'META-INF/metadata.xml');
528 1 50       4 if ($meta) {
529 1         4 ($buff, $status) = $zip->contents($meta);
530 1 50       684 unless ($status) {
531 1         7 my %dirInfo = (
532             DirName => 'XML',
533             DataPt => \$buff,
534             DirLen => length $buff,
535             DataLen => length $buff,
536             );
537             # (avoid structure warnings when copying from XML)
538 1         6 my $oldWarn = $$et{NO_STRUCT_WARN};
539 1         3 $$et{NO_STRUCT_WARN} = 1;
540 1         4 $et->ProcessDirectory(\%dirInfo, GetTagTable('Image::ExifTool::XMP::Main'));
541 1         5 $$et{NO_STRUCT_WARN} = $oldWarn;
542             }
543             }
544             # process rootfile of EPUB container if applicable
545 1         3 for (;;) {
546 1 50 33     7 last if $meta and $mime ne 'application/epub+zip';
547 0         0 my $container = ExtractFile($et, $zip, 'META-INF/container.xml');
548 0         0 ($buff, $status) = $zip->contents($container);
549 0 0       0 last if $status;
550 0 0       0 $buff =~ /]*?\bfull-path=(['"])(.*?)\1/s or last;
551             # load the rootfile data (OPF extension; contains XML metadata)
552 0 0       0 my $meta2 = $zip->memberNamed($2) or last;
553 0         0 $meta = $meta2;
554 0         0 ($buff, $status) = $zip->contents($meta);
555 0 0       0 last if $status;
556             # use opf:event to generate more meaningful tag names for dc:date
557 0         0 while ($buff =~ s{([^<]+)}{$2}s) {
558 0         0 my $dcTable = GetTagTable('Image::ExifTool::XMP::dc');
559 0         0 my $tag = "${1}Date";
560             AddTagToTable($dcTable, $tag, {
561             Name => ucfirst $tag,
562             Groups => { 2 => 'Time' },
563             List => 'Seq',
564             %Image::ExifTool::XMP::dateTimeInfo
565 0 0       0 }) unless $$dcTable{$tag};
566             }
567 0         0 my %dirInfo = (
568             DataPt => \$buff,
569             DirLen => length $buff,
570             DataLen => length $buff,
571             IgnoreProp => { 'package' => 1, metadata => 1 },
572             );
573             # (avoid structure warnings when copying from XML)
574 0         0 my $oldWarn = $$et{NO_STRUCT_WARN};
575 0         0 $$et{NO_STRUCT_WARN} = 1;
576 0         0 $et->ProcessDirectory(\%dirInfo, GetTagTable('Image::ExifTool::XMP::XML'));
577 0         0 $$et{NO_STRUCT_WARN} = $oldWarn;
578 0         0 last;
579             }
580 1 50 33     14 if ($openDocType{$mime} or $meta) {
581             # extract preview image(s) from "Thumbnails" directory if they exist
582 1         3 my $type;
583 1         6 my %tag = ( jpg => 'PreviewImage', png => 'PreviewPNG' );
584 1         5 foreach $type ('jpg', 'png') {
585 2         8 my $thumb = ExtractFile($et, $zip, "Thumbnails/thumbnail.$type");
586 2 100       7 next unless $thumb;
587 1         8 ($buff, $status) = $zip->contents($thumb);
588 1 50       1317 $et->FoundTag($tag{$type}, $buff) unless $status;
589             }
590 1         7 last; # all done since we recognized the MIME type or found metadata
591             }
592             # continue on to list ZIP contents...
593             }
594             }
595              
596             # otherwise just extract general ZIP information
597 1         11 $et->SetFileType();
598 1         3 @members = $zip->members();
599 1         7 my ($member, $iWorkType);
600             # special files to extract
601 1         10 my %extract = (
602             'meta.json' => 1,
603             'previews/preview.png' => 'PreviewPNG',
604             'preview.jpg' => 'PreviewImage', # (iWork 2013 files)
605             'preview-web.jpg' => 'OtherImage', # (iWork 2013 files)
606             'preview-micro.jpg' => 'ThumbnailImage', # (iWork 2013 files)
607             'QuickLook/Thumbnail.jpg' => 'ThumbnailImage', # (iWork 2009 files)
608             'QuickLook/Preview.pdf' => 'PreviewPDF', # (iWork 2009 files)
609             );
610 1         3 foreach $member (@members) {
611 1         3 $$et{DOC_NUM} = ++$docNum;
612 1         5 HandleMember($et, $member, $tagTablePtr);
613 1         3 my $file = $member->fileName();
614             # extract things from Sketch files
615 1 50 33     16 if ($extract{$file}) {
    50          
    50          
616 0         0 ($buff, $status) = $zip->contents($member);
617 0 0       0 $status and $et->Warn("Error extracting $file"), next;
618 0 0       0 if ($file eq 'meta.json') {
619 0         0 $et->ExtractInfo(\$buff, { ReEntry => 1 });
620 0 0 0     0 if ($$et{VALUE}{App} and $$et{VALUE}{App} =~ /sketch/i) {
621 0         0 $et->OverrideFileType('SKETCH');
622             }
623             } else {
624 0         0 $et->FoundTag($extract{$file} => $buff);
625             }
626             } elsif ($file eq 'Index/Document.iwa' and not $iWorkType) {
627 0   0     0 my $type = $iWorkType{$$et{FILE_EXT} || ''};
628 0   0     0 $iWorkType = $type || 'PAGES';
629             } elsif ($iWorkFile{$file}) {
630 0         0 $iWorkType = $iWorkFile{$file};
631             }
632             }
633 1 50       4 $et->OverrideFileType($iWorkType) if $iWorkType;
634 1         7 last;
635             }
636             # all done if we processed this using Archive::Zip
637 5 50       18 if ($zip) {
638 5         12 delete $$dirInfo{ZIP};
639 5         31 delete $$et{DOC_NUM};
640 5 50 33     23 if ($docNum > 1 and not $et->Options('Duplicates')) {
641 0         0 $et->Warn("Use the Duplicates option to extract tags for all $docNum files", 1);
642             }
643 5         139 return 1;
644             }
645             #
646             # process the ZIP file by hand (funny, but this seems easier than using Archive::Zip)
647             #
648 0           $et->VPrint(1, " -- processing as binary data --\n");
649 0           $raf->Seek(30, 0);
650 0           $et->SetFileType();
651 0           SetByteOrder('II');
652              
653             # A. Local file header:
654             # local file header signature 0) 4 bytes (0x04034b50)
655             # version needed to extract 4) 2 bytes
656             # general purpose bit flag 6) 2 bytes
657             # compression method 8) 2 bytes
658             # last mod file time 10) 2 bytes
659             # last mod file date 12) 2 bytes
660             # crc-32 14) 4 bytes
661             # compressed size 18) 4 bytes
662             # uncompressed size 22) 4 bytes
663             # file name length 26) 2 bytes
664             # extra field length 28) 2 bytes
665 0           for (;;) {
666 0           my $len = Get16u(\$buff, 26) + Get16u(\$buff, 28);
667 0 0         $raf->Read($buf2, $len) == $len or last;
668              
669 0           $$et{DOC_NUM} = ++$docNum;
670 0           $buff .= $buf2;
671 0           my %dirInfo = (
672             DataPt => \$buff,
673             DataPos => $raf->Tell() - 30 - $len,
674             DataLen => 30 + $len,
675             DirStart => 0,
676             DirLen => 30 + $len,
677             MixedTags => 1, # (to ignore FileComment tag)
678             );
679 0           $et->ProcessDirectory(\%dirInfo, $tagTablePtr);
680 0           my $flags = Get16u(\$buff, 6);
681 0 0         if ($flags & 0x08) {
682             # we don't yet support skipping stream mode data
683             # (when this happens, the CRC, compressed size and uncompressed
684             # sizes are set to 0 in the header. Instead, they are stored
685             # after the compressed data with an optional header of 0x08074b50)
686 0           $et->Warn('Stream mode data encountered, file list may be incomplete');
687 0           last;
688             }
689 0           $len = Get32u(\$buff, 18); # file data length
690 0 0         $raf->Seek($len, 1) or last; # skip file data
691 0 0 0       $raf->Read($buff, 30) == 30 and $buff =~ /^PK\x03\x04/ or last;
692             }
693 0           delete $$et{DOC_NUM};
694 0 0 0       if ($docNum > 1 and not $et->Options('Duplicates')) {
695 0           $et->Warn("Use the Duplicates option to extract tags for all $docNum files", 1);
696             }
697 0           return 1;
698             }
699              
700             1; # end
701              
702             __END__