File Coverage

blib/lib/Image/ExifTool/iWork.pm
Criterion Covered Total %
statement 65 80 81.2
branch 20 50 40.0
condition 1 3 33.3
subroutine 8 8 100.0
pod 0 3 0.0
total 94 144 65.2


line stmt bran cond sub pod time code
1             #------------------------------------------------------------------------------
2             # File: iWork.pm
3             #
4             # Description: Read Apple iWork '09 XML+ZIP files
5             #
6             # Revisions: 2009/11/11 - P. Harvey Created
7             #------------------------------------------------------------------------------
8              
9             package Image::ExifTool::iWork;
10              
11 1     1   8 use strict;
  1         2  
  1         46  
12 1     1   5 use vars qw($VERSION);
  1         2  
  1         54  
13 1     1   6 use Image::ExifTool qw(:DataAccess :Utils);
  1         2  
  1         272  
14 1     1   8 use Image::ExifTool::XMP;
  1         1  
  1         38  
15 1     1   5 use Image::ExifTool::ZIP;
  1         3  
  1         1284  
16              
17             $VERSION = '1.06';
18              
19             # test for recognized iWork document extensions and outer XML elements
20             my %iWorkType = (
21             # file extensions
22             NUMBERS => 'NUMBERS',
23             PAGES => 'PAGES',
24             KEY => 'KEY',
25             KTH => 'KTH',
26             NMBTEMPLATE => 'NMBTEMPLATE',
27             # we don't support double extensions --
28             # "PAGES.TEMPLATE" => 'Apple Pages Template',
29             # outer XML elements
30             'ls:document' => 'NUMBERS',
31             'sl:document' => 'PAGES',
32             'key:presentation' => 'KEY',
33             );
34              
35             # MIME types for iWork files (Apple has not registered these yet, but these
36             # are my best guess after doing some googling. I'm not 100% sure what "sff"
37             # indicates, but I think it refers to the new "flattened" package format)
38             my %mimeType = (
39             'NUMBERS' => 'application/x-iwork-numbers-sffnumbers',
40             'PAGES' => 'application/x-iwork-pages-sffpages',
41             'KEY' => 'application/x-iWork-keynote-sffkey',
42             'NMBTEMPLATE' => 'application/x-iwork-numbers-sfftemplate',
43             'PAGES.TEMPLATE'=> 'application/x-iwork-pages-sfftemplate',
44             'KTH' => 'application/x-iWork-keynote-sffkth',
45             );
46              
47             # iWork tags
48             %Image::ExifTool::iWork::Main = (
49             GROUPS => { 0 => 'XML', 1 => 'XML', 2 => 'Document' },
50             PROCESS_PROC => \&Image::ExifTool::XMP::ProcessXMP,
51             VARS => { NO_ID => 1 },
52             NOTES => q{
53             The Apple iWork '09 file format is a ZIP archive containing XML files
54             similar to the Office Open XML (OOXML) format. Metadata tags in iWork
55             files are extracted even if they don't appear below.
56             },
57             authors => { Name => 'Author', Groups => { 2 => 'Author' } },
58             comment => { },
59             copyright => { Groups => { 2 => 'Author' } },
60             keywords => { },
61             projects => { List => 1 },
62             title => { },
63             );
64              
65             #------------------------------------------------------------------------------
66             # Generate a tag ID for this XML tag
67             # Inputs: 0) tag property name list ref
68             # Returns: tagID
69             sub GetTagID($)
70             {
71 8     8 0 13 my $props = shift;
72 8 100       25 return 0 if $$props[-1] =~ /^\w+:ID$/; # ignore ID tags
73 7 50       43 return $$props[0] =~ /^.*?:(.*)/ ? $1 : $$props[0];
74             }
75              
76             #------------------------------------------------------------------------------
77             # We found an XMP property name/value
78             # Inputs: 0) ExifTool object ref, 1) tag table ref
79             # 2) reference to array of XMP property names (last is current property)
80             # 3) property value, 4) attribute hash ref (not used here)
81             # Returns: 1 if valid tag was found
82             sub FoundTag($$$$;$)
83             {
84 8     8 0 20 my ($et, $tagTablePtr, $props, $val, $attrs) = @_;
85 8 50       19 return 0 unless @$props;
86 8         26 my $verbose = $et->Options('Verbose');
87              
88 8 50       18 $et->VPrint(0, " | - Tag '", join('/',@$props), "'\n") if $verbose > 1;
89              
90             # un-escape XML character entities
91 8         24 $val = Image::ExifTool::XMP::UnescapeXML($val);
92             # convert from UTF8 to ExifTool Charset
93 8         25 $val = $et->Decode($val, 'UTF8');
94 8 100       18 my $tag = GetTagID($props) or return 0;
95              
96             # add any unknown tags to table
97 7 50       25 unless ($$tagTablePtr{$tag}) {
98 0 0       0 $et->VPrint(0, " [adding $tag]\n") if $verbose;
99 0         0 AddTagToTable($tagTablePtr, $tag, { Name => ucfirst $tag });
100             }
101             # save the tag
102 7         23 $et->HandleTag($tagTablePtr, $tag, $val);
103              
104 7         18 return 1;
105             }
106              
107             #------------------------------------------------------------------------------
108             # Extract information from an iWork file
109             # Inputs: 0) ExifTool object reference, 1) dirInfo reference
110             # Returns: 1
111             # Notes: Upon entry to this routine, the file type has already been verified
112             # as ZIP and the dirInfo hash contains a 'ZIP' Archive::Zip object reference
113             sub Process_iWork($$)
114             {
115 1     1 0 4 my ($et, $dirInfo) = @_;
116 1         3 my $zip = $$dirInfo{ZIP};
117 1         2 my ($type, $index, $indexFile, $status);
118              
119             # try to determine the file type
120 1         5 local $SIG{'__WARN__'} = \&Image::ExifTool::ZIP::WarnProc;
121             # trust type given by file extension if available
122 1 50       6 $type = $iWorkType{$$et{FILE_EXT}} if $$et{FILE_EXT};
123 1 50       114 unless ($type) {
124             # read the index file
125 0         0 my @members = $zip->membersMatching('^index\.(xml|apxl)$');
126 0 0       0 if (@members) {
127 0         0 ($index, $status) = $zip->contents($members[0]);
128 0 0       0 unless ($status) {
129 0         0 $indexFile = $members[0]->fileName();
130 0 0       0 if ($index =~ /^\s*<\?xml version=[^<]+<(\w+:\w+)/s) {
131 0 0       0 $type = $iWorkType{$1} if $iWorkType{$1};
132             }
133             }
134             } else {
135 0         0 @members = $zip->membersMatching('(?i)^.*\.(pages|numbers|key)/Index.*');
136 0 0       0 if (@members) {
137 0         0 my $tmp = $members[0]->fileName();
138 0 0       0 $type = $iWorkType{uc $1} if $tmp =~ /\.(pages|numbers|key)/i;
139             }
140             }
141 0 0       0 $type or $type = 'ZIP'; # assume ZIP by default
142             }
143 1         12 $et->SetFileType($type, $mimeType{$type});
144              
145 1         5 my @members = $zip->members();
146 1         8 my $docNum = 0;
147 1         2 my $member;
148 1         2 foreach $member (@members) {
149             # get filename of this ZIP member
150 4         12 my $file = $member->fileName();
151 4 50       34 next unless defined $file;
152 4         18 $et->VPrint(0, "File: $file\n");
153             # set the document number and extract ZIP tags
154 4         9 $$et{DOC_NUM} = ++$docNum;
155 4         13 Image::ExifTool::ZIP::HandleMember($et, $member);
156              
157             # process only the index XML and JPEG thumbnail/preview files
158 4 100       26 next unless $file =~ m{^(index\.(xml|apxl)|QuickLook/Thumbnail\.jpg|[^/]+/preview(-micro|-web)?.jpg)$}i;
159             # get the file contents if necessary
160             # (CAREFUL! $buff MUST be local since we hand off a value ref to PreviewImage)
161 2         5 my ($buff, $buffPt);
162 2 50 33     6 if ($indexFile and $indexFile eq $file) {
163             # use the index file we already loaded
164 0         0 $buffPt = \$index;
165             } else {
166 2         13 ($buff, $status) = $zip->contents($member);
167 2 50       2251 $status and $et->Warn("Error extracting $file"), next;
168 2         4 $buffPt = \$buff;
169             }
170             # extract JPEG as PreviewImage (should only be QuickLook/Thumbnail.jpg)
171 2 100       9 if ($file =~ /\.jpg$/) {
172 1 0       4 my $type = ($file =~ /preview-(\w+)/) ? ($1 eq 'web' ? 'Other' : 'Thumbnail') : 'Preview';
    50          
173 1         6 $et->FoundTag($type . 'Image', $buffPt);
174 1         4 next;
175             }
176             # process "metadata" section of XML index file
177 1 50       10 next unless $$buffPt =~ /<(\w+):metadata>/g;
178 1         6 my $ns = $1;
179 1         2 my $p1 = pos $$buffPt;
180 1 50       16 next unless $$buffPt =~ m{}g;
181             # construct XML data from "metadata" section only
182 1         5 $$buffPt = '' . substr($$buffPt, $p1, pos($$buffPt)-$p1);
183 1         11 my %dirInfo = (
184             DataPt => $buffPt,
185             DirLen => length $$buffPt,
186             DataLen => length $$buffPt,
187             XMPParseOpts => {
188             FoundProc => \&FoundTag,
189             },
190             );
191 1         4 my $tagTablePtr = GetTagTable('Image::ExifTool::iWork::Main');
192 1         6 $et->ProcessDirectory(\%dirInfo, $tagTablePtr);
193 1         6 undef $$buffPt; # (free memory now)
194             }
195 1         3 delete $$et{DOC_NUM};
196 1         6 return 1;
197             }
198              
199             1; # end
200              
201             __END__