File Coverage

blib/lib/Image/ExifTool/PostScript.pm
Criterion Covered Total %
statement 197 319 61.7
branch 126 242 52.0
condition 21 66 31.8
subroutine 12 15 80.0
pod 0 11 0.0
total 356 653 54.5


line stmt bran cond sub pod time code
1             #------------------------------------------------------------------------------
2             # File: PostScript.pm
3             #
4             # Description: Read PostScript meta information
5             #
6             # Revisions: 07/08/2005 - P. Harvey Created
7             #
8             # References: 1) http://partners.adobe.com/public/developer/en/ps/5002.EPSF_Spec.pdf
9             # 2) http://partners.adobe.com/public/developer/en/ps/5001.DSC_Spec.pdf
10             # 3) http://partners.adobe.com/public/developer/en/illustrator/sdk/AI7FileFormat.pdf
11             #------------------------------------------------------------------------------
12              
13             package Image::ExifTool::PostScript;
14              
15 29     29   4759 use strict;
  29         82  
  29         1162  
16 29     29   175 use vars qw($VERSION $AUTOLOAD);
  29         72  
  29         1606  
17 29     29   210 use Image::ExifTool qw(:DataAccess :Utils);
  29         84  
  29         142451  
18              
19             $VERSION = '1.44';
20              
21             sub WritePS($$);
22             sub ProcessPS($$;$);
23              
24             # PostScript tag table
25             %Image::ExifTool::PostScript::Main = (
26             PROCESS_PROC => \&ProcessPS,
27             WRITE_PROC => \&WritePS,
28             PREFERRED => 1, # always add these tags when writing
29             GROUPS => { 2 => 'Image' },
30             # Note: Make all of these tags priority 0 since the first one found at
31             # the start of the file should take priority (in case multiples exist)
32             Author => { Priority => 0, Groups => { 2 => 'Author' }, Writable => 'string' },
33             BoundingBox => { Priority => 0 },
34             Copyright => { Priority => 0, Writable => 'string' }, #2
35             CreationDate => {
36             Name => 'CreateDate',
37             Priority => 0,
38             Groups => { 2 => 'Time' },
39             Writable => 'string',
40             PrintConv => '$self->ConvertDateTime($val)',
41             PrintConvInv => '$self->InverseDateTime($val)',
42             },
43             Creator => { Priority => 0, Writable => 'string' },
44             ImageData => { Priority => 0 },
45             For => { Priority => 0, Writable => 'string', Notes => 'for whom the document was prepared'},
46             Keywords => { Priority => 0, Writable => 'string' },
47             ModDate => {
48             Name => 'ModifyDate',
49             Priority => 0,
50             Groups => { 2 => 'Time' },
51             Writable => 'string',
52             PrintConv => '$self->ConvertDateTime($val)',
53             PrintConvInv => '$self->InverseDateTime($val)',
54             },
55             Pages => { Priority => 0 },
56             Routing => { Priority => 0, Writable => 'string' }, #2
57             Subject => { Priority => 0, Writable => 'string' },
58             Title => { Priority => 0, Writable => 'string' },
59             Version => { Priority => 0, Writable => 'string' }, #2
60             # these subdirectories for documentation only
61             BeginPhotoshop => {
62             Name => 'PhotoshopData',
63             SubDirectory => {
64             TagTable => 'Image::ExifTool::Photoshop::Main',
65             },
66             },
67             BeginICCProfile => {
68             Name => 'ICC_Profile',
69             SubDirectory => {
70             TagTable => 'Image::ExifTool::ICC_Profile::Main',
71             },
72             },
73             begin_xml_packet => {
74             Name => 'XMP',
75             SubDirectory => {
76             TagTable => 'Image::ExifTool::XMP::Main',
77             },
78             },
79             TIFFPreview => {
80             Groups => { 2 => 'Preview' },
81             Binary => 1,
82             Notes => q{
83             not a real tag ID, but used to represent the TIFF preview extracted from DOS
84             EPS images
85             },
86             },
87             BeginDocument => {
88             Name => 'EmbeddedFile',
89             SubDirectory => {
90             TagTable => 'Image::ExifTool::PostScript::Main',
91             },
92             Notes => 'extracted with L option',
93             },
94             EmbeddedFileName => {
95             Notes => q{
96             not a real tag ID, but the file name from a BeginDocument statement.
97             Extracted with document metadata when L option is used
98             },
99             },
100             # AI metadata (most with a single leading '%')
101             AI9_ColorModel => {
102             Name => 'AIColorModel',
103             PrintConv => {
104             1 => 'RGB',
105             2 => 'CMYK',
106             },
107             },
108             AI3_ColorUsage => { Name => 'AIColorUsage' },
109             AI5_RulerUnits => {
110             Name => 'AIRulerUnits',
111             PrintConv => {
112             0 => 'Inches',
113             1 => 'Millimeters',
114             2 => 'Points',
115             3 => 'Picas',
116             4 => 'Centimeters',
117             6 => 'Pixels',
118             },
119             },
120             AI5_TargetResolution => { Name => 'AITargetResolution' },
121             AI5_NumLayers => { Name => 'AINumLayers' },
122             AI5_FileFormat => { Name => 'AIFileFormat' },
123             AI8_CreatorVersion => { Name => 'AICreatorVersion' }, # (double leading '%')
124             AI12_BuildNumber => { Name => 'AIBuildNumber' },
125             );
126              
127             # composite tags
128             %Image::ExifTool::PostScript::Composite = (
129             GROUPS => { 2 => 'Image' },
130             # BoundingBox is in points, not pixels,
131             # but use it anyway if ImageData is not available
132             ImageWidth => {
133             Desire => {
134             0 => 'Main:PostScript:ImageData',
135             1 => 'PostScript:BoundingBox',
136             },
137             ValueConv => 'Image::ExifTool::PostScript::ImageSize(\@val, 0)',
138             },
139             ImageHeight => {
140             Desire => {
141             0 => 'Main:PostScript:ImageData',
142             1 => 'PostScript:BoundingBox',
143             },
144             ValueConv => 'Image::ExifTool::PostScript::ImageSize(\@val, 1)',
145             },
146             );
147              
148             # add our composite tags
149             Image::ExifTool::AddCompositeTags('Image::ExifTool::PostScript');
150              
151             #------------------------------------------------------------------------------
152             # AutoLoad our writer routines when necessary
153             #
154             sub AUTOLOAD
155             {
156 19     19   142 return Image::ExifTool::DoAutoLoad($AUTOLOAD, @_);
157             }
158              
159             #------------------------------------------------------------------------------
160             # Is this a PC system
161             # Returns: true for PC systems
162             my %isPC = (MSWin32 => 1, os2 => 1, dos => 1, NetWare => 1, symbian => 1, cygwin => 1);
163             sub IsPC()
164             {
165 0     0 0 0 return $isPC{$^O};
166             }
167              
168             #------------------------------------------------------------------------------
169             # Get image width or height
170             # Inputs: 0) value list ref (ImageData, BoundingBox), 1) true to get height
171             sub ImageSize($$)
172             {
173 4     4 0 15 my ($vals, $getHeight) = @_;
174 4         12 my ($w, $h);
175 4 50 33     37 if ($$vals[0] and $$vals[0] =~ /^(\d+) (\d+)/) {
    0 0        
176 4         17 ($w, $h) = ($1, $2);
177             } elsif ($$vals[1] and $$vals[1] =~ /^(\d+) (\d+) (\d+) (\d+)/) {
178 0         0 ($w, $h) = ($3 - $1, $4 - $2);
179             }
180 4 100       68 return $getHeight ? $h : $w;
181             }
182              
183             #------------------------------------------------------------------------------
184             # Set PostScript format error warning
185             # Inputs: 0) ExifTool object reference, 1) error string
186             # Returns: 1
187             sub PSErr($$)
188             {
189 0     0 0 0 my ($et, $str) = @_;
190             # set file type if not done already
191 0         0 my $ext = $$et{FILE_EXT};
192 0 0 0     0 $et->SetFileType(($ext and $ext eq 'AI') ? 'AI' : 'PS');
193 0         0 $et->Warn("PostScript format error ($str)");
194 0         0 return 1;
195             }
196              
197             #------------------------------------------------------------------------------
198             # Return input record separator to use for the specified file
199             # Inputs: 0) RAF reference
200             # Returns: Input record separator or undef on error
201             sub GetInputRecordSeparator($)
202             {
203 8     8 0 27 my $raf = shift;
204 8         35 my $pos = $raf->Tell(); # save current position
205 8         30 my ($data, $sep);
206 8 50       45 $raf->Read($data,256) or return undef;
207 8         32 my ($a, $d) = (999,999);
208 8 100       114 $a = pos($data), pos($data) = 0 if $data =~ /\x0a/g;
209 8 100       51 $d = pos($data) if $data =~ /\x0d/g;
210 8         23 my $diff = $a - $d;
211 8 50       59 if ($diff == 1) {
    50          
    100          
    100          
212 0         0 $sep = "\x0d\x0a";
213             } elsif ($diff == -1) {
214 0         0 $sep = "\x0a\x0d";
215             } elsif ($diff > 0) {
216 3         7 $sep = "\x0d";
217             } elsif ($diff < 0) {
218 4         8 $sep = "\x0a";
219             } # else error
220 8         46 $raf->Seek($pos, 0); # restore original position
221 8         83 return $sep;
222             }
223              
224             #------------------------------------------------------------------------------
225             # Split into lines ending in any CR, LF or CR+LF combination
226             # (this is annoying, and could be avoided if EPS files didn't mix linefeeds!)
227             # Inputs: 0) data pointer, 1) reference to lines array
228             # Notes: Fills @$lines with lines from splitting $$dataPt
229             sub SplitLine($$)
230             {
231 1     1 0 5 my ($dataPt, $lines) = @_;
232 1         3 for (;;) {
233 108         145 my $endl;
234             # find the position of the first LF (\x0a)
235 108 100       314 $endl = pos($$dataPt), pos($$dataPt) = 0 if $$dataPt =~ /\x0a/g;
236 108 50       263 if ($$dataPt =~ /\x0d/g) { # find the first CR (\x0d)
    0          
237 108 100       185 if (defined $endl) {
238             # (remember, CR+LF is a DOS newline...)
239 107 50       209 $endl = pos($$dataPt) if pos($$dataPt) < $endl - 1;
240             } else {
241 1         3 $endl = pos($$dataPt);
242             }
243             } elsif (not defined $endl) {
244 0         0 push @$lines, $$dataPt;
245 0         0 last;
246             }
247 108 100       171 if (length $$dataPt == $endl) {
248 1         3 push @$lines, $$dataPt;
249 1         4 last;
250             } else {
251             # continue to split into separate lines
252 107         238 push @$lines, substr($$dataPt, 0, $endl);
253 107         298 $$dataPt = substr($$dataPt, $endl);
254             }
255             }
256             }
257              
258             #------------------------------------------------------------------------------
259             # check to be sure we haven't read past end of PS data in DOS-style file
260             # Inputs: 0) RAF ref (with PSEnd member), 1) data ref
261             # - modifies data and sets RAF to EOF if end of PS is reached
262             sub CheckPSEnd($$)
263             {
264 0     0 0 0 my ($raf, $dataPt) = @_;
265 0         0 my $pos = $raf->Tell();
266 0 0       0 if ($pos >= $$raf{PSEnd}) {
267 0         0 $raf->Seek(0, 2); # seek to end of file so we can't read any more
268 0 0       0 $$dataPt = substr($$dataPt, 0, length($$dataPt) - $pos + $$raf{PSEnd}) if $pos > $$raf{PSEnd};
269             }
270             }
271              
272             #------------------------------------------------------------------------------
273             # Read next line from EPS file
274             # Inputs: 0) RAF ref (with PSEnd member if Postscript ends before end of file)
275             # 1) array of lines from file
276             # Returns: true on success
277             sub GetNextLine($$)
278             {
279 114     114 0 215 my ($raf, $lines) = @_;
280 114         170 my ($data, $changedNL);
281 114 100       281 my $altnl = ($/ eq "\x0d") ? "\x0a" : "\x0d";
282 114         152 for (;;) {
283 114 50       271 $raf->ReadLine($data) or last;
284 114 50       250 $$raf{PSEnd} and CheckPSEnd($raf, \$data);
285             # split line if it contains other newline sequences
286 114 100       313 if ($data =~ /$altnl/) {
287 1 50 33     7 if (length($data) > 500000 and IsPC()) {
288             # patch for Windows memory problem
289 0 0       0 unless ($changedNL) {
290 0         0 $changedNL = $/;
291 0         0 $/ = $altnl;
292 0         0 $altnl = $changedNL;
293 0         0 $raf->Seek(-length($data), 1);
294 0         0 next;
295             }
296             } else {
297             # split into separate lines
298             # push @$lines, split /$altnl/, $data, -1;
299             # if (@$lines == 2 and $$lines[1] eq $/) {
300             # # handle case of DOS newline data inside file using Unix newlines
301             # $$lines[0] .= pop @$lines;
302             # }
303             # split into separate lines if necessary
304 1         6 SplitLine(\$data, $lines);
305             }
306             } else {
307 113         250 push @$lines, $data;
308             }
309 114 50       250 $/ = $changedNL if $changedNL;
310 114         448 return 1;
311             }
312 0         0 return 0;
313             }
314              
315             #------------------------------------------------------------------------------
316             # Decode comment from PostScript file
317             # Inputs: 0) comment string, 1) RAF ref, 2) reference to lines array
318             # 3) optional data reference for extra lines read from file
319             # Returns: Decoded comment string (may be an array reference)
320             # - handles multi-line comments and escape sequences
321             sub DecodeComment($$$;$)
322             {
323 31     31 0 99 my ($val, $raf, $lines, $dataPt) = @_;
324 31         208 $val =~ s/\x0d*\x0a*$//; # remove trailing CR, LF or CR/LF
325             # check for continuation comments
326 31         70 for (;;) {
327 31 50 33     117 @$lines or GetNextLine($raf, $lines) or last;
328 31 50       102 last unless $$lines[0] =~ /^%%\+/; # is the next line a continuation?
329 0 0       0 $$dataPt .= $$lines[0] if $dataPt; # add to data if necessary
330 0         0 $$lines[0] =~ s/\x0d*\x0a*$//; # remove trailing CR, LF or CR/LF
331 0         0 $val .= substr(shift(@$lines), 3); # add to value (without leading "%%+")
332             }
333 31         51 my @vals;
334             # handle bracketed string values
335 31 100       95 if ($val =~ s/^\((.*)\)$/$1/) { # remove brackets if necessary
336             # split into an array of strings if necessary
337 3         5 my $nesting = 1;
338 3         18 while ($val =~ /(\(|\))/g) {
339 0         0 my $bra = $1;
340 0         0 my $pos = pos($val) - 2;
341 0         0 my $backslashes = 0;
342 0   0     0 while ($pos and substr($val, $pos, 1) eq '\\') {
343 0         0 --$pos;
344 0         0 ++$backslashes;
345             }
346 0 0       0 next if $backslashes & 0x01; # escaped if odd number
347 0 0       0 if ($bra eq '(') {
348 0         0 ++$nesting;
349             } else {
350 0         0 --$nesting;
351 0 0       0 unless ($nesting) {
352 0         0 push @vals, substr($val, 0, pos($val)-1);
353 0         0 $val = substr($val, pos($val));
354 0 0       0 ++$nesting if $val =~ s/\s*\(//;
355             }
356             }
357             }
358 3         7 push @vals, $val;
359 3         8 foreach $val (@vals) {
360             # decode escape sequences in bracketed strings
361             # (similar to code in PDF.pm, but without line continuation)
362 3         12 while ($val =~ /\\(.)/sg) {
363 0         0 my $n = pos($val) - 2;
364 0         0 my $c = $1;
365 0         0 my $r;
366 0 0       0 if ($c =~ /[0-7]/) {
367             # get up to 2 more octal digits
368 0 0       0 $c .= $1 if $val =~ /\G([0-7]{1,2})/g;
369             # convert octal escape code
370 0         0 $r = chr(oct($c) & 0xff);
371             } else {
372             # convert escaped characters
373 0         0 ($r = $c) =~ tr/nrtbf/\n\r\t\b\f/;
374             }
375 0         0 substr($val, $n, length($c)+1) = $r;
376             # continue search after this character
377 0         0 pos($val) = $n + length($r);
378             }
379             }
380 3 50       10 $val = @vals > 1 ? \@vals : $vals[0];
381             }
382 31         82 return $val;
383             }
384              
385             #------------------------------------------------------------------------------
386             # Unescape PostScript string
387             # Inputs: 0) string
388             # Returns: unescaped string
389             sub UnescapePostScript($)
390             {
391 10     10 0 23 my $str = shift;
392             # decode escape sequences in literal strings
393 10         34 while ($str =~ /\\(.)/sg) {
394 12         24 my $n = pos($str) - 2;
395 12         20 my $c = $1;
396 12         18 my $r;
397 12 100       35 if ($c =~ /[0-7]/) {
    50          
    50          
398             # get up to 2 more octal digits
399 11 50       39 $c .= $1 if $str =~ /\G([0-7]{1,2})/g;
400             # convert octal escape code
401 11         28 $r = chr(oct($c) & 0xff);
402             } elsif ($c eq "\x0d") {
403             # the string is continued if the line ends with '\'
404             # (also remove "\x0d\x0a")
405 0 0       0 $c .= $1 if $str =~ /\G(\x0a)/g;
406 0         0 $r = '';
407             } elsif ($c eq "\x0a") {
408 0         0 $r = '';
409             } else {
410             # convert escaped characters
411 1         5 ($r = $c) =~ tr/nrtbf/\n\r\t\b\f/;
412             }
413 12         27 substr($str, $n, length($c)+1) = $r;
414             # continue search after this character
415 12         47 pos($str) = $n + length($r);
416             }
417 10         24 return $str;
418             }
419              
420             #------------------------------------------------------------------------------
421             # Extract information from EPS, PS or AI file
422             # Inputs: 0) ExifTool object reference, 1) dirInfo reference, 2) optional tag table ref
423             # Returns: 1 if this was a valid PostScript file
424             sub ProcessPS($$;$)
425             {
426 4     4 0 16 my ($et, $dirInfo, $tagTablePtr) = @_;
427 4         10 my $raf = $$dirInfo{RAF};
428 4         19 my $embedded = $et->Options('ExtractEmbedded');
429 4         24 my ($data, $dos, $endDoc, $fontTable, $comment);
430              
431             # allow read from data
432 4 50       18 unless ($raf) {
433 0         0 $raf = new File::RandomAccess($$dirInfo{DataPt});
434 0         0 $et->VerboseDir('PostScript');
435             }
436             #
437             # determine if this is a postscript file
438             #
439 4 50       17 $raf->Read($data, 4) == 4 or return 0;
440             # accept either ASCII or DOS binary postscript file format
441 4 50       37 return 0 unless $data =~ /^(%!PS|%!Ad|%!Fo|\xc5\xd0\xd3\xc6)/;
442 4 50       28 if ($data =~ /^%!Ad/) {
    50          
443             # I've seen PS files start with "%!Adobe-PS"...
444 0 0 0     0 return 0 unless $raf->Read($data, 6) == 6 and $data eq "obe-PS";
445             } elsif ($data =~ /^\xc5\xd0\xd3\xc6/) {
446             # process DOS binary file header
447             # - save DOS header then seek ahead and check PS header
448 0 0       0 $raf->Read($dos, 26) == 26 or return 0;
449 0         0 SetByteOrder('II');
450 0         0 my $psStart = Get32u(\$dos, 0);
451 0 0 0     0 unless ($raf->Seek($psStart, 0) and
      0        
452             $raf->Read($data, 4) == 4 and $data eq '%!PS')
453             {
454 0         0 return PSErr($et, 'invalid header');
455             }
456 0         0 $$raf{PSEnd} = $psStart + Get32u(\$dos, 4); # set end of PostScript data in RAF
457             } else {
458             # check for PostScript font file (PFA or PFB)
459 4         9 my $d2;
460 4 50       18 $data .= $d2 if $raf->Read($d2,12);
461 4 100       39 if ($data =~ /^%!(PS-(AdobeFont-|Bitstream )|FontType1-)/) {
462 2         11 $et->SetFileType('PFA'); # PostScript ASCII font file
463 2         9 $fontTable = GetTagTable('Image::ExifTool::Font::PSInfo');
464             # PostScript font files may contain an unformatted comments which may
465             # contain useful information, so accumulate these for the Comment tag
466 2         5 $comment = 1;
467             }
468 4         33 $raf->Seek(-length($data), 1);
469             }
470             #
471             # set the newline type based on the first newline found in the file
472             #
473 4         34 local $/ = GetInputRecordSeparator($raf);
474 4 50       18 $/ or return PSErr($et, 'invalid PS data');
475              
476             # set file type (PostScript or EPS)
477 4 50       19 $raf->ReadLine($data) or $data = '';
478 4         13 my $type;
479 4 100       23 if ($data =~ /EPSF/) {
480 2         6 $type = 'EPS';
481             } else {
482             # read next line to see if this is an Illustrator file
483 2         3 my $line2;
484 2         8 my $pos = $raf->Tell();
485 2 50 33     8 if ($raf->ReadLine($line2) and $line2 =~ /^%%Creator: Adobe Illustrator/) {
486 0         0 $type = 'AI';
487             } else {
488 2         5 $type = 'PS';
489             }
490 2         7 $raf->Seek($pos, 0);
491             }
492 4         52 $et->SetFileType($type);
493 4 50 33     20 return 1 if $$et{OPTIONS}{FastScan} and $$et{OPTIONS}{FastScan} == 3;
494             #
495             # extract TIFF information from DOS header
496             #
497 4 50       22 $tagTablePtr or $tagTablePtr = GetTagTable('Image::ExifTool::PostScript::Main');
498 4 50       15 if ($dos) {
499 0         0 my $base = Get32u(\$dos, 16);
500 0 0       0 if ($base) {
501 0         0 my $pos = $raf->Tell();
502             # extract the TIFF preview
503 0         0 my $len = Get32u(\$dos, 20);
504 0         0 my $val = $et->ExtractBinary($base, $len, 'TIFFPreview');
505 0 0 0     0 if (defined $val and $val =~ /^(MM\0\x2a|II\x2a\0|Binary)/) {
506 0         0 $et->HandleTag($tagTablePtr, 'TIFFPreview', $val);
507             } else {
508 0         0 $et->Warn('Bad TIFF preview image');
509             }
510             # extract information from TIFF in DOS header
511             # (set Parent to '' to avoid setting FileType tag again)
512 0         0 my %dirInfo = (
513             Parent => '',
514             RAF => $raf,
515             Base => $base,
516             );
517 0 0       0 $et->ProcessTIFF(\%dirInfo) or $et->Warn('Bad embedded TIFF');
518             # position file pointer to extract PS information
519 0         0 $raf->Seek($pos, 0);
520             }
521             }
522             #
523             # parse the postscript
524             #
525 4         17 my ($buff, $mode, $beginToken, $endToken, $docNum, $subDocNum, $changedNL);
526 4         0 my (@lines, $altnl);
527 4 100       25 if ($/ eq "\x0d") {
528 2         6 $altnl = "\x0a";
529             } else {
530 2         6 $/ = "\x0a"; # end on any LF (even if DOS CR+LF)
531 2         5 $altnl = "\x0d";
532             }
533 4         7 for (;;) {
534 725 100       1239 if (@lines) {
535 243         354 $data = shift @lines;
536             } else {
537 482 100       1031 $raf->ReadLine($data) or last;
538             # check for alternate newlines as efficiently as possible
539 478 100       1364 if ($data =~ /$altnl/) {
540 2 50 33     13 if (length($data) > 500000 and IsPC()) {
541             # Windows can't split very long lines due to poor memory handling,
542             # so re-read the file with the other newline character instead
543             # (slower but uses less memory)
544 0 0       0 unless ($changedNL) {
545 0         0 $changedNL = 1;
546 0         0 my $t = $/;
547 0         0 $/ = $altnl;
548 0         0 $altnl = $t;
549 0         0 $raf->Seek(-length($data), 1);
550 0         0 next;
551             }
552             } else {
553             # split into separate lines
554 2         96 @lines = split /$altnl/, $data, -1;
555 2         9 $data = shift @lines;
556 2 50 33     14 if (@lines == 1 and $lines[0] eq $/) {
557             # handle case of DOS newline data inside file using Unix newlines
558 0         0 $data .= $lines[0];
559 0         0 undef @lines;
560             }
561             }
562             }
563             }
564 721         1048 undef $changedNL;
565 721 100 100     2953 if ($mode) {
    100 66        
    100 66        
    50          
    100          
    50          
    100          
566 306 50       871 if (not $endToken) {
    100          
    100          
567 0         0 $buff .= $data;
568 0 0       0 next unless $data =~ m{<\?xpacket end=.(w|r).\?>(\n|\r|$)};
569             } elsif ($data !~ /^$endToken/i) {
570 298 100       564 if ($mode eq 'XMP') {
    100          
571 216         320 $buff .= $data;
572             } elsif ($mode eq 'Document') {
573             # ignore embedded documents, but keep track of nesting level
574 29 100       95 $docNum .= '-1' if $data =~ /^$beginToken/;
575             } else {
576             # data is ASCII-hex encoded
577 53         94 $data =~ tr/0-9A-Fa-f//dc; # remove all but hex characters
578 53         153 $buff .= pack('H*', $data); # translate from hex
579             }
580 298         466 next;
581             } elsif ($mode eq 'Document') {
582 4         17 $docNum =~ s/-?\d+$//; # decrement document nesting level
583             # done with Document mode if we are back at the top level
584 4 100       13 undef $mode unless $docNum;
585 4         7 next;
586             }
587             } elsif ($endDoc and $data =~ /^$endDoc/i) {
588 4         17 $docNum =~ s/-?(\d+)$//; # decrement nesting level
589 4         11 $subDocNum = $1; # remember our last sub-document number
590 4         10 $$et{DOC_NUM} = $docNum;
591 4 100       11 undef $endDoc unless $docNum; # done with document if top level
592 4         11 next;
593             } elsif ($data =~ /^(%{1,2})(Begin)(_xml_packet|Photoshop|ICCProfile|Document|Binary)/i) {
594             # the beginning of a data block
595 13         82 my %modeLookup = (
596             _xml_packet => 'XMP',
597             photoshop => 'Photoshop',
598             iccprofile => 'ICC_Profile',
599             document => 'Document',
600             binary => undef, # (we will try to skip this)
601             );
602 13         72 $mode = $modeLookup{lc $3};
603 13 100       39 unless ($mode) {
604 2 50 33     22 if (not @lines and $data =~ /^%{1,2}BeginBinary:\s*(\d+)/i) {
605 2 50       22 $raf->Seek($1, 1) or last; # skip binary data
606             }
607 2         10 next;
608             }
609 11         17 $buff = '';
610 11         39 $beginToken = $1 . $2 . $3;
611 11 100       55 $endToken = $1 . ($2 eq 'begin' ? 'end' : 'End') . $3;
612 11 100       34 if ($mode eq 'Document') {
613             # this is either the 1st sub-document or Nth document
614 7 100       14 if ($docNum) {
615             # increase nesting level
616 1         4 $docNum .= '-' . (++$subDocNum);
617             } else {
618             # this is the Nth document
619 6         16 $docNum = $$et{DOC_COUNT} + 1;
620             }
621 7         13 $subDocNum = 0; # new level, so reset subDocNum
622 7 100       19 next unless $embedded; # skip over this document
623             # set document number for family 4-7 group names
624 4         9 $$et{DOC_NUM} = $docNum;
625 4         13 $$et{LIST_TAGS} = { }; # don't build lists across different documents
626 4         11 $$et{PROCESSED} = { }; # re-initialize processed directory lookup too
627 4         6 $endDoc = $endToken; # parse to EndDocument token
628             # reset mode to allow parsing into sub-directories
629 4         9 undef $endToken;
630 4         6 undef $mode;
631             # save document name if available
632 4 50       49 if ($data =~ /^$beginToken:\s+([^\n\r]+)/i) {
633 4         12 my $docName = $1;
634             # remove brackets if necessary
635 4 50       23 $docName = $1 if $docName =~ /^\((.*)\)$/;
636 4         18 $et->HandleTag($tagTablePtr, 'EmbeddedFileName', $docName);
637             }
638             }
639 8         1002 next;
640             } elsif ($data =~ /^<\?xpacket begin=.{7,13}W5M0MpCehiHzreSzNTczkc9d/) {
641             # pick up any stray XMP data
642 0         0 $mode = 'XMP';
643 0         0 $buff = $data;
644 0         0 undef $endToken; # no end token (just look for xpacket end)
645             # XMP could be contained in a single line (if newlines are different)
646 0 0       0 next unless $data =~ m{<\?xpacket end=.(w|r).\?>(\n|\r|$)};
647             } elsif ($data =~ /^%%?(\w+): ?(.*)/s and $$tagTablePtr{$1}) {
648 29         109 my ($tag, $val) = ($1, $2);
649             # only allow 'ImageData' and AI tags to have single leading '%'
650 29 50 66     149 next unless $data =~ /^%(%|AI\d+_)/ or $tag eq 'ImageData';
651             # decode comment string (reading continuation lines if necessary)
652 29         88 $val = DecodeComment($val, $raf, \@lines);
653 29         118 $et->HandleTag($tagTablePtr, $tag, $val);
654 29         61 next;
655             } elsif ($embedded and $data =~ /^%AI12_CompressedData/) {
656             # the rest of the file is compressed
657 0 0       0 unless (eval { require Compress::Zlib }) {
  0         0  
658 0         0 $et->Warn('Install Compress::Zlib to extract compressed embedded data');
659 0         0 last;
660             }
661             # seek back to find the start of the compressed data in the file
662 0         0 my $tlen = length($data) + @lines;
663 0         0 $tlen += length $_ foreach @lines;
664 0         0 my $backTo = $raf->Tell() - $tlen - 64;
665 0 0       0 $backTo = 0 if $backTo < 0;
666 0 0 0     0 last unless $raf->Seek($backTo, 0) and $raf->Read($data, 2048);
667 0 0       0 last unless $data =~ s/.*?%AI12_CompressedData//;
668 0         0 my $inflate = Compress::Zlib::inflateInit();
669 0 0       0 $inflate or $et->Warn('Error initializing inflate'), last;
670             # generate a PS-like file in memory from the compressed data
671 0         0 my $verbose = $et->Options('Verbose');
672 0 0       0 if ($verbose > 1) {
673 0         0 $et->VerboseDir('AI12_CompressedData (first 4kB)');
674 0         0 $et->VerboseDump(\$data);
675             }
676             # remove header if it exists (Windows AI files only)
677 0         0 $data =~ s/^.{0,256}EndData[\x0d\x0a]+//s;
678 0         0 my $val;
679 0         0 for (;;) {
680 0         0 my ($v2, $stat) = $inflate->inflate($data);
681 0 0       0 $stat == Compress::Zlib::Z_STREAM_END() and $val .= $v2, last;
682 0 0       0 $stat != Compress::Zlib::Z_OK() and undef($val), last;
683 0 0       0 if (defined $val) {
    0          
684 0         0 $val .= $v2;
685             } elsif ($v2 =~ /^%!PS/) {
686 0         0 $val = $v2;
687             } else {
688             # add postscript header (for file recognition) if it doesn't exist
689 0         0 $val = "%!PS-Adobe-3.0$/" . $v2;
690             }
691 0 0       0 $raf->Read($data, 65536) or last;
692             }
693 0 0       0 defined $val or $et->Warn('Error inflating AI compressed data'), last;
694 0 0       0 if ($verbose > 1) {
695 0         0 $et->VerboseDir('Uncompressed AI12 Data');
696 0         0 $et->VerboseDump(\$val);
697             }
698             # extract information from embedded images in the uncompressed data
699             $val = # add PS header in case it needs one
700 0         0 ProcessPS($et, { DataPt => \$val });
701 0         0 last;
702             } elsif ($fontTable) {
703 58 100       109 if (defined $comment) {
704             # extract initial comments from PostScript Font files
705 10 100       58 if ($data =~ /^%\s+(.*?)[\x0d\x0a]/) {
    100          
706 4 50       13 $comment .= "\n" if $comment;
707 4         11 $comment .= $1;
708 4         8 next;
709             } elsif ($data !~ /^%/) {
710             # stop extracting comments at the first non-comment line
711 2 50       11 $et->FoundTag('Comment', $comment) if length $comment;
712 2         5 undef $comment;
713             }
714             }
715 54 100 100     366 if ($data =~ m{^\s*/(\w+)\s*(.*)} and $$fontTable{$1}) {
    100          
716 24         83 my ($tag, $val) = ($1, $2);
717 24 100       86 if ($val =~ /^\((.*)\)/) {
    50          
718 10         24 $val = UnescapePostScript($1);
719             } elsif ($val =~ m{/?(\S+)}) {
720 14         34 $val = $1;
721             }
722 24         67 $et->HandleTag($fontTable, $tag, $val);
723             } elsif ($data =~ /^currentdict end/) {
724             # only extract tags from initial FontInfo dict
725 2         5 undef $fontTable;
726             }
727 54         88 next;
728             } else {
729 311         494 next;
730             }
731             # extract information from buffered data
732 4         27 my %dirInfo = (
733             DataPt => \$buff,
734             DataLen => length $buff,
735             DirStart => 0,
736             DirLen => length $buff,
737             Parent => 'PostScript',
738             );
739 4         21 my $subTablePtr = GetTagTable("Image::ExifTool::${mode}::Main");
740 4 50       35 unless ($et->ProcessDirectory(\%dirInfo, $subTablePtr)) {
741 0         0 $et->Warn("Error processing $mode information in PostScript file");
742             }
743 4         13 undef $buff;
744 4         17 undef $mode;
745             }
746 4 50 33     21 $mode = 'Document' if $endDoc and not $mode;
747 4 50       14 $mode and PSErr($et, "unterminated $mode data");
748 4         31 return 1;
749             }
750              
751             #------------------------------------------------------------------------------
752             # Extract information from EPS file
753             # Inputs: 0) ExifTool object reference, 1) dirInfo reference
754             # Returns: 1 if this was a valid PostScript file
755             sub ProcessEPS($$)
756             {
757 2     2 0 10 return ProcessPS($_[0],$_[1]);
758             }
759              
760             1; # end
761              
762              
763             __END__