File Coverage

blib/lib/Image/ExifTool/WritePDF.pl
Criterion Covered Total %
statement 277 394 70.3
branch 170 308 55.1
condition 55 101 54.4
subroutine 7 9 77.7
pod 0 7 0.0
total 509 819 62.1


line stmt bran cond sub pod time code
1             #------------------------------------------------------------------------------
2             # File: WritePDF.pl
3             #
4             # Description: Write PDF meta information
5             #
6             # Revisions: 12/08/2007 - P. Harvey Created
7             #
8             # References: 1) http://partners.adobe.com/public/developer/pdf/index_reference.html
9             #
10             # Notes: The special "PDF-update" group can be deleted to revert exiftool updates
11             #------------------------------------------------------------------------------
12             package Image::ExifTool::PDF;
13              
14 19     19   164 use strict;
  19         81  
  19         743  
15 19     19   165 use vars qw($lastFetched);
  19         55  
  19         94338  
16              
17             sub WriteObject($$);
18             sub EncodeString($);
19             sub CryptObject($);
20              
21             # comments to mark beginning and end of ExifTool incremental update
22             my $beginComment = '%BeginExifToolUpdate';
23             my $endComment = '%EndExifToolUpdate ';
24              
25             my $keyExt; # crypt key extension
26             my $pdfVer; # version of PDF file we are currently writing
27              
28             # internal tags used in dictionary objects
29             my %myDictTags = (
30             _tags => 1, _stream => 1, _decrypted => 1, _needCrypt => 1,
31             _filtered => 1, _entry_size => 1, _table => 1,
32             );
33              
34             # map for directories that we can add
35             my %pdfMap = (
36             XMP => 'PDF',
37             );
38              
39             #------------------------------------------------------------------------------
40             # Validate raw PDF values for writing (string date integer real boolean name)
41             # Inputs: 0) ExifTool object ref, 1) tagInfo hash ref, 2) raw value ref
42             # Returns: error string or undef (and possibly changes value) on success
43             sub CheckPDF($$$)
44             {
45 100     100 0 383 my ($et, $tagInfo, $valPtr) = @_;
46 100   66     615 my $format = $$tagInfo{Writable} || $tagInfo->{Table}->{WRITABLE};
47 100 50       538 if (not $format) {
    100          
    50          
    0          
    0          
    0          
    0          
48 0         0 return 'No writable format';
49             } elsif ($format eq 'string') {
50             # (encode later because list-type string tags need to be encoded as a unit)
51             } elsif ($format eq 'date') {
52             # be flexible about this for now
53 31 50       205 return 'Bad date format' unless $$valPtr =~ /^\d{4}/;
54             } elsif ($format eq 'integer') {
55 0 0       0 return 'Not an integer' unless Image::ExifTool::IsInt($$valPtr);
56             } elsif ($format eq 'real') {
57 0 0       0 return 'Not a real number' unless $$valPtr =~ /^[+-]?(?=\d|\.\d)\d*(\.\d*)?$/;
58             } elsif ($format eq 'boolean') {
59 0 0 0     0 $$valPtr = ($$valPtr and $$valPtr !~ /^f/i) ? 'true' : 'false';
60             } elsif ($format eq 'name') {
61 0 0       0 return 'Invalid PDF name' if $$valPtr =~ /\0/;
62             } else {
63 0         0 return "Invalid PDF format '${format}'";
64             }
65 100         393 return undef; # value is OK
66             }
67              
68             #------------------------------------------------------------------------------
69             # Format value for writing to PDF file
70             # Inputs: 0) ExifTool ref, 1) value, 2) format string (string,date,integer,real,boolean,name)
71             # Returns: formatted value or undef on error
72             # Notes: Called at write time, so $pdfVer may be checked
73             sub WritePDFValue($$$)
74             {
75 13     13 0 48 my ($et, $val, $format) = @_;
76 13 50       60 if (not $format) {
    100          
    50          
    0          
    0          
77 0         0 return undef;
78             } elsif ($format eq 'string') {
79             # encode as UCS2 if it contains any special characters
80 11 50       40 $val = "\xfe\xff" . $et->Encode($val,'UCS2','MM') if $val =~ /[\x80-\xff]/;
81 11         33 EncodeString(\$val);
82             } elsif ($format eq 'date') {
83             # convert date to "D:YYYYmmddHHMMSS+-HH'MM'" format
84 2         24 $val =~ s/([-+]\d{2}):(\d{2})/${1}'${2}'/; # change timezone delimiters if necessary
85 2         8 $val =~ tr/ ://d; # remove spaces and colons
86 2         8 $val = "D:$val"; # add leading "D:"
87 2         7 EncodeString(\$val);
88             } elsif ($format =~ /^(integer|real|boolean)$/) {
89             # no reformatting necessary
90             } elsif ($format eq 'name') {
91 0 0       0 return undef if $val =~ /\0/;
92 0 0       0 if ($pdfVer >= 1.2) {
93 0         0 $val =~ s/([\t\n\f\r ()<>[\]{}\/%#])/sprintf('#%.2x',ord $1)/sge;
  0         0  
94             } else {
95 0 0       0 return undef if $val =~ /[\t\n\f\r ()<>[\]{}\/%]/;
96             }
97 0         0 $val = "/$val"; # add leading '/'
98             } else {
99 0         0 return undef;
100             }
101 13         35 return $val;
102             }
103              
104             #------------------------------------------------------------------------------
105             # Encode PDF string
106             # Inputs: 0) reference to PDF string
107             # Returns: (updates string with encoded data)
108             sub EncodeString($)
109             {
110 13     13 0 43 my $strPt = shift;
111 13 50       40 if (ref $$strPt eq 'ARRAY') {
112 0         0 my $str;
113 0         0 foreach $str (@{$$strPt}) {
  0         0  
114 0         0 EncodeString(\$str);
115             }
116 0         0 return;
117             }
118 13         57 Crypt($strPt, $keyExt, 1); # encrypt if necessary
119             # encode as hex if we have any control characters (except tab)
120 13 50       39 if ($$strPt=~/[\x00-\x08\x0a-\x1f\x7f\xff]/) {
121             # encode as hex
122 0         0 my $str='';
123 0         0 my $len = length $$strPt;
124 0         0 my $i = 0;
125 0         0 for (;;) {
126 0 0       0 my $n = $len - $i or last;
127 0 0       0 $n = 40 if $n > 40; # break into reasonable-length lines
128 0 0       0 $str .= $/ if $i;
129 0         0 $str .= unpack('H*', substr($$strPt, $i, $n));
130 0         0 $i += $n;
131             }
132 0         0 $$strPt = "<$str>";
133             } else {
134 13         32 $$strPt =~ s/([()\\])/\\$1/g; # must escape round brackets and backslashes
135 13         39 $$strPt = "($$strPt)";
136             }
137             }
138              
139             #------------------------------------------------------------------------------
140             # Encrypt an object
141             # Inputs: 0) PDF object (encrypts in place)
142             # Notes: Encrypts according to "_needCrypt" dictionary entry,
143             # then deletes "_needCrypt" when done
144             sub CryptObject($)
145             {
146 0     0 0 0 my $obj = $_[0];
147 0 0       0 if (not ref $obj) {
    0          
    0          
148             # only literal strings and hex strings are encrypted
149 0 0       0 if ($obj =~ /^[(<]/) {
150 0         0 undef $lastFetched; # (reset this just in case)
151 0         0 my $val = ReadPDFValue($obj);
152 0         0 EncodeString(\$val);
153 0         0 $_[0] = $val;
154             }
155             } elsif (ref $obj eq 'HASH') {
156 0         0 my $tag;
157 0         0 my $needCrypt = $$obj{_needCrypt};
158 0         0 foreach $tag (keys %$obj) {
159 0 0       0 next if $myDictTags{$tag};
160             # re-encrypt necessary objects only (others are still encrypted)
161             # (this is really annoying, but is necessary because objects stored
162             # in encrypted streams are decrypted when extracting, but strings stored
163             # as direct objects are decrypted later since they must be decoded
164             # before being decrypted)
165 0 0       0 if ($needCrypt) {
166 0 0       0 next unless defined $$needCrypt{$tag} ? $$needCrypt{$tag} : $$needCrypt{'*'};
    0          
167             }
168 0         0 CryptObject($$obj{$tag});
169             }
170 0         0 delete $$obj{_needCrypt}; # avoid re-re-crypting
171             } elsif (ref $obj eq 'ARRAY') {
172 0         0 my $val;
173 0         0 foreach $val (@$obj) {
174 0         0 CryptObject($val);
175             }
176             }
177             }
178              
179             #------------------------------------------------------------------------------
180             # Get free entries from xref stream dictionary that we wrote previously
181             # Inputs: 0) xref dictionary reference
182             # Returns: free entry hash (keys are object numbers, values are xref entry list refs)
183             sub GetFreeEntries($)
184             {
185 0     0 0 0 my $dict = shift;
186 0         0 my %xrefFree;
187             # from the start we have only written xref stream entries in 'CNn' format,
188             # so we can simplify things for now and only support this type of entry
189 0         0 my $w = $$dict{W};
190 0 0 0     0 if (ref $w eq 'ARRAY' and "@$w" eq '1 4 2') {
191 0         0 my $size = $$dict{_entry_size}; # this will be 7 for 'CNn'
192 0         0 my $index = $$dict{Index};
193 0         0 my $len = length $$dict{_stream};
194             # scan the table for free objects
195 0         0 my $num = scalar(@$index) / 2;
196 0         0 my $pos = 0;
197 0         0 my ($i, $j);
198 0         0 for ($i=0; $i<$num; ++$i) {
199 0         0 my $start = $$index[$i*2];
200 0         0 my $count = $$index[$i*2+1];
201 0         0 for ($j=0; $j<$count; ++$j) {
202 0 0       0 last if $pos + $size > $len;
203 0         0 my @t = unpack("x$pos CNn", $$dict{_stream});
204             # add entry if object was free
205 0 0       0 $xrefFree{$start+$j} = [ $t[1], $t[2], 'f' ] if $t[0] == 0;
206 0         0 $pos += $size; # step to next entry
207             }
208             }
209             }
210 0         0 return \%xrefFree;
211             }
212              
213             #------------------------------------------------------------------------------
214             # Write PDF object
215             # Inputs: 0) output file or scalar ref, 1) PDF object
216             # Returns: true on success
217             # Notes: inserts white space before object, but none afterward
218             sub WriteObject($$)
219             {
220 208     208 0 387 my ($outfile, $obj) = @_;
221 208 100       596 if (ref $obj eq 'SCALAR') {
    100          
    100          
222 43 50       145 Write($outfile, ' ', $$obj) or return 0;
223             } elsif (ref $obj eq 'ARRAY') {
224             # write array
225 12 50       117 Write($outfile, @$obj > 10 ? $/ : ' ', '[') or return 0;
    50          
226 12         32 my $item;
227 12         33 foreach $item (@$obj) {
228 22 50       59 WriteObject($outfile, $item) or return 0;
229             }
230 12 50       62 Write($outfile, ' ]') or return 0;
231             } elsif (ref $obj eq 'HASH') {
232             # write dictionary
233 47         69 my $tag;
234 47 50       137 Write($outfile, $/, '<<') or return 0;
235             # prepare object as required if it has a stream
236 47 100       170 if ($$obj{_stream}) {
237             # encrypt stream if necessary (must be done before determining Length)
238 9 50       76 CryptStream($obj, $keyExt) if $$obj{_decrypted};
239             # write "Length" entry in dictionary
240 9         36 $$obj{Length} = length $$obj{_stream};
241 9         24 push @{$$obj{_tags}}, 'Length';
  9         29  
242             # delete Filter-related entries since we don't yet write filtered streams
243 9         22 delete $$obj{Filter};
244 9         22 delete $$obj{DecodeParms};
245 9         34 delete $$obj{DL};
246             }
247             # don't write my internal entries
248 47         295 my %wrote = %myDictTags;
249             # write tags in original order, adding new ones later alphabetically
250 47         97 foreach $tag (@{$$obj{_tags}}, sort keys %$obj) {
  47         253  
251             # ignore already-written or missing entries
252 339 100 100     1054 next if $wrote{$tag} or not defined $$obj{$tag};
253 139 50       399 Write($outfile, $/, "/$tag") or return 0;
254 139 50       375 WriteObject($outfile, $$obj{$tag}) or return 0;
255 139         300 $wrote{$tag} = 1;
256             }
257 47 50       139 Write($outfile, $/, '>>') or return 0;
258 47 100       181 if ($$obj{_stream}) {
259             # write object stream
260             # (a single 0x0d may not follow 'stream', so use 0x0d+0x0a here to be sure)
261 9 50       21 Write($outfile, $/, "stream\x0d\x0a") or return 0;
262 9 50       33 Write($outfile, $$obj{_stream}, $/, 'endstream') or return 0;
263             }
264             } else {
265             # write string, number, name or object reference
266 106         232 Write($outfile, ' ', $obj);
267             }
268 208         583 return 1;
269             }
270              
271             #------------------------------------------------------------------------------
272             # Write PDF File
273             # Inputs: 0) ExifTool object reference, 1) dirInfo reference
274             # Returns: 1 on success, 0 if not valid PDF file, -1 on write error
275             # Notes: dictionary structure: Main --+--> Info
276             # +--> Root --> Metadata
277             sub WritePDF($$)
278             {
279 19     19 0 66 my ($et, $dirInfo) = @_;
280 19         65 my $raf = $$dirInfo{RAF};
281 19         44 my $outfile = $$dirInfo{OutFile};
282 19         67 my ($buff, %capture, %newXRef, %newObj, $objRef);
283 19         0 my ($out, $id, $gen, $obj);
284              
285             # make sure this is a PDF file
286 19         58 my $pos = $raf->Tell();
287 19 50       69 $raf->Read($buff, 1024) >= 8 or return 0;
288 19 50       176 $buff =~ /^(\s*)%PDF-(\d+\.\d+)/ or return 0;
289 19         86 $$et{PDFBase} = length $1;
290 19         79 $raf->Seek($pos, 0);
291              
292             # create a new ExifTool object and use it to read PDF and XMP information
293 19         174 my $newTool = new Image::ExifTool;
294 19         90 $newTool->Options(List => 1);
295 19         79 $newTool->Options(Password => $et->Options('Password'));
296 19         92 $newTool->Options(NoPDFList => $et->Options('NoPDFList'));
297 19         76 $$newTool{PDF_CAPTURE} = \%capture;
298 19         97 my $info = $newTool->ImageInfo($raf, 'XMP', 'PDF:*', 'Error', 'Warning');
299             # not a valid PDF file unless we got a version number
300             # (note: can't just check $$info{PDFVersion} due to possibility of XMP-pdf:PDFVersion)
301 19         114 my $vers = $newTool->GetInfo('PDF:PDFVersion');
302             # take highest version number if multiple versions in an incremental save
303 19         133 ($pdfVer) = sort { $b <=> $a } values %$vers;
  0         0  
304 19 50       81 $pdfVer or $et->Error('Missing PDF:PDFVersion'), return 0;
305             # check version number
306 19 50       62 if ($pdfVer > 1.7) {
307 0         0 $et->Warn("The PDF $pdfVer specification is not freely available", 1);
308             # (so writing by ExifTool is based on trial and error)
309             }
310             # fail if we had any serious errors while extracting information
311 19 50 33     144 if ($capture{Error} or $$info{Error}) {
312 0   0     0 $et->Error($capture{Error} || $$info{Error});
313 0         0 return 1;
314             }
315             # make sure we have everything we need to rewrite this file
316 19         97 foreach $obj (qw(Main Root xref)) {
317 57 50       179 next if $capture{$obj};
318             # any warning we received may give a clue about why this object is missing
319 0 0       0 $et->Error($$info{Warning}) if $$info{Warning};
320 0         0 $et->Error("Can't find $obj object");
321 0         0 return 1;
322             }
323 19         163 $et->InitWriteDirs(\%pdfMap, 'XMP');
324              
325             # copy file up to start of previous exiftool update or end of file
326             # (comment, startxref & EOF with 11-digit offsets and 2-byte newlines is 63 bytes)
327 19 50 33     114 $raf->Seek(-64,2) and $raf->Read($buff,64) and $raf->Seek(0,0) or return -1;
      33        
328 19         75 my $rtn = 1;
329 19         73 my $prevUpdate;
330             # (now $endComment is before "startxref", but pre-7.41 we wrote it after the EOF)
331 19 100       301 if ($buff =~ /$endComment(\d+)\s+(startxref\s+\d+\s+%%EOF\s+)?$/s) {
    50          
332 16         69 $prevUpdate = $1;
333             # rewrite the file up to the original EOF
334 16 50       167 Image::ExifTool::CopyBlock($raf, $outfile, $prevUpdate + $$et{PDFBase}) or $rtn = -1;
335             # verify that we are now at the start of an ExifTool update
336 16 50 33     102 unless ($raf->Read($buff, length $beginComment) and $buff eq $beginComment) {
337 0         0 $et->Error('Previous ExifTool update is corrupted');
338 0         0 return $rtn;
339             }
340 16 50       99 $raf->Seek($prevUpdate+$$et{PDFBase}, 0) or $rtn = -1;
341 16 100       179 if ($$et{DEL_GROUP}{'PDF-update'}) {
342 2         17 $et->VPrint(0, " Reverted previous ExifTool updates\n");
343 2         7 ++$$et{CHANGED};
344 2         42 return $rtn;
345             }
346             } elsif ($$et{DEL_GROUP}{'PDF-update'}) {
347 0         0 $et->Error('File contains no previous ExifTool update');
348 0         0 return $rtn;
349             } else {
350             # rewrite the whole file
351 3         16 while ($raf->Read($buff, 65536)) {
352 3 50       32 Write($outfile, $buff) or $rtn = -1;
353             }
354             }
355 17 50       194 $out = $et->Options('TextOut') if $et->Options('Verbose');
356             #
357             # create our new PDF objects to write
358             #
359 17         82 my $xref = $capture{xref};
360 17         53 my $mainDict = $capture{Main};
361 17         74 my $metaRef = $capture{Root}->{Metadata};
362 17         42 my $nextObject;
363              
364             # start by finding reference for info object in case it was deleted
365             # in a previous edit so we can re-use it here if adding PDF Info
366             my $prevInfoRef;
367 17 100       53 if ($prevUpdate) {
368 14 50       56 unless ($capture{Prev}) {
369 0         0 $et->Error("Can't locate trailer dictionary prior to last edit");
370 0         0 return $rtn;
371             }
372 14         40 $prevInfoRef = $capture{Prev}->{Info};
373             # start from previous size so the xref table doesn't continue
374             # to grow if we repeatedly add and delete the Metadata object
375 14         47 $nextObject = $capture{Prev}->{Size};
376             # don't re-use Meta reference if object was added in a previous update
377 14 100 66     168 undef $metaRef if $metaRef and $$metaRef=~/^(\d+)/ and $1 >= $nextObject;
      100        
378             } else {
379 3         10 $prevInfoRef = $$mainDict{Info};
380 3         13 $nextObject = $$mainDict{Size};
381             }
382              
383             # delete entire PDF group if specified
384 17         46 my $infoChanged = 0;
385 17 100 100     95 if ($$et{DEL_GROUP}{PDF} and $capture{Info}) {
386 4         29 delete $capture{Info};
387 4         25 $info = { XMP => $$info{XMP} }; # remove extracted PDF tags
388 4 50       17 print $out " Deleting PDF Info dictionary\n" if $out;
389 4         11 ++$infoChanged;
390             }
391              
392             # create new Info dictionary if necessary
393 17 100       95 $capture{Info} = { _tags => [ ] } unless $capture{Info};
394 17         40 my $infoDict = $capture{Info};
395              
396             # must pre-determine Info reference to be used in encryption
397 17   100     83 my $infoRef = $prevInfoRef || \ "$nextObject 0 R";
398 17 50       87 unless (ref $infoRef eq 'SCALAR') {
399 0         0 $et->Error("Info dictionary is not an indirect object");
400 0         0 return $rtn;
401             }
402 17         52 $keyExt = $$infoRef;
403              
404             # must encrypt all values in dictionary if they came from an encrypted stream
405 17 50       59 CryptObject($infoDict) if $$infoDict{_needCrypt};
406              
407             # must set line separator before calling WritePDFValue()
408 17         104 local $/ = $capture{newline};
409              
410             # rewrite PDF Info tags
411 17         106 my $newTags = $et->GetNewTagInfoHash(\%Image::ExifTool::PDF::Info);
412 17         43 my $tagID;
413 17         94 foreach $tagID (sort keys %$newTags) {
414 11         29 my $tagInfo = $$newTags{$tagID};
415 11         40 my $nvHash = $et->GetNewValueHash($tagInfo);
416 11         51 my (@vals, $deleted);
417 11         32 my $tag = $$tagInfo{Name};
418 11         27 my $val = $$info{$tag};
419 11         33 my $tagKey = $tag;
420 11 100       37 unless (defined $val) {
421             # must check for tag key with copy number
422 9         186 ($tagKey) = grep /^$tag/, keys %$info;
423 9 100       46 $val = $$info{$tagKey} if $tagKey;
424             }
425 11 100       37 if (defined $val) {
    50          
426 7         13 my @oldVals;
427 7 100       23 if (ref $val eq 'ARRAY') {
428 3         13 @oldVals = @$val;
429 3         8 $val = shift @oldVals;
430             }
431 7         14 for (;;) {
432 11 100       45 if ($et->IsOverwriting($nvHash, $val) > 0) {
433 5         15 $deleted = 1;
434 5         35 $et->VerboseValue("- PDF:$tag", $val);
435 5         9 ++$infoChanged;
436             } else {
437 6         15 push @vals, $val;
438             }
439 11 100       30 last unless @oldVals;
440 4         10 $val = shift @oldVals;
441             }
442             # don't write this out if we deleted all values
443 7 100       31 delete $$infoDict{$tagID} unless @vals;
444             } elsif ($$nvHash{EditOnly}) {
445 0         0 next;
446             }
447             # decide whether we want to write this tag
448             # (native PDF information is always preferred, so don't check IsCreating)
449 11 50 100     69 next unless $deleted or $$tagInfo{List} or not exists $$infoDict{$tagID};
      66        
450              
451             # add new values to existing ones
452 11         41 my @newVals = $et->GetNewValue($nvHash);
453 11 100       35 if (@newVals) {
454 9         21 push @vals, @newVals;
455 9         20 ++$infoChanged;
456 9 50       22 if ($out) {
457 0         0 foreach $val (@newVals) {
458 0         0 $et->VerboseValue("+ PDF:$tag", $val);
459             }
460             }
461             }
462 11 50       27 unless (@vals) {
463             # remove this entry from the Info dictionary if no values remain
464 0         0 delete $$infoDict{$tagID};
465 0         0 next;
466             }
467             # format value(s) for writing to PDF file
468 11   66     60 my $writable = $$tagInfo{Writable} || $Image::ExifTool::PDF::Info{WRITABLE};
469 11 100       40 if (not $$tagInfo{List}) {
    100          
470 5         20 $val = WritePDFValue($et, shift(@vals), $writable);
471             } elsif ($$tagInfo{List} eq 'array') {
472 3         10 foreach $val (@vals) {
473 5         22 $val = WritePDFValue($et, $val, $writable);
474 5 50       18 defined $val or undef(@vals), last;
475             }
476 3 50       15 $val = @vals ? \@vals : undef;
477             } else {
478 3         14 $val = WritePDFValue($et, join($et->Options('ListSep'), @vals), $writable);
479             }
480 11 50       33 if (defined $val) {
481 11         31 $$infoDict{$tagID} = $val;
482 11         39 ++$infoChanged;
483             } else {
484 0         0 $et->Warn("Error converting $$tagInfo{Name} value");
485             }
486             }
487 17 100       91 if ($infoChanged) {
    100          
488 10         35 $$et{CHANGED} += $infoChanged;
489             } elsif ($prevUpdate) {
490             # must still write Info dictionary if it was previously updated
491 6         39 my $oldPos = LocateObject($xref, $$infoRef);
492 6 100 66     51 $infoChanged = 1 if $oldPos and $oldPos > $prevUpdate;
493             }
494              
495             # create new Info dictionary if necessary
496 17 100       414 if ($infoChanged) {
497             # increment object count if we used a new object here
498 14 100       33 if (scalar(keys %{$capture{Info}}) > 1) {
  14         130  
499 10         40 $newObj{$$infoRef} = $capture{Info};# save to write later
500 10         29 $$mainDict{Info} = $infoRef; # add reference to trailer dictionary
501 10 100       50 ++$nextObject unless $prevInfoRef;
502             } else {
503             # remove Info from Main (trailer) dictionary
504 4         14 delete $$mainDict{Info};
505             # write free entry in xref table if Info existed prior to all edits
506 4 100       22 $newObj{$$infoRef} = '' if $prevInfoRef;
507             }
508             }
509              
510             # rewrite XMP
511             my %xmpInfo = (
512             DataPt => $$info{XMP},
513 17         91 Parent => 'PDF',
514             );
515 17         69 my $xmpTable = Image::ExifTool::GetTagTable('Image::ExifTool::XMP::Main');
516 17         51 my $oldChanged = $$et{CHANGED};
517 17         130 my $newXMP = $et->WriteDirectory(\%xmpInfo, $xmpTable);
518 17 100       102 $newXMP = $$info{XMP} ? ${$$info{XMP}} : '' unless defined $newXMP;
  4 100       15  
519              
520             # WriteDirectory() will increment CHANGED erroneously if non-existent
521             # XMP is deleted as a block -- so check for this
522 17 100 100     83 unless ($newXMP or $$info{XMP}) {
523 3         9 $$et{CHANGED} = $oldChanged;
524 3         14 $et->VPrint(0, " (XMP not changed -- still empty)\n");
525             }
526 17         55 my ($metaChanged, $rootChanged);
527              
528 17 100 66     142 if ($$et{CHANGED} != $oldChanged and defined $newXMP) {
    100 33        
529 10         30 $metaChanged = 1;
530             } elsif ($prevUpdate and $capture{Root}->{Metadata}) {
531             # must still write Metadata dictionary if it was previously updated
532 4         12 my $oldPos = LocateObject($xref, ${$capture{Root}->{Metadata}});
  4         27  
533 4 50 33     27 $metaChanged = 1 if $oldPos and $oldPos > $prevUpdate;
534             }
535 17 100       67 if ($metaChanged) {
536 14 100       69 if ($newXMP) {
    50          
537 9 100       26 unless ($metaRef) {
538             # allocate new PDF object
539 5         22 $metaRef = \ "$nextObject 0 R";
540 5         13 ++$nextObject;
541 5         18 $capture{Root}->{Metadata} = $metaRef;
542 5         12 $rootChanged = 1; # set flag to replace Root dictionary
543             }
544             # create the new metadata dictionary to write later
545 9         103 $newObj{$$metaRef} = {
546             Type => '/Metadata',
547             Subtype => '/XML',
548             # Length => length $newXMP, (set by WriteObject)
549             _tags => [ qw(Type Subtype) ],
550             _stream => $newXMP,
551             _decrypted => 1, # (this will be ignored if EncryptMetadata is false)
552             };
553             } elsif ($capture{Root}->{Metadata}) {
554             # free existing metadata object
555 5         16 $newObj{${$capture{Root}->{Metadata}}} = '';
  5         23  
556 5         15 delete $capture{Root}->{Metadata};
557 5         11 $rootChanged = 1; # set flag to replace Root dictionary
558             }
559             }
560             # add new Root dictionary if necessary
561 17         59 my $rootRef = $$mainDict{Root};
562 17 50       59 unless ($rootRef) {
563 0         0 $et->Error("Can't find Root dictionary");
564 0         0 return $rtn;
565             }
566 17 100 100     92 if (not $rootChanged and $prevUpdate) {
567             # must still write Root dictionary if it was previously updated
568 6         29 my $oldPos = LocateObject($xref, $$rootRef);
569 6 100 66     52 $rootChanged = 1 if $oldPos and $oldPos > $prevUpdate;
570             }
571 17 100       88 $newObj{$$rootRef} = $capture{Root} if $rootChanged;
572             #
573             # write incremental update if anything was changed
574             #
575 17 100       65 if ($$et{CHANGED}) {
    50          
576             # remember position of original EOF
577 16         90 my $oldEOF = Tell($outfile) - $$et{PDFBase};
578 16 50       60 Write($outfile, $beginComment) or $rtn = -1;
579              
580             # write new objects
581 16         93 foreach $objRef (sort keys %newObj) {
582 38 50       240 $objRef =~ /^(\d+) (\d+)/ or $rtn = -1, last;
583 38         168 ($id, $gen) = ($1, $2);
584 38 100       118 if (not $newObj{$objRef}) {
585 7 50       43 ++$gen if $gen < 65535;
586             # write free entry in xref table
587 7         28 $newXRef{$id} = [ 0, $gen, 'f' ];
588 7         19 next;
589             }
590             # create new entry for xref table
591 31         82 $newXRef{$id} = [ Tell($outfile) - $$et{PDFBase} + length($/), $gen, 'n' ];
592 31         110 $keyExt = "$id $gen obj"; # (must set for stream encryption)
593 31 50       75 Write($outfile, $/, $keyExt) or $rtn = -1;
594 31 50       132 WriteObject($outfile, $newObj{$objRef}) or $rtn = -1;
595 31 50       92 Write($outfile, $/, 'endobj') or $rtn = -1;
596             }
597              
598             # Prev points to old xref table
599 16 100       85 $$mainDict{Prev} = $capture{startxref} unless $prevUpdate;
600              
601             # add xref entry for head of free-object list
602 16         65 $newXRef{0} = [ 0, 65535, 'f' ];
603              
604             # must insert free xref entries from previous exiftool update if applicable
605 16 100       62 if ($prevUpdate) {
606 13         26 my $mainFree;
607             # extract free entries from our previous Main xref stream
608 13 50 33     76 if ($$mainDict{Type} and $$mainDict{Type} eq '/XRef') {
609 0         0 $mainFree = GetFreeEntries($xref->{dicts}->[0]);
610             } else {
611             # free entries from Main xref table already captured for us
612 13         51 $mainFree = $capture{mainFree};
613             }
614 13         78 foreach $id (sort { $a <=> $b } keys %$mainFree) {
  6         28  
615 19 100       81 $newXRef{$id} = $$mainFree{$id} unless $newXRef{$id};
616             }
617             }
618              
619             # connect linked list of free object in our xref table
620 16         54 my $prevFree = 0;
621 16         82 foreach $id (sort { $b <=> $a } keys %newXRef) { # (reverse sort)
  71         203  
622 59 100       160 next unless $newXRef{$id}->[2] eq 'f'; # skip if not free
623             # no need to add free entry for objects added by us
624             # in previous edits then freed again
625 28 100       96 if ($id >= $nextObject) {
626 3         12 delete $newXRef{$id}; # Note: deleting newXRef entry!
627 3         7 next;
628             }
629 25         57 $newXRef{$id}->[0] = $prevFree;
630 25         58 $prevFree = $id;
631             }
632              
633             # prepare our main dictionary for writing
634 16         67 $$mainDict{Size} = $nextObject; # update number of objects
635             # must change the ID if it exists
636 16 100 66     95 if (ref $$mainDict{ID} eq 'ARRAY' and @{$$mainDict{ID}} > 1) {
  6         35  
637             # increment first byte since this is an easy change to make
638 6         21 $id = $mainDict->{ID}->[1];
639 6 50 0     41 if ($id =~ /^<([0-9a-f]{2})/i) {
    0 0        
      0        
640 6         53 my $byte = unpack('H2',chr((hex($1) + 1) & 0xff));
641 6         26 substr($id, 1, 2) = $byte;
642             } elsif ($id =~ /^\((.)/s and $1 ne '\\' and $1 ne ')' and $1 ne '(') {
643 0         0 my $ch = chr((ord($1) + 1) & 0xff);
644             # avoid generating characters that could cause problems
645 0 0       0 $ch = 'a' if $ch =~ /[()\\\x00-\x08\x0a-\x1f\x7f\xff]/;
646 0         0 substr($id, 1, 1) = $ch;
647             }
648 6         22 $mainDict->{ID}->[1] = $id;
649             }
650              
651             # remember position of xref table in file (we will write this next)
652 16         93 my $startxref = Tell($outfile) - $$et{PDFBase} + length($/);
653              
654             # must write xref as a stream in xref-stream-only files
655 16 50 33     95 if ($$mainDict{Type} and $$mainDict{Type} eq '/XRef') {
656              
657             # create entry for the xref stream object itself
658 0         0 $newXRef{$nextObject++} = [ Tell($outfile) - $$et{PDFBase} + length($/), 0, 'n' ];
659 0         0 $$mainDict{Size} = $nextObject;
660             # create xref stream and Index entry
661 0         0 $$mainDict{W} = [ 1, 4, 2 ]; # int8u, int32u, int16u ('CNn')
662 0         0 $$mainDict{Index} = [ ];
663 0         0 $$mainDict{_stream} = '';
664 0         0 my @ids = sort { $a <=> $b } keys %newXRef;
  0         0  
665 0         0 while (@ids) {
666 0         0 my $startID = $ids[0];
667 0         0 for (;;) {
668 0         0 $id = shift @ids;
669 0         0 my ($pos, $gen, $type) = @{$newXRef{$id}};
  0         0  
670 0 0       0 if ($pos > 0xffffffff) {
671 0         0 $et->Error('Huge files not yet supported');
672 0         0 last;
673             }
674 0 0       0 $$mainDict{_stream} .= pack('CNn', $type eq 'f' ? 0 : 1, $pos, $gen);
675 0 0 0     0 last if not @ids or $ids[0] != $id + 1;
676             }
677             # add Index entries for this section of the xref stream
678 0         0 push @{$$mainDict{Index}}, $startID, $id - $startID + 1;
  0         0  
679             }
680             # write the xref stream object
681 0         0 $keyExt = "$id 0 obj"; # (set anyway, but xref stream should NOT be encrypted)
682 0 0       0 Write($outfile, $/, $keyExt) or $rtn = -1;
683 0 0       0 WriteObject($outfile, $mainDict) or $rtn = -1;
684 0 0       0 Write($outfile, $/, 'endobj') or $rtn = -1;
685              
686             } else {
687              
688             # write new xref table
689 16 50       57 Write($outfile, $/, 'xref', $/) or $rtn = -1;
690             # lines must be exactly 20 bytes, so pad newline if necessary
691 16 50       120 my $endl = (length($/) == 1 ? ' ' : '') . $/;
692 16         79 my @ids = sort { $a <=> $b } keys %newXRef;
  59         150  
693 16         65 while (@ids) {
694 34         69 my $startID = $ids[0];
695 34         88 $buff = '';
696 34         55 for (;;) {
697 56         94 $id = shift @ids;
698 56         84 $buff .= sprintf("%.10d %.5d %s%s", @{$newXRef{$id}}, $endl);
  56         256  
699 56 100 100     236 last if not @ids or $ids[0] != $id + 1;
700             }
701             # write this (contiguous-numbered object) section of the xref table
702 34 50       135 Write($outfile, $startID, ' ', $id - $startID + 1, $/, $buff) or $rtn = -1;
703             }
704              
705             # write main (trailer) dictionary
706 16 50       59 Write($outfile, 'trailer') or $rtn = -1;
707 16 50       46 WriteObject($outfile, $mainDict) or $rtn = -1;
708             }
709             # write trailing comment (marker to allow edits to be reverted)
710 16 50       80 Write($outfile, $/, $endComment, $oldEOF, $/) or $rtn = -1;
711              
712             # write pointer to main xref table and EOF marker
713 16 50       70 Write($outfile, 'startxref', $/, $startxref, $/, '%%EOF', $/) or $rtn = -1;
714              
715             } elsif ($prevUpdate) {
716              
717             # nothing new changed, so copy over previous incremental update
718 1 50       8 $raf->Seek($prevUpdate+$$et{PDFBase}, 0) or $rtn = -1;
719 1         5 while ($raf->Read($buff, 65536)) {
720 1 50       6 Write($outfile, $buff) or $rtn = -1;
721             }
722             }
723 17 100 66     193 if ($rtn > 0 and $$et{CHANGED} and ($$et{DEL_GROUP}{PDF} or $$et{DEL_GROUP}{XMP})) {
      100        
      100        
724 6         39 $et->Warn('ExifTool PDF edits are reversible. Deleted tags may be recovered!', 1);
725             }
726 17         291 undef $newTool;
727 17         172 undef %capture;
728 17         417 return $rtn;
729             }
730              
731              
732             1; # end
733              
734             __END__