File Coverage

lib/PDF/Data.pm
Criterion Covered Total %
statement 47 657 7.1
branch 0 406 0.0
condition 0 281 0.0
subroutine 16 65 24.6
pod 46 49 93.8
total 109 1458 7.4


line stmt bran cond sub pod time code
1             package PDF::Data;
2              
3             # Require Perl v5.16; enable warnings and UTF-8.
4 1     1   204708 use v5.16;
  1         3  
5 1     1   4 use warnings;
  1         2  
  1         30  
6 1     1   5 use utf8;
  1         1  
  1         4  
7              
8             # Declare module version. (Also in pod documentation below.)
9 1     1   377 use version; our $VERSION = version->declare('v1.0.1');
  1         1544  
  1         4  
10              
11             # Initialize modules.
12 1     1   82 use mro;
  1         2  
  1         7  
13 1     1   394 use namespace::autoclean;
  1         14134  
  1         3  
14 1     1   65 use Carp qw[carp croak confess];;
  1         1  
  1         48  
15 1     1   354 use Clone;
  1         2046  
  1         39  
16 1     1   494 use Compress::Raw::Zlib qw[:status :flush];
  1         3927  
  1         206  
17 1     1   456 use Data::Dump qw[dd dump];
  1         4201  
  1         54  
18 1     1   462 use List::MoreUtils qw[minmax];
  1         11257  
  1         4  
19 1     1   849 use List::Util qw[max];
  1         2  
  1         51  
20 1     1   422 use Math::Trig qw[pi];
  1         10807  
  1         67  
21 1     1   6 use POSIX qw[mktime strftime];
  1         1  
  1         8  
22 1     1   1441 use Scalar::Util qw[blessed reftype];
  1         2  
  1         48  
23              
24             # Use byte strings instead of Unicode character strings.
25 1     1   5 use bytes;
  1         2  
  1         5  
26              
27             # Basic parsing regular expressions.
28             our $n = qr/(?:\n|\r\n?)/; # Match a newline. (LF, CRLF or CR)
29             our $ws = qr/(?:(?:(?>%[^\r\n]*)?\s+)+)/; # Match whitespace, including PDF comments.
30              
31             # Declare prototypes.
32             sub is_hash ($);
33             sub is_array ($);
34             sub is_stream ($);
35              
36             # Utility functions.
37 0 0   0 0   sub is_hash ($) { ref $_[0] && reftype($_[0]) eq "HASH"; }
38 0 0   0 0   sub is_array ($) { ref $_[0] && reftype($_[0]) eq "ARRAY"; }
39 0 0   0 0   sub is_stream ($) { &is_hash && exists $_[0]{-data}; }
40              
41             # Create a new PDF::Data object, representing a minimal PDF file.
42             sub new {
43 0     0 1   my ($self, %args) = @_;
44              
45             # Get the class name.
46 0   0       my $class = blessed $self || $self;
47              
48             # Create a new instance using the constructor arguments.
49 0           my $pdf = bless \%args, $class;
50              
51             # Set creation timestamp.
52 0           $pdf->{Info}{CreationDate} = $pdf->timestamp;
53              
54             # Create an empty document catalog and page tree.
55 0           $pdf->{Root}{Pages} = { Kids => [], Count => 0 };
56              
57             # Validate the PDF structure and return the new instance.
58 0           return $pdf->validate;
59             }
60              
61             # Deep copy entire PDF::Data object.
62             sub clone {
63 0     0 1   my ($self) = @_;
64 0           return Clone::clone($self);
65             }
66              
67             # Create a new page with the specified size.
68             sub new_page {
69 0     0 1   my ($self, $x, $y) = @_;
70              
71             # Paper sizes.
72 0           my %sizes = (
73             LETTER => [ 8.5, 11 ],
74             LEGAL => [ 8.5, 14 ],
75             A0 => [ 33.125, 46.8125 ],
76             A1 => [ 23.375, 33.125 ],
77             A2 => [ 16.5, 23.375 ],
78             A3 => [ 11.75, 16.5 ],
79             A4 => [ 8.25, 11.75 ],
80             A5 => [ 5.875, 8.25 ],
81             A6 => [ 4.125, 5.875 ],
82             A7 => [ 2.9375, 4.125 ],
83             A8 => [ 2.0625, 2.9375 ],
84             );
85              
86             # Default page size to US Letter (8.5" x 11").
87 0 0 0       unless ($x and $y and $x > 0 and $y > 0) {
      0        
      0        
88 0   0       $x ||= "LETTER";
89 0 0         croak "Error: Unknown paper size \"$x\"!\n" unless $sizes{$x};
90 0           ($x, $y) = @{$sizes{$x}};
  0            
91             }
92              
93             # Make sure page size was specified.
94 0 0 0       croak join(": ", $self->{-file} || (), "Error: Paper size not specified!\n") unless $x and $y and $x > 0 and $y > 0;
      0        
      0        
      0        
95              
96             # Scale inches to default user space units (72 DPI).
97 0 0         $x *= 72 if $x < 72;
98 0 0         $y *= 72 if $y < 72;
99              
100             # Create and return a new page object.
101             return {
102 0           Type => "/Page",
103             MediaBox => [0, 0, $x, $y],
104             Contents => { -data => "" },
105             Resources => {
106             ProcSet => ["/PDF", "/Text"],
107             },
108             };
109             }
110              
111             # Deep copy the specified page object.
112             sub copy_page {
113 0     0 1   my ($self, $page) = @_;
114              
115             # Temporarily hide parent reference.
116 0           delete local $page->{Parent};
117              
118             # Clone the page object.
119 0           my $copied_page = Clone::clone($page);
120              
121             # return cloned page object.
122 0           return $copied_page;
123             }
124              
125             # Append the specified page to the PDF.
126             sub append_page {
127 0     0 1   my ($self, $page) = @_;
128              
129             # Increment page count for page tree root node.
130 0           $self->{Root}{Pages}{Count}++;
131              
132             # Add page object to page tree root node for simplicity.
133 0           push @{$self->{Root}{Pages}{Kids}}, $page;
  0            
134 0           $page->{Parent} = $self->{Root}{Pages};
135              
136             # Return the page object.
137 0           return $page;
138             }
139              
140             # Read and parse PDF file.
141             sub read_pdf {
142 0     0 1   my ($self, $file, %args) = @_;
143              
144             # Read entire file at once.
145 0           local $/;
146              
147             # Contents of entire PDF file.
148 0           my $data;
149              
150             # Check for standard input.
151 0 0 0       if (($file // "-") eq "-") {
152             # Read all data from standard input.
153 0           $file = "";
154 0 0         binmode STDIN or croak "$file: $!\n";
155 0           $data = ;
156 0 0         close STDIN or croak "$file: $!\n";
157             } else {
158             # Read the entire file.
159 0 0         open my $IN, '<', $file or croak "$file: $!\n";
160 0 0         binmode $IN or croak "$file: $!\n";
161 0           $data = <$IN>;
162 0 0         close $IN or croak "$file: $!\n";
163             }
164              
165             # Parse PDF file data and return new instance.
166 0           return $self->parse_pdf($data, -file => $file, %args);
167             }
168              
169             # Parse PDF file data.
170             sub parse_pdf {
171 0     0 1   my ($self, $data, %args) = @_;
172              
173             # Get the class name.
174 0   0       my $class = blessed $self || $self;
175              
176             # Create a new instance using the provided arguments.
177 0           $self = bless \%args, $class;
178              
179             # Validate PDF file structure.
180             my ($pdf_version, $startxref) = $data =~ /\A(?:%PDF-(\d+\.\d+)$n.*$n)startxref$n(\d+)$n%%EOF$n?\z/s
181 0 0 0       or croak join(": ", $self->{-file} || (), "File is not a valid PDF document!\n");
182              
183             # Check PDF version.
184 0 0 0       warn join(": ", $self->{-file} || (), "Warning: PDF version $pdf_version not supported!\n")
185             unless $pdf_version =~ /^1\.[0-7]$/;
186              
187             # Parsed indirect objects.
188 0           my $objects = {};
189              
190             # Parse PDF objects.
191 0           my @objects = $self->parse_objects($objects, $data, 0);
192              
193             # PDF trailer dictionary.
194 0           my $trailer;
195              
196             # Find trailer dictionary.
197 0           for (my $i = 0; $i < @objects; $i++) {
198 0 0         if ($objects[$i][0] eq "trailer") {
199             $i < $#objects and $objects[$i + 1][1]{type} eq "dict"
200 0 0 0       or croak join(": ", $self->{-file} || (), "Byte offset $objects[$i][1]{offset}: Invalid trailer dictionary!\n");
      0        
201 0           $trailer = $objects[$i + 1][0];
202 0           last;
203             }
204             }
205              
206             # Make sure trailer dictionary was found.
207 0 0 0       croak join(": ", $self->{-file} || (), "PDF trailer dictionary not found!\n") unless defined $trailer;
208              
209             # Resolve indirect object references.
210 0           $self->resolve_references($objects, $trailer);
211              
212             # Create a new instance from the parsed data.
213 0           my $pdf = bless $trailer, $class;
214              
215             # Add any provided arguments.
216 0           foreach my $key (sort keys %args) {
217 0           $pdf->{$key} = $args{$key};
218             }
219              
220             # Validate the PDF structure (unless the -novalidate flag is set) and return the new instance.
221 0 0         return $self->{-novalidate} ? $pdf : $pdf->validate;
222             }
223              
224             # Generate and write a new PDF file.
225             sub write_pdf {
226 0     0 1   my ($self, $file, $time) = @_;
227              
228             # Default missing timestamp to current time, but keep a zero time as a flag.
229 0   0       $time //= time;
230              
231             # Generate PDF file data.
232 0           my $pdf_data = $self->pdf_file_data($time);
233              
234             # Check if standard output is wanted.
235 0 0 0       if (($file // "-") eq "-") {
236             # Write PDF file data to standard output.
237 0           $file = "";
238 0 0         binmode STDOUT or croak "$file: $!\n";
239 0 0         print STDOUT $pdf_data or croak "$file: $!\n";
240             } else {
241             # Write PDF file data to specified output file.
242 0 0         open my $OUT, ">", $file or croak "$file: $!\n";
243 0 0         binmode $OUT or croak "$file: $!\n";
244 0 0         print $OUT $pdf_data or croak "$file: $!\n";
245 0 0         close $OUT or croak "$file: $!\n";
246              
247             # Set modification time to the specified or current timestamp, unless zero.
248 0 0         utime $time, $time, $file if $time;
249              
250             # Print success message.
251 0           print STDERR "Wrote new PDF file \"$file\".\n\n";
252             }
253             }
254              
255             # Generate PDF file data suitable for writing to an output PDF file.
256             sub pdf_file_data {
257 0     0 1   my ($self, $time) = @_;
258              
259             # Default missing timestamp to current time, but keep a zero time as a flag.
260 0   0       $time //= time;
261              
262             # Set PDF modification timestamp, unless zero.
263 0 0         $self->{Info}{ModDate} = $self->timestamp($time) if $time;
264              
265             # Set PDF producer.
266 0           $self->{Info}{Producer} = sprintf "(%s)", join " ", __PACKAGE__, $VERSION;
267              
268             # Validate the PDF structure.
269 0           $self->validate;
270              
271             # Array of indirect objects, with lookup hash as first element.
272 0           my $objects = [{}];
273              
274             # Objects seen while generating the PDF file data.
275 0           my $seen = {};
276              
277             # Start with PDF header.
278 0           my $pdf_file_data = "%PDF-1.4\n%\xBF\xF7\xA2\xFE\n\n";
279              
280             # Write all indirect objects.
281 0           my $xrefs = $self->write_indirect_objects(\$pdf_file_data, $objects, $seen);
282              
283             # Add cross-reference table.
284 0           my $startxref = length($pdf_file_data);
285 0           $pdf_file_data .= sprintf "xref\n0 %d\n", scalar @{$xrefs};
  0            
286 0           $pdf_file_data .= join("", @{$xrefs});
  0            
287              
288             # Save correct size in trailer dictionary.
289 0           $self->{Size} = scalar @{$xrefs};
  0            
290              
291             # Write trailer dictionary.
292 0           $pdf_file_data .= "trailer ";
293 0           $self->write_object(\$pdf_file_data, $objects, $seen, $self, 0);
294              
295             # Write startxref value.
296 0           $pdf_file_data =~ s/\n?\z/\n/;
297 0           $pdf_file_data .= "startxref\n$startxref\n";
298              
299             # End of PDF file data.
300 0           $pdf_file_data .= "%%EOF\n";
301              
302             # Return PDF file data.
303 0           return $pdf_file_data;
304             }
305              
306             # Dump internal structure of PDF file.
307             sub dump_pdf {
308 0     0 1   my ($self, $file, $mode) = @_;
309              
310             # Use "" instead of "-" to describe standard output.
311 0           my $filename = $file =~ s/^-$//r;
312              
313             # Open output file.
314 0 0         open my $OUT, ">$file" or croak "$filename: $!\n";
315              
316             # Data structures already seen.
317 0           my $seen = {};
318              
319             # Dump PDF structures.
320 0 0         printf $OUT "\$pdf = %s;\n", $self->dump_object($self, '$pdf', $seen, 0, $mode) or croak "$filename: $!\n";
321              
322             # Close output file.
323 0 0         close $OUT or croak "$filename: $!\n";
324              
325             # Print success message.
326 0 0         if ($mode eq "outline") {
327 0 0         print STDERR "Dumped outline of PDF internal structure to file \"$file\".\n\n" unless $file eq "-";
328             } else {
329 0 0         print STDERR "Dumped PDF internal structure to file \"$file\".\n\n" unless $file eq "-";
330             }
331             }
332              
333             # Dump outline of internal structure of PDF file.
334             sub dump_outline {
335 0     0 1   my ($self, $file) = @_;
336              
337             # Call dump_pdf() with outline parameter.
338 0   0       return $self->dump_pdf($file // "-", "outline");
339             }
340              
341             # Merge content streams.
342             sub merge_content_streams {
343 0     0 1   my ($self, $streams) = @_;
344              
345             # Make sure content is an array.
346 0 0         return $streams unless is_array $streams;
347              
348             # Remove extra trailing space from streams.
349 0           foreach my $stream (@{$streams}) {
  0            
350 0 0         die unless is_stream $stream;
351 0           $stream->{-data} =~ s/(?<=\s) \z//;
352             }
353              
354             # Concatenate stream data and calculate new length.
355 0           my $merged = { -data => join("", map { $_->{-data}; } @{$streams}) };
  0            
  0            
356 0           $merged->{Length} = length($merged->{-data});
357              
358             # Return merged content stream.
359 0           return $merged;
360             }
361              
362             # Find bounding box for a content stream.
363             sub find_bbox {
364 0     0 1   my ($self, $content_stream, $new) = @_;
365              
366             # Get data from stream, if necessary.
367 0 0         $content_stream = $content_stream->{-data} if is_stream $content_stream;
368              
369             # Split content stream into lines.
370 0           my @lines = grep { $_ ne ""; } split /\n/, $content_stream;
  0            
371              
372             # Bounding box.
373 0           my ($left, $bottom, $right, $top);
374              
375             # Regex to match a number.
376 0           my $n = qr/-?\d+(?:\.\d+)?/;
377              
378             # Determine bounding box from content stream.
379 0           foreach (@lines) {
380             # Skip neutral lines.
381 0 0         next if m{^(?:/Figure <>BDC|/PlacedGraphic /MC\d BDC|EMC|/GS\d gs|BX /Sh\d sh EX Q|[Qqh]|W n|$n $n $n $n $n $n cm)\s*$};
382              
383             # Capture coordinates from drawing operations to calculate bounding box.
384 0 0         if (my ($x1, $y1, $x2, $y2, $x3, $y3) = /^($n) ($n) (?:[ml]|($n) ($n) (?:[vy]|($n) ($n) c))$/) {
    0          
385 0           ($left, $right) = minmax grep { defined $_; } $left, $right, $x1, $x2, $x3;
  0            
386 0           ($bottom, $top) = minmax grep { defined $_; } $bottom, $top, $y1, $y2, $y3;
  0            
387             } elsif (my ($x, $y, $width, $height) = /^($n) ($n) ($n) ($n) re$/) {
388 0           ($left, $right) = minmax grep { defined $_; } $left, $right, $x, $x + $width;
  0            
389 0           ($bottom, $top) = minmax grep { defined $_; } $bottom, $top, $y, $y + $height;
  0            
390             } else {
391 0           croak "Parse error: Content line \"$_\" not recognized!\n";
392             }
393             }
394              
395             # Print bounding box and rectangle.
396 0           my $width = $right - $left;
397 0           my $height = $top - $bottom;
398 0           print STDERR "Bounding Box: $left $bottom $right $top\nRectangle: $left $bottom $width $height\n\n";
399              
400             # Return unless generating a new bounding box.
401 0 0         return unless $new;
402              
403             # Update content stream.
404 0           for ($content_stream) {
405             # Update coordinates in drawing operations.
406 0           s/^($n) ($n) ([ml])$/join " ", $self->round($1 - $left, $2 - $bottom), $3/egm;
  0            
407 0           s/^($n) ($n) ($n) ($n) ([vy])$/join " ", $self->round($1 - $left, $2 - $bottom, $3 - $left, $4 - $bottom), $5/egm;
  0            
408 0           s/^($n) ($n) ($n) ($n) ($n) ($n) (c)$/join " ", $self->round($1 - $left, $2 - $bottom, $3 - $left, $4 - $bottom, $5 - $left, $6 - $bottom), $7/egm;
  0            
409 0           s/^($n $n $n $n) ($n) ($n) (cm)$/join " ", $1, $self->round($2 - $left, $3 - $bottom), $4/egm;
  0            
410             }
411              
412             # Return content stream.
413 0           return $content_stream;
414             }
415              
416             # Make a new bounding box for a content stream.
417             sub new_bbox {
418 0     0 1   my ($self, $content_stream) = @_;
419              
420             # Call find_bbox() with "new" parameter.
421 0           $self->find_bbox($content_stream, 1);
422             }
423              
424             # Generate timestamp in PDF internal format.
425             sub timestamp {
426 0     0 1   my ($self, $time) = @_;
427              
428 0   0       $time //= time;
429 0           my @time = localtime $time;
430 0           my $tz = $time[8] * 60 - mktime(gmtime 0) / 60;
431 0           return sprintf "(D:%s%+03d'%02d)", strftime("%Y%m%d%H%M%S", @time), $tz / 60, abs($tz) % 60;
432             }
433              
434             # Round numeric values to 12 significant digits to avoid floating-point rounding error and remove trailing zeroes.
435             sub round {
436 0     0 1   my ($self, @numbers) = @_;
437              
438 0   0       @numbers = map { sprintf("%.12f", sprintf("%.12g", $_ || 0)) =~ s/\.?0+$//r; } @numbers;
  0            
439 0 0         return wantarray ? @numbers : $numbers[0];
440             }
441              
442             # Concatenate a transformation matrix with an original matrix, returning a new matrix.
443             sub concat_matrix {
444 0     0 1   my ($self, $transform, $orig) = @_;
445              
446 0           return [$self->round(
447             $transform->[0] * $orig->[0] + $transform->[1] * $orig->[2],
448             $transform->[0] * $orig->[1] + $transform->[1] * $orig->[3],
449             $transform->[2] * $orig->[0] + $transform->[3] * $orig->[2],
450             $transform->[2] * $orig->[1] + $transform->[3] * $orig->[3],
451             $transform->[4] * $orig->[0] + $transform->[5] * $orig->[2] + $orig->[4],
452             $transform->[4] * $orig->[1] + $transform->[5] * $orig->[3] + $orig->[5],
453             )];
454             }
455              
456             # Calculate the inverse of a matrix, if possible.
457             sub invert_matrix {
458 0     0 1   my ($self, $matrix) = @_;
459              
460             # Calculate the determinant of the matrix.
461 0           my $det = $self->round($matrix->[0] * $matrix->[3] - $matrix->[1] * $matrix->[2]);
462              
463             # If the determinant is zero, then the matrix is not invertible.
464 0 0         return if $det == 0;
465              
466             # Return the inverse matrix.
467 0           return [$self->round(
468             $matrix->[3] / $det,
469             -$matrix->[1] / $det,
470             -$matrix->[2] / $det,
471             $matrix->[0] / $det,
472             ($matrix->[2] * $matrix->[5] - $matrix->[3] * $matrix->[4]) / $det,
473             ($matrix->[1] * $matrix->[4] - $matrix->[0] * $matrix->[5]) / $det,
474             )];
475             }
476              
477             # Create a transformation matrix to translate the origin of the coordinate system to the specified coordinates.
478             sub translate {
479 0     0 1   my ($self, $x, $y) = @_;
480              
481             # Return a translate matrix.
482 0           return [$self->round(1, 0, 0, 1, $x, $y)];
483             }
484              
485             # Create a transformation matrix to scale the coordinate space by the specified horizontal and vertical scaling factors.
486             sub scale {
487 0     0 1   my ($self, $x, $y) = @_;
488              
489             # Return a scale matrix.
490 0           return [$self->round($x, 0, 0, $y, 0, 0)];
491             }
492              
493             # Create a transformation matrix to rotate the coordinate space counterclockwise by the specified angle (in degrees).
494             sub rotate {
495 0     0 1   my ($self, $angle) = @_;
496              
497             # Calculate the sine and cosine of the angle.
498 0           my $sin = sin($angle * pi / 180);
499 0           my $cos = cos($angle * pi / 180);
500              
501             # Return a rotate matrix.
502 0           return [$self->round($cos, $sin, -$sin, $cos, 0, 0)];
503             }
504              
505             # Validate PDF structure.
506             sub validate {
507 0     0 1   my ($self) = @_;
508              
509             # Catch validation errors.
510 0           eval {
511             # Make sure document catalog exists and has the correct type.
512 0           $self->validate_key("Root", "Type", "/Catalog", "document catalog");
513              
514             # Make sure page tree root node exists, has the correct type, and has no parent.
515 0           $self->validate_key("Root/Pages", "Type", "/Pages", "page tree root");
516 0           $self->validate_key("Root/Pages", "Parent", undef, "page tree root");
517              
518             # Validate page tree.
519 0           $self->validate_page_tree("Root/Pages", $self->{Root}{Pages});
520             };
521              
522             # Check for validation errors.
523 0 0         if ($@) {
524             # Make validation errors fatal if -validate flag is set.
525 0 0         if ($self->{-validate}) {
526 0           croak $@;
527             } else {
528 0           carp $@;
529             }
530             }
531              
532             # Return this instance.
533 0           return $self;
534             }
535              
536             # Validate page tree.
537             sub validate_page_tree {
538 0     0 1   my ($self, $path, $page_tree_node) = @_;
539              
540             # Count of leaf nodes (page objects) under this page tree node.
541 0           my $count = 0;
542              
543             # Validate children.
544 0 0 0       is_array(my $kids = $page_tree_node->{Kids}) or croak join(": ", $self->{-file} || (), "Error: $path\->{Kids} must be an array!\n");
545 0           for (my $i = 0; $i < @{$kids}; $i++) {
  0            
546 0 0 0       is_hash(my $kid = $kids->[$i]) or croak join(": ", $self->{-file} || (), "Error: $path\[$i] must be be a hash!\n");
547 0 0 0       $kid->{Type} or croak join(": ", $self->{-file} || (), "Error: $path\[$i]->{Type} is a required field!\n");
548 0 0         if ($kid->{Type} eq "/Pages") {
    0          
549 0           $count += $self->validate_page_tree("$path\[$i]", $kid);
550             } elsif ($kid->{Type} eq "/Page") {
551 0           $self->validate_page("$path\[$i]", $kid);
552 0           $count++;
553             } else {
554 0   0       croak join(": ", $self->{-file} || (), "Error: $path\[$i]->{Type} must be /Pages or /Page!\n");
555             }
556             }
557              
558             # Validate resources, if any.
559 0 0         $self->validate_resources("$path\->{Resources}", $page_tree_node->{Resources}) if is_hash($page_tree_node->{Resources});
560              
561             # Fix leaf node count if wrong.
562 0 0 0       if (($page_tree_node->{Count} || 0) != $count) {
563 0   0       warn join(": ", $self->{-file} || (), "Warning: Fixing: $path->{Count} = $count\n");
564 0           $page_tree_node->{Count} = $count;
565             }
566              
567             # Return leaf node count.
568 0           return $count;
569             }
570              
571             # Validate page object.
572             sub validate_page {
573 0     0 1   my ($self, $path, $page) = @_;
574              
575 0 0         if (my $contents = $page->{Contents}) {
576 0 0         $contents = $self->merge_content_streams($contents) if is_array($contents);
577 0 0 0       is_stream($contents) or croak join(": ", $self->{-file} || (), "Error: $path\->{Contents} must be an array or stream!\n");
578 0           $self->validate_content_stream("$path\->{Contents}", $contents);
579             }
580              
581             # Validate resources, if any.
582 0 0         $self->validate_resources("$path\->{Resources}", $page->{Resources}) if is_hash($page->{Resources});
583             }
584              
585             # Validate resources.
586             sub validate_resources {
587 0     0 1   my ($self, $path, $resources) = @_;
588              
589             # Validate XObjects, if any.
590 0 0         $self->validate_xobjects("$path\{XObject}", $resources->{XObject}) if is_hash($resources->{XObject});
591             }
592              
593             # Validate form XObjects.
594             sub validate_xobjects {
595 0     0 1   my ($self, $path, $xobjects) = @_;
596              
597             # Validate each form XObject.
598 0           foreach my $name (sort keys %{$xobjects}) {
  0            
599 0           $self->validate_xobject("$path\{$name}", $xobjects->{$name});
600             }
601             }
602              
603             # Validate a single form XObject.
604             sub validate_xobject {
605 0     0 1   my ($self, $path, $xobject) = @_;
606              
607             # Make sure the form XObject is a stream.
608 0 0 0       is_stream($xobject) or croak join(": ", $self->{-file} || (), "Error: $path must be a content stream!\n");
609              
610             # Make sure the Subtype is set to /Form.
611 0 0 0       $xobject->{Subtype} eq "/Form" or croak join(": ", $self->{-file} || (), "Error: $path\->{Subtype} must be /Form!\n");
612              
613             # Validate the form XObject content stream.
614 0           $self->validate_content_stream($path, $xobject);
615              
616             # Validate resources, if any.
617 0 0         $self->validate_resources("$path\{Resources}", $xobject->{Resources}) if is_hash($xobject->{Resources});
618             }
619              
620             # Validate content stream.
621             sub validate_content_stream {
622 0     0 1   my ($self, $path, $stream) = @_;
623              
624             # Make sure the content stream can be parsed.
625 0           my @objects = eval { $self->parse_objects({}, $stream->{-data}, 0); };
  0            
626 0 0 0       croak join(": ", $self->{-file} || (), "Error: $path: $@") if $@;
627              
628             # Minify content stream if requested.
629 0 0         $self->minify_content_stream($stream, \@objects) if $self->{-minify};
630             }
631              
632             # Minify content stream.
633             sub minify_content_stream {
634 0     0 1   my ($self, $stream, $objects) = @_;
635              
636             # Parse object stream if necessary.
637 0   0       $objects ||= [ $self->parse_objects({}, $stream->{-data}, 0) ];
638              
639             # Generate new content stream from objects.
640 0           $stream->{-data} = $self->generate_content_stream($objects);
641              
642             # Recalculate stream length.
643 0           $stream->{Length} = length $stream->{-data};
644              
645             # Sanity check.
646             die "Content stream serialization failed"
647 0           if dump([map {$_->[0]} @{$objects}]) ne
  0            
648 0 0         dump([map {$_->[0]} $self->parse_objects({}, $stream->{-data}, 0)]);
  0            
649             }
650              
651             # Generate new content stream from objects.
652             sub generate_content_stream {
653 0     0 1   my ($self, $objects) = @_;
654              
655             # Generated content stream.
656 0           my $stream = "";
657              
658             # Loop across parsed objects.
659 0           foreach my $object (@{$objects}) {
  0            
660             # Check parsed object type.
661 0 0         if ($object->[1]{type} eq "dict") {
    0          
662             # Serialize dictionary.
663 0           $self->serialize_dictionary(\$stream, $object->[0]);
664             } elsif ($object->[1]{type} eq "array") {
665             # Serialize array.
666 0           $self->serialize_array(\$stream, $object->[0]);
667             } else {
668             # Serialize string or other token.
669 0           $self->serialize_object(\$stream, $object->[0]);
670             }
671             }
672              
673             # Return generated content stream.
674 0           return $stream;
675             }
676              
677             # Serialize a hash as a dictionary object.
678             sub serialize_dictionary {
679 0     0 1   my ($self, $stream, $hash) = @_;
680              
681             # Serialize the hash key-value pairs.
682 0           my @pairs = %{$hash};
  0            
683 0           ${$stream} .= "<<";
  0            
684 0           for (my $i = 0; $i < @pairs; $i++) {
685 0 0         if ($i % 2) {
686 0 0         if (is_hash($pairs[$i])) {
    0          
687 0           $self->serialize_dictionary($stream, $pairs[$i]);
688             } elsif (is_array($pairs[$i])) {
689 0           $self->serialize_array($stream, $pairs[$i]);
690             } else {
691 0           $self->serialize_object($stream, $pairs[$i]);
692             }
693             } else {
694 0           ${$stream} .= "/$pairs[$i]";
  0            
695             }
696             }
697 0           ${$stream} .= ">>";
  0            
698             }
699              
700             # Serialize an array.
701             sub serialize_array {
702 0     0 1   my ($self, $stream, $array) = @_;
703              
704             # Serialize the array values.
705 0           ${$stream} .= "[";
  0            
706 0           foreach my $obj (@{$array}) {
  0            
707 0 0         if (is_hash($obj)) {
    0          
708 0           $self->serialize_dictionary($stream, $obj);
709             } elsif (is_array($obj)) {
710 0           $self->serialize_array($stream, $obj);
711             } else {
712 0           $self->serialize_object($stream, $obj);
713             }
714             }
715 0           ${$stream} .= "]";
  0            
716             }
717              
718             # Append the serialization of an object to the generated content stream.
719             sub serialize_object {
720 0     0 1   my ($self, $stream, $object) = @_;
721              
722             # Strip leading/trailing whitespace from object if minifying.
723 0 0         if ($self->{-minify}) {
724 0           $object =~ s/^\s+//;
725 0           $object =~ s/\s+$//;
726             }
727              
728             # Wrap the line if line length would exceed 255 characters.
729 0 0         ${$stream} .= "\n" if length((${$stream} =~ /(.*)\z/)[0]) + length($object) >= 255;
  0            
  0            
730              
731             # Add a space if necessary.
732 0 0 0       ${$stream} .= " " unless ${$stream} =~ /(^|[\s)>\[\]{}])$/ or $object =~ /^[\s()<>\[\]{}\/%]/;
  0            
  0            
733              
734             # Add the serialized object.
735 0           ${$stream} .= $object;
  0            
736             }
737              
738             # Validate the specified hash key value.
739             sub validate_key {
740 0     0 1   my ($self, $hash, $key, $value, $label) = @_;
741              
742             # Create the hash if necessary.
743 0 0         $hash = $_[1] = {} unless $hash;
744              
745             # Get the hash node from the PDF structure by path, if necessary.
746 0 0         $hash = $self->get_hash_node($hash) unless is_hash $hash;
747              
748             # Make sure the hash key has the correct value.
749 0 0 0       if (defined $value and (not defined $hash->{$key} or $hash->{$key} ne $value)) {
    0 0        
      0        
750 0 0 0       warn join(": ", $self->{-file} || (), "Warning: Fixing $label: {$key} $hash->{$key} -> $value\n") if $hash->{$key};
751 0           $hash->{$key} = $value;
752             } elsif (not defined $value and exists $hash->{$key}) {
753 0 0 0       warn join(": ", $self->{-file} || (), "Warning: Deleting $label: {$key} $hash->{$key}\n") if $hash->{$key};
754 0           delete $hash->{$key};
755             }
756              
757             # Return this instance.
758 0           return $self;
759             }
760              
761             # Get a hash node from the PDF structure by path.
762             sub get_hash_node {
763 0     0 1   my ($self, $path) = @_;
764              
765             # Split the path.
766 0           my @path = split /\//, $path;
767              
768             # Find the hash node with the specified path, creating nodes if necessary.
769 0           my $hash = $self;
770 0           foreach my $key (@path) {
771 0   0       $hash->{$key} ||= {};
772 0           $hash = $hash->{$key};
773             }
774              
775             # Return the hash node.
776 0           return $hash;
777             }
778              
779             # Parse PDF objects into Perl representations.
780             sub parse_objects {
781 0     0 1   my ($self, $objects, $data, $offset) = @_;
782              
783             # Parsed PDF objects.
784 0           my @objects;
785              
786             # Calculate EOF offset.
787 0           my $eof = $offset + length $data;
788              
789             # Copy data for parsing.
790 0           local $_ = $data;
791              
792             # Parse PDF objects in input string.
793 0           while ($_ ne "") {
794             # Update the file offset.
795 0           $offset = $eof - length $_;
796              
797             # Parse the next PDF object.
798 0 0         if (s/\A$ws//) { # Strip leading whitespace/comments.
    0          
    0          
    0          
    0          
    0          
799 0           next;
800             } elsif (s/\A(<<((?:[^<>]+|<[^<>]+>|(?1))*)$ws?>>)//) { # Dictionary: <<...>> (including nested dictionaries)
801 0           my @pairs = $self->parse_objects($objects, $2, $offset);
802 0           for (my $i = 0; $i < @pairs; $i++) {
803             $pairs[$i] = $i % 2 ? $pairs[$i][0] : $pairs[$i][1]{name}
804 0 0 0       // croak join(": ", $self->{-file} || (), "Byte offset $offset: Dictionary key is not a name!\n");
      0        
805             }
806 0           push @objects, [ { @pairs }, { type => "dict" } ];
807             } elsif (s/\A(\[((?:(?>[^\[\]]+)|(?1))*)\])//) { # Array: [...] (including nested arrays)
808 0           my $array = [ map $_->[0], $self->parse_objects($objects, $2, $offset) ];
809 0           push @objects, [ $array, { type => "array" }];
810             } elsif (s/\A(\((?:(?>[^\\()]+)|\\.|(?1))*\))//) { # String literal: (...) (including nested parens)
811 0           push @objects, [ $1, { type => "string" } ];
812             } elsif (s/\A(<[0-9A-Fa-f\s]*>)//) { # Hexadecimal string literal: <...>
813 0           push @objects, [ lc($1) =~ s/\s+//gr, { type => "hex" } ];
814             } elsif (s/\A(\/?[^\s()<>\[\]{}\/%]+)//) { # /Name, number or other token
815             # Check for tokens of special interest.
816 0           my $token = $1;
817 0 0 0       if ($token eq "obj" or $token eq "R") { # Indirect object/reference: 999 0 obj or 999 0 R
    0          
    0          
    0          
    0          
    0          
    0          
    0          
818 0           my ($id, $gen) = splice @objects, -2;
819 0 0         my $type = $token eq "R" ? "reference" : "definition";
820             "$id->[1]{type} $gen->[1]{type}" eq "int int"
821 0 0 0       or croak join(": ", $self->{-file} || (), "Byte offset $offset: $id->[0] $gen->[0] $token: Invalid indirect object $type!\n");
822 0   0       my $new_id = join("-", $id->[0], $gen->[0] || ());
823             push @objects, [
824             ($token eq "R" ? \$new_id : $new_id),
825             { type => $token, offset => $id->[1]{offset} }
826 0 0         ];
827             } elsif ($token eq "stream") { # Stream content: stream ... endstream
828 0           my ($id, $stream) = @objects[-2,-1];
829 0 0 0       $stream->[1]{type} eq "dict" or croak join(": ", $self->{-file} || (), "Byte offset $offset: Stream dictionary missing!\n");
830 0 0 0       $id->[1]{type} eq "obj" or croak join(": ", $self->{-file} || (), "Byte offset $offset: Invalid indirect object definition!\n");
831 0           $_ = $_->[0] for $id, $stream;
832             defined(my $length = $stream->{Length})
833 0 0 0       or warn join(": ", $self->{-file} || (), "Byte offset $offset: Object #$id: Stream length not found in metadata!\n");
834 0           s/\A\r?\n//;
835              
836             # If the declared stream length is missing or invalid, determine the shortest possible length to make the stream valid.
837 0 0 0       unless (defined($length) && substr($_, $length) =~ /\A(\s*endstream$ws)/) {
838 0 0         if (/\A((?>(?:[^e]+|(?!endstream\s)e)*))\s*endstream$ws/) {
839 0           $length = length($1);
840             } else {
841 0   0       croak join(": ", $self->{-file} || (), "Byte offset $offset: Invalid stream definition!\n");
842             }
843             }
844              
845 0           $stream->{-data} = substr($_, 0, $length);
846 0           $stream->{-id} = $id;
847 0           $stream->{Length} = $length;
848              
849 0           $_ = substr($_, $length);
850 0           s/\A\s*endstream$ws//;
851              
852 0 0         $self->filter_stream($stream) if $stream->{Filter};
853             } elsif ($token eq "endobj") { # Indirect object definition: 999 0 obj ... endobj
854 0           my ($id, $object) = splice @objects, -2;
855 0 0 0       $id->[1]{type} eq "obj" or croak join(": ", $self->{-file} || (), "Byte offset $offset: Invalid indirect object definition!\n");
856 0           $object->[1]{id} = $id->[0];
857 0           $objects->{$id->[0]} = $object;
858 0   0       $objects->{offset}{$object->[1]{offset} // $offset} = $object;
859 0           push @objects, $object;
860             } elsif ($token eq "xref") { # Cross-reference table
861             s/\A$ws\d+$ws\d+$n(?>\d{10}\ \d{5}\ [fn](?:\ [\r\n]|\r\n))+//
862 0 0 0       or croak join(": ", $self->{-file} || (), "Byte offset $offset: Invalid cross-reference table!\n");
863             } elsif ($token =~ /^[+-]?\d+$/) { # Integer: [+-]999
864 0           push @objects, [ $token, { type => "int" } ];
865             } elsif ($token =~ /^[+-]?(?:\d+\.\d*|\.\d+)$/) { # Real number: [+-]999.999
866 0           push @objects, [ $token, { type => "real" } ];
867             } elsif ($token =~ /^\/(.*)$/) { # Name: /Name
868 0           push @objects, [ $token, { type => "name", name => $1 } ];
869             } elsif ($token =~ /^(?:true|false)$/) { # Boolean: true or false
870 0           push @objects, [ $token, { type => "bool", bool => $token eq "true" } ];
871             } else { # Other token
872 0           push @objects, [ $token, { type => "token" } ];
873             }
874             } else {
875 0           s/\A([^\r\n]*).*\z/$1/s;
876 0   0       croak join(": ", $self->{-file} || (), "Byte offset $offset: Parse error on input: \"$_\"\n");
877             }
878              
879             # Update offset/length of last object.
880 0   0       $objects[-1][1]{offset} //= $offset;
881 0           $objects[-1][1]{length} = $eof - length($_) - $objects[-1][1]{offset};
882             }
883              
884             # Return parsed PDF objects.
885 0           return @objects;
886             }
887              
888             # Parse PDF objects from standalone PDF data.
889             sub parse_data {
890 0     0 1   my ($self, $data) = @_;
891              
892             # Parse PDF objects from data.
893 0   0       my @objects = $self->parse_objects({}, $data // "", 0);
894              
895             # Discard parser metadata.
896 0           @objects = map { $_->[0]; } @objects;
  0            
897              
898             # Return parsed objects.
899 0 0         return wantarray ? @objects : $objects[0];
900             }
901              
902             # Filter stream data.
903             sub filter_stream {
904 0     0 1   my ($self, $stream) = @_;
905              
906             # Get stream filters, if any.
907 0 0         my @filters = $stream->{Filter} ? is_array $stream->{Filter} ? @{$stream->{Filter}} : ($stream->{Filter}) : ();
  0 0          
908              
909             # Decompress stream data if necessary.
910 0 0         if ($filters[0] eq "/FlateDecode") {
911             # Remember that this stream was compressed.
912 0           $stream->{-compress} = 1;
913              
914             # Decompress the stream.
915 0           my $zlib = new Compress::Raw::Zlib::Inflate;
916 0           my $output;
917 0           my $status = $zlib->inflate($stream->{-data}, $output);
918 0 0 0       if ($status == Z_OK or $status == Z_STREAM_END) {
919 0           $stream->{-data} = $output;
920 0           $stream->{Length} = length $output;
921             } else {
922 0   0       croak join(": ", $self->{-file} || (), "Object #$stream->{-id}: Stream inflation failed! ($zlib->msg)\n");
923             }
924              
925             # Stream is no longer compressed; remove /FlateDecode filter.
926 0           shift @filters;
927              
928             # Preserve remaining filters, if any.
929 0 0         if (@filters > 1) {
    0          
930 0           $stream->{Filter} = \@filters;
931             } elsif (@filters) {
932 0           $stream->{Filter} = shift @filters;
933             } else {
934 0           delete $stream->{Filter};
935             }
936             }
937             }
938              
939             # Compress stream data.
940             sub compress_stream {
941 0     0 1   my ($self, $stream) = @_;
942              
943             # Get stream filters, if any.
944 0 0         my @filters = $stream->{Filter} ? is_array $stream->{Filter} ? @{$stream->{Filter}} : ($stream->{Filter}) : ();
  0 0          
945              
946             # Return a new stream so the in-memory copy remains uncompressed to work with.
947 0           my $new_stream = { %{$stream} };
  0            
948 0           $new_stream->{-data} = "";
949 0           my ($zlib, $status) = Compress::Raw::Zlib::Deflate->new(-Level => 9, -Bufsize => 65536, AppendOutput => 1);
950 0 0 0       $zlib->deflate($stream->{-data}, $new_stream->{-data}) == Z_OK or croak join(": ", $self->{-file} || (), "Object #$stream->{-id}: Stream deflation failed! ($zlib->msg)\n");
951 0 0 0       $zlib->flush($new_stream->{-data}, Z_FINISH) == Z_OK or croak join(": ", $self->{-file} || (), "Object #$stream->{-id}: Stream deflation failed! ($zlib->msg)\n");
952 0           $new_stream->{Length} = length $new_stream->{-data};
953 0 0         $new_stream->{Filter} = @filters ? ["/FlateDecode", @filters] : "/FlateDecode";
954 0           return $new_stream;
955             }
956              
957             # Resolve indirect object references.
958             sub resolve_references {
959 0     0 1   my ($self, $objects, $object) = @_;
960              
961             # Replace indirect object references with a reference to the actual object.
962 0 0 0       if (ref $object and reftype($object) eq "SCALAR") {
963 0           my $id = ${$object};
  0            
964 0 0         if ($objects->{$id}) {
965 0           ($object, my $metadata) = @{$objects->{$id}};
  0            
966 0 0         return $object if $metadata->{resolved}++;
967             } else {
968 0           ($id, my $gen) = split /-/, $id;
969 0   0       $gen ||= "0";
970 0   0       warn join(": ", $self->{-file} || (), "Warning: $id $gen R: Referenced indirect object not found!\n");
971             }
972             }
973              
974             # Check object type.
975 0 0         if (is_hash $object) {
    0          
976             # Resolve references in hash values.
977 0 0         foreach my $key (sort { fc($a) cmp fc($b) || $a cmp $b; } keys %{$object}) {
  0            
  0            
978 0 0         $object->{$key} = $self->resolve_references($objects, $object->{$key}) if ref $object->{$key};
979             }
980              
981             # For streams, validate the length metadata.
982 0 0         if (is_stream $object) {
983 0 0 0       substr($object->{-data}, $object->{Length}) =~ s/\A\s+\z// if $object->{Length} and length($object->{-data}) > $object->{Length};
984 0           my $len = length $object->{-data};
985 0   0       $object->{Length} ||= $len;
986             $len == $object->{Length}
987 0 0 0       or warn join(": ", $self->{-file} || (), "Warning: Object #$object->{-id}: Stream length does not match metadata! ($len != $object->{Length})\n");
988             }
989             } elsif (is_array $object) {
990             # Resolve references in array values.
991 0           foreach my $i (0 .. $#{$object}) {
  0            
992 0 0         $object->[$i] = $self->resolve_references($objects, $object->[$i]) if ref $object->[$i];
993             }
994             }
995              
996             # Return object with resolved references.
997 0           return $object;
998             }
999              
1000             # Write all indirect objects to PDF file data.
1001             sub write_indirect_objects {
1002 0     0 1   my ($self, $pdf_file_data, $objects, $seen) = @_;
1003              
1004             # Enumerate all indirect objects.
1005 0           $self->enumerate_indirect_objects($objects);
1006              
1007             # Cross-reference file offsets.
1008 0           my $xrefs = ["0000000000 65535 f \n"];
1009              
1010             # Loop across indirect objects.
1011 0           for (my $i = 1; $i <= $#{$objects}; $i++) {
  0            
1012             # Save file offset for cross-reference table.
1013 0           push @{$xrefs}, sprintf "%010d 00000 n \n", length(${$pdf_file_data});
  0            
  0            
1014              
1015             # Write the indirect object header.
1016 0           ${$pdf_file_data} .= "$i 0 obj\n";
  0            
1017              
1018             # Write the object itself.
1019 0           $self->write_object($pdf_file_data, $objects, $seen, $objects->[$i], 0);
1020              
1021             # Write the indirect object trailer.
1022 0           ${$pdf_file_data} =~ s/\n?\z/\n/;
  0            
1023 0           ${$pdf_file_data} .= "endobj\n\n";
  0            
1024             }
1025              
1026             # Return cross-reference file offsets.
1027 0           return $xrefs;
1028             }
1029              
1030             # Enumerate all indirect objects.
1031             sub enumerate_indirect_objects {
1032 0     0 1   my ($self, $objects) = @_;
1033              
1034             # Add top-level PDF indirect objects.
1035             $self->add_indirect_objects($objects,
1036             $self->{Root} ? $self->{Root} : (), # Document catalog
1037             $self->{Info} ? $self->{Info} : (), # Document information dictionary (if any)
1038             $self->{Root}{Dests} ? $self->{Root}{Dests} : (), # Named destinations (if any)
1039             $self->{Root}{Metadata} ? $self->{Root}{Metadata} : (), # Document metadata (if any)
1040             $self->{Root}{Outlines} ? $self->{Root}{Outlines} : (), # Document outline hierarchy (if any)
1041             $self->{Root}{Pages} ? $self->{Root}{Pages} : (), # Document page tree
1042             $self->{Root}{Threads} ? $self->{Root}{Threads} : (), # Articles (if any)
1043 0 0         $self->{Root}{StructTreeRoot} ? $self->{Root}{StructTreeRoot} : (), # Document structure tree (if any)
    0          
    0          
    0          
    0          
    0          
    0          
    0          
1044             );
1045              
1046             # Add optional content groups, if any.
1047 0 0         $self->add_indirect_objects($objects, @{$self->{Root}{OCProperties}{OCGs}}) if $self->{Root}{OCProperties};
  0            
1048              
1049             # Enumerate shared objects.
1050 0           $self->enumerate_shared_objects($objects, {}, {}, $self->{Root});
1051              
1052             # Add referenced indirect objects.
1053 0           for (my $i = 1; $i <= $#{$objects}; $i++) {
  0            
1054             # Get object.
1055 0           my $object = $objects->[$i];
1056              
1057             # Check object type.
1058 0 0         if (is_hash $object) {
1059             # Objects to add.
1060 0           my @objects;
1061              
1062             # Hashes to scan.
1063 0           my @hashes = $object;
1064              
1065             # Iteratively recurse through hash tree.
1066 0           while (@hashes) {
1067             # Get the next hash.
1068 0           $object = shift @hashes;
1069              
1070             # Check each hash key.
1071 0 0         foreach my $key (sort { fc($a) cmp fc($b) || $a cmp $b; } keys %{$object}) {
  0            
  0            
1072 0 0 0       if (($object->{Type} // "") eq "/ExtGState" and $key eq "Font" and is_array $object->{Font} and is_hash $object->{Font}[0]) {
    0 0        
    0 0        
    0 0        
      0        
      0        
      0        
      0        
      0        
      0        
      0        
      0        
      0        
      0        
      0        
      0        
      0        
1073 0           push @objects, $object->{Font}[0];
1074             } elsif ($key =~ /^(?:Data|First|ID|Last|Next|Obj|Parent|ParentTree|Popup|Prev|Root|StmOwn|Threads|Widths)$/
1075             or $key =~ /^(?:AN|Annotation|B|C|CI|DocMDP|F|FontDescriptor|I|IX|K|Lock|N|P|Pg|RI|SE|SV|V)$/ and ref $object->{$key} and is_hash $object->{$key}
1076             or is_hash $object->{$key} and ($object->{$key}{-data} or $object->{$key}{Kids} or ($object->{$key}{Type} // "") =~ /^\/(?:Filespec|Font)$/)
1077             or ($object->{S} // "") eq "/Thread" and $key eq "D"
1078             or ($object->{S} // "") eq "/Hide" and $key eq "T"
1079             ) {
1080 0           push @objects, $object->{$key};
1081             } elsif ($key =~ /^(?:Annots|B|C|CO|Fields|K|Kids|O|Pages|TrapRegions)$/ and is_array $object->{$key}) {
1082 0           push @objects, grep { is_hash $_; } @{$object->{$key}};
  0            
  0            
1083             } elsif (is_hash $object->{$key}) {
1084 0           push @hashes, $object->{$key};
1085             }
1086             }
1087             }
1088              
1089             # Add the objects found, if any.
1090 0 0         $self->add_indirect_objects($objects, @objects) if @objects;
1091             }
1092             }
1093             }
1094              
1095             # Enumerate shared objects.
1096             sub enumerate_shared_objects {
1097 0     0 1   my ($self, $objects, $seen, $ancestors, $object) = @_;
1098              
1099             # Add shared indirect objects.
1100 0 0         if ($seen->{$object}++) {
1101 0 0         $self->add_indirect_objects($objects, $object) unless $objects->[0]{$object};
1102 0           return;
1103             }
1104              
1105             # Return if this object is an ancestor of itself.
1106 0 0         return if $ancestors->{$object};
1107              
1108             # Add this object to the lookup hash of ancestors.
1109 0           $ancestors->{$object}++;
1110              
1111             # Recurse to check entire object tree.
1112 0 0         if (is_hash $object) {
    0          
1113 0 0         foreach my $key (sort { fc($a) cmp fc($b) || $a cmp $b; } keys %{$object}) {
  0            
  0            
1114 0 0         $self->enumerate_shared_objects($objects, $seen, $ancestors, $object->{$key}) if ref $object->{$key};
1115             }
1116             } elsif (is_array $object) {
1117 0           foreach my $obj (@{$object}) {
  0            
1118 0 0         $self->enumerate_shared_objects($objects, $seen, $ancestors, $obj) if ref $obj;
1119             }
1120             }
1121              
1122             # Remove this object from the lookup hash of ancestors.
1123 0           delete $ancestors->{$object};
1124             }
1125              
1126             # Add indirect objects.
1127             sub add_indirect_objects {
1128 0     0 1   my ($self, $objects, @objects) = @_;
1129              
1130             # Loop across specified objects.
1131 0           foreach my $object (@objects) {
1132             # Check if object exists and is not in the lookup hash yet.
1133 0 0 0       if (defined $object and not $objects->[0]{$object}) {
1134             # Add the new indirect object to the array.
1135 0           push @{$objects}, $object;
  0            
1136              
1137             # Save the object ID in the lookup hash, keyed by the object.
1138 0           $objects->[0]{$object} = $#{$objects};
  0            
1139             }
1140             }
1141             }
1142              
1143             # Write a direct object to the string of PDF file data.
1144             sub write_object {
1145 0     0 1   my ($self, $pdf_file_data, $objects, $seen, $object, $indent) = @_;
1146              
1147             # Make sure the same object isn't written twice.
1148 0 0 0       if (ref $object and $seen->{$object}++) {
1149 0   0       croak join(": ", $self->{-file} || (), "Object $object written more than once!\n");
1150             }
1151              
1152             # Check object type.
1153 0 0 0       if (is_hash $object) {
    0          
    0          
    0          
1154             # For streams, compress the stream or update the length metadata.
1155 0 0         if (is_stream $object) {
1156 0 0 0       if (($self->{-compress} or $object->{-compress}) and not ($self->{-decompress} or $object->{-decompress})) {
      0        
      0        
1157 0           $object = $self->compress_stream($object);
1158             } else {
1159 0           $object->{Length} = length $object->{-data};
1160             }
1161             }
1162              
1163             # Dictionary object.
1164 0           $self->serialize_object($pdf_file_data, "<<\n");
1165 0 0         foreach my $key (sort { fc($a) cmp fc($b) || $a cmp $b; } keys %{$object}) {
  0            
  0            
1166 0 0         next if $key =~ /^-/;
1167 0           my $obj = $object->{$key};
1168 0 0         $self->add_indirect_objects($objects, $obj) if is_stream $obj;
1169 0           $self->serialize_object($pdf_file_data, join("", " " x ($indent + 2), "/$key "));
1170 0 0         if (not ref $obj) {
    0          
1171 0           $self->serialize_object($pdf_file_data, "$obj\n");
1172             } elsif ($objects->[0]{$obj}) {
1173 0           $self->serialize_object($pdf_file_data, "$objects->[0]{$obj} 0 R\n");
1174             } else {
1175 0 0         $self->write_object($pdf_file_data, $objects, $seen, $object->{$key}, ref $object ? $indent + 2 : 0);
1176             }
1177             }
1178 0           $self->serialize_object($pdf_file_data, join("", " " x $indent, ">>\n"));
1179              
1180             # For streams, write the stream data.
1181 0 0         if (is_stream $object) {
1182 0 0 0       croak join(": ", $self->{-file} || (), "Stream written as direct object!\n") if $indent;
1183 0 0         my $newline = substr($object->{-data}, -1) eq "\n" ? "" : "\n";
1184 0           ${$pdf_file_data} =~ s/\n?\z/\n/;
  0            
1185 0           ${$pdf_file_data} .= "stream\n$object->{-data}${newline}endstream\n";
  0            
1186             }
1187 0           } elsif (is_array $object and not grep { ref $_; } @{$object}) {
  0            
1188             # Array of simple objects.
1189 0 0         if ($self->{-minify}) {
1190 0           $self->serialize_array($pdf_file_data, $object);
1191             } else {
1192 0           ${$pdf_file_data} .= "[ @{$object} ]\n";
  0            
  0            
1193             }
1194             } elsif (is_array $object) {
1195             # Array object.
1196 0           $self->serialize_object($pdf_file_data, "[\n");
1197 0           my $spaces = " " x ($indent + 2);
1198 0           foreach my $obj (@{$object}) {
  0            
1199 0 0         $self->add_indirect_objects($objects, $obj) if is_stream $obj;
1200 0 0         ${$pdf_file_data} .= $spaces unless $self->{-minify};
  0            
1201 0 0         if (not ref $obj) {
    0          
1202 0           $self->serialize_object($pdf_file_data, $obj);
1203 0           $spaces = " ";
1204             } elsif ($objects->[0]{$obj}) {
1205 0           $self->serialize_object($pdf_file_data, "$objects->[0]{$obj} 0 R\n");
1206 0           $spaces = " " x ($indent + 2);
1207             } else {
1208 0           $self->write_object($pdf_file_data, $objects, $seen, $obj, $indent + 2);
1209 0           $spaces = " " x ($indent + 2);
1210             }
1211             }
1212 0 0 0       ${$pdf_file_data} .= "\n" if $spaces eq " " and not $self->{-minify};
  0            
1213 0           $self->serialize_object($pdf_file_data, join("", " " x $indent, "]\n"));
1214             } elsif (reftype($object) eq "SCALAR") {
1215             # Unresolved indirect reference.
1216 0           my ($id, $gen) = split /-/, ${$object};
  0            
1217 0   0       $gen ||= "0";
1218 0           $self->serialize_object($pdf_file_data, join("", " " x $indent, "($id $gen R)\n"));
1219             } else {
1220             # Simple object.
1221 0           $self->serialize_object($pdf_file_data, join("", " " x $indent, "$object\n"));
1222             }
1223             }
1224              
1225             # Dump PDF object.
1226             sub dump_object {
1227 0     0 1   my ($self, $object, $label, $seen, $indent, $mode) = @_;
1228              
1229             # Dump output.
1230 0           my $output = "";
1231              
1232             # Check mode and object type.
1233 0 0 0       if ($mode eq "outline") {
    0 0        
    0          
    0          
    0          
    0          
1234 0 0 0       if (ref $object and $seen->{$object}) {
    0 0        
    0          
    0          
    0          
1235             # Previously-seen object; dump the label.
1236 0           $output = "$seen->{$object}";
1237             } elsif (is_hash $object) {
1238             # Hash object.
1239 0           $seen->{$object} = $label;
1240 0 0         if (is_stream $object) {
1241 0           $output = "(STREAM)";
1242             } else {
1243 0           $label =~ s/(?<=\w)$/->/;
1244 0 0         my @keys = sort { fc($a) cmp fc($b) || $a cmp $b; } keys %{$object};
  0            
  0            
1245 0           my $key_len = max map length $_, @keys;
1246 0           foreach my $key (@keys) {
1247 0           my $obj = $object->{$key};
1248 0 0         next unless ref $obj;
1249 0           $output .= sprintf "%s%-${key_len}s => ", " " x ($indent + 2), $key;
1250 0 0         $output .= $self->dump_object($object->{$key}, "$label\{$key\}", $seen, ref $object ? $indent + 2 : 0, $mode) . ",\n";
1251             }
1252 0 0         if ($output) {
1253 0           $output = join("", "{ # $label\n", $output, (" " x $indent), "}");
1254             } else {
1255 0           $output = "{...}";
1256             }
1257 0           $output =~ s/\{ \# \$pdf->\n/\{\n/;
1258             }
1259 0           } elsif (is_array $object and not grep { ref $_; } @{$object}) {
  0            
1260             # Array of simple objects.
1261 0           $output = "[...]";
1262             } elsif (is_array $object) {
1263             # Array object.
1264 0           for (my $i = 0; $i < @{$object}; $i++) {
  0            
1265 0 0         $output .= sprintf "%s%s,\n", " " x ($indent + 2), $self->dump_object($object->[$i], "$label\[$i\]", $seen, $indent + 2, $mode) if ref $object->[$i];
1266             }
1267 0 0         if ($output =~ /\A\s+(.*?),\n\z/) {
    0          
1268 0           $output = "[... $1]";
1269             } elsif ($output =~ /\n/) {
1270 0           $output = join("", "[ # $label\n", $output, (" " x $indent), "]");
1271             } else {
1272 0           $output = "[$output]";
1273             }
1274             } elsif (reftype($object) eq "SCALAR") {
1275             # Unresolved indirect reference.
1276 0           my ($id, $gen) = split /-/, ${$object};
  0            
1277 0   0       $gen ||= "0";
1278 0           $output .= "\"$id $gen R\"";
1279             }
1280             } elsif (ref $object and $seen->{$object}) {
1281             # Previously-seen object; dump the label.
1282 0           $output = $seen->{$object};
1283             } elsif (is_hash $object) {
1284             # Hash object.
1285 0           $seen->{$object} = $label;
1286 0           $output = "{ # $label\n";
1287 0           $label =~ s/(?<=\w)$/->/;
1288 0 0         my @keys = sort { fc($a) cmp fc($b) || $a cmp $b; } keys %{$object};
  0            
  0            
1289 0           my $key_len = max map length $_, @keys;
1290 0           foreach my $key (@keys) {
1291 0           my $obj = $object->{$key};
1292 0           $output .= sprintf "%s%-${key_len}s => ", " " x ($indent + 2), $key;
1293 0 0         if ($key eq -data) {
    0          
1294 0           chomp $obj;
1295 0 0         $output .= $obj =~ /\A(?:<\?xpacket|[\n\t -~]*\z)/ ? "<<'EOF',\n$obj\nEOF\n" : dump($obj) . "\n";
1296             } elsif (not ref $obj) {
1297 0           $output .= dump($obj) . ",\n";
1298             } else {
1299 0 0         $output .= $self->dump_object($object->{$key}, "$label\{$key\}", $seen, ref $object ? $indent + 2 : 0, $mode) . ",\n";
1300             }
1301             }
1302 0           $output .= (" " x $indent) . "}";
1303 0           $output =~ s/\{ \# \$pdf\n/\{\n/;
1304 0           } elsif (is_array $object and not grep { ref $_; } @{$object}) {
  0            
1305             # Array of simple objects.
1306 0 0         $output = sprintf "[%s]", join(", ", map { /^\d+\.\d+$/ ? $_ : dump($_); } @{$object});
  0            
  0            
1307             } elsif (is_array $object) {
1308             # Array object.
1309 0           $output .= "[ # $label\n";
1310 0           my $spaces = " " x ($indent + 2);
1311 0           for (my $i = 0; $i < @{$object}; $i++) {
  0            
1312 0           my $obj = $object->[$i];
1313 0 0         if (ref $obj) {
1314 0           $output .= sprintf "%s%s,\n", $spaces, $self->dump_object($obj, "$label\[$i\]", $seen, $indent + 2, $mode);
1315 0           $spaces = " " x ($indent + 2);
1316             } else {
1317 0           $output .= $spaces . dump($obj) . ",";
1318 0           $spaces = " ";
1319             }
1320             }
1321 0 0         $output .= ",\n" if $spaces eq " ";
1322 0           $output .= (" " x $indent) . "]";
1323             } elsif (reftype($object) eq "SCALAR") {
1324             # Unresolved indirect reference.
1325 0           my ($id, $gen) = split /-/, ${$object};
  0            
1326 0   0       $gen ||= "0";
1327 0           $output .= "\"$id $gen R\"";
1328             } else {
1329             # Simple object.
1330 0           $output = sprintf "%s%s\n", " " x $indent, dump($object);
1331             }
1332              
1333             # Return generated output.
1334 0           return $output;
1335             }
1336              
1337             1;
1338              
1339             __END__