File Coverage

lib/PDF/Data.pm
Criterion Covered Total %
statement 47 658 7.1
branch 0 406 0.0
condition 0 281 0.0
subroutine 16 65 24.6
pod 34 49 69.3
total 97 1459 6.6


line stmt bran cond sub pod time code
1             package PDF::Data;
2              
3             # Require Perl v5.16; enable warnings and UTF-8.
4 1     1   203346 use v5.16;
  1         3  
5 1     1   4 use warnings;
  1         1  
  1         29  
6 1     1   5 use utf8;
  1         2  
  1         3  
7              
8             # Declare module version. (Also in pod documentation below.)
9 1     1   372 use version; our $VERSION = version->declare('v1.0.0');
  1         1532  
  1         5  
10              
11             # Initialize modules.
12 1     1   97 use mro;
  1         1  
  1         7  
13 1     1   394 use namespace::autoclean;
  1         14069  
  1         3  
14 1     1   53 use Carp qw[carp croak confess];;
  1         2  
  1         44  
15 1     1   361 use Clone;
  1         2068  
  1         39  
16 1     1   498 use Compress::Raw::Zlib qw[:status :flush];
  1         3890  
  1         203  
17 1     1   443 use Data::Dump qw[dd dump];
  1         4200  
  1         53  
18 1     1   465 use List::MoreUtils qw[minmax];
  1         10841  
  1         4  
19 1     1   892 use List::Util qw[max];
  1         2  
  1         44  
20 1     1   418 use Math::Trig qw[pi];
  1         10684  
  1         65  
21 1     1   7 use POSIX qw[mktime strftime];
  1         1  
  1         7  
22 1     1   1353 use Scalar::Util qw[blessed reftype];
  1         2  
  1         37  
23              
24             # Use byte strings instead of Unicode character strings.
25 1     1   5 use bytes;
  1         1  
  1         12  
26              
27             # Basic parsing regular expressions.
28             our $n = qr/(?:\n|\r\n?)/; # Match a newline. (LF, CRLF or CR)
29             our $ws = qr/(?:(?:(?>%[^\r\n]*)?\s+)+)/; # Match whitespace, including PDF comments.
30              
31             # Declare prototypes.
32             sub is_hash ($);
33             sub is_array ($);
34             sub is_stream ($);
35              
36             # Utility functions.
37 0 0   0 0   sub is_hash ($) { ref $_[0] && reftype($_[0]) eq "HASH"; }
38 0 0   0 0   sub is_array ($) { ref $_[0] && reftype($_[0]) eq "ARRAY"; }
39 0 0   0 0   sub is_stream ($) { &is_hash && exists $_[0]{-data}; }
40              
41             # Create a new PDF::Data object, representing a minimal PDF file.
42             sub new {
43 0     0 1   my ($self, %args) = @_;
44              
45             # Get the class name.
46 0   0       my $class = blessed $self || $self;
47              
48             # Create a new instance using the constructor arguments.
49 0           my $pdf = bless \%args, $class;
50              
51             # Set creation timestamp.
52 0           $pdf->{Info}{CreationDate} = $pdf->timestamp;
53              
54             # Create an empty document catalog and page tree.
55 0           $pdf->{Root}{Pages} = { Kids => [], Count => 0 };
56              
57             # Validate the PDF structure and return the new instance.
58 0           return $pdf->validate;
59             }
60              
61             # Deep copy entire PDF::Data object.
62             sub clone {
63 0     0 1   my ($self) = @_;
64 0           return Clone::clone($self);
65             }
66              
67             # Create a new page with the specified size.
68             sub new_page {
69 0     0 1   my ($self, $x, $y) = @_;
70              
71             # Paper sizes.
72 0           my %sizes = (
73             LETTER => [ 8.5, 11 ],
74             A0 => [ 33.125, 46.8125 ],
75             A1 => [ 23.375, 33.125 ],
76             A2 => [ 16.5, 23.375 ],
77             A3 => [ 11.75, 16.5 ],
78             A4 => [ 8.25, 11.75 ],
79             A5 => [ 5.875, 8.25 ],
80             A6 => [ 4.125, 5.875 ],
81             A7 => [ 2.9375, 4.125 ],
82             A8 => [ 2.0625, 2.9375 ],
83             );
84              
85             # Default page size to US Letter (8.5" x 11").
86 0 0 0       unless ($x and $y and $x > 0 and $y > 0) {
      0        
      0        
87 0   0       $x ||= "LETTER";
88 0 0         croak "Error: Unknown paper size \"$x\"!\n" unless $sizes{$x};
89 0           ($x, $y) = @{$sizes{$x}};
  0            
90             }
91              
92             # Make sure page size was specified.
93 0 0 0       croak join(": ", $self->{-file} || (), "Error: Paper size not specified!\n") unless $x and $y and $x > 0 and $y > 0;
      0        
      0        
      0        
94              
95             # Scale inches to default user space units (72 DPI).
96 0 0         $x *= 72 if $x < 72;
97 0 0         $y *= 72 if $y < 72;
98              
99             # Create and return a new page object.
100             return {
101 0           Type => "/Page",
102             MediaBox => [0, 0, $x, $y],
103             Contents => { -data => "" },
104             Resources => {
105             ProcSet => ["/PDF", "/Text"],
106             },
107             };
108             }
109              
110             # Deep copy the specified page object.
111             sub copy_page {
112 0     0 1   my ($self, $page) = @_;
113              
114             # Temporarily hide parent reference.
115 0           delete local $page->{Parent};
116              
117             # Clone the page object.
118 0           my $copied_page = Clone::clone($page);
119              
120             # return cloned page object.
121 0           return $copied_page;
122             }
123              
124             # Append the specified page to the PDF.
125             sub append_page {
126 0     0 1   my ($self, $page) = @_;
127              
128             # Increment page count for page tree root node.
129 0           $self->{Root}{Pages}{Count}++;
130              
131             # Add page object to page tree root node for simplicity.
132 0           push @{$self->{Root}{Pages}{Kids}}, $page;
  0            
133 0           $page->{Parent} = $self->{Root}{Pages};
134              
135             # Return the page object.
136 0           return $page;
137             }
138              
139             # Read and parse PDF file.
140             sub read_pdf {
141 0     0 1   my ($self, $file, %args) = @_;
142              
143             # Read entire file at once.
144 0           local $/;
145              
146             # Contents of entire PDF file.
147 0           my $data;
148              
149             # Check for standard input.
150 0 0 0       if (($file // "-") eq "-") {
151             # Read all data from standard input.
152 0           $file = "";
153 0 0         binmode STDIN or croak "$file: $!\n";
154 0           $data = ;
155 0 0         close STDIN or croak "$file: $!\n";
156             } else {
157             # Read the entire file.
158 0 0         open my $IN, '<', $file or croak "$file: $!\n";
159 0 0         binmode $IN or croak "$file: $!\n";
160 0           $data = <$IN>;
161 0 0         close $IN or croak "$file: $!\n";
162             }
163              
164             # Parse PDF file data and return new instance.
165 0           return $self->parse_pdf($data, -file => $file, %args);
166             }
167              
168             # Parse PDF file data.
169             sub parse_pdf {
170 0     0 1   my ($self, $data, %args) = @_;
171              
172             # Get the class name.
173 0   0       my $class = blessed $self || $self;
174              
175             # Create a new instance using the provided arguments.
176 0           $self = bless \%args, $class;
177              
178             # Validate PDF file structure.
179             my ($pdf_version, $startxref) = $data =~ /\A(?:%PDF-(\d+\.\d+)$n.*$n)startxref$n(\d+)$n%%EOF$n?\z/s
180 0 0 0       or croak join(": ", $self->{-file} || (), "File is not a valid PDF document!\n");
181              
182             # Check PDF version.
183 0 0 0       warn join(": ", $self->{-file} || (), "Warning: PDF version $pdf_version not supported!\n")
184             unless $pdf_version =~ /^1\.[0-7]$/;
185              
186             # Parsed indirect objects.
187 0           my $objects = {};
188              
189             # Parse PDF objects.
190 0           my @objects = $self->parse_objects($objects, $data, 0);
191              
192             # PDF trailer dictionary.
193 0           my $trailer;
194              
195             # Find trailer dictionary.
196 0           for (my $i = 0; $i < @objects; $i++) {
197 0 0         if ($objects[$i][0] eq "trailer") {
198             $i < $#objects and $objects[$i + 1][1]{type} eq "dict"
199 0 0 0       or croak join(": ", $self->{-file} || (), "Byte offset $objects[$i][1]{offset}: Invalid trailer dictionary!\n");
      0        
200 0           $trailer = $objects[$i + 1][0];
201 0           last;
202             }
203             }
204              
205             # Make sure trailer dictionary was found.
206 0 0 0       croak join(": ", $self->{-file} || (), "PDF trailer dictionary not found!\n") unless defined $trailer;
207              
208             # Resolve indirect object references.
209 0           $self->resolve_references($objects, $trailer);
210              
211             # Create a new instance from the parsed data.
212 0           my $pdf = bless $trailer, $class;
213              
214             # Add any provided arguments.
215 0           foreach my $key (sort keys %args) {
216 0           $pdf->{$key} = $args{$key};
217             }
218              
219             # Validate the PDF structure (unless the -novalidate flag is set) and return the new instance.
220 0 0         return $self->{-novalidate} ? $pdf : $pdf->validate;
221             }
222              
223             # Generate and write a new PDF file.
224             sub write_pdf {
225 0     0 1   my ($self, $file, $time) = @_;
226              
227             # Default missing timestamp to current time, but keep a zero time as a flag.
228 0   0       $time //= time;
229              
230             # Generate PDF file data.
231 0           my $pdf_data = $self->pdf_file_data($time);
232              
233             # Check if standard output is wanted.
234 0 0 0       if (($file // "-") eq "-") {
235             # Write PDF file data to standard output.
236 0           $file = "";
237 0 0         binmode STDOUT or croak "$file: $!\n";
238 0 0         print STDOUT $pdf_data or croak "$file: $!\n";
239             } else {
240             # Write PDF file data to specified output file.
241 0 0         open my $OUT, ">", $file or croak "$file: $!\n";
242 0 0         binmode $OUT or croak "$file: $!\n";
243 0 0         print $OUT $pdf_data or croak "$file: $!\n";
244 0 0         close $OUT or croak "$file: $!\n";
245              
246             # Set modification time to the specified or current timestamp, unless zero.
247 0 0         utime $time, $time, $file if $time;
248              
249             # Print success message.
250 0           print STDERR "Wrote new PDF file \"$file\".\n\n";
251             }
252             }
253              
254             # Generate PDF file data suitable for writing to an output PDF file.
255             sub pdf_file_data {
256 0     0 1   my ($self, $time) = @_;
257              
258             # Default missing timestamp to current time, but keep a zero time as a flag.
259 0   0       $time //= time;
260              
261             # Set PDF modification timestamp, unless zero.
262 0 0         $self->{Info}{ModDate} = $self->timestamp($time) if $time;
263              
264             # Set PDF producer.
265 0           $self->{Info}{Producer} = sprintf "(%s)", join " ", __PACKAGE__, $VERSION;
266              
267             # Validate the PDF structure.
268 0           $self->validate;
269              
270             # Array of indirect objects, with lookup hash as first element.
271 0           my $objects = [{}];
272              
273             # Objects seen while generating the PDF file data.
274 0           my $seen = {};
275              
276             # Start with PDF header.
277 0           my $pdf_file_data = "%PDF-1.4\n%\xBF\xF7\xA2\xFE\n\n";
278              
279             # Write all indirect objects.
280 0           my $xrefs = $self->write_indirect_objects(\$pdf_file_data, $objects, $seen);
281              
282             # Add cross-reference table.
283 0           my $startxref = length($pdf_file_data);
284 0           $pdf_file_data .= sprintf "xref\n0 %d\n", scalar @{$xrefs};
  0            
285 0           $pdf_file_data .= join("", @{$xrefs});
  0            
286              
287             # Save correct size in trailer dictionary.
288 0           $self->{Size} = scalar @{$xrefs};
  0            
289              
290             # Write trailer dictionary.
291 0           $pdf_file_data .= "trailer ";
292 0           $self->write_object(\$pdf_file_data, $objects, $seen, $self, 0);
293              
294             # Write startxref value.
295 0           $pdf_file_data =~ s/\n?\z/\n/;
296 0           $pdf_file_data .= "startxref\n$startxref\n";
297              
298             # End of PDF file data.
299 0           $pdf_file_data .= "%%EOF\n";
300              
301             # Return PDF file data.
302 0           return $pdf_file_data;
303             }
304              
305             # Dump internal structure of PDF file.
306             sub dump_pdf {
307 0     0 1   my ($self, $file, $mode) = @_;
308              
309             # Use "" instead of "-" to describe standard output.
310 0           my $filename = $file =~ s/^-$//r;
311              
312             # Open output file.
313 0 0         open my $OUT, ">$file" or croak "$filename: $!\n";
314              
315             # Data structures already seen.
316 0           my $seen = {};
317              
318             # Dump PDF structures.
319 0 0         printf $OUT "\$pdf = %s;\n", $self->dump_object($self, '$pdf', $seen, 0, $mode) or croak "$filename: $!\n";
320              
321             # Close output file.
322 0 0         close $OUT or croak "$filename: $!\n";
323              
324             # Print success message.
325 0 0         if ($mode eq "outline") {
326 0 0         print STDERR "Dumped outline of PDF internal structure to file \"$file\".\n\n" unless $file eq "-";
327             } else {
328 0 0         print STDERR "Dumped PDF internal structure to file \"$file\".\n\n" unless $file eq "-";
329             }
330             }
331              
332             # Dump outline of internal structure of PDF file.
333             sub dump_outline {
334 0     0 1   my ($self, $file) = @_;
335              
336             # Call dump_pdf() with outline parameter.
337 0   0       return $self->dump_pdf($file // "-", "outline");
338             }
339              
340             # Merge content streams.
341             sub merge_content_streams {
342 0     0 1   my ($self, $streams) = @_;
343              
344             # Make sure content is an array.
345 0 0         return $streams unless is_array $streams;
346              
347             # Remove extra trailing space from streams.
348 0           foreach my $stream (@{$streams}) {
  0            
349 0 0         die unless is_stream $stream;
350 0           $stream->{-data} =~ s/(?<=\s) \z//;
351             }
352              
353             # Concatenate stream data and calculate new length.
354 0           my $merged = { -data => join("", map { $_->{-data}; } @{$streams}) };
  0            
  0            
355 0           $merged->{Length} = length($merged->{-data});
356              
357             # Return merged content stream.
358 0           return $merged;
359             }
360              
361             # Find bounding box for a content stream.
362             sub find_bbox {
363 0     0 1   my ($self, $content_stream, $new) = @_;
364              
365             # Get data from stream, if necessary.
366 0 0         $content_stream = $content_stream->{-data} if is_stream $content_stream;
367              
368             # Split content stream into lines.
369 0           my @lines = grep { $_ ne ""; } split /\n/, $content_stream;
  0            
370              
371             # Bounding box.
372 0           my ($left, $bottom, $right, $top);
373              
374             # Regex to match a number.
375 0           my $n = qr/-?\d+(?:\.\d+)?/;
376              
377             # Determine bounding box from content stream.
378 0           foreach (@lines) {
379             # Skip neutral lines.
380 0 0         next if m{^(?:/Figure <>BDC|/PlacedGraphic /MC\d BDC|EMC|/GS\d gs|BX /Sh\d sh EX Q|[Qqh]|W n|$n $n $n $n $n $n cm)\s*$};
381              
382             # Capture coordinates from drawing operations to calculate bounding box.
383 0 0         if (my ($x1, $y1, $x2, $y2, $x3, $y3) = /^($n) ($n) (?:[ml]|($n) ($n) (?:[vy]|($n) ($n) c))$/) {
    0          
384 0           ($left, $right) = minmax grep { defined $_; } $left, $right, $x1, $x2, $x3;
  0            
385 0           ($bottom, $top) = minmax grep { defined $_; } $bottom, $top, $y1, $y2, $y3;
  0            
386             } elsif (my ($x, $y, $width, $height) = /^($n) ($n) ($n) ($n) re$/) {
387 0           ($left, $right) = minmax grep { defined $_; } $left, $right, $x, $x + $width;
  0            
388 0           ($bottom, $top) = minmax grep { defined $_; } $bottom, $top, $y, $y + $height;
  0            
389             } else {
390 0           croak "Parse error: Content line \"$_\" not recognized!\n";
391             }
392             }
393              
394             # Print bounding box and rectangle.
395 0           my $width = $right - $left;
396 0           my $height = $top - $bottom;
397 0           print STDERR "Bounding Box: $left $bottom $right $top\nRectangle: $left $bottom $width $height\n\n";
398              
399             # Return unless generating a new bounding box.
400 0 0         return unless $new;
401              
402             # Update content stream.
403 0           for ($content_stream) {
404             # Update coordinates in drawing operations.
405 0           s/^($n) ($n) ([ml])$/join " ", $self->round($1 - $left, $2 - $bottom), $3/egm;
  0            
406 0           s/^($n) ($n) ($n) ($n) ([vy])$/join " ", $self->round($1 - $left, $2 - $bottom, $3 - $left, $4 - $bottom), $5/egm;
  0            
407 0           s/^($n) ($n) ($n) ($n) ($n) ($n) (c)$/join " ", $self->round($1 - $left, $2 - $bottom, $3 - $left, $4 - $bottom, $5 - $left, $6 - $bottom), $7/egm;
  0            
408 0           s/^($n $n $n $n) ($n) ($n) (cm)$/join " ", $1, $self->round($2 - $left, $3 - $bottom), $4/egm;
  0            
409             }
410              
411             # Return content stream.
412 0           return $content_stream;
413             }
414              
415             # Make a new bounding box for a content stream.
416             sub new_bbox {
417 0     0 1   my ($self, $content_stream) = @_;
418              
419             # Call find_bbox() with "new" parameter.
420 0           $self->find_bbox($content_stream, 1);
421             }
422              
423             # Generate timestamp in PDF internal format.
424             sub timestamp {
425 0     0 1   my ($self, $time) = @_;
426              
427 0   0       $time //= time;
428 0           my @time = localtime $time;
429 0           my $tz = $time[8] * 60 - mktime(gmtime 0) / 60;
430 0           return sprintf "(D:%s%+03d'%02d)", strftime("%Y%m%d%H%M%S", @time), $tz / 60, abs($tz) % 60;
431             }
432              
433             # Round numeric values to 12 significant digits to avoid floating-point rounding error and remove trailing zeroes.
434             sub round {
435 0     0 1   my ($self, @numbers) = @_;
436              
437 0   0       @numbers = map { sprintf("%.12f", sprintf("%.12g", $_ || 0)) =~ s/\.?0+$//r; } @numbers;
  0            
438 0 0         return wantarray ? @numbers : $numbers[0];
439             }
440              
441             # Concatenate a transformation matrix with an original matrix, returning a new matrix.
442             sub concat_matrix {
443 0     0 1   my ($self, $transform, $orig) = @_;
444              
445 0           return [$self->round(
446             $transform->[0] * $orig->[0] + $transform->[1] * $orig->[2],
447             $transform->[0] * $orig->[1] + $transform->[1] * $orig->[3],
448             $transform->[2] * $orig->[0] + $transform->[3] * $orig->[2],
449             $transform->[2] * $orig->[1] + $transform->[3] * $orig->[3],
450             $transform->[4] * $orig->[0] + $transform->[5] * $orig->[2] + $orig->[4],
451             $transform->[4] * $orig->[1] + $transform->[5] * $orig->[3] + $orig->[5],
452             )];
453             }
454              
455             # Calculate the inverse of a matrix, if possible.
456             sub invert_matrix {
457 0     0 1   my ($self, $matrix) = @_;
458              
459             # Calculate the determinant of the matrix.
460 0           my $det = $self->round($matrix->[0] * $matrix->[3] - $matrix->[1] * $matrix->[2]);
461              
462             # If the determinant is zero, then the matrix is not invertible.
463 0 0         return if $det == 0;
464              
465             # Return the inverse matrix.
466 0           return [$self->round(
467             $matrix->[3] / $det,
468             -$matrix->[1] / $det,
469             -$matrix->[2] / $det,
470             $matrix->[0] / $det,
471             ($matrix->[2] * $matrix->[5] - $matrix->[3] * $matrix->[4]) / $det,
472             ($matrix->[1] * $matrix->[4] - $matrix->[0] * $matrix->[5]) / $det,
473             )];
474             }
475              
476             # Create a transformation matrix to translate the origin of the coordinate system to the specified coordinates.
477             sub translate {
478 0     0 1   my ($self, $x, $y) = @_;
479              
480             # Return a translate matrix.
481 0           return [$self->round(1, 0, 0, 1, $x, $y)];
482             }
483              
484             # Create a transformation matrix to scale the coordinate space by the specified horizontal and vertical scaling factors.
485             sub scale {
486 0     0 1   my ($self, $x, $y) = @_;
487              
488             # Return a scale matrix.
489 0           return [$self->round($x, 0, 0, $y, 0, 0)];
490             }
491              
492             # Create a transformation matrix to rotate the coordinate space counterclockwise by the specified angle (in degrees).
493             sub rotate {
494 0     0 1   my ($self, $angle) = @_;
495              
496             # Calculate the sine and cosine of the angle.
497 0           my $sin = sin($angle * pi / 180);
498 0           my $cos = cos($angle * pi / 180);
499              
500             # Return a rotate matrix.
501 0           return [$self->round($cos, $sin, -$sin, $cos, 0, 0)];
502             }
503              
504             # Validate PDF structure.
505             sub validate {
506 0     0 1   my ($self) = @_;
507              
508             # Catch validation errors.
509 0           eval {
510             # Make sure document catalog exists and has the correct type.
511 0           $self->validate_key("Root", "Type", "/Catalog", "document catalog");
512              
513             # Make sure page tree root node exists, has the correct type, and has no parent.
514 0           $self->validate_key("Root/Pages", "Type", "/Pages", "page tree root");
515 0           $self->validate_key("Root/Pages", "Parent", undef, "page tree root");
516              
517             # Validate page tree.
518 0           $self->validate_page_tree("Root/Pages", $self->{Root}{Pages});
519             };
520              
521             # Check for validation errors.
522 0 0         if ($@) {
523             # Make validation errors fatal if -validate flag is set.
524 0 0         if ($self->{-validate}) {
525 0           croak $@;
526             } else {
527 0           carp $@;
528             }
529             }
530              
531             # Return this instance.
532 0           return $self;
533             }
534              
535             # Validate page tree.
536             sub validate_page_tree {
537 0     0 0   my ($self, $path, $page_tree_node) = @_;
538              
539             # Count of leaf nodes (page objects) under this page tree node.
540 0           my $count = 0;
541              
542             # Validate children.
543 0 0 0       is_array(my $kids = $page_tree_node->{Kids}) or croak join(": ", $self->{-file} || (), "Error: $path\->{Kids} must be an array!\n");
544 0           for (my $i = 0; $i < @{$kids}; $i++) {
  0            
545 0 0 0       is_hash(my $kid = $kids->[$i]) or croak join(": ", $self->{-file} || (), "Error: $path\[$i] must be be a hash!\n");
546 0 0 0       $kid->{Type} or croak join(": ", $self->{-file} || (), "Error: $path\[$i]->{Type} is a required field!\n");
547 0 0         if ($kid->{Type} eq "/Pages") {
    0          
548 0           $count += $self->validate_page_tree("$path\[$i]", $kid);
549             } elsif ($kid->{Type} eq "/Page") {
550 0           $self->validate_page("$path\[$i]", $kid);
551 0           $count++;
552             } else {
553 0   0       croak join(": ", $self->{-file} || (), "Error: $path\[$i]->{Type} must be /Pages or /Page!\n");
554             }
555             }
556              
557             # Validate resources, if any.
558 0 0         $self->validate_resources("$path\->{Resources}", $page_tree_node->{Resources}) if is_hash($page_tree_node->{Resources});
559              
560             # Fix leaf node count if wrong.
561 0 0 0       if (($page_tree_node->{Count} || 0) != $count) {
562 0   0       warn join(": ", $self->{-file} || (), "Warning: Fixing: $path->{Count} = $count\n");
563 0           $page_tree_node->{Count} = $count;
564             }
565             }
566              
567             # Validate page object.
568             sub validate_page {
569 0     0 0   my ($self, $path, $page) = @_;
570              
571 0 0         if (my $contents = $page->{Contents}) {
572 0 0         $contents = $self->merge_content_streams($contents) if is_array($contents);
573 0 0 0       is_stream($contents) or croak join(": ", $self->{-file} || (), "Error: $path\->{Contents} must be an array or stream!\n");
574 0           $self->validate_content_stream("$path\->{Contents}", $contents);
575             }
576              
577             # Validate resources, if any.
578 0 0         $self->validate_resources("$path\->{Resources}", $page->{Resources}) if is_hash($page->{Resources});
579             }
580              
581             # Validate resources.
582             sub validate_resources {
583 0     0 0   my ($self, $path, $resources) = @_;
584              
585             # Validate XObjects, if any.
586 0 0         $self->validate_xobjects("$path\{XObject}", $resources->{XObject}) if is_hash($resources->{XObject});
587             }
588              
589             # Validate form XObjects.
590             sub validate_xobjects {
591 0     0 0   my ($self, $path, $xobjects) = @_;
592              
593             # Validate each form XObject.
594 0           foreach my $name (sort keys %{$xobjects}) {
  0            
595 0           $self->validate_xobject("$path\{$name}", $xobjects->{$name});
596             }
597             }
598              
599             # Validate a single form XObject.
600             sub validate_xobject {
601 0     0 0   my ($self, $path, $xobject) = @_;
602              
603             # Make sure the form XObject is a stream.
604 0 0 0       is_stream($xobject) or croak join(": ", $self->{-file} || (), "Error: $path must be a content stream!\n");
605              
606             # Make sure the Subtype is set to /Form.
607 0 0 0       $xobject->{Subtype} eq "/Form" or croak join(": ", $self->{-file} || (), "Error: $path\->{Subtype} must be /Form!\n");
608              
609             # Validate the form XObject content stream.
610 0           $self->validate_content_stream($path, $xobject);
611              
612             # Validate resources, if any.
613 0 0         $self->validate_resources("$path\{Resources}", $xobject->{Resources}) if is_hash($xobject->{Resources});
614             }
615              
616             # Validate content stream.
617             sub validate_content_stream {
618 0     0 0   my ($self, $path, $stream) = @_;
619              
620             # Make sure the content stream can be parsed.
621 0           my @objects = eval { $self->parse_objects({}, $stream->{-data}, 0); };
  0            
622 0 0 0       croak join(": ", $self->{-file} || (), "Error: $path: $@") if $@;
623              
624             # Minify content stream if requested.
625 0 0         $self->minify_content_stream($stream, \@objects) if $self->{-minify};
626             }
627              
628             # Minify content stream.
629             sub minify_content_stream {
630 0     0 0   my ($self, $stream, $objects) = @_;
631              
632             # Parse object stream if necessary.
633 0   0       $objects ||= [ $self->parse_objects({}, $stream->{-data}, 0) ];
634              
635             # Generate new content stream from objects.
636 0           $stream->{-data} = $self->generate_content_stream($objects);
637              
638             # Recalculate stream length.
639 0           $stream->{Length} = length $stream->{-data};
640              
641             # Sanity check.
642             die "Content stream serialization failed"
643 0           if dump([map {$_->[0]} @{$objects}]) ne
  0            
644 0 0         dump([map {$_->[0]} $self->parse_objects({}, $stream->{-data}, 0)]);
  0            
645             }
646              
647             # Generate new content stream from objects.
648             sub generate_content_stream {
649 0     0 0   my ($self, $objects) = @_;
650              
651             # Generated content stream.
652 0           my $stream = "";
653              
654             # Loop across parsed objects.
655 0           my $last_object;
656 0           foreach my $object (@{$objects}) {
  0            
657             # Check parsed object type.
658 0 0         if ($object->[1]{type} eq "dict") {
    0          
659             # Serialize dictionary.
660 0           $self->serialize_dictionary(\$stream, $object->[0]);
661             } elsif ($object->[1]{type} eq "array") {
662             # Serialize array.
663 0           $self->serialize_array(\$stream, $object->[0]);
664             } else {
665             # Serialize string or other token.
666 0           $self->serialize_object(\$stream, $object->[0]);
667             }
668             } continue {
669 0           $last_object = $object;
670             }
671              
672             # Return generated content stream.
673 0           return $stream;
674             }
675              
676             # Serialize a hash as a dictionary object.
677             sub serialize_dictionary {
678 0     0 0   my ($self, $stream, $hash) = @_;
679              
680             # Serialize the hash key-value pairs.
681 0           my @pairs = %{$hash};
  0            
682 0           ${$stream} .= "<<";
  0            
683 0           for (my $i = 0; $i < @pairs; $i++) {
684 0 0         if ($i % 2) {
685 0 0         if (is_hash($pairs[$i])) {
    0          
686 0           $self->serialize_dictionary($stream, $pairs[$i]);
687             } elsif (is_array($pairs[$i])) {
688 0           $self->serialize_array($stream, $pairs[$i]);
689             } else {
690 0           $self->serialize_object($stream, $pairs[$i]);
691             }
692             } else {
693 0           ${$stream} .= "/$pairs[$i]";
  0            
694             }
695             }
696 0           ${$stream} .= ">>";
  0            
697             }
698              
699             # Serialize an array.
700             sub serialize_array {
701 0     0 0   my ($self, $stream, $array) = @_;
702              
703             # Serialize the array values.
704 0           ${$stream} .= "[";
  0            
705 0           foreach my $obj (@{$array}) {
  0            
706 0 0         if (is_hash($obj)) {
    0          
707 0           $self->serialize_dictionary($stream, $obj);
708             } elsif (is_array($obj)) {
709 0           $self->serialize_array($stream, $obj);
710             } else {
711 0           $self->serialize_object($stream, $obj);
712             }
713             }
714 0           ${$stream} .= "]";
  0            
715             }
716              
717             # Append the serialization of an object to the generated content stream.
718             sub serialize_object {
719 0     0 0   my ($self, $stream, $object) = @_;
720              
721             # Strip leading/trailing whitespace from object if minifying.
722 0 0         if ($self->{-minify}) {
723 0           $object =~ s/^\s+//;
724 0           $object =~ s/\s+$//;
725             }
726              
727             # Wrap the line if line length would exceed 255 characters.
728 0 0         ${$stream} .= "\n" if length((${$stream} =~ /(.*)\z/)[0]) + length($object) >= 255;
  0            
  0            
729              
730             # Add a space if necessary.
731 0 0 0       ${$stream} .= " " unless ${$stream} =~ /(^|[\s)>\[\]{}])$/ or $object =~ /^[\s()<>\[\]{}\/%]/;
  0            
  0            
732              
733             # Add the serialized object.
734 0           ${$stream} .= $object;
  0            
735             }
736              
737             # Validate the specified hash key value.
738             sub validate_key {
739 0     0 1   my ($self, $hash, $key, $value, $label) = @_;
740              
741             # Create the hash if necessary.
742 0 0         $hash = $_[1] = {} unless $hash;
743              
744             # Get the hash node from the PDF structure by path, if necessary.
745 0 0         $hash = $self->get_hash_node($hash) unless is_hash $hash;
746              
747             # Make sure the hash key has the correct value.
748 0 0 0       if (defined $value and (not defined $hash->{$key} or $hash->{$key} ne $value)) {
    0 0        
      0        
749 0 0 0       warn join(": ", $self->{-file} || (), "Warning: Fixing $label: {$key} $hash->{$key} -> $value\n") if $hash->{$key};
750 0           $hash->{$key} = $value;
751             } elsif (not defined $value and exists $hash->{$key}) {
752 0 0 0       warn join(": ", $self->{-file} || (), "Warning: Deleting $label: {$key} $hash->{$key}\n") if $hash->{$key};
753 0           delete $hash->{$key};
754             }
755              
756             # Return this instance.
757 0           return $self;
758             }
759              
760             # Get a hash node from the PDF structure by path.
761             sub get_hash_node {
762 0     0 1   my ($self, $path) = @_;
763              
764             # Split the path.
765 0           my @path = split /\//, $path;
766              
767             # Find the hash node with the specified path, creating nodes if necessary.
768 0           my $hash = $self;
769 0           foreach my $key (@path) {
770 0   0       $hash->{$key} ||= {};
771 0           $hash = $hash->{$key};
772             }
773              
774             # Return the hash node.
775 0           return $hash;
776             }
777              
778             # Parse PDF objects into Perl representations.
779             sub parse_objects {
780 0     0 1   my ($self, $objects, $data, $offset) = @_;
781              
782             # Parsed PDF objects.
783 0           my @objects;
784              
785             # Calculate EOF offset.
786 0           my $eof = $offset + length $data;
787              
788             # Copy data for parsing.
789 0           local $_ = $data;
790              
791             # Parse PDF objects in input string.
792 0           while ($_ ne "") {
793             # Update the file offset.
794 0           $offset = $eof - length $_;
795              
796             # Parse the next PDF object.
797 0 0         if (s/\A$ws//) { # Strip leading whitespace/comments.
    0          
    0          
    0          
    0          
    0          
798 0           next;
799             } elsif (s/\A(<<((?:[^<>]+|<[^<>]+>|(?1))*)$ws?>>)//) { # Dictionary: <<...>> (including nested dictionaries)
800 0           my @pairs = $self->parse_objects($objects, $2, $offset);
801 0           for (my $i = 0; $i < @pairs; $i++) {
802             $pairs[$i] = $i % 2 ? $pairs[$i][0] : $pairs[$i][1]{name}
803 0 0 0       // croak join(": ", $self->{-file} || (), "Byte offset $offset: Dictionary key is not a name!\n");
      0        
804             }
805 0           push @objects, [ { @pairs }, { type => "dict" } ];
806             } elsif (s/\A(\[((?:(?>[^\[\]]+)|(?1))*)\])//) { # Array: [...] (including nested arrays)
807 0           my $array = [ map $_->[0], $self->parse_objects($objects, $2, $offset) ];
808 0           push @objects, [ $array, { type => "array" }];
809             } elsif (s/\A(\((?:(?>[^\\()]+)|\\.|(?1))*\))//) { # String literal: (...) (including nested parens)
810 0           push @objects, [ $1, { type => "string" } ];
811             } elsif (s/\A(<[0-9A-Fa-f\s]*>)//) { # Hexadecimal string literal: <...>
812 0           push @objects, [ lc($1) =~ s/\s+//gr, { type => "hex" } ];
813             } elsif (s/\A(\/?[^\s()<>\[\]{}\/%]+)//) { # /Name, number or other token
814             # Check for tokens of special interest.
815 0           my $token = $1;
816 0 0 0       if ($token eq "obj" or $token eq "R") { # Indirect object/reference: 999 0 obj or 999 0 R
    0          
    0          
    0          
    0          
    0          
    0          
    0          
817 0           my ($id, $gen) = splice @objects, -2;
818 0 0         my $type = $token eq "R" ? "reference" : "definition";
819             "$id->[1]{type} $gen->[1]{type}" eq "int int"
820 0 0 0       or croak join(": ", $self->{-file} || (), "Byte offset $offset: $id->[0] $gen->[0] $token: Invalid indirect object $type!\n");
821 0   0       my $new_id = join("-", $id->[0], $gen->[0] || ());
822             push @objects, [
823             ($token eq "R" ? \$new_id : $new_id),
824             { type => $token, offset => $id->[1]{offset} }
825 0 0         ];
826             } elsif ($token eq "stream") { # Stream content: stream ... endstream
827 0           my ($id, $stream) = @objects[-2,-1];
828 0 0 0       $stream->[1]{type} eq "dict" or croak join(": ", $self->{-file} || (), "Byte offset $offset: Stream dictionary missing!\n");
829 0 0 0       $id->[1]{type} eq "obj" or croak join(": ", $self->{-file} || (), "Byte offset $offset: Invalid indirect object definition!\n");
830 0           $_ = $_->[0] for $id, $stream;
831             defined(my $length = $stream->{Length})
832 0 0 0       or warn join(": ", $self->{-file} || (), "Byte offset $offset: Object #$id: Stream length not found in metadata!\n");
833 0           s/\A\r?\n//;
834              
835             # If the declared stream length is missing or invalid, determine the shortest possible length to make the stream valid.
836 0 0 0       unless (defined($length) && substr($_, $length) =~ /\A(\s*endstream$ws)/) {
837 0 0         if (/\A((?>(?:[^e]+|(?!endstream\s)e)*))\s*endstream$ws/) {
838 0           $length = length($1);
839             } else {
840 0   0       croak join(": ", $self->{-file} || (), "Byte offset $offset: Invalid stream definition!\n");
841             }
842             }
843              
844 0           $stream->{-data} = substr($_, 0, $length);
845 0           $stream->{-id} = $id;
846 0           $stream->{Length} = $length;
847              
848 0           $_ = substr($_, $length);
849 0           s/\A\s*endstream$ws//;
850              
851 0 0         $self->filter_stream($stream) if $stream->{Filter};
852             } elsif ($token eq "endobj") { # Indirect object definition: 999 0 obj ... endobj
853 0           my ($id, $object) = splice @objects, -2;
854 0 0 0       $id->[1]{type} eq "obj" or croak join(": ", $self->{-file} || (), "Byte offset $offset: Invalid indirect object definition!\n");
855 0           $object->[1]{id} = $id->[0];
856 0           $objects->{$id->[0]} = $object;
857 0   0       $objects->{offset}{$object->[1]{offset} // $offset} = $object;
858 0           push @objects, $object;
859             } elsif ($token eq "xref") { # Cross-reference table
860             s/\A$ws\d+$ws\d+$n(?>\d{10}\ \d{5}\ [fn](?:\ [\r\n]|\r\n))+//
861 0 0 0       or croak join(": ", $self->{-file} || (), "Byte offset $offset: Invalid cross-reference table!\n");
862             } elsif ($token =~ /^[+-]?\d+$/) { # Integer: [+-]999
863 0           push @objects, [ $token, { type => "int" } ];
864             } elsif ($token =~ /^[+-]?(?:\d+\.\d*|\.\d+)$/) { # Real number: [+-]999.999
865 0           push @objects, [ $token, { type => "real" } ];
866             } elsif ($token =~ /^\/(.*)$/) { # Name: /Name
867 0           push @objects, [ $token, { type => "name", name => $1 } ];
868             } elsif ($token =~ /^(?:true|false)$/) { # Boolean: true or false
869 0           push @objects, [ $token, { type => "bool", bool => $token eq "true" } ];
870             } else { # Other token
871 0           push @objects, [ $token, { type => "token" } ];
872             }
873             } else {
874 0           s/\A([^\r\n]*).*\z/$1/s;
875 0   0       croak join(": ", $self->{-file} || (), "Byte offset $offset: Parse error on input: \"$_\"\n");
876             }
877              
878             # Update offset/length of last object.
879 0   0       $objects[-1][1]{offset} //= $offset;
880 0           $objects[-1][1]{length} = $eof - length($_) - $objects[-1][1]{offset};
881             }
882              
883             # Return parsed PDF objects.
884 0           return @objects;
885             }
886              
887             # Parse PDF objects from standalone PDF data.
888             sub parse_data {
889 0     0 0   my ($self, $data) = @_;
890              
891             # Parse PDF objects from data.
892 0   0       my @objects = $self->parse_objects({}, $data // "", 0);
893              
894             # Discard parser metadata.
895 0           @objects = map { $_->[0]; } @objects;
  0            
896              
897             # Return parsed objects.
898 0 0         return wantarray ? @objects : $objects[0];
899             }
900              
901             # Filter stream data.
902             sub filter_stream {
903 0     0 1   my ($self, $stream) = @_;
904              
905             # Get stream filters, if any.
906 0 0         my @filters = $stream->{Filter} ? is_array $stream->{Filter} ? @{$stream->{Filter}} : ($stream->{Filter}) : ();
  0 0          
907              
908             # Decompress stream data if necessary.
909 0 0         if ($filters[0] eq "/FlateDecode") {
910             # Remember that this stream was compressed.
911 0           $stream->{-compress} = 1;
912              
913             # Decompress the stream.
914 0           my $zlib = new Compress::Raw::Zlib::Inflate;
915 0           my $output;
916 0           my $status = $zlib->inflate($stream->{-data}, $output);
917 0 0 0       if ($status == Z_OK or $status == Z_STREAM_END) {
918 0           $stream->{-data} = $output;
919 0           $stream->{Length} = length $output;
920             } else {
921 0   0       croak join(": ", $self->{-file} || (), "Object #$stream->{-id}: Stream inflation failed! ($zlib->msg)\n");
922             }
923              
924             # Stream is no longer compressed; remove /FlateDecode filter.
925 0           shift @filters;
926              
927             # Preserve remaining filters, if any.
928 0 0         if (@filters > 1) {
    0          
929 0           $stream->{Filter} = \@filters;
930             } elsif (@filters) {
931 0           $stream->{Filter} = shift @filters;
932             } else {
933 0           delete $stream->{Filter};
934             }
935             }
936             }
937              
938             # Compress stream data.
939             sub compress_stream {
940 0     0 1   my ($self, $stream) = @_;
941              
942             # Get stream filters, if any.
943 0 0         my @filters = $stream->{Filter} ? is_array $stream->{Filter} ? @{$stream->{Filter}} : ($stream->{Filter}) : ();
  0 0          
944              
945             # Return a new stream so the in-memory copy remains uncompressed to work with.
946 0           my $new_stream = { %{$stream} };
  0            
947 0           $new_stream->{-data} = "";
948 0           my ($zlib, $status) = Compress::Raw::Zlib::Deflate->new(-Level => 9, -Bufsize => 65536, AppendOutput => 1);
949 0 0 0       $zlib->deflate($stream->{-data}, $new_stream->{-data}) == Z_OK or croak join(": ", $self->{-file} || (), "Object #$stream->{-id}: Stream deflation failed! ($zlib->msg)\n");
950 0 0 0       $zlib->flush($new_stream->{-data}, Z_FINISH) == Z_OK or croak join(": ", $self->{-file} || (), "Object #$stream->{-id}: Stream deflation failed! ($zlib->msg)\n");
951 0           $new_stream->{Length} = length $new_stream->{-data};
952 0 0         $new_stream->{Filter} = @filters ? ["/FlateDecode", @filters] : "/FlateDecode";
953 0           return $new_stream;
954             }
955              
956             # Resolve indirect object references.
957             sub resolve_references {
958 0     0 1   my ($self, $objects, $object) = @_;
959              
960             # Replace indirect object references with a reference to the actual object.
961 0 0 0       if (ref $object and reftype($object) eq "SCALAR") {
962 0           my $id = ${$object};
  0            
963 0 0         if ($objects->{$id}) {
964 0           ($object, my $metadata) = @{$objects->{$id}};
  0            
965 0 0         return $object if $metadata->{resolved}++;
966             } else {
967 0           ($id, my $gen) = split /-/, $id;
968 0   0       $gen ||= "0";
969 0   0       warn join(": ", $self->{-file} || (), "Warning: $id $gen R: Referenced indirect object not found!\n");
970             }
971             }
972              
973             # Check object type.
974 0 0         if (is_hash $object) {
    0          
975             # Resolve references in hash values.
976 0 0         foreach my $key (sort { fc($a) cmp fc($b) || $a cmp $b; } keys %{$object}) {
  0            
  0            
977 0 0         $object->{$key} = $self->resolve_references($objects, $object->{$key}) if ref $object->{$key};
978             }
979              
980             # For streams, validate the length metadata.
981 0 0         if (is_stream $object) {
982 0 0 0       substr($object->{-data}, $object->{Length}) =~ s/\A\s+\z// if $object->{Length} and length($object->{-data}) > $object->{Length};
983 0           my $len = length $object->{-data};
984 0   0       $object->{Length} ||= $len;
985             $len == $object->{Length}
986 0 0 0       or warn join(": ", $self->{-file} || (), "Warning: Object #$object->{-id}: Stream length does not match metadata! ($len != $object->{Length})\n");
987             }
988             } elsif (is_array $object) {
989             # Resolve references in array values.
990 0           foreach my $i (0 .. $#{$object}) {
  0            
991 0 0         $object->[$i] = $self->resolve_references($objects, $object->[$i]) if ref $object->[$i];
992             }
993             }
994              
995             # Return object with resolved references.
996 0           return $object;
997             }
998              
999             # Write all indirect objects to PDF file data.
1000             sub write_indirect_objects {
1001 0     0 1   my ($self, $pdf_file_data, $objects, $seen) = @_;
1002              
1003             # Enumerate all indirect objects.
1004 0           $self->enumerate_indirect_objects($objects);
1005              
1006             # Cross-reference file offsets.
1007 0           my $xrefs = ["0000000000 65535 f \n"];
1008              
1009             # Loop across indirect objects.
1010 0           for (my $i = 1; $i <= $#{$objects}; $i++) {
  0            
1011             # Save file offset for cross-reference table.
1012 0           push @{$xrefs}, sprintf "%010d 00000 n \n", length(${$pdf_file_data});
  0            
  0            
1013              
1014             # Write the indirect object header.
1015 0           ${$pdf_file_data} .= "$i 0 obj\n";
  0            
1016              
1017             # Write the object itself.
1018 0           $self->write_object($pdf_file_data, $objects, $seen, $objects->[$i], 0);
1019              
1020             # Write the indirect object trailer.
1021 0           ${$pdf_file_data} =~ s/\n?\z/\n/;
  0            
1022 0           ${$pdf_file_data} .= "endobj\n\n";
  0            
1023             }
1024              
1025             # Return cross-reference file offsets.
1026 0           return $xrefs;
1027             }
1028              
1029             # Enumerate all indirect objects.
1030             sub enumerate_indirect_objects {
1031 0     0 1   my ($self, $objects) = @_;
1032              
1033             # Add top-level PDF indirect objects.
1034             $self->add_indirect_objects($objects,
1035             $self->{Root} ? $self->{Root} : (), # Document catalog
1036             $self->{Info} ? $self->{Info} : (), # Document information dictionary (if any)
1037             $self->{Root}{Dests} ? $self->{Root}{Dests} : (), # Named destinations (if any)
1038             $self->{Root}{Metadata} ? $self->{Root}{Metadata} : (), # Document metadata (if any)
1039             $self->{Root}{Outlines} ? $self->{Root}{Outlines} : (), # Document outline hierarchy (if any)
1040             $self->{Root}{Pages} ? $self->{Root}{Pages} : (), # Document page tree
1041             $self->{Root}{Threads} ? $self->{Root}{Threads} : (), # Articles (if any)
1042 0 0         $self->{Root}{StructTreeRoot} ? $self->{Root}{StructTreeRoot} : (), # Document structure tree (if any)
    0          
    0          
    0          
    0          
    0          
    0          
    0          
1043             );
1044              
1045             # Add optional content groups, if any.
1046 0 0         $self->add_indirect_objects($objects, @{$self->{Root}{OCProperties}{OCGs}}) if $self->{Root}{OCProperties};
  0            
1047              
1048             # Enumerate shared objects.
1049 0           $self->enumerate_shared_objects($objects, {}, {}, $self->{Root});
1050              
1051             # Add referenced indirect objects.
1052 0           for (my $i = 1; $i <= $#{$objects}; $i++) {
  0            
1053             # Get object.
1054 0           my $object = $objects->[$i];
1055              
1056             # Check object type.
1057 0 0         if (is_hash $object) {
1058             # Objects to add.
1059 0           my @objects;
1060              
1061             # Hashes to scan.
1062 0           my @hashes = $object;
1063              
1064             # Iteratively recurse through hash tree.
1065 0           while (@hashes) {
1066             # Get the next hash.
1067 0           $object = shift @hashes;
1068              
1069             # Check each hash key.
1070 0 0         foreach my $key (sort { fc($a) cmp fc($b) || $a cmp $b; } keys %{$object}) {
  0            
  0            
1071 0 0 0       if (($object->{Type} // "") eq "/ExtGState" and $key eq "Font" and is_array $object->{Font} and is_hash $object->{Font}[0]) {
    0 0        
    0 0        
    0 0        
      0        
      0        
      0        
      0        
      0        
      0        
      0        
      0        
      0        
      0        
      0        
      0        
      0        
1072 0           push @objects, $object->{Font}[0];
1073             } elsif ($key =~ /^(?:Data|First|ID|Last|Next|Obj|Parent|ParentTree|Popup|Prev|Root|StmOwn|Threads|Widths)$/
1074             or $key =~ /^(?:AN|Annotation|B|C|CI|DocMDP|F|FontDescriptor|I|IX|K|Lock|N|P|Pg|RI|SE|SV|V)$/ and ref $object->{$key} and is_hash $object->{$key}
1075             or is_hash $object->{$key} and ($object->{$key}{-data} or $object->{$key}{Kids} or ($object->{$key}{Type} // "") =~ /^\/(?:Filespec|Font)$/)
1076             or ($object->{S} // "") eq "/Thread" and $key eq "D"
1077             or ($object->{S} // "") eq "/Hide" and $key eq "T"
1078             ) {
1079 0           push @objects, $object->{$key};
1080             } elsif ($key =~ /^(?:Annots|B|C|CO|Fields|K|Kids|O|Pages|TrapRegions)$/ and is_array $object->{$key}) {
1081 0           push @objects, grep { is_hash $_; } @{$object->{$key}};
  0            
  0            
1082             } elsif (is_hash $object->{$key}) {
1083 0           push @hashes, $object->{$key};
1084             }
1085             }
1086             }
1087              
1088             # Add the objects found, if any.
1089 0 0         $self->add_indirect_objects($objects, @objects) if @objects;
1090             }
1091             }
1092             }
1093              
1094             # Enumerate shared objects.
1095             sub enumerate_shared_objects {
1096 0     0 1   my ($self, $objects, $seen, $ancestors, $object) = @_;
1097              
1098             # Add shared indirect objects.
1099 0 0         if ($seen->{$object}++) {
1100 0 0         $self->add_indirect_objects($objects, $object) unless $objects->[0]{$object};
1101 0           return;
1102             }
1103              
1104             # Return if this object is an ancestor of itself.
1105 0 0         return if $ancestors->{$object};
1106              
1107             # Add this object to the lookup hash of ancestors.
1108 0           $ancestors->{$object}++;
1109              
1110             # Recurse to check entire object tree.
1111 0 0         if (is_hash $object) {
    0          
1112 0 0         foreach my $key (sort { fc($a) cmp fc($b) || $a cmp $b; } keys %{$object}) {
  0            
  0            
1113 0 0         $self->enumerate_shared_objects($objects, $seen, $ancestors, $object->{$key}) if ref $object->{$key};
1114             }
1115             } elsif (is_array $object) {
1116 0           foreach my $obj (@{$object}) {
  0            
1117 0 0         $self->enumerate_shared_objects($objects, $seen, $ancestors, $obj) if ref $obj;
1118             }
1119             }
1120              
1121             # Remove this object from the lookup hash of ancestors.
1122 0           delete $ancestors->{$object};
1123             }
1124              
1125             # Add indirect objects.
1126             sub add_indirect_objects {
1127 0     0 1   my ($self, $objects, @objects) = @_;
1128              
1129             # Loop across specified objects.
1130 0           foreach my $object (@objects) {
1131             # Check if object exists and is not in the lookup hash yet.
1132 0 0 0       if (defined $object and not $objects->[0]{$object}) {
1133             # Add the new indirect object to the array.
1134 0           push @{$objects}, $object;
  0            
1135              
1136             # Save the object ID in the lookup hash, keyed by the object.
1137 0           $objects->[0]{$object} = $#{$objects};
  0            
1138             }
1139             }
1140             }
1141              
1142             # Write a direct object to the string of PDF file data.
1143             sub write_object {
1144 0     0 1   my ($self, $pdf_file_data, $objects, $seen, $object, $indent) = @_;
1145              
1146             # Make sure the same object isn't written twice.
1147 0 0 0       if (ref $object and $seen->{$object}++) {
1148 0   0       croak join(": ", $self->{-file} || (), "Object $object written more than once!\n");
1149             }
1150              
1151             # Check object type.
1152 0 0 0       if (is_hash $object) {
    0          
    0          
    0          
1153             # For streams, compress the stream or update the length metadata.
1154 0 0         if (is_stream $object) {
1155 0 0 0       if (($self->{-compress} or $object->{-compress}) and not ($self->{-decompress} or $object->{-decompress})) {
      0        
      0        
1156 0           $object = $self->compress_stream($object);
1157             } else {
1158 0           $object->{Length} = length $object->{-data};
1159             }
1160             }
1161              
1162             # Dictionary object.
1163 0           $self->serialize_object($pdf_file_data, "<<\n");
1164 0 0         foreach my $key (sort { fc($a) cmp fc($b) || $a cmp $b; } keys %{$object}) {
  0            
  0            
1165 0 0         next if $key =~ /^-/;
1166 0           my $obj = $object->{$key};
1167 0 0         $self->add_indirect_objects($objects, $obj) if is_stream $obj;
1168 0           $self->serialize_object($pdf_file_data, join("", " " x ($indent + 2), "/$key "));
1169 0 0         if (not ref $obj) {
    0          
1170 0           $self->serialize_object($pdf_file_data, "$obj\n");
1171             } elsif ($objects->[0]{$obj}) {
1172 0           $self->serialize_object($pdf_file_data, "$objects->[0]{$obj} 0 R\n");
1173             } else {
1174 0 0         $self->write_object($pdf_file_data, $objects, $seen, $object->{$key}, ref $object ? $indent + 2 : 0);
1175             }
1176             }
1177 0           $self->serialize_object($pdf_file_data, join("", " " x $indent, ">>\n"));
1178              
1179             # For streams, write the stream data.
1180 0 0         if (is_stream $object) {
1181 0 0 0       croak join(": ", $self->{-file} || (), "Stream written as direct object!\n") if $indent;
1182 0 0         my $newline = substr($object->{-data}, -1) eq "\n" ? "" : "\n";
1183 0           ${$pdf_file_data} =~ s/\n?\z/\n/;
  0            
1184 0           ${$pdf_file_data} .= "stream\n$object->{-data}${newline}endstream\n";
  0            
1185             }
1186 0           } elsif (is_array $object and not grep { ref $_; } @{$object}) {
  0            
1187             # Array of simple objects.
1188 0 0         if ($self->{-minify}) {
1189 0           $self->serialize_array($pdf_file_data, $object);
1190             } else {
1191 0           ${$pdf_file_data} .= "[ @{$object} ]\n";
  0            
  0            
1192             }
1193             } elsif (is_array $object) {
1194             # Array object.
1195 0           $self->serialize_object($pdf_file_data, "[\n");
1196 0           my $spaces = " " x ($indent + 2);
1197 0           foreach my $obj (@{$object}) {
  0            
1198 0 0         $self->add_indirect_objects($objects, $obj) if is_stream $obj;
1199 0 0         ${$pdf_file_data} .= $spaces unless $self->{-minify};
  0            
1200 0 0         if (not ref $obj) {
    0          
1201 0           $self->serialize_object($pdf_file_data, $obj);
1202 0           $spaces = " ";
1203             } elsif ($objects->[0]{$obj}) {
1204 0           $self->serialize_object($pdf_file_data, "$objects->[0]{$obj} 0 R\n");
1205 0           $spaces = " " x ($indent + 2);
1206             } else {
1207 0           $self->write_object($pdf_file_data, $objects, $seen, $obj, $indent + 2);
1208 0           $spaces = " " x ($indent + 2);
1209             }
1210             }
1211 0 0 0       ${$pdf_file_data} .= "\n" if $spaces eq " " and not $self->{-minify};
  0            
1212 0           $self->serialize_object($pdf_file_data, join("", " " x $indent, "]\n"));
1213             } elsif (reftype($object) eq "SCALAR") {
1214             # Unresolved indirect reference.
1215 0           my ($id, $gen) = split /-/, ${$object};
  0            
1216 0   0       $gen ||= "0";
1217 0           $self->serialize_object($pdf_file_data, join("", " " x $indent, "($id $gen R)\n"));
1218             } else {
1219             # Simple object.
1220 0           $self->serialize_object($pdf_file_data, join("", " " x $indent, "$object\n"));
1221             }
1222             }
1223              
1224             # Dump PDF object.
1225             sub dump_object {
1226 0     0 1   my ($self, $object, $label, $seen, $indent, $mode) = @_;
1227              
1228             # Dump output.
1229 0           my $output = "";
1230              
1231             # Check mode and object type.
1232 0 0 0       if ($mode eq "outline") {
    0 0        
    0          
    0          
    0          
    0          
1233 0 0 0       if (ref $object and $seen->{$object}) {
    0 0        
    0          
    0          
    0          
1234             # Previously-seen object; dump the label.
1235 0           $output = "$seen->{$object}";
1236             } elsif (is_hash $object) {
1237             # Hash object.
1238 0           $seen->{$object} = $label;
1239 0 0         if (is_stream $object) {
1240 0           $output = "(STREAM)";
1241             } else {
1242 0           $label =~ s/(?<=\w)$/->/;
1243 0 0         my @keys = sort { fc($a) cmp fc($b) || $a cmp $b; } keys %{$object};
  0            
  0            
1244 0           my $key_len = max map length $_, @keys;
1245 0           foreach my $key (@keys) {
1246 0           my $obj = $object->{$key};
1247 0 0         next unless ref $obj;
1248 0           $output .= sprintf "%s%-${key_len}s => ", " " x ($indent + 2), $key;
1249 0 0         $output .= $self->dump_object($object->{$key}, "$label\{$key\}", $seen, ref $object ? $indent + 2 : 0, $mode) . ",\n";
1250             }
1251 0 0         if ($output) {
1252 0           $output = join("", "{ # $label\n", $output, (" " x $indent), "}");
1253             } else {
1254 0           $output = "{...}";
1255             }
1256 0           $output =~ s/\{ \# \$pdf->\n/\{\n/;
1257             }
1258 0           } elsif (is_array $object and not grep { ref $_; } @{$object}) {
  0            
1259             # Array of simple objects.
1260 0           $output = "[...]";
1261             } elsif (is_array $object) {
1262             # Array object.
1263 0           for (my $i = 0; $i < @{$object}; $i++) {
  0            
1264 0 0         $output .= sprintf "%s%s,\n", " " x ($indent + 2), $self->dump_object($object->[$i], "$label\[$i\]", $seen, $indent + 2, $mode) if ref $object->[$i];
1265             }
1266 0 0         if ($output =~ /\A\s+(.*?),\n\z/) {
    0          
1267 0           $output = "[... $1]";
1268             } elsif ($output =~ /\n/) {
1269 0           $output = join("", "[ # $label\n", $output, (" " x $indent), "]");
1270             } else {
1271 0           $output = "[$output]";
1272             }
1273             } elsif (reftype($object) eq "SCALAR") {
1274             # Unresolved indirect reference.
1275 0           my ($id, $gen) = split /-/, ${$object};
  0            
1276 0   0       $gen ||= "0";
1277 0           $output .= "\"$id $gen R\"";
1278             }
1279             } elsif (ref $object and $seen->{$object}) {
1280             # Previously-seen object; dump the label.
1281 0           $output = $seen->{$object};
1282             } elsif (is_hash $object) {
1283             # Hash object.
1284 0           $seen->{$object} = $label;
1285 0           $output = "{ # $label\n";
1286 0           $label =~ s/(?<=\w)$/->/;
1287 0 0         my @keys = sort { fc($a) cmp fc($b) || $a cmp $b; } keys %{$object};
  0            
  0            
1288 0           my $key_len = max map length $_, @keys;
1289 0           foreach my $key (@keys) {
1290 0           my $obj = $object->{$key};
1291 0           $output .= sprintf "%s%-${key_len}s => ", " " x ($indent + 2), $key;
1292 0 0         if ($key eq -data) {
    0          
1293 0           chomp $obj;
1294 0 0         $output .= $obj =~ /\A(?:<\?xpacket|[\n\t -~]*\z)/ ? "<<'EOF',\n$obj\nEOF\n" : dump($obj) . "\n";
1295             } elsif (not ref $obj) {
1296 0           $output .= dump($obj) . ",\n";
1297             } else {
1298 0 0         $output .= $self->dump_object($object->{$key}, "$label\{$key\}", $seen, ref $object ? $indent + 2 : 0, $mode) . ",\n";
1299             }
1300             }
1301 0           $output .= (" " x $indent) . "}";
1302 0           $output =~ s/\{ \# \$pdf\n/\{\n/;
1303 0           } elsif (is_array $object and not grep { ref $_; } @{$object}) {
  0            
1304             # Array of simple objects.
1305 0 0         $output = sprintf "[%s]", join(", ", map { /^\d+\.\d+$/ ? $_ : dump($_); } @{$object});
  0            
  0            
1306             } elsif (is_array $object) {
1307             # Array object.
1308 0           $output .= "[ # $label\n";
1309 0           my $spaces = " " x ($indent + 2);
1310 0           for (my $i = 0; $i < @{$object}; $i++) {
  0            
1311 0           my $obj = $object->[$i];
1312 0 0         if (ref $obj) {
1313 0           $output .= sprintf "%s%s,\n", $spaces, $self->dump_object($obj, "$label\[$i\]", $seen, $indent + 2, $mode);
1314 0           $spaces = " " x ($indent + 2);
1315             } else {
1316 0           $output .= $spaces . dump($obj) . ",";
1317 0           $spaces = " ";
1318             }
1319             }
1320 0 0         $output .= ",\n" if $spaces eq " ";
1321 0           $output .= (" " x $indent) . "]";
1322             } elsif (reftype($object) eq "SCALAR") {
1323             # Unresolved indirect reference.
1324 0           my ($id, $gen) = split /-/, ${$object};
  0            
1325 0   0       $gen ||= "0";
1326 0           $output .= "\"$id $gen R\"";
1327             } else {
1328             # Simple object.
1329 0           $output = sprintf "%s%s\n", " " x $indent, dump($object);
1330             }
1331              
1332             # Return generated output.
1333 0           return $output;
1334             }
1335              
1336             1;
1337              
1338             __END__