File Coverage

blib/lib/PDF/Builder/Basic/PDF/File.pm
Criterion Covered Total %
statement 574 829 69.2
branch 208 412 50.4
condition 65 158 41.1
subroutine 44 47 93.6
pod 26 26 100.0
total 917 1472 62.3


line stmt bran cond sub pod time code
1             #=======================================================================
2             #
3             # THIS IS A REUSED PERL MODULE, FOR PROPER LICENCING TERMS SEE BELOW:
4             #
5             # Copyright Martin Hosken
6             #
7             # Modified for PDF::API2 by Alfred Reibenschuh
8             #
9             # No warranty or expression of effectiveness, least of all regarding
10             # anyone's safety, is implied in this software or documentation.
11             #
12             # This specific module is licensed under the Perl Artistic License.
13             # Effective 28 January 2021, the original author and copyright holder,
14             # Martin Hosken, has given permission to use and redistribute this module
15             # under the MIT license.
16             #
17             #=======================================================================
18             package PDF::Builder::Basic::PDF::File;
19              
20 39     39   71895 use strict;
  39         117  
  39         1371  
21 39     39   262 use warnings;
  39         95  
  39         2801  
22              
23             our $VERSION = '3.025'; # VERSION
24             our $LAST_UPDATE = '3.024'; # manually update whenever code is changed
25              
26             =head1 NAME
27              
28             PDF::Builder::Basic::PDF::File - Holds the trailers and cross-reference tables for a PDF file
29              
30             =head1 SYNOPSIS
31              
32             $p = PDF::Builder::Basic::PDF::File->open("filename.pdf", 1);
33             $p->new_obj($obj_ref);
34             $p->free_obj($obj_ref);
35             $p->append_file();
36             $p->close_file();
37             $p->release(); # IMPORTANT!
38              
39             =head1 DESCRIPTION
40              
41             This class keeps track of the directory aspects of a PDF file. There are two
42             parts to the directory: the main directory object, which is the parent to all
43             other objects, and a chain of cross-reference tables and corresponding trailer
44             dictionaries, starting with the main directory object.
45              
46             =head1 INSTANCE VARIABLES
47              
48             Within this class hierarchy, rather than making everything visible via methods,
49             which would be a lot of work, there are various instance variables which are
50             accessible via associative array referencing. To distinguish instance variables
51             from content variables (which may come from the PDF content itself), each such
52             variable name will start with a space.
53              
54             Variable names which do not start with a space directly reflect elements in a
55             PDF dictionary. In the case of a C, the
56             elements reflect those in the trailer dictionary.
57              
58             Since some variables are not designed for class users to access, variables are
59             marked in the documentation with B<(R)> to indicate that such an entry should
60             only be used as B information. B<(P)> indicates that the information
61             is B, and not designed for user use at all, but is included in the
62             documentation for completeness and to ensure that nobody else tries to use it.
63              
64             =over
65              
66             =item newroot
67              
68             This variable allows the user to create a new root entry to occur in the trailer
69             dictionary which is output when the file is written or appended. If you wish to
70             override the root element in the dictionary you have, use this entry to indicate
71             that without losing the current Root entry. Notice that newroot should point to
72             a PDF level object and not just to a dictionary, which does not have object
73             status.
74              
75             =item INFILE (R)
76              
77             Contains the filehandle used to read this information into this PDF directory.
78             It is an IO object.
79              
80             =item fname (R)
81              
82             This is the filename which is reflected by INFILE, or the original IO object
83             passed in.
84              
85             =item update (R)
86              
87             This indicates that the read file has been opened for update and that at some
88             point, C<< $p->appendfile() >> can be called to update the file with the
89             changes that have been made to the memory representation.
90              
91             =item maxobj (R)
92              
93             Contains the first usable object number above any that have already appeared
94             in the file so far.
95              
96             =item outlist (P)
97              
98             This is a list of Objind which are to be output when the next C
99             or C occurs.
100              
101             =item firstfree (P)
102              
103             Contains the first free object in the free object list. Free objects are removed
104             from the front of the list and added to the end.
105              
106             =item lastfree (P)
107              
108             Contains the last free object in the free list. It may be the same as the
109             C if there is only one free object.
110              
111             =item objcache (P)
112              
113             All objects are held in the cache to ensure that a system only has one
114             occurrence of each object. In effect, the objind class acts as a container type
115             class to hold the PDF object structure, and it would be unfortunate if there
116             were two identical place-holders floating around a system.
117              
118             =item epos (P)
119              
120             The end location of the read-file.
121              
122             =back
123              
124             Each trailer dictionary contains a number of private instance variables which
125             hold the chain together.
126              
127             =over
128              
129             =item loc (P)
130              
131             Contains the location of the start of the cross-reference table preceding the
132             trailer.
133              
134             =item xref (P)
135              
136             Contains an anonymous array of each cross-reference table entry.
137              
138             =item prev (P)
139              
140             A reference to the previous table. Note this differs from the Prev entry which
141             is in PDF, which contains the location of the previous cross-reference table.
142              
143             =back
144              
145             =head1 METHODS
146              
147             =over
148              
149             =cut
150              
151 39     39   293 use Scalar::Util qw(blessed weaken);
  39         118  
  39         2443  
152              
153 39     39   275 use vars qw($cr $irreg_char $reg_char $ws_char $delim_char %types);
  39         97  
  39         6644  
154              
155             $ws_char = '[ \t\r\n\f\0]';
156             $delim_char = '[][<>{}()/%]';
157             $reg_char = '[^][<>{}()/% \t\r\n\f\0]';
158             $irreg_char = '[][<>{}()/% \t\r\n\f\0]';
159             # \015 = x0D = CR or \r, \012 = x0A = LF or \n
160             # TBD a line-end character is space CR ' \r', space LF ' \n', or CR LF '\r\n'
161             # have seen working PDFs with just a CR and space CR
162             $cr = '\s*(?:\015|\012|(?:\015\012))';
163              
164             my $re_comment = qr/(?:\%[^\r\n]*)/;
165             my $re_whitespace = qr/(?:[ \t\r\n\f\0]|$re_comment)/;
166              
167             %types = (
168             'Page' => 'PDF::Builder::Basic::PDF::Page',
169             'Pages' => 'PDF::Builder::Basic::PDF::Pages',
170             );
171              
172             my $readDebug = 0;
173              
174 39     39   352 use Carp;
  39         120  
  39         2792  
175 39     39   791 use IO::File;
  39         9782  
  39         5693  
176              
177             # Now for the basic PDF types
178 39     39   777 use PDF::Builder::Basic::PDF::Utils;
  39         95  
  39         3923  
179              
180 39     39   287 use PDF::Builder::Basic::PDF::Array;
  39         81  
  39         1080  
181 39     39   236 use PDF::Builder::Basic::PDF::Bool;
  39         83  
  39         1008  
182 39     39   239 use PDF::Builder::Basic::PDF::Dict;
  39         77  
  39         945  
183 39     39   232 use PDF::Builder::Basic::PDF::Name;
  39         83  
  39         1016  
184 39     39   229 use PDF::Builder::Basic::PDF::Number;
  39         92  
  39         1089  
185 39     39   220 use PDF::Builder::Basic::PDF::Objind;
  39         86  
  39         1183  
186 39     39   245 use PDF::Builder::Basic::PDF::String;
  39         82  
  39         1044  
187 39     39   18170 use PDF::Builder::Basic::PDF::Page;
  39         101  
  39         1322  
188 39     39   251 use PDF::Builder::Basic::PDF::Pages;
  39         74  
  39         774  
189 39     39   215 use PDF::Builder::Basic::PDF::Null;
  39         85  
  39         1093  
190 39     39   196 use POSIX qw(ceil floor);
  39         93  
  39         276  
191              
192             =item PDF::Builder::Basic::PDF::File->new()
193              
194             Creates a new, empty file object which can act as the host to other PDF objects.
195             Since there is no file associated with this object, it is assumed that the
196             object is created in readiness for creating a new PDF file.
197              
198             =cut
199              
200             sub new {
201 217     217 1 607 my ($class, $root) = @_;
202 217         688 my $self = $class->_new();
203              
204 217 50       973 unless ($root) {
205 217         803 $root = PDFDict();
206 217         735 $root->{'Type'} = PDFName('Catalog');
207             }
208 217         809 $self->new_obj($root);
209 217         484 $self->{'Root'} = $root;
210              
211 217         846 return $self;
212             }
213              
214             =item $p = PDF::Builder::Basic::PDF::File->open($filename, $update, %options)
215              
216             Opens the file and reads all the trailers and cross reference tables to build
217             a complete directory of objects.
218              
219             C<$filename> may be a string or an IO object.
220              
221             C<$update> specifies whether this file is being opened for updating and editing
222             (I value), or simply to be read (I or undefined value).
223              
224             C<%options> may include
225              
226             =over
227              
228             =item diags => 1
229              
230             If C is set to 1, various warning messages will be given if a
231             suspicious PDF structure is found, and some fixup may be attempted. There is
232             no guarantee that any fixup will change the PDF to legitimate, or that there
233             won't be other problems found further down the line. If this flag is I
234             given, and a structural problem is found, it is fairly likely that errors (and
235             even a program B) may happen further along. If you experience crashes
236             when reading in a PDF file, try running with C and see what is reported.
237              
238             There are many PDF files out "in the wild" which, while failing to conform to
239             Adobe's standards, appear to be tolerated by PDF Readers. Thus, Builder will no
240             longer fail on them, but merely comment on their existence.
241              
242             =back
243              
244             =cut
245              
246             sub open {
247 18     18 1 83 my ($class, $filename, $update, %options) = @_;
248             # copy dashed option names to preferred undashed names
249 18 50 33     78 if (defined $options{'-diags'} && !defined $options{'diags'}) { $options{'diags'} = delete($options{'-diags'}); }
  0         0  
250 18         43 my ($fh, $buffer);
251 18 50       96 $options{'diags'} = 0 if not defined $options{'diags'}; # default
252              
253 18         43 my $comment = ''; # any comment jammed into the PDF header
254 18         69 my $self = $class->_new();
255 18 50       84 if (ref $filename) {
256 18         55 $self->{' INFILE'} = $filename;
257 18 50       62 if ($update) {
258 18         46 $self->{' update'} = 1;
259 18         59 $self->{' OUTFILE'} = $filename;
260             }
261 18         36 $fh = $filename;
262             } else {
263 0 0       0 die "File '$filename' does not exist!" unless -f $filename;
264 0   0     0 $fh = IO::File->new(($update ? '+' : '') . "<$filename") || return;
265 0         0 $self->{' INFILE'} = $fh;
266 0 0       0 if ($update) {
267 0         0 $self->{' update'} = 1;
268 0         0 $self->{' OUTFILE'} = $fh;
269 0         0 $self->{' fname'} = $filename;
270             }
271             }
272 18         112 binmode $fh, ':raw';
273 18         261 $fh->seek(0, 0); # go to start of file
274 18         217 $fh->read($buffer, 255);
275 18 50       686 unless ($buffer =~ m/^\%PDF\-(\d+\.\d+)(.*?)$cr/mo) {
276 0         0 die "$filename does not contain a valid PDF version number";
277             }
278 18         86 $self->{' version'} = $1;
279             # can't run verCheckInput() yet, as full ' version' not set
280 18 50 33     144 if (defined $2 && length($2) > 0) {
281 0         0 $comment = $2; # save for output as comment
282             # since we just echo the original header + comment, unless that causes
283             # problems in some Readers, we can just leave it be (no call to strip
284             # out inline comment and create a separate comment further along).
285             }
286              
287 18         83 $fh->seek(0, 2); # go to end of file
288 18         285 my $end = $fh->tell();
289 18         155 $self->{' epos'} = $end;
290 18         76 foreach my $offset (1 .. 64) {
291 36         172 $fh->seek($end - 16 * $offset, 0);
292 36         229 $fh->read($buffer, 16 * $offset);
293 36 100       829 last if $buffer =~ m/startxref($cr|\s*)\d+($cr|\s*)\%\%eof.*?/i;
294             }
295 18 50       433 unless ($buffer =~ m/startxref[^\d]+([0-9]+)($cr|\s*)\%\%eof.*?/i) {
296 0 0       0 if ($options{'diags'} == 1) {
297 0         0 warn "Malformed PDF file $filename"; #orig 'die'
298             }
299             }
300 18         91 my $xpos = $1;
301 18         60 $self->{' xref_position'} = $xpos;
302              
303 18         141 my $tdict = $self->readxrtr($xpos, %options);
304 18         101 foreach my $key (keys %$tdict) {
305 133         297 $self->{$key} = $tdict->{$key};
306             }
307              
308 18         149 return $self;
309             } # end of open()
310              
311             =item $new_version = $p->version($version, %opts) # Set
312              
313             =item $ver = $p->version() # Get
314              
315             Gets/sets the PDF version (e.g., 1.5). Setting sets both the header and
316             trailer versions. Getting returns the higher of header and trailer versions.
317              
318             For compatibility with earlier releases, if no decimal point is given, assume
319             "1." precedes the number given.
320              
321             A warning message is given if you attempt to I the PDF version, as you
322             might have already read in a higher level file, or used a higher level feature.
323             This message is suppressed if the 'silent' option is given with any value.
324              
325             =cut
326              
327             sub version {
328 31     31 1 67 my $self = shift();
329              
330             # current version is the higher of trailer and header versions
331 31         98 my $header_version = $self->header_version();
332 31         107 my $trailer_version = $self->trailer_version();
333 31 100 100     174 my $old_version = (defined $trailer_version &&
334             $trailer_version > $header_version)?
335             $trailer_version: $header_version;
336              
337 31 100       112 if (@_) { # Set, possibly with options
338 3         6 my $version = shift();
339 3         6 my %opts = @_;
340             # copy dashed option names to preferred undashed names
341 3 50 33     13 if (defined $opts{'-silent'} && !defined $opts{'silent'}) { $opts{'silent'} = delete($opts{'-silent'}); }
  0         0  
342            
343             # 1.x and 2.x versions allowed
344 3 50       38 if ($version =~ m/^\d+$/) { $version = "1.$version"; } # no x.? assume it's 1.something
  0         0  
345             # check if well formed 1.x and 2.x
346 3 50       16 if ($version !~ /^[12]\.[0-9]+$/) {
347 0 0       0 croak "Invalid version '$version' ignored" unless defined $opts{'silent'};
348 0         0 return $old_version;
349             }
350              
351 3 50       9 if ($old_version > $version) {
352 0 0       0 croak "Warning: call to header_version() to LOWER the output PDF version number!" unless defined $opts{'silent'};
353             }
354            
355             # have already squawked about any problems with $version
356 3         13 $self->header_version($version, 'silent'=>1);
357             #if ($version >= 1.4) { # min 1.4 level
358 3         10 $self->trailer_version($version, 'silent'=>1);
359             #}
360             #else {
361             # delete $self->{'Root'}->{'Version'};
362             # $self->out_obj($self->{'Root'});
363             #}
364 3         9 return $version;
365             }
366              
367             # Get
368 28         101 return $old_version;
369             }
370              
371             =item $new_version = $p->header_version($version, %opts) # Set
372              
373             =item $version = $p->header_version() # Get
374              
375             Gets/sets the PDF version stored in the file header.
376              
377             For compatibility with earlier releases, if no decimal point is given, assume
378             "1." precedes the number given.
379              
380             A warning message is given if you attempt to I the PDF version, as you
381             might have already read in a higher level file, or used a higher level feature.
382             This message is suppressed if the 'silent' option is given with any value.
383              
384             =cut
385              
386             sub header_version {
387 39     39 1 89 my $self = shift();
388              
389             # current (header) version
390 39         93 my $old_version = $self->{' version'};
391              
392 39 100       101 if (@_) { # Set, permits versions 1.x and 2.x
393 6         12 my $version = shift();
394 6         15 my %opts = @_;
395             # copy dashed option names to preferred undashed names
396 6 50 33     22 if (defined $opts{'-silent'} && !defined $opts{'silent'}) { $opts{'silent'} = delete($opts{'-silent'}); }
  0         0  
397            
398             # 1.x and 2.x versions allowed
399 6 50       48 if ($version =~ m/^\d+$/) { $version = "1.$version"; } # no x.? assume it's 1.something
  0         0  
400             # check if well formed 1.x and 2.x
401 6 50       37 if ($version !~ /^[12]\.[0-9]+$/) {
402 0 0       0 croak "Invalid header_version '$version' ignored" unless defined $opts{'silent'};
403 0         0 return $old_version;
404             }
405              
406 6 50       24 if ($old_version > $version) {
407 0 0       0 croak "Warning: call to header_version() to LOWER the output PDF version number!" unless defined $opts{'silent'};
408             }
409            
410 6         19 $self->{' version'} = $version;
411 6         19 return $version;
412             }
413              
414             # Get
415 33         90 return $old_version;
416             }
417              
418             =item $new_version = $p->trailer_version($version, %opts) # Set
419              
420             =item $version = $p->trailer_version() # Get
421              
422             Gets/sets the PDF version stored in the document catalog.
423              
424             Note that the minimum PDF level for a trailer version is 1.4. It is not
425             permitted to set a PDF level of 1.3 or lower. An existing PDF (read in) of
426             1.3 or below returns undefined.
427              
428             For compatibility with earlier releases, if no decimal point is given, assume
429             "1." precedes the number given.
430              
431             A warning message is given if you attempt to I the PDF version, as you
432             might have already read in a higher level file, or used a higher level feature.
433             This message is suppressed if the 'silent' option is given with any value.
434              
435             =cut
436              
437             sub trailer_version {
438 37     37 1 90 my $self = shift();
439              
440 37         78 my $old_version = undef;
441 37 100       126 if ($self->{'Root'}->{'Version'}) {
442 12         42 $self->{'Root'}->{'Version'}->realise();
443 12         35 $old_version = $self->{'Root'}->{'Version'}->val();
444             }
445              
446 37 100       117 if (@_) { # Set, allows versions 1.x and 2.x
447 5         11 my $version = shift();
448 5         11 my %opts = @_;
449             # copy dashed option names to preferred undashed names
450 5 50 33     15 if (defined $opts{'-silent'} && !defined $opts{'silent'}) { $opts{'silent'} = delete($opts{'-silent'}); }
  0         0  
451            
452             # 1.x and 2.x versions allowed
453 5 50       23 if ($version =~ m/^\d+$/) { $version = "1.$version"; } # no x.? assume it's 1.something
  0         0  
454             # check if well formed 1.x and 2.x
455 5 50       20 if ($version !~ /^[12]\.[0-9]+$/) {
456 0 0       0 croak "Invalid trailer_version '$version' ignored" unless defined $opts{'silent'};
457 0         0 return $old_version;
458             }
459              
460 5 50 66     22 if (defined $old_version && $old_version > $version) {
461 0 0       0 croak "Warning: call to trailer_version() to LOWER the output PDF version number!" unless defined $opts{'silent'};
462             }
463            
464 5         16 $self->{'Root'}->{'Version'} = PDFName($version);
465 5         31 $self->out_obj($self->{'Root'});
466 5         11 return $version;
467             }
468              
469             # Get
470 32         89 return $old_version;
471             }
472              
473             =item $prev_version = $p->require_version($version)
474              
475             Ensures that the PDF version is at least C<$version>.
476             Silently sets the version to the higher level.
477              
478             =cut
479              
480             sub require_version {
481 3     3 1 609 my ($self, $min_version) = @_;
482 3         9 my $current_version = $self->version();
483 3 100       13 $self->version($min_version) if $current_version < $min_version;
484 3         8 return $current_version;
485             }
486              
487             =item $p->release()
488              
489             Releases ALL of the memory used by the PDF document and all of its
490             component objects. After calling this method, do B expect to
491             have anything left in the C object
492             (so if you need to save, be sure to do it before calling this method).
493              
494             B, that it is important that you call this method on any
495             C object when you wish to destroy it and
496             free up its memory. Internally, PDF files have an enormous number of
497             cross-references, and this causes circular references within the
498             internal data structures. Calling C causes a brute-force
499             cleanup of the data structures, freeing up all of the memory. Once
500             you've called this method, though, don't expect to be able to do
501             anything else with the C object; it'll
502             have B internal state whatsoever.
503              
504             =cut
505              
506             # Maintainer's Question: Couldn't this be handled by a DESTROY method
507             # instead of requiring an explicit call to release()?
508             sub release {
509 179     179 1 335 my $self = shift();
510              
511 179 50       523 return $self unless ref($self);
512 179         840 my @tofree = values %$self;
513              
514 179         782 foreach my $key (keys %$self) {
515 2907         3868 $self->{$key} = undef;
516 2907         4342 delete $self->{$key};
517             }
518              
519             # PDFs with highly-interconnected page trees or outlines can hit Perl's
520             # recursion limit pretty easily, so disable the warning for this specific
521             # loop.
522 39     39   60595 no warnings 'recursion'; ## no critic
  39         114  
  39         334918  
523              
524 179         795 while (my $item = shift @tofree) {
525 8797 100 100     30765 if (blessed($item) and $item->can('release')) {
    100          
    100          
526 2547         5507 $item->release(1);
527             } elsif (ref($item) eq 'ARRAY') {
528 1862         5327 push @tofree, @$item;
529             } elsif (ref($item) eq 'HASH') {
530 1065         2825 push @tofree, values %$item;
531 1065         2767 foreach my $key (keys %$item) {
532 4596         6105 $item->{$key} = undef;
533 4596         8163 delete $item->{$key};
534             }
535             } else {
536 3323         7497 $item = undef;
537             }
538             }
539              
540 179         662 return;
541             } # end of release()
542              
543             =item $p->append_file()
544              
545             Appends the objects for output to the read file and then appends the
546             appropriate table.
547              
548             =cut
549              
550             sub append_file {
551 8     8 1 21 my $self = shift();
552 8 50       35 return unless $self->{' update'};
553              
554 8         21 my $fh = $self->{' INFILE'};
555              
556             # hack to upgrade pdf-version number to support requested features in
557             # higher versions than the pdf was originally created. WARNING: new version
558             # must be exactly SAME length as the old (e.g., 1.6 replacing 1.4), or
559             # problems are likely with overwriting header. perhaps some day we will
560             # need to check the old version being ovewritten, and adjust something to
561             # avoid corrupting the file.
562 8   50     30 my $version = $self->{' version'} || 1.4;
563 8         44 $fh->seek(0, 0);
564             # assume that any existing EOL after version will be reused
565 8         119 $fh->print("%PDF-$version");
566              
567 8         89 my $tdict = PDFDict();
568 8         44 $tdict->{'Prev'} = PDFNum($self->{' loc'});
569 8         23 $tdict->{'Info'} = $self->{'Info'};
570 8 50       40 if (defined $self->{' newroot'}) {
571 0         0 $tdict->{'Root'} = $self->{' newroot'};
572             } else {
573 8         29 $tdict->{'Root'} = $self->{'Root'};
574             }
575 8         26 $tdict->{'Size'} = $self->{'Size'};
576              
577 8         67 foreach my $key (grep { $_ !~ m/^\s/ } keys %$self) {
  151         345  
578 25 50       69 $tdict->{$key} = $self->{$key} unless defined $tdict->{$key};
579             }
580              
581 8         46 $fh->seek($self->{' epos'}, 0);
582 8         102 $self->out_trailer($tdict, $self->{' update'});
583 8         69 close $self->{' OUTFILE'};
584              
585 8         70 return;
586             } # end of append_file()
587              
588             =item $p->out_file($fname)
589              
590             Writes a PDF file to a file of the given filename, based on the current list of
591             objects to be output. It creates the trailer dictionary based on information
592             in C<$self>.
593              
594             $fname may be a string or an IO object.
595              
596             =cut
597              
598             sub out_file {
599 171     171 1 442 my ($self, $fname) = @_;
600              
601 171         532 $self = $self->create_file($fname);
602 171         465 $self = $self->close_file();
603              
604 171         485 return $self;
605             }
606              
607             =item $p->create_file($fname)
608              
609             Creates a new output file (no check is made of an existing open file) of
610             the given filename or IO object. Note: make sure that C<< $p->{' version'} >>
611             is set correctly before calling this function.
612              
613             =cut
614              
615             sub create_file {
616 171     171 1 363 my ($self, $filename) = @_;
617 171         270 my $fh;
618              
619 171         350 $self->{' fname'} = $filename;
620 171 50       466 if (ref $filename) {
621 171         318 $fh = $filename;
622             } else {
623 0   0     0 $fh = IO::File->new(">$filename") || die "Unable to open $filename for writing";
624 0         0 binmode($fh,':raw');
625             }
626              
627 171         339 $self->{' OUTFILE'} = $fh;
628 171   50     1950 $fh->print('%PDF-' . ($self->{' version'} // '1.4') . "\n");
629 171         1573 $fh->print("%\xC6\xCD\xCD\xB5\n"); # and some binary stuff in a comment.
630              
631             # PDF spec requires 4 or more "binary" bytes (128 or higher value) in a
632             # comment immediately following the PDF-x.y header, to alert reader that
633             # there is binary data. Actual values are apparently arbitrary. This DOES
634             # mean that other comments can NOT be inserted between the header and the
635             # binary comment! PDF::Builder always outputs this comment, so is always
636             # claiming binary data (no harm done?).
637              
638 171         1025 return $self;
639             }
640              
641             =item $p->close_file()
642              
643             Closes up the open file for output, by outputting the trailer, etc.
644              
645             =cut
646              
647             sub close_file {
648 171     171 1 284 my $self = shift();
649              
650 171         520 my $tdict = PDFDict();
651 171 50       740 $tdict->{'Info'} = $self->{'Info'} if defined $self->{'Info'};
652 171 50 33     739 $tdict->{'Root'} = (defined $self->{' newroot'} and $self->{' newroot'} ne '') ? $self->{' newroot'} : $self->{'Root'};
653              
654             # remove all freed objects from the outlist, AND the outlist_cache if not updating
655             # NO! Don't do that thing! In fact, let out_trailer do the opposite!
656              
657 171   33     646 $tdict->{'Size'} = $self->{'Size'} || PDFNum(1);
658 171 50       546 $tdict->{'Prev'} = PDFNum($self->{' loc'}) if $self->{' loc'};
659 171 50       450 if ($self->{' update'}) {
660 0         0 foreach my $key (grep { $_ !~ m/^[\s\-]/ } keys %$self) {
  0         0  
661 0 0       0 $tdict->{$key} = $self->{$key} unless defined $tdict->{$key};
662             }
663              
664 0         0 my $fh = $self->{' INFILE'};
665 0         0 $fh->seek($self->{' epos'}, 0);
666             }
667              
668 171         849 $self->out_trailer($tdict, $self->{' update'});
669 171         770 close($self->{' OUTFILE'});
670 171 50 33     776 if ($^O eq 'MacOS' and not ref($self->{' fname'})) {
671 0         0 MacPerl::SetFileInfo('CARO', 'TEXT', $self->{' fname'});
672             }
673              
674 171         922 return $self;
675             } # end of close_file()
676              
677             =item ($value, $str) = $p->readval($str, %opts)
678              
679             Reads a PDF value from the current position in the file. If C<$str> is too
680             short, read some more from the current location in the file until the whole
681             object is read. This is a recursive call which may slurp in a whole big stream
682             (unprocessed).
683              
684             Returns the recursive data structure read and also the current C<$str> that has
685             been read from the file.
686              
687             =cut
688              
689             sub readval {
690 1471     1471 1 13336 my ($self, $str, %opts) = @_;
691 1471         2352 my $fh = $self->{' INFILE'};
692 1471         2023 my ($result, $value);
693              
694 1471 100       2753 my $update = defined($opts{'update'}) ? $opts{'update'} : 1;
695 1471 100       3182 $str = update($fh, $str) if $update;
696              
697 1471         4318 $str =~ s/^$ws_char+//; # Ignore initial white space
698 1471         3636 $str =~ s/^\%[^\015\012]*$ws_char+//; # Ignore comments
699              
700 1471 100       18144 if ($str =~ m/^<
    100          
    100          
    100          
    100          
    50          
    100          
    50          
    50          
    0          
701             # Dictionary
702 162         438 $str = substr ($str, 2);
703 162 100       435 $str = update($fh, $str) if $update;
704 162         516 $result = PDFDict();
705              
706 162         415 while ($str !~ m/^>>/) {
707 453         1439 $str =~ s/^$ws_char+//; # Ignore initial white space
708 453         1330 $str =~ s/^\%[^\015\012]*$ws_char+//; # Ignore comments
709              
710 453 50       2320 if ($str =~ s|^/($reg_char+)||) {
    0          
    0          
711 453         1246 my $key = PDF::Builder::Basic::PDF::Name::name_to_string($1, $self);
712 453         1641 ($value, $str) = $self->readval($str, %opts);
713             # per Vadim Repin (RT 131147) CHG 1. His conclusion is that
714             # it is highly unlikely, but remotely possible, that there
715             # could be legitimate use of Null objects that should NOT be
716             # prevented from bubbling up. If such a case is discovered, we
717             # might have to try Klaus Ethgen's more limited (in scope)
718             # patch in ./Pages.pm. See full discussion in RT 131147 for
719             # details on what's going on and how this fixes it.
720             #$result->{$key} = $value; # original code
721 453 50       1651 $result->{$key} = $value
722             unless ref($value) eq 'PDF::Builder::Basic::PDF::Null';
723             } elsif ($str =~ s|^/$ws_char+||) {
724             # fixes a broken key problem of acrobat. -- fredo
725 0         0 ($value, $str) = $self->readval($str, %opts);
726 0         0 $result->{'null'} = $value;
727             } elsif ($str =~ s|^//|/|) {
728             # fixes again a broken key problem of illustrator/enfocus. -- fredo
729 0         0 ($value, $str) = $self->readval($str, %opts);
730 0         0 $result->{'null'} = $value;
731             } else {
732 0         0 die "Invalid dictionary key";
733             }
734 453 100       1148 $str = update($fh, $str) if $update; # thanks gareth.jones@stud.man.ac.uk
735             }
736              
737 162         542 $str =~ s/^>>//;
738 162 100       451 $str = update($fh, $str) if $update;
739             # streams can't be followed by a lone carriage-return.
740             # fredo: yes they can !!! -- use the MacOS, Luke.
741             # TBD isn't this covered by $cr as space CR?
742 162 100 66     522 if (($str =~ s/^stream(?:(?:\015\012)|\012|\015)//) and ($result->{'Length'}->val() != 0)) { # stream
743 11         40 my $length = $result->{'Length'}->val();
744 11         38 $result->{' streamsrc'} = $fh;
745 11         41 $result->{' streamloc'} = $fh->tell() - length($str);
746              
747 11 50       119 unless ($opts{'nostreams'}) {
748 11 50       35 if ($length > length($str)) {
749 0         0 $value = $str;
750 0         0 $length -= length($str);
751 0         0 read $fh, $str, $length + 11; # slurp the whole stream!
752             } else {
753 11         28 $value = '';
754             }
755 11         38 $value .= substr($str, 0, $length);
756 11         31 $result->{' stream'} = $value;
757 11         25 $result->{' nofilt'} = 1;
758 11 50       48 $str = update($fh, $str, 1) if $update; # tell update we are in-stream and only need an endstream
759 11         46 $str = substr($str, index($str, 'endstream') + 9);
760             }
761             }
762              
763 162 100 100     692 if (defined $result->{'Type'} and defined $types{$result->{'Type'}->val()}) {
764 38         142 bless $result, $types{$result->{'Type'}->val()};
765             }
766             # gdj: FIXME: if any of the ws chars were crs, then the whole
767             # string might not have been read.
768              
769             } elsif ($str =~ m/^([0-9]+)(?:$ws_char|$re_comment)+([0-9]+)(?:$ws_char|$re_comment)+R/s) {
770             # Indirect Object
771 166         452 my $num = $1;
772 166         285 $value = $2;
773 166         1454 $str =~ s/^([0-9]+)(?:$ws_char|$re_comment)+([0-9]+)(?:$ws_char|$re_comment)+R//s;
774 166 100       505 unless ($result = $self->test_obj($num, $value)) {
775 131         479 $result = PDF::Builder::Basic::PDF::Objind->new();
776 131         449 $result->{' objnum'} = $num;
777 131         277 $result->{' objgen'} = $value;
778 131         322 $self->add_obj($result, $num, $value);
779             }
780 166         347 $result->{' parent'} = $self;
781 166         542 weaken $result->{' parent'};
782             #$result->{' realised'} = 0;
783             # removed to address changes being lost when an indirect object
784             # is realised twice
785             # gdj: FIXME: if any of the ws chars were crs, then the whole
786             # string might not have been read.
787              
788             } elsif ($str =~ m/^([0-9]+)(?:$ws_char|$re_comment)+([0-9]+)(?:$ws_char|$re_comment)+obj/s) {
789             # Object
790 104         203 my $obj;
791 104         272 my $num = $1;
792 104         195 $value = $2;
793 104         1098 $str =~ s/^([0-9]+)(?:$ws_char|$re_comment)+([0-9]+)(?:$ws_char|$re_comment)+obj//s;
794 104         457 ($obj, $str) = $self->readval($str, %opts);
795 104 100       294 if ($result = $self->test_obj($num, $value)) {
796 90         336 $result->merge($obj);
797             } else {
798 14         25 $result = $obj;
799 14         41 $self->add_obj($result, $num, $value);
800 14         27 $result->{' realised'} = 1;
801             }
802 104 100       310 $str = update($fh, $str) if $update; # thanks to kundrat@kundrat.sk
803 104         654 $str =~ s/^endobj//;
804              
805             } elsif ($str =~ m|^/($reg_char*)|s) {
806             # Name
807 573         1504 $value = $1;
808 573         2833 $str =~ s|^/($reg_char*)||s;
809 573         1999 $result = PDF::Builder::Basic::PDF::Name->from_pdf($value, $self);
810              
811             } elsif ($str =~ m/^\(/) {
812             # Literal String
813             # We now need to find an unbalanced, unescaped right-paren.
814             # This can't be done with a regex.
815 2         7 my $value = '(';
816 2         7 $str = substr($str, 1);
817              
818 2         6 my $nested_level = 1;
819 2         6 while (1) {
820             # Ignore everything up to the first escaped or parenthesis character
821 2 50       11 if ($str =~ /^([^\\()]+)(.*)/s) {
822 2         221 $value .= $1;
823 2         147 $str = $2;
824             }
825              
826             # Ignore escaped parentheses
827 2 50       68 if ($str =~ /^(\\[()])/) {
    50          
    50          
    0          
828 0         0 $value .= $1;
829 0         0 $str = substr($str, 2);
830              
831             } elsif ($str =~ /^\(/) {
832             # Left parenthesis: increase nesting
833 0         0 $value .= '(';
834 0         0 $str = substr($str, 1);
835 0         0 $nested_level++;
836              
837             } elsif ($str =~ /^\)/) {
838             # Right parenthesis: decrease nesting
839 2         6 $value .= ')';
840 2         6 $str = substr($str, 1);
841 2         5 $nested_level--;
842 2 50       9 last unless $nested_level;
843              
844             } elsif ($str =~ /^(\\[^()])/) {
845             # Other escaped character
846 0         0 $value .= $1;
847 0         0 $str = substr($str, 2);
848              
849             } else {
850             # If there wasn't an escaped or parenthesis character,
851             # read some more.
852              
853             # We don't use update because we don't want to remove
854             # whitespace or comments.
855 0 0       0 $fh->read($str, 255, length($str)) or die 'Unterminated string.';
856             }
857             } # end while(TRUE) loop
858              
859 2         15 $result = PDF::Builder::Basic::PDF::String->from_pdf($value);
860             # end Literal String check
861              
862             } elsif ($str =~ m/^
863             # Hex String
864 0         0 $str =~ s/^
865 0         0 $fh->read($str, 255, length($str)) while (0 > index($str, '>'));
866 0         0 ($value, $str) = ($str =~ /^(.*?)>(.*)/s);
867 0         0 $result = PDF::Builder::Basic::PDF::String->from_pdf('<' . $value . '>');
868              
869             } elsif ($str =~ m/^\[/) {
870             # Array
871 94         359 $str =~ s/^\[//;
872 94 50       304 $str = update($fh, $str) if $update;
873 94         319 $result = PDFArray();
874 94         260 while ($str !~ m/^\]/) {
875 778         2416 $str =~ s/^$ws_char+//; # Ignore initial white space
876 778         2186 $str =~ s/^\%[^\015\012]*$ws_char+//; # Ignore comments
877              
878 778         2311 ($value, $str) = $self->readval($str, %opts);
879 778         2372 $result->add_elements($value);
880 778 50       1825 $str = update($fh, $str) if $update; # str might just be exhausted!
881             }
882 94         353 $str =~ s/^\]//;
883              
884             } elsif ($str =~ m/^(true|false)($irreg_char|$)/) {
885             # Boolean
886 0         0 $value = $1;
887 0         0 $str =~ s/^(?:true|false)//;
888 0         0 $result = PDF::Builder::Basic::PDF::Bool->from_pdf($value);
889              
890             } elsif ($str =~ m/^([+-.0-9]+)($irreg_char|$)/) {
891             # Number
892 370         936 $value = $1;
893 370         1162 $str =~ s/^([+-.0-9]+)//;
894              
895             # If $str only consists of whitespace (or is empty), call update to
896             # see if this is the beginning of an indirect object or reference
897 370 100 100     4369 if ($update and ($str =~ /^$re_whitespace*$/s or $str =~ /^$re_whitespace+[0-9]+$re_whitespace*$/s)) {
      100        
898 6         57 $str =~ s/^$re_whitespace+/ /s;
899 6         55 $str =~ s/$re_whitespace+$/ /s;
900 6         16 $str = update($fh, $str);
901 6 100       82 if ($str =~ m/^$re_whitespace*([0-9]+)$re_whitespace+(?:R|obj)/s) {
902 4         19 return $self->readval("$value $str", %opts);
903             }
904             }
905              
906 366         1352 $result = PDF::Builder::Basic::PDF::Number->from_pdf($value);
907              
908             } elsif ($str =~ m/^null($irreg_char|$)/) {
909             # Null
910 0         0 $str =~ s/^null//;
911 0         0 $result = PDF::Builder::Basic::PDF::Null->new();
912              
913             } else {
914 0         0 die "Can't parse `$str' near " . ($fh->tell()) . " length " . length($str) . ".";
915             }
916              
917 1467         6943 $str =~ s/^$ws_char+//s;
918 1467         4930 return ($result, $str);
919             } # end of readval()
920              
921             =item $ref = $p->read_obj($objind, %opts)
922              
923             Given an indirect object reference, locate it and read the object returning
924             the read in object.
925              
926             =cut
927              
928             sub read_obj {
929 86     86 1 196 my ($self, $objind, %opts) = @_;
930              
931 86   50     270 my $res = $self->read_objnum($objind->{' objnum'}, $objind->{' objgen'}, %opts) || return;
932 86 50       289 $objind->merge($res) unless $objind eq $res;
933              
934 86         305 return $objind;
935             }
936              
937             =item $ref = $p->read_objnum($num, $gen, %opts)
938              
939             Returns a fully read object of given number and generation in this file
940              
941             =cut
942              
943             sub read_objnum {
944 94     94 1 2520 my ($self, $num, $gen, %opts) = @_;
945              
946 94 50       258 croak 'Undefined object number in call to read_objnum($num, $gen)' unless defined $num;
947 94 50       215 croak 'Undefined object generation in call to read_objnum($num, $gen)' unless defined $gen;
948 94 50       456 croak "Invalid object number '$num' in call to read_objnum" unless $num =~ /^[0-9]+$/;
949 94 50       314 croak "Invalid object generation '$gen' in call to read_objnum" unless $gen =~ /^[0-9]+$/;
950              
951 94   50     270 my $object_location = $self->locate_obj($num, $gen) || return;
952 94         170 my $object;
953              
954             # Compressed object
955 94 100       219 if (ref($object_location)) {
956 4         7 my ($object_stream_num, $object_stream_pos) = @{$object_location};
  4         10  
957              
958 4         35 my $object_stream = $self->read_objnum($object_stream_num, 0, %opts);
959 4 50       12 die 'Cannot find the compressed object stream' unless $object_stream;
960              
961 4 50       25 $object_stream->read_stream() if $object_stream->{' nofilt'};
962              
963             # An object stream starts with pairs of integers containing object numbers and
964             # stream offsets relative to the First key
965 4         7 my $fh;
966             my $pairs;
967 4 50       11 unless ($object_stream->{' streamfile'}) {
968 4         15 $pairs = substr($object_stream->{' stream'}, 0, $object_stream->{'First'}->val());
969             } else {
970 0         0 CORE::open($fh, '<', $object_stream->{' streamfile'});
971 0         0 read($fh, $pairs, $object_stream->{'First'}->val());
972             }
973 4         30 my @map = split /\s+/, $pairs;
974              
975             # Find the offset of the object in the stream
976 4         10 my $index = $object_stream_pos * 2;
977 4 50       15 die "Objind $num does not exist at index $index" unless $map[$index] == $num;
978 4         10 my $start = $map[$index + 1];
979              
980             # Unless this is the last object in the stream, its length is
981             # determined by the offset of the next object.
982 4         10 my $last_object_in_stream = $map[-2];
983 4         7 my $length;
984 4 100       22 if ($last_object_in_stream == $num) {
985 2 50       9 if ($object_stream->{' stream'}) {
986 2         12 $length = length($object_stream->{' stream'}) - $object_stream->{'First'}->val() - $start;
987             } else {
988 0         0 $length = (-s $object_stream->{' streamfile'}) - $object_stream->{'First'}->val() - $start;
989             }
990             } else {
991 2         7 my $next_start = $map[$index + 3];
992 2         7 $length = $next_start - $start;
993             }
994              
995             # Read the object from the stream
996 4         12 my $stream = "$num 0 obj ";
997 4 50       13 unless ($object_stream->{' streamfile'}) {
998 4         13 $stream .= substr($object_stream->{' stream'}, $object_stream->{'First'}->val() + $start, $length);
999             } else {
1000 0         0 seek($fh, $object_stream->{'First'}->val() + $start, 0);
1001 0         0 read($fh, $stream, $length, length($stream));
1002 0         0 close $fh;
1003             }
1004              
1005 4         17 ($object) = $self->readval($stream, %opts, update => 0);
1006 4         19 return $object;
1007             }
1008              
1009 90         316 my $current_location = $self->{' INFILE'}->tell();
1010 90         646 $self->{' INFILE'}->seek($object_location, 0);
1011 90         509 ($object) = $self->readval('', %opts);
1012 90         434 $self->{' INFILE'}->seek($current_location, 0);
1013              
1014 90         668 return $object;
1015             } # end of read_objnum()
1016              
1017             =item $objind = $p->new_obj($obj)
1018              
1019             Creates a new, free object reference based on free space in the cross reference
1020             chain. If nothing is free, then think up a new number. If C<$obj>, then turns
1021             that object into this new object rather than returning a new object.
1022              
1023             =cut
1024              
1025             sub new_obj {
1026 1365     1365 1 2724 my ($self, $base) = @_;
1027 1365         2080 my $res;
1028              
1029 1365 50 66     3620 if (defined $self->{' free'} and scalar @{$self->{' free'}} > 0) {
  14         81  
1030 0         0 $res = shift(@{$self->{' free'}});
  0         0  
1031 0 0       0 if (defined $base) {
1032 0         0 my ($num, $gen) = @{$self->{' objects'}{$res->uid()}};
  0         0  
1033 0         0 $self->remove_obj($res);
1034 0         0 $self->add_obj($base, $num, $gen);
1035 0         0 return $self->out_obj($base);
1036             } else {
1037 0         0 $self->{' objects'}{$res->uid()}[2] = 0;
1038 0         0 return $res;
1039             }
1040             }
1041              
1042 1365         2241 my $tdict = $self;
1043 1365         1952 my $i;
1044 1365         3087 while (defined $tdict) {
1045 1366 50       4114 $i = $tdict->{' xref'}{defined($i) ? $i : ''}[0];
1046 1366   33     3427 while (defined $i and $i != 0) {
1047 0         0 my ($ni, $ng) = @{$tdict->{' xref'}{$i}};
  0         0  
1048 0 0       0 unless (defined $self->locate_obj($i, $ng)) {
1049 0 0       0 if (defined $base) {
1050 0         0 $self->add_obj($base, $i, $ng);
1051 0         0 return $base;
1052             } else {
1053 0   0     0 $res = $self->test_obj($i, $ng) || $self->add_obj(PDF::Builder::Basic::PDF::Objind->new(), $i, $ng);
1054 0         0 $self->out_obj($res);
1055 0         0 return $res;
1056             }
1057             }
1058 0         0 $i = $ni;
1059             }
1060 1366         3026 $tdict = $tdict->{' prev'};
1061             }
1062              
1063 1365         2525 $i = $self->{' maxobj'}++;
1064 1365 50       2568 if (defined $base) {
1065 1365         3975 $self->add_obj($base, $i, 0);
1066 1365         3618 $self->out_obj($base);
1067 1365         3168 return $base;
1068             } else {
1069 0         0 $res = $self->add_obj(PDF::Builder::Basic::PDF::Objind->new(), $i, 0);
1070 0         0 $self->out_obj($res);
1071 0         0 return $res;
1072             }
1073             }
1074              
1075             =item $p->out_obj($obj)
1076              
1077             Indicates that the given object reference should appear in the output xref
1078             table whether with data or freed.
1079              
1080             =cut
1081              
1082             sub out_obj {
1083 2991     2991 1 5495 my ($self, $obj) = @_;
1084              
1085             # This is why we've been keeping the outlist CACHE around; to speed
1086             # up this method by orders of magnitude (it saves up from having to
1087             # grep the full outlist each time through as we'll just do a lookup
1088             # in the hash) (which is super-fast).
1089 2991 100       8456 unless (exists $self->{' outlist_cache'}{$obj}) {
1090 1382         2107 push @{$self->{' outlist'}}, $obj;
  1382         3155  
1091             # weaken $self->{' outlist'}->[-1];
1092 1382         4718 $self->{' outlist_cache'}{$obj} = 1;
1093             }
1094              
1095 2991         5615 return $obj;
1096             }
1097              
1098             =item $p->free_obj($obj)
1099              
1100             Marks an object reference for output as being freed.
1101              
1102             =cut
1103              
1104             sub free_obj {
1105 0     0 1 0 my ($self, $obj) = @_;
1106              
1107 0         0 push @{$self->{' free'}}, $obj;
  0         0  
1108 0         0 $self->{' objects'}{$obj->uid()}[2] = 1;
1109 0         0 $self->out_obj($obj);
1110              
1111 0         0 return;
1112             }
1113              
1114             =item $p->remove_obj($objind)
1115              
1116             Removes the object from all places where we might remember it.
1117              
1118             =cut
1119              
1120             sub remove_obj {
1121 0     0 1 0 my ($self, $objind) = @_;
1122              
1123             # who says it has to be fast
1124 0         0 delete $self->{' objects'}{$objind->uid()};
1125 0         0 delete $self->{' outlist_cache'}{$objind};
1126 0         0 delete $self->{' printed_cache'}{$objind};
1127 0         0 @{$self->{' outlist'}} = grep { $_ ne $objind } @{$self->{' outlist'}};
  0         0  
  0         0  
  0         0  
1128 0         0 @{$self->{' printed'}} = grep { $_ ne $objind } @{$self->{' printed'}};
  0         0  
  0         0  
  0         0  
1129             $self->{' objcache'}{$objind->{' objnum'}, $objind->{' objgen'}} = undef
1130 0 0       0 if $self->{' objcache'}{$objind->{' objnum'}, $objind->{' objgen'}} eq $objind;
1131              
1132 0         0 return $self;
1133             }
1134              
1135             =item $p->ship_out(@objects)
1136              
1137             =item $p->ship_out()
1138              
1139             Ships the given objects (or all objects for output if C<@objects> is empty) to
1140             the currently open output file (assuming there is one). Freed objects are not
1141             shipped, and once an object is shipped it is switched such that this file
1142             becomes its source and it will not be shipped again unless out_obj is called
1143             again. Notice that a shipped out object can be re-output or even freed, but
1144             that it will not cause the data already output to be changed.
1145              
1146             =cut
1147              
1148             sub ship_out {
1149 184     184 1 406 my ($self, @objects) = @_;
1150              
1151 184 50       471 return unless defined $self->{' OUTFILE'};
1152 184         342 my $fh = $self->{' OUTFILE'};
1153 184         519 seek($fh, 0, 2); # go to the end of the file
1154              
1155 184 50       540 @objects = @{$self->{' outlist'}} unless scalar @objects > 0;
  184         543  
1156 184         469 foreach my $objind (@objects) {
1157 1074 50       3106 next unless $objind->is_obj($self);
1158 1074         1934 my $j = -1;
1159 1074         1798 for (my $i = 0; $i < scalar @{$self->{' outlist'}}; $i++) {
  1074         2552  
1160 1074 50       3237 if ($self->{' outlist'}[$i] eq $objind) {
1161 1074         1622 $j = $i;
1162 1074         1964 last;
1163             }
1164             }
1165 1074 50       2291 next if $j < 0;
1166 1074         1562 splice(@{$self->{' outlist'}}, $j, 1);
  1074         2084  
1167 1074         3215 delete $self->{' outlist_cache'}{$objind};
1168 1074 50       1527 next if grep { $_ eq $objind } @{$self->{' free'}};
  0         0  
  1074         2849  
1169              
1170 1074 50       2314 map { $fh->print("\% $_ \n") } split(/$cr/, $objind->{' comments'}) if $objind->{' comments'};
  0         0  
1171 1074         2996 $self->{' locs'}{$objind->uid()} = $fh->tell();
1172 1074         1930 my ($objnum, $objgen) = @{$self->{' objects'}{$objind->uid()}}[0..1];
  1074         2370  
1173 1074         3220 $fh->printf('%d %d obj ', $objnum, $objgen);
1174 1074         11176 $objind->outobjdeep($fh, $self);
1175 1074         2921 $fh->print("\nendobj\n");
1176              
1177             # Note that we've output this obj, not forgetting to update
1178             # the cache of what's printed.
1179 1074 50       7416 unless (exists $self->{' printed_cache'}{$objind}) {
1180 1074         1639 push @{$self->{' printed'}}, $objind;
  1074         2964  
1181 1074         3817 $self->{' printed_cache'}{$objind}++;
1182             }
1183             }
1184              
1185 184         524 return $self;
1186             } # end of ship_out()
1187              
1188             =item $p->copy($outpdf, \&filter)
1189              
1190             Iterates over every object in the file reading the object, calling C
1191             with the object, and outputting the result. If C is not defined,
1192             just copies input to output.
1193              
1194             =cut
1195              
1196             sub copy {
1197 0     0 1 0 my ($self, $outpdf, $filter) = @_;
1198 0         0 my ($obj, $minl, $mini, $ming);
1199              
1200 0         0 foreach my $key (grep { not m/^[\s\-]/ } keys %$self) {
  0         0  
1201 0 0       0 $outpdf->{$key} = $self->{$key} unless defined $outpdf->{$key};
1202             }
1203              
1204 0         0 my $tdict = $self;
1205 0         0 while (defined $tdict) {
1206 0         0 foreach my $i (sort {$a <=> $b} keys %{$tdict->{' xref'}}) {
  0         0  
  0         0  
1207 0         0 my ($nl, $ng, $nt) = @{$tdict->{' xref'}{$i}};
  0         0  
1208 0 0       0 next unless $nt eq 'n';
1209              
1210 0 0 0     0 if ($nl < $minl or $mini == 0) {
1211 0         0 $mini = $i;
1212 0         0 $ming = $ng;
1213 0         0 $minl = $nl;
1214             }
1215 0 0       0 unless ($obj = $self->test_obj($i, $ng)) {
1216 0         0 $obj = PDF::Builder::Basic::PDF::Objind->new();
1217 0         0 $obj->{' objnum'} = $i;
1218 0         0 $obj->{' objgen'} = $ng;
1219 0         0 $self->add_obj($obj, $i, $ng);
1220 0         0 $obj->{' parent'} = $self;
1221 0         0 weaken $obj->{' parent'};
1222 0         0 $obj->{' realised'} = 0;
1223             }
1224 0         0 $obj->realise();
1225 0 0       0 my $res = defined $filter ? &{$filter}($obj) : $obj;
  0         0  
1226 0 0 0     0 $outpdf->new_obj($res) unless (!$res || $res->is_obj($outpdf));
1227             }
1228 0         0 $tdict = $tdict->{' prev'};
1229             }
1230              
1231             # test for linearized and remove it from output
1232 0         0 $obj = $self->test_obj($mini, $ming);
1233 0 0 0     0 if ($obj->isa('PDF::Builder::Basic::PDF::Dict') && $obj->{'Linearized'}) {
1234 0         0 $outpdf->free_obj($obj);
1235             }
1236              
1237 0         0 return $self;
1238             } # end of copy()
1239              
1240             =back
1241              
1242             =head1 PRIVATE METHODS & FUNCTIONS
1243              
1244             The following methods and functions are considered B to this class.
1245             This does not mean you cannot use them if you have a need, just that they
1246             aren't really designed for users of this class.
1247              
1248             =over
1249              
1250             =item $offset = $p->locate_obj($num, $gen)
1251              
1252             Returns a file offset to the object asked for by following the chain of cross
1253             reference tables until it finds the one you want.
1254              
1255             =cut
1256              
1257             sub locate_obj {
1258 94     94 1 236 my ($self, $num, $gen) = @_;
1259              
1260 94         182 my $tdict = $self;
1261 94         226 while (defined $tdict) {
1262 103 100       322 if (ref $tdict->{' xref'}{$num}) {
1263 94         197 my $ref = $tdict->{' xref'}{$num};
1264 94 100       229 return $ref unless scalar(@$ref) == 3;
1265              
1266 90 50       254 if ($ref->[1] == $gen) {
1267 90 50       477 return $ref->[0] if $ref->[2] eq 'n';
1268 0         0 return; # if $ref->[2] eq 'f';
1269             }
1270             }
1271 9         24 $tdict = $tdict->{' prev'};
1272             }
1273              
1274 0         0 return;
1275             }
1276              
1277             =item update($fh, $str, $instream)
1278              
1279             Keeps reading C<$fh> for more data to ensure that C<$str> has at least a line
1280             full for C to work on. At this point we also take the opportunity to
1281             ignore comments.
1282              
1283             =cut
1284              
1285             sub update {
1286 3242     3242 1 5667 my ($fh, $str, $instream) = @_;
1287              
1288 3242 50       5618 print STDERR 'fpos=' . tell($fh) . ' strlen=' . length($str) . "\n" if $readDebug;
1289 3242 100       5149 if ($instream) {
1290             # we are inside a (possible binary) stream
1291             # so we fetch data till we see an 'endstream'
1292             # -- fredo/2004-09-03
1293 11   33     68 while ($str !~ m/endstream/ and not $fh->eof()) {
1294 0 0       0 print STDERR 'fpos=' . tell($fh) . ' strlen=' . length($str) . "\n" if $readDebug;
1295 0         0 $fh->read($str, 314, length($str));
1296             }
1297             } else {
1298 3231         13252 $str =~ s/^$ws_char*//;
1299 3231   100     147106 while ($str !~ m/$cr/ and not $fh->eof()) {
1300 128 50       1100 print STDERR 'fpos=' . tell($fh) . ' strlen=' . length($str) . "\n" if $readDebug;
1301 128         441 $fh->read($str, 314, length($str));
1302 128         4521 $str =~ s/^$ws_char*//so;
1303             }
1304 3231         8125 while ($str =~ m/^\%/) { # restructured by fredo/2003-03-23
1305 1 50       4 print STDERR 'fpos=' . tell($fh) . ' strlen=' . length($str) . "\n" if $readDebug;
1306 1   33     35 $fh->read($str, 314, length($str)) while ($str !~ m/$cr/ and not $fh->eof());
1307 1         25 $str =~ s/^\%[^\015\012]*$ws_char*//so; # fixed for reportlab -- fredo
1308             }
1309             }
1310              
1311 3242         7744 return $str;
1312             } # end of update()
1313              
1314             =item $objind = $p->test_obj($num, $gen)
1315              
1316             Tests the cache to see whether an object reference (which may or may not have
1317             been getobj()ed) has been cached. Returns it if it has.
1318              
1319             =cut
1320              
1321             sub test_obj {
1322 270     270 1 578 my ($self, $num, $gen) = @_;
1323              
1324 270         1080 return $self->{' objcache'}{$num, $gen};
1325             }
1326              
1327             =item $p->add_obj($objind)
1328              
1329             Adds the given object to the internal object cache.
1330              
1331             =cut
1332              
1333             sub add_obj {
1334 1510     1510 1 3392 my ($self, $obj, $num, $gen) = @_;
1335              
1336 1510         5203 $self->{' objcache'}{$num, $gen} = $obj;
1337 1510         7420 $self->{' objects'}{$obj->uid()} = [$num, $gen];
1338             # weaken $self->{' objcache'}{$num, $gen};
1339 1510         3065 return $obj;
1340             }
1341              
1342             =item $tdict = $p->readxrtr($xpos, %options)
1343              
1344             Recursive function which reads each of the cross-reference and trailer tables
1345             in turn until there are no more.
1346              
1347             Returns a dictionary corresponding to the trailer chain. Each trailer also
1348             includes the corresponding cross-reference table.
1349              
1350             The structure of the xref private element in a trailer dictionary is of an
1351             anonymous hash of cross reference elements by object number. Each element
1352             consists of an array of 3 elements corresponding to the three elements read
1353             in [location, generation number, free or used]. See the PDF specification
1354             for details.
1355              
1356             See C for options allowed.
1357              
1358             =cut
1359              
1360             sub _unpack_xref_stream {
1361 78     78   148 my ($self, $width, $data) = @_;
1362              
1363             # handle some oddball cases
1364 78 50       223 if ($width == 3) {
    50          
    50          
    50          
1365 0         0 $data = "\x00$data";
1366 0         0 $width = 4;
1367             } elsif ($width == 5) {
1368 0         0 $data = "\x00\x00\x00$data";
1369 0         0 $width = 8;
1370             } elsif ($width == 6) {
1371 0         0 $data = "\x00\x00$data";
1372 0         0 $width = 8;
1373             } elsif ($width == 7) {
1374 0         0 $data = "\x00$data";
1375 0         0 $width = 8;
1376             }
1377             # in all cases, "Network" (Big-Endian) byte order assumed
1378 78 100       155 return unpack('C', $data) if $width == 1;
1379 52 50       121 return unpack('n', $data) if $width == 2;
1380 0 0       0 return unpack('N', $data) if $width == 4;
1381 0 0       0 if ($width == 8) {
1382             # Some ways other packages handle this, without Perl-64, according
1383             # to Vadim Repin. Possibly they end up converting the value to
1384             # "double" behind the scenes if on a 32-bit platform?
1385             # PDF::Tiny return hex unpack('H16', $data);
1386             # CAM::PDF my @b = unpack('C*', $data);
1387             # my $i=0; ($i <<= 8) += shift @b while @b; return $i;
1388            
1389 0 0       0 if (substr($data, 0, 4) eq "\x00\x00\x00\x00") {
1390             # can treat as 32 bit unsigned int
1391 0         0 return unpack('N', substr($data, 4, 4));
1392             } else {
1393             # requires 64-bit platform (chip and Perl), else fatal error
1394             # it may blow up and produce a smoking crater if 32-bit Perl!
1395             # also note that Q needs Big-Endian flag (>) specified, else
1396             # it will use the native chip order (Big- or Little- Endian)
1397 0         0 return unpack('Q>', $data);
1398             }
1399             }
1400              
1401 0         0 die "Unsupported field width: $width. 1-8 supported.";
1402             }
1403              
1404             sub readxrtr {
1405 21     21 1 107 my ($self, $xpos, %options) = @_;
1406             # $xpos SHOULD be pointing to "xref" keyword
1407             # copy dashed option names to preferred undashed names
1408 21 50 33     101 if (defined $options{'-diags'} && !defined $options{'diags'}) { $options{'diags'} = delete($options{'-diags'}); }
  0         0  
1409              
1410 21         51 my ($tdict, $buf, $xmin, $xnum, $xdiff);
1411              
1412 21         49 my $fh = $self->{' INFILE'};
1413 21         90 $fh->seek($xpos, 0);
1414 21         181 $fh->read($buf, 22); # 22 should overlap into first subsection
1415 21         218 $buf = update($fh, $buf); # fix for broken JAWS xref calculation.
1416              
1417 21         54 my $xlist = {};
1418              
1419             ## it seems that some products calculate wrong prev entries (short)
1420             ## so we seek ahead to find one -- fredo; save for now
1421             #while ($buf !~ m/^xref$cr/i && !eof($fh)) {
1422             # $buf =~ s/^(\s+|\S+|.)//i;
1423             # $buf = update($fh, $buf);
1424             #}
1425              
1426 21 100       336 if ($buf =~ s/^xref$cr//i) { # remove xrefEOL from buffer
    50          
1427             # Plain XRef tables.
1428             #
1429             # look to match startobj# count# EOL of first (or only) subsection
1430             # supposed to be single ASCII space between numbers, but this is
1431             # more lenient for some writers, allowing 1 or more whitespace
1432 18         44 my $subsection_count = 0;
1433 18         30 my $entry_format_error = 0;
1434 18         45 my $xrefListEmpty = 0;
1435              
1436 18         529 while ($buf =~ m/^$ws_char*([0-9]+)$ws_char+([0-9]+)$ws_char*$cr(.*?)$/s) {
1437 23         58 my $old_buf = $buf;
1438 23         74 $xmin = $1; # starting object number of this subsection
1439 23         48 $xnum = $2; # number of entries in this subsection
1440 23         63 $buf = $3; # remainder of buffer
1441 23         39 $subsection_count++;
1442             # go back and warn if other than single space separating numbers
1443 23 50       316 unless ($old_buf =~ /^[0-9]+ [0-9]+$cr/) { #orig 'warn'
1444 0 0       0 if ($options{'diags'} == 1) {
1445             # See PDF 1.7 section 7.5.4: Cross-Reference Table
1446 0         0 warn "Malformed xref: subsection header needs a single\n" .
1447             "ASCII space between the numbers and no extra spaces.\n";
1448             }
1449             }
1450 23         57 $xdiff = length($buf); # how much remaining in buffer
1451              
1452             # in case xnum == 0 is permitted (or used and tolerated by readers),
1453             # skip over entry reads and go to next subsection
1454 23 50       81 if ($xnum < 1) {
1455 0 0       0 if ($options{'diags'} == 1) {
1456 0         0 warn "Xref subsection has 0 entries. Skipped.\n";
1457             }
1458 0         0 $xrefListEmpty = 1;
1459 0         0 next;
1460             }
1461              
1462             # read chunk of entire subsection list
1463 23         63 my $entry_size = 20;
1464             # test read first entry, see if $cr in expected place, adjust size
1465 23         124 $fh->read($buf, $entry_size * 1 - $xdiff + 15, $xdiff);
1466 23 50       521 if ($buf =~ m/^(.*?)$cr/) {
1467 23         87 $entry_size = length($1) + 2;
1468             }
1469 23 50 33     111 if ($entry_size != 20 && $options{'diags'} == 1) {
1470 0         0 warn "Xref entries supposed to be 20 bytes long, are $entry_size.\n";
1471             }
1472 23         51 $xdiff = length($buf);
1473              
1474             # read remaining entries
1475 23         115 $fh->read($buf, $entry_size * $xnum - $xdiff + 15, $xdiff);
1476             # each entry is two integers and flag. spec says single ASCII space
1477             # between each field and certain length for each (10, 5, 1), so
1478             # this appears to be more lenient than spec
1479             # is object 0 supposed to be in subsection 1, or is any place OK?
1480 23   66     822 while ($xnum-- > 0 and
1481             $buf =~ m/^$ws_char*(\d+)$ws_char+(\d+)$ws_char+([nf])$ws_char*$cr/) {
1482             # check if format doesn't match spec
1483 132 50 33     875 if ($buf =~ m/^\d{10} \d{5} [nf]$cr/ ||
1484             $entry_format_error) {
1485             # format OK or have already reported format problem
1486             } else {
1487 0 0       0 if ($options{'diags'} == 1) {
1488 0         0 warn "Xref entry readable, but doesn't meet PDF spec.\n";
1489             }
1490 0         0 $entry_format_error++;
1491             }
1492              
1493 132         1054 $buf =~ s/^$ws_char*(\d+)$ws_char+(\d+)$ws_char+([nf])$ws_char*$cr//;
1494             # $1 = object's starting offset in file (n) or
1495             # next object in free list (f) [0 if last]
1496             # $2 = generation number (n) or 65535 for object 0 (f) or
1497             # next generation number (f)
1498             # $3 = flag (n = object in use, f = free)
1499             # buf reduced by entry just processed
1500 132 50       367 if (exists $xlist->{$xmin}) {
1501 0 0       0 if ($options{'diags'} == 1) {
1502 0         0 warn "Duplicate object number $xmin in xref table ignored.\n";
1503             }
1504             } else {
1505 132         595 $xlist->{$xmin} = [$1, $2, $3];
1506 132 50 66     404 if ($xmin == 0 && $subsection_count > 1 && $options{'diags'} == 1) {
      33        
1507 0         0 warn "Xref object 0 entry not in first subsection.\n";
1508             }
1509             }
1510 132         952 $xmin++;
1511             } # traverse one subsection for objects xmin through xmin+xnum-1
1512             # go back for next subsection (if any)
1513             } # loop through xref subsections
1514             # fall through to here when run out of xref subsections
1515             # xlist should have two or more object entries, may not be contiguous
1516              
1517             # should have an object 0
1518             # at this point, no idea if object 0 was in first subsection (legal?)
1519             # could attempt a fixup if no object 0 found. many fixups are quite
1520             # risky and could end up corrupting the free list.
1521             # there's no guarantee that a proper free list will result, but any
1522             # error should hopefully be caught further on
1523 18 0 33     70 if (!exists $xlist->{'0'} && !$xrefListEmpty) {
1524             # for now, 1 subsection starting with 1, and only object 1 in
1525             # free list, try to fix up
1526 0 0 0     0 if ($subsection_count == 1 && exists $xlist->{'1'}) {
1527             # apparently a common enough error in PDF writers
1528              
1529 0 0 0     0 if ($xlist->{'1'}[0] == 0 && # only member of free list
      0        
1530             $xlist->{'1'}[1] == 65535 &&
1531             $xlist->{'1'}[2] eq 'f') {
1532             # object 1 appears to be the free list head, so shift
1533             # down all objects
1534 0 0       0 if ($options{'diags'} == 1) {
1535 0         0 warn "xref appears to be mislabeled starting with 1. Shift down all elements.\n";
1536             }
1537 0         0 my $next = 1;
1538 0         0 while (exists $xlist->{$next}) {
1539 0         0 $xlist->{$next - 1} = $xlist->{$next};
1540 0         0 $next++;
1541             }
1542 0         0 delete $xlist->{--$next};
1543              
1544             } else {
1545             # if object 1 does not appear to be a free list head,
1546             # insert a new object 0
1547 0 0       0 if ($options{'diags'} == 1) {
1548 0         0 warn "Xref appears to be missing object 0. Insert a new one.\n";
1549             }
1550 0         0 $xlist->{'0'} = [0, 65535, 'f'];
1551             }
1552             } else {
1553 0 0       0 if ($options{'diags'} == 1) {
1554 0         0 warn "Malformed cross reference list in PDF file $self->{' fname'} -- no object 0 (free list head)\n";
1555             }
1556 0         0 $xlist->{'0'} = [0, 65535, 'f'];
1557             }
1558             } # no object 0 entry
1559              
1560             # build/validate the free list (and no active objects have f flag)
1561 18         61 my @free_list;
1562 18         39 foreach (sort {$a <=> $b} keys %{ $xlist }) {
  249         406  
  18         152  
1563             # if 'f' flag, is in free list
1564 132 100       351 if ($xlist->{$_}[2] eq 'f') {
    50          
1565 18 50 33     116 if ($xlist->{$_}[1] <= 0 && $options{'diags'} == 1) {
1566 0         0 warn "Xref free list entry $_ with bad next generation number.\n";
1567             } else {
1568 18         73 push @free_list, $_; # should be in numeric order (0 first)
1569             }
1570             } elsif ($xlist->{$_}[2] eq 'n') {
1571 114 50 33     270 if ($xlist->{$_}[0] <= 0 && $options{'diags'} == 1) {
1572 0         0 warn "Xref active object $_ entry with bad length ".($xlist->{$_}[1])."\n";
1573             }
1574 114 50 33     282 if ($xlist->{$_}[1] < 0 && $options{'diags'} == 1) {
1575 0         0 warn "Xref active object $_ entry with bad generation number ".($xlist->{$_}[1])."\n";
1576             }
1577             } else {
1578 0 0       0 if ($options{'diags'} == 1) {
1579 0         0 warn "Xref entry has flag that is not 'f' or 'n'.\n";
1580             }
1581             }
1582             } # go through xlist and build free_list and check entries
1583             # traverse free list and check that "next object" is also in free list
1584 18         80 my $next_free = 0; # object 0 should always be in free list
1585 18 50 33     90 if ($xlist->{'0'}[1] != 65535 && $options{'diags'} == 1) {
1586 0         0 warn "Object 0 next generation is not 65535.\n";
1587             }
1588             do {
1589 18 50       87 if ($xlist->{$next_free}[2] ne 'f') {
1590 0 0       0 if ($options{'diags'} == 1) {
1591 0         0 warn "Corrupted free object list: next=$next_free is not a free object.\n";
1592             }
1593 0         0 $next_free = 0; # force end of free list
1594             } else {
1595 18         53 $next_free = $xlist->{$next_free}[0];
1596             }
1597             # remove this entry from free list array
1598 18         155 splice(@free_list, index(@free_list, $next_free), 1);
1599 18   33     36 } while ($next_free && exists $xlist->{$next_free});
1600 18 50 33     100 if (scalar @free_list && $options{'diags'} == 1) {
1601 0         0 warn "Corrupted xref list: object(s) @free_list marked as free, but are not in free chain.\n";
1602             }
1603              
1604             # done with cross reference table, so go on to trailer
1605 18 50 33     128 if ($buf !~ /^\s*trailer\b/i && $options{'diags'} == 1) { #orig 'die'
1606 0         0 warn "Malformed trailer in PDF file $self->{' fname'} at " . ($fh->tell() - length($buf));
1607             }
1608              
1609 18         85 $buf =~ s/^\s*trailer\b//i;
1610              
1611 18         86 ($tdict, $buf) = $self->readval($buf);
1612              
1613             } elsif ($buf =~ m/^(\d+)\s+(\d+)\s+obj/i) {
1614 3         17 my ($xref_obj, $xref_gen) = ($1, $2);
1615 3         16 $PDF::Builder::global_pdf->verCheckOutput(1.5, "importing cross-reference stream");
1616             # XRef streams
1617 3         13 ($tdict, $buf) = $self->readval($buf);
1618              
1619 3 50       13 unless ($tdict->{' stream'}) {
1620 0 0       0 if ($options{'diags'} == 1) {
1621 0         0 warn "Malformed XRefStm at $xref_obj $xref_gen obj in PDF file $self->{' fname'}";
1622             }
1623             }
1624 3         15 $tdict->read_stream(1);
1625              
1626 3         10 my $stream = $tdict->{' stream'};
1627 3         7 my @widths = map { $_->val() } @{$tdict->{'W'}->val()};
  9         37  
  3         14  
1628              
1629 3         7 my $start = 0;
1630 3         9 my $last;
1631              
1632             my @index;
1633 3 100       11 if (defined $tdict->{'Index'}) {
1634 1         2 @index = map { $_->val() } @{$tdict->{'Index'}->val()};
  2         6  
  1         13  
1635             } else {
1636 2         8 @index = (0, $tdict->{'Size'}->val());
1637             }
1638              
1639 3         11 while (scalar @index) {
1640 3         8 $start = shift(@index);
1641 3         9 $last = $start + shift(@index) - 1;
1642              
1643 3         14 for my $i ($start...$last) {
1644             # Replaced "for $xmin" because it creates a loop-specific local
1645             # variable, and we need $xmin to be correct for maxobj below.
1646 26         39 $xmin = $i;
1647              
1648 26         43 my @cols;
1649              
1650 26         37 for my $w (@widths) {
1651 78         133 my $data;
1652 78 50       212 $data = $self->_unpack_xref_stream($w, substr($stream, 0, $w, '')) if $w;
1653              
1654 78         154 push @cols, $data;
1655             }
1656              
1657 26 100       52 $cols[0] = 1 unless defined $cols[0];
1658 26 50 33     55 if ($cols[0] > 2 && $options{'diags'} == 1) {
1659 0         0 warn "Invalid XRefStm entry type ($cols[0]) at $xref_obj $xref_gen obj";
1660             }
1661              
1662 26 50       54 next if exists $xlist->{$xmin};
1663              
1664 26 50       78 my @objind = ($cols[1], defined($cols[2]) ? $cols[2] : ($xmin ? 0 : 65535));
    100          
1665 26 100       64 push @objind, ($cols[0] == 0? 'f': 'n') if $cols[0] < 2;
    100          
1666              
1667 26         80 $xlist->{$xmin} = \@objind;
1668             }
1669             }
1670              
1671             } else { #orig 'die'
1672 0 0       0 if ($options{'diags'} == 1) {
1673 0         0 warn "Malformed xref in PDF file $self->{' fname'}";
1674             }
1675             }
1676              
1677             # did we get to here without managing to set $xmin?
1678 21   50     102 $xmin ||= 0;
1679              
1680 21         79 $tdict->{' loc'} = $xpos;
1681 21         50 $tdict->{' xref'} = $xlist;
1682 21 100       115 $self->{' maxobj'} = $xmin + 1 if $xmin + 1 > $self->{' maxobj'};
1683             $tdict->{' prev'} = $self->readxrtr($tdict->{'Prev'}->val(), %options)
1684 21 100 66     86 if (defined $tdict->{'Prev'} and $tdict->{'Prev'}->val() != 0);
1685 21 100       91 delete $tdict->{' prev'} unless defined $tdict->{' prev'};
1686              
1687 21         97 return $tdict;
1688             } # end of readxrtr()
1689              
1690             =item $p->out_trailer($tdict, $update)
1691              
1692             =item $p->out_trailer($tdict)
1693              
1694             Outputs the body and trailer for a PDF file by outputting all the objects in
1695             the ' outlist' and then outputting a xref table for those objects and any
1696             freed ones. It then outputs the trailing dictionary and the trailer code.
1697              
1698             =cut
1699              
1700             sub out_trailer {
1701 179     179 1 644 my ($self, $tdict, $update) = @_;
1702              
1703 179         434 my $fh = $self->{' OUTFILE'};
1704              
1705 179         325 while (@{$self->{' outlist'}}) {
  363         1104  
1706 184         542 $self->ship_out();
1707             }
1708              
1709 179         714 $tdict->{'Size'} = PDFNum($self->{' maxobj'});
1710              
1711 179         590 my $tloc = $fh->tell();
1712             ## $fh->print("xref\n");
1713             # instead of directly outputting (fh->print) xreflist, we accumulate in @out
1714 179         965 my @out;
1715 179 100       309 my @xreflist = sort { $self->{' objects'}{$a->uid()}[0] <=> $self->{' objects'}{$b->uid()}[0] } (@{$self->{' printed'} || []}, @{$self->{' free'} || []});
  1610 100       3465  
  179         662  
  179         980  
1716              
1717 179         469 my ($i, $j, $k);
1718 179 100       449 unless ($update) {
1719 171         314 $i = 1;
1720 171         569 for ($j = 0; $j < @xreflist; $j++) {
1721 1052         1388 my @inserts;
1722 1052         1557 $k = $xreflist[$j];
1723 1052         2226 while ($i < $self->{' objects'}{$k->uid()}[0]) {
1724 0         0 my ($n) = PDF::Builder::Basic::PDF::Objind->new();
1725 0         0 $self->add_obj($n, $i, 0);
1726 0         0 $self->free_obj($n);
1727 0         0 push(@inserts, $n);
1728 0         0 $i++;
1729             }
1730 1052         1786 splice(@xreflist, $j, 0, @inserts);
1731 1052         1443 $j += @inserts;
1732 1052         2215 $i++;
1733             }
1734             }
1735              
1736 179 100       401 my @freelist = sort { $self->{' objects'}{$a->uid()}[0] <=> $self->{' objects'}{$b->uid()}[0] } @{$self->{' free'} || []};
  0         0  
  179         613  
1737              
1738 179         330 $j = 0; my $first = -1; $k = 0;
  179         304  
  179         293  
1739 179         528 for ($i = 0; $i <= $#xreflist + 1; $i++) {
1740 1253 100 100     3538 if ($i > $#xreflist || $self->{' objects'}{$xreflist[$i]->uid()}[0] != $j + 1) {
1741             ## $fh->print(($first == -1 ? "0 " : "$self->{' objects'}{$xreflist[$first]->uid()}[0] ") . ($i - $first) . "\n");
1742 191 100       971 push @out, ($first == -1 ? "0 " : "$self->{' objects'}{$xreflist[$first]->uid()}[0] ") . ($i - $first) . "\n";
1743 191 100       490 if ($first == -1) {
1744             ## $fh->printf("%010d 65535 f \n", defined $freelist[$k] ? $self->{' objects'}{$freelist[$k]->uid()}[0] : 0);
1745 179 50       919 push @out, sprintf("%010d 65535 f \n", defined $freelist[$k] ? $self->{' objects'}{$freelist[$k]->uid()}[0] : 0);
1746 179         351 $first = 0;
1747             }
1748 191         524 for ($j = $first; $j < $i; $j++) {
1749 1074         1786 my $xref = $xreflist[$j];
1750 1074 50 33     2544 if (defined $freelist[$k] && defined $xref && "$freelist[$k]" eq "$xref") {
      33        
1751 0         0 $k++;
1752             ## $fh->print(pack("A10AA5A4",
1753             push(@out, pack("A10AA5A4",
1754             sprintf("%010d", (defined $freelist[$k] ?
1755             $self->{' objects'}{$freelist[$k]->uid()}[0] : 0)), " ",
1756 0 0       0 sprintf("%05d", $self->{' objects'}{$xref->uid()}[1] + 1),
1757             " f \n"));
1758             } else {
1759             ## $fh->print(pack("A10AA5A4", sprintf("%010d", $self->{' locs'}{$xref->uid()}), " ",
1760             push(@out, pack("A10AA5A4", sprintf("%010d", $self->{' locs'}{$xref->uid()}), " ",
1761 1074         2519 sprintf("%05d", $self->{' objects'}{$xref->uid()}[1]),
1762             " n \n"));
1763             }
1764             }
1765 191         391 $first = $i;
1766 191 100       767 $j = $self->{' objects'}{$xreflist[$i]->uid()}[0] if ($i < scalar @xreflist);
1767              
1768             } else {
1769 1062         2346 $j++;
1770             }
1771             } # end for loop through xreflists
1772             ## $fh->print("trailer\n");
1773             ## $tdict->outobjdeep($fh, $self);
1774             ## $fh->print("\nstartxref\n$tloc\n%%EOF\n");
1775             ## start new code for 117184 fix by Vadim. @out has array of xref content
1776 179 50 33     651 if (exists $tdict->{'Type'} and $tdict->{'Type'}->val() eq 'XRef') {
1777              
1778 0         0 my (@index, @stream);
1779 0         0 for (@out) { # @out is the accumulated cross reference list
1780 0         0 my @a = split;
1781 0 0       0 @a == 2 ? push @index, @a : push @stream, \@a;
1782             }
1783 0         0 my $i = $self->{' maxobj'}++;
1784 0         0 $self->add_obj($tdict, $i, 0);
1785 0         0 $self->out_obj($tdict);
1786              
1787 0         0 push @index, $i, 1;
1788 0         0 push @stream, [ $tloc, 0, 'n' ];
1789              
1790 0 0       0 my $len = $tloc > 0xFFFF ? 4 : 2; # don't expect files > 4 Gb
1791 0 0       0 my $tpl = $tloc > 0xFFFF ? 'CNC' : 'CnC'; # don't expect gennum > 255, it's absurd.
1792             # Adobe doesn't use them anymore anyway
1793 0         0 my $sstream = '';
1794 0         0 my @prev = ( 0 ) x ( $len + 2 ); # init prev to all 0's
1795 0         0 for (@stream) {
1796             # OK to zero out gennum of 65535 for a cross reference stream,
1797             # rather than just truncating to 255 -- Vadim
1798 0 0 0     0 $_->[ 1 ] = 0 if $_->[ 1 ] == 65535 and
1799             $_->[ 2 ] eq 'f';
1800             # make sure is 0..255, since will pack with 'C' code -- Phil
1801 0 0       0 if ($_->[1] > 0xFF) {
1802 0         0 print "generation number ".($_->[1])." in entry '$_->[0] $_->[1] $_->[2]' exceeds 256, reduced to ".($_->[1] & 0x00FF)."\n";
1803             }
1804 0         0 $_->[ 1 ] &= 0x00FF;
1805 0 0       0 my @line = unpack 'C*', pack $tpl, $_->[ 2 ] eq 'n'? 1 : 0, @{ $_ }[ 0 .. 1 ];
  0         0  
1806              
1807             $sstream .= pack 'C*', 2, # prepend filtering method, "PNG Up"
1808 0         0 map {($line[ $_ ] - $prev[ $_ ] + 256) % 256} 0 .. $#line;
  0         0  
1809 0         0 @prev = @line;
1810             }
1811             # build a dictionary for the cross reference stream
1812 0         0 $tdict->{'Size'} = PDFNum($i + 1);
1813 0         0 $tdict->{'Index'} = PDFArray(map { PDFNum($_) } @index);
  0         0  
1814 0         0 $tdict->{'W'} = PDFArray(map { PDFNum($_) } 1, $len, 1);
  0         0  
1815 0         0 $tdict->{'Filter'} = PDFName('FlateDecode');
1816              
1817             # it's compressed
1818 0         0 $tdict->{'DecodeParms'} = PDFDict();
1819 0         0 $tdict->{'DecodeParms'}->val()->{'Predictor'} = PDFNum(12);
1820 0         0 $tdict->{'DecodeParms'}->val()->{'Columns'} = PDFNum($len + 2);
1821              
1822 0         0 $sstream = PDF::Builder::Basic::PDF::Filter::FlateDecode->new()->outfilt($sstream, 1);
1823 0         0 $tdict->{' stream'} = $sstream;
1824 0         0 $tdict->{' nofilt'} = 1;
1825 0         0 delete $tdict->{'Length'};
1826 0         0 $self->ship_out();
1827             } else {
1828             # delete may be moved later by Vadim closer to where XRefStm created
1829 179         361 delete $tdict->{'XRefStm'};
1830             # almost the original code
1831 179         700 $fh->print("xref\n", @out, "trailer\n");
1832 179         1855 $tdict->outobjdeep($fh, $self);
1833 179         603 $fh->print("\n");
1834             }
1835 179         1662 $fh->print("startxref\n$tloc\n%%EOF\n");
1836             ## end of new code
1837              
1838 179         1374 return;
1839             } # end of out_trailer()
1840              
1841             =item PDF::Builder::Basic::PDF::File->_new()
1842              
1843             Creates a very empty PDF file object (used by new() and open())
1844              
1845             =cut
1846              
1847             sub _new {
1848 235     235   516 my $class = shift();
1849 235         521 my $self = {};
1850              
1851 235         554 bless $self, $class;
1852 235         848 $self->{' outlist'} = [];
1853 235         542 $self->{' outlist_cache'} = {}; # A cache of what's in the 'outlist'
1854 235         490 $self->{' maxobj'} = 1;
1855 235         505 $self->{' objcache'} = {};
1856 235         568 $self->{' objects'} = {};
1857              
1858 235         564 return $self;
1859             }
1860              
1861             1;
1862              
1863             =back
1864              
1865             =head1 AUTHOR
1866              
1867             Martin Hosken Martin_Hosken@sil.org
1868              
1869             Copyright Martin Hosken 1999 and onwards
1870              
1871             No warranty or expression of effectiveness, least of all regarding anyone's
1872             safety, is implied in this software or documentation.