File Coverage

blib/lib/PDF/Builder/Basic/PDF/File.pm
Criterion Covered Total %
statement 574 829 69.2
branch 208 412 50.4
condition 65 158 41.1
subroutine 44 47 93.6
pod 26 26 100.0
total 917 1472 62.3


line stmt bran cond sub pod time code
1             #=======================================================================
2             #
3             # THIS IS A REUSED PERL MODULE, FOR PROPER LICENCING TERMS SEE BELOW:
4             #
5             # Copyright Martin Hosken
6             #
7             # Modified for PDF::API2 by Alfred Reibenschuh
8             #
9             # No warranty or expression of effectiveness, least of all regarding
10             # anyone's safety, is implied in this software or documentation.
11             #
12             # This specific module is licensed under the Perl Artistic License.
13             # Effective 28 January 2021, the original author and copyright holder,
14             # Martin Hosken, has given permission to use and redistribute this module
15             # under the MIT license.
16             #
17             #=======================================================================
18             package PDF::Builder::Basic::PDF::File;
19              
20 39     39   58503 use strict;
  39         99  
  39         1175  
21 39     39   205 use warnings;
  39         70  
  39         2493  
22              
23             our $VERSION = '3.024'; # VERSION
24             our $LAST_UPDATE = '3.024'; # manually update whenever code is changed
25              
26             =head1 NAME
27              
28             PDF::Builder::Basic::PDF::File - Holds the trailers and cross-reference tables for a PDF file
29              
30             =head1 SYNOPSIS
31              
32             $p = PDF::Builder::Basic::PDF::File->open("filename.pdf", 1);
33             $p->new_obj($obj_ref);
34             $p->free_obj($obj_ref);
35             $p->append_file();
36             $p->close_file();
37             $p->release(); # IMPORTANT!
38              
39             =head1 DESCRIPTION
40              
41             This class keeps track of the directory aspects of a PDF file. There are two
42             parts to the directory: the main directory object, which is the parent to all
43             other objects, and a chain of cross-reference tables and corresponding trailer
44             dictionaries, starting with the main directory object.
45              
46             =head1 INSTANCE VARIABLES
47              
48             Within this class hierarchy, rather than making everything visible via methods,
49             which would be a lot of work, there are various instance variables which are
50             accessible via associative array referencing. To distinguish instance variables
51             from content variables (which may come from the PDF content itself), each such
52             variable name will start with a space.
53              
54             Variable names which do not start with a space directly reflect elements in a
55             PDF dictionary. In the case of a C, the
56             elements reflect those in the trailer dictionary.
57              
58             Since some variables are not designed for class users to access, variables are
59             marked in the documentation with B<(R)> to indicate that such an entry should
60             only be used as B information. B<(P)> indicates that the information
61             is B, and not designed for user use at all, but is included in the
62             documentation for completeness and to ensure that nobody else tries to use it.
63              
64             =over
65              
66             =item newroot
67              
68             This variable allows the user to create a new root entry to occur in the trailer
69             dictionary which is output when the file is written or appended. If you wish to
70             override the root element in the dictionary you have, use this entry to indicate
71             that without losing the current Root entry. Notice that newroot should point to
72             a PDF level object and not just to a dictionary, which does not have object
73             status.
74              
75             =item INFILE (R)
76              
77             Contains the filehandle used to read this information into this PDF directory.
78             It is an IO object.
79              
80             =item fname (R)
81              
82             This is the filename which is reflected by INFILE, or the original IO object
83             passed in.
84              
85             =item update (R)
86              
87             This indicates that the read file has been opened for update and that at some
88             point, C<< $p->appendfile() >> can be called to update the file with the
89             changes that have been made to the memory representation.
90              
91             =item maxobj (R)
92              
93             Contains the first usable object number above any that have already appeared
94             in the file so far.
95              
96             =item outlist (P)
97              
98             This is a list of Objind which are to be output when the next C
99             or C occurs.
100              
101             =item firstfree (P)
102              
103             Contains the first free object in the free object list. Free objects are removed
104             from the front of the list and added to the end.
105              
106             =item lastfree (P)
107              
108             Contains the last free object in the free list. It may be the same as the
109             C if there is only one free object.
110              
111             =item objcache (P)
112              
113             All objects are held in the cache to ensure that a system only has one
114             occurrence of each object. In effect, the objind class acts as a container type
115             class to hold the PDF object structure, and it would be unfortunate if there
116             were two identical place-holders floating around a system.
117              
118             =item epos (P)
119              
120             The end location of the read-file.
121              
122             =back
123              
124             Each trailer dictionary contains a number of private instance variables which
125             hold the chain together.
126              
127             =over
128              
129             =item loc (P)
130              
131             Contains the location of the start of the cross-reference table preceding the
132             trailer.
133              
134             =item xref (P)
135              
136             Contains an anonymous array of each cross-reference table entry.
137              
138             =item prev (P)
139              
140             A reference to the previous table. Note this differs from the Prev entry which
141             is in PDF, which contains the location of the previous cross-reference table.
142              
143             =back
144              
145             =head1 METHODS
146              
147             =over
148              
149             =cut
150              
151 39     39   223 use Scalar::Util qw(blessed weaken);
  39         88  
  39         2035  
152              
153 39     39   210 use vars qw($cr $irreg_char $reg_char $ws_char $delim_char %types);
  39         74  
  39         5620  
154              
155             $ws_char = '[ \t\r\n\f\0]';
156             $delim_char = '[][<>{}()/%]';
157             $reg_char = '[^][<>{}()/% \t\r\n\f\0]';
158             $irreg_char = '[][<>{}()/% \t\r\n\f\0]';
159             # \015 = x0D = CR or \r, \012 = x0A = LF or \n
160             # TBD a line-end character is space CR ' \r', space LF ' \n', or CR LF '\r\n'
161             # have seen working PDFs with just a CR and space CR
162             $cr = '\s*(?:\015|\012|(?:\015\012))';
163              
164             my $re_comment = qr/(?:\%[^\r\n]*)/;
165             my $re_whitespace = qr/(?:[ \t\r\n\f\0]|$re_comment)/;
166              
167             %types = (
168             'Page' => 'PDF::Builder::Basic::PDF::Page',
169             'Pages' => 'PDF::Builder::Basic::PDF::Pages',
170             );
171              
172             my $readDebug = 0;
173              
174 39     39   313 use Carp;
  39         85  
  39         2314  
175 39     39   683 use IO::File;
  39         7443  
  39         4866  
176              
177             # Now for the basic PDF types
178 39     39   630 use PDF::Builder::Basic::PDF::Utils;
  39         85  
  39         3406  
179              
180 39     39   246 use PDF::Builder::Basic::PDF::Array;
  39         62  
  39         908  
181 39     39   199 use PDF::Builder::Basic::PDF::Bool;
  39         86  
  39         814  
182 39     39   184 use PDF::Builder::Basic::PDF::Dict;
  39         81  
  39         784  
183 39     39   207 use PDF::Builder::Basic::PDF::Name;
  39         76  
  39         828  
184 39     39   199 use PDF::Builder::Basic::PDF::Number;
  39         69  
  39         896  
185 39     39   176 use PDF::Builder::Basic::PDF::Objind;
  39         75  
  39         989  
186 39     39   195 use PDF::Builder::Basic::PDF::String;
  39         74  
  39         826  
187 39     39   15326 use PDF::Builder::Basic::PDF::Page;
  39         100  
  39         1169  
188 39     39   218 use PDF::Builder::Basic::PDF::Pages;
  39         67  
  39         643  
189 39     39   171 use PDF::Builder::Basic::PDF::Null;
  39         76  
  39         896  
190 39     39   170 use POSIX qw(ceil floor);
  39         69  
  39         243  
191              
192             =item PDF::Builder::Basic::PDF::File->new()
193              
194             Creates a new, empty file object which can act as the host to other PDF objects.
195             Since there is no file associated with this object, it is assumed that the
196             object is created in readiness for creating a new PDF file.
197              
198             =cut
199              
200             sub new {
201 217     217 1 609 my ($class, $root) = @_;
202 217         881 my $self = $class->_new();
203              
204 217 50       569 unless ($root) {
205 217         893 $root = PDFDict();
206 217         660 $root->{'Type'} = PDFName('Catalog');
207             }
208 217         958 $self->new_obj($root);
209 217         466 $self->{'Root'} = $root;
210              
211 217         767 return $self;
212             }
213              
214             =item $p = PDF::Builder::Basic::PDF::File->open($filename, $update, %options)
215              
216             Opens the file and reads all the trailers and cross reference tables to build
217             a complete directory of objects.
218              
219             C<$filename> may be a string or an IO object.
220              
221             C<$update> specifies whether this file is being opened for updating and editing
222             (I value), or simply to be read (I or undefined value).
223              
224             C<%options> may include
225              
226             =over
227              
228             =item diags => 1
229              
230             If C is set to 1, various warning messages will be given if a
231             suspicious PDF structure is found, and some fixup may be attempted. There is
232             no guarantee that any fixup will change the PDF to legitimate, or that there
233             won't be other problems found further down the line. If this flag is I
234             given, and a structural problem is found, it is fairly likely that errors (and
235             even a program B) may happen further along. If you experience crashes
236             when reading in a PDF file, try running with C and see what is reported.
237              
238             There are many PDF files out "in the wild" which, while failing to conform to
239             Adobe's standards, appear to be tolerated by PDF Readers. Thus, Builder will no
240             longer fail on them, but merely comment on their existence.
241              
242             =back
243              
244             =cut
245              
246             sub open {
247 18     18 1 73 my ($class, $filename, $update, %options) = @_;
248             # copy dashed option names to preferred undashed names
249 18 50 33     94 if (defined $options{'-diags'} && !defined $options{'diags'}) { $options{'diags'} = delete($options{'-diags'}); }
  0         0  
250 18         34 my ($fh, $buffer);
251 18 50       74 $options{'diags'} = 0 if not defined $options{'diags'}; # default
252              
253 18         34 my $comment = ''; # any comment jammed into the PDF header
254 18         57 my $self = $class->_new();
255 18 50       71 if (ref $filename) {
256 18         42 $self->{' INFILE'} = $filename;
257 18 50       40 if ($update) {
258 18         49 $self->{' update'} = 1;
259 18         61 $self->{' OUTFILE'} = $filename;
260             }
261 18         27 $fh = $filename;
262             } else {
263 0 0       0 die "File '$filename' does not exist!" unless -f $filename;
264 0   0     0 $fh = IO::File->new(($update ? '+' : '') . "<$filename") || return;
265 0         0 $self->{' INFILE'} = $fh;
266 0 0       0 if ($update) {
267 0         0 $self->{' update'} = 1;
268 0         0 $self->{' OUTFILE'} = $fh;
269 0         0 $self->{' fname'} = $filename;
270             }
271             }
272 18         82 binmode $fh, ':raw';
273 18         158 $fh->seek(0, 0); # go to start of file
274 18         175 $fh->read($buffer, 255);
275 18 50       587 unless ($buffer =~ m/^\%PDF\-(\d+\.\d+)(.*?)$cr/mo) {
276 0         0 die "$filename does not contain a valid PDF version number";
277             }
278 18         82 $self->{' version'} = $1;
279             # can't run verCheckInput() yet, as full ' version' not set
280 18 50 33     124 if (defined $2 && length($2) > 0) {
281 0         0 $comment = $2; # save for output as comment
282             # since we just echo the original header + comment, unless that causes
283             # problems in some Readers, we can just leave it be (no call to strip
284             # out inline comment and create a separate comment further along).
285             }
286              
287 18         69 $fh->seek(0, 2); # go to end of file
288 18         160 my $end = $fh->tell();
289 18         113 $self->{' epos'} = $end;
290 18         63 foreach my $offset (1 .. 64) {
291 36         173 $fh->seek($end - 16 * $offset, 0);
292 36         207 $fh->read($buffer, 16 * $offset);
293 36 100       661 last if $buffer =~ m/startxref($cr|\s*)\d+($cr|\s*)\%\%eof.*?/i;
294             }
295 18 50       370 unless ($buffer =~ m/startxref[^\d]+([0-9]+)($cr|\s*)\%\%eof.*?/i) {
296 0 0       0 if ($options{'diags'} == 1) {
297 0         0 warn "Malformed PDF file $filename"; #orig 'die'
298             }
299             }
300 18         64 my $xpos = $1;
301 18         46 $self->{' xref_position'} = $xpos;
302              
303 18         93 my $tdict = $self->readxrtr($xpos, %options);
304 18         77 foreach my $key (keys %$tdict) {
305 133         270 $self->{$key} = $tdict->{$key};
306             }
307              
308 18         127 return $self;
309             } # end of open()
310              
311             =item $new_version = $p->version($version, %opts) # Set
312              
313             =item $ver = $p->version() # Get
314              
315             Gets/sets the PDF version (e.g., 1.5). Setting sets both the header and
316             trailer versions. Getting returns the higher of header and trailer versions.
317              
318             For compatibility with earlier releases, if no decimal point is given, assume
319             "1." precedes the number given.
320              
321             A warning message is given if you attempt to I the PDF version, as you
322             might have already read in a higher level file, or used a higher level feature.
323             This message is suppressed if the 'silent' option is given with any value.
324              
325             =cut
326              
327             sub version {
328 31     31 1 56 my $self = shift();
329              
330             # current version is the higher of trailer and header versions
331 31         113 my $header_version = $self->header_version();
332 31         91 my $trailer_version = $self->trailer_version();
333 31 100 100     136 my $old_version = (defined $trailer_version &&
334             $trailer_version > $header_version)?
335             $trailer_version: $header_version;
336              
337 31 100       73 if (@_) { # Set, possibly with options
338 3         7 my $version = shift();
339 3         8 my %opts = @_;
340             # copy dashed option names to preferred undashed names
341 3 50 33     15 if (defined $opts{'-silent'} && !defined $opts{'silent'}) { $opts{'silent'} = delete($opts{'-silent'}); }
  0         0  
342            
343             # 1.x and 2.x versions allowed
344 3 50       34 if ($version =~ m/^\d+$/) { $version = "1.$version"; } # no x.? assume it's 1.something
  0         0  
345             # check if well formed 1.x and 2.x
346 3 50       20 if ($version !~ /^[12]\.[0-9]+$/) {
347 0 0       0 croak "Invalid version '$version' ignored" unless defined $opts{'silent'};
348 0         0 return $old_version;
349             }
350              
351 3 50       14 if ($old_version > $version) {
352 0 0       0 croak "Warning: call to header_version() to LOWER the output PDF version number!" unless defined $opts{'silent'};
353             }
354            
355             # have already squawked about any problems with $version
356 3         13 $self->header_version($version, 'silent'=>1);
357             #if ($version >= 1.4) { # min 1.4 level
358 3         13 $self->trailer_version($version, 'silent'=>1);
359             #}
360             #else {
361             # delete $self->{'Root'}->{'Version'};
362             # $self->out_obj($self->{'Root'});
363             #}
364 3         7 return $version;
365             }
366              
367             # Get
368 28         86 return $old_version;
369             }
370              
371             =item $new_version = $p->header_version($version, %opts) # Set
372              
373             =item $version = $p->header_version() # Get
374              
375             Gets/sets the PDF version stored in the file header.
376              
377             For compatibility with earlier releases, if no decimal point is given, assume
378             "1." precedes the number given.
379              
380             A warning message is given if you attempt to I the PDF version, as you
381             might have already read in a higher level file, or used a higher level feature.
382             This message is suppressed if the 'silent' option is given with any value.
383              
384             =cut
385              
386             sub header_version {
387 39     39 1 79 my $self = shift();
388              
389             # current (header) version
390 39         89 my $old_version = $self->{' version'};
391              
392 39 100       118 if (@_) { # Set, permits versions 1.x and 2.x
393 6         11 my $version = shift();
394 6         20 my %opts = @_;
395             # copy dashed option names to preferred undashed names
396 6 50 33     19 if (defined $opts{'-silent'} && !defined $opts{'silent'}) { $opts{'silent'} = delete($opts{'-silent'}); }
  0         0  
397            
398             # 1.x and 2.x versions allowed
399 6 50       39 if ($version =~ m/^\d+$/) { $version = "1.$version"; } # no x.? assume it's 1.something
  0         0  
400             # check if well formed 1.x and 2.x
401 6 50       27 if ($version !~ /^[12]\.[0-9]+$/) {
402 0 0       0 croak "Invalid header_version '$version' ignored" unless defined $opts{'silent'};
403 0         0 return $old_version;
404             }
405              
406 6 50       32 if ($old_version > $version) {
407 0 0       0 croak "Warning: call to header_version() to LOWER the output PDF version number!" unless defined $opts{'silent'};
408             }
409            
410 6         15 $self->{' version'} = $version;
411 6         15 return $version;
412             }
413              
414             # Get
415 33         99 return $old_version;
416             }
417              
418             =item $new_version = $p->trailer_version($version, %opts) # Set
419              
420             =item $version = $p->trailer_version() # Get
421              
422             Gets/sets the PDF version stored in the document catalog.
423              
424             Note that the minimum PDF level for a trailer version is 1.4. It is not
425             permitted to set a PDF level of 1.3 or lower. An existing PDF (read in) of
426             1.3 or below returns undefined.
427              
428             For compatibility with earlier releases, if no decimal point is given, assume
429             "1." precedes the number given.
430              
431             A warning message is given if you attempt to I the PDF version, as you
432             might have already read in a higher level file, or used a higher level feature.
433             This message is suppressed if the 'silent' option is given with any value.
434              
435             =cut
436              
437             sub trailer_version {
438 37     37 1 66 my $self = shift();
439              
440 37         90 my $old_version = undef;
441 37 100       118 if ($self->{'Root'}->{'Version'}) {
442 12         42 $self->{'Root'}->{'Version'}->realise();
443 12         30 $old_version = $self->{'Root'}->{'Version'}->val();
444             }
445              
446 37 100       101 if (@_) { # Set, allows versions 1.x and 2.x
447 5         12 my $version = shift();
448 5         12 my %opts = @_;
449             # copy dashed option names to preferred undashed names
450 5 50 33     18 if (defined $opts{'-silent'} && !defined $opts{'silent'}) { $opts{'silent'} = delete($opts{'-silent'}); }
  0         0  
451            
452             # 1.x and 2.x versions allowed
453 5 50       34 if ($version =~ m/^\d+$/) { $version = "1.$version"; } # no x.? assume it's 1.something
  0         0  
454             # check if well formed 1.x and 2.x
455 5 50       30 if ($version !~ /^[12]\.[0-9]+$/) {
456 0 0       0 croak "Invalid trailer_version '$version' ignored" unless defined $opts{'silent'};
457 0         0 return $old_version;
458             }
459              
460 5 50 66     23 if (defined $old_version && $old_version > $version) {
461 0 0       0 croak "Warning: call to trailer_version() to LOWER the output PDF version number!" unless defined $opts{'silent'};
462             }
463            
464 5         23 $self->{'Root'}->{'Version'} = PDFName($version);
465 5         22 $self->out_obj($self->{'Root'});
466 5         22 return $version;
467             }
468              
469             # Get
470 32         57 return $old_version;
471             }
472              
473             =item $prev_version = $p->require_version($version)
474              
475             Ensures that the PDF version is at least C<$version>.
476             Silently sets the version to the higher level.
477              
478             =cut
479              
480             sub require_version {
481 3     3 1 799 my ($self, $min_version) = @_;
482 3         10 my $current_version = $self->version();
483 3 100       12 $self->version($min_version) if $current_version < $min_version;
484 3         7 return $current_version;
485             }
486              
487             =item $p->release()
488              
489             Releases ALL of the memory used by the PDF document and all of its
490             component objects. After calling this method, do B expect to
491             have anything left in the C object
492             (so if you need to save, be sure to do it before calling this method).
493              
494             B, that it is important that you call this method on any
495             C object when you wish to destroy it and
496             free up its memory. Internally, PDF files have an enormous number of
497             cross-references, and this causes circular references within the
498             internal data structures. Calling C causes a brute-force
499             cleanup of the data structures, freeing up all of the memory. Once
500             you've called this method, though, don't expect to be able to do
501             anything else with the C object; it'll
502             have B internal state whatsoever.
503              
504             =cut
505              
506             # Maintainer's Question: Couldn't this be handled by a DESTROY method
507             # instead of requiring an explicit call to release()?
508             sub release {
509 179     179 1 336 my $self = shift();
510              
511 179 50       545 return $self unless ref($self);
512 179         874 my @tofree = values %$self;
513              
514 179         738 foreach my $key (keys %$self) {
515 2907         3230 $self->{$key} = undef;
516 2907         3609 delete $self->{$key};
517             }
518              
519             # PDFs with highly-interconnected page trees or outlines can hit Perl's
520             # recursion limit pretty easily, so disable the warning for this specific
521             # loop.
522 39     39   50132 no warnings 'recursion'; ## no critic
  39         101  
  39         274883  
523              
524 179         768 while (my $item = shift @tofree) {
525 8783 100 100     25953 if (blessed($item) and $item->can('release')) {
    100          
    100          
526 2546         4550 $item->release(1);
527             } elsif (ref($item) eq 'ARRAY') {
528 1860         4686 push @tofree, @$item;
529             } elsif (ref($item) eq 'HASH') {
530 1063         2433 push @tofree, values %$item;
531 1063         2371 foreach my $key (keys %$item) {
532 4583         5091 $item->{$key} = undef;
533 4583         7053 delete $item->{$key};
534             }
535             } else {
536 3314         6314 $item = undef;
537             }
538             }
539              
540 179         719 return;
541             } # end of release()
542              
543             =item $p->append_file()
544              
545             Appends the objects for output to the read file and then appends the
546             appropriate table.
547              
548             =cut
549              
550             sub append_file {
551 8     8 1 17 my $self = shift();
552 8 50       25 return unless $self->{' update'};
553              
554 8         16 my $fh = $self->{' INFILE'};
555              
556             # hack to upgrade pdf-version number to support requested features in
557             # higher versions than the pdf was originally created. WARNING: new version
558             # must be exactly SAME length as the old (e.g., 1.6 replacing 1.4), or
559             # problems are likely with overwriting header. perhaps some day we will
560             # need to check the old version being ovewritten, and adjust something to
561             # avoid corrupting the file.
562 8   50     25 my $version = $self->{' version'} || 1.4;
563 8         39 $fh->seek(0, 0);
564             # assume that any existing EOL after version will be reused
565 8         107 $fh->print("%PDF-$version");
566              
567 8         89 my $tdict = PDFDict();
568 8         40 $tdict->{'Prev'} = PDFNum($self->{' loc'});
569 8         24 $tdict->{'Info'} = $self->{'Info'};
570 8 50       26 if (defined $self->{' newroot'}) {
571 0         0 $tdict->{'Root'} = $self->{' newroot'};
572             } else {
573 8         38 $tdict->{'Root'} = $self->{'Root'};
574             }
575 8         20 $tdict->{'Size'} = $self->{'Size'};
576              
577 8         52 foreach my $key (grep { $_ !~ m/^\s/ } keys %$self) {
  151         268  
578 25 50       62 $tdict->{$key} = $self->{$key} unless defined $tdict->{$key};
579             }
580              
581 8         41 $fh->seek($self->{' epos'}, 0);
582 8         58 $self->out_trailer($tdict, $self->{' update'});
583 8         29 close $self->{' OUTFILE'};
584              
585 8         47 return;
586             } # end of append_file()
587              
588             =item $p->out_file($fname)
589              
590             Writes a PDF file to a file of the given filename, based on the current list of
591             objects to be output. It creates the trailer dictionary based on information
592             in C<$self>.
593              
594             $fname may be a string or an IO object.
595              
596             =cut
597              
598             sub out_file {
599 171     171 1 459 my ($self, $fname) = @_;
600              
601 171         610 $self = $self->create_file($fname);
602 171         493 $self = $self->close_file();
603              
604 171         2422 return $self;
605             }
606              
607             =item $p->create_file($fname)
608              
609             Creates a new output file (no check is made of an existing open file) of
610             the given filename or IO object. Note: make sure that C<< $p->{' version'} >>
611             is set correctly before calling this function.
612              
613             =cut
614              
615             sub create_file {
616 171     171 1 350 my ($self, $filename) = @_;
617 171         256 my $fh;
618              
619 171         383 $self->{' fname'} = $filename;
620 171 50       481 if (ref $filename) {
621 171         274 $fh = $filename;
622             } else {
623 0   0     0 $fh = IO::File->new(">$filename") || die "Unable to open $filename for writing";
624 0         0 binmode($fh,':raw');
625             }
626              
627 171         334 $self->{' OUTFILE'} = $fh;
628 171   50     2172 $fh->print('%PDF-' . ($self->{' version'} // '1.4') . "\n");
629 171         1626 $fh->print("%\xC6\xCD\xCD\xB5\n"); # and some binary stuff in a comment.
630              
631             # PDF spec requires 4 or more "binary" bytes (128 or higher value) in a
632             # comment immediately following the PDF-x.y header, to alert reader that
633             # there is binary data. Actual values are apparently arbitrary. This DOES
634             # mean that other comments can NOT be inserted between the header and the
635             # binary comment! PDF::Builder always outputs this comment, so is always
636             # claiming binary data (no harm done?).
637              
638 171         1009 return $self;
639             }
640              
641             =item $p->close_file()
642              
643             Closes up the open file for output, by outputting the trailer, etc.
644              
645             =cut
646              
647             sub close_file {
648 171     171 1 269 my $self = shift();
649              
650 171         495 my $tdict = PDFDict();
651 171 50       653 $tdict->{'Info'} = $self->{'Info'} if defined $self->{'Info'};
652 171 50 33     716 $tdict->{'Root'} = (defined $self->{' newroot'} and $self->{' newroot'} ne '') ? $self->{' newroot'} : $self->{'Root'};
653              
654             # remove all freed objects from the outlist, AND the outlist_cache if not updating
655             # NO! Don't do that thing! In fact, let out_trailer do the opposite!
656              
657 171   33     669 $tdict->{'Size'} = $self->{'Size'} || PDFNum(1);
658 171 50       505 $tdict->{'Prev'} = PDFNum($self->{' loc'}) if $self->{' loc'};
659 171 50       434 if ($self->{' update'}) {
660 0         0 foreach my $key (grep { $_ !~ m/^[\s\-]/ } keys %$self) {
  0         0  
661 0 0       0 $tdict->{$key} = $self->{$key} unless defined $tdict->{$key};
662             }
663              
664 0         0 my $fh = $self->{' INFILE'};
665 0         0 $fh->seek($self->{' epos'}, 0);
666             }
667              
668 171         965 $self->out_trailer($tdict, $self->{' update'});
669 171         922 close($self->{' OUTFILE'});
670 171 50 33     939 if ($^O eq 'MacOS' and not ref($self->{' fname'})) {
671 0         0 MacPerl::SetFileInfo('CARO', 'TEXT', $self->{' fname'});
672             }
673              
674 171         894 return $self;
675             } # end of close_file()
676              
677             =item ($value, $str) = $p->readval($str, %opts)
678              
679             Reads a PDF value from the current position in the file. If C<$str> is too
680             short, read some more from the current location in the file until the whole
681             object is read. This is a recursive call which may slurp in a whole big stream
682             (unprocessed).
683              
684             Returns the recursive data structure read and also the current C<$str> that has
685             been read from the file.
686              
687             =cut
688              
689             sub readval {
690 1471     1471 1 13096 my ($self, $str, %opts) = @_;
691 1471         1944 my $fh = $self->{' INFILE'};
692 1471         1732 my ($result, $value);
693              
694 1471 100       2305 my $update = defined($opts{'update'}) ? $opts{'update'} : 1;
695 1471 100       2677 $str = update($fh, $str) if $update;
696              
697 1471         3703 $str =~ s/^$ws_char+//; # Ignore initial white space
698 1471         3342 $str =~ s/^\%[^\015\012]*$ws_char+//; # Ignore comments
699              
700 1471 100       15305 if ($str =~ m/^<
    100          
    100          
    100          
    100          
    50          
    100          
    50          
    50          
    0          
701             # Dictionary
702 162         360 $str = substr ($str, 2);
703 162 100       376 $str = update($fh, $str) if $update;
704 162         468 $result = PDFDict();
705              
706 162         367 while ($str !~ m/^>>/) {
707 453         1234 $str =~ s/^$ws_char+//; # Ignore initial white space
708 453         1072 $str =~ s/^\%[^\015\012]*$ws_char+//; # Ignore comments
709              
710 453 50       2083 if ($str =~ s|^/($reg_char+)||) {
    0          
    0          
711 453         1031 my $key = PDF::Builder::Basic::PDF::Name::name_to_string($1, $self);
712 453         1428 ($value, $str) = $self->readval($str, %opts);
713             # per Vadim Repin (RT 131147) CHG 1. His conclusion is that
714             # it is highly unlikely, but remotely possible, that there
715             # could be legitimate use of Null objects that should NOT be
716             # prevented from bubbling up. If such a case is discovered, we
717             # might have to try Klaus Ethgen's more limited (in scope)
718             # patch in ./Pages.pm. See full discussion in RT 131147 for
719             # details on what's going on and how this fixes it.
720             #$result->{$key} = $value; # original code
721 453 50       1413 $result->{$key} = $value
722             unless ref($value) eq 'PDF::Builder::Basic::PDF::Null';
723             } elsif ($str =~ s|^/$ws_char+||) {
724             # fixes a broken key problem of acrobat. -- fredo
725 0         0 ($value, $str) = $self->readval($str, %opts);
726 0         0 $result->{'null'} = $value;
727             } elsif ($str =~ s|^//|/|) {
728             # fixes again a broken key problem of illustrator/enfocus. -- fredo
729 0         0 ($value, $str) = $self->readval($str, %opts);
730 0         0 $result->{'null'} = $value;
731             } else {
732 0         0 die "Invalid dictionary key";
733             }
734 453 100       1025 $str = update($fh, $str) if $update; # thanks gareth.jones@stud.man.ac.uk
735             }
736              
737 162         449 $str =~ s/^>>//;
738 162 100       374 $str = update($fh, $str) if $update;
739             # streams can't be followed by a lone carriage-return.
740             # fredo: yes they can !!! -- use the MacOS, Luke.
741             # TBD isn't this covered by $cr as space CR?
742 162 100 66     434 if (($str =~ s/^stream(?:(?:\015\012)|\012|\015)//) and ($result->{'Length'}->val() != 0)) { # stream
743 11         34 my $length = $result->{'Length'}->val();
744 11         27 $result->{' streamsrc'} = $fh;
745 11         38 $result->{' streamloc'} = $fh->tell() - length($str);
746              
747 11 50       84 unless ($opts{'nostreams'}) {
748 11 50       32 if ($length > length($str)) {
749 0         0 $value = $str;
750 0         0 $length -= length($str);
751 0         0 read $fh, $str, $length + 11; # slurp the whole stream!
752             } else {
753 11         18 $value = '';
754             }
755 11         37 $value .= substr($str, 0, $length);
756 11         27 $result->{' stream'} = $value;
757 11         20 $result->{' nofilt'} = 1;
758 11 50       42 $str = update($fh, $str, 1) if $update; # tell update we are in-stream and only need an endstream
759 11         38 $str = substr($str, index($str, 'endstream') + 9);
760             }
761             }
762              
763 162 100 100     550 if (defined $result->{'Type'} and defined $types{$result->{'Type'}->val()}) {
764 38         103 bless $result, $types{$result->{'Type'}->val()};
765             }
766             # gdj: FIXME: if any of the ws chars were crs, then the whole
767             # string might not have been read.
768              
769             } elsif ($str =~ m/^([0-9]+)(?:$ws_char|$re_comment)+([0-9]+)(?:$ws_char|$re_comment)+R/s) {
770             # Indirect Object
771 166         413 my $num = $1;
772 166         244 $value = $2;
773 166         1205 $str =~ s/^([0-9]+)(?:$ws_char|$re_comment)+([0-9]+)(?:$ws_char|$re_comment)+R//s;
774 166 100       404 unless ($result = $self->test_obj($num, $value)) {
775 131         408 $result = PDF::Builder::Basic::PDF::Objind->new();
776 131         362 $result->{' objnum'} = $num;
777 131         264 $result->{' objgen'} = $value;
778 131         267 $self->add_obj($result, $num, $value);
779             }
780 166         313 $result->{' parent'} = $self;
781 166         482 weaken $result->{' parent'};
782             #$result->{' realised'} = 0;
783             # removed to address changes being lost when an indirect object
784             # is realised twice
785             # gdj: FIXME: if any of the ws chars were crs, then the whole
786             # string might not have been read.
787              
788             } elsif ($str =~ m/^([0-9]+)(?:$ws_char|$re_comment)+([0-9]+)(?:$ws_char|$re_comment)+obj/s) {
789             # Object
790 104         174 my $obj;
791 104         221 my $num = $1;
792 104         194 $value = $2;
793 104         973 $str =~ s/^([0-9]+)(?:$ws_char|$re_comment)+([0-9]+)(?:$ws_char|$re_comment)+obj//s;
794 104         364 ($obj, $str) = $self->readval($str, %opts);
795 104 100       240 if ($result = $self->test_obj($num, $value)) {
796 90         256 $result->merge($obj);
797             } else {
798 14         21 $result = $obj;
799 14         41 $self->add_obj($result, $num, $value);
800 14         21 $result->{' realised'} = 1;
801             }
802 104 100       261 $str = update($fh, $str) if $update; # thanks to kundrat@kundrat.sk
803 104         540 $str =~ s/^endobj//;
804              
805             } elsif ($str =~ m|^/($reg_char*)|s) {
806             # Name
807 573         1261 $value = $1;
808 573         2359 $str =~ s|^/($reg_char*)||s;
809 573         1665 $result = PDF::Builder::Basic::PDF::Name->from_pdf($value, $self);
810              
811             } elsif ($str =~ m/^\(/) {
812             # Literal String
813             # We now need to find an unbalanced, unescaped right-paren.
814             # This can't be done with a regex.
815 2         5 my $value = '(';
816 2         23 $str = substr($str, 1);
817              
818 2         20 my $nested_level = 1;
819 2         6 while (1) {
820             # Ignore everything up to the first escaped or parenthesis character
821 2 50       18 if ($str =~ /^([^\\()]+)(.*)/s) {
822 2         8 $value .= $1;
823 2         6 $str = $2;
824             }
825              
826             # Ignore escaped parentheses
827 2 50       24 if ($str =~ /^(\\[()])/) {
    50          
    50          
    0          
828 0         0 $value .= $1;
829 0         0 $str = substr($str, 2);
830              
831             } elsif ($str =~ /^\(/) {
832             # Left parenthesis: increase nesting
833 0         0 $value .= '(';
834 0         0 $str = substr($str, 1);
835 0         0 $nested_level++;
836              
837             } elsif ($str =~ /^\)/) {
838             # Right parenthesis: decrease nesting
839 2         5 $value .= ')';
840 2         6 $str = substr($str, 1);
841 2         4 $nested_level--;
842 2 50       6 last unless $nested_level;
843              
844             } elsif ($str =~ /^(\\[^()])/) {
845             # Other escaped character
846 0         0 $value .= $1;
847 0         0 $str = substr($str, 2);
848              
849             } else {
850             # If there wasn't an escaped or parenthesis character,
851             # read some more.
852              
853             # We don't use update because we don't want to remove
854             # whitespace or comments.
855 0 0       0 $fh->read($str, 255, length($str)) or die 'Unterminated string.';
856             }
857             } # end while(TRUE) loop
858              
859 2         10 $result = PDF::Builder::Basic::PDF::String->from_pdf($value);
860             # end Literal String check
861              
862             } elsif ($str =~ m/^
863             # Hex String
864 0         0 $str =~ s/^
865 0         0 $fh->read($str, 255, length($str)) while (0 > index($str, '>'));
866 0         0 ($value, $str) = ($str =~ /^(.*?)>(.*)/s);
867 0         0 $result = PDF::Builder::Basic::PDF::String->from_pdf('<' . $value . '>');
868              
869             } elsif ($str =~ m/^\[/) {
870             # Array
871 94         353 $str =~ s/^\[//;
872 94 50       258 $str = update($fh, $str) if $update;
873 94         254 $result = PDFArray();
874 94         213 while ($str !~ m/^\]/) {
875 778         2053 $str =~ s/^$ws_char+//; # Ignore initial white space
876 778         1726 $str =~ s/^\%[^\015\012]*$ws_char+//; # Ignore comments
877              
878 778         1832 ($value, $str) = $self->readval($str, %opts);
879 778         1986 $result->add_elements($value);
880 778 50       1396 $str = update($fh, $str) if $update; # str might just be exhausted!
881             }
882 94         283 $str =~ s/^\]//;
883              
884             } elsif ($str =~ m/^(true|false)($irreg_char|$)/) {
885             # Boolean
886 0         0 $value = $1;
887 0         0 $str =~ s/^(?:true|false)//;
888 0         0 $result = PDF::Builder::Basic::PDF::Bool->from_pdf($value);
889              
890             } elsif ($str =~ m/^([+-.0-9]+)($irreg_char|$)/) {
891             # Number
892 370         840 $value = $1;
893 370         1042 $str =~ s/^([+-.0-9]+)//;
894              
895             # If $str only consists of whitespace (or is empty), call update to
896             # see if this is the beginning of an indirect object or reference
897 370 100 100     3710 if ($update and ($str =~ /^$re_whitespace*$/s or $str =~ /^$re_whitespace+[0-9]+$re_whitespace*$/s)) {
      100        
898 6         48 $str =~ s/^$re_whitespace+/ /s;
899 6         39 $str =~ s/$re_whitespace+$/ /s;
900 6         14 $str = update($fh, $str);
901 6 100       62 if ($str =~ m/^$re_whitespace*([0-9]+)$re_whitespace+(?:R|obj)/s) {
902 4         17 return $self->readval("$value $str", %opts);
903             }
904             }
905              
906 366         1110 $result = PDF::Builder::Basic::PDF::Number->from_pdf($value);
907              
908             } elsif ($str =~ m/^null($irreg_char|$)/) {
909             # Null
910 0         0 $str =~ s/^null//;
911 0         0 $result = PDF::Builder::Basic::PDF::Null->new();
912              
913             } else {
914 0         0 die "Can't parse `$str' near " . ($fh->tell()) . " length " . length($str) . ".";
915             }
916              
917 1467         5512 $str =~ s/^$ws_char+//s;
918 1467         4247 return ($result, $str);
919             } # end of readval()
920              
921             =item $ref = $p->read_obj($objind, %opts)
922              
923             Given an indirect object reference, locate it and read the object returning
924             the read in object.
925              
926             =cut
927              
928             sub read_obj {
929 86     86 1 160 my ($self, $objind, %opts) = @_;
930              
931 86   50     215 my $res = $self->read_objnum($objind->{' objnum'}, $objind->{' objgen'}, %opts) || return;
932 86 50       239 $objind->merge($res) unless $objind eq $res;
933              
934 86         249 return $objind;
935             }
936              
937             =item $ref = $p->read_objnum($num, $gen, %opts)
938              
939             Returns a fully read object of given number and generation in this file
940              
941             =cut
942              
943             sub read_objnum {
944 94     94 1 2256 my ($self, $num, $gen, %opts) = @_;
945              
946 94 50       221 croak 'Undefined object number in call to read_objnum($num, $gen)' unless defined $num;
947 94 50       187 croak 'Undefined object generation in call to read_objnum($num, $gen)' unless defined $gen;
948 94 50       368 croak "Invalid object number '$num' in call to read_objnum" unless $num =~ /^[0-9]+$/;
949 94 50       248 croak "Invalid object generation '$gen' in call to read_objnum" unless $gen =~ /^[0-9]+$/;
950              
951 94   50     226 my $object_location = $self->locate_obj($num, $gen) || return;
952 94         146 my $object;
953              
954             # Compressed object
955 94 100       190 if (ref($object_location)) {
956 4         7 my ($object_stream_num, $object_stream_pos) = @{$object_location};
  4         9  
957              
958 4         30 my $object_stream = $self->read_objnum($object_stream_num, 0, %opts);
959 4 50       13 die 'Cannot find the compressed object stream' unless $object_stream;
960              
961 4 50       20 $object_stream->read_stream() if $object_stream->{' nofilt'};
962              
963             # An object stream starts with pairs of integers containing object numbers and
964             # stream offsets relative to the First key
965 4         8 my $fh;
966             my $pairs;
967 4 50       20 unless ($object_stream->{' streamfile'}) {
968 4         15 $pairs = substr($object_stream->{' stream'}, 0, $object_stream->{'First'}->val());
969             } else {
970 0         0 CORE::open($fh, '<', $object_stream->{' streamfile'});
971 0         0 read($fh, $pairs, $object_stream->{'First'}->val());
972             }
973 4         27 my @map = split /\s+/, $pairs;
974              
975             # Find the offset of the object in the stream
976 4         9 my $index = $object_stream_pos * 2;
977 4 50       11 die "Objind $num does not exist at index $index" unless $map[$index] == $num;
978 4         10 my $start = $map[$index + 1];
979              
980             # Unless this is the last object in the stream, its length is
981             # determined by the offset of the next object.
982 4         8 my $last_object_in_stream = $map[-2];
983 4         5 my $length;
984 4 100       18 if ($last_object_in_stream == $num) {
985 2 50       6 if ($object_stream->{' stream'}) {
986 2         9 $length = length($object_stream->{' stream'}) - $object_stream->{'First'}->val() - $start;
987             } else {
988 0         0 $length = (-s $object_stream->{' streamfile'}) - $object_stream->{'First'}->val() - $start;
989             }
990             } else {
991 2         7 my $next_start = $map[$index + 3];
992 2         4 $length = $next_start - $start;
993             }
994              
995             # Read the object from the stream
996 4         12 my $stream = "$num 0 obj ";
997 4 50       9 unless ($object_stream->{' streamfile'}) {
998 4         13 $stream .= substr($object_stream->{' stream'}, $object_stream->{'First'}->val() + $start, $length);
999             } else {
1000 0         0 seek($fh, $object_stream->{'First'}->val() + $start, 0);
1001 0         0 read($fh, $stream, $length, length($stream));
1002 0         0 close $fh;
1003             }
1004              
1005 4         14 ($object) = $self->readval($stream, %opts, update => 0);
1006 4         17 return $object;
1007             }
1008              
1009 90         274 my $current_location = $self->{' INFILE'}->tell();
1010 90         574 $self->{' INFILE'}->seek($object_location, 0);
1011 90         411 ($object) = $self->readval('', %opts);
1012 90         382 $self->{' INFILE'}->seek($current_location, 0);
1013              
1014 90         574 return $object;
1015             } # end of read_objnum()
1016              
1017             =item $objind = $p->new_obj($obj)
1018              
1019             Creates a new, free object reference based on free space in the cross reference
1020             chain. If nothing is free, then think up a new number. If C<$obj>, then turns
1021             that object into this new object rather than returning a new object.
1022              
1023             =cut
1024              
1025             sub new_obj {
1026 1365     1365 1 2341 my ($self, $base) = @_;
1027 1365         1754 my $res;
1028              
1029 1365 50 66     3367 if (defined $self->{' free'} and scalar @{$self->{' free'}} > 0) {
  14         57  
1030 0         0 $res = shift(@{$self->{' free'}});
  0         0  
1031 0 0       0 if (defined $base) {
1032 0         0 my ($num, $gen) = @{$self->{' objects'}{$res->uid()}};
  0         0  
1033 0         0 $self->remove_obj($res);
1034 0         0 $self->add_obj($base, $num, $gen);
1035 0         0 return $self->out_obj($base);
1036             } else {
1037 0         0 $self->{' objects'}{$res->uid()}[2] = 0;
1038 0         0 return $res;
1039             }
1040             }
1041              
1042 1365         1983 my $tdict = $self;
1043 1365         1659 my $i;
1044 1365         2667 while (defined $tdict) {
1045 1366 50       3888 $i = $tdict->{' xref'}{defined($i) ? $i : ''}[0];
1046 1366   33     3071 while (defined $i and $i != 0) {
1047 0         0 my ($ni, $ng) = @{$tdict->{' xref'}{$i}};
  0         0  
1048 0 0       0 unless (defined $self->locate_obj($i, $ng)) {
1049 0 0       0 if (defined $base) {
1050 0         0 $self->add_obj($base, $i, $ng);
1051 0         0 return $base;
1052             } else {
1053 0   0     0 $res = $self->test_obj($i, $ng) || $self->add_obj(PDF::Builder::Basic::PDF::Objind->new(), $i, $ng);
1054 0         0 $self->out_obj($res);
1055 0         0 return $res;
1056             }
1057             }
1058 0         0 $i = $ni;
1059             }
1060 1366         2619 $tdict = $tdict->{' prev'};
1061             }
1062              
1063 1365         2196 $i = $self->{' maxobj'}++;
1064 1365 50       2489 if (defined $base) {
1065 1365         3505 $self->add_obj($base, $i, 0);
1066 1365         3100 $self->out_obj($base);
1067 1365         2566 return $base;
1068             } else {
1069 0         0 $res = $self->add_obj(PDF::Builder::Basic::PDF::Objind->new(), $i, 0);
1070 0         0 $self->out_obj($res);
1071 0         0 return $res;
1072             }
1073             }
1074              
1075             =item $p->out_obj($obj)
1076              
1077             Indicates that the given object reference should appear in the output xref
1078             table whether with data or freed.
1079              
1080             =cut
1081              
1082             sub out_obj {
1083 2991     2991 1 4628 my ($self, $obj) = @_;
1084              
1085             # This is why we've been keeping the outlist CACHE around; to speed
1086             # up this method by orders of magnitude (it saves up from having to
1087             # grep the full outlist each time through as we'll just do a lookup
1088             # in the hash) (which is super-fast).
1089 2991 100       7087 unless (exists $self->{' outlist_cache'}{$obj}) {
1090 1382         1779 push @{$self->{' outlist'}}, $obj;
  1382         2724  
1091             # weaken $self->{' outlist'}->[-1];
1092 1382         3836 $self->{' outlist_cache'}{$obj} = 1;
1093             }
1094              
1095 2991         4692 return $obj;
1096             }
1097              
1098             =item $p->free_obj($obj)
1099              
1100             Marks an object reference for output as being freed.
1101              
1102             =cut
1103              
1104             sub free_obj {
1105 0     0 1 0 my ($self, $obj) = @_;
1106              
1107 0         0 push @{$self->{' free'}}, $obj;
  0         0  
1108 0         0 $self->{' objects'}{$obj->uid()}[2] = 1;
1109 0         0 $self->out_obj($obj);
1110              
1111 0         0 return;
1112             }
1113              
1114             =item $p->remove_obj($objind)
1115              
1116             Removes the object from all places where we might remember it.
1117              
1118             =cut
1119              
1120             sub remove_obj {
1121 0     0 1 0 my ($self, $objind) = @_;
1122              
1123             # who says it has to be fast
1124 0         0 delete $self->{' objects'}{$objind->uid()};
1125 0         0 delete $self->{' outlist_cache'}{$objind};
1126 0         0 delete $self->{' printed_cache'}{$objind};
1127 0         0 @{$self->{' outlist'}} = grep { $_ ne $objind } @{$self->{' outlist'}};
  0         0  
  0         0  
  0         0  
1128 0         0 @{$self->{' printed'}} = grep { $_ ne $objind } @{$self->{' printed'}};
  0         0  
  0         0  
  0         0  
1129             $self->{' objcache'}{$objind->{' objnum'}, $objind->{' objgen'}} = undef
1130 0 0       0 if $self->{' objcache'}{$objind->{' objnum'}, $objind->{' objgen'}} eq $objind;
1131              
1132 0         0 return $self;
1133             }
1134              
1135             =item $p->ship_out(@objects)
1136              
1137             =item $p->ship_out()
1138              
1139             Ships the given objects (or all objects for output if C<@objects> is empty) to
1140             the currently open output file (assuming there is one). Freed objects are not
1141             shipped, and once an object is shipped it is switched such that this file
1142             becomes its source and it will not be shipped again unless out_obj is called
1143             again. Notice that a shipped out object can be re-output or even freed, but
1144             that it will not cause the data already output to be changed.
1145              
1146             =cut
1147              
1148             sub ship_out {
1149 184     184 1 394 my ($self, @objects) = @_;
1150              
1151 184 50       476 return unless defined $self->{' OUTFILE'};
1152 184         361 my $fh = $self->{' OUTFILE'};
1153 184         520 seek($fh, 0, 2); # go to the end of the file
1154              
1155 184 50       503 @objects = @{$self->{' outlist'}} unless scalar @objects > 0;
  184         516  
1156 184         435 foreach my $objind (@objects) {
1157 1074 50       2705 next unless $objind->is_obj($self);
1158 1074         1710 my $j = -1;
1159 1074         1663 for (my $i = 0; $i < scalar @{$self->{' outlist'}}; $i++) {
  1074         2160  
1160 1074 50       2754 if ($self->{' outlist'}[$i] eq $objind) {
1161 1074         1467 $j = $i;
1162 1074         1799 last;
1163             }
1164             }
1165 1074 50       1932 next if $j < 0;
1166 1074         1244 splice(@{$self->{' outlist'}}, $j, 1);
  1074         1785  
1167 1074         2687 delete $self->{' outlist_cache'}{$objind};
1168 1074 50       1361 next if grep { $_ eq $objind } @{$self->{' free'}};
  0         0  
  1074         2488  
1169              
1170 1074 50       1972 map { $fh->print("\% $_ \n") } split(/$cr/, $objind->{' comments'}) if $objind->{' comments'};
  0         0  
1171 1074         2782 $self->{' locs'}{$objind->uid()} = $fh->tell();
1172 1074         1728 my ($objnum, $objgen) = @{$self->{' objects'}{$objind->uid()}}[0..1];
  1074         1923  
1173 1074         2766 $fh->printf('%d %d obj ', $objnum, $objgen);
1174 1074         10218 $objind->outobjdeep($fh, $self);
1175 1074         2714 $fh->print("\nendobj\n");
1176              
1177             # Note that we've output this obj, not forgetting to update
1178             # the cache of what's printed.
1179 1074 50       6145 unless (exists $self->{' printed_cache'}{$objind}) {
1180 1074         1352 push @{$self->{' printed'}}, $objind;
  1074         2496  
1181 1074         3530 $self->{' printed_cache'}{$objind}++;
1182             }
1183             }
1184              
1185 184         521 return $self;
1186             } # end of ship_out()
1187              
1188             =item $p->copy($outpdf, \&filter)
1189              
1190             Iterates over every object in the file reading the object, calling C
1191             with the object, and outputting the result. If C is not defined,
1192             just copies input to output.
1193              
1194             =cut
1195              
1196             sub copy {
1197 0     0 1 0 my ($self, $outpdf, $filter) = @_;
1198 0         0 my ($obj, $minl, $mini, $ming);
1199              
1200 0         0 foreach my $key (grep { not m/^[\s\-]/ } keys %$self) {
  0         0  
1201 0 0       0 $outpdf->{$key} = $self->{$key} unless defined $outpdf->{$key};
1202             }
1203              
1204 0         0 my $tdict = $self;
1205 0         0 while (defined $tdict) {
1206 0         0 foreach my $i (sort {$a <=> $b} keys %{$tdict->{' xref'}}) {
  0         0  
  0         0  
1207 0         0 my ($nl, $ng, $nt) = @{$tdict->{' xref'}{$i}};
  0         0  
1208 0 0       0 next unless $nt eq 'n';
1209              
1210 0 0 0     0 if ($nl < $minl or $mini == 0) {
1211 0         0 $mini = $i;
1212 0         0 $ming = $ng;
1213 0         0 $minl = $nl;
1214             }
1215 0 0       0 unless ($obj = $self->test_obj($i, $ng)) {
1216 0         0 $obj = PDF::Builder::Basic::PDF::Objind->new();
1217 0         0 $obj->{' objnum'} = $i;
1218 0         0 $obj->{' objgen'} = $ng;
1219 0         0 $self->add_obj($obj, $i, $ng);
1220 0         0 $obj->{' parent'} = $self;
1221 0         0 weaken $obj->{' parent'};
1222 0         0 $obj->{' realised'} = 0;
1223             }
1224 0         0 $obj->realise();
1225 0 0       0 my $res = defined $filter ? &{$filter}($obj) : $obj;
  0         0  
1226 0 0 0     0 $outpdf->new_obj($res) unless (!$res || $res->is_obj($outpdf));
1227             }
1228 0         0 $tdict = $tdict->{' prev'};
1229             }
1230              
1231             # test for linearized and remove it from output
1232 0         0 $obj = $self->test_obj($mini, $ming);
1233 0 0 0     0 if ($obj->isa('PDF::Builder::Basic::PDF::Dict') && $obj->{'Linearized'}) {
1234 0         0 $outpdf->free_obj($obj);
1235             }
1236              
1237 0         0 return $self;
1238             } # end of copy()
1239              
1240             =back
1241              
1242             =head1 PRIVATE METHODS & FUNCTIONS
1243              
1244             The following methods and functions are considered B to this class.
1245             This does not mean you cannot use them if you have a need, just that they
1246             aren't really designed for users of this class.
1247              
1248             =over
1249              
1250             =item $offset = $p->locate_obj($num, $gen)
1251              
1252             Returns a file offset to the object asked for by following the chain of cross
1253             reference tables until it finds the one you want.
1254              
1255             =cut
1256              
1257             sub locate_obj {
1258 94     94 1 171 my ($self, $num, $gen) = @_;
1259              
1260 94         144 my $tdict = $self;
1261 94         186 while (defined $tdict) {
1262 103 100       263 if (ref $tdict->{' xref'}{$num}) {
1263 94         148 my $ref = $tdict->{' xref'}{$num};
1264 94 100       188 return $ref unless scalar(@$ref) == 3;
1265              
1266 90 50       207 if ($ref->[1] == $gen) {
1267 90 50       382 return $ref->[0] if $ref->[2] eq 'n';
1268 0         0 return; # if $ref->[2] eq 'f';
1269             }
1270             }
1271 9         20 $tdict = $tdict->{' prev'};
1272             }
1273              
1274 0         0 return;
1275             }
1276              
1277             =item update($fh, $str, $instream)
1278              
1279             Keeps reading C<$fh> for more data to ensure that C<$str> has at least a line
1280             full for C to work on. At this point we also take the opportunity to
1281             ignore comments.
1282              
1283             =cut
1284              
1285             sub update {
1286 3242     3242 1 4808 my ($fh, $str, $instream) = @_;
1287              
1288 3242 50       4613 print STDERR 'fpos=' . tell($fh) . ' strlen=' . length($str) . "\n" if $readDebug;
1289 3242 100       4271 if ($instream) {
1290             # we are inside a (possible binary) stream
1291             # so we fetch data till we see an 'endstream'
1292             # -- fredo/2004-09-03
1293 11   33     39 while ($str !~ m/endstream/ and not $fh->eof()) {
1294 0 0       0 print STDERR 'fpos=' . tell($fh) . ' strlen=' . length($str) . "\n" if $readDebug;
1295 0         0 $fh->read($str, 314, length($str));
1296             }
1297             } else {
1298 3231         11127 $str =~ s/^$ws_char*//;
1299 3231   100     119295 while ($str !~ m/$cr/ and not $fh->eof()) {
1300 128 50       882 print STDERR 'fpos=' . tell($fh) . ' strlen=' . length($str) . "\n" if $readDebug;
1301 128         387 $fh->read($str, 314, length($str));
1302 128         3814 $str =~ s/^$ws_char*//so;
1303             }
1304 3231         7337 while ($str =~ m/^\%/) { # restructured by fredo/2003-03-23
1305 1 50       3 print STDERR 'fpos=' . tell($fh) . ' strlen=' . length($str) . "\n" if $readDebug;
1306 1   33     27 $fh->read($str, 314, length($str)) while ($str !~ m/$cr/ and not $fh->eof());
1307 1         21 $str =~ s/^\%[^\015\012]*$ws_char*//so; # fixed for reportlab -- fredo
1308             }
1309             }
1310              
1311 3242         6670 return $str;
1312             } # end of update()
1313              
1314             =item $objind = $p->test_obj($num, $gen)
1315              
1316             Tests the cache to see whether an object reference (which may or may not have
1317             been getobj()ed) has been cached. Returns it if it has.
1318              
1319             =cut
1320              
1321             sub test_obj {
1322 270     270 1 478 my ($self, $num, $gen) = @_;
1323              
1324 270         860 return $self->{' objcache'}{$num, $gen};
1325             }
1326              
1327             =item $p->add_obj($objind)
1328              
1329             Adds the given object to the internal object cache.
1330              
1331             =cut
1332              
1333             sub add_obj {
1334 1510     1510 1 3030 my ($self, $obj, $num, $gen) = @_;
1335              
1336 1510         4712 $self->{' objcache'}{$num, $gen} = $obj;
1337 1510         5852 $self->{' objects'}{$obj->uid()} = [$num, $gen];
1338             # weaken $self->{' objcache'}{$num, $gen};
1339 1510         2598 return $obj;
1340             }
1341              
1342             =item $tdict = $p->readxrtr($xpos, %options)
1343              
1344             Recursive function which reads each of the cross-reference and trailer tables
1345             in turn until there are no more.
1346              
1347             Returns a dictionary corresponding to the trailer chain. Each trailer also
1348             includes the corresponding cross-reference table.
1349              
1350             The structure of the xref private element in a trailer dictionary is of an
1351             anonymous hash of cross reference elements by object number. Each element
1352             consists of an array of 3 elements corresponding to the three elements read
1353             in [location, generation number, free or used]. See the PDF specification
1354             for details.
1355              
1356             See C for options allowed.
1357              
1358             =cut
1359              
1360             sub _unpack_xref_stream {
1361 78     78   124 my ($self, $width, $data) = @_;
1362              
1363             # handle some oddball cases
1364 78 50       166 if ($width == 3) {
    50          
    50          
    50          
1365 0         0 $data = "\x00$data";
1366 0         0 $width = 4;
1367             } elsif ($width == 5) {
1368 0         0 $data = "\x00\x00\x00$data";
1369 0         0 $width = 8;
1370             } elsif ($width == 6) {
1371 0         0 $data = "\x00\x00$data";
1372 0         0 $width = 8;
1373             } elsif ($width == 7) {
1374 0         0 $data = "\x00$data";
1375 0         0 $width = 8;
1376             }
1377             # in all cases, "Network" (Big-Endian) byte order assumed
1378 78 100       124 return unpack('C', $data) if $width == 1;
1379 52 50       94 return unpack('n', $data) if $width == 2;
1380 0 0       0 return unpack('N', $data) if $width == 4;
1381 0 0       0 if ($width == 8) {
1382             # Some ways other packages handle this, without Perl-64, according
1383             # to Vadim Repin. Possibly they end up converting the value to
1384             # "double" behind the scenes if on a 32-bit platform?
1385             # PDF::Tiny return hex unpack('H16', $data);
1386             # CAM::PDF my @b = unpack('C*', $data);
1387             # my $i=0; ($i <<= 8) += shift @b while @b; return $i;
1388            
1389 0 0       0 if (substr($data, 0, 4) eq "\x00\x00\x00\x00") {
1390             # can treat as 32 bit unsigned int
1391 0         0 return unpack('N', substr($data, 4, 4));
1392             } else {
1393             # requires 64-bit platform (chip and Perl), else fatal error
1394             # it may blow up and produce a smoking crater if 32-bit Perl!
1395             # also note that Q needs Big-Endian flag (>) specified, else
1396             # it will use the native chip order (Big- or Little- Endian)
1397 0         0 return unpack('Q>', $data);
1398             }
1399             }
1400              
1401 0         0 die "Unsupported field width: $width. 1-8 supported.";
1402             }
1403              
1404             sub readxrtr {
1405 21     21 1 104 my ($self, $xpos, %options) = @_;
1406             # $xpos SHOULD be pointing to "xref" keyword
1407             # copy dashed option names to preferred undashed names
1408 21 50 33     88 if (defined $options{'-diags'} && !defined $options{'diags'}) { $options{'diags'} = delete($options{'-diags'}); }
  0         0  
1409              
1410 21         41 my ($tdict, $buf, $xmin, $xnum, $xdiff);
1411              
1412 21         44 my $fh = $self->{' INFILE'};
1413 21         83 $fh->seek($xpos, 0);
1414 21         157 $fh->read($buf, 22); # 22 should overlap into first subsection
1415 21         140 $buf = update($fh, $buf); # fix for broken JAWS xref calculation.
1416              
1417 21         40 my $xlist = {};
1418              
1419             ## it seems that some products calculate wrong prev entries (short)
1420             ## so we seek ahead to find one -- fredo; save for now
1421             #while ($buf !~ m/^xref$cr/i && !eof($fh)) {
1422             # $buf =~ s/^(\s+|\S+|.)//i;
1423             # $buf = update($fh, $buf);
1424             #}
1425              
1426 21 100       257 if ($buf =~ s/^xref$cr//i) { # remove xrefEOL from buffer
    50          
1427             # Plain XRef tables.
1428             #
1429             # look to match startobj# count# EOL of first (or only) subsection
1430             # supposed to be single ASCII space between numbers, but this is
1431             # more lenient for some writers, allowing 1 or more whitespace
1432 18         36 my $subsection_count = 0;
1433 18         32 my $entry_format_error = 0;
1434 18         36 my $xrefListEmpty = 0;
1435              
1436 18         451 while ($buf =~ m/^$ws_char*([0-9]+)$ws_char+([0-9]+)$ws_char*$cr(.*?)$/s) {
1437 23         56 my $old_buf = $buf;
1438 23         52 $xmin = $1; # starting object number of this subsection
1439 23         42 $xnum = $2; # number of entries in this subsection
1440 23         39 $buf = $3; # remainder of buffer
1441 23         34 $subsection_count++;
1442             # go back and warn if other than single space separating numbers
1443 23 50       266 unless ($old_buf =~ /^[0-9]+ [0-9]+$cr/) { #orig 'warn'
1444 0 0       0 if ($options{'diags'} == 1) {
1445             # See PDF 1.7 section 7.5.4: Cross-Reference Table
1446 0         0 warn "Malformed xref: subsection header needs a single\n" .
1447             "ASCII space between the numbers and no extra spaces.\n";
1448             }
1449             }
1450 23         55 $xdiff = length($buf); # how much remaining in buffer
1451              
1452             # in case xnum == 0 is permitted (or used and tolerated by readers),
1453             # skip over entry reads and go to next subsection
1454 23 50       57 if ($xnum < 1) {
1455 0 0       0 if ($options{'diags'} == 1) {
1456 0         0 warn "Xref subsection has 0 entries. Skipped.\n";
1457             }
1458 0         0 $xrefListEmpty = 1;
1459 0         0 next;
1460             }
1461              
1462             # read chunk of entire subsection list
1463 23         50 my $entry_size = 20;
1464             # test read first entry, see if $cr in expected place, adjust size
1465 23         99 $fh->read($buf, $entry_size * 1 - $xdiff + 15, $xdiff);
1466 23 50       439 if ($buf =~ m/^(.*?)$cr/) {
1467 23         62 $entry_size = length($1) + 2;
1468             }
1469 23 50 33     67 if ($entry_size != 20 && $options{'diags'} == 1) {
1470 0         0 warn "Xref entries supposed to be 20 bytes long, are $entry_size.\n";
1471             }
1472 23         42 $xdiff = length($buf);
1473              
1474             # read remaining entries
1475 23         101 $fh->read($buf, $entry_size * $xnum - $xdiff + 15, $xdiff);
1476             # each entry is two integers and flag. spec says single ASCII space
1477             # between each field and certain length for each (10, 5, 1), so
1478             # this appears to be more lenient than spec
1479             # is object 0 supposed to be in subsection 1, or is any place OK?
1480 23   66     621 while ($xnum-- > 0 and
1481             $buf =~ m/^$ws_char*(\d+)$ws_char+(\d+)$ws_char+([nf])$ws_char*$cr/) {
1482             # check if format doesn't match spec
1483 132 50 33     781 if ($buf =~ m/^\d{10} \d{5} [nf]$cr/ ||
1484             $entry_format_error) {
1485             # format OK or have already reported format problem
1486             } else {
1487 0 0       0 if ($options{'diags'} == 1) {
1488 0         0 warn "Xref entry readable, but doesn't meet PDF spec.\n";
1489             }
1490 0         0 $entry_format_error++;
1491             }
1492              
1493 132         915 $buf =~ s/^$ws_char*(\d+)$ws_char+(\d+)$ws_char+([nf])$ws_char*$cr//;
1494             # $1 = object's starting offset in file (n) or
1495             # next object in free list (f) [0 if last]
1496             # $2 = generation number (n) or 65535 for object 0 (f) or
1497             # next generation number (f)
1498             # $3 = flag (n = object in use, f = free)
1499             # buf reduced by entry just processed
1500 132 50       288 if (exists $xlist->{$xmin}) {
1501 0 0       0 if ($options{'diags'} == 1) {
1502 0         0 warn "Duplicate object number $xmin in xref table ignored.\n";
1503             }
1504             } else {
1505 132         441 $xlist->{$xmin} = [$1, $2, $3];
1506 132 50 66     338 if ($xmin == 0 && $subsection_count > 1 && $options{'diags'} == 1) {
      33        
1507 0         0 warn "Xref object 0 entry not in first subsection.\n";
1508             }
1509             }
1510 132         814 $xmin++;
1511             } # traverse one subsection for objects xmin through xmin+xnum-1
1512             # go back for next subsection (if any)
1513             } # loop through xref subsections
1514             # fall through to here when run out of xref subsections
1515             # xlist should have two or more object entries, may not be contiguous
1516              
1517             # should have an object 0
1518             # at this point, no idea if object 0 was in first subsection (legal?)
1519             # could attempt a fixup if no object 0 found. many fixups are quite
1520             # risky and could end up corrupting the free list.
1521             # there's no guarantee that a proper free list will result, but any
1522             # error should hopefully be caught further on
1523 18 0 33     54 if (!exists $xlist->{'0'} && !$xrefListEmpty) {
1524             # for now, 1 subsection starting with 1, and only object 1 in
1525             # free list, try to fix up
1526 0 0 0     0 if ($subsection_count == 1 && exists $xlist->{'1'}) {
1527             # apparently a common enough error in PDF writers
1528              
1529 0 0 0     0 if ($xlist->{'1'}[0] == 0 && # only member of free list
      0        
1530             $xlist->{'1'}[1] == 65535 &&
1531             $xlist->{'1'}[2] eq 'f') {
1532             # object 1 appears to be the free list head, so shift
1533             # down all objects
1534 0 0       0 if ($options{'diags'} == 1) {
1535 0         0 warn "xref appears to be mislabeled starting with 1. Shift down all elements.\n";
1536             }
1537 0         0 my $next = 1;
1538 0         0 while (exists $xlist->{$next}) {
1539 0         0 $xlist->{$next - 1} = $xlist->{$next};
1540 0         0 $next++;
1541             }
1542 0         0 delete $xlist->{--$next};
1543              
1544             } else {
1545             # if object 1 does not appear to be a free list head,
1546             # insert a new object 0
1547 0 0       0 if ($options{'diags'} == 1) {
1548 0         0 warn "Xref appears to be missing object 0. Insert a new one.\n";
1549             }
1550 0         0 $xlist->{'0'} = [0, 65535, 'f'];
1551             }
1552             } else {
1553 0 0       0 if ($options{'diags'} == 1) {
1554 0         0 warn "Malformed cross reference list in PDF file $self->{' fname'} -- no object 0 (free list head)\n";
1555             }
1556 0         0 $xlist->{'0'} = [0, 65535, 'f'];
1557             }
1558             } # no object 0 entry
1559              
1560             # build/validate the free list (and no active objects have f flag)
1561 18         30 my @free_list;
1562 18         36 foreach (sort {$a <=> $b} keys %{ $xlist }) {
  248         326  
  18         99  
1563             # if 'f' flag, is in free list
1564 132 100       264 if ($xlist->{$_}[2] eq 'f') {
    50          
1565 18 50 33     73 if ($xlist->{$_}[1] <= 0 && $options{'diags'} == 1) {
1566 0         0 warn "Xref free list entry $_ with bad next generation number.\n";
1567             } else {
1568 18         45 push @free_list, $_; # should be in numeric order (0 first)
1569             }
1570             } elsif ($xlist->{$_}[2] eq 'n') {
1571 114 50 33     257 if ($xlist->{$_}[0] <= 0 && $options{'diags'} == 1) {
1572 0         0 warn "Xref active object $_ entry with bad length ".($xlist->{$_}[1])."\n";
1573             }
1574 114 50 33     239 if ($xlist->{$_}[1] < 0 && $options{'diags'} == 1) {
1575 0         0 warn "Xref active object $_ entry with bad generation number ".($xlist->{$_}[1])."\n";
1576             }
1577             } else {
1578 0 0       0 if ($options{'diags'} == 1) {
1579 0         0 warn "Xref entry has flag that is not 'f' or 'n'.\n";
1580             }
1581             }
1582             } # go through xlist and build free_list and check entries
1583             # traverse free list and check that "next object" is also in free list
1584 18         50 my $next_free = 0; # object 0 should always be in free list
1585 18 50 33     56 if ($xlist->{'0'}[1] != 65535 && $options{'diags'} == 1) {
1586 0         0 warn "Object 0 next generation is not 65535.\n";
1587             }
1588             do {
1589 18 50       85 if ($xlist->{$next_free}[2] ne 'f') {
1590 0 0       0 if ($options{'diags'} == 1) {
1591 0         0 warn "Corrupted free object list: next=$next_free is not a free object.\n";
1592             }
1593 0         0 $next_free = 0; # force end of free list
1594             } else {
1595 18         46 $next_free = $xlist->{$next_free}[0];
1596             }
1597             # remove this entry from free list array
1598 18         126 splice(@free_list, index(@free_list, $next_free), 1);
1599 18   33     27 } while ($next_free && exists $xlist->{$next_free});
1600 18 50 33     77 if (scalar @free_list && $options{'diags'} == 1) {
1601 0         0 warn "Corrupted xref list: object(s) @free_list marked as free, but are not in free chain.\n";
1602             }
1603              
1604             # done with cross reference table, so go on to trailer
1605 18 50 33     121 if ($buf !~ /^\s*trailer\b/i && $options{'diags'} == 1) { #orig 'die'
1606 0         0 warn "Malformed trailer in PDF file $self->{' fname'} at " . ($fh->tell() - length($buf));
1607             }
1608              
1609 18         65 $buf =~ s/^\s*trailer\b//i;
1610              
1611 18         80 ($tdict, $buf) = $self->readval($buf);
1612              
1613             } elsif ($buf =~ m/^(\d+)\s+(\d+)\s+obj/i) {
1614 3         15 my ($xref_obj, $xref_gen) = ($1, $2);
1615 3         17 $PDF::Builder::global_pdf->verCheckOutput(1.5, "importing cross-reference stream");
1616             # XRef streams
1617 3         11 ($tdict, $buf) = $self->readval($buf);
1618              
1619 3 50       11 unless ($tdict->{' stream'}) {
1620 0 0       0 if ($options{'diags'} == 1) {
1621 0         0 warn "Malformed XRefStm at $xref_obj $xref_gen obj in PDF file $self->{' fname'}";
1622             }
1623             }
1624 3         24 $tdict->read_stream(1);
1625              
1626 3         8 my $stream = $tdict->{' stream'};
1627 3         6 my @widths = map { $_->val() } @{$tdict->{'W'}->val()};
  9         17  
  3         15  
1628              
1629 3         6 my $start = 0;
1630 3         6 my $last;
1631              
1632             my @index;
1633 3 100       15 if (defined $tdict->{'Index'}) {
1634 1         2 @index = map { $_->val() } @{$tdict->{'Index'}->val()};
  2         4  
  1         4  
1635             } else {
1636 2         10 @index = (0, $tdict->{'Size'}->val());
1637             }
1638              
1639 3         10 while (scalar @index) {
1640 3         6 $start = shift(@index);
1641 3         10 $last = $start + shift(@index) - 1;
1642              
1643 3         12 for my $i ($start...$last) {
1644             # Replaced "for $xmin" because it creates a loop-specific local
1645             # variable, and we need $xmin to be correct for maxobj below.
1646 26         28 $xmin = $i;
1647              
1648 26         29 my @cols;
1649              
1650 26         34 for my $w (@widths) {
1651 78         81 my $data;
1652 78 50       196 $data = $self->_unpack_xref_stream($w, substr($stream, 0, $w, '')) if $w;
1653              
1654 78         118 push @cols, $data;
1655             }
1656              
1657 26 100       44 $cols[0] = 1 unless defined $cols[0];
1658 26 50 33     56 if ($cols[0] > 2 && $options{'diags'} == 1) {
1659 0         0 warn "Invalid XRefStm entry type ($cols[0]) at $xref_obj $xref_gen obj";
1660             }
1661              
1662 26 50       46 next if exists $xlist->{$xmin};
1663              
1664 26 50       65 my @objind = ($cols[1], defined($cols[2]) ? $cols[2] : ($xmin ? 0 : 65535));
    100          
1665 26 100       55 push @objind, ($cols[0] == 0? 'f': 'n') if $cols[0] < 2;
    100          
1666              
1667 26         68 $xlist->{$xmin} = \@objind;
1668             }
1669             }
1670              
1671             } else { #orig 'die'
1672 0 0       0 if ($options{'diags'} == 1) {
1673 0         0 warn "Malformed xref in PDF file $self->{' fname'}";
1674             }
1675             }
1676              
1677             # did we get to here without managing to set $xmin?
1678 21   50     68 $xmin ||= 0;
1679              
1680 21         63 $tdict->{' loc'} = $xpos;
1681 21         51 $tdict->{' xref'} = $xlist;
1682 21 100       75 $self->{' maxobj'} = $xmin + 1 if $xmin + 1 > $self->{' maxobj'};
1683             $tdict->{' prev'} = $self->readxrtr($tdict->{'Prev'}->val(), %options)
1684 21 100 66     86 if (defined $tdict->{'Prev'} and $tdict->{'Prev'}->val() != 0);
1685 21 100       95 delete $tdict->{' prev'} unless defined $tdict->{' prev'};
1686              
1687 21         69 return $tdict;
1688             } # end of readxrtr()
1689              
1690             =item $p->out_trailer($tdict, $update)
1691              
1692             =item $p->out_trailer($tdict)
1693              
1694             Outputs the body and trailer for a PDF file by outputting all the objects in
1695             the ' outlist' and then outputting a xref table for those objects and any
1696             freed ones. It then outputs the trailing dictionary and the trailer code.
1697              
1698             =cut
1699              
1700             sub out_trailer {
1701 179     179 1 754 my ($self, $tdict, $update) = @_;
1702              
1703 179         410 my $fh = $self->{' OUTFILE'};
1704              
1705 179         301 while (@{$self->{' outlist'}}) {
  363         1160  
1706 184         583 $self->ship_out();
1707             }
1708              
1709 179         781 $tdict->{'Size'} = PDFNum($self->{' maxobj'});
1710              
1711 179         581 my $tloc = $fh->tell();
1712             ## $fh->print("xref\n");
1713             # instead of directly outputting (fh->print) xreflist, we accumulate in @out
1714 179         839 my @out;
1715 179 100       306 my @xreflist = sort { $self->{' objects'}{$a->uid()}[0] <=> $self->{' objects'}{$b->uid()}[0] } (@{$self->{' printed'} || []}, @{$self->{' free'} || []});
  1610 100       3010  
  179         605  
  179         936  
1716              
1717 179         493 my ($i, $j, $k);
1718 179 100       526 unless ($update) {
1719 171         271 $i = 1;
1720 171         493 for ($j = 0; $j < @xreflist; $j++) {
1721 1052         1193 my @inserts;
1722 1052         1372 $k = $xreflist[$j];
1723 1052         1902 while ($i < $self->{' objects'}{$k->uid()}[0]) {
1724 0         0 my ($n) = PDF::Builder::Basic::PDF::Objind->new();
1725 0         0 $self->add_obj($n, $i, 0);
1726 0         0 $self->free_obj($n);
1727 0         0 push(@inserts, $n);
1728 0         0 $i++;
1729             }
1730 1052         1587 splice(@xreflist, $j, 0, @inserts);
1731 1052         1221 $j += @inserts;
1732 1052         1811 $i++;
1733             }
1734             }
1735              
1736 179 100       313 my @freelist = sort { $self->{' objects'}{$a->uid()}[0] <=> $self->{' objects'}{$b->uid()}[0] } @{$self->{' free'} || []};
  0         0  
  179         603  
1737              
1738 179         322 $j = 0; my $first = -1; $k = 0;
  179         295  
  179         312  
1739 179         579 for ($i = 0; $i <= $#xreflist + 1; $i++) {
1740 1253 100 100     3030 if ($i > $#xreflist || $self->{' objects'}{$xreflist[$i]->uid()}[0] != $j + 1) {
1741             ## $fh->print(($first == -1 ? "0 " : "$self->{' objects'}{$xreflist[$first]->uid()}[0] ") . ($i - $first) . "\n");
1742 191 100       841 push @out, ($first == -1 ? "0 " : "$self->{' objects'}{$xreflist[$first]->uid()}[0] ") . ($i - $first) . "\n";
1743 191 100       469 if ($first == -1) {
1744             ## $fh->printf("%010d 65535 f \n", defined $freelist[$k] ? $self->{' objects'}{$freelist[$k]->uid()}[0] : 0);
1745 179 50       978 push @out, sprintf("%010d 65535 f \n", defined $freelist[$k] ? $self->{' objects'}{$freelist[$k]->uid()}[0] : 0);
1746 179         387 $first = 0;
1747             }
1748 191         523 for ($j = $first; $j < $i; $j++) {
1749 1074         1597 my $xref = $xreflist[$j];
1750 1074 50 33     2246 if (defined $freelist[$k] && defined $xref && "$freelist[$k]" eq "$xref") {
      33        
1751 0         0 $k++;
1752             ## $fh->print(pack("A10AA5A4",
1753             push(@out, pack("A10AA5A4",
1754             sprintf("%010d", (defined $freelist[$k] ?
1755             $self->{' objects'}{$freelist[$k]->uid()}[0] : 0)), " ",
1756 0 0       0 sprintf("%05d", $self->{' objects'}{$xref->uid()}[1] + 1),
1757             " f \n"));
1758             } else {
1759             ## $fh->print(pack("A10AA5A4", sprintf("%010d", $self->{' locs'}{$xref->uid()}), " ",
1760             push(@out, pack("A10AA5A4", sprintf("%010d", $self->{' locs'}{$xref->uid()}), " ",
1761 1074         2325 sprintf("%05d", $self->{' objects'}{$xref->uid()}[1]),
1762             " n \n"));
1763             }
1764             }
1765 191         356 $first = $i;
1766 191 100       648 $j = $self->{' objects'}{$xreflist[$i]->uid()}[0] if ($i < scalar @xreflist);
1767              
1768             } else {
1769 1062         1919 $j++;
1770             }
1771             } # end for loop through xreflists
1772             ## $fh->print("trailer\n");
1773             ## $tdict->outobjdeep($fh, $self);
1774             ## $fh->print("\nstartxref\n$tloc\n%%EOF\n");
1775             ## start new code for 117184 fix by Vadim. @out has array of xref content
1776 179 50 33     713 if (exists $tdict->{'Type'} and $tdict->{'Type'}->val() eq 'XRef') {
1777              
1778 0         0 my (@index, @stream);
1779 0         0 for (@out) { # @out is the accumulated cross reference list
1780 0         0 my @a = split;
1781 0 0       0 @a == 2 ? push @index, @a : push @stream, \@a;
1782             }
1783 0         0 my $i = $self->{' maxobj'}++;
1784 0         0 $self->add_obj($tdict, $i, 0);
1785 0         0 $self->out_obj($tdict);
1786              
1787 0         0 push @index, $i, 1;
1788 0         0 push @stream, [ $tloc, 0, 'n' ];
1789              
1790 0 0       0 my $len = $tloc > 0xFFFF ? 4 : 2; # don't expect files > 4 Gb
1791 0 0       0 my $tpl = $tloc > 0xFFFF ? 'CNC' : 'CnC'; # don't expect gennum > 255, it's absurd.
1792             # Adobe doesn't use them anymore anyway
1793 0         0 my $sstream = '';
1794 0         0 my @prev = ( 0 ) x ( $len + 2 ); # init prev to all 0's
1795 0         0 for (@stream) {
1796             # OK to zero out gennum of 65535 for a cross reference stream,
1797             # rather than just truncating to 255 -- Vadim
1798 0 0 0     0 $_->[ 1 ] = 0 if $_->[ 1 ] == 65535 and
1799             $_->[ 2 ] eq 'f';
1800             # make sure is 0..255, since will pack with 'C' code -- Phil
1801 0 0       0 if ($_->[1] > 0xFF) {
1802 0         0 print "generation number ".($_->[1])." in entry '$_->[0] $_->[1] $_->[2]' exceeds 256, reduced to ".($_->[1] & 0x00FF)."\n";
1803             }
1804 0         0 $_->[ 1 ] &= 0x00FF;
1805 0 0       0 my @line = unpack 'C*', pack $tpl, $_->[ 2 ] eq 'n'? 1 : 0, @{ $_ }[ 0 .. 1 ];
  0         0  
1806              
1807             $sstream .= pack 'C*', 2, # prepend filtering method, "PNG Up"
1808 0         0 map {($line[ $_ ] - $prev[ $_ ] + 256) % 256} 0 .. $#line;
  0         0  
1809 0         0 @prev = @line;
1810             }
1811             # build a dictionary for the cross reference stream
1812 0         0 $tdict->{'Size'} = PDFNum($i + 1);
1813 0         0 $tdict->{'Index'} = PDFArray(map { PDFNum($_) } @index);
  0         0  
1814 0         0 $tdict->{'W'} = PDFArray(map { PDFNum($_) } 1, $len, 1);
  0         0  
1815 0         0 $tdict->{'Filter'} = PDFName('FlateDecode');
1816              
1817             # it's compressed
1818 0         0 $tdict->{'DecodeParms'} = PDFDict();
1819 0         0 $tdict->{'DecodeParms'}->val()->{'Predictor'} = PDFNum(12);
1820 0         0 $tdict->{'DecodeParms'}->val()->{'Columns'} = PDFNum($len + 2);
1821              
1822 0         0 $sstream = PDF::Builder::Basic::PDF::Filter::FlateDecode->new()->outfilt($sstream, 1);
1823 0         0 $tdict->{' stream'} = $sstream;
1824 0         0 $tdict->{' nofilt'} = 1;
1825 0         0 delete $tdict->{'Length'};
1826 0         0 $self->ship_out();
1827             } else {
1828             # delete may be moved later by Vadim closer to where XRefStm created
1829 179         344 delete $tdict->{'XRefStm'};
1830             # almost the original code
1831 179         706 $fh->print("xref\n", @out, "trailer\n");
1832 179         1682 $tdict->outobjdeep($fh, $self);
1833 179         461 $fh->print("\n");
1834             }
1835 179         1348 $fh->print("startxref\n$tloc\n%%EOF\n");
1836             ## end of new code
1837              
1838 179         1207 return;
1839             } # end of out_trailer()
1840              
1841             =item PDF::Builder::Basic::PDF::File->_new()
1842              
1843             Creates a very empty PDF file object (used by new() and open())
1844              
1845             =cut
1846              
1847             sub _new {
1848 235     235   480 my $class = shift();
1849 235         463 my $self = {};
1850              
1851 235         492 bless $self, $class;
1852 235         788 $self->{' outlist'} = [];
1853 235         576 $self->{' outlist_cache'} = {}; # A cache of what's in the 'outlist'
1854 235         501 $self->{' maxobj'} = 1;
1855 235         511 $self->{' objcache'} = {};
1856 235         618 $self->{' objects'} = {};
1857              
1858 235         505 return $self;
1859             }
1860              
1861             1;
1862              
1863             =back
1864              
1865             =head1 AUTHOR
1866              
1867             Martin Hosken Martin_Hosken@sil.org
1868              
1869             Copyright Martin Hosken 1999 and onwards
1870              
1871             No warranty or expression of effectiveness, least of all regarding anyone's
1872             safety, is implied in this software or documentation.