File Coverage

blib/lib/PDF/API3/Compat/API2/Basic/PDF/File.pm
Criterion Covered Total %
statement 63 620 10.1
branch 0 212 0.0
condition 0 84 0.0
subroutine 18 45 40.0
pod 22 26 84.6
total 103 987 10.4


line stmt bran cond sub pod time code
1             #=======================================================================
2             # ____ ____ _____ _ ____ ___ ____
3             # | _ \| _ \| ___| _ _ / \ | _ \_ _| |___ \
4             # | |_) | | | | |_ (_) (_) / _ \ | |_) | | __) |
5             # | __/| |_| | _| _ _ / ___ \| __/| | / __/
6             # |_| |____/|_| (_) (_) /_/ \_\_| |___| |_____|
7             #
8             # A Perl Module Chain o faciliae he Creaion and Modificaion
9             # of High-Quality "Portable Document Format (PDF)" Files.
10             #
11             #=======================================================================
12             #
13             # THIS IS A REUSED PERL MODULE, FOR PROPER LICENCING TERMS SEE BELOW:
14             #
15             #
16             # Copyright Martin Hosken
17             #
18             # Modified for PDF::API3::Compat::API2 by Alfred Reibenschuh
19             #
20             # No warranty or expression of effectiveness, least of all regarding
21             # anyone's safety, is implied in this software or documentation.
22             #
23             # This specific module is licensed under the Perl Artistic License.
24             #
25             #
26             # $Id: File.pm,v 2.9 2007/11/03 20:31:53 areibens Exp $
27             #
28             #=======================================================================
29             package PDF::API3::Compat::API2::Basic::PDF::File;
30            
31             =head1 NAME
32            
33             PDF::API3::Compat::API2::Basic::PDF::File - Holds the trailers and cross-reference tables for a PDF file
34            
35             =head1 SYNOPSIS
36            
37             $p = PDF::API3::Compat::API2::Basic::PDF::File->open("filename.pdf", 1);
38             $p->new_obj($obj_ref);
39             $p->free_obj($obj_ref);
40             $p->append_file;
41             $p->close_file;
42             $p->release; # IMPORTANT!
43            
44             =head1 DESCRIPTION
45            
46             This class keeps track of the directory aspects of a PDF file. There are two
47             parts to the directory: the main directory object which is the parent to all
48             other objects and a chain of cross-reference tables and corresponding trailer
49             dictionaries starting with the main directory object.
50            
51             =head1 INSTANCE VARIABLES
52            
53             Within this class hierarchy, rather than making everything visible via methods,
54             which would be a lot of work, there are various instance variables which are
55             accessible via associative array referencing. To distinguish instance variables
56             from content variables (which may come from the PDF content itself), each such
57             variable will start with a space.
58            
59             Variables which do not start with a space directly reflect elements in a PDF
60             dictionary. In the case of a PDF::API3::Compat::API2::Basic::PDF::File, the elements reflect those in the
61             trailer dictionary.
62            
63             Since some variables are not designed for class users to access, variables are
64             marked in the documentation with (R) to indicate that such an entry should only
65             be used as read-only information. (P) indicates that the information is private
66             and not designed for user use at all, but is included in the documentation for
67             completeness and to ensure that nobody else tries to use it.
68            
69             =over
70            
71             =item newroot
72            
73             This variable allows the user to create a new root entry to occur in the trailer
74             dictionary which is output when the file is written or appended. If you wish to
75             over-ride the root element in the dictionary you have, use this entry to indicate
76             that without losing the current Root entry. Notice that newroot should point to
77             a PDF level object and not just to a dictionary which does not have object status.
78            
79             =item INFILE (R)
80            
81             Contains the filehandle used to read this information into this PDF directory. Is
82             an IO object.
83            
84             =item fname (R)
85            
86             This is the filename which is reflected by INFILE, or the original IO object passed
87             in.
88            
89             =item update (R)
90            
91             This indicates that the read file has been opened for update and that at some
92             point, $p->appendfile() can be called to update the file with the changes that
93             have been made to the memory representation.
94            
95             =item maxobj (R)
96            
97             Contains the first useable object number above any that have already appeared
98             in the file so far.
99            
100             =item outlist (P)
101            
102             This is a list of Objind which are to be output when the next appendfile or outfile
103             occurs.
104            
105             =item firstfree (P)
106            
107             Contains the first free object in the free object list. Free objects are removed
108             from the front of the list and added to the end.
109            
110             =item lastfree (P)
111            
112             Contains the last free object in the free list. It may be the same as the firstfree
113             if there is only one free object.
114            
115             =item objcache (P)
116            
117             All objects are held in the cache to ensure that a system only has one occurrence of
118             each object. In effect, the objind class acts as a container type class to hold the
119             PDF object structure and it would be unfortunate if there were two identical
120             place-holders floating around a system.
121            
122             =item epos (P)
123            
124             The end location of the read-file.
125            
126             =back
127            
128             Each trailer dictionary contains a number of private instance variables which
129             hold the chain together.
130            
131             =over
132            
133             =item loc (P)
134            
135             Contains the location of the start of the cross-reference table preceding the
136             trailer.
137            
138             =item xref (P)
139            
140             Contains an anonymous array of each cross-reference table entry.
141            
142             =item prev (P)
143            
144             A reference to the previous table. Note this differs from the Prev entry which
145             is in PDF which contains the location of the previous cross-reference table.
146            
147             =back
148            
149             =head1 METHODS
150            
151             =cut
152            
153            
154             BEGIN
155             {
156 1     1   6 use strict;
  1         2  
  1         41  
157 1     1   12 no strict "refs";
  1         2  
  1         33  
158 1     1   5 use vars qw($cr $irreg_char $reg_char $ws_char $delim_char %types $VERSION);
  1         3  
  1         120  
159             # no warnings qw(uninitialized);
160 1     1   5 use strict;
  1         1  
  1         168  
161            
162 1     1   2 my ($t, $type);
163            
164 1         2 $ws_char = '[ \t\r\n\f\0]';
165 1         2 $delim_char = '[][<>{}()/%]';
166 1         1 $reg_char = '[^][<>{}()/% \t\r\n\f\0]';
167 1         2 $irreg_char = '[][<>{}()/% \t\r\n\f\0]';
168             ## $cr = "$ws_char*(?:\015|\012|(?:\015\012))";
169 1         1 $cr = '\s*(?:\015|\012|(?:\015\012))';
170 1         6 %types = (
171             'Page' => 'PDF::API3::Compat::API2::Basic::PDF::Page',
172             'Pages' => 'PDF::API3::Compat::API2::Basic::PDF::Pages'
173             );
174            
175 1         10 foreach $type (keys %types)
176             {
177 2         6 $t = $types{$type};
178 2         12 $t =~ s|::|/|og;
179 2         790 require "$t.pm";
180             }
181 1         22 $PDF::API3::Compat::API2::Basic::PDF::File::readDebug=0;
182             }
183            
184            
185            
186 1     1   7 use IO::File;
  1         1  
  1         171  
187            
188             # Now for the basic PDF types
189 1     1   5 use PDF::API3::Compat::API2::Basic::PDF::Utils;
  1         1  
  1         87  
190            
191 1     1   4 use PDF::API3::Compat::API2::Basic::PDF::Array;
  1         2  
  1         20  
192 1     1   14 use PDF::API3::Compat::API2::Basic::PDF::Bool;
  1         3  
  1         20  
193 1     1   6 use PDF::API3::Compat::API2::Basic::PDF::Dict;
  1         3  
  1         18  
194 1     1   5 use PDF::API3::Compat::API2::Basic::PDF::Name;
  1         1  
  1         39  
195 1     1   7 use PDF::API3::Compat::API2::Basic::PDF::Number;
  1         2  
  1         22  
196 1     1   5 use PDF::API3::Compat::API2::Basic::PDF::Objind;
  1         2  
  1         15  
197 1     1   7 use PDF::API3::Compat::API2::Basic::PDF::String;
  1         1  
  1         25  
198 1     1   5 use PDF::API3::Compat::API2::Basic::PDF::Page;
  1         2  
  1         32  
199 1     1   5 use PDF::API3::Compat::API2::Basic::PDF::Pages;
  1         2  
  1         19  
200 1     1   6 use PDF::API3::Compat::API2::Basic::PDF::Null;
  1         1  
  1         31  
201            
202 1     1   5 no warnings qw[ deprecated recursion uninitialized ];
  1         1  
  1         8336  
203            
204             ( $VERSION ) = sprintf '%i.%03i', split(/\./,('$Revision: 2.9 $' =~ /Revision: (\S+)\s/)[0]); # $Date: 2007/11/03 20:31:53 $
205            
206             #IMPORTED INTO PDF::API3::Compat::API2
207            
208             #$VERSION = "0.23"; # MJPH 14-AUG-2002 Fix MANIFEST
209             #$VERSION = "0.22"; # MJPH 26-JUL-2002 Add PDF::API3::Compat::API2::Basic::PDF::File::copy, tidy up update(), sort out out_trailer
210             # Fix to not remove string final CRs when reading dictionaries
211             #$VERSION = "0.21"; # GJ 8-JUN-2002 Tidy up regexps, add PDF::API3::Compat::API2::Basic::PDF::Null
212             #$VERSION = "0.20"; # MJPH 27-APR-2002 $trailer->{'Size'} becomes max num objects, fix line end problem,
213             # remove warnings, update release code
214             #$VERSION = "0.19"; # MJPH 5-FEB-2002 fix hex keys and ASCII85 filter
215             #$VERSION = "0.18"; # MJPH 1-DEC-2001 add encryption hooks
216             #$VERSION = "0.17"; # GST 18-JUL-2001 Handle \) in strings and tidy up endobj handling, no uninitialized warnings
217             #$VERSION = "0.16"; # GST 18-JUL-2001 Major performance tweaks
218             #$VERSION = "0.15"; # GST 30-MAY-2001 Memory leaks fixed
219             #$VERSION = "0.14"; # MJPH 2-MAY-2001 More little bug fixes, added read_objnum
220             #$VERSION = "0.13"; # MJPH 23-MAR-2001 General bug fix release
221             #$VERSION = "0.12"; # MJPH 29-JUL-2000 Add font subsetting, random page insertion
222             #$VERSION = "0.11"; # MJPH 18-JUL-2000 Add pdfstamp.plx and more debugging
223             #$VERSION = "0.10"; # MJPH 27-JUN-2000 Tidy up some bugs - names
224             #$VERSION = "0.09"; # MJPH 31-MAR-2000 Copy trailer dictionary properly
225             #$VERSION = "0.08"; # MJPH 07-FEB-2000 Add null element
226             #$VERSION = "0.07"; # MJPH 01-DEC-1999 Debug for pdfbklt
227             #$VERSION = "0.06"; # MJPH 11-SEP-1999 Sort out unixisms
228             #$VERSION = "0.05"; # MJPH 9-SEP-1999 Add ship_out
229             #$VERSION = "0.04"; # MJPH 14-JUL-1999 Correct paths for tarball release
230             #$VERSION = "0.03"; # MJPH 14-JUL-1999 Correct paths for tarball release
231             #$VERSION = "0.02"; # MJPH 30-JUN-1999 Transfer from old library
232            
233            
234             =head2 PDF::API3::Compat::API2::Basic::PDF::File->new
235            
236             Creates a new, empty file object which can act as the host to other PDF objects.
237             Since there is no file associated with this object, it is assumed that the
238             object is created in readiness for creating a new PDF file.
239            
240             =cut
241            
242             sub new
243             {
244 0     0 1   my ($class, $root) = @_;
245 0           my ($self) = $class->_new;
246            
247 0 0         unless ($root)
248             {
249 0           $root = PDFDict();
250 0           $root->{'Type'} = PDFName("Catalog");
251             }
252 0           $self->new_obj($root);
253 0           $self->{'Root'} = $root;
254 0           $self;
255             }
256            
257            
258             =head2 $p = PDF::API3::Compat::API2::Basic::PDF::File->open($filename, $update)
259            
260             Opens the file and reads all the trailers and cross reference tables to build
261             a complete directory of objects.
262            
263             $update specifies whether this file is being opened for updating and editing,
264             or simply to be read.
265            
266             $filename may be an IO object
267            
268             =cut
269            
270             sub open
271             {
272 0     0 1   my ($class, $fname, $update) = @_;
273 0           my ($self, $buf, $xpos, $end, $tdict, $k);
274 0           my ($fh);
275            
276 0           $self = $class->_new;
277 0 0         if (ref $fname)
278             {
279 0           $self->{' INFILE'} = $fname;
280 0 0         if ($update)
281             {
282 0           $self->{' update'} = 1;
283 0           $self->{' OUTFILE'} = $fname;
284             }
285 0           $fh = $fname;
286             }
287             else
288             {
289 0 0         die "File '$fname' does not exist !" unless(-f $fname);
290 0   0       $fh = IO::File->new(($update ? "+" : "") . "<$fname") || return undef;
291 0           $self->{' INFILE'} = $fh;
292 0 0         if ($update)
293             {
294 0           $self->{' update'} = 1;
295 0           $self->{' OUTFILE'} = $fh;
296 0           $self->{' fname'} = $fname;
297             }
298             }
299 0           binmode($fh,':raw');
300 0           $fh->seek(0, 0); # go to start of file
301 0           $fh->read($buf, 255);
302 0 0         if ($buf !~ m/^\%PDF\-1\.\d+\s*$cr/mo)
303 0           { die "$fname not a PDF file version 1.x"; }
304            
305 0           $fh->seek(0, 2); # go to end of file
306 0           $end = $fh->tell();
307 0           $self->{' epos'} = $end;
308 0           foreach my $eoff (1..64)
309             {
310 0           $fh->seek($end - 16*$eoff, 0);
311 0           $fh->read($buf, 16*$eoff);
312 0 0         if ($buf =~ m/startxref($cr|\s*)\d+($cr|\s*)\%\%eof.*?/oi)
313             {
314 0           last;
315             }
316             }
317 0 0         if ($buf !~ m/startxref[^\d]+([0-9]+)($cr|\s*)\%\%eof.*?/oi)
318 0           { die "Malformed PDF file $fname"; }
319 0           $xpos = $1;
320            
321 0           $tdict = $self->readxrtr($xpos, $self);
322 0           foreach $k (keys %{$tdict})
  0            
323 0           { $self->{$k} = $tdict->{$k}; }
324 0           return $self;
325             }
326            
327             =head2 $p->release()
328            
329             Releases ALL of the memory used by the PDF document and all of its component
330             objects. After calling this method, do B expect to have anything left in
331             the C object (so if you need to save, be sure to do it before
332             calling this method).
333            
334             B, that it is important that you call this method on any
335             C object when you wish to destruct it and free up its memory.
336             Internally, PDF files have an enormous number of cross-references and this
337             causes circular references within the internal data structures. Calling
338             'C' forces a brute-force cleanup of the data structures, freeing up
339             all of the memory. Once you've called this method, though, don't expect to be
340             able to do anything else with the C object; it'll have B
341             internal state whatsoever.
342            
343             B As part of the brute-force cleanup done here, this method
344             will throw a warning message whenever unexpected key values are found within
345             the C object. This is done to help ensure that any unexpected
346             and unfreed values are brought to your attention so that you can bug us to keep
347             the module updated properly; otherwise the potential for memory leaks due to
348             dangling circular references will exist.
349            
350             =cut
351            
352             sub release
353             {
354 0     0 1   my ($self,$explicitDestruct) = @_;
355            
356 0 0         return($self) unless(ref $self);
357 0           my @tofree = values %{$self};
  0            
358 0           map { $self->{$_}=undef; delete $self->{$_}; } keys %{$self};
  0            
  0            
  0            
359 0           while (my $item = shift @tofree)
360             {
361 0 0         if (UNIVERSAL::can($item,'release'))
    0          
    0          
362             {
363 0           $item->release(1);
364             }
365             elsif (ref($item) eq 'ARRAY')
366             {
367 0           push( @tofree, @{$item} );
  0            
368             }
369             elsif (ref($item) eq 'HASH')
370             {
371 0           push( @tofree, values %{$item} );
  0            
372 0           map { $item->{$_}=undef; delete $item->{$_}; } keys %{$item};
  0            
  0            
  0            
373             }
374             else
375             {
376 0           $item=undef;
377             }
378             }
379             }
380            
381             =head2 $p->append_file()
382            
383             Appends the objects for output to the read file and then appends the appropriate tale.
384            
385             =cut
386            
387             sub append_file
388             {
389 0     0 1   my ($self) = @_;
390 0           my ($tdict, $fh, $t, $buf, $ver);
391            
392 0 0         return undef unless ($self->{' update'});
393            
394 0           $fh = $self->{' INFILE'};
395            
396             # hack to upgrade pdf-version number to support
397             # requested features in higher versions that
398             # the pdf was originally created.
399 0   0       $ver=$self->{' version'} || 4;
400 0           $fh->seek(0,0);
401 0           $fh->print("%PDF-1.$ver\n");
402            
403 0           $tdict = PDFDict();
404 0           $tdict->{'Prev'} = PDFNum($self->{' loc'});
405 0           $tdict->{'Info'} = $self->{'Info'};
406 0 0         if (defined $self->{' newroot'})
407 0           { $tdict->{'Root'} = $self->{' newroot'}; }
408             else
409 0           { $tdict->{'Root'} = $self->{'Root'}; }
410 0           $tdict->{'Size'} = $self->{'Size'};
411            
412 0           foreach $t (grep ($_ !~ m/^\s/o, keys %$self))
413 0 0         { $tdict->{$t} = $self->{$t} unless defined $tdict->{$t}; }
414            
415 0           $fh->seek($self->{' epos'}, 0);
416 0           $self->out_trailer($tdict,$self->{' update'});
417 0           close($self->{' OUTFILE'});
418             }
419            
420            
421             =head2 $p->out_file($fname)
422            
423             Writes a PDF file to a file of the given filename based on the current list of
424             objects to be output. It creates the trailer dictionary based on information
425             in $self.
426            
427             $fname may be an IO object;
428            
429             =cut
430            
431             sub out_file
432             {
433 0     0 1   my ($self, $fname) = @_;
434            
435 0           $self->create_file($fname);
436 0           $self->close_file;
437 0           $self;
438             }
439            
440            
441             =head2 $p->create_file($fname)
442            
443             Creates a new output file (no check is made of an existing open file) of
444             the given filename or IO object. Note, make sure that $p->{' version'} is set
445             correctly before calling this function.
446            
447             =cut
448            
449             sub create_file
450             {
451 0     0 1   my ($self, $fname) = @_;
452 0           my ($fh);
453            
454 0           $self->{' fname'} = $fname;
455 0 0         if (ref $fname)
456 0           { $fh = $fname; }
457             else
458             {
459 0   0       $fh = IO::File->new(">$fname") || die "Unable to open $fname for writing";
460 0           binmode($fh,':raw');
461             }
462            
463 0           $self->{' OUTFILE'} = $fh;
464 0   0       $fh->print('%PDF-1.' . ($self->{' version'} || '2') . "\n");
465 0           $fh->print("%\xC7\xEC\xF3\xA2\n"); # and some binary stuff in a comment
466 0           $self;
467             }
468            
469            
470             =head2 $p->close_file
471            
472             Closes up the open file for output by outputting the trailer etc.
473            
474             =cut
475            
476             sub close_file
477             {
478 0     0 1   my ($self) = @_;
479 0           my ($fh, $tdict, $t);
480            
481 0           $tdict = PDFDict();
482 0 0         $tdict->{'Info'} = $self->{'Info'} if defined $self->{'Info'};
483 0 0 0       $tdict->{'Root'} = (defined $self->{' newroot'} and $self->{' newroot'} ne "") ? $self->{' newroot'} : $self->{'Root'};
484            
485             # remove all freed objects from the outlist, AND the outlist_cache if not updating
486             # NO! Don't do that thing! In fact, let out_trailer do the opposite!
487            
488            
489 0   0       $tdict->{'Size'} = $self->{'Size'} || PDFNum(1);
490 0 0         $tdict->{'Prev'} = PDFNum($self->{' loc'}) if ($self->{' loc'});
491 0 0         if ($self->{' update'})
492             {
493 0           foreach $t (grep ($_ !~ m/^[\s\-]/o, keys %$self))
494 0 0         { $tdict->{$t} = $self->{$t} unless defined $tdict->{$t}; }
495            
496 0           $fh = $self->{' INFILE'};
497 0           $fh->seek($self->{' epos'}, 0);
498             }
499            
500 0           $self->out_trailer($tdict, $self->{' update'});
501 0           close($self->{' OUTFILE'});
502 0 0 0       MacPerl::SetFileInfo("CARO", "TEXT", $self->{' fname'})
503             if ($^O eq "MacOS" && !ref($self->{' fname'}));
504 0           $self;
505             }
506            
507             =head2 ($value, $str) = $p->readval($str, %opts)
508            
509             Reads a PDF value from the current position in the file. If $str is too short
510             then read some more from the current location in the file until the whole object
511             is read. This is a recursive call which may slurp in a whole big stream (unprocessed).
512            
513             Returns the recursive data structure read and also the current $str that has been
514             read from the file.
515            
516             =cut
517            
518             sub readval
519             {
520 0     0 1   my ($self, $str, %opts) = @_;
521 0           my ($fh) = $self->{' INFILE'};
522 0           my ($res, $key, $value, $k);
523            
524 0           $str = update($fh, $str);
525             ## updateRef($fh, \$str);
526            
527 0 0         if ($str =~ m/^<
    0          
    0          
    0          
    0          
    0          
    0          
    0          
    0          
    0          
528             {
529 0           $str = substr ($str, 2);
530 0           $str = update($fh, $str);
531             ## updateRef($fh, \$str);
532 0           $res = PDFDict();
533            
534 0           while ($str !~ m/^>>/o) {
535 0 0         if ($str =~ s|^/($reg_char+)||o)
    0          
    0          
536             {
537 0           $k = PDF::API3::Compat::API2::Basic::PDF::Name::name_to_string ($1, $self);
538 0           ($value, $str) = $self->readval($str, %opts);
539 0           $res->{$k} = $value;
540             }
541             elsif($str =~ s|^/$ws_char+||o)
542             {
543             # fixes a broken key problem of acrobat. -- fredo
544 0           ($value, $str) = $self->readval($str, %opts);
545 0           $res->{null} = $value;
546             }
547             elsif($str =~ s|^//|/|o)
548             {
549             # fixes again a broken key problem of illustrator/enfocus. -- fredo
550 0           ($value, $str) = $self->readval($str, %opts);
551 0           $res->{null} = $value;
552             }
553 0           $str = update($fh, $str); # thanks gareth.jones@stud.man.ac.uk
554             ## updateRef($fh, \$str); # thanks gareth.jones@stud.man.ac.uk
555             }
556 0           $str =~ s/^>>//o;
557 0           $str = update($fh, $str);
558             ## updateRef($fh, \$str);
559             # streams can't be followed by a lone carriage-return.
560             # fredo: yes they can !!! -- use the MacOS Luke.
561 0 0 0       if (($str =~ s/^stream(?:(?:\015\012)|\012|\015)//o)
562             && ($res->{'Length'}->val != 0)) # stream
563             {
564 0           $k = $res->{'Length'}->val;
565 0           $res->{' streamsrc'} = $fh;
566 0           $res->{' streamloc'} = $fh->tell - length($str);
567 0 0         unless ($opts{'nostreams'})
568             {
569 0 0         if ($k > length($str))
570             {
571 0           $value = $str;
572 0           $k -= length($str);
573 0           read ($fh, $str, $k + 11); # slurp the whole stream!
574             } else
575 0           { $value = ''; }
576 0           $value .= substr($str, 0, $k);
577 0           $res->{' stream'} = $value;
578 0           $res->{' nofilt'} = 1;
579 0           $str = update($fh, $str, 1); # tell update we are in-stream and only need an endstream
580             ## updateRef($fh, \$str, 1); # tell update we are in-stream and only need an endstream
581             #$str =~ s/^endstream//o; # we cannot regexpr here since we need the first endstream only
582             # so we do the following:
583 0           my $wh = index($str,'endstream');
584 0           $str = substr($str,$wh+9);
585             }
586             }
587            
588 0 0 0       bless $res, $types{$res->{'Type'}->val}
589             if (defined $res->{'Type'} && defined $types{$res->{'Type'}->val});
590             # gdj: FIXME: if any of the ws chars were crs, then the whole
591             # string might not have been read.
592             }
593             elsif ($str =~ m/^([0-9]+)$ws_char+([0-9]+)$ws_char+R/so) # objind
594             {
595 0           $k = $1;
596 0           $value = $2;
597 0           $str =~ s/^([0-9]+)$ws_char+([0-9]+)$ws_char+R//so;
598 0 0         unless ($res = $self->test_obj($k, $value))
599             {
600 0           $res = PDF::API3::Compat::API2::Basic::PDF::Objind->new();
601 0           $res->{' objnum'} = $k;
602 0           $res->{' objgen'} = $value;
603 0           $self->add_obj($res, $k, $value);
604             }
605 0           $res->{' parent'} = $self;
606 0           $res->{' realised'} = 0;
607             # gdj: FIXME: if any of the ws chars were crs, then the whole
608             # string might not have been read.
609             }
610             elsif ($str =~ m/^([0-9]+)$ws_char+([0-9]+)$ws_char+obj/so) # object data
611             {
612 0           my ($obj);
613 0           $k = $1;
614 0           $value = $2;
615 0           $str =~ s/^([0-9]+)$ws_char+([0-9]+)$ws_char+obj//so;
616 0           ($obj, $str) = $self->readval($str, %opts, 'objnum' => $k, 'objgen' => $value);
617 0 0         if ($res = $self->test_obj($k, $value))
618 0           { $res->merge($obj); }
619             else
620             {
621 0           $res = $obj;
622 0           $self->add_obj($res, $k, $value);
623 0           $res->{' realised'} = 1;
624             }
625 0           $str = update($fh, $str); # thanks to kundrat@kundrat.sk
626             ## updateRef($fh, \$str); # thanks to kundrat@kundrat.sk
627 0           $str =~ s/^endobj//o;
628             } elsif ($str =~ m|^/($reg_char+)|so) # name
629             {
630             # " <- Fix colourization
631 0           $value = $1;
632 0           $str =~ s|^/($reg_char+)||so;
633 0           $res = PDF::API3::Compat::API2::Basic::PDF::Name->from_pdf($value, $self);
634             }
635             elsif ($str =~ m/^\(/o) # literal string
636             {
637 0           $str =~ s/^\(//o;
638             # We now need to find an unbalanced, unescaped right-paren.
639             # This can't be done with regexps.
640 0           my ($value) = "";
641             # The current level of nesting, when this reaches 0 we have finished.
642 0           my ($nested) = 1;
643 0           while (1) {
644             # Remove everything up to the first (possibly escaped) paren.
645 0           $str =~ /^((?:[^\\()]|\\[^()])*)(.*)/so;
646 0           $value .= $1;
647 0           $str = $2;
648            
649 0 0         if ($str =~ /^(\\[()])/o)
    0          
    0          
650             {
651             # An escaped paren. This would be tricky to do with
652             # the regexp above (it's very difficult to be certain
653             # that all cases are covered so I think it's better to
654             # deal with them explicitly).
655 0           $str = substr ($str, 2);
656 0           $value = $value . $1;
657             }
658             elsif ($str =~ /^\)/o)
659             {
660             # Right paren
661 0           $nested--;
662 0           $str = substr ($str, 1);
663 0 0         if ($nested == 0)
664 0           { last; }
665 0           $value = $value . ')';
666             }
667             elsif ($str =~ /^\(/o)
668             {
669             # Left paren
670 0           $nested++;
671 0           $str = substr ($str, 1);
672 0           $value = $value . '(';
673             }
674             else
675             {
676             # No parens, we must read more. We don't use update
677             # because we don't want to remove whitespace or
678             # comments.
679 0 0         $fh->read($str, 255, length($str)) or die "Unterminated string.";
680             }
681             }
682            
683 0           $res = PDF::API3::Compat::API2::Basic::PDF::String->from_pdf($value);
684             }
685             elsif ($str =~ m/^
686             {
687 0           $str =~ s/^
688 0           $fh->read($str, 255, length($str)) while (0 > index( $str, '>' ));
689 0           ($value, $str) = ($str =~ /^(.*?)>(.*?)$/so);
690 0           $res = PDF::API3::Compat::API2::Basic::PDF::String->from_pdf("<" . $value . ">");
691             }
692             elsif ($str =~ m/^\[/o) # array
693             {
694 0           $str =~ s/^\[//o;
695 0           $str = update($fh, $str);
696             ## updateRef($fh, \$str);
697 0           $res = PDFArray();
698 0           while ($str !~ m/^\]/o)
699             {
700 0           ($value, $str) = $self->readval($str, %opts);
701 0           $res->add_elements($value);
702 0           $str = update($fh, $str); # str might just be exhausted!
703             ## updateRef($fh, \$str); # str might just be exhausted!
704             }
705 0           $str =~ s/^\]//o;
706             }
707             elsif ($str =~ m/^(true|false)$irreg_char/o) # boolean
708             {
709 0           $value = $1;
710 0           $str =~ s/^(?:true|false)//o;
711 0           $res = PDF::API3::Compat::API2::Basic::PDF::Bool->from_pdf($value);
712             }
713             elsif ($str =~ m/^([+-.0-9]+)$irreg_char/o) # number
714             {
715 0           $value = $1;
716 0           $str =~ s/^([+-.0-9]+)//o;
717 0           $res = PDF::API3::Compat::API2::Basic::PDF::Number->from_pdf($value);
718             }
719             elsif ($str =~ m/^null$irreg_char/o)
720             {
721 0           $str =~ s/^null//o;
722 0           $res = PDF::API3::Compat::API2::Basic::PDF::Null->new;
723             }
724             else
725             {
726             # finally give up
727 0           die "Can't parse `$str' near " . ($fh->tell()) . " length " . length($str) . ".";
728             }
729            
730 0           $str =~ s/^$ws_char*//os;
731 0           return ($res, $str);
732             }
733            
734            
735             =head2 $ref = $p->read_obj($objind, %opts)
736            
737             Given an indirect object reference, locate it and read the object returning
738             the read in object.
739            
740             =cut
741            
742             sub read_obj
743             {
744 0     0 1   my ($self, $objind, %opts) = @_;
745 0           my ($loc, $res, $str, $oldloc);
746            
747             # return ($objind) if $self->{' objects'}{$objind->uid};
748 0   0       $res = $self->read_objnum($objind->{' objnum'}, $objind->{' objgen'}, %opts) || return undef;
749 0 0         $objind->merge($res) unless ($objind eq $res);
750 0           return $objind;
751             }
752            
753            
754             =head2 $ref = $p->read_objnum($num, $gen, %opts)
755            
756             Returns a fully read object of given number and generation in this file
757            
758             =cut
759            
760             sub read_objnum
761             {
762 0     0 1   my ($self, $num, $gen, %opts) = @_;
763 0           my ($res, $loc, $str, $oldloc);
764            
765 0   0       $loc = $self->locate_obj($num, $gen) || return undef;
766 0           $oldloc = $self->{' INFILE'}->tell;
767 0           $self->{' INFILE'}->seek($loc, 0);
768 0           ($res, $str) = $self->readval('', %opts, 'objnum' => $num, 'objgen' => $gen);
769 0           $self->{' INFILE'}->seek($oldloc, 0);
770 0           $res;
771             }
772            
773            
774             =head2 $objind = $p->new_obj($obj)
775            
776             Creates a new, free object reference based on free space in the cross reference chain.
777             If nothing free then thinks up a new number. If $obj then turns that object into this
778             new object rather than returning a new object.
779            
780             =cut
781            
782             sub new_obj
783             {
784 0     0 1   my ($self, $base) = @_;
785 0           my ($res);
786 0           my ($tdict, $i, $ni, $ng);
787            
788 0 0 0       if (defined $self->{' free'} and scalar @{$self->{' free'}} > 0)
  0            
789             {
790 0           $res = shift(@{$self->{' free'}});
  0            
791 0 0         if (defined $base)
792             {
793 0           my ($num, $gen) = @{$self->{' objects'}{$res->uid}};
  0            
794 0           $self->remove_obj($res);
795 0           $self->add_obj($base, $num, $gen);
796 0           return $self->out_obj($base);
797             }
798             else
799             {
800 0           $self->{' objects'}{$res->uid}[2] = 0;
801 0           return $res;
802             }
803             }
804            
805 0           $tdict = $self;
806 0           while (defined $tdict)
807             {
808 0 0         $i = $tdict->{' xref'}{defined($i)?$i:''}[0];
809 0   0       while (defined $i and $i != 0)
810             {
811 0           ($ni, $ng) = @{$tdict->{' xref'}{$i}};
  0            
812 0 0         if (!defined $self->locate_obj($i, $ng))
813             {
814 0 0         if (defined $base)
815             {
816 0           $self->add_obj($base, $i, $ng);
817 0           return $base;
818             }
819             else
820             {
821 0   0       $res = $self->test_obj($i, $ng)
822             || $self->add_obj(PDF::API3::Compat::API2::Basic::PDF::Objind->new(), $i, $ng);
823 0           $tdict->{' xref'}{$i}[0] = $tdict->{' xref'}{$i}[0];
824 0           $self->out_obj($res);
825 0           return $res;
826             }
827             }
828 0           $i = $ni;
829             }
830 0           $tdict = $tdict->{' prev'};
831             }
832            
833 0           $i = $self->{' maxobj'}++;
834 0 0         if (defined $base)
835             {
836 0           $self->add_obj($base, $i, 0);
837 0           $self->out_obj($base);
838 0           return $base;
839             }
840             else
841             {
842 0           $res = $self->add_obj(PDF::API3::Compat::API2::Basic::PDF::Objind->new(), $i, 0);
843 0           $self->out_obj($res);
844 0           return $res;
845             }
846             }
847            
848            
849             =head2 $p->out_obj($objind)
850            
851             Indicates that the given object reference should appear in the output xref
852             table whether with data or freed.
853            
854             =cut
855            
856             sub out_obj
857             {
858 0     0 1   my ($self, $obj) = @_;
859            
860             # This is why we've been keeping the outlist CACHE around; to speed
861             # up this method by orders of magnitude (it saves up from having to
862             # grep the full outlist each time through as we'll just do a lookup
863             # in the hash) (which is super-fast).
864 0 0         if (!exists $self->{' outlist_cache'}{$obj})
865             {
866 0           push( @{$self->{' outlist'}}, $obj );
  0            
867 0           $self->{' outlist_cache'}{$obj}++;
868             }
869 0           $obj;
870             }
871            
872            
873             =head2 $p->free_obj($objind)
874            
875             Marks an object reference for output as being freed.
876            
877             =cut
878            
879             sub free_obj
880             {
881 0     0 1   my ($self, $obj) = @_;
882            
883 0           push(@{$self->{' free'}}, $obj);
  0            
884 0           $self->{' objects'}{$obj->uid}[2] = 1;
885 0           $self->out_obj($obj);
886             }
887            
888            
889             =head2 $p->remove_obj($objind)
890            
891             Removes the object from all places where we might remember it
892            
893             =cut
894            
895             sub remove_obj
896             {
897 0     0 1   my ($self, $objind) = @_;
898            
899             # who says it has to be fast
900 0           delete $self->{' objects'}{$objind->uid};
901 0           delete $self->{' outlist_cache'}{$objind};
902 0           delete $self->{' printed_cache'}{$objind};
903 0           @{$self->{' outlist'}} = grep($_ ne $objind, @{$self->{' outlist'}});
  0            
  0            
904 0           @{$self->{' printed'}} = grep($_ ne $objind, @{$self->{' printed'}});
  0            
  0            
905 0 0         $self->{' objcache'}{$objind->{' objnum'}, $objind->{' objgen'}} = undef
906             if ($self->{' objcache'}{$objind->{' objnum'}, $objind->{' objgen'}} eq $objind);
907 0           $self;
908             }
909            
910            
911             =head2 $p->ship_out(@objects)
912            
913             Ships the given objects (or all objects for output if @objects is empty) to
914             the currently open output file (assuming there is one). Freed objects are not
915             shipped, and once an object is shipped it is switched such that this file
916             becomes its source and it will not be shipped again unless out_obj is called
917             again. Notice that a shipped out object can be re-output or even freed, but
918             that it will not cause the data already output to be changed.
919            
920             =cut
921            
922             sub ship_out
923             {
924 0     0 1   my ($self, @objs) = @_;
925 0           my ($n, $fh, $objind, $i, $j);
926 0           my ($objnum, $objgen);
927            
928 0 0         return unless defined($fh = $self->{' OUTFILE'});
929 0           seek($fh, 0, 2); # go to the end of the file
930            
931 0 0         @objs = @{$self->{' outlist'}} unless (scalar @objs > 0);
  0            
932 0           foreach $objind (@objs)
933             {
934 0 0         next unless $objind->is_obj($self);
935 0           $j = -1;
936 0           for ($i = 0; $i < scalar @{$self->{' outlist'}}; $i++)
  0            
937             {
938 0 0         if ($self->{' outlist'}[$i] eq $objind)
939             {
940 0           $j = $i;
941 0           last;
942             }
943             }
944 0 0         next if ($j < 0);
945 0           splice(@{$self->{' outlist'}}, $j, 1);
  0            
946 0           delete $self->{' outlist_cache'}{$objind};
947 0 0         next if grep {$_ eq $objind} @{$self->{' free'}};
  0            
  0            
948            
949 0 0         map { $fh->print("\% $_ \n") } split(/$cr/,$objind->{' comments'}) if($objind->{' comments'});
  0            
950 0           $self->{' locs'}{$objind->uid} = $fh->tell;
951 0           ($objnum, $objgen) = @{$self->{' objects'}{$objind->uid}}[0..1];
  0            
952 0           $fh->printf("%d %d obj ", $objnum, $objgen);
953 0           $objind->outobjdeep($fh, $self, 'objnum' => $objnum, 'objgen' => $objgen);
954 0           $fh->print(" endobj\n");
955            
956             # Note that we've output this obj, not forgetting to update the cache
957             # of whats printed.
958 0 0         unless (exists $self->{' printed_cache'}{$objind})
959             {
960 0           push( @{$self->{' printed'}}, $objind );
  0            
961 0           $self->{' printed_cache'}{$objind}++;
962             }
963             }
964 0           $self;
965             }
966            
967             =head2 $p->copy($outpdf, \&filter)
968            
969             Iterates over every object in the file reading the object, calling filter with the object
970             and outputting the result. if filter is not defined, then just copies input to output.
971            
972             =cut
973            
974             sub copy
975             {
976 0     0 1   my ($self, $out, $filt) = @_;
977 0           my ($tdict, $i, $nl, $ng, $nt, $res, $obj, $minl, $mini, $ming);
978            
979 0           foreach $i (grep (!m/^[\s\-]/o, keys %{$self}))
  0            
980 0 0         { $out->{$i} = $self->{$i} unless defined $out->{$i}; }
981            
982 0           $tdict = $self;
983 0           while (defined $tdict)
984             {
985 0           foreach $i (sort {$a <=> $b} keys %{$tdict->{' xref'}})
  0            
  0            
986             {
987 0           ($nl, $ng, $nt) = @{$tdict->{' xref'}{$i}};
  0            
988 0 0         next unless $nt eq 'n';
989            
990 0 0 0       if ($nl < $minl || $mini == 0)
991             {
992 0           $mini = $i;
993 0           $ming = $ng;
994 0           $minl = $nl;
995             }
996 0 0         unless ($obj = $self->test_obj($i, $ng))
997             {
998 0           $obj = PDF::API3::Compat::API2::Basic::PDF::Objind->new();
999 0           $obj->{' objnum'} = $i;
1000 0           $obj->{' objgen'} = $ng;
1001 0           $self->add_obj($obj, $i, $ng);
1002 0           $obj->{' parent'} = $self;
1003 0           $obj->{' realised'} = 0;
1004             }
1005 0           $obj->realise;
1006 0 0         $res = defined $filt ? &{$filt}($obj) : $obj;
  0            
1007 0 0 0       $out->new_obj($res) unless (!$res || $res->is_obj($out));
1008             }
1009 0           $tdict = $tdict->{' prev'};
1010             }
1011            
1012             # test for linearized and remove it from output
1013 0           $obj = $self->test_obj($mini, $ming);
1014 0 0 0       if ($obj->isa('PDF::API3::Compat::API2::Basic::PDF::Dict') && $obj->{'Linearized'})
1015 0           { $out->free_obj($obj); }
1016            
1017 0           $self;
1018             }
1019            
1020            
1021             =head1 PRIVATE METHODS & FUNCTIONS
1022            
1023             The following methods and functions are considered private to this class. This
1024             does not mean you cannot use them if you have a need, just that they aren't really
1025             designed for users of this class.
1026            
1027             =head2 $offset = $p->locate_obj($num, $gen)
1028            
1029             Returns a file offset to the object asked for by following the chain of cross
1030             reference tables until it finds the one you want.
1031            
1032             =cut
1033            
1034             sub locate_obj
1035             {
1036 0     0 1   my ($self, $num, $gen) = @_;
1037 0           my ($tdict, $ref);
1038            
1039 0           $tdict = $self;
1040 0           while (defined $tdict)
1041             {
1042 0 0         if (ref $tdict->{' xref'}{$num})
1043             {
1044 0           $ref = $tdict->{' xref'}{$num};
1045 0 0         if ($ref->[1] == $gen)
1046             {
1047 0 0         return $ref->[0] if ($ref->[2] eq "n");
1048 0           return undef; # if $ref->[2] eq "f"
1049             }
1050             }
1051 0           $tdict = $tdict->{' prev'}
1052             }
1053 0           return undef;
1054             }
1055            
1056            
1057             =head2 update($fh, $str, $instream)
1058            
1059             Keeps reading $fh for more data to ensure that $str has at least a line full
1060             for C to work on. At this point we also take the opportunity to ignore
1061             comments.
1062            
1063             =cut
1064            
1065             sub update
1066             {
1067 0     0 1   my ($fh, $str, $instream) = @_;
1068 0 0         print STDERR "fpos=".tell($fh)." strlen=".length($str)."\n" if($readDebug);
1069 0 0         if($instream) {
1070             # we are inside a (possible binary) stream
1071             # so we fetch data till we see an 'endstream'
1072             # -- fredo/2004-09-03
1073 0   0       while ($str !~ m/endstream/o && !$fh->eof)
1074             {
1075 0 0         print STDERR "fpos=".tell($fh)." strlen=".length($str)."\n" if($readDebug);
1076 0           $fh->read($str, 314, length($str));
1077             }
1078             } else {
1079 0           $str =~ s/^$ws_char*//o;
1080 0   0       while ($str !~ m/$cr/o && !$fh->eof)
1081             {
1082 0 0         print STDERR "fpos=".tell($fh)." strlen=".length($str)."\n" if($readDebug);
1083 0           $fh->read($str, 314, length($str));
1084 0           $str =~ s/^$ws_char*//so;
1085             }
1086 0           while ($str =~ m/^\%/o) # restructured by fredo/2003-03-23
1087             {
1088 0 0         print STDERR "fpos=".tell($fh)." strlen=".length($str)."\n" if($readDebug);
1089 0   0       $fh->read($str, 314, length($str)) while ($str !~ m/$cr/o && !$fh->eof);
1090 0           $str =~ s/^\%[^\015\012]+$ws_char*//so; # fixed for reportlab -- fredo
1091             }
1092             }
1093            
1094 0           return $str;
1095             }
1096            
1097             sub updateRef
1098             {
1099 0     0 0   my ($fh, $str, $instream) = @_;
1100 0           my $inlen=16; my $inoff=1;
  0            
1101 0 0         print STDERR "fpos=".tell($fh)." strlen=".length($$str)."\n" if($readDebug);
1102 0 0         if($instream) {
1103             # we are inside a (possible binary) stream
1104             # so we fetch data till we see an 'endstream'
1105             # -- fredo/2004-09-03
1106 0   0       while ($$str !~ m/endstream/o && !$fh->eof)
1107             {
1108 0 0         print STDERR "fpos=".tell($fh)." strlen=".length($$str)."\n" if($readDebug);
1109 0           $fh->read($$str, 314, length($$str));
1110             }
1111             } else {
1112 0           $$str =~ s/^$ws_char*//o;
1113 0   0       while ($$str !~ m/$cr/o && !$fh->eof && length($$str) < 16000)
      0        
1114             {
1115 0 0         print STDERR "fpos=".tell($fh)." strlen=".length($$str)."\n" if($readDebug);
1116 0           $fh->read($$str, $inlen, length($$str));
1117 0           $$str =~ s/^$ws_char*//so;
1118 0           $inlen+=$inoff;
1119             }
1120 0           while ($$str =~ m/^\%/o) # restructured by fredo/2003-03-23
1121             {
1122 0 0         print STDERR "fpos=".tell($fh)." strlen=".length($$str)."\n" if($readDebug);
1123 0   0       $fh->read($$str, $inlen, length($$str)) while ($$str !~ m/$cr/o && !$fh->eof);
1124 0           $$str =~ s/^\%[^\015\012]+$ws_char*//so; # fixed for reportlab -- fredo
1125 0           $inlen+=$inoff;
1126             }
1127             }
1128            
1129 0           return undef;
1130             }
1131            
1132            
1133             =head2 $objind = $p->test_obj($num, $gen)
1134            
1135             Tests the cache to see whether an object reference (which may or may not have
1136             been getobj()ed) has been cached. Returns it if it has.
1137            
1138             =cut
1139            
1140             sub test_obj
1141 0     0 1   { $_[0]->{' objcache'}{$_[1], $_[2]}; }
1142            
1143            
1144             =head2 $p->add_obj($objind)
1145            
1146             Adds the given object to the internal object cache.
1147            
1148             =cut
1149            
1150             sub add_obj
1151             {
1152 0     0 1   my ($self, $obj, $num, $gen) = @_;
1153            
1154 0           $self->{' objcache'}{$num, $gen} = $obj;
1155 0           $self->{' objects'}{$obj->uid} = [$num, $gen];
1156 0           return $obj;
1157             }
1158            
1159            
1160             =head2 $tdict = $p->readxrtr($xpos)
1161            
1162             Recursive function which reads each of the cross-reference and trailer tables
1163             in turn until there are no more.
1164            
1165             Returns a dictionary corresponding to the trailer chain. Each trailer also
1166             includes the corresponding cross-reference table.
1167            
1168             The structure of the xref private element in a trailer dictionary is of an
1169             anonymous hash of cross reference elements by object number. Each element
1170             consists of an array of 3 elements corresponding to the three elements read
1171             in [location, generation number, free or used]. See the PDF Specification
1172             for details.
1173            
1174             =cut
1175            
1176             sub readxrtr
1177             {
1178 0     0 1   my ($self, $xpos) = @_;
1179 0           my ($tdict, $xlist, $buf, $xmin, $xnum, $fh, $xdiff);
1180            
1181 0           $fh = $self->{' INFILE'};
1182 0           $fh->seek($xpos, 0);
1183 0           $fh->read($buf, 22);
1184 0           $buf=update($fh,$buf); # fix for broken JAWS xref calculation.
1185            
1186             ## seams that some products calculate wrong prev entries (short)
1187             ## so we seek ahead to find one -- fredo; save for now
1188             #while($buf !~ m/^xref$cr/oi && !eof($fh))
1189             #{
1190             # $buf =~ s/^(\s+|\S+|.)//oi;
1191             # $buf=update($fh,$buf);
1192             #}
1193            
1194 0 0         if ($buf !~ m/^xref$cr/oi)
1195             {
1196             #if($self == $root)
1197             #{
1198 0           die "Malformed xref in PDF file $self->{' fname'}";
1199             #}
1200             #else # be tolerant if its not the root object
1201             #{
1202             # return undef;
1203             #}
1204             }
1205 0           $buf =~ s/^xref$cr//oi;
1206            
1207 0           $xlist = {};
1208 0           while ($buf =~ m/^([0-9]+)$ws_char+([0-9]+)$cr(.*?)$/so)
1209             {
1210 0           $xmin = $1;
1211 0           $xnum = $2;
1212 0           $buf = $3;
1213 0           $xdiff = length($buf);
1214            
1215 0           $fh->read($buf, 20 * $xnum - $xdiff + 15, $xdiff);
1216 0   0       while ($xnum-- > 0 && $buf =~ s/^0*([0-9]*)$ws_char+0*([0-9]+)$ws_char+([nf])$cr//o)
1217             {
1218 0 0         $xlist->{$xmin} = [$1, $2, $3] unless(exists $xlist->{$xmin});
1219 0           $xmin++;
1220             }
1221             }
1222            
1223 0 0         if ($buf !~ /^\s*trailer\b/oi)
1224 0           { die "Malformed trailer in PDF file $self->{' fname'} at " . ($fh->tell - length($buf)); }
1225            
1226 0           $buf =~ s/^\s*trailer\b//oi;
1227            
1228 0           ($tdict, $buf) = $self->readval($buf);
1229 0           $tdict->{' loc'} = $xpos;
1230 0           $tdict->{' xref'} = $xlist;
1231 0 0         $self->{' maxobj'} = $xmin if $xmin > $self->{' maxobj'};
1232 0 0 0       $tdict->{' prev'} = $self->readxrtr($tdict->{'Prev'}->val)
1233             if (defined $tdict->{'Prev'} && $tdict->{'Prev'}->val != 0);
1234 0 0         delete $tdict->{' prev'} unless(defined $tdict->{' prev'});
1235 0           return $tdict;
1236             }
1237            
1238            
1239             =head2 $p->out_trailer($tdict)
1240            
1241             Outputs the body and trailer for a PDF file by outputting all the objects in
1242             the ' outlist' and then outputting a xref table for those objects and any
1243             freed ones. It then outputs the trailing dictionary and the trailer code.
1244            
1245             =cut
1246            
1247             sub out_trailer
1248             {
1249 0     0 1   my ($self, $tdict, $update) = @_;
1250 0           my ($objind, $j, $i, $iend, @xreflist, $first, $k, $xref, $tloc, @freelist);
1251 0           my (%locs, $size);
1252 0           my ($fh) = $self->{' OUTFILE'};
1253            
1254 0           while (@{$self->{' outlist'}})
  0            
1255 0           { $self->ship_out; }
1256            
1257             # $size = @{$self->{' printed'}} + @{$self->{' free'}};
1258             # $tdict->{'Size'} = PDFNum($tdict->{'Size'}->val + $size);
1259             # PDFSpec 1.3 says for /Size: (Required) Total number of entries in the file’s
1260             # cross-reference table, including the original table and all updates. Which
1261             # is what the previous two lines implement.
1262             # But this seems to make Acrobat croak on saving so we try the following from
1263             # basil.duval@epfl.ch
1264 0           $tdict->{'Size'} = PDFNum($self->{' maxobj'});
1265            
1266 0           $tloc = $fh->tell;
1267 0           $fh->print("xref\n");
1268            
1269 0           @xreflist = sort {$self->{' objects'}{$a->uid}[0] <=>
  0            
1270             $self->{' objects'}{$b->uid}[0]}
1271 0           (@{$self->{' printed'}}, @{$self->{' free'}});
  0            
1272            
1273 0 0         unless ($update)
1274             {
1275 0           $i = 1;
1276 0           for ($j = 0; $j < @xreflist; $j++)
1277             {
1278 0           my (@inserts);
1279 0           $k = $xreflist[$j];
1280 0           while ($i < $self->{' objects'}{$k->uid}[0])
1281             {
1282 0           my ($n) = PDF::API3::Compat::API2::Basic::PDF::Objind->new();
1283 0           $self->add_obj($n, $i, 0);
1284 0           $self->free_obj($n);
1285 0           push(@inserts, $n);
1286 0           $i++;
1287             }
1288 0           splice(@xreflist, $j, 0, @inserts);
1289 0           $j += @inserts;
1290 0           $i++;
1291             }
1292             }
1293            
1294 0           @freelist = sort {$self->{' objects'}{$a->uid}[0] <=>
  0            
1295 0           $self->{' objects'}{$b->uid}[0]} @{$self->{' free'}};
1296            
1297 0           $j = 0; $first = -1; $k = 0;
  0            
  0            
1298 0           for ($i = 0; $i <= $#xreflist + 1; $i++)
1299             {
1300             # if ($i == 0)
1301             # {
1302             # $first = $i; $j = $xreflist[0]->{' objnum'};
1303             # $fh->printf("0 1\n%010d 65535 f \n", $ff);
1304             # }
1305 0 0 0       if ($i > $#xreflist || $self->{' objects'}{$xreflist[$i]->uid}[0] != $j + 1)
1306             {
1307 0 0         $fh->print(($first == -1 ? "0 " : "$self->{' objects'}{$xreflist[$first]->uid}[0] ") . ($i - $first) . "\n");
1308 0 0         if ($first == -1)
1309             {
1310 0 0         $fh->printf("%010d 65535 f \n", defined $freelist[$k] ? $self->{' objects'}{$freelist[$k]->uid}[0] : 0);
1311 0           $first = 0;
1312             }
1313 0           for ($j = $first; $j < $i; $j++)
1314             {
1315 0           $xref = $xreflist[$j];
1316 0 0 0       if (defined $freelist[$k] && defined $xref && "$freelist[$k]" eq "$xref")
      0        
1317             {
1318 0           $k++;
1319 0 0         $fh->print(pack("A10AA5A4",
1320             sprintf("%010d", (defined $freelist[$k] ?
1321             $self->{' objects'}{$freelist[$k]->uid}[0] : 0)), " ",
1322             sprintf("%05d", $self->{' objects'}{$xref->uid}[1] + 1),
1323             " f \n"));
1324             } else
1325             {
1326 0           $fh->print(pack("A10AA5A4", sprintf("%010d", $self->{' locs'}{$xref->uid}), " ",
1327             sprintf("%05d", $self->{' objects'}{$xref->uid}[1]),
1328             " n \n"));
1329             }
1330             }
1331 0           $first = $i;
1332 0 0         $j = $self->{' objects'}{$xreflist[$i]->uid}[0] if ($i < scalar @xreflist);
1333             } else
1334 0           { $j++; }
1335             }
1336 0           $fh->print("trailer\n");
1337 0           $tdict->outobjdeep($fh, $self);
1338 0           $fh->print("\nstartxref\n$tloc\n" . '%%EOF' . "\n");
1339             }
1340            
1341            
1342             =head2 PDF::API3::Compat::API2::Basic::PDF::File->_new
1343            
1344             Creates a very empty PDF file object (used by new and open)
1345            
1346             =cut
1347            
1348             sub _new
1349             {
1350 0     0     my ($class) = @_;
1351 0           my ($self) = {};
1352            
1353 0           bless $self, $class;
1354 0           $self->{' outlist'} = [];
1355 0           $self->{' outlist_cache'} = {}; # A cache of whats in the 'outlist'
1356 0           $self->{' maxobj'} = 1;
1357 0           $self->{' objcache'} = {};
1358 0           $self->{' objects'} = {};
1359 0           $self;
1360             }
1361            
1362             sub save_xml
1363             {
1364 0     0 0   my ($self,$fh) = @_;
1365 0           my ($tdict, $t, $buf, $ver);
1366            
1367 0           $fh->print("\n");
1368            
1369 0           $tdict = PDFDict();
1370 0 0         $tdict->{'Prev'} = PDFNum($self->{' loc'}) if ($self->{' loc'});
1371 0   0       $tdict->{'Size'} = $self->{'Size'} || PDFNum(1);
1372 0           $tdict->{'Info'} = $self->{'Info'};
1373 0 0         if (defined $self->{' newroot'})
1374 0           { $tdict->{'Root'} = $self->{' newroot'}; }
1375             else
1376 0           { $tdict->{'Root'} = $self->{'Root'}; }
1377 0           $tdict->{'Size'} = $self->{'Size'};
1378            
1379 0           foreach $t (grep ($_ !~ m/^\s/o, keys %$self))
1380 0 0         { $tdict->{$t} = $self->{$t} unless defined $tdict->{$t}; }
1381            
1382 0           $self->out_xml($tdict,$fh);
1383 0           $fh->print("\n");
1384             }
1385            
1386             sub ship_xml
1387             {
1388 0     0 0   my ($self, $fh, @objs) = @_;
1389 0           my ($n, $objind, $i, $j);
1390 0           my ($objnum, $objgen);
1391            
1392 0 0         @objs = @{$self->{' outlist'}} unless (scalar @objs > 0);
  0            
1393 0           foreach $objind (@objs)
1394             {
1395 0 0         next unless $objind->is_obj($self);
1396 0           $j = -1;
1397 0           for ($i = 0; $i < scalar @{$self->{' outlist'}}; $i++)
  0            
1398             {
1399 0 0         if ($self->{' outlist'}[$i] eq $objind)
1400             {
1401 0           $j = $i;
1402 0           last;
1403             }
1404             }
1405 0 0         next if ($j < 0);
1406 0           splice(@{$self->{' outlist'}}, $j, 1);
  0            
1407 0           delete $self->{' outlist_cache'}{$objind};
1408 0 0         next if grep {$_ eq $objind} @{$self->{' free'}};
  0            
  0            
1409            
1410 0           ($objnum, $objgen) = @{$self->{' objects'}{$objind->uid}}[0..1];
  0            
1411 0           $fh->printf("\n", $objnum, $objgen);
1412 0           $objind->outxmldeep($fh, $self, 'objnum' => $objnum, 'objgen' => $objgen,-xmlfh=>$fh);
1413 0           $fh->print("\n");
1414            
1415             # Note that we've output this obj, not forgetting to update the cache
1416             # of whats printed.
1417 0 0         unless (exists $self->{' printed_cache'}{$objind})
1418             {
1419 0           push( @{$self->{' printed'}}, $objind );
  0            
1420 0           $self->{' printed_cache'}{$objind}++;
1421             }
1422             }
1423 0           $self;
1424             }
1425            
1426             sub out_xml
1427             {
1428 0     0 0   my ($self, $tdict, $fh) = @_;
1429 0           my ($objind, $j, $i, $iend, @xreflist, $first, $k, $xref, $tloc, @freelist);
1430 0           my (%locs, $size);
1431            
1432 0           while (@{$self->{' outlist'}})
  0            
1433 0           { $self->ship_xml($fh); }
1434            
1435             }
1436            
1437             1;
1438            
1439             =head1 AUTHOR
1440            
1441             Martin Hosken Martin_Hosken@sil.org
1442            
1443             Copyright Martin Hosken 1999 and onwards
1444            
1445             No warranty or expression of effectiveness, least of all regarding anyone's
1446             safety, is implied in this software or documentation.
1447            
1448             =head2 Licensing
1449            
1450             This Perl Text::PDF module is licensed under the Perl Artistic License.
1451