line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
#======================================================================= |
2
|
|
|
|
|
|
|
# |
3
|
|
|
|
|
|
|
# THIS IS A REUSED PERL MODULE, FOR PROPER LICENCING TERMS SEE BELOW: |
4
|
|
|
|
|
|
|
# |
5
|
|
|
|
|
|
|
# Copyright Martin Hosken |
6
|
|
|
|
|
|
|
# |
7
|
|
|
|
|
|
|
# Modified for PDF::API2 by Alfred Reibenschuh |
8
|
|
|
|
|
|
|
# |
9
|
|
|
|
|
|
|
# No warranty or expression of effectiveness, least of all regarding |
10
|
|
|
|
|
|
|
# anyone's safety, is implied in this software or documentation. |
11
|
|
|
|
|
|
|
# |
12
|
|
|
|
|
|
|
# This specific module is licensed under the Perl Artistic License. |
13
|
|
|
|
|
|
|
# Effective 28 January 2021, the original author and copyright holder, |
14
|
|
|
|
|
|
|
# Martin Hosken, has given permission to use and redistribute this module |
15
|
|
|
|
|
|
|
# under the MIT license. |
16
|
|
|
|
|
|
|
# |
17
|
|
|
|
|
|
|
#======================================================================= |
18
|
|
|
|
|
|
|
package PDF::Builder::Basic::PDF::File; |
19
|
|
|
|
|
|
|
|
20
|
35
|
|
|
35
|
|
70516
|
use strict; |
|
35
|
|
|
|
|
115
|
|
|
35
|
|
|
|
|
1329
|
|
21
|
35
|
|
|
35
|
|
199
|
use warnings; |
|
35
|
|
|
|
|
83
|
|
|
35
|
|
|
|
|
2635
|
|
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
our $VERSION = '3.023'; # VERSION |
24
|
|
|
|
|
|
|
our $LAST_UPDATE = '3.023'; # manually update whenever code is changed |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
=head1 NAME |
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
PDF::Builder::Basic::PDF::File - Holds the trailers and cross-reference tables for a PDF file |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
=head1 SYNOPSIS |
31
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
$p = PDF::Builder::Basic::PDF::File->open("filename.pdf", 1); |
33
|
|
|
|
|
|
|
$p->new_obj($obj_ref); |
34
|
|
|
|
|
|
|
$p->free_obj($obj_ref); |
35
|
|
|
|
|
|
|
$p->append_file(); |
36
|
|
|
|
|
|
|
$p->close_file(); |
37
|
|
|
|
|
|
|
$p->release(); # IMPORTANT! |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
=head1 DESCRIPTION |
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
This class keeps track of the directory aspects of a PDF file. There are two |
42
|
|
|
|
|
|
|
parts to the directory: the main directory object, which is the parent to all |
43
|
|
|
|
|
|
|
other objects, and a chain of cross-reference tables and corresponding trailer |
44
|
|
|
|
|
|
|
dictionaries, starting with the main directory object. |
45
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
=head1 INSTANCE VARIABLES |
47
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
Within this class hierarchy, rather than making everything visible via methods, |
49
|
|
|
|
|
|
|
which would be a lot of work, there are various instance variables which are |
50
|
|
|
|
|
|
|
accessible via associative array referencing. To distinguish instance variables |
51
|
|
|
|
|
|
|
from content variables (which may come from the PDF content itself), each such |
52
|
|
|
|
|
|
|
variable will start with a space. |
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
Variables which do not start with a space directly reflect elements in a PDF |
55
|
|
|
|
|
|
|
dictionary. In the case of a C, the elements |
56
|
|
|
|
|
|
|
reflect those in the trailer dictionary. |
57
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
Since some variables are not designed for class users to access, variables are |
59
|
|
|
|
|
|
|
marked in the documentation with B<(R)> to indicate that such an entry should |
60
|
|
|
|
|
|
|
only be used as B information. B<(P)> indicates that the information |
61
|
|
|
|
|
|
|
is B, and not designed for user use at all, but is included in the |
62
|
|
|
|
|
|
|
documentation for completeness and to ensure that nobody else tries to use it. |
63
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
=over |
65
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
=item newroot |
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
This variable allows the user to create a new root entry to occur in the trailer |
69
|
|
|
|
|
|
|
dictionary which is output when the file is written or appended. If you wish to |
70
|
|
|
|
|
|
|
override the root element in the dictionary you have, use this entry to indicate |
71
|
|
|
|
|
|
|
that without losing the current Root entry. Notice that newroot should point to |
72
|
|
|
|
|
|
|
a PDF level object and not just to a dictionary which does not have object |
73
|
|
|
|
|
|
|
status. |
74
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
=item INFILE (R) |
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
Contains the filehandle used to read this information into this PDF directory. |
78
|
|
|
|
|
|
|
It is an IO object. |
79
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
=item fname (R) |
81
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
This is the filename which is reflected by INFILE, or the original IO object |
83
|
|
|
|
|
|
|
passed in. |
84
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
=item update (R) |
86
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
This indicates that the read file has been opened for update and that at some |
88
|
|
|
|
|
|
|
point, C<< $p->appendfile() >> can be called to update the file with the |
89
|
|
|
|
|
|
|
changes that have been made to the memory representation. |
90
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
=item maxobj (R) |
92
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
Contains the first usable object number above any that have already appeared |
94
|
|
|
|
|
|
|
in the file so far. |
95
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
=item outlist (P) |
97
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
This is a list of Objind which are to be output when the next C |
99
|
|
|
|
|
|
|
or C occurs. |
100
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
=item firstfree (P) |
102
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
Contains the first free object in the free object list. Free objects are removed |
104
|
|
|
|
|
|
|
from the front of the list and added to the end. |
105
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
=item lastfree (P) |
107
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
Contains the last free object in the free list. It may be the same as the |
109
|
|
|
|
|
|
|
C if there is only one free object. |
110
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
=item objcache (P) |
112
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
All objects are held in the cache to ensure that a system only has one |
114
|
|
|
|
|
|
|
occurrence of each object. In effect, the objind class acts as a container type |
115
|
|
|
|
|
|
|
class to hold the PDF object structure, and it would be unfortunate if there |
116
|
|
|
|
|
|
|
were two identical place-holders floating around a system. |
117
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
=item epos (P) |
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
The end location of the read-file. |
121
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
=back |
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
Each trailer dictionary contains a number of private instance variables which |
125
|
|
|
|
|
|
|
hold the chain together. |
126
|
|
|
|
|
|
|
|
127
|
|
|
|
|
|
|
=over |
128
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
=item loc (P) |
130
|
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
Contains the location of the start of the cross-reference table preceding the |
132
|
|
|
|
|
|
|
trailer. |
133
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
=item xref (P) |
135
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
Contains an anonymous array of each cross-reference table entry. |
137
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
=item prev (P) |
139
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
A reference to the previous table. Note this differs from the Prev entry which |
141
|
|
|
|
|
|
|
is in PDF, which contains the location of the previous cross-reference table. |
142
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
=back |
144
|
|
|
|
|
|
|
|
145
|
|
|
|
|
|
|
=head1 METHODS |
146
|
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
=cut |
148
|
|
|
|
|
|
|
|
149
|
35
|
|
|
35
|
|
238
|
use Scalar::Util qw(blessed weaken); |
|
35
|
|
|
|
|
98
|
|
|
35
|
|
|
|
|
2162
|
|
150
|
|
|
|
|
|
|
|
151
|
35
|
|
|
35
|
|
233
|
use vars qw($cr $irreg_char $reg_char $ws_char $delim_char %types); |
|
35
|
|
|
|
|
97
|
|
|
35
|
|
|
|
|
6735
|
|
152
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
$ws_char = '[ \t\r\n\f\0]'; |
154
|
|
|
|
|
|
|
$delim_char = '[][<>{}()/%]'; |
155
|
|
|
|
|
|
|
$reg_char = '[^][<>{}()/% \t\r\n\f\0]'; |
156
|
|
|
|
|
|
|
$irreg_char = '[][<>{}()/% \t\r\n\f\0]'; |
157
|
|
|
|
|
|
|
# \015 = x0D = CR or \r, \012 = x0A = LF or \n |
158
|
|
|
|
|
|
|
# TBD a line-end character is space CR ' \r', space LF ' \n', or CR LF '\r\n' |
159
|
|
|
|
|
|
|
# have seen working PDFs with just a CR and space CR |
160
|
|
|
|
|
|
|
$cr = '\s*(?:\015|\012|(?:\015\012))'; |
161
|
|
|
|
|
|
|
|
162
|
|
|
|
|
|
|
my $re_comment = qr/(?:\%[^\r\n]*)/; |
163
|
|
|
|
|
|
|
my $re_whitespace = qr/(?:[ \t\r\n\f\0]|$re_comment)/; |
164
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
%types = ( |
166
|
|
|
|
|
|
|
'Page' => 'PDF::Builder::Basic::PDF::Page', |
167
|
|
|
|
|
|
|
'Pages' => 'PDF::Builder::Basic::PDF::Pages', |
168
|
|
|
|
|
|
|
); |
169
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
my $readDebug = 0; |
171
|
|
|
|
|
|
|
|
172
|
35
|
|
|
35
|
|
286
|
use Carp; |
|
35
|
|
|
|
|
95
|
|
|
35
|
|
|
|
|
2913
|
|
173
|
35
|
|
|
35
|
|
825
|
use IO::File; |
|
35
|
|
|
|
|
8938
|
|
|
35
|
|
|
|
|
5009
|
|
174
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
# Now for the basic PDF types |
176
|
35
|
|
|
35
|
|
731
|
use PDF::Builder::Basic::PDF::Utils; |
|
35
|
|
|
|
|
81
|
|
|
35
|
|
|
|
|
2964
|
|
177
|
|
|
|
|
|
|
|
178
|
35
|
|
|
35
|
|
247
|
use PDF::Builder::Basic::PDF::Array; |
|
35
|
|
|
|
|
86
|
|
|
35
|
|
|
|
|
977
|
|
179
|
35
|
|
|
35
|
|
199
|
use PDF::Builder::Basic::PDF::Bool; |
|
35
|
|
|
|
|
79
|
|
|
35
|
|
|
|
|
1660
|
|
180
|
35
|
|
|
35
|
|
229
|
use PDF::Builder::Basic::PDF::Dict; |
|
35
|
|
|
|
|
73
|
|
|
35
|
|
|
|
|
833
|
|
181
|
35
|
|
|
35
|
|
179
|
use PDF::Builder::Basic::PDF::Name; |
|
35
|
|
|
|
|
75
|
|
|
35
|
|
|
|
|
962
|
|
182
|
35
|
|
|
35
|
|
198
|
use PDF::Builder::Basic::PDF::Number; |
|
35
|
|
|
|
|
81
|
|
|
35
|
|
|
|
|
889
|
|
183
|
35
|
|
|
35
|
|
188
|
use PDF::Builder::Basic::PDF::Objind; |
|
35
|
|
|
|
|
76
|
|
|
35
|
|
|
|
|
1021
|
|
184
|
35
|
|
|
35
|
|
197
|
use PDF::Builder::Basic::PDF::String; |
|
35
|
|
|
|
|
78
|
|
|
35
|
|
|
|
|
1076
|
|
185
|
35
|
|
|
35
|
|
17366
|
use PDF::Builder::Basic::PDF::Page; |
|
35
|
|
|
|
|
114
|
|
|
35
|
|
|
|
|
1337
|
|
186
|
35
|
|
|
35
|
|
233
|
use PDF::Builder::Basic::PDF::Pages; |
|
35
|
|
|
|
|
74
|
|
|
35
|
|
|
|
|
772
|
|
187
|
35
|
|
|
35
|
|
183
|
use PDF::Builder::Basic::PDF::Null; |
|
35
|
|
|
|
|
73
|
|
|
35
|
|
|
|
|
931
|
|
188
|
35
|
|
|
35
|
|
212
|
use POSIX qw(ceil floor); |
|
35
|
|
|
|
|
72
|
|
|
35
|
|
|
|
|
287
|
|
189
|
|
|
|
|
|
|
|
190
|
|
|
|
|
|
|
=head2 PDF::Builder::Basic::PDF::File->new() |
191
|
|
|
|
|
|
|
|
192
|
|
|
|
|
|
|
Creates a new, empty file object which can act as the host to other PDF objects. |
193
|
|
|
|
|
|
|
Since there is no file associated with this object, it is assumed that the |
194
|
|
|
|
|
|
|
object is created in readiness for creating a new PDF file. |
195
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
=cut |
197
|
|
|
|
|
|
|
|
198
|
|
|
|
|
|
|
sub new { |
199
|
162
|
|
|
162
|
1
|
454
|
my ($class, $root) = @_; |
200
|
162
|
|
|
|
|
684
|
my $self = $class->_new(); |
201
|
|
|
|
|
|
|
|
202
|
162
|
50
|
|
|
|
506
|
unless ($root) { |
203
|
162
|
|
|
|
|
620
|
$root = PDFDict(); |
204
|
162
|
|
|
|
|
498
|
$root->{'Type'} = PDFName('Catalog'); |
205
|
|
|
|
|
|
|
} |
206
|
162
|
|
|
|
|
648
|
$self->new_obj($root); |
207
|
162
|
|
|
|
|
388
|
$self->{'Root'} = $root; |
208
|
|
|
|
|
|
|
|
209
|
162
|
|
|
|
|
614
|
return $self; |
210
|
|
|
|
|
|
|
} |
211
|
|
|
|
|
|
|
|
212
|
|
|
|
|
|
|
=head2 $p = PDF::Builder::Basic::PDF::File->open($filename, $update, %options) |
213
|
|
|
|
|
|
|
|
214
|
|
|
|
|
|
|
Opens the file and reads all the trailers and cross reference tables to build |
215
|
|
|
|
|
|
|
a complete directory of objects. |
216
|
|
|
|
|
|
|
|
217
|
|
|
|
|
|
|
C<$filename> may be a string or an IO object. |
218
|
|
|
|
|
|
|
|
219
|
|
|
|
|
|
|
C<$update> specifies whether this file is being opened for updating and editing |
220
|
|
|
|
|
|
|
(I value), or simply to be read (I or undefined value). |
221
|
|
|
|
|
|
|
|
222
|
|
|
|
|
|
|
C<%options> may include |
223
|
|
|
|
|
|
|
|
224
|
|
|
|
|
|
|
=over |
225
|
|
|
|
|
|
|
|
226
|
|
|
|
|
|
|
=item -diags => 1 |
227
|
|
|
|
|
|
|
|
228
|
|
|
|
|
|
|
If C<-diags> is set to 1, various warning messages will be given if a |
229
|
|
|
|
|
|
|
suspicious PDF structure is found, and some fixup may be attempted. There is |
230
|
|
|
|
|
|
|
no guarantee that any fixup will change the PDF to legitimate, or that there |
231
|
|
|
|
|
|
|
won't be other problems found further down the line. If this flag is I |
232
|
|
|
|
|
|
|
given, and a structural problem is found, it is fairly likely that errors (and |
233
|
|
|
|
|
|
|
even a program B) may happen further along. If you experience crashes |
234
|
|
|
|
|
|
|
when reading in a PDF file, try running with C<-diags> and see what is reported. |
235
|
|
|
|
|
|
|
|
236
|
|
|
|
|
|
|
There are many PDF files out "in the wild" which, while failing to conform to |
237
|
|
|
|
|
|
|
Adobe's standards, appear to be tolerated by PDF Readers. Thus, Builder will no |
238
|
|
|
|
|
|
|
longer fail on them, but merely comment on their existence. |
239
|
|
|
|
|
|
|
|
240
|
|
|
|
|
|
|
=back |
241
|
|
|
|
|
|
|
|
242
|
|
|
|
|
|
|
=cut |
243
|
|
|
|
|
|
|
|
244
|
|
|
|
|
|
|
sub open { |
245
|
15
|
|
|
15
|
1
|
72
|
my ($class, $filename, $update, %options) = @_; |
246
|
15
|
|
|
|
|
42
|
my ($fh, $buffer); |
247
|
15
|
50
|
|
|
|
73
|
$options{'-diags'} = 0 if not defined $options{'-diags'}; # default |
248
|
|
|
|
|
|
|
|
249
|
15
|
|
|
|
|
35
|
my $comment = ''; # any comment jammed into the PDF header |
250
|
15
|
|
|
|
|
60
|
my $self = $class->_new(); |
251
|
15
|
50
|
|
|
|
58
|
if (ref $filename) { |
252
|
15
|
|
|
|
|
39
|
$self->{' INFILE'} = $filename; |
253
|
15
|
50
|
|
|
|
53
|
if ($update) { |
254
|
15
|
|
|
|
|
49
|
$self->{' update'} = 1; |
255
|
15
|
|
|
|
|
49
|
$self->{' OUTFILE'} = $filename; |
256
|
|
|
|
|
|
|
} |
257
|
15
|
|
|
|
|
38
|
$fh = $filename; |
258
|
|
|
|
|
|
|
} else { |
259
|
0
|
0
|
|
|
|
0
|
die "File '$filename' does not exist!" unless -f $filename; |
260
|
0
|
|
0
|
|
|
0
|
$fh = IO::File->new(($update ? '+' : '') . "<$filename") || return; |
261
|
0
|
|
|
|
|
0
|
$self->{' INFILE'} = $fh; |
262
|
0
|
0
|
|
|
|
0
|
if ($update) { |
263
|
0
|
|
|
|
|
0
|
$self->{' update'} = 1; |
264
|
0
|
|
|
|
|
0
|
$self->{' OUTFILE'} = $fh; |
265
|
0
|
|
|
|
|
0
|
$self->{' fname'} = $filename; |
266
|
|
|
|
|
|
|
} |
267
|
|
|
|
|
|
|
} |
268
|
15
|
|
|
|
|
81
|
binmode $fh, ':raw'; |
269
|
15
|
|
|
|
|
208
|
$fh->seek(0, 0); # go to start of file |
270
|
15
|
|
|
|
|
192
|
$fh->read($buffer, 255); |
271
|
15
|
50
|
|
|
|
654
|
unless ($buffer =~ m/^\%PDF\-(\d+\.\d+)(.*?)$cr/mo) { |
272
|
0
|
|
|
|
|
0
|
die "$filename does not contain a PDF version"; |
273
|
|
|
|
|
|
|
} |
274
|
15
|
|
|
|
|
79
|
$self->{' version'} = $1; |
275
|
|
|
|
|
|
|
# can't run verCheckInput() yet, as full ' version' not set |
276
|
15
|
50
|
33
|
|
|
152
|
if (defined $2 && length($2) > 0) { |
277
|
0
|
|
|
|
|
0
|
$comment = $2; # save for output as comment |
278
|
|
|
|
|
|
|
# since we just echo the original header + comment, unless that causes |
279
|
|
|
|
|
|
|
# problems in some Readers, we can just leave it be (no call to strip |
280
|
|
|
|
|
|
|
# out inline comment and create a separate comment further along). |
281
|
|
|
|
|
|
|
} |
282
|
|
|
|
|
|
|
|
283
|
15
|
|
|
|
|
472
|
$fh->seek(0, 2); # go to end of file |
284
|
15
|
|
|
|
|
164
|
my $end = $fh->tell(); |
285
|
15
|
|
|
|
|
110
|
$self->{' epos'} = $end; |
286
|
15
|
|
|
|
|
65
|
foreach my $offset (1 .. 64) { |
287
|
30
|
|
|
|
|
132
|
$fh->seek($end - 16 * $offset, 0); |
288
|
30
|
|
|
|
|
191
|
$fh->read($buffer, 16 * $offset); |
289
|
30
|
100
|
|
|
|
712
|
last if $buffer =~ m/startxref($cr|\s*)\d+($cr|\s*)\%\%eof.*?/i; |
290
|
|
|
|
|
|
|
} |
291
|
15
|
50
|
|
|
|
355
|
unless ($buffer =~ m/startxref[^\d]+([0-9]+)($cr|\s*)\%\%eof.*?/i) { |
292
|
0
|
0
|
|
|
|
0
|
if ($options{'-diags'} == 1) { |
293
|
0
|
|
|
|
|
0
|
warn "Malformed PDF file $filename"; #orig 'die' |
294
|
|
|
|
|
|
|
} |
295
|
|
|
|
|
|
|
} |
296
|
15
|
|
|
|
|
116
|
my $xpos = $1; |
297
|
15
|
|
|
|
|
46
|
$self->{' xref_position'} = $xpos; |
298
|
|
|
|
|
|
|
|
299
|
15
|
|
|
|
|
97
|
my $tdict = $self->readxrtr($xpos, %options); |
300
|
15
|
|
|
|
|
74
|
foreach my $key (keys %$tdict) { |
301
|
115
|
|
|
|
|
235
|
$self->{$key} = $tdict->{$key}; |
302
|
|
|
|
|
|
|
} |
303
|
|
|
|
|
|
|
|
304
|
15
|
|
|
|
|
178
|
return $self; |
305
|
|
|
|
|
|
|
} # end of open() |
306
|
|
|
|
|
|
|
|
307
|
|
|
|
|
|
|
=head2 $p->release() |
308
|
|
|
|
|
|
|
|
309
|
|
|
|
|
|
|
Releases ALL of the memory used by the PDF document and all of its |
310
|
|
|
|
|
|
|
component objects. After calling this method, do B expect to |
311
|
|
|
|
|
|
|
have anything left in the C object |
312
|
|
|
|
|
|
|
(so if you need to save, be sure to do it before calling this method). |
313
|
|
|
|
|
|
|
|
314
|
|
|
|
|
|
|
B, that it is important that you call this method on any |
315
|
|
|
|
|
|
|
C object when you wish to destroy it and |
316
|
|
|
|
|
|
|
free up its memory. Internally, PDF files have an enormous number of |
317
|
|
|
|
|
|
|
cross-references, and this causes circular references within the |
318
|
|
|
|
|
|
|
internal data structures. Calling C causes a brute-force |
319
|
|
|
|
|
|
|
cleanup of the data structures, freeing up all of the memory. Once |
320
|
|
|
|
|
|
|
you've called this method, though, don't expect to be able to do |
321
|
|
|
|
|
|
|
anything else with the C object; it'll |
322
|
|
|
|
|
|
|
have B internal state whatsoever. |
323
|
|
|
|
|
|
|
|
324
|
|
|
|
|
|
|
=cut |
325
|
|
|
|
|
|
|
|
326
|
|
|
|
|
|
|
# Maintainer's Question: Couldn't this be handled by a DESTROY method |
327
|
|
|
|
|
|
|
# instead of requiring an explicit call to release()? |
328
|
|
|
|
|
|
|
sub release { |
329
|
127
|
|
|
127
|
1
|
274
|
my $self = shift(); |
330
|
|
|
|
|
|
|
|
331
|
127
|
50
|
|
|
|
456
|
return $self unless ref($self); |
332
|
127
|
|
|
|
|
627
|
my @tofree = values %$self; |
333
|
|
|
|
|
|
|
|
334
|
127
|
|
|
|
|
567
|
foreach my $key (keys %$self) { |
335
|
2075
|
|
|
|
|
3379
|
$self->{$key} = undef; |
336
|
2075
|
|
|
|
|
3142
|
delete $self->{$key}; |
337
|
|
|
|
|
|
|
} |
338
|
|
|
|
|
|
|
|
339
|
127
|
|
|
|
|
586
|
while (my $item = shift @tofree) { |
340
|
6253
|
100
|
100
|
|
|
22034
|
if (blessed($item) and $item->can('release')) { |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
341
|
1800
|
|
|
|
|
3872
|
$item->release(1); |
342
|
|
|
|
|
|
|
} elsif (ref($item) eq 'ARRAY') { |
343
|
1332
|
|
|
|
|
4003
|
push @tofree, @$item; |
344
|
|
|
|
|
|
|
} elsif (ref($item) eq 'HASH') { |
345
|
756
|
|
|
|
|
2065
|
push @tofree, values %$item; |
346
|
756
|
|
|
|
|
1981
|
foreach my $key (keys %$item) { |
347
|
3268
|
|
|
|
|
4337
|
$item->{$key} = undef; |
348
|
3268
|
|
|
|
|
6085
|
delete $item->{$key}; |
349
|
|
|
|
|
|
|
} |
350
|
|
|
|
|
|
|
} else { |
351
|
2365
|
|
|
|
|
5112
|
$item = undef; |
352
|
|
|
|
|
|
|
} |
353
|
|
|
|
|
|
|
} |
354
|
|
|
|
|
|
|
|
355
|
127
|
|
|
|
|
504
|
return; |
356
|
|
|
|
|
|
|
} # end of release() |
357
|
|
|
|
|
|
|
|
358
|
|
|
|
|
|
|
=head2 $p->append_file() |
359
|
|
|
|
|
|
|
|
360
|
|
|
|
|
|
|
Appends the objects for output to the read file and then appends the |
361
|
|
|
|
|
|
|
appropriate table. |
362
|
|
|
|
|
|
|
|
363
|
|
|
|
|
|
|
=cut |
364
|
|
|
|
|
|
|
|
365
|
|
|
|
|
|
|
sub append_file { |
366
|
8
|
|
|
8
|
1
|
18
|
my $self = shift(); |
367
|
8
|
50
|
|
|
|
28
|
return unless $self->{' update'}; |
368
|
|
|
|
|
|
|
|
369
|
8
|
|
|
|
|
22
|
my $fh = $self->{' INFILE'}; |
370
|
|
|
|
|
|
|
|
371
|
|
|
|
|
|
|
# hack to upgrade pdf-version number to support requested features in |
372
|
|
|
|
|
|
|
# higher versions than the pdf was originally created. WARNING: new version |
373
|
|
|
|
|
|
|
# must be exactly SAME length as the old (e.g., 1.6 replacing 1.4), or |
374
|
|
|
|
|
|
|
# problems are likely with overwriting header. perhaps some day we will |
375
|
|
|
|
|
|
|
# need to check the old version being ovewritten, and adjust something to |
376
|
|
|
|
|
|
|
# avoid corrupting the file. |
377
|
8
|
|
50
|
|
|
30
|
my $version = $self->{' version'} || 1.4; |
378
|
8
|
|
|
|
|
46
|
$fh->seek(0, 0); |
379
|
|
|
|
|
|
|
# assume that any existing EOL after version will be reused |
380
|
8
|
|
|
|
|
120
|
$fh->print("%PDF-$version"); |
381
|
|
|
|
|
|
|
|
382
|
8
|
|
|
|
|
108
|
my $tdict = PDFDict(); |
383
|
8
|
|
|
|
|
54
|
$tdict->{'Prev'} = PDFNum($self->{' loc'}); |
384
|
8
|
|
|
|
|
22
|
$tdict->{'Info'} = $self->{'Info'}; |
385
|
8
|
50
|
|
|
|
24
|
if (defined $self->{' newroot'}) { |
386
|
0
|
|
|
|
|
0
|
$tdict->{'Root'} = $self->{' newroot'}; |
387
|
|
|
|
|
|
|
} else { |
388
|
8
|
|
|
|
|
20
|
$tdict->{'Root'} = $self->{'Root'}; |
389
|
|
|
|
|
|
|
} |
390
|
8
|
|
|
|
|
19
|
$tdict->{'Size'} = $self->{'Size'}; |
391
|
|
|
|
|
|
|
|
392
|
8
|
|
|
|
|
50
|
foreach my $key (grep { $_ !~ m/^\s/ } keys %$self) { |
|
151
|
|
|
|
|
292
|
|
393
|
25
|
50
|
|
|
|
57
|
$tdict->{$key} = $self->{$key} unless defined $tdict->{$key}; |
394
|
|
|
|
|
|
|
} |
395
|
|
|
|
|
|
|
|
396
|
8
|
|
|
|
|
39
|
$fh->seek($self->{' epos'}, 0); |
397
|
8
|
|
|
|
|
89
|
$self->out_trailer($tdict, $self->{' update'}); |
398
|
8
|
|
|
|
|
34
|
close $self->{' OUTFILE'}; |
399
|
|
|
|
|
|
|
|
400
|
8
|
|
|
|
|
57
|
return; |
401
|
|
|
|
|
|
|
} # end of append_file() |
402
|
|
|
|
|
|
|
|
403
|
|
|
|
|
|
|
=head2 $p->out_file($fname) |
404
|
|
|
|
|
|
|
|
405
|
|
|
|
|
|
|
Writes a PDF file to a file of the given filename, based on the current list of |
406
|
|
|
|
|
|
|
objects to be output. It creates the trailer dictionary based on information |
407
|
|
|
|
|
|
|
in C<$self>. |
408
|
|
|
|
|
|
|
|
409
|
|
|
|
|
|
|
$fname may be a string or an IO object. |
410
|
|
|
|
|
|
|
|
411
|
|
|
|
|
|
|
=cut |
412
|
|
|
|
|
|
|
|
413
|
|
|
|
|
|
|
sub out_file { |
414
|
119
|
|
|
119
|
1
|
312
|
my ($self, $fname) = @_; |
415
|
|
|
|
|
|
|
|
416
|
119
|
|
|
|
|
403
|
$self = $self->create_file($fname); |
417
|
119
|
|
|
|
|
365
|
$self = $self->close_file(); |
418
|
|
|
|
|
|
|
|
419
|
119
|
|
|
|
|
368
|
return $self; |
420
|
|
|
|
|
|
|
} |
421
|
|
|
|
|
|
|
|
422
|
|
|
|
|
|
|
=head2 $p->create_file($fname) |
423
|
|
|
|
|
|
|
|
424
|
|
|
|
|
|
|
Creates a new output file (no check is made of an existing open file) of |
425
|
|
|
|
|
|
|
the given filename or IO object. Note: make sure that C<< $p->{' version'} >> |
426
|
|
|
|
|
|
|
is set correctly before calling this function. |
427
|
|
|
|
|
|
|
|
428
|
|
|
|
|
|
|
=cut |
429
|
|
|
|
|
|
|
|
430
|
|
|
|
|
|
|
sub create_file { |
431
|
119
|
|
|
119
|
1
|
303
|
my ($self, $filename) = @_; |
432
|
119
|
|
|
|
|
224
|
my $fh; |
433
|
|
|
|
|
|
|
|
434
|
119
|
|
|
|
|
274
|
$self->{' fname'} = $filename; |
435
|
119
|
50
|
|
|
|
344
|
if (ref $filename) { |
436
|
119
|
|
|
|
|
218
|
$fh = $filename; |
437
|
|
|
|
|
|
|
} else { |
438
|
0
|
|
0
|
|
|
0
|
$fh = IO::File->new(">$filename") || die "Unable to open $filename for writing"; |
439
|
0
|
|
|
|
|
0
|
binmode($fh,':raw'); |
440
|
|
|
|
|
|
|
} |
441
|
|
|
|
|
|
|
|
442
|
119
|
|
|
|
|
273
|
$self->{' OUTFILE'} = $fh; |
443
|
119
|
|
50
|
|
|
1524
|
$fh->print('%PDF-' . ($self->{' version'} || '1.4') . "\n"); |
444
|
119
|
|
|
|
|
1136
|
$fh->print("%\xC6\xCD\xCD\xB5\n"); # and some binary stuff in a comment. |
445
|
|
|
|
|
|
|
|
446
|
|
|
|
|
|
|
# PDF spec requires 4 or more "binary" bytes (128 or higher value) in a |
447
|
|
|
|
|
|
|
# comment immediately following the PDF-x.y header, to alert reader that |
448
|
|
|
|
|
|
|
# there is binary data. Actual values are apparently arbitrary. This DOES |
449
|
|
|
|
|
|
|
# mean that other comments can NOT be inserted between the header and the |
450
|
|
|
|
|
|
|
# binary comment! PDF::Builder always outputs this comment, so is always |
451
|
|
|
|
|
|
|
# claiming binary data (no harm done?). |
452
|
|
|
|
|
|
|
|
453
|
119
|
|
|
|
|
674
|
return $self; |
454
|
|
|
|
|
|
|
} |
455
|
|
|
|
|
|
|
|
456
|
|
|
|
|
|
|
=head2 $p->close_file() |
457
|
|
|
|
|
|
|
|
458
|
|
|
|
|
|
|
Closes up the open file for output, by outputting the trailer, etc. |
459
|
|
|
|
|
|
|
|
460
|
|
|
|
|
|
|
=cut |
461
|
|
|
|
|
|
|
|
462
|
|
|
|
|
|
|
sub close_file { |
463
|
119
|
|
|
119
|
1
|
295
|
my $self = shift(); |
464
|
|
|
|
|
|
|
|
465
|
119
|
|
|
|
|
393
|
my $tdict = PDFDict(); |
466
|
119
|
50
|
|
|
|
503
|
$tdict->{'Info'} = $self->{'Info'} if defined $self->{'Info'}; |
467
|
119
|
50
|
33
|
|
|
520
|
$tdict->{'Root'} = (defined $self->{' newroot'} and $self->{' newroot'} ne '') ? $self->{' newroot'} : $self->{'Root'}; |
468
|
|
|
|
|
|
|
|
469
|
|
|
|
|
|
|
# remove all freed objects from the outlist, AND the outlist_cache if not updating |
470
|
|
|
|
|
|
|
# NO! Don't do that thing! In fact, let out_trailer do the opposite! |
471
|
|
|
|
|
|
|
|
472
|
119
|
|
33
|
|
|
481
|
$tdict->{'Size'} = $self->{'Size'} || PDFNum(1); |
473
|
119
|
50
|
|
|
|
405
|
$tdict->{'Prev'} = PDFNum($self->{' loc'}) if $self->{' loc'}; |
474
|
119
|
50
|
|
|
|
403
|
if ($self->{' update'}) { |
475
|
0
|
|
|
|
|
0
|
foreach my $key (grep { $_ !~ m/^[\s\-]/ } keys %$self) { |
|
0
|
|
|
|
|
0
|
|
476
|
0
|
0
|
|
|
|
0
|
$tdict->{$key} = $self->{$key} unless defined $tdict->{$key}; |
477
|
|
|
|
|
|
|
} |
478
|
|
|
|
|
|
|
|
479
|
0
|
|
|
|
|
0
|
my $fh = $self->{' INFILE'}; |
480
|
0
|
|
|
|
|
0
|
$fh->seek($self->{' epos'}, 0); |
481
|
|
|
|
|
|
|
} |
482
|
|
|
|
|
|
|
|
483
|
119
|
|
|
|
|
697
|
$self->out_trailer($tdict, $self->{' update'}); |
484
|
119
|
|
|
|
|
620
|
close($self->{' OUTFILE'}); |
485
|
119
|
50
|
33
|
|
|
600
|
if ($^O eq 'MacOS' and not ref($self->{' fname'})) { |
486
|
0
|
|
|
|
|
0
|
MacPerl::SetFileInfo('CARO', 'TEXT', $self->{' fname'}); |
487
|
|
|
|
|
|
|
} |
488
|
|
|
|
|
|
|
|
489
|
119
|
|
|
|
|
712
|
return $self; |
490
|
|
|
|
|
|
|
} # end of close_file() |
491
|
|
|
|
|
|
|
|
492
|
|
|
|
|
|
|
=head2 ($value, $str) = $p->readval($str, %opts) |
493
|
|
|
|
|
|
|
|
494
|
|
|
|
|
|
|
Reads a PDF value from the current position in the file. If C<$str> is too |
495
|
|
|
|
|
|
|
short, read some more from the current location in the file until the whole |
496
|
|
|
|
|
|
|
object is read. This is a recursive call which may slurp in a whole big stream |
497
|
|
|
|
|
|
|
(unprocessed). |
498
|
|
|
|
|
|
|
|
499
|
|
|
|
|
|
|
Returns the recursive data structure read and also the current C<$str> that has |
500
|
|
|
|
|
|
|
been read from the file. |
501
|
|
|
|
|
|
|
|
502
|
|
|
|
|
|
|
=cut |
503
|
|
|
|
|
|
|
|
504
|
|
|
|
|
|
|
sub readval { |
505
|
1304
|
|
|
1304
|
1
|
12578
|
my ($self, $str, %opts) = @_; |
506
|
1304
|
|
|
|
|
1792
|
my $fh = $self->{' INFILE'}; |
507
|
1304
|
|
|
|
|
1562
|
my ($result, $value); |
508
|
|
|
|
|
|
|
|
509
|
1304
|
100
|
|
|
|
2271
|
my $update = defined($opts{'update'}) ? $opts{'update'} : 1; |
510
|
1304
|
100
|
|
|
|
2535
|
$str = update($fh, $str) if $update; |
511
|
|
|
|
|
|
|
|
512
|
1304
|
|
|
|
|
3258
|
$str =~ s/^$ws_char+//; # Ignore initial white space |
513
|
1304
|
|
|
|
|
2733
|
$str =~ s/^\%[^\015\012]*$ws_char+//; # Ignore comments |
514
|
|
|
|
|
|
|
|
515
|
1304
|
100
|
|
|
|
14933
|
if ($str =~ m/^<
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
516
|
|
|
|
|
|
|
# Dictionary |
517
|
134
|
|
|
|
|
323
|
$str = substr ($str, 2); |
518
|
134
|
100
|
|
|
|
334
|
$str = update($fh, $str) if $update; |
519
|
134
|
|
|
|
|
408
|
$result = PDFDict(); |
520
|
|
|
|
|
|
|
|
521
|
134
|
|
|
|
|
322
|
while ($str !~ m/^>>/) { |
522
|
374
|
|
|
|
|
1191
|
$str =~ s/^$ws_char+//; # Ignore initial white space |
523
|
374
|
|
|
|
|
959
|
$str =~ s/^\%[^\015\012]*$ws_char+//; # Ignore comments |
524
|
|
|
|
|
|
|
|
525
|
374
|
50
|
|
|
|
1905
|
if ($str =~ s|^/($reg_char+)||) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
526
|
374
|
|
|
|
|
938
|
my $key = PDF::Builder::Basic::PDF::Name::name_to_string($1, $self); |
527
|
374
|
|
|
|
|
1388
|
($value, $str) = $self->readval($str, %opts); |
528
|
|
|
|
|
|
|
# per Vadim Repin (RT 131147) CHG 1. His conclusion is that |
529
|
|
|
|
|
|
|
# it is highly unlikely, but remotely possible, that there |
530
|
|
|
|
|
|
|
# could be legitimate use of Null objects that should NOT be |
531
|
|
|
|
|
|
|
# prevented from bubbling up. If such a case is discovered, we |
532
|
|
|
|
|
|
|
# might have to try Klaus Ethgen's more limited (in scope) |
533
|
|
|
|
|
|
|
# patch in ./Pages.pm. See full discussion in RT 131147 for |
534
|
|
|
|
|
|
|
# details on what's going on and how this fixes it. |
535
|
|
|
|
|
|
|
#$result->{$key} = $value; # original code |
536
|
374
|
50
|
|
|
|
1332
|
$result->{$key} = $value |
537
|
|
|
|
|
|
|
unless ref($value) eq 'PDF::Builder::Basic::PDF::Null'; |
538
|
|
|
|
|
|
|
} elsif ($str =~ s|^/$ws_char+||) { |
539
|
|
|
|
|
|
|
# fixes a broken key problem of acrobat. -- fredo |
540
|
0
|
|
|
|
|
0
|
($value, $str) = $self->readval($str, %opts); |
541
|
0
|
|
|
|
|
0
|
$result->{'null'} = $value; |
542
|
|
|
|
|
|
|
} elsif ($str =~ s|^//|/|) { |
543
|
|
|
|
|
|
|
# fixes again a broken key problem of illustrator/enfocus. -- fredo |
544
|
0
|
|
|
|
|
0
|
($value, $str) = $self->readval($str, %opts); |
545
|
0
|
|
|
|
|
0
|
$result->{'null'} = $value; |
546
|
|
|
|
|
|
|
} else { |
547
|
0
|
|
|
|
|
0
|
die "Invalid dictionary key"; |
548
|
|
|
|
|
|
|
} |
549
|
374
|
100
|
|
|
|
966
|
$str = update($fh, $str) if $update; # thanks gareth.jones@stud.man.ac.uk |
550
|
|
|
|
|
|
|
} |
551
|
|
|
|
|
|
|
|
552
|
134
|
|
|
|
|
399
|
$str =~ s/^>>//; |
553
|
134
|
100
|
|
|
|
375
|
$str = update($fh, $str) if $update; |
554
|
|
|
|
|
|
|
# streams can't be followed by a lone carriage-return. |
555
|
|
|
|
|
|
|
# fredo: yes they can !!! -- use the MacOS, Luke. |
556
|
|
|
|
|
|
|
# TBD isn't this covered by $cr as space CR? |
557
|
134
|
100
|
66
|
|
|
459
|
if (($str =~ s/^stream(?:(?:\015\012)|\012|\015)//) and ($result->{'Length'}->val() != 0)) { # stream |
558
|
11
|
|
|
|
|
40
|
my $length = $result->{'Length'}->val(); |
559
|
11
|
|
|
|
|
30
|
$result->{' streamsrc'} = $fh; |
560
|
11
|
|
|
|
|
41
|
$result->{' streamloc'} = $fh->tell() - length($str); |
561
|
|
|
|
|
|
|
|
562
|
11
|
50
|
|
|
|
96
|
unless ($opts{'nostreams'}) { |
563
|
11
|
50
|
|
|
|
33
|
if ($length > length($str)) { |
564
|
0
|
|
|
|
|
0
|
$value = $str; |
565
|
0
|
|
|
|
|
0
|
$length -= length($str); |
566
|
0
|
|
|
|
|
0
|
read $fh, $str, $length + 11; # slurp the whole stream! |
567
|
|
|
|
|
|
|
} else { |
568
|
11
|
|
|
|
|
22
|
$value = ''; |
569
|
|
|
|
|
|
|
} |
570
|
11
|
|
|
|
|
35
|
$value .= substr($str, 0, $length); |
571
|
11
|
|
|
|
|
30
|
$result->{' stream'} = $value; |
572
|
11
|
|
|
|
|
22
|
$result->{' nofilt'} = 1; |
573
|
11
|
50
|
|
|
|
41
|
$str = update($fh, $str, 1) if $update; # tell update we are in-stream and only need an endstream |
574
|
11
|
|
|
|
|
49
|
$str = substr($str, index($str, 'endstream') + 9); |
575
|
|
|
|
|
|
|
} |
576
|
|
|
|
|
|
|
} |
577
|
|
|
|
|
|
|
|
578
|
134
|
100
|
100
|
|
|
519
|
if (defined $result->{'Type'} and defined $types{$result->{'Type'}->val()}) { |
579
|
31
|
|
|
|
|
105
|
bless $result, $types{$result->{'Type'}->val()}; |
580
|
|
|
|
|
|
|
} |
581
|
|
|
|
|
|
|
# gdj: FIXME: if any of the ws chars were crs, then the whole |
582
|
|
|
|
|
|
|
# string might not have been read. |
583
|
|
|
|
|
|
|
|
584
|
|
|
|
|
|
|
} elsif ($str =~ m/^([0-9]+)(?:$ws_char|$re_comment)+([0-9]+)(?:$ws_char|$re_comment)+R/s) { |
585
|
|
|
|
|
|
|
# Indirect Object |
586
|
129
|
|
|
|
|
310
|
my $num = $1; |
587
|
129
|
|
|
|
|
222
|
$value = $2; |
588
|
129
|
|
|
|
|
1063
|
$str =~ s/^([0-9]+)(?:$ws_char|$re_comment)+([0-9]+)(?:$ws_char|$re_comment)+R//s; |
589
|
129
|
100
|
|
|
|
377
|
unless ($result = $self->test_obj($num, $value)) { |
590
|
107
|
|
|
|
|
383
|
$result = PDF::Builder::Basic::PDF::Objind->new(); |
591
|
107
|
|
|
|
|
328
|
$result->{' objnum'} = $num; |
592
|
107
|
|
|
|
|
215
|
$result->{' objgen'} = $value; |
593
|
107
|
|
|
|
|
265
|
$self->add_obj($result, $num, $value); |
594
|
|
|
|
|
|
|
} |
595
|
129
|
|
|
|
|
242
|
$result->{' parent'} = $self; |
596
|
129
|
|
|
|
|
433
|
weaken $result->{' parent'}; |
597
|
|
|
|
|
|
|
#$result->{' realised'} = 0; |
598
|
|
|
|
|
|
|
# removed to address changes being lost when an indirect object |
599
|
|
|
|
|
|
|
# is realised twice |
600
|
|
|
|
|
|
|
# gdj: FIXME: if any of the ws chars were crs, then the whole |
601
|
|
|
|
|
|
|
# string might not have been read. |
602
|
|
|
|
|
|
|
|
603
|
|
|
|
|
|
|
} elsif ($str =~ m/^([0-9]+)(?:$ws_char|$re_comment)+([0-9]+)(?:$ws_char|$re_comment)+obj/s) { |
604
|
|
|
|
|
|
|
# Object |
605
|
86
|
|
|
|
|
165
|
my $obj; |
606
|
86
|
|
|
|
|
194
|
my $num = $1; |
607
|
86
|
|
|
|
|
158
|
$value = $2; |
608
|
86
|
|
|
|
|
950
|
$str =~ s/^([0-9]+)(?:$ws_char|$re_comment)+([0-9]+)(?:$ws_char|$re_comment)+obj//s; |
609
|
86
|
|
|
|
|
359
|
($obj, $str) = $self->readval($str, %opts); |
610
|
86
|
100
|
|
|
|
246
|
if ($result = $self->test_obj($num, $value)) { |
611
|
72
|
|
|
|
|
234
|
$result->merge($obj); |
612
|
|
|
|
|
|
|
} else { |
613
|
14
|
|
|
|
|
23
|
$result = $obj; |
614
|
14
|
|
|
|
|
39
|
$self->add_obj($result, $num, $value); |
615
|
14
|
|
|
|
|
29
|
$result->{' realised'} = 1; |
616
|
|
|
|
|
|
|
} |
617
|
86
|
100
|
|
|
|
243
|
$str = update($fh, $str) if $update; # thanks to kundrat@kundrat.sk |
618
|
86
|
|
|
|
|
515
|
$str =~ s/^endobj//; |
619
|
|
|
|
|
|
|
|
620
|
|
|
|
|
|
|
} elsif ($str =~ m|^/($reg_char*)|s) { |
621
|
|
|
|
|
|
|
# Name |
622
|
522
|
|
|
|
|
1136
|
$value = $1; |
623
|
522
|
|
|
|
|
2080
|
$str =~ s|^/($reg_char*)||s; |
624
|
522
|
|
|
|
|
1485
|
$result = PDF::Builder::Basic::PDF::Name->from_pdf($value, $self); |
625
|
|
|
|
|
|
|
|
626
|
|
|
|
|
|
|
} elsif ($str =~ m/^\(/) { |
627
|
|
|
|
|
|
|
# Literal String |
628
|
|
|
|
|
|
|
# We now need to find an unbalanced, unescaped right-paren. |
629
|
|
|
|
|
|
|
# This can't be done with a regex. |
630
|
1
|
|
|
|
|
4
|
my $value = '('; |
631
|
1
|
|
|
|
|
4
|
$str = substr($str, 1); |
632
|
|
|
|
|
|
|
|
633
|
1
|
|
|
|
|
3
|
my $nested_level = 1; |
634
|
1
|
|
|
|
|
2
|
while (1) { |
635
|
|
|
|
|
|
|
# Ignore everything up to the first escaped or parenthesis character |
636
|
1
|
50
|
|
|
|
7
|
if ($str =~ /^([^\\()]+)(.*)/s) { |
637
|
1
|
|
|
|
|
4
|
$value .= $1; |
638
|
1
|
|
|
|
|
3
|
$str = $2; |
639
|
|
|
|
|
|
|
} |
640
|
|
|
|
|
|
|
|
641
|
|
|
|
|
|
|
# Ignore escaped parentheses |
642
|
1
|
50
|
|
|
|
16
|
if ($str =~ /^(\\[()])/) { |
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
643
|
0
|
|
|
|
|
0
|
$value .= $1; |
644
|
0
|
|
|
|
|
0
|
$str = substr($str, 2); |
645
|
|
|
|
|
|
|
|
646
|
|
|
|
|
|
|
} elsif ($str =~ /^\(/) { |
647
|
|
|
|
|
|
|
# Left parenthesis: increase nesting |
648
|
0
|
|
|
|
|
0
|
$value .= '('; |
649
|
0
|
|
|
|
|
0
|
$str = substr($str, 1); |
650
|
0
|
|
|
|
|
0
|
$nested_level++; |
651
|
|
|
|
|
|
|
|
652
|
|
|
|
|
|
|
} elsif ($str =~ /^\)/) { |
653
|
|
|
|
|
|
|
# Right parenthesis: decrease nesting |
654
|
1
|
|
|
|
|
3
|
$value .= ')'; |
655
|
1
|
|
|
|
|
3
|
$str = substr($str, 1); |
656
|
1
|
|
|
|
|
2
|
$nested_level--; |
657
|
1
|
50
|
|
|
|
12
|
last unless $nested_level; |
658
|
|
|
|
|
|
|
|
659
|
|
|
|
|
|
|
} elsif ($str =~ /^(\\[^()])/) { |
660
|
|
|
|
|
|
|
# Other escaped character |
661
|
0
|
|
|
|
|
0
|
$value .= $1; |
662
|
0
|
|
|
|
|
0
|
$str = substr($str, 2); |
663
|
|
|
|
|
|
|
|
664
|
|
|
|
|
|
|
} else { |
665
|
|
|
|
|
|
|
# If there wasn't an escaped or parenthesis character, |
666
|
|
|
|
|
|
|
# read some more. |
667
|
|
|
|
|
|
|
|
668
|
|
|
|
|
|
|
# We don't use update because we don't want to remove |
669
|
|
|
|
|
|
|
# whitespace or comments. |
670
|
0
|
0
|
|
|
|
0
|
$fh->read($str, 255, length($str)) or die 'Unterminated string.'; |
671
|
|
|
|
|
|
|
} |
672
|
|
|
|
|
|
|
} # end while(TRUE) loop |
673
|
|
|
|
|
|
|
|
674
|
1
|
|
|
|
|
5
|
$result = PDF::Builder::Basic::PDF::String->from_pdf($value); |
675
|
|
|
|
|
|
|
# end Literal String check |
676
|
|
|
|
|
|
|
|
677
|
|
|
|
|
|
|
} elsif ($str =~ m/^) { |
678
|
|
|
|
|
|
|
# Hex String |
679
|
0
|
|
|
|
|
0
|
$str =~ s/^/; |
680
|
0
|
|
|
|
|
0
|
$fh->read($str, 255, length($str)) while (0 > index($str, '>')); |
681
|
0
|
|
|
|
|
0
|
($value, $str) = ($str =~ /^(.*?)>(.*)/s); |
682
|
0
|
|
|
|
|
0
|
$result = PDF::Builder::Basic::PDF::String->from_pdf('<' . $value . '>'); |
683
|
|
|
|
|
|
|
|
684
|
|
|
|
|
|
|
} elsif ($str =~ m/^\[/) { |
685
|
|
|
|
|
|
|
# Array |
686
|
83
|
|
|
|
|
305
|
$str =~ s/^\[//; |
687
|
83
|
50
|
|
|
|
237
|
$str = update($fh, $str) if $update; |
688
|
83
|
|
|
|
|
257
|
$result = PDFArray(); |
689
|
83
|
|
|
|
|
231
|
while ($str !~ m/^\]/) { |
690
|
729
|
|
|
|
|
1826
|
$str =~ s/^$ws_char+//; # Ignore initial white space |
691
|
729
|
|
|
|
|
1454
|
$str =~ s/^\%[^\015\012]*$ws_char+//; # Ignore comments |
692
|
|
|
|
|
|
|
|
693
|
729
|
|
|
|
|
1600
|
($value, $str) = $self->readval($str, %opts); |
694
|
729
|
|
|
|
|
1900
|
$result->add_elements($value); |
695
|
729
|
50
|
|
|
|
1334
|
$str = update($fh, $str) if $update; # str might just be exhausted! |
696
|
|
|
|
|
|
|
} |
697
|
83
|
|
|
|
|
263
|
$str =~ s/^\]//; |
698
|
|
|
|
|
|
|
|
699
|
|
|
|
|
|
|
} elsif ($str =~ m/^(true|false)($irreg_char|$)/) { |
700
|
|
|
|
|
|
|
# Boolean |
701
|
0
|
|
|
|
|
0
|
$value = $1; |
702
|
0
|
|
|
|
|
0
|
$str =~ s/^(?:true|false)//; |
703
|
0
|
|
|
|
|
0
|
$result = PDF::Builder::Basic::PDF::Bool->from_pdf($value); |
704
|
|
|
|
|
|
|
|
705
|
|
|
|
|
|
|
} elsif ($str =~ m/^([+-.0-9]+)($irreg_char|$)/) { |
706
|
|
|
|
|
|
|
# Number |
707
|
349
|
|
|
|
|
699
|
$value = $1; |
708
|
349
|
|
|
|
|
949
|
$str =~ s/^([+-.0-9]+)//; |
709
|
|
|
|
|
|
|
|
710
|
|
|
|
|
|
|
# If $str only consists of whitespace (or is empty), call update to |
711
|
|
|
|
|
|
|
# see if this is the beginning of an indirect object or reference |
712
|
349
|
100
|
100
|
|
|
3442
|
if ($update and ($str =~ /^$re_whitespace*$/s or $str =~ /^$re_whitespace+[0-9]+$re_whitespace*$/s)) { |
|
|
|
100
|
|
|
|
|
713
|
6
|
|
|
|
|
41
|
$str =~ s/^$re_whitespace+/ /s; |
714
|
6
|
|
|
|
|
59
|
$str =~ s/$re_whitespace+$/ /s; |
715
|
6
|
|
|
|
|
64
|
$str = update($fh, $str); |
716
|
6
|
100
|
|
|
|
89
|
if ($str =~ m/^$re_whitespace*([0-9]+)$re_whitespace+(?:R|obj)/s) { |
717
|
4
|
|
|
|
|
19
|
return $self->readval("$value $str", %opts); |
718
|
|
|
|
|
|
|
} |
719
|
|
|
|
|
|
|
} |
720
|
|
|
|
|
|
|
|
721
|
345
|
|
|
|
|
1057
|
$result = PDF::Builder::Basic::PDF::Number->from_pdf($value); |
722
|
|
|
|
|
|
|
|
723
|
|
|
|
|
|
|
} elsif ($str =~ m/^null($irreg_char|$)/) { |
724
|
|
|
|
|
|
|
# Null |
725
|
0
|
|
|
|
|
0
|
$str =~ s/^null//; |
726
|
0
|
|
|
|
|
0
|
$result = PDF::Builder::Basic::PDF::Null->new(); |
727
|
|
|
|
|
|
|
|
728
|
|
|
|
|
|
|
} else { |
729
|
0
|
|
|
|
|
0
|
die "Can't parse `$str' near " . ($fh->tell()) . " length " . length($str) . "."; |
730
|
|
|
|
|
|
|
} |
731
|
|
|
|
|
|
|
|
732
|
1300
|
|
|
|
|
4992
|
$str =~ s/^$ws_char+//s; |
733
|
1300
|
|
|
|
|
3808
|
return ($result, $str); |
734
|
|
|
|
|
|
|
} # end of readval() |
735
|
|
|
|
|
|
|
|
736
|
|
|
|
|
|
|
=head2 $ref = $p->read_obj($objind, %opts) |
737
|
|
|
|
|
|
|
|
738
|
|
|
|
|
|
|
Given an indirect object reference, locate it and read the object returning |
739
|
|
|
|
|
|
|
the read in object. |
740
|
|
|
|
|
|
|
|
741
|
|
|
|
|
|
|
=cut |
742
|
|
|
|
|
|
|
|
743
|
|
|
|
|
|
|
sub read_obj { |
744
|
68
|
|
|
68
|
1
|
145
|
my ($self, $objind, %opts) = @_; |
745
|
|
|
|
|
|
|
|
746
|
68
|
|
50
|
|
|
222
|
my $res = $self->read_objnum($objind->{' objnum'}, $objind->{' objgen'}, %opts) || return; |
747
|
68
|
50
|
|
|
|
249
|
$objind->merge($res) unless $objind eq $res; |
748
|
|
|
|
|
|
|
|
749
|
68
|
|
|
|
|
230
|
return $objind; |
750
|
|
|
|
|
|
|
} |
751
|
|
|
|
|
|
|
|
752
|
|
|
|
|
|
|
=head2 $ref = $p->read_objnum($num, $gen, %opts) |
753
|
|
|
|
|
|
|
|
754
|
|
|
|
|
|
|
Returns a fully read object of given number and generation in this file |
755
|
|
|
|
|
|
|
|
756
|
|
|
|
|
|
|
=cut |
757
|
|
|
|
|
|
|
|
758
|
|
|
|
|
|
|
sub read_objnum { |
759
|
76
|
|
|
76
|
1
|
2550
|
my ($self, $num, $gen, %opts) = @_; |
760
|
|
|
|
|
|
|
|
761
|
76
|
50
|
|
|
|
224
|
croak 'Undefined object number in call to read_objnum($num, $gen)' unless defined $num; |
762
|
76
|
50
|
|
|
|
166
|
croak 'Undefined object generation in call to read_objnum($num, $gen)' unless defined $gen; |
763
|
76
|
50
|
|
|
|
360
|
croak "Invalid object number '$num' in call to read_objnum" unless $num =~ /^[0-9]+$/; |
764
|
76
|
50
|
|
|
|
242
|
croak "Invalid object generation '$gen' in call to read_objnum" unless $gen =~ /^[0-9]+$/; |
765
|
|
|
|
|
|
|
|
766
|
76
|
|
50
|
|
|
223
|
my $object_location = $self->locate_obj($num, $gen) || return; |
767
|
76
|
|
|
|
|
125
|
my $object; |
768
|
|
|
|
|
|
|
|
769
|
|
|
|
|
|
|
# Compressed object |
770
|
76
|
100
|
|
|
|
155
|
if (ref($object_location)) { |
771
|
4
|
|
|
|
|
9
|
my ($object_stream_num, $object_stream_pos) = @{$object_location}; |
|
4
|
|
|
|
|
10
|
|
772
|
|
|
|
|
|
|
|
773
|
4
|
|
|
|
|
50
|
my $object_stream = $self->read_objnum($object_stream_num, 0, %opts); |
774
|
4
|
50
|
|
|
|
14
|
die 'Cannot find the compressed object stream' unless $object_stream; |
775
|
|
|
|
|
|
|
|
776
|
4
|
50
|
|
|
|
33
|
$object_stream->read_stream() if $object_stream->{' nofilt'}; |
777
|
|
|
|
|
|
|
|
778
|
|
|
|
|
|
|
# An object stream starts with pairs of integers containing object numbers and |
779
|
|
|
|
|
|
|
# stream offsets relative to the First key |
780
|
4
|
|
|
|
|
6
|
my $fh; |
781
|
|
|
|
|
|
|
my $pairs; |
782
|
4
|
50
|
|
|
|
16
|
unless ($object_stream->{' streamfile'}) { |
783
|
4
|
|
|
|
|
15
|
$pairs = substr($object_stream->{' stream'}, 0, $object_stream->{'First'}->val()); |
784
|
|
|
|
|
|
|
} else { |
785
|
0
|
|
|
|
|
0
|
CORE::open($fh, '<', $object_stream->{' streamfile'}); |
786
|
0
|
|
|
|
|
0
|
read($fh, $pairs, $object_stream->{'First'}->val()); |
787
|
|
|
|
|
|
|
} |
788
|
4
|
|
|
|
|
29
|
my @map = split /\s+/, $pairs; |
789
|
|
|
|
|
|
|
|
790
|
|
|
|
|
|
|
# Find the offset of the object in the stream |
791
|
4
|
|
|
|
|
13
|
my $index = $object_stream_pos * 2; |
792
|
4
|
50
|
|
|
|
16
|
die "Objind $num does not exist at index $index" unless $map[$index] == $num; |
793
|
4
|
|
|
|
|
10
|
my $start = $map[$index + 1]; |
794
|
|
|
|
|
|
|
|
795
|
|
|
|
|
|
|
# Unless this is the last object in the stream, its length is |
796
|
|
|
|
|
|
|
# determined by the offset of the next object. |
797
|
4
|
|
|
|
|
18
|
my $last_object_in_stream = $map[-2]; |
798
|
4
|
|
|
|
|
8
|
my $length; |
799
|
4
|
100
|
|
|
|
12
|
if ($last_object_in_stream == $num) { |
800
|
2
|
50
|
|
|
|
22
|
if ($object_stream->{' stream'}) { |
801
|
2
|
|
|
|
|
11
|
$length = length($object_stream->{' stream'}) - $object_stream->{'First'}->val() - $start; |
802
|
|
|
|
|
|
|
} else { |
803
|
0
|
|
|
|
|
0
|
$length = (-s $object_stream->{' streamfile'}) - $object_stream->{'First'}->val() - $start; |
804
|
|
|
|
|
|
|
} |
805
|
|
|
|
|
|
|
} else { |
806
|
2
|
|
|
|
|
7
|
my $next_start = $map[$index + 3]; |
807
|
2
|
|
|
|
|
7
|
$length = $next_start - $start; |
808
|
|
|
|
|
|
|
} |
809
|
|
|
|
|
|
|
|
810
|
|
|
|
|
|
|
# Read the object from the stream |
811
|
4
|
|
|
|
|
22
|
my $stream = "$num 0 obj "; |
812
|
4
|
50
|
|
|
|
13
|
unless ($object_stream->{' streamfile'}) { |
813
|
4
|
|
|
|
|
13
|
$stream .= substr($object_stream->{' stream'}, $object_stream->{'First'}->val() + $start, $length); |
814
|
|
|
|
|
|
|
} else { |
815
|
0
|
|
|
|
|
0
|
seek($fh, $object_stream->{'First'}->val() + $start, 0); |
816
|
0
|
|
|
|
|
0
|
read($fh, $stream, $length, length($stream)); |
817
|
0
|
|
|
|
|
0
|
close $fh; |
818
|
|
|
|
|
|
|
} |
819
|
|
|
|
|
|
|
|
820
|
4
|
|
|
|
|
20
|
($object) = $self->readval($stream, %opts, update => 0); |
821
|
4
|
|
|
|
|
21
|
return $object; |
822
|
|
|
|
|
|
|
} |
823
|
|
|
|
|
|
|
|
824
|
72
|
|
|
|
|
250
|
my $current_location = $self->{' INFILE'}->tell(); |
825
|
72
|
|
|
|
|
496
|
$self->{' INFILE'}->seek($object_location, 0); |
826
|
72
|
|
|
|
|
372
|
($object) = $self->readval('', %opts); |
827
|
72
|
|
|
|
|
343
|
$self->{' INFILE'}->seek($current_location, 0); |
828
|
|
|
|
|
|
|
|
829
|
72
|
|
|
|
|
503
|
return $object; |
830
|
|
|
|
|
|
|
} # end of read_objnum() |
831
|
|
|
|
|
|
|
|
832
|
|
|
|
|
|
|
=head2 $objind = $p->new_obj($obj) |
833
|
|
|
|
|
|
|
|
834
|
|
|
|
|
|
|
Creates a new, free object reference based on free space in the cross reference |
835
|
|
|
|
|
|
|
chain. If nothing is free, then think up a new number. If C<$obj>, then turns |
836
|
|
|
|
|
|
|
that object into this new object rather than returning a new object. |
837
|
|
|
|
|
|
|
|
838
|
|
|
|
|
|
|
=cut |
839
|
|
|
|
|
|
|
|
840
|
|
|
|
|
|
|
sub new_obj { |
841
|
1020
|
|
|
1020
|
1
|
2143
|
my ($self, $base) = @_; |
842
|
1020
|
|
|
|
|
1653
|
my $res; |
843
|
|
|
|
|
|
|
|
844
|
1020
|
50
|
66
|
|
|
2859
|
if (defined $self->{' free'} and scalar @{$self->{' free'}} > 0) { |
|
10
|
|
|
|
|
50
|
|
845
|
0
|
|
|
|
|
0
|
$res = shift(@{$self->{' free'}}); |
|
0
|
|
|
|
|
0
|
|
846
|
0
|
0
|
|
|
|
0
|
if (defined $base) { |
847
|
0
|
|
|
|
|
0
|
my ($num, $gen) = @{$self->{' objects'}{$res->uid()}}; |
|
0
|
|
|
|
|
0
|
|
848
|
0
|
|
|
|
|
0
|
$self->remove_obj($res); |
849
|
0
|
|
|
|
|
0
|
$self->add_obj($base, $num, $gen); |
850
|
0
|
|
|
|
|
0
|
return $self->out_obj($base); |
851
|
|
|
|
|
|
|
} else { |
852
|
0
|
|
|
|
|
0
|
$self->{' objects'}{$res->uid()}[2] = 0; |
853
|
0
|
|
|
|
|
0
|
return $res; |
854
|
|
|
|
|
|
|
} |
855
|
|
|
|
|
|
|
} |
856
|
|
|
|
|
|
|
|
857
|
1020
|
|
|
|
|
1693
|
my $tdict = $self; |
858
|
1020
|
|
|
|
|
1575
|
my $i; |
859
|
1020
|
|
|
|
|
2447
|
while (defined $tdict) { |
860
|
1021
|
50
|
|
|
|
3364
|
$i = $tdict->{' xref'}{defined($i) ? $i : ''}[0]; |
861
|
1021
|
|
33
|
|
|
2838
|
while (defined $i and $i != 0) { |
862
|
0
|
|
|
|
|
0
|
my ($ni, $ng) = @{$tdict->{' xref'}{$i}}; |
|
0
|
|
|
|
|
0
|
|
863
|
0
|
0
|
|
|
|
0
|
unless (defined $self->locate_obj($i, $ng)) { |
864
|
0
|
0
|
|
|
|
0
|
if (defined $base) { |
865
|
0
|
|
|
|
|
0
|
$self->add_obj($base, $i, $ng); |
866
|
0
|
|
|
|
|
0
|
return $base; |
867
|
|
|
|
|
|
|
} else { |
868
|
0
|
|
0
|
|
|
0
|
$res = $self->test_obj($i, $ng) || $self->add_obj(PDF::Builder::Basic::PDF::Objind->new(), $i, $ng); |
869
|
0
|
|
|
|
|
0
|
$self->out_obj($res); |
870
|
0
|
|
|
|
|
0
|
return $res; |
871
|
|
|
|
|
|
|
} |
872
|
|
|
|
|
|
|
} |
873
|
0
|
|
|
|
|
0
|
$i = $ni; |
874
|
|
|
|
|
|
|
} |
875
|
1021
|
|
|
|
|
2306
|
$tdict = $tdict->{' prev'}; |
876
|
|
|
|
|
|
|
} |
877
|
|
|
|
|
|
|
|
878
|
1020
|
|
|
|
|
2030
|
$i = $self->{' maxobj'}++; |
879
|
1020
|
50
|
|
|
|
2124
|
if (defined $base) { |
880
|
1020
|
|
|
|
|
3220
|
$self->add_obj($base, $i, 0); |
881
|
1020
|
|
|
|
|
2939
|
$self->out_obj($base); |
882
|
1020
|
|
|
|
|
2303
|
return $base; |
883
|
|
|
|
|
|
|
} else { |
884
|
0
|
|
|
|
|
0
|
$res = $self->add_obj(PDF::Builder::Basic::PDF::Objind->new(), $i, 0); |
885
|
0
|
|
|
|
|
0
|
$self->out_obj($res); |
886
|
0
|
|
|
|
|
0
|
return $res; |
887
|
|
|
|
|
|
|
} |
888
|
|
|
|
|
|
|
} |
889
|
|
|
|
|
|
|
|
890
|
|
|
|
|
|
|
=head2 $p->out_obj($obj) |
891
|
|
|
|
|
|
|
|
892
|
|
|
|
|
|
|
Indicates that the given object reference should appear in the output xref |
893
|
|
|
|
|
|
|
table whether with data or freed. |
894
|
|
|
|
|
|
|
|
895
|
|
|
|
|
|
|
=cut |
896
|
|
|
|
|
|
|
|
897
|
|
|
|
|
|
|
sub out_obj { |
898
|
2246
|
|
|
2246
|
1
|
4160
|
my ($self, $obj) = @_; |
899
|
|
|
|
|
|
|
|
900
|
|
|
|
|
|
|
# This is why we've been keeping the outlist CACHE around; to speed |
901
|
|
|
|
|
|
|
# up this method by orders of magnitude (it saves up from having to |
902
|
|
|
|
|
|
|
# grep the full outlist each time through as we'll just do a lookup |
903
|
|
|
|
|
|
|
# in the hash) (which is super-fast). |
904
|
2246
|
100
|
|
|
|
6263
|
unless (exists $self->{' outlist_cache'}{$obj}) { |
905
|
1037
|
|
|
|
|
1627
|
push @{$self->{' outlist'}}, $obj; |
|
1037
|
|
|
|
|
2637
|
|
906
|
|
|
|
|
|
|
# weaken $self->{' outlist'}->[-1]; |
907
|
1037
|
|
|
|
|
3269
|
$self->{' outlist_cache'}{$obj} = 1; |
908
|
|
|
|
|
|
|
} |
909
|
|
|
|
|
|
|
|
910
|
2246
|
|
|
|
|
4174
|
return $obj; |
911
|
|
|
|
|
|
|
} |
912
|
|
|
|
|
|
|
|
913
|
|
|
|
|
|
|
=head2 $p->free_obj($obj) |
914
|
|
|
|
|
|
|
|
915
|
|
|
|
|
|
|
Marks an object reference for output as being freed. |
916
|
|
|
|
|
|
|
|
917
|
|
|
|
|
|
|
=cut |
918
|
|
|
|
|
|
|
|
919
|
|
|
|
|
|
|
sub free_obj { |
920
|
0
|
|
|
0
|
1
|
0
|
my ($self, $obj) = @_; |
921
|
|
|
|
|
|
|
|
922
|
0
|
|
|
|
|
0
|
push @{$self->{' free'}}, $obj; |
|
0
|
|
|
|
|
0
|
|
923
|
0
|
|
|
|
|
0
|
$self->{' objects'}{$obj->uid()}[2] = 1; |
924
|
0
|
|
|
|
|
0
|
$self->out_obj($obj); |
925
|
|
|
|
|
|
|
|
926
|
0
|
|
|
|
|
0
|
return; |
927
|
|
|
|
|
|
|
} |
928
|
|
|
|
|
|
|
|
929
|
|
|
|
|
|
|
=head2 $p->remove_obj($objind) |
930
|
|
|
|
|
|
|
|
931
|
|
|
|
|
|
|
Removes the object from all places where we might remember it. |
932
|
|
|
|
|
|
|
|
933
|
|
|
|
|
|
|
=cut |
934
|
|
|
|
|
|
|
|
935
|
|
|
|
|
|
|
sub remove_obj { |
936
|
0
|
|
|
0
|
1
|
0
|
my ($self, $objind) = @_; |
937
|
|
|
|
|
|
|
|
938
|
|
|
|
|
|
|
# who says it has to be fast |
939
|
0
|
|
|
|
|
0
|
delete $self->{' objects'}{$objind->uid()}; |
940
|
0
|
|
|
|
|
0
|
delete $self->{' outlist_cache'}{$objind}; |
941
|
0
|
|
|
|
|
0
|
delete $self->{' printed_cache'}{$objind}; |
942
|
0
|
|
|
|
|
0
|
@{$self->{' outlist'}} = grep { $_ ne $objind } @{$self->{' outlist'}}; |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
943
|
0
|
|
|
|
|
0
|
@{$self->{' printed'}} = grep { $_ ne $objind } @{$self->{' printed'}}; |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
944
|
|
|
|
|
|
|
$self->{' objcache'}{$objind->{' objnum'}, $objind->{' objgen'}} = undef |
945
|
0
|
0
|
|
|
|
0
|
if $self->{' objcache'}{$objind->{' objnum'}, $objind->{' objgen'}} eq $objind; |
946
|
|
|
|
|
|
|
|
947
|
0
|
|
|
|
|
0
|
return $self; |
948
|
|
|
|
|
|
|
} |
949
|
|
|
|
|
|
|
|
950
|
|
|
|
|
|
|
=head2 $p->ship_out(@objects) |
951
|
|
|
|
|
|
|
|
952
|
|
|
|
|
|
|
=head2 $p->ship_out() |
953
|
|
|
|
|
|
|
|
954
|
|
|
|
|
|
|
Ships the given objects (or all objects for output if C<@objects> is empty) to |
955
|
|
|
|
|
|
|
the currently open output file (assuming there is one). Freed objects are not |
956
|
|
|
|
|
|
|
shipped, and once an object is shipped it is switched such that this file |
957
|
|
|
|
|
|
|
becomes its source and it will not be shipped again unless out_obj is called |
958
|
|
|
|
|
|
|
again. Notice that a shipped out object can be re-output or even freed, but |
959
|
|
|
|
|
|
|
that it will not cause the data already output to be changed. |
960
|
|
|
|
|
|
|
|
961
|
|
|
|
|
|
|
=cut |
962
|
|
|
|
|
|
|
|
963
|
|
|
|
|
|
|
sub ship_out { |
964
|
130
|
|
|
130
|
1
|
312
|
my ($self, @objects) = @_; |
965
|
|
|
|
|
|
|
|
966
|
130
|
50
|
|
|
|
370
|
return unless defined $self->{' OUTFILE'}; |
967
|
130
|
|
|
|
|
259
|
my $fh = $self->{' OUTFILE'}; |
968
|
130
|
|
|
|
|
388
|
seek($fh, 0, 2); # go to the end of the file |
969
|
|
|
|
|
|
|
|
970
|
130
|
50
|
|
|
|
412
|
@objects = @{$self->{' outlist'}} unless scalar @objects > 0; |
|
130
|
|
|
|
|
415
|
|
971
|
130
|
|
|
|
|
340
|
foreach my $objind (@objects) { |
972
|
752
|
50
|
|
|
|
2308
|
next unless $objind->is_obj($self); |
973
|
752
|
|
|
|
|
1354
|
my $j = -1; |
974
|
752
|
|
|
|
|
1288
|
for (my $i = 0; $i < scalar @{$self->{' outlist'}}; $i++) { |
|
752
|
|
|
|
|
1849
|
|
975
|
752
|
50
|
|
|
|
2340
|
if ($self->{' outlist'}[$i] eq $objind) { |
976
|
752
|
|
|
|
|
1171
|
$j = $i; |
977
|
752
|
|
|
|
|
1418
|
last; |
978
|
|
|
|
|
|
|
} |
979
|
|
|
|
|
|
|
} |
980
|
752
|
50
|
|
|
|
1621
|
next if $j < 0; |
981
|
752
|
|
|
|
|
1167
|
splice(@{$self->{' outlist'}}, $j, 1); |
|
752
|
|
|
|
|
1490
|
|
982
|
752
|
|
|
|
|
2112
|
delete $self->{' outlist_cache'}{$objind}; |
983
|
752
|
50
|
|
|
|
1190
|
next if grep { $_ eq $objind } @{$self->{' free'}}; |
|
0
|
|
|
|
|
0
|
|
|
752
|
|
|
|
|
2054
|
|
984
|
|
|
|
|
|
|
|
985
|
752
|
50
|
|
|
|
1620
|
map { $fh->print("\% $_ \n") } split(/$cr/, $objind->{' comments'}) if $objind->{' comments'}; |
|
0
|
|
|
|
|
0
|
|
986
|
752
|
|
|
|
|
2271
|
$self->{' locs'}{$objind->uid()} = $fh->tell(); |
987
|
752
|
|
|
|
|
1730
|
my ($objnum, $objgen) = @{$self->{' objects'}{$objind->uid()}}[0..1]; |
|
752
|
|
|
|
|
1593
|
|
988
|
752
|
|
|
|
|
2532
|
$fh->printf('%d %d obj ', $objnum, $objgen); |
989
|
752
|
|
|
|
|
8453
|
$objind->outobjdeep($fh, $self); |
990
|
752
|
|
|
|
|
2017
|
$fh->print("\nendobj\n"); |
991
|
|
|
|
|
|
|
|
992
|
|
|
|
|
|
|
# Note that we've output this obj, not forgetting to update |
993
|
|
|
|
|
|
|
# the cache of what's printed. |
994
|
752
|
50
|
|
|
|
5270
|
unless (exists $self->{' printed_cache'}{$objind}) { |
995
|
752
|
|
|
|
|
1101
|
push @{$self->{' printed'}}, $objind; |
|
752
|
|
|
|
|
2202
|
|
996
|
752
|
|
|
|
|
2783
|
$self->{' printed_cache'}{$objind}++; |
997
|
|
|
|
|
|
|
} |
998
|
|
|
|
|
|
|
} |
999
|
|
|
|
|
|
|
|
1000
|
130
|
|
|
|
|
384
|
return $self; |
1001
|
|
|
|
|
|
|
} # end of ship_out() |
1002
|
|
|
|
|
|
|
|
1003
|
|
|
|
|
|
|
=head2 $p->copy($outpdf, \&filter) |
1004
|
|
|
|
|
|
|
|
1005
|
|
|
|
|
|
|
Iterates over every object in the file reading the object, calling C |
1006
|
|
|
|
|
|
|
with the object, and outputting the result. If C is not defined, |
1007
|
|
|
|
|
|
|
just copies input to output. |
1008
|
|
|
|
|
|
|
|
1009
|
|
|
|
|
|
|
=cut |
1010
|
|
|
|
|
|
|
|
1011
|
|
|
|
|
|
|
sub copy { |
1012
|
0
|
|
|
0
|
1
|
0
|
my ($self, $outpdf, $filter) = @_; |
1013
|
0
|
|
|
|
|
0
|
my ($obj, $minl, $mini, $ming); |
1014
|
|
|
|
|
|
|
|
1015
|
0
|
|
|
|
|
0
|
foreach my $key (grep { not m/^[\s\-]/ } keys %$self) { |
|
0
|
|
|
|
|
0
|
|
1016
|
0
|
0
|
|
|
|
0
|
$outpdf->{$key} = $self->{$key} unless defined $outpdf->{$key}; |
1017
|
|
|
|
|
|
|
} |
1018
|
|
|
|
|
|
|
|
1019
|
0
|
|
|
|
|
0
|
my $tdict = $self; |
1020
|
0
|
|
|
|
|
0
|
while (defined $tdict) { |
1021
|
0
|
|
|
|
|
0
|
foreach my $i (sort {$a <=> $b} keys %{$tdict->{' xref'}}) { |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
1022
|
0
|
|
|
|
|
0
|
my ($nl, $ng, $nt) = @{$tdict->{' xref'}{$i}}; |
|
0
|
|
|
|
|
0
|
|
1023
|
0
|
0
|
|
|
|
0
|
next unless $nt eq 'n'; |
1024
|
|
|
|
|
|
|
|
1025
|
0
|
0
|
0
|
|
|
0
|
if ($nl < $minl or $mini == 0) { |
1026
|
0
|
|
|
|
|
0
|
$mini = $i; |
1027
|
0
|
|
|
|
|
0
|
$ming = $ng; |
1028
|
0
|
|
|
|
|
0
|
$minl = $nl; |
1029
|
|
|
|
|
|
|
} |
1030
|
0
|
0
|
|
|
|
0
|
unless ($obj = $self->test_obj($i, $ng)) { |
1031
|
0
|
|
|
|
|
0
|
$obj = PDF::Builder::Basic::PDF::Objind->new(); |
1032
|
0
|
|
|
|
|
0
|
$obj->{' objnum'} = $i; |
1033
|
0
|
|
|
|
|
0
|
$obj->{' objgen'} = $ng; |
1034
|
0
|
|
|
|
|
0
|
$self->add_obj($obj, $i, $ng); |
1035
|
0
|
|
|
|
|
0
|
$obj->{' parent'} = $self; |
1036
|
0
|
|
|
|
|
0
|
weaken $obj->{' parent'}; |
1037
|
0
|
|
|
|
|
0
|
$obj->{' realised'} = 0; |
1038
|
|
|
|
|
|
|
} |
1039
|
0
|
|
|
|
|
0
|
$obj->realise(); |
1040
|
0
|
0
|
|
|
|
0
|
my $res = defined $filter ? &{$filter}($obj) : $obj; |
|
0
|
|
|
|
|
0
|
|
1041
|
0
|
0
|
0
|
|
|
0
|
$outpdf->new_obj($res) unless (!$res || $res->is_obj($outpdf)); |
1042
|
|
|
|
|
|
|
} |
1043
|
0
|
|
|
|
|
0
|
$tdict = $tdict->{' prev'}; |
1044
|
|
|
|
|
|
|
} |
1045
|
|
|
|
|
|
|
|
1046
|
|
|
|
|
|
|
# test for linearized and remove it from output |
1047
|
0
|
|
|
|
|
0
|
$obj = $self->test_obj($mini, $ming); |
1048
|
0
|
0
|
0
|
|
|
0
|
if ($obj->isa('PDF::Builder::Basic::PDF::Dict') && $obj->{'Linearized'}) { |
1049
|
0
|
|
|
|
|
0
|
$outpdf->free_obj($obj); |
1050
|
|
|
|
|
|
|
} |
1051
|
|
|
|
|
|
|
|
1052
|
0
|
|
|
|
|
0
|
return $self; |
1053
|
|
|
|
|
|
|
} # end of copy() |
1054
|
|
|
|
|
|
|
|
1055
|
|
|
|
|
|
|
=head1 PRIVATE METHODS & FUNCTIONS |
1056
|
|
|
|
|
|
|
|
1057
|
|
|
|
|
|
|
The following methods and functions are considered B to this class. |
1058
|
|
|
|
|
|
|
This does not mean you cannot use them if you have a need, just that they |
1059
|
|
|
|
|
|
|
aren't really designed for users of this class. |
1060
|
|
|
|
|
|
|
|
1061
|
|
|
|
|
|
|
=head2 $offset = $p->locate_obj($num, $gen) |
1062
|
|
|
|
|
|
|
|
1063
|
|
|
|
|
|
|
Returns a file offset to the object asked for by following the chain of cross |
1064
|
|
|
|
|
|
|
reference tables until it finds the one you want. |
1065
|
|
|
|
|
|
|
|
1066
|
|
|
|
|
|
|
=cut |
1067
|
|
|
|
|
|
|
|
1068
|
|
|
|
|
|
|
sub locate_obj { |
1069
|
76
|
|
|
76
|
1
|
154
|
my ($self, $num, $gen) = @_; |
1070
|
|
|
|
|
|
|
|
1071
|
76
|
|
|
|
|
116
|
my $tdict = $self; |
1072
|
76
|
|
|
|
|
157
|
while (defined $tdict) { |
1073
|
85
|
100
|
|
|
|
268
|
if (ref $tdict->{' xref'}{$num}) { |
1074
|
76
|
|
|
|
|
124
|
my $ref = $tdict->{' xref'}{$num}; |
1075
|
76
|
100
|
|
|
|
176
|
return $ref unless scalar(@$ref) == 3; |
1076
|
|
|
|
|
|
|
|
1077
|
72
|
50
|
|
|
|
185
|
if ($ref->[1] == $gen) { |
1078
|
72
|
50
|
|
|
|
345
|
return $ref->[0] if $ref->[2] eq 'n'; |
1079
|
0
|
|
|
|
|
0
|
return; # if $ref->[2] eq 'f'; |
1080
|
|
|
|
|
|
|
} |
1081
|
|
|
|
|
|
|
} |
1082
|
9
|
|
|
|
|
19
|
$tdict = $tdict->{' prev'}; |
1083
|
|
|
|
|
|
|
} |
1084
|
|
|
|
|
|
|
|
1085
|
0
|
|
|
|
|
0
|
return; |
1086
|
|
|
|
|
|
|
} |
1087
|
|
|
|
|
|
|
|
1088
|
|
|
|
|
|
|
=head2 update($fh, $str, $instream) |
1089
|
|
|
|
|
|
|
|
1090
|
|
|
|
|
|
|
Keeps reading C<$fh> for more data to ensure that C<$str> has at least a line |
1091
|
|
|
|
|
|
|
full for C to work on. At this point we also take the opportunity to |
1092
|
|
|
|
|
|
|
ignore comments. |
1093
|
|
|
|
|
|
|
|
1094
|
|
|
|
|
|
|
=cut |
1095
|
|
|
|
|
|
|
|
1096
|
|
|
|
|
|
|
sub update { |
1097
|
2859
|
|
|
2859
|
1
|
4755
|
my ($fh, $str, $instream) = @_; |
1098
|
|
|
|
|
|
|
|
1099
|
2859
|
50
|
|
|
|
4264
|
print STDERR 'fpos=' . tell($fh) . ' strlen=' . length($str) . "\n" if $readDebug; |
1100
|
2859
|
100
|
|
|
|
3896
|
if ($instream) { |
1101
|
|
|
|
|
|
|
# we are inside a (possible binary) stream |
1102
|
|
|
|
|
|
|
# so we fetch data till we see an 'endstream' |
1103
|
|
|
|
|
|
|
# -- fredo/2004-09-03 |
1104
|
11
|
|
33
|
|
|
71
|
while ($str !~ m/endstream/ and not $fh->eof()) { |
1105
|
0
|
0
|
|
|
|
0
|
print STDERR 'fpos=' . tell($fh) . ' strlen=' . length($str) . "\n" if $readDebug; |
1106
|
0
|
|
|
|
|
0
|
$fh->read($str, 314, length($str)); |
1107
|
|
|
|
|
|
|
} |
1108
|
|
|
|
|
|
|
} else { |
1109
|
2848
|
|
|
|
|
10007
|
$str =~ s/^$ws_char*//; |
1110
|
2848
|
|
100
|
|
|
115631
|
while ($str !~ m/$cr/ and not $fh->eof()) { |
1111
|
107
|
50
|
|
|
|
909
|
print STDERR 'fpos=' . tell($fh) . ' strlen=' . length($str) . "\n" if $readDebug; |
1112
|
107
|
|
|
|
|
385
|
$fh->read($str, 314, length($str)); |
1113
|
107
|
|
|
|
|
3702
|
$str =~ s/^$ws_char*//so; |
1114
|
|
|
|
|
|
|
} |
1115
|
2848
|
|
|
|
|
6114
|
while ($str =~ m/^\%/) { # restructured by fredo/2003-03-23 |
1116
|
1
|
50
|
|
|
|
4
|
print STDERR 'fpos=' . tell($fh) . ' strlen=' . length($str) . "\n" if $readDebug; |
1117
|
1
|
|
33
|
|
|
32
|
$fh->read($str, 314, length($str)) while ($str !~ m/$cr/ and not $fh->eof()); |
1118
|
1
|
|
|
|
|
25
|
$str =~ s/^\%[^\015\012]*$ws_char*//so; # fixed for reportlab -- fredo |
1119
|
|
|
|
|
|
|
} |
1120
|
|
|
|
|
|
|
} |
1121
|
|
|
|
|
|
|
|
1122
|
2859
|
|
|
|
|
5846
|
return $str; |
1123
|
|
|
|
|
|
|
} # end of update() |
1124
|
|
|
|
|
|
|
|
1125
|
|
|
|
|
|
|
=head2 $objind = $p->test_obj($num, $gen) |
1126
|
|
|
|
|
|
|
|
1127
|
|
|
|
|
|
|
Tests the cache to see whether an object reference (which may or may not have |
1128
|
|
|
|
|
|
|
been getobj()ed) has been cached. Returns it if it has. |
1129
|
|
|
|
|
|
|
|
1130
|
|
|
|
|
|
|
=cut |
1131
|
|
|
|
|
|
|
|
1132
|
|
|
|
|
|
|
sub test_obj { |
1133
|
215
|
|
|
215
|
1
|
422
|
my ($self, $num, $gen) = @_; |
1134
|
|
|
|
|
|
|
|
1135
|
215
|
|
|
|
|
827
|
return $self->{' objcache'}{$num, $gen}; |
1136
|
|
|
|
|
|
|
} |
1137
|
|
|
|
|
|
|
|
1138
|
|
|
|
|
|
|
=head2 $p->add_obj($objind) |
1139
|
|
|
|
|
|
|
|
1140
|
|
|
|
|
|
|
Adds the given object to the internal object cache. |
1141
|
|
|
|
|
|
|
|
1142
|
|
|
|
|
|
|
=cut |
1143
|
|
|
|
|
|
|
|
1144
|
|
|
|
|
|
|
sub add_obj { |
1145
|
1141
|
|
|
1141
|
1
|
2543
|
my ($self, $obj, $num, $gen) = @_; |
1146
|
|
|
|
|
|
|
|
1147
|
1141
|
|
|
|
|
4305
|
$self->{' objcache'}{$num, $gen} = $obj; |
1148
|
1141
|
|
|
|
|
5096
|
$self->{' objects'}{$obj->uid()} = [$num, $gen]; |
1149
|
|
|
|
|
|
|
# weaken $self->{' objcache'}{$num, $gen}; |
1150
|
1141
|
|
|
|
|
2325
|
return $obj; |
1151
|
|
|
|
|
|
|
} |
1152
|
|
|
|
|
|
|
|
1153
|
|
|
|
|
|
|
=head2 $tdict = $p->readxrtr($xpos, %options) |
1154
|
|
|
|
|
|
|
|
1155
|
|
|
|
|
|
|
Recursive function which reads each of the cross-reference and trailer tables |
1156
|
|
|
|
|
|
|
in turn until there are no more. |
1157
|
|
|
|
|
|
|
|
1158
|
|
|
|
|
|
|
Returns a dictionary corresponding to the trailer chain. Each trailer also |
1159
|
|
|
|
|
|
|
includes the corresponding cross-reference table. |
1160
|
|
|
|
|
|
|
|
1161
|
|
|
|
|
|
|
The structure of the xref private element in a trailer dictionary is of an |
1162
|
|
|
|
|
|
|
anonymous hash of cross reference elements by object number. Each element |
1163
|
|
|
|
|
|
|
consists of an array of 3 elements corresponding to the three elements read |
1164
|
|
|
|
|
|
|
in [location, generation number, free or used]. See the PDF specification |
1165
|
|
|
|
|
|
|
for details. |
1166
|
|
|
|
|
|
|
|
1167
|
|
|
|
|
|
|
See C for options allowed. |
1168
|
|
|
|
|
|
|
|
1169
|
|
|
|
|
|
|
=cut |
1170
|
|
|
|
|
|
|
|
1171
|
|
|
|
|
|
|
sub _unpack_xref_stream { |
1172
|
78
|
|
|
78
|
|
147
|
my ($self, $width, $data) = @_; |
1173
|
|
|
|
|
|
|
|
1174
|
|
|
|
|
|
|
# handle some oddball cases |
1175
|
78
|
50
|
|
|
|
238
|
if ($width == 3) { |
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
1176
|
0
|
|
|
|
|
0
|
$data = "\x00$data"; |
1177
|
0
|
|
|
|
|
0
|
$width = 4; |
1178
|
|
|
|
|
|
|
} elsif ($width == 5) { |
1179
|
0
|
|
|
|
|
0
|
$data = "\x00\x00\x00$data"; |
1180
|
0
|
|
|
|
|
0
|
$width = 8; |
1181
|
|
|
|
|
|
|
} elsif ($width == 6) { |
1182
|
0
|
|
|
|
|
0
|
$data = "\x00\x00$data"; |
1183
|
0
|
|
|
|
|
0
|
$width = 8; |
1184
|
|
|
|
|
|
|
} elsif ($width == 7) { |
1185
|
0
|
|
|
|
|
0
|
$data = "\x00$data"; |
1186
|
0
|
|
|
|
|
0
|
$width = 8; |
1187
|
|
|
|
|
|
|
} |
1188
|
|
|
|
|
|
|
# in all cases, "Network" (Big-Endian) byte order assumed |
1189
|
78
|
100
|
|
|
|
158
|
return unpack('C', $data) if $width == 1; |
1190
|
52
|
50
|
|
|
|
117
|
return unpack('n', $data) if $width == 2; |
1191
|
0
|
0
|
|
|
|
0
|
return unpack('N', $data) if $width == 4; |
1192
|
0
|
0
|
|
|
|
0
|
if ($width == 8) { |
1193
|
|
|
|
|
|
|
# Some ways other packages handle this, without Perl-64, according |
1194
|
|
|
|
|
|
|
# to Vadim Repin. Possibly they end up converting the value to |
1195
|
|
|
|
|
|
|
# "double" behind the scenes if on a 32-bit platform? |
1196
|
|
|
|
|
|
|
# PDF::Tiny return hex unpack('H16', $data); |
1197
|
|
|
|
|
|
|
# CAM::PDF my @b = unpack('C*', $data); |
1198
|
|
|
|
|
|
|
# my $i=0; ($i <<= 8) += shift @b while @b; return $i; |
1199
|
|
|
|
|
|
|
|
1200
|
0
|
0
|
|
|
|
0
|
if (substr($data, 0, 4) eq "\x00\x00\x00\x00") { |
1201
|
|
|
|
|
|
|
# can treat as 32 bit unsigned int |
1202
|
0
|
|
|
|
|
0
|
return unpack('N', substr($data, 4, 4)); |
1203
|
|
|
|
|
|
|
} else { |
1204
|
|
|
|
|
|
|
# requires 64-bit platform (chip and Perl), else fatal error |
1205
|
|
|
|
|
|
|
# it may blow up and produce a smoking crater if 32-bit Perl! |
1206
|
|
|
|
|
|
|
# also note that Q needs Big-Endian flag (>) specified, else |
1207
|
|
|
|
|
|
|
# it will use the native chip order (Big- or Little- Endian) |
1208
|
0
|
|
|
|
|
0
|
return unpack('Q>', $data); |
1209
|
|
|
|
|
|
|
} |
1210
|
|
|
|
|
|
|
} |
1211
|
|
|
|
|
|
|
|
1212
|
0
|
|
|
|
|
0
|
die "Unsupported field width: $width. 1-8 supported."; |
1213
|
|
|
|
|
|
|
} |
1214
|
|
|
|
|
|
|
|
1215
|
|
|
|
|
|
|
sub readxrtr { |
1216
|
18
|
|
|
18
|
1
|
79
|
my ($self, $xpos, %options) = @_; |
1217
|
|
|
|
|
|
|
# $xpos SHOULD be pointing to "xref" keyword |
1218
|
18
|
|
|
|
|
46
|
my ($tdict, $buf, $xmin, $xnum, $xdiff); |
1219
|
|
|
|
|
|
|
|
1220
|
18
|
|
|
|
|
51
|
my $fh = $self->{' INFILE'}; |
1221
|
18
|
|
|
|
|
72
|
$fh->seek($xpos, 0); |
1222
|
18
|
|
|
|
|
136
|
$fh->read($buf, 22); # 22 should overlap into first subsection |
1223
|
18
|
|
|
|
|
130
|
$buf = update($fh, $buf); # fix for broken JAWS xref calculation. |
1224
|
|
|
|
|
|
|
|
1225
|
18
|
|
|
|
|
79
|
my $xlist = {}; |
1226
|
|
|
|
|
|
|
|
1227
|
|
|
|
|
|
|
## it seems that some products calculate wrong prev entries (short) |
1228
|
|
|
|
|
|
|
## so we seek ahead to find one -- fredo; save for now |
1229
|
|
|
|
|
|
|
#while ($buf !~ m/^xref$cr/i && !eof($fh)) { |
1230
|
|
|
|
|
|
|
# $buf =~ s/^(\s+|\S+|.)//i; |
1231
|
|
|
|
|
|
|
# $buf = update($fh, $buf); |
1232
|
|
|
|
|
|
|
#} |
1233
|
|
|
|
|
|
|
|
1234
|
18
|
100
|
|
|
|
272
|
if ($buf =~ s/^xref$cr//i) { # remove xrefEOL from buffer |
|
|
50
|
|
|
|
|
|
1235
|
|
|
|
|
|
|
# Plain XRef tables. |
1236
|
|
|
|
|
|
|
# |
1237
|
|
|
|
|
|
|
# look to match startobj# count# EOL of first (or only) subsection |
1238
|
|
|
|
|
|
|
# supposed to be single ASCII space between numbers, but this is |
1239
|
|
|
|
|
|
|
# more lenient for some writers, allowing 1 or more whitespace |
1240
|
15
|
|
|
|
|
30
|
my $subsection_count = 0; |
1241
|
15
|
|
|
|
|
26
|
my $entry_format_error = 0; |
1242
|
15
|
|
|
|
|
32
|
my $xrefListEmpty = 0; |
1243
|
|
|
|
|
|
|
|
1244
|
15
|
|
|
|
|
420
|
while ($buf =~ m/^$ws_char*([0-9]+)$ws_char+([0-9]+)$ws_char*$cr(.*?)$/s) { |
1245
|
20
|
|
|
|
|
44
|
my $old_buf = $buf; |
1246
|
20
|
|
|
|
|
48
|
$xmin = $1; # starting object number of this subsection |
1247
|
20
|
|
|
|
|
37
|
$xnum = $2; # number of entries in this subsection |
1248
|
20
|
|
|
|
|
34
|
$buf = $3; # remainder of buffer |
1249
|
20
|
|
|
|
|
31
|
$subsection_count++; |
1250
|
|
|
|
|
|
|
# go back and warn if other than single space separating numbers |
1251
|
20
|
50
|
|
|
|
308
|
unless ($old_buf =~ /^[0-9]+ [0-9]+$cr/) { #orig 'warn' |
1252
|
0
|
0
|
|
|
|
0
|
if ($options{'-diags'} == 1) { |
1253
|
|
|
|
|
|
|
# See PDF 1.7 section 7.5.4: Cross-Reference Table |
1254
|
0
|
|
|
|
|
0
|
warn "Malformed xref: subsection header needs a single\n" . |
1255
|
|
|
|
|
|
|
"ASCII space between the numbers and no extra spaces.\n"; |
1256
|
|
|
|
|
|
|
} |
1257
|
|
|
|
|
|
|
} |
1258
|
20
|
|
|
|
|
45
|
$xdiff = length($buf); # how much remaining in buffer |
1259
|
|
|
|
|
|
|
|
1260
|
|
|
|
|
|
|
# in case xnum == 0 is permitted (or used and tolerated by readers), |
1261
|
|
|
|
|
|
|
# skip over entry reads and go to next subsection |
1262
|
20
|
50
|
|
|
|
68
|
if ($xnum < 1) { |
1263
|
0
|
0
|
|
|
|
0
|
if ($options{'-diags'} == 1) { |
1264
|
0
|
|
|
|
|
0
|
warn "Xref subsection has 0 entries. Skipped.\n"; |
1265
|
|
|
|
|
|
|
} |
1266
|
0
|
|
|
|
|
0
|
$xrefListEmpty = 1; |
1267
|
0
|
|
|
|
|
0
|
next; |
1268
|
|
|
|
|
|
|
} |
1269
|
|
|
|
|
|
|
|
1270
|
|
|
|
|
|
|
# read chunk of entire subsection list |
1271
|
20
|
|
|
|
|
32
|
my $entry_size = 20; |
1272
|
|
|
|
|
|
|
# test read first entry, see if $cr in expected place, adjust size |
1273
|
20
|
|
|
|
|
84
|
$fh->read($buf, $entry_size * 1 - $xdiff + 15, $xdiff); |
1274
|
20
|
50
|
|
|
|
435
|
if ($buf =~ m/^(.*?)$cr/) { |
1275
|
20
|
|
|
|
|
87
|
$entry_size = length($1) + 2; |
1276
|
|
|
|
|
|
|
} |
1277
|
20
|
50
|
33
|
|
|
77
|
if ($entry_size != 20 && $options{'-diags'} == 1) { |
1278
|
0
|
|
|
|
|
0
|
warn "Xref entries supposed to be 20 bytes long, are $entry_size.\n"; |
1279
|
|
|
|
|
|
|
} |
1280
|
20
|
|
|
|
|
36
|
$xdiff = length($buf); |
1281
|
|
|
|
|
|
|
|
1282
|
|
|
|
|
|
|
# read remaining entries |
1283
|
20
|
|
|
|
|
142
|
$fh->read($buf, $entry_size * $xnum - $xdiff + 15, $xdiff); |
1284
|
|
|
|
|
|
|
# each entry is two integers and flag. spec says single ASCII space |
1285
|
|
|
|
|
|
|
# between each field and certain length for each (10, 5, 1), so |
1286
|
|
|
|
|
|
|
# this appears to be more lenient than spec |
1287
|
|
|
|
|
|
|
# is object 0 supposed to be in subsection 1, or is any place OK? |
1288
|
20
|
|
66
|
|
|
656
|
while ($xnum-- > 0 and |
1289
|
|
|
|
|
|
|
$buf =~ m/^$ws_char*(\d+)$ws_char+(\d+)$ws_char+([nf])$ws_char*$cr/) { |
1290
|
|
|
|
|
|
|
# check if format doesn't match spec |
1291
|
104
|
50
|
33
|
|
|
663
|
if ($buf =~ m/^\d{10} \d{5} [nf]$cr/ || |
1292
|
|
|
|
|
|
|
$entry_format_error) { |
1293
|
|
|
|
|
|
|
# format OK or have already reported format problem |
1294
|
|
|
|
|
|
|
} else { |
1295
|
0
|
0
|
|
|
|
0
|
if ($options{'-diags'} == 1) { |
1296
|
0
|
|
|
|
|
0
|
warn "Xref entry readable, but doesn't meet PDF spec.\n"; |
1297
|
|
|
|
|
|
|
} |
1298
|
0
|
|
|
|
|
0
|
$entry_format_error++; |
1299
|
|
|
|
|
|
|
} |
1300
|
|
|
|
|
|
|
|
1301
|
104
|
|
|
|
|
779
|
$buf =~ s/^$ws_char*(\d+)$ws_char+(\d+)$ws_char+([nf])$ws_char*$cr//; |
1302
|
|
|
|
|
|
|
# $1 = object's starting offset in file (n) or |
1303
|
|
|
|
|
|
|
# next object in free list (f) [0 if last] |
1304
|
|
|
|
|
|
|
# $2 = generation number (n) or 65535 for object 0 (f) or |
1305
|
|
|
|
|
|
|
# next generation number (f) |
1306
|
|
|
|
|
|
|
# $3 = flag (n = object in use, f = free) |
1307
|
|
|
|
|
|
|
# buf reduced by entry just processed |
1308
|
104
|
50
|
|
|
|
234
|
if (exists $xlist->{$xmin}) { |
1309
|
0
|
0
|
|
|
|
0
|
if ($options{'-diags'} == 1) { |
1310
|
0
|
|
|
|
|
0
|
warn "Duplicate object number $xmin in xref table ignored.\n"; |
1311
|
|
|
|
|
|
|
} |
1312
|
|
|
|
|
|
|
} else { |
1313
|
104
|
|
|
|
|
379
|
$xlist->{$xmin} = [$1, $2, $3]; |
1314
|
104
|
50
|
66
|
|
|
288
|
if ($xmin == 0 && $subsection_count > 1 && $options{'-diags'} == 1) { |
|
|
|
33
|
|
|
|
|
1315
|
0
|
|
|
|
|
0
|
warn "Xref object 0 entry not in first subsection.\n"; |
1316
|
|
|
|
|
|
|
} |
1317
|
|
|
|
|
|
|
} |
1318
|
104
|
|
|
|
|
656
|
$xmin++; |
1319
|
|
|
|
|
|
|
} # traverse one subsection for objects xmin through xmin+xnum-1 |
1320
|
|
|
|
|
|
|
# go back for next subsection (if any) |
1321
|
|
|
|
|
|
|
} # loop through xref subsections |
1322
|
|
|
|
|
|
|
# fall through to here when run out of xref subsections |
1323
|
|
|
|
|
|
|
# xlist should have two or more object entries, may not be contiguous |
1324
|
|
|
|
|
|
|
|
1325
|
|
|
|
|
|
|
# should have an object 0 |
1326
|
|
|
|
|
|
|
# at this point, no idea if object 0 was in first subsection (legal?) |
1327
|
|
|
|
|
|
|
# could attempt a fixup if no object 0 found. many fixups are quite |
1328
|
|
|
|
|
|
|
# risky and could end up corrupting the free list. |
1329
|
|
|
|
|
|
|
# there's no guarantee that a proper free list will result, but any |
1330
|
|
|
|
|
|
|
# error should hopefully be caught further on |
1331
|
15
|
0
|
33
|
|
|
51
|
if (!exists $xlist->{'0'} && !$xrefListEmpty) { |
1332
|
|
|
|
|
|
|
# for now, 1 subsection starting with 1, and only object 1 in |
1333
|
|
|
|
|
|
|
# free list, try to fix up |
1334
|
0
|
0
|
0
|
|
|
0
|
if ($subsection_count == 1 && exists $xlist->{'1'}) { |
1335
|
|
|
|
|
|
|
# apparently a common enough error in PDF writers |
1336
|
|
|
|
|
|
|
|
1337
|
0
|
0
|
0
|
|
|
0
|
if ($xlist->{'1'}[0] == 0 && # only member of free list |
|
|
|
0
|
|
|
|
|
1338
|
|
|
|
|
|
|
$xlist->{'1'}[1] == 65535 && |
1339
|
|
|
|
|
|
|
$xlist->{'1'}[2] eq 'f') { |
1340
|
|
|
|
|
|
|
# object 1 appears to be the free list head, so shift |
1341
|
|
|
|
|
|
|
# down all objects |
1342
|
0
|
0
|
|
|
|
0
|
if ($options{'-diags'} == 1) { |
1343
|
0
|
|
|
|
|
0
|
warn "xref appears to be mislabeled starting with 1. Shift down all elements.\n"; |
1344
|
|
|
|
|
|
|
} |
1345
|
0
|
|
|
|
|
0
|
my $next = 1; |
1346
|
0
|
|
|
|
|
0
|
while (exists $xlist->{$next}) { |
1347
|
0
|
|
|
|
|
0
|
$xlist->{$next - 1} = $xlist->{$next}; |
1348
|
0
|
|
|
|
|
0
|
$next++; |
1349
|
|
|
|
|
|
|
} |
1350
|
0
|
|
|
|
|
0
|
delete $xlist->{--$next}; |
1351
|
|
|
|
|
|
|
|
1352
|
|
|
|
|
|
|
} else { |
1353
|
|
|
|
|
|
|
# if object 1 does not appear to be a free list head, |
1354
|
|
|
|
|
|
|
# insert a new object 0 |
1355
|
0
|
0
|
|
|
|
0
|
if ($options{'-diags'} == 1) { |
1356
|
0
|
|
|
|
|
0
|
warn "Xref appears to be missing object 0. Insert a new one.\n"; |
1357
|
|
|
|
|
|
|
} |
1358
|
0
|
|
|
|
|
0
|
$xlist->{'0'} = [0, 65535, 'f']; |
1359
|
|
|
|
|
|
|
} |
1360
|
|
|
|
|
|
|
} else { |
1361
|
0
|
0
|
|
|
|
0
|
if ($options{'-diags'} == 1) { |
1362
|
0
|
|
|
|
|
0
|
warn "Malformed cross reference list in PDF file $self->{' fname'} -- no object 0 (free list head)\n"; |
1363
|
|
|
|
|
|
|
} |
1364
|
0
|
|
|
|
|
0
|
$xlist->{'0'} = [0, 65535, 'f']; |
1365
|
|
|
|
|
|
|
} |
1366
|
|
|
|
|
|
|
} # no object 0 entry |
1367
|
|
|
|
|
|
|
|
1368
|
|
|
|
|
|
|
# build/validate the free list (and no active objects have f flag) |
1369
|
15
|
|
|
|
|
50
|
my @free_list; |
1370
|
15
|
|
|
|
|
28
|
foreach (sort {$a <=> $b} keys %{ $xlist }) { |
|
183
|
|
|
|
|
264
|
|
|
15
|
|
|
|
|
121
|
|
1371
|
|
|
|
|
|
|
# if 'f' flag, is in free list |
1372
|
104
|
100
|
|
|
|
261
|
if ($xlist->{$_}[2] eq 'f') { |
|
|
50
|
|
|
|
|
|
1373
|
15
|
50
|
33
|
|
|
93
|
if ($xlist->{$_}[1] <= 0 && $options{'-diags'} == 1) { |
1374
|
0
|
|
|
|
|
0
|
warn "Xref free list entry $_ with bad next generation number.\n"; |
1375
|
|
|
|
|
|
|
} else { |
1376
|
15
|
|
|
|
|
45
|
push @free_list, $_; # should be in numeric order (0 first) |
1377
|
|
|
|
|
|
|
} |
1378
|
|
|
|
|
|
|
} elsif ($xlist->{$_}[2] eq 'n') { |
1379
|
89
|
50
|
33
|
|
|
187
|
if ($xlist->{$_}[0] <= 0 && $options{'-diags'} == 1) { |
1380
|
0
|
|
|
|
|
0
|
warn "Xref active object $_ entry with bad length ".($xlist->{$_}[1])."\n"; |
1381
|
|
|
|
|
|
|
} |
1382
|
89
|
50
|
33
|
|
|
253
|
if ($xlist->{$_}[1] < 0 && $options{'-diags'} == 1) { |
1383
|
0
|
|
|
|
|
0
|
warn "Xref active object $_ entry with bad generation number ".($xlist->{$_}[1])."\n"; |
1384
|
|
|
|
|
|
|
} |
1385
|
|
|
|
|
|
|
} else { |
1386
|
0
|
0
|
|
|
|
0
|
if ($options{'-diags'} == 1) { |
1387
|
0
|
|
|
|
|
0
|
warn "Xref entry has flag that is not 'f' or 'n'.\n"; |
1388
|
|
|
|
|
|
|
} |
1389
|
|
|
|
|
|
|
} |
1390
|
|
|
|
|
|
|
} # go through xlist and build free_list and check entries |
1391
|
|
|
|
|
|
|
# traverse free list and check that "next object" is also in free list |
1392
|
15
|
|
|
|
|
40
|
my $next_free = 0; # object 0 should always be in free list |
1393
|
15
|
50
|
33
|
|
|
88
|
if ($xlist->{'0'}[1] != 65535 && $options{'-diags'} == 1) { |
1394
|
0
|
|
|
|
|
0
|
warn "Object 0 next generation is not 65535.\n"; |
1395
|
|
|
|
|
|
|
} |
1396
|
|
|
|
|
|
|
do { |
1397
|
15
|
50
|
|
|
|
58
|
if ($xlist->{$next_free}[2] ne 'f') { |
1398
|
0
|
0
|
|
|
|
0
|
if ($options{'-diags'} == 1) { |
1399
|
0
|
|
|
|
|
0
|
warn "Corrupted free object list: next=$next_free is not a free object.\n"; |
1400
|
|
|
|
|
|
|
} |
1401
|
0
|
|
|
|
|
0
|
$next_free = 0; # force end of free list |
1402
|
|
|
|
|
|
|
} else { |
1403
|
15
|
|
|
|
|
38
|
$next_free = $xlist->{$next_free}[0]; |
1404
|
|
|
|
|
|
|
} |
1405
|
|
|
|
|
|
|
# remove this entry from free list array |
1406
|
15
|
|
|
|
|
114
|
splice(@free_list, index(@free_list, $next_free), 1); |
1407
|
15
|
|
33
|
|
|
53
|
} while ($next_free && exists $xlist->{$next_free}); |
1408
|
15
|
50
|
33
|
|
|
65
|
if (scalar @free_list && $options{'-diags'} == 1) { |
1409
|
0
|
|
|
|
|
0
|
warn "Corrupted xref list: object(s) @free_list marked as free, but are not in free chain.\n"; |
1410
|
|
|
|
|
|
|
} |
1411
|
|
|
|
|
|
|
|
1412
|
|
|
|
|
|
|
# done with cross reference table, so go on to trailer |
1413
|
15
|
50
|
33
|
|
|
101
|
if ($buf !~ /^\s*trailer\b/i && $options{'-diags'} == 1) { #orig 'die' |
1414
|
0
|
|
|
|
|
0
|
warn "Malformed trailer in PDF file $self->{' fname'} at " . ($fh->tell() - length($buf)); |
1415
|
|
|
|
|
|
|
} |
1416
|
|
|
|
|
|
|
|
1417
|
15
|
|
|
|
|
59
|
$buf =~ s/^\s*trailer\b//i; |
1418
|
|
|
|
|
|
|
|
1419
|
15
|
|
|
|
|
94
|
($tdict, $buf) = $self->readval($buf); |
1420
|
|
|
|
|
|
|
|
1421
|
|
|
|
|
|
|
} elsif ($buf =~ m/^(\d+)\s+(\d+)\s+obj/i) { |
1422
|
3
|
|
|
|
|
15
|
my ($xref_obj, $xref_gen) = ($1, $2); |
1423
|
|
|
|
|
|
|
|
1424
|
3
|
|
|
|
|
23
|
PDF::Builder->verCheckOutput(1.5, "importing cross-reference stream"); |
1425
|
|
|
|
|
|
|
# XRef streams |
1426
|
3
|
|
|
|
|
16
|
($tdict, $buf) = $self->readval($buf); |
1427
|
|
|
|
|
|
|
|
1428
|
3
|
50
|
|
|
|
14
|
unless ($tdict->{' stream'}) { |
1429
|
0
|
0
|
|
|
|
0
|
if ($options{'-diags'} == 1) { |
1430
|
0
|
|
|
|
|
0
|
warn "Malformed XRefStm at $xref_obj $xref_gen obj in PDF file $self->{' fname'}"; |
1431
|
|
|
|
|
|
|
} |
1432
|
|
|
|
|
|
|
} |
1433
|
3
|
|
|
|
|
17
|
$tdict->read_stream(1); |
1434
|
|
|
|
|
|
|
|
1435
|
3
|
|
|
|
|
8
|
my $stream = $tdict->{' stream'}; |
1436
|
3
|
|
|
|
|
7
|
my @widths = map { $_->val() } @{$tdict->{'W'}->val()}; |
|
9
|
|
|
|
|
24
|
|
|
3
|
|
|
|
|
14
|
|
1437
|
|
|
|
|
|
|
|
1438
|
3
|
|
|
|
|
7
|
my $start = 0; |
1439
|
3
|
|
|
|
|
19
|
my $last; |
1440
|
|
|
|
|
|
|
|
1441
|
|
|
|
|
|
|
my @index; |
1442
|
3
|
100
|
|
|
|
11
|
if (defined $tdict->{'Index'}) { |
1443
|
1
|
|
|
|
|
2
|
@index = map { $_->val() } @{$tdict->{'Index'}->val()}; |
|
2
|
|
|
|
|
21
|
|
|
1
|
|
|
|
|
5
|
|
1444
|
|
|
|
|
|
|
} else { |
1445
|
2
|
|
|
|
|
8
|
@index = (0, $tdict->{'Size'}->val()); |
1446
|
|
|
|
|
|
|
} |
1447
|
|
|
|
|
|
|
|
1448
|
3
|
|
|
|
|
9
|
while (scalar @index) { |
1449
|
3
|
|
|
|
|
7
|
$start = shift(@index); |
1450
|
3
|
|
|
|
|
19
|
$last = $start + shift(@index) - 1; |
1451
|
|
|
|
|
|
|
|
1452
|
3
|
|
|
|
|
13
|
for my $i ($start...$last) { |
1453
|
|
|
|
|
|
|
# Replaced "for $xmin" because it creates a loop-specific local |
1454
|
|
|
|
|
|
|
# variable, and we need $xmin to be correct for maxobj below. |
1455
|
26
|
|
|
|
|
44
|
$xmin = $i; |
1456
|
|
|
|
|
|
|
|
1457
|
26
|
|
|
|
|
33
|
my @cols; |
1458
|
|
|
|
|
|
|
|
1459
|
26
|
|
|
|
|
39
|
for my $w (@widths) { |
1460
|
78
|
|
|
|
|
106
|
my $data; |
1461
|
78
|
50
|
|
|
|
214
|
$data = $self->_unpack_xref_stream($w, substr($stream, 0, $w, '')) if $w; |
1462
|
|
|
|
|
|
|
|
1463
|
78
|
|
|
|
|
154
|
push @cols, $data; |
1464
|
|
|
|
|
|
|
} |
1465
|
|
|
|
|
|
|
|
1466
|
26
|
100
|
|
|
|
50
|
$cols[0] = 1 unless defined $cols[0]; |
1467
|
26
|
50
|
33
|
|
|
50
|
if ($cols[0] > 2 && $options{'-diags'} == 1) { |
1468
|
0
|
|
|
|
|
0
|
warn "Invalid XRefStm entry type ($cols[0]) at $xref_obj $xref_gen obj"; |
1469
|
|
|
|
|
|
|
} |
1470
|
|
|
|
|
|
|
|
1471
|
26
|
50
|
|
|
|
63
|
next if exists $xlist->{$xmin}; |
1472
|
|
|
|
|
|
|
|
1473
|
26
|
50
|
|
|
|
61
|
my @objind = ($cols[1], defined($cols[2]) ? $cols[2] : ($xmin ? 0 : 65535)); |
|
|
100
|
|
|
|
|
|
1474
|
26
|
100
|
|
|
|
70
|
push @objind, ($cols[0] == 0? 'f': 'n') if $cols[0] < 2; |
|
|
100
|
|
|
|
|
|
1475
|
|
|
|
|
|
|
|
1476
|
26
|
|
|
|
|
107
|
$xlist->{$xmin} = \@objind; |
1477
|
|
|
|
|
|
|
} |
1478
|
|
|
|
|
|
|
} |
1479
|
|
|
|
|
|
|
|
1480
|
|
|
|
|
|
|
} else { #orig 'die' |
1481
|
0
|
0
|
|
|
|
0
|
if ($options{'-diags'} == 1) { |
1482
|
0
|
|
|
|
|
0
|
warn "Malformed xref in PDF file $self->{' fname'}"; |
1483
|
|
|
|
|
|
|
} |
1484
|
|
|
|
|
|
|
} |
1485
|
|
|
|
|
|
|
|
1486
|
|
|
|
|
|
|
# did we get to here without managing to set $xmin? |
1487
|
18
|
|
50
|
|
|
68
|
$xmin ||= 0; |
1488
|
|
|
|
|
|
|
|
1489
|
18
|
|
|
|
|
65
|
$tdict->{' loc'} = $xpos; |
1490
|
18
|
|
|
|
|
50
|
$tdict->{' xref'} = $xlist; |
1491
|
18
|
100
|
|
|
|
88
|
$self->{' maxobj'} = $xmin + 1 if $xmin + 1 > $self->{' maxobj'}; |
1492
|
|
|
|
|
|
|
$tdict->{' prev'} = $self->readxrtr($tdict->{'Prev'}->val(), %options) |
1493
|
18
|
100
|
66
|
|
|
83
|
if (defined $tdict->{'Prev'} and $tdict->{'Prev'}->val() != 0); |
1494
|
18
|
100
|
|
|
|
72
|
delete $tdict->{' prev'} unless defined $tdict->{' prev'}; |
1495
|
|
|
|
|
|
|
|
1496
|
18
|
|
|
|
|
63
|
return $tdict; |
1497
|
|
|
|
|
|
|
} # end of readxrtr() |
1498
|
|
|
|
|
|
|
|
1499
|
|
|
|
|
|
|
=head2 $p->out_trailer($tdict, $update) |
1500
|
|
|
|
|
|
|
|
1501
|
|
|
|
|
|
|
=head2 $p->out_trailer($tdict) |
1502
|
|
|
|
|
|
|
|
1503
|
|
|
|
|
|
|
Outputs the body and trailer for a PDF file by outputting all the objects in |
1504
|
|
|
|
|
|
|
the ' outlist' and then outputting a xref table for those objects and any |
1505
|
|
|
|
|
|
|
freed ones. It then outputs the trailing dictionary and the trailer code. |
1506
|
|
|
|
|
|
|
|
1507
|
|
|
|
|
|
|
=cut |
1508
|
|
|
|
|
|
|
|
1509
|
|
|
|
|
|
|
sub out_trailer { |
1510
|
127
|
|
|
127
|
1
|
506
|
my ($self, $tdict, $update) = @_; |
1511
|
|
|
|
|
|
|
|
1512
|
127
|
|
|
|
|
321
|
my $fh = $self->{' OUTFILE'}; |
1513
|
|
|
|
|
|
|
|
1514
|
127
|
|
|
|
|
221
|
while (@{$self->{' outlist'}}) { |
|
257
|
|
|
|
|
804
|
|
1515
|
130
|
|
|
|
|
504
|
$self->ship_out(); |
1516
|
|
|
|
|
|
|
} |
1517
|
|
|
|
|
|
|
|
1518
|
|
|
|
|
|
|
# $size = @{$self->{' printed'}} + @{$self->{' free'}}; |
1519
|
|
|
|
|
|
|
# $tdict->{'Size'} = PDFNum($tdict->{'Size'}->val() + $size); |
1520
|
|
|
|
|
|
|
# PDFSpec 1.3 says for /Size: (Required) Total number of entries in the file's |
1521
|
|
|
|
|
|
|
# cross-reference table, including the original table and all updates. Which |
1522
|
|
|
|
|
|
|
# is what the previous two lines implement. |
1523
|
|
|
|
|
|
|
# But this seems to make Acrobat croak on saving so we try the following from |
1524
|
|
|
|
|
|
|
# basil.duval@epfl.ch |
1525
|
127
|
|
|
|
|
632
|
$tdict->{'Size'} = PDFNum($self->{' maxobj'}); |
1526
|
|
|
|
|
|
|
|
1527
|
127
|
|
|
|
|
440
|
my $tloc = $fh->tell(); |
1528
|
|
|
|
|
|
|
## $fh->print("xref\n"); |
1529
|
|
|
|
|
|
|
# instead of directly outputting (fh->print) xreflist, we accumulate in @out |
1530
|
127
|
|
|
|
|
669
|
my @out; |
1531
|
127
|
100
|
|
|
|
255
|
my @xreflist = sort { $self->{' objects'}{$a->uid()}[0] <=> $self->{' objects'}{$b->uid()}[0] } (@{$self->{' printed'} || []}, @{$self->{' free'} || []}); |
|
1116
|
100
|
|
|
|
2466
|
|
|
127
|
|
|
|
|
459
|
|
|
127
|
|
|
|
|
794
|
|
1532
|
|
|
|
|
|
|
|
1533
|
127
|
|
|
|
|
339
|
my ($i, $j, $k); |
1534
|
127
|
100
|
|
|
|
329
|
unless ($update) { |
1535
|
119
|
|
|
|
|
205
|
$i = 1; |
1536
|
119
|
|
|
|
|
374
|
for ($j = 0; $j < @xreflist; $j++) { |
1537
|
730
|
|
|
|
|
976
|
my @inserts; |
1538
|
730
|
|
|
|
|
1042
|
$k = $xreflist[$j]; |
1539
|
730
|
|
|
|
|
1562
|
while ($i < $self->{' objects'}{$k->uid()}[0]) { |
1540
|
0
|
|
|
|
|
0
|
my ($n) = PDF::Builder::Basic::PDF::Objind->new(); |
1541
|
0
|
|
|
|
|
0
|
$self->add_obj($n, $i, 0); |
1542
|
0
|
|
|
|
|
0
|
$self->free_obj($n); |
1543
|
0
|
|
|
|
|
0
|
push(@inserts, $n); |
1544
|
0
|
|
|
|
|
0
|
$i++; |
1545
|
|
|
|
|
|
|
} |
1546
|
730
|
|
|
|
|
1191
|
splice(@xreflist, $j, 0, @inserts); |
1547
|
730
|
|
|
|
|
1022
|
$j += @inserts; |
1548
|
730
|
|
|
|
|
1530
|
$i++; |
1549
|
|
|
|
|
|
|
} |
1550
|
|
|
|
|
|
|
} |
1551
|
|
|
|
|
|
|
|
1552
|
127
|
100
|
|
|
|
241
|
my @freelist = sort { $self->{' objects'}{$a->uid()}[0] <=> $self->{' objects'}{$b->uid()}[0] } @{$self->{' free'} || []}; |
|
0
|
|
|
|
|
0
|
|
|
127
|
|
|
|
|
475
|
|
1553
|
|
|
|
|
|
|
|
1554
|
127
|
|
|
|
|
247
|
$j = 0; my $first = -1; $k = 0; |
|
127
|
|
|
|
|
205
|
|
|
127
|
|
|
|
|
255
|
|
1555
|
127
|
|
|
|
|
440
|
for ($i = 0; $i <= $#xreflist + 1; $i++) { |
1556
|
|
|
|
|
|
|
# if ($i == 0) { |
1557
|
|
|
|
|
|
|
# $first = $i; $j = $xreflist[0]->{' objnum'}; |
1558
|
|
|
|
|
|
|
# $fh->printf("0 1\n%010d 65535 f \n", $ff); |
1559
|
|
|
|
|
|
|
# } |
1560
|
879
|
100
|
100
|
|
|
2556
|
if ($i > $#xreflist || $self->{' objects'}{$xreflist[$i]->uid()}[0] != $j + 1) { |
1561
|
|
|
|
|
|
|
## $fh->print(($first == -1 ? "0 " : "$self->{' objects'}{$xreflist[$first]->uid()}[0] ") . ($i - $first) . "\n"); |
1562
|
139
|
100
|
|
|
|
670
|
push @out, ($first == -1 ? "0 " : "$self->{' objects'}{$xreflist[$first]->uid()}[0] ") . ($i - $first) . "\n"; |
1563
|
139
|
100
|
|
|
|
462
|
if ($first == -1) { |
1564
|
|
|
|
|
|
|
## $fh->printf("%010d 65535 f \n", defined $freelist[$k] ? $self->{' objects'}{$freelist[$k]->uid()}[0] : 0); |
1565
|
127
|
50
|
|
|
|
798
|
push @out, sprintf("%010d 65535 f \n", defined $freelist[$k] ? $self->{' objects'}{$freelist[$k]->uid()}[0] : 0); |
1566
|
127
|
|
|
|
|
259
|
$first = 0; |
1567
|
|
|
|
|
|
|
} |
1568
|
139
|
|
|
|
|
419
|
for ($j = $first; $j < $i; $j++) { |
1569
|
752
|
|
|
|
|
1244
|
my $xref = $xreflist[$j]; |
1570
|
752
|
50
|
33
|
|
|
1842
|
if (defined $freelist[$k] && defined $xref && "$freelist[$k]" eq "$xref") { |
|
|
|
33
|
|
|
|
|
1571
|
0
|
|
|
|
|
0
|
$k++; |
1572
|
|
|
|
|
|
|
## $fh->print(pack("A10AA5A4", |
1573
|
|
|
|
|
|
|
push(@out, pack("A10AA5A4", |
1574
|
|
|
|
|
|
|
sprintf("%010d", (defined $freelist[$k] ? |
1575
|
|
|
|
|
|
|
$self->{' objects'}{$freelist[$k]->uid()}[0] : 0)), " ", |
1576
|
0
|
0
|
|
|
|
0
|
sprintf("%05d", $self->{' objects'}{$xref->uid()}[1] + 1), |
1577
|
|
|
|
|
|
|
" f \n")); |
1578
|
|
|
|
|
|
|
} else { |
1579
|
|
|
|
|
|
|
## $fh->print(pack("A10AA5A4", sprintf("%010d", $self->{' locs'}{$xref->uid()}), " ", |
1580
|
|
|
|
|
|
|
push(@out, pack("A10AA5A4", sprintf("%010d", $self->{' locs'}{$xref->uid()}), " ", |
1581
|
752
|
|
|
|
|
1839
|
sprintf("%05d", $self->{' objects'}{$xref->uid()}[1]), |
1582
|
|
|
|
|
|
|
" n \n")); |
1583
|
|
|
|
|
|
|
} |
1584
|
|
|
|
|
|
|
} |
1585
|
139
|
|
|
|
|
276
|
$first = $i; |
1586
|
139
|
100
|
|
|
|
557
|
$j = $self->{' objects'}{$xreflist[$i]->uid()}[0] if ($i < scalar @xreflist); |
1587
|
|
|
|
|
|
|
|
1588
|
|
|
|
|
|
|
} else { |
1589
|
740
|
|
|
|
|
1642
|
$j++; |
1590
|
|
|
|
|
|
|
} |
1591
|
|
|
|
|
|
|
} # end for loop through xreflists |
1592
|
|
|
|
|
|
|
## $fh->print("trailer\n"); |
1593
|
|
|
|
|
|
|
## $tdict->outobjdeep($fh, $self); |
1594
|
|
|
|
|
|
|
## $fh->print("\nstartxref\n$tloc\n%%EOF\n"); |
1595
|
|
|
|
|
|
|
## start new code for 117184 fix by Vadim. @out has array of xref content |
1596
|
127
|
50
|
33
|
|
|
471
|
if (exists $tdict->{'Type'} and $tdict->{'Type'}->val() eq 'XRef') { |
1597
|
|
|
|
|
|
|
|
1598
|
0
|
|
|
|
|
0
|
my (@index, @stream); |
1599
|
0
|
|
|
|
|
0
|
for (@out) { # @out is the accumulated cross reference list |
1600
|
0
|
|
|
|
|
0
|
my @a = split; |
1601
|
0
|
0
|
|
|
|
0
|
@a == 2 ? push @index, @a : push @stream, \@a; |
1602
|
|
|
|
|
|
|
} |
1603
|
0
|
|
|
|
|
0
|
my $i = $self->{' maxobj'}++; |
1604
|
0
|
|
|
|
|
0
|
$self->add_obj($tdict, $i, 0); |
1605
|
0
|
|
|
|
|
0
|
$self->out_obj($tdict); |
1606
|
|
|
|
|
|
|
|
1607
|
0
|
|
|
|
|
0
|
push @index, $i, 1; |
1608
|
0
|
|
|
|
|
0
|
push @stream, [ $tloc, 0, 'n' ]; |
1609
|
|
|
|
|
|
|
|
1610
|
0
|
0
|
|
|
|
0
|
my $len = $tloc > 0xFFFF ? 4 : 2; # don't expect files > 4 Gb |
1611
|
0
|
0
|
|
|
|
0
|
my $tpl = $tloc > 0xFFFF ? 'CNC' : 'CnC'; # don't expect gennum > 255, it's absurd. |
1612
|
|
|
|
|
|
|
# Adobe doesn't use them anymore anyway |
1613
|
0
|
|
|
|
|
0
|
my $sstream = ''; |
1614
|
0
|
|
|
|
|
0
|
my @prev = ( 0 ) x ( $len + 2 ); # init prev to all 0's |
1615
|
0
|
|
|
|
|
0
|
for (@stream) { |
1616
|
|
|
|
|
|
|
# OK to zero out gennum of 65535 for a cross reference stream, |
1617
|
|
|
|
|
|
|
# rather than just truncating to 255 -- Vadim |
1618
|
0
|
0
|
0
|
|
|
0
|
$_->[ 1 ] = 0 if $_->[ 1 ] == 65535 and |
1619
|
|
|
|
|
|
|
$_->[ 2 ] eq 'f'; |
1620
|
|
|
|
|
|
|
# make sure is 0..255, since will pack with 'C' code -- Phil |
1621
|
0
|
0
|
|
|
|
0
|
if ($_->[1] > 0xFF) { |
1622
|
0
|
|
|
|
|
0
|
print "generation number ".($_->[1])." in entry '$_->[0] $_->[1] $_->[2]' exceeds 256, reduced to ".($_->[1] & 0x00FF)."\n"; |
1623
|
|
|
|
|
|
|
} |
1624
|
0
|
|
|
|
|
0
|
$_->[ 1 ] &= 0x00FF; |
1625
|
0
|
0
|
|
|
|
0
|
my @line = unpack 'C*', pack $tpl, $_->[ 2 ] eq 'n'? 1 : 0, @{ $_ }[ 0 .. 1 ]; |
|
0
|
|
|
|
|
0
|
|
1626
|
|
|
|
|
|
|
|
1627
|
|
|
|
|
|
|
$sstream .= pack 'C*', 2, # prepend filtering method, "PNG Up" |
1628
|
0
|
|
|
|
|
0
|
map {($line[ $_ ] - $prev[ $_ ] + 256) % 256} 0 .. $#line; |
|
0
|
|
|
|
|
0
|
|
1629
|
0
|
|
|
|
|
0
|
@prev = @line; |
1630
|
|
|
|
|
|
|
} |
1631
|
|
|
|
|
|
|
# build a dictionary for the cross reference stream |
1632
|
0
|
|
|
|
|
0
|
$tdict->{'Size'} = PDFNum($i + 1); |
1633
|
0
|
|
|
|
|
0
|
$tdict->{'Index'} = PDFArray(map { PDFNum($_) } @index); |
|
0
|
|
|
|
|
0
|
|
1634
|
0
|
|
|
|
|
0
|
$tdict->{'W'} = PDFArray(map { PDFNum($_) } 1, $len, 1); |
|
0
|
|
|
|
|
0
|
|
1635
|
0
|
|
|
|
|
0
|
$tdict->{'Filter'} = PDFName('FlateDecode'); |
1636
|
|
|
|
|
|
|
|
1637
|
|
|
|
|
|
|
# it's compressed |
1638
|
0
|
|
|
|
|
0
|
$tdict->{'DecodeParms'} = PDFDict(); |
1639
|
0
|
|
|
|
|
0
|
$tdict->{'DecodeParms'}->val()->{'Predictor'} = PDFNum(12); |
1640
|
0
|
|
|
|
|
0
|
$tdict->{'DecodeParms'}->val()->{'Columns'} = PDFNum($len + 2); |
1641
|
|
|
|
|
|
|
|
1642
|
0
|
|
|
|
|
0
|
$sstream = PDF::Builder::Basic::PDF::Filter::FlateDecode->new()->outfilt($sstream, 1); |
1643
|
0
|
|
|
|
|
0
|
$tdict->{' stream'} = $sstream; |
1644
|
0
|
|
|
|
|
0
|
$tdict->{' nofilt'} = 1; |
1645
|
0
|
|
|
|
|
0
|
delete $tdict->{'Length'}; |
1646
|
0
|
|
|
|
|
0
|
$self->ship_out(); |
1647
|
|
|
|
|
|
|
} else { |
1648
|
|
|
|
|
|
|
# delete may be moved later by Vadim closer to where XRefStm created |
1649
|
127
|
|
|
|
|
258
|
delete $tdict->{'XRefStm'}; |
1650
|
|
|
|
|
|
|
# almost the original code |
1651
|
127
|
|
|
|
|
569
|
$fh->print("xref\n", @out, "trailer\n"); |
1652
|
127
|
|
|
|
|
1416
|
$tdict->outobjdeep($fh, $self); |
1653
|
127
|
|
|
|
|
383
|
$fh->print("\n"); |
1654
|
|
|
|
|
|
|
} |
1655
|
127
|
|
|
|
|
1043
|
$fh->print("startxref\n$tloc\n%%EOF\n"); |
1656
|
|
|
|
|
|
|
## end of new code |
1657
|
|
|
|
|
|
|
|
1658
|
127
|
|
|
|
|
947
|
return; |
1659
|
|
|
|
|
|
|
} # end of out_trailer() |
1660
|
|
|
|
|
|
|
|
1661
|
|
|
|
|
|
|
=head2 PDF::Builder::Basic::PDF::File->_new() |
1662
|
|
|
|
|
|
|
|
1663
|
|
|
|
|
|
|
Creates a very empty PDF file object (used by new() and open()) |
1664
|
|
|
|
|
|
|
|
1665
|
|
|
|
|
|
|
=cut |
1666
|
|
|
|
|
|
|
|
1667
|
|
|
|
|
|
|
sub _new { |
1668
|
177
|
|
|
177
|
|
404
|
my $class = shift(); |
1669
|
177
|
|
|
|
|
370
|
my $self = {}; |
1670
|
|
|
|
|
|
|
|
1671
|
177
|
|
|
|
|
396
|
bless $self, $class; |
1672
|
177
|
|
|
|
|
623
|
$self->{' outlist'} = []; |
1673
|
177
|
|
|
|
|
431
|
$self->{' outlist_cache'} = {}; # A cache of what's in the 'outlist' |
1674
|
177
|
|
|
|
|
398
|
$self->{' maxobj'} = 1; |
1675
|
177
|
|
|
|
|
456
|
$self->{' objcache'} = {}; |
1676
|
177
|
|
|
|
|
424
|
$self->{' objects'} = {}; |
1677
|
|
|
|
|
|
|
|
1678
|
177
|
|
|
|
|
444
|
return $self; |
1679
|
|
|
|
|
|
|
} |
1680
|
|
|
|
|
|
|
|
1681
|
|
|
|
|
|
|
1; |
1682
|
|
|
|
|
|
|
|
1683
|
|
|
|
|
|
|
=head1 AUTHOR |
1684
|
|
|
|
|
|
|
|
1685
|
|
|
|
|
|
|
Martin Hosken Martin_Hosken@sil.org |
1686
|
|
|
|
|
|
|
|
1687
|
|
|
|
|
|
|
Copyright Martin Hosken 1999 and onwards |
1688
|
|
|
|
|
|
|
|
1689
|
|
|
|
|
|
|
No warranty or expression of effectiveness, least of all regarding anyone's |
1690
|
|
|
|
|
|
|
safety, is implied in this software or documentation. |