| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package WARC::Fields; # -*- CPerl -*- |
|
2
|
|
|
|
|
|
|
|
|
3
|
26
|
|
|
26
|
|
68687
|
use strict; |
|
|
26
|
|
|
|
|
61
|
|
|
|
26
|
|
|
|
|
776
|
|
|
4
|
26
|
|
|
26
|
|
127
|
use warnings; |
|
|
26
|
|
|
|
|
48
|
|
|
|
26
|
|
|
|
|
619
|
|
|
5
|
|
|
|
|
|
|
|
|
6
|
26
|
|
|
26
|
|
122
|
use Carp; |
|
|
26
|
|
|
|
|
62
|
|
|
|
26
|
|
|
|
|
1311
|
|
|
7
|
26
|
|
|
26
|
|
13732
|
use Encode; |
|
|
26
|
|
|
|
|
235637
|
|
|
|
26
|
|
|
|
|
1756
|
|
|
8
|
26
|
|
|
26
|
|
191
|
use Scalar::Util; |
|
|
26
|
|
|
|
|
52
|
|
|
|
26
|
|
|
|
|
1198
|
|
|
9
|
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
our @ISA = qw(); |
|
11
|
|
|
|
|
|
|
|
|
12
|
26
|
|
|
26
|
|
506
|
use WARC; *WARC::Fields::VERSION = \$WARC::VERSION; |
|
|
26
|
|
|
|
|
55
|
|
|
|
26
|
|
|
|
|
1310
|
|
|
13
|
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
=head1 NAME |
|
15
|
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
WARC::Fields - WARC record headers and application/warc-fields |
|
17
|
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
19
|
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
require WARC::Fields; |
|
21
|
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
$f = new WARC::Fields; |
|
23
|
|
|
|
|
|
|
$f = $record->fields; # get WARC record headers |
|
24
|
|
|
|
|
|
|
$g = $f->clone; # make writable copy |
|
25
|
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
$g->set_readonly; # make read-only |
|
27
|
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
$f->field('WARC-Type' => 'metadata'); # set |
|
29
|
|
|
|
|
|
|
$value = $f->field('WARC-Type'); # get |
|
30
|
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
$fields_text = $f->as_string; # get WARC header lines for display |
|
32
|
|
|
|
|
|
|
$fields_block = $f->as_block; # format for WARC file |
|
33
|
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
tie @field_names, ref $f, $f; # bind ordered list of field names |
|
35
|
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
tie %fields, ref $f, $f; # bind hash of field names => values |
|
37
|
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
$entry = $f->[$num]; # tie an anonymous array and access it |
|
39
|
|
|
|
|
|
|
$value = $f->{$name}; # likewise with an anonymous tied hash |
|
40
|
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
$name = "$entry"; # tied array returns objects |
|
42
|
|
|
|
|
|
|
$value = $entry->value; # one specific value |
|
43
|
|
|
|
|
|
|
$offset = $entry->offset; # N of M with same name |
|
44
|
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
foreach (keys %{$f}) { ... } # iterate over names, in order |
|
46
|
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
=cut |
|
48
|
|
|
|
|
|
|
|
|
49
|
26
|
|
|
26
|
|
2170
|
use overload '@{}' => \&_as_tied_array, '%{}' => \&_as_tied_hash; |
|
|
26
|
|
|
|
|
1756
|
|
|
|
26
|
|
|
|
|
235
|
|
|
50
|
26
|
|
|
26
|
|
1906
|
use overload fallback => 1; |
|
|
26
|
|
|
|
|
51
|
|
|
|
26
|
|
|
|
|
79
|
|
|
51
|
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
# This implementation uses column-oriented storage, with an array as the |
|
53
|
|
|
|
|
|
|
# underlying object and constants to select array offsets. |
|
54
|
|
|
|
|
|
|
# |
|
55
|
|
|
|
|
|
|
# The NAMES and VALUES columns are always valid, but the MVOFF and INDEX |
|
56
|
|
|
|
|
|
|
# positions may be undefined and are lazily rebuilt when needed. |
|
57
|
|
|
|
|
|
|
|
|
58
|
26
|
|
|
|
|
2862
|
use constant { NAMES => 0, VALUES => 1, MVOFF => 2, |
|
59
|
26
|
|
|
26
|
|
1540
|
INDEX => 3, IS_RO => 4, C_TA => 5, C_TH => 6 }; |
|
|
26
|
|
|
|
|
51
|
|
|
60
|
26
|
|
|
26
|
|
173
|
use constant OBJECT_INDEX => qw/NAMES VALUES MVOFF INDEX IS_RO/; |
|
|
26
|
|
|
|
|
48
|
|
|
|
26
|
|
|
|
|
1639
|
|
|
61
|
26
|
|
|
26
|
|
151
|
use constant OBJECT_INIT => undef, undef, undef, undef, 0, undef, undef; |
|
|
26
|
|
|
|
|
48
|
|
|
|
26
|
|
|
|
|
77460
|
|
|
62
|
|
|
|
|
|
|
|
|
63
|
654
|
|
|
654
|
|
7551
|
sub DESTROY { my $ob = shift; |
|
64
|
654
|
100
|
|
|
|
1713
|
untie @{$$ob->[C_TA]} if defined $$ob->[C_TA]; |
|
|
3
|
|
|
|
|
23
|
|
|
65
|
654
|
100
|
|
|
|
1237
|
untie %{$$ob->[C_TH]} if defined $$ob->[C_TH]; |
|
|
61
|
|
|
|
|
179
|
|
|
66
|
654
|
|
|
|
|
837
|
our $_total_destroyed; $_total_destroyed++ } |
|
|
654
|
|
|
|
|
6481
|
|
|
67
|
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
# NAMES: array of field names, exactly as written |
|
69
|
|
|
|
|
|
|
# VALUES: array of field values |
|
70
|
|
|
|
|
|
|
# MVOFF: array of offsets for multiple-valued fields |
|
71
|
|
|
|
|
|
|
# INDEX: hash of case-folded field names to array of row numbers |
|
72
|
|
|
|
|
|
|
# IS_RO: boolean: TRUE if this object is read-only |
|
73
|
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
# C_TA: cache: tied array for array dereference |
|
75
|
|
|
|
|
|
|
# C_TH: cache: tide hash for hash dereference |
|
76
|
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
sub _rebuild_INDEX { |
|
78
|
30
|
|
|
30
|
|
45
|
my $self = shift; |
|
79
|
30
|
|
|
|
|
52
|
my %idx = (); |
|
80
|
|
|
|
|
|
|
|
|
81
|
30
|
|
|
|
|
61
|
for (my $i = 0; $i < @{$$self->[NAMES]}; $i++) { |
|
|
113
|
|
|
|
|
262
|
|
|
82
|
83
|
|
|
|
|
99
|
push @{$idx{lc $$self->[NAMES][$i]}}, $i; |
|
|
83
|
|
|
|
|
222
|
|
|
83
|
|
|
|
|
|
|
} |
|
84
|
|
|
|
|
|
|
|
|
85
|
30
|
|
|
|
|
71
|
$$self->[INDEX] = \%idx; |
|
86
|
|
|
|
|
|
|
} |
|
87
|
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
sub _update_INDEX { |
|
89
|
72
|
|
|
72
|
|
104
|
my $self = shift; # INDEX slot must be valid |
|
90
|
72
|
|
|
|
|
93
|
my $base = shift; # row number where an insertion or removal was made |
|
91
|
72
|
|
|
|
|
94
|
my $count = shift; # how many rows were inserted (+) or removed (-) |
|
92
|
|
|
|
|
|
|
|
|
93
|
72
|
|
|
|
|
115
|
my %done = (); |
|
94
|
|
|
|
|
|
|
|
|
95
|
72
|
100
|
|
|
|
162
|
for (my $i = ($base < 0) ? 0 : $base; $i < @{$$self->[NAMES]}; $i++) { |
|
|
186
|
|
|
|
|
457
|
|
|
96
|
114
|
|
|
|
|
191
|
my $key = lc $$self->[NAMES][$i]; |
|
97
|
114
|
100
|
|
|
|
226
|
next if $done{$key}; |
|
98
|
72
|
|
|
|
|
154
|
@{$$self->[INDEX]{$key}} = |
|
99
|
72
|
100
|
|
|
|
91
|
map { $_ + ($_ > $base ? $count : 0) } @{$$self->[INDEX]{$key}}; |
|
|
118
|
|
|
|
|
255
|
|
|
|
72
|
|
|
|
|
159
|
|
|
100
|
72
|
|
|
|
|
167
|
$done{$key}++; |
|
101
|
|
|
|
|
|
|
} |
|
102
|
|
|
|
|
|
|
} |
|
103
|
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
sub _rebuild_MVOFF { |
|
105
|
6
|
|
|
6
|
|
11
|
my $self = shift; |
|
106
|
|
|
|
|
|
|
|
|
107
|
6
|
|
|
|
|
14
|
my @mvoff = ();#(undef) x scalar @{$$self->[NAMES]}; |
|
108
|
6
|
|
|
|
|
13
|
my %cidx = (); # counted index; references to unique entries |
|
109
|
|
|
|
|
|
|
|
|
110
|
6
|
|
|
|
|
10
|
foreach my $name (@{$$self->[NAMES]}) { |
|
|
6
|
|
|
|
|
15
|
|
|
111
|
10028
|
|
|
|
|
13164
|
my $key = lc $name; |
|
112
|
10028
|
100
|
|
|
|
18062
|
if (not defined $cidx{$key}) { |
|
|
|
100
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
# first time this key is seen |
|
114
|
26
|
|
|
|
|
36
|
push @mvoff, undef; |
|
115
|
26
|
|
|
|
|
61
|
$cidx{$key} = \$mvoff[$#mvoff]; |
|
116
|
|
|
|
|
|
|
} elsif (ref $cidx{$key} eq 'SCALAR') { |
|
117
|
|
|
|
|
|
|
# second time this key is seen |
|
118
|
4
|
|
|
|
|
10
|
push @mvoff, 1; |
|
119
|
4
|
|
|
|
|
7
|
${$cidx{$key}} = 0; # replace undefined value |
|
|
4
|
|
|
|
|
9
|
|
|
120
|
4
|
|
|
|
|
10
|
$cidx{$key} = 2; # prepare counter |
|
121
|
|
|
|
|
|
|
} else { |
|
122
|
|
|
|
|
|
|
# third or later time this key is seen |
|
123
|
9998
|
|
|
|
|
14386
|
push @mvoff, $cidx{$key}++; |
|
124
|
|
|
|
|
|
|
} |
|
125
|
|
|
|
|
|
|
} |
|
126
|
|
|
|
|
|
|
|
|
127
|
6
|
|
|
|
|
23
|
$$self->[MVOFF] = \@mvoff; |
|
128
|
|
|
|
|
|
|
} |
|
129
|
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
sub _dbg_dump { |
|
131
|
51
|
|
|
51
|
|
5421
|
my $self = shift; |
|
132
|
|
|
|
|
|
|
|
|
133
|
51
|
|
|
|
|
75
|
my @mvoff = (' ') x @{$$self->[NAMES]}; |
|
|
51
|
|
|
|
|
2527
|
|
|
134
|
20016
|
100
|
|
|
|
29332
|
@mvoff = map { defined $_ ? $_ : 'U' } |
|
135
|
51
|
100
|
|
|
|
168
|
@{$$self->[MVOFF]}[0 .. $#{$$self->[NAMES]}] |
|
|
5
|
|
|
|
|
210
|
|
|
|
5
|
|
|
|
|
279
|
|
|
136
|
|
|
|
|
|
|
if defined $$self->[MVOFF]; |
|
137
|
51
|
|
|
|
|
508
|
my @widths = map {length} qw/ROW MVO NAME/; |
|
|
153
|
|
|
|
|
264
|
|
|
138
|
|
|
|
|
|
|
|
|
139
|
51
|
|
|
|
|
88
|
foreach my $row (0 .. $#{$$self->[NAMES]}) { |
|
|
51
|
|
|
|
|
172
|
|
|
140
|
20233
|
100
|
|
|
|
30996
|
$widths[0] = length $row if length $row > $widths[0]; |
|
141
|
20233
|
100
|
|
|
|
33252
|
$widths[1] = length $mvoff[$row] if length $mvoff[$row] > $widths[1]; |
|
142
|
20233
|
100
|
|
|
|
35139
|
$widths[2] = length $$self->[NAMES][$row] |
|
143
|
|
|
|
|
|
|
if length $$self->[NAMES][$row] > $widths[2]; |
|
144
|
|
|
|
|
|
|
} |
|
145
|
|
|
|
|
|
|
|
|
146
|
51
|
|
|
|
|
263
|
my $out = sprintf ' %4$*1$s %5$*2$s %6$*3$s %7$s', @widths, |
|
147
|
|
|
|
|
|
|
qw/ ROW MVO NAME VALUE /; |
|
148
|
51
|
|
|
|
|
181
|
$out .= "\n".('=' x length $out)."\n"; |
|
149
|
|
|
|
|
|
|
$out .= join "\n", map |
|
150
|
20233
|
100
|
|
|
|
79413
|
{ sprintf ' %4$*1$d %5$*2$s %6$*3$s%7$1s %8$s', @widths, |
|
|
|
100
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
($_, $mvoff[$_], $$self->[NAMES][$_], |
|
152
|
|
|
|
|
|
|
(defined $$self->[VALUES][$_] ? ':' : ' '), |
|
153
|
|
|
|
|
|
|
(defined $$self->[VALUES][$_] ? $$self->[VALUES][$_] : '*deleted*')) } |
|
154
|
51
|
|
|
|
|
100
|
0 .. $#{$$self->[NAMES]}; |
|
|
51
|
|
|
|
|
353
|
|
|
155
|
|
|
|
|
|
|
|
|
156
|
51
|
|
|
|
|
2401
|
return $out; |
|
157
|
|
|
|
|
|
|
} |
|
158
|
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
# From RFC2616: |
|
160
|
|
|
|
|
|
|
# CTL =
|
|
161
|
|
|
|
|
|
|
# (octets 0 - 31) and DEL (127)> |
|
162
|
|
|
|
|
|
|
# LWS = [CRLF] 1*( SP | HT ) |
|
163
|
|
|
|
|
|
|
# separators = "(" | ")" | "<" | ">" | "@" |
|
164
|
|
|
|
|
|
|
# | "," | ";" | ":" | "\" | <"> |
|
165
|
|
|
|
|
|
|
# | "/" | "[" | "]" | "?" | "=" |
|
166
|
|
|
|
|
|
|
# | "{" | "}" | SP | HT |
|
167
|
|
|
|
|
|
|
my $PARSE_RE__LWS = qr/(?:\015\012)?[ \t]+/; |
|
168
|
|
|
|
|
|
|
my $PARSE_RE__separator = qr[[][)(><}{@,;:/"\\?=[:space:]]]; |
|
169
|
|
|
|
|
|
|
my $PARSE_RE__not_separator = qr[[^][)(><}{@,;:/"\\?=[:space:]]]; |
|
170
|
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
# From WARC specification: |
|
172
|
|
|
|
|
|
|
# field-name = token |
|
173
|
|
|
|
|
|
|
# token = 1*
|
|
174
|
|
|
|
|
|
|
# except CTLs or separators> |
|
175
|
|
|
|
|
|
|
my $PARSE_RE__token = qr/[!#$%'*+-.0-9A-Z^_`a-z|~]+/; |
|
176
|
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
=head1 DESCRIPTION |
|
178
|
|
|
|
|
|
|
|
|
179
|
|
|
|
|
|
|
The C class encapsulates information in the |
|
180
|
|
|
|
|
|
|
"application/warc-fields" format used for WARC record headers. This is a |
|
181
|
|
|
|
|
|
|
simple key-value format closely analogous to HTTP headers, however |
|
182
|
|
|
|
|
|
|
differences are significant enough that the C class cannot |
|
183
|
|
|
|
|
|
|
be reliably reused for WARC fields. |
|
184
|
|
|
|
|
|
|
|
|
185
|
|
|
|
|
|
|
Instances of this class are usually created as member variables of the |
|
186
|
|
|
|
|
|
|
C class, but can also be returned as the content of WARC |
|
187
|
|
|
|
|
|
|
records with Content-Type "application/warc-fields". |
|
188
|
|
|
|
|
|
|
|
|
189
|
|
|
|
|
|
|
Instances of C retrieved from WARC files are read-only and |
|
190
|
|
|
|
|
|
|
will croak() if any attempt is made to change their contents. |
|
191
|
|
|
|
|
|
|
|
|
192
|
|
|
|
|
|
|
This class strives to faithfully represent the contents of a WARC file, |
|
193
|
|
|
|
|
|
|
while providing a simple interface to answer simple questions. |
|
194
|
|
|
|
|
|
|
|
|
195
|
|
|
|
|
|
|
=head2 Multiple Values |
|
196
|
|
|
|
|
|
|
|
|
197
|
|
|
|
|
|
|
Most WARC headers may only appear once and with a single value in valid |
|
198
|
|
|
|
|
|
|
WARC records, with the notable exception of the WARC-Concurrent-To header. |
|
199
|
|
|
|
|
|
|
C neither attempts to enforce nor relies upon this |
|
200
|
|
|
|
|
|
|
constraint. Headers that appear multiple times are considered to have |
|
201
|
|
|
|
|
|
|
multiple values. When iterating a tied hash, all values of a recurring |
|
202
|
|
|
|
|
|
|
header are collected and returned with the B occurrence of its key. |
|
203
|
|
|
|
|
|
|
|
|
204
|
|
|
|
|
|
|
Multiple values are returned from the C method and tied hash |
|
205
|
|
|
|
|
|
|
interface as array references, and are set by passing in an array |
|
206
|
|
|
|
|
|
|
reference. Existing rows are reused where possible when updating a field |
|
207
|
|
|
|
|
|
|
with multiple values. If the new array reference contains fewer items |
|
208
|
|
|
|
|
|
|
(including the special case of replacing multiple values with a single |
|
209
|
|
|
|
|
|
|
value) excess rows are deleted. If the new array reference requires |
|
210
|
|
|
|
|
|
|
additional rows to be inserted, they are inserted immediately after the |
|
211
|
|
|
|
|
|
|
last existing row for a field, with the same name case as that row. |
|
212
|
|
|
|
|
|
|
|
|
213
|
|
|
|
|
|
|
Precise control of the layout is available using the tied array interface, |
|
214
|
|
|
|
|
|
|
but the ordering of the header rows is not constrained in the WARC |
|
215
|
|
|
|
|
|
|
specification. |
|
216
|
|
|
|
|
|
|
|
|
217
|
|
|
|
|
|
|
=head2 Field Name Mangling |
|
218
|
|
|
|
|
|
|
|
|
219
|
|
|
|
|
|
|
As with C, the '_' character is converted to '-' in field |
|
220
|
|
|
|
|
|
|
names unless the first character of the name is ':', which cannot itself |
|
221
|
|
|
|
|
|
|
appear in a field name. Unlike C, the leading ':' is |
|
222
|
|
|
|
|
|
|
stripped off immediately and the name stored otherwise exactly as given. |
|
223
|
|
|
|
|
|
|
The C method and tied hash interface allow this convenience feature. |
|
224
|
|
|
|
|
|
|
The field names exposed via the tied array interface are reported |
|
225
|
|
|
|
|
|
|
B as they appear in the WARC file. |
|
226
|
|
|
|
|
|
|
|
|
227
|
|
|
|
|
|
|
Strictly, "X-Crazy-Header" and "X_Crazy_Header" are two B |
|
228
|
|
|
|
|
|
|
headers that the above convenience mechanism conflates. The solution is |
|
229
|
|
|
|
|
|
|
simple: if (and only if) a header field B with the B |
|
230
|
|
|
|
|
|
|
name given, it is used, otherwise C occurs and the name is |
|
231
|
|
|
|
|
|
|
rechecked for another exact match. If no match is found, case is folded |
|
232
|
|
|
|
|
|
|
and a third check performed. If a match is found, the existing header is |
|
233
|
|
|
|
|
|
|
updated, otherwise a new header is created with character case as given. |
|
234
|
|
|
|
|
|
|
|
|
235
|
|
|
|
|
|
|
The WARC specification specifically states that field names are |
|
236
|
|
|
|
|
|
|
case-insensitive, accordingly, "X-Crazy-Header" and "X-CRAZY-HeAdEr" are |
|
237
|
|
|
|
|
|
|
considered the same header for the C method and tied hash interface. |
|
238
|
|
|
|
|
|
|
They will appear exactly as given in the tied array interface, however. |
|
239
|
|
|
|
|
|
|
|
|
240
|
|
|
|
|
|
|
=cut |
|
241
|
|
|
|
|
|
|
|
|
242
|
|
|
|
|
|
|
# This function handles two different canonicalizations: |
|
243
|
|
|
|
|
|
|
# (1) case folding as required by the WARC specification |
|
244
|
|
|
|
|
|
|
# (2) convenience translation s/_/-/g, |
|
245
|
|
|
|
|
|
|
# (2a) suppressed if m/^:/, which is removed |
|
246
|
|
|
|
|
|
|
# (2b) overridden by an exact match |
|
247
|
|
|
|
|
|
|
# To make this work: |
|
248
|
|
|
|
|
|
|
# --- all keys in INDEX are case-folded |
|
249
|
|
|
|
|
|
|
# --- all keys in NAMES preserve case |
|
250
|
|
|
|
|
|
|
# --- existing keys are case-folded by this function |
|
251
|
|
|
|
|
|
|
# --- new keys translate s/_/-/g but preserve case |
|
252
|
|
|
|
|
|
|
sub _find_key { |
|
253
|
7553
|
|
|
7553
|
|
9385
|
my $self = shift; # INDEX slot must be valid |
|
254
|
7553
|
|
|
|
|
9625
|
my $k = shift; |
|
255
|
7553
|
|
|
|
|
8485
|
my $key; ($key = $k) =~ s/^://; |
|
|
7553
|
|
|
|
|
11430
|
|
|
256
|
7553
|
|
|
|
|
9183
|
my $pad = $key; |
|
257
|
7553
|
|
|
|
|
10453
|
my $is_quoted = ($k =~ m/^:/); |
|
258
|
|
|
|
|
|
|
|
|
259
|
|
|
|
|
|
|
# exact case-folded match? |
|
260
|
7553
|
100
|
|
|
|
21340
|
return lc $key if defined $$self->[INDEX]{lc $key}; |
|
261
|
|
|
|
|
|
|
|
|
262
|
|
|
|
|
|
|
# case-folded match after s/_/-/g? |
|
263
|
3698
|
|
|
|
|
7522
|
$pad =~ s/_/-/g; |
|
264
|
3698
|
100
|
100
|
|
|
10002
|
return lc $pad if defined $$self->[INDEX]{lc $pad} && !$is_quoted; |
|
265
|
|
|
|
|
|
|
|
|
266
|
|
|
|
|
|
|
# not found ==> a new key will be made |
|
267
|
2709
|
100
|
|
|
|
5593
|
return $is_quoted ? $key : $pad; |
|
268
|
|
|
|
|
|
|
} |
|
269
|
|
|
|
|
|
|
|
|
270
|
|
|
|
|
|
|
# called only if there is no or one current value |
|
271
|
|
|
|
|
|
|
sub _set_single_value { |
|
272
|
12
|
|
|
12
|
|
18
|
my $self = shift; # INDEX slot must be valid |
|
273
|
12
|
|
|
|
|
17
|
my $key = shift; # as returned from _find_key |
|
274
|
12
|
|
|
|
|
20
|
my $value = shift; |
|
275
|
|
|
|
|
|
|
|
|
276
|
12
|
100
|
|
|
|
217
|
croak "attempt to modify read-only object" if $$self->[IS_RO]; |
|
277
|
|
|
|
|
|
|
|
|
278
|
11
|
100
|
|
|
|
32
|
unless (defined $$self->[INDEX]{lc $key}) { |
|
279
|
|
|
|
|
|
|
# insert new key |
|
280
|
6
|
|
|
|
|
9
|
push @{$$self->[NAMES]}, $key; # preserve original key |
|
|
6
|
|
|
|
|
16
|
|
|
281
|
6
|
|
|
|
|
14
|
$key = lc $key; # fold key case |
|
282
|
6
|
|
|
|
|
7
|
push @{$$self->[INDEX]{$key}}, $#{$$self->[NAMES]}; |
|
|
6
|
|
|
|
|
18
|
|
|
|
6
|
|
|
|
|
11
|
|
|
283
|
|
|
|
|
|
|
} |
|
284
|
|
|
|
|
|
|
|
|
285
|
11
|
|
|
|
|
37
|
$$self->[VALUES][$$self->[INDEX]{$key}[0]] = $value; |
|
286
|
|
|
|
|
|
|
} |
|
287
|
|
|
|
|
|
|
|
|
288
|
|
|
|
|
|
|
sub _key_multiple_value_p { |
|
289
|
3801
|
|
|
3801
|
|
4758
|
my $self = shift; # INDEX slot must be valid |
|
290
|
3801
|
|
|
|
|
4882
|
my $key = shift; # as returned from _find_key |
|
291
|
|
|
|
|
|
|
|
|
292
|
|
|
|
|
|
|
# For this to be true, the key must already exist, which means that |
|
293
|
|
|
|
|
|
|
# _find_key has case-folded it already. |
|
294
|
|
|
|
|
|
|
return (defined $$self->[INDEX]{$key} |
|
295
|
3801
|
|
100
|
|
|
7733
|
&& 1 < scalar @{$$self->[INDEX]{$key}}) |
|
296
|
|
|
|
|
|
|
} |
|
297
|
|
|
|
|
|
|
|
|
298
|
|
|
|
|
|
|
# called in all cases where multiple values are involved |
|
299
|
|
|
|
|
|
|
sub _set_multiple_value { |
|
300
|
56
|
|
|
56
|
|
82
|
my $self = shift; # INDEX slot must be valid |
|
301
|
56
|
|
|
|
|
81
|
my $key = shift; # as returned from _find_key |
|
302
|
56
|
|
|
|
|
74
|
my $value_aref = shift; |
|
303
|
|
|
|
|
|
|
|
|
304
|
56
|
100
|
|
|
|
351
|
croak "attempt to modify read-only object" if $$self->[IS_RO]; |
|
305
|
|
|
|
|
|
|
|
|
306
|
|
|
|
|
|
|
my $cur_count = (defined $$self->[INDEX]{$key} |
|
307
|
54
|
|
100
|
|
|
146
|
&& scalar @{$$self->[INDEX]{$key}}); |
|
308
|
54
|
|
|
|
|
108
|
my $new_count = scalar @$value_aref; |
|
309
|
|
|
|
|
|
|
|
|
310
|
54
|
100
|
|
|
|
105
|
unless ($cur_count) { |
|
311
|
|
|
|
|
|
|
# insert new key |
|
312
|
16
|
|
|
|
|
26
|
push @{$$self->[NAMES]}, $key; # preserve original key |
|
|
16
|
|
|
|
|
44
|
|
|
313
|
16
|
|
|
|
|
31
|
push @{$$self->[VALUES]}, undef; # prepare slot |
|
|
16
|
|
|
|
|
34
|
|
|
314
|
16
|
|
|
|
|
30
|
$key = lc $key; # fold key case |
|
315
|
16
|
|
|
|
|
26
|
push @{$$self->[INDEX]{$key}}, $#{$$self->[NAMES]}; |
|
|
16
|
|
|
|
|
51
|
|
|
|
16
|
|
|
|
|
40
|
|
|
316
|
16
|
|
|
|
|
29
|
$cur_count = 1; # account for the added slot |
|
317
|
|
|
|
|
|
|
} |
|
318
|
|
|
|
|
|
|
# $key is always case-folded at this point |
|
319
|
|
|
|
|
|
|
|
|
320
|
|
|
|
|
|
|
# adjust table to accommodate new number of values |
|
321
|
54
|
100
|
|
|
|
144
|
if ($cur_count > $new_count) { |
|
|
|
100
|
|
|
|
|
|
|
322
|
|
|
|
|
|
|
# remove extra rows |
|
323
|
27
|
|
|
|
|
38
|
foreach my $extra_row (reverse sort |
|
324
|
27
|
|
|
|
|
136
|
splice @{$$self->[INDEX]{$key}}, $new_count) { |
|
325
|
40
|
|
|
|
|
65
|
splice @{$$self->[NAMES]}, $extra_row, 1; |
|
|
40
|
|
|
|
|
79
|
|
|
326
|
40
|
|
|
|
|
58
|
splice @{$$self->[VALUES]}, $extra_row, 1; |
|
|
40
|
|
|
|
|
66
|
|
|
327
|
40
|
|
|
|
|
89
|
_update_INDEX($self, $extra_row, -1); |
|
328
|
|
|
|
|
|
|
} |
|
329
|
|
|
|
|
|
|
# special case: removing a field entirely |
|
330
|
27
|
100
|
|
|
|
89
|
if ($new_count == 0) { |
|
331
|
|
|
|
|
|
|
# This is here to catch a hypothetical bug before data is corrupted. |
|
332
|
|
|
|
|
|
|
die "stray INDEX entries left after removing field" |
|
333
|
|
|
|
|
|
|
# uncoverable branch true |
|
334
|
19
|
50
|
|
|
|
30
|
unless scalar @{$$self->[INDEX]{$key}} == 0; |
|
|
19
|
|
|
|
|
63
|
|
|
335
|
19
|
|
|
|
|
49
|
delete $$self->[INDEX]{$key}; |
|
336
|
|
|
|
|
|
|
} |
|
337
|
|
|
|
|
|
|
} elsif ($cur_count < $new_count) { |
|
338
|
|
|
|
|
|
|
# add more rows |
|
339
|
15
|
|
|
|
|
30
|
my $last_row = $$self->[INDEX]{$key}[-1]; |
|
340
|
15
|
|
|
|
|
26
|
my $new_rows = $new_count - $cur_count; |
|
341
|
15
|
|
|
|
|
46
|
_update_INDEX($self, $last_row, $new_rows); |
|
342
|
15
|
|
|
|
|
39
|
splice @{$$self->[NAMES]}, 1+$last_row, 0, |
|
|
15
|
|
|
|
|
2428
|
|
|
343
|
|
|
|
|
|
|
(($$self->[NAMES][$last_row]) x $new_rows); |
|
344
|
15
|
|
|
|
|
22
|
splice @{$$self->[VALUES]}, 1+$last_row, 0, ((undef) x $new_rows); |
|
|
15
|
|
|
|
|
571
|
|
|
345
|
15
|
|
|
|
|
27
|
push @{$$self->[INDEX]{$key}}, 1+$last_row .. $last_row+$new_rows; |
|
|
15
|
|
|
|
|
1257
|
|
|
346
|
|
|
|
|
|
|
} # otherwise, $cur_count == $new_count |
|
347
|
54
|
100
|
|
|
|
307
|
$$self->[MVOFF] = undef unless $cur_count == $new_count; |
|
348
|
|
|
|
|
|
|
# there are always $new_count rows with $key at this point |
|
349
|
|
|
|
|
|
|
|
|
350
|
54
|
|
|
|
|
136
|
for (my $i = 0; $i < $new_count; $i++) |
|
351
|
9968
|
|
|
|
|
21741
|
{ $$self->[VALUES][$$self->[INDEX]{$key}[$i]] = $value_aref->[$i] } |
|
352
|
|
|
|
|
|
|
} |
|
353
|
|
|
|
|
|
|
|
|
354
|
|
|
|
|
|
|
=head2 Methods |
|
355
|
|
|
|
|
|
|
|
|
356
|
|
|
|
|
|
|
=over |
|
357
|
|
|
|
|
|
|
|
|
358
|
|
|
|
|
|
|
=item $f = WARC::Fields-Enew |
|
359
|
|
|
|
|
|
|
|
|
360
|
|
|
|
|
|
|
Construct a new C object. Initial contents can be passed as |
|
361
|
|
|
|
|
|
|
key-value pairs to this constructor and will be added in the given order. |
|
362
|
|
|
|
|
|
|
|
|
363
|
|
|
|
|
|
|
Repeating a key or supplying an array reference as a value assigns multiple |
|
364
|
|
|
|
|
|
|
values to a key. To reduce the risk of confusion, only quoting with a |
|
365
|
|
|
|
|
|
|
leading ':' overrides the convenience feature of applying C when |
|
366
|
|
|
|
|
|
|
constructing a C object. The exact match rules used when |
|
367
|
|
|
|
|
|
|
setting values on an existing object do not apply here. |
|
368
|
|
|
|
|
|
|
|
|
369
|
|
|
|
|
|
|
Field names given when constructing a WARC::Fields object are otherwise |
|
370
|
|
|
|
|
|
|
stored exactly as given, with case preserved, even when other names that |
|
371
|
|
|
|
|
|
|
fold to the same string have been given earlier in the argument list. |
|
372
|
|
|
|
|
|
|
|
|
373
|
|
|
|
|
|
|
=cut |
|
374
|
|
|
|
|
|
|
|
|
375
|
|
|
|
|
|
|
sub new { |
|
376
|
220
|
|
|
220
|
1
|
15014
|
my $class = shift; |
|
377
|
220
|
|
|
|
|
468
|
my $ob = [OBJECT_INIT]; |
|
378
|
220
|
|
|
|
|
352
|
my $k; my $v; |
|
379
|
|
|
|
|
|
|
|
|
380
|
|
|
|
|
|
|
# explicitly initialize NAMES and VALUES to allow as_string and as_block |
|
381
|
|
|
|
|
|
|
# methods to be called on empty objects |
|
382
|
220
|
|
|
|
|
322
|
$ob->[NAMES] = []; |
|
383
|
220
|
|
|
|
|
371
|
$ob->[VALUES] = []; |
|
384
|
|
|
|
|
|
|
|
|
385
|
220
|
|
|
|
|
624
|
while (($k, $v) = splice @_, 0, 2) { |
|
386
|
792
|
100
|
|
|
|
1665
|
croak "key without value" unless defined $v; |
|
387
|
|
|
|
|
|
|
|
|
388
|
791
|
100
|
|
|
|
1414
|
if ($k =~ m/^:/) { $k =~ s/^:// } else { $k =~ s/_/-/g } |
|
|
7
|
|
|
|
|
18
|
|
|
|
784
|
|
|
|
|
1860
|
|
|
389
|
|
|
|
|
|
|
|
|
390
|
791
|
100
|
|
|
|
2149
|
croak "reference to field with no name" unless $k =~ m/./; |
|
391
|
789
|
100
|
|
|
|
2210
|
croak "reference to invalid field name" if $k !~ m/^$PARSE_RE__token$/o; |
|
392
|
|
|
|
|
|
|
|
|
393
|
787
|
100
|
|
|
|
1369
|
if (ref $v eq 'ARRAY') { |
|
394
|
3
|
|
|
|
|
8
|
foreach my $value (@$v) { |
|
395
|
8
|
|
|
|
|
11
|
push @{$ob->[NAMES]}, $k; |
|
|
8
|
|
|
|
|
14
|
|
|
396
|
8
|
|
|
|
|
11
|
push @{$ob->[VALUES]}, $value; |
|
|
8
|
|
|
|
|
12
|
|
|
397
|
8
|
|
|
|
|
11
|
push @{$ob->[INDEX]{lc $k}}, $#{$ob->[NAMES]}; |
|
|
8
|
|
|
|
|
17
|
|
|
|
8
|
|
|
|
|
24
|
|
|
398
|
|
|
|
|
|
|
} |
|
399
|
|
|
|
|
|
|
} else { |
|
400
|
784
|
|
|
|
|
957
|
push @{$ob->[NAMES]}, $k; |
|
|
784
|
|
|
|
|
1468
|
|
|
401
|
784
|
|
|
|
|
1029
|
push @{$ob->[VALUES]}, $v; |
|
|
784
|
|
|
|
|
1262
|
|
|
402
|
784
|
|
|
|
|
964
|
push @{$ob->[INDEX]{lc $k}}, $#{$ob->[NAMES]}; |
|
|
784
|
|
|
|
|
1673
|
|
|
|
784
|
|
|
|
|
2587
|
|
|
403
|
|
|
|
|
|
|
} |
|
404
|
|
|
|
|
|
|
} |
|
405
|
|
|
|
|
|
|
|
|
406
|
215
|
|
|
|
|
296
|
{our $_total_newly_constructed; $_total_newly_constructed++} |
|
|
215
|
|
|
|
|
248
|
|
|
|
215
|
|
|
|
|
265
|
|
|
407
|
215
|
|
|
|
|
1007
|
bless \ $ob, $class; |
|
408
|
|
|
|
|
|
|
} |
|
409
|
|
|
|
|
|
|
|
|
410
|
|
|
|
|
|
|
=item $f-Eclone |
|
411
|
|
|
|
|
|
|
|
|
412
|
|
|
|
|
|
|
Copy a C object. A copy of a read-only object is writable. |
|
413
|
|
|
|
|
|
|
|
|
414
|
|
|
|
|
|
|
=cut |
|
415
|
|
|
|
|
|
|
|
|
416
|
|
|
|
|
|
|
sub clone { |
|
417
|
8
|
|
|
8
|
1
|
14
|
my $self = shift; |
|
418
|
8
|
|
|
|
|
25
|
my $new = [OBJECT_INIT]; |
|
419
|
|
|
|
|
|
|
|
|
420
|
8
|
|
|
|
|
15
|
$new->[NAMES] = [@{$$self->[NAMES]}]; |
|
|
8
|
|
|
|
|
701
|
|
|
421
|
8
|
|
|
|
|
16
|
$new->[VALUES] = [@{$$self->[VALUES]}]; |
|
|
8
|
|
|
|
|
1440
|
|
|
422
|
8
|
100
|
|
|
|
35
|
$new->[MVOFF] = [@{$$self->[MVOFF]}] if defined $$self->[MVOFF]; |
|
|
1
|
|
|
|
|
301
|
|
|
423
|
30
|
|
|
|
|
38
|
$new->[INDEX] = {map {$_ => [@{$$self->[INDEX]{$_}}]} |
|
|
30
|
|
|
|
|
94
|
|
|
424
|
8
|
100
|
|
|
|
29
|
keys %{$$self->[INDEX]}} if defined $$self->[INDEX]; |
|
|
7
|
|
|
|
|
30
|
|
|
425
|
|
|
|
|
|
|
|
|
426
|
8
|
|
|
|
|
18
|
{our $_total_newly_cloned; $_total_newly_cloned++} |
|
|
8
|
|
|
|
|
11
|
|
|
|
8
|
|
|
|
|
16
|
|
|
427
|
8
|
|
|
|
|
31
|
bless \ $new, ref $self; |
|
428
|
|
|
|
|
|
|
} |
|
429
|
|
|
|
|
|
|
|
|
430
|
|
|
|
|
|
|
=item $f-Efield( $name ) |
|
431
|
|
|
|
|
|
|
|
|
432
|
|
|
|
|
|
|
=item $f-Efield( $name =E $value ) |
|
433
|
|
|
|
|
|
|
|
|
434
|
|
|
|
|
|
|
=item $f-Efield( $n1 =E $v1, $n2 =E $v2, ... ) |
|
435
|
|
|
|
|
|
|
|
|
436
|
|
|
|
|
|
|
Get or set the value of one or more fields. The field name is not case |
|
437
|
|
|
|
|
|
|
sensitive, but C will preserve its case if a new entry is |
|
438
|
|
|
|
|
|
|
created. |
|
439
|
|
|
|
|
|
|
|
|
440
|
|
|
|
|
|
|
Setting a field to C effectively deletes that field, although it |
|
441
|
|
|
|
|
|
|
remains visible in the tied array interface and will retain its position if |
|
442
|
|
|
|
|
|
|
a new value is assigned. Setting a field to an empty array reference |
|
443
|
|
|
|
|
|
|
removes that field entirely. |
|
444
|
|
|
|
|
|
|
|
|
445
|
|
|
|
|
|
|
=cut |
|
446
|
|
|
|
|
|
|
|
|
447
|
|
|
|
|
|
|
sub field { |
|
448
|
6519
|
|
|
6519
|
1
|
36562
|
my $self = shift; |
|
449
|
|
|
|
|
|
|
|
|
450
|
6519
|
100
|
|
|
|
13106
|
_rebuild_INDEX($self) unless defined $$self->[INDEX]; |
|
451
|
|
|
|
|
|
|
|
|
452
|
6519
|
|
|
|
|
8474
|
my $k; my $v; my $have_value_arg = scalar @_ > 1; |
|
|
6519
|
|
|
|
|
9708
|
|
|
453
|
6519
|
|
|
|
|
14832
|
while (($k, $v) = splice @_, 0, 2) { |
|
454
|
6519
|
|
|
|
|
11055
|
my $key = $self->_find_key($k); |
|
455
|
|
|
|
|
|
|
|
|
456
|
6519
|
100
|
|
|
|
17274
|
croak "reference to field with no name" unless $key =~ m/./; |
|
457
|
6517
|
100
|
|
|
|
15323
|
croak "reference to invalid field name" if $key !~ m/^$PARSE_RE__token$/o; |
|
458
|
|
|
|
|
|
|
|
|
459
|
6514
|
100
|
|
|
|
10679
|
if (not $have_value_arg) { |
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
460
|
|
|
|
|
|
|
# get a value |
|
461
|
6452
|
100
|
|
|
|
15886
|
return undef unless defined $$self->[INDEX]{$key}; |
|
462
|
3787
|
100
|
|
|
|
6040
|
return $$self->[VALUES][$$self->[INDEX]{$key}[0]] |
|
463
|
|
|
|
|
|
|
unless $self->_key_multiple_value_p($key); |
|
464
|
78
|
|
|
|
|
278
|
return [grep {defined $_} |
|
465
|
25
|
|
|
|
|
55
|
map {$$self->[VALUES][$_]} @{$$self->[INDEX]{$key}}]; |
|
|
78
|
|
|
|
|
153
|
|
|
|
25
|
|
|
|
|
74
|
|
|
466
|
|
|
|
|
|
|
} # otherwise set a value |
|
467
|
|
|
|
|
|
|
elsif (UNIVERSAL::isa($v, 'ARRAY')) |
|
468
|
48
|
|
|
|
|
104
|
{ $self->_set_multiple_value($key, $v) } |
|
469
|
|
|
|
|
|
|
elsif ($self->_key_multiple_value_p($key)) |
|
470
|
|
|
|
|
|
|
# has multiple values, but now only setting a single value |
|
471
|
2
|
|
|
|
|
7
|
{ $self->_set_multiple_value($key, [$v]) } |
|
472
|
|
|
|
|
|
|
else |
|
473
|
12
|
|
|
|
|
27
|
{ $self->_set_single_value($key, $v) } |
|
474
|
59
|
|
|
|
|
211
|
$have_value_arg = scalar @_ > 1; |
|
475
|
|
|
|
|
|
|
} |
|
476
|
59
|
|
|
|
|
155
|
return (); # return nothing |
|
477
|
|
|
|
|
|
|
# Note that setting one or more fields and then getting a field is |
|
478
|
|
|
|
|
|
|
# possible as a side-effect of this organization, but is explicitly NOT |
|
479
|
|
|
|
|
|
|
# supported. That trick is NOT part of the stable API. |
|
480
|
|
|
|
|
|
|
} |
|
481
|
|
|
|
|
|
|
|
|
482
|
|
|
|
|
|
|
=item $f = WARC::Fields-Eparse( $text ) |
|
483
|
|
|
|
|
|
|
|
|
484
|
|
|
|
|
|
|
=item $f = WARC::Fields-Eparse( from =E $fh ) |
|
485
|
|
|
|
|
|
|
|
|
486
|
|
|
|
|
|
|
=item $f = parse WARC::Fields from =E $fh |
|
487
|
|
|
|
|
|
|
|
|
488
|
|
|
|
|
|
|
Construct a new C object, reading initial contents from the |
|
489
|
|
|
|
|
|
|
provided text string or filehandle. |
|
490
|
|
|
|
|
|
|
|
|
491
|
|
|
|
|
|
|
The C method throws an exception if it encounters input that it does |
|
492
|
|
|
|
|
|
|
not understand. |
|
493
|
|
|
|
|
|
|
|
|
494
|
|
|
|
|
|
|
If the C method encounters a field name with a leading ':', which |
|
495
|
|
|
|
|
|
|
implies an empty name and is not allowed, the leading ':' is silently |
|
496
|
|
|
|
|
|
|
dropped from the line and parsing retried. If the line is not valid after |
|
497
|
|
|
|
|
|
|
this change, the C method throws an exception. This feature is in |
|
498
|
|
|
|
|
|
|
keeping with the general principle of "be liberal in what you accept" and |
|
499
|
|
|
|
|
|
|
is a preemptive workaround for a predicted bug in other implementations. |
|
500
|
|
|
|
|
|
|
|
|
501
|
|
|
|
|
|
|
=cut |
|
502
|
|
|
|
|
|
|
|
|
503
|
|
|
|
|
|
|
sub parse { |
|
504
|
556
|
|
|
556
|
1
|
1964
|
my $class = shift; |
|
505
|
556
|
|
|
|
|
748
|
my $text = shift; |
|
506
|
556
|
|
|
|
|
694
|
my $rd; |
|
507
|
|
|
|
|
|
|
|
|
508
|
556
|
100
|
|
|
|
1002
|
if ($text eq 'from') { |
|
509
|
551
|
|
|
|
|
724
|
$rd = shift; |
|
510
|
|
|
|
|
|
|
} else { |
|
511
|
|
|
|
|
|
|
# This fails iff perl was built without PerlIO, which is non-default. |
|
512
|
|
|
|
|
|
|
# uncoverable branch true |
|
513
|
5
|
50
|
|
|
|
64
|
open $rd, '<', \$text or die "failure opening stream on variable: $!"; |
|
514
|
|
|
|
|
|
|
} |
|
515
|
|
|
|
|
|
|
|
|
516
|
556
|
|
|
|
|
941
|
my @names = (); |
|
517
|
556
|
|
|
|
|
721
|
my @values = (); |
|
518
|
556
|
|
|
|
|
776
|
my %idx = (); |
|
519
|
556
|
|
|
|
|
671
|
my $at_end = 0; |
|
520
|
|
|
|
|
|
|
|
|
521
|
556
|
|
|
|
|
1236
|
local *_; |
|
522
|
556
|
|
|
|
|
1629
|
while (<$rd>) { |
|
523
|
4043
|
|
|
|
|
41979
|
s/[\015\012]+$//; |
|
524
|
4043
|
100
|
|
|
|
13373
|
if (m/^:?($PARSE_RE__token):\s+(.*)$/o) # $1 -- name $2 -- value |
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
525
|
3487
|
|
|
|
|
7675
|
{ push @names, $1; push @values, $2; push @{$idx{lc $1}}, $#names } |
|
|
3487
|
|
|
|
|
5791
|
|
|
|
3487
|
|
|
|
|
4011
|
|
|
|
3487
|
|
|
|
|
14329
|
|
|
526
|
|
|
|
|
|
|
elsif (m/^\s+(\S.*)$/) # $1 -- continued value |
|
527
|
1
|
|
|
|
|
10
|
{ $values[$#values] .= ' '.$1 } |
|
528
|
553
|
|
|
|
|
831
|
elsif (m/^$/) { $at_end = 1; last } |
|
|
553
|
|
|
|
|
888
|
|
|
529
|
2
|
|
|
|
|
362
|
else { croak "unrecognized input: $_" } |
|
530
|
|
|
|
|
|
|
} |
|
531
|
|
|
|
|
|
|
|
|
532
|
554
|
100
|
|
|
|
1198
|
carp "end-of-input before end marker" unless $at_end; |
|
533
|
|
|
|
|
|
|
|
|
534
|
554
|
|
|
|
|
970
|
@values = map {Encode::decode_utf8($_)} @values; |
|
|
3487
|
|
|
|
|
44876
|
|
|
535
|
|
|
|
|
|
|
|
|
536
|
554
|
|
|
|
|
8820
|
my $ob = [OBJECT_INIT]; |
|
537
|
554
|
|
|
|
|
963
|
$ob->[NAMES] = \@names; |
|
538
|
554
|
|
|
|
|
807
|
$ob->[VALUES] = \@values; |
|
539
|
554
|
|
|
|
|
805
|
$ob->[INDEX] = \%idx; |
|
540
|
|
|
|
|
|
|
|
|
541
|
554
|
|
|
|
|
722
|
{our $_total_newly_parsed; $_total_newly_parsed++} |
|
|
554
|
|
|
|
|
698
|
|
|
|
554
|
|
|
|
|
743
|
|
|
542
|
554
|
|
|
|
|
2127
|
bless \ $ob, $class; |
|
543
|
|
|
|
|
|
|
} |
|
544
|
|
|
|
|
|
|
|
|
545
|
|
|
|
|
|
|
=item $f-Eas_block |
|
546
|
|
|
|
|
|
|
|
|
547
|
|
|
|
|
|
|
=item $f-Eas_string |
|
548
|
|
|
|
|
|
|
|
|
549
|
|
|
|
|
|
|
Return the contents as a formatted WARC header or application/warc-fields |
|
550
|
|
|
|
|
|
|
block. The C method uses network line endings and UTF-8 as |
|
551
|
|
|
|
|
|
|
specified for the WARC format, while the C method uses the local |
|
552
|
|
|
|
|
|
|
line endings and does not perform encoding. |
|
553
|
|
|
|
|
|
|
|
|
554
|
|
|
|
|
|
|
=cut |
|
555
|
|
|
|
|
|
|
|
|
556
|
|
|
|
|
|
|
sub _as_text { |
|
557
|
48
|
|
|
48
|
|
74
|
my $self = shift; |
|
558
|
48
|
|
|
|
|
68
|
my $newline = shift; |
|
559
|
48
|
|
|
|
|
63
|
my $out = ''; |
|
560
|
|
|
|
|
|
|
|
|
561
|
48
|
|
|
|
|
81
|
for (my $i = 0; $i < @{$$self->[NAMES]}; $i++) { |
|
|
258
|
|
|
|
|
630
|
|
|
562
|
210
|
100
|
|
|
|
403
|
next unless defined $$self->[VALUES][$i]; |
|
563
|
203
|
|
|
|
|
451
|
$out .= $$self->[NAMES][$i] . ': ' . $$self->[VALUES][$i] . $newline; |
|
564
|
|
|
|
|
|
|
} |
|
565
|
|
|
|
|
|
|
|
|
566
|
48
|
|
|
|
|
227
|
return $out; |
|
567
|
|
|
|
|
|
|
} |
|
568
|
|
|
|
|
|
|
|
|
569
|
1
|
|
|
1
|
1
|
4
|
sub as_block { Encode::encode('UTF-8', _as_text(shift, WARC::CRLF)) } |
|
570
|
47
|
|
|
47
|
1
|
4701
|
sub as_string { _as_text(shift, "\n") } |
|
571
|
|
|
|
|
|
|
|
|
572
|
|
|
|
|
|
|
=item $f-Eset_readonly |
|
573
|
|
|
|
|
|
|
|
|
574
|
|
|
|
|
|
|
Mark a C object read-only. All methods that modify the |
|
575
|
|
|
|
|
|
|
object will croak() if called on a read-only object. |
|
576
|
|
|
|
|
|
|
|
|
577
|
|
|
|
|
|
|
=cut |
|
578
|
|
|
|
|
|
|
|
|
579
|
|
|
|
|
|
|
sub set_readonly { |
|
580
|
680
|
|
|
680
|
1
|
1181
|
my $self = shift; |
|
581
|
|
|
|
|
|
|
|
|
582
|
680
|
|
|
|
|
1654
|
$$self->[IS_RO] = 1; |
|
583
|
|
|
|
|
|
|
} |
|
584
|
|
|
|
|
|
|
|
|
585
|
|
|
|
|
|
|
=back |
|
586
|
|
|
|
|
|
|
|
|
587
|
|
|
|
|
|
|
=head2 Tied Array Access |
|
588
|
|
|
|
|
|
|
|
|
589
|
|
|
|
|
|
|
The order of fields can be fully controlled by tying an array to a |
|
590
|
|
|
|
|
|
|
C object and manipulating the array using ordinary Perl |
|
591
|
|
|
|
|
|
|
operations. The C and C functions are likely to be useful |
|
592
|
|
|
|
|
|
|
for reordering array elements if desired. |
|
593
|
|
|
|
|
|
|
|
|
594
|
|
|
|
|
|
|
C will croak() if an attempt is made to set a field name with |
|
595
|
|
|
|
|
|
|
a leading ':' using the tied array interface. |
|
596
|
|
|
|
|
|
|
|
|
597
|
|
|
|
|
|
|
=cut |
|
598
|
|
|
|
|
|
|
|
|
599
|
|
|
|
|
|
|
sub TIEARRAY { |
|
600
|
4
|
|
|
4
|
|
15
|
my $class = shift; |
|
601
|
4
|
|
|
|
|
13
|
my $ob = shift; |
|
602
|
|
|
|
|
|
|
|
|
603
|
|
|
|
|
|
|
# This method must ignore the given class to allow the "empty subclass" |
|
604
|
|
|
|
|
|
|
# test to pass. If a subclass really wants, an override for TIEARRAY |
|
605
|
|
|
|
|
|
|
# itself can call SUPER::TIEARRAY and re-bless the returned reference |
|
606
|
|
|
|
|
|
|
# into the desired class. |
|
607
|
4
|
|
|
|
|
7
|
$WARC::Fields::TiedArray::_total_tied++; |
|
608
|
4
|
|
|
|
|
24
|
bless \ $ob, 'WARC::Fields::TiedArray'; |
|
609
|
|
|
|
|
|
|
} |
|
610
|
|
|
|
|
|
|
|
|
611
|
|
|
|
|
|
|
{ |
|
612
|
|
|
|
|
|
|
package WARC::Fields::TiedArray::Entry; |
|
613
|
|
|
|
|
|
|
|
|
614
|
26
|
|
|
26
|
|
240
|
use Carp; |
|
|
26
|
|
|
|
|
64
|
|
|
|
26
|
|
|
|
|
2251
|
|
|
615
|
|
|
|
|
|
|
|
|
616
|
|
|
|
|
|
|
BEGIN { $WARC::Fields::TiedArray::Entry::{$_} = $WARC::Fields::{$_} |
|
617
|
26
|
|
|
26
|
|
1118
|
for WARC::Fields::OBJECT_INDEX; } |
|
618
|
|
|
|
|
|
|
|
|
619
|
26
|
|
|
26
|
|
198
|
use constant { NAME => 0, VALUE => 1, TABLE => 2, ROW => 3 }; |
|
|
26
|
|
|
|
|
51
|
|
|
|
26
|
|
|
|
|
2565
|
|
|
620
|
|
|
|
|
|
|
|
|
621
|
26
|
|
|
26
|
|
174
|
use overload '""' => 'name', fallback => 1; |
|
|
26
|
|
|
|
|
49
|
|
|
|
26
|
|
|
|
|
186
|
|
|
622
|
|
|
|
|
|
|
|
|
623
|
|
|
|
|
|
|
=pod |
|
624
|
|
|
|
|
|
|
|
|
625
|
|
|
|
|
|
|
The tied array interface accepts simple string values but returns objects |
|
626
|
|
|
|
|
|
|
with additional information. The returned object has an overloaded string |
|
627
|
|
|
|
|
|
|
conversion that yields the name for that entry but additionally has |
|
628
|
|
|
|
|
|
|
C and C methods. |
|
629
|
|
|
|
|
|
|
|
|
630
|
|
|
|
|
|
|
An entry object is bound to a slot in its parent C object, |
|
631
|
|
|
|
|
|
|
but will be copied if it is assigned to another slot in the same or another |
|
632
|
|
|
|
|
|
|
C object. |
|
633
|
|
|
|
|
|
|
|
|
634
|
|
|
|
|
|
|
Due to complex aliasing rules necessary for array slice assignment to work |
|
635
|
|
|
|
|
|
|
for permuting rows in the table, entry objects must be short-lived. |
|
636
|
|
|
|
|
|
|
Storing the object read from a tied array and attempting to use it after |
|
637
|
|
|
|
|
|
|
modifying its parent C object produces unspecified results. |
|
638
|
|
|
|
|
|
|
|
|
639
|
|
|
|
|
|
|
=over |
|
640
|
|
|
|
|
|
|
|
|
641
|
|
|
|
|
|
|
=item $entry = $array[$n] |
|
642
|
|
|
|
|
|
|
|
|
643
|
|
|
|
|
|
|
=item $entry = $f-E[$n] |
|
644
|
|
|
|
|
|
|
|
|
645
|
|
|
|
|
|
|
The tied array C method returns a "entry object" instead of the name |
|
646
|
|
|
|
|
|
|
itself. |
|
647
|
|
|
|
|
|
|
|
|
648
|
|
|
|
|
|
|
=cut |
|
649
|
|
|
|
|
|
|
|
|
650
|
|
|
|
|
|
|
sub _new { |
|
651
|
223
|
|
|
223
|
|
311
|
my $class = shift; |
|
652
|
223
|
|
|
|
|
292
|
my $table = shift; |
|
653
|
223
|
|
|
|
|
260
|
my $row = shift; |
|
654
|
|
|
|
|
|
|
|
|
655
|
223
|
|
|
|
|
1018
|
bless [$$table->[NAMES][$row], $$table->[VALUES][$row], |
|
656
|
|
|
|
|
|
|
$table, $row], $class; |
|
657
|
|
|
|
|
|
|
} |
|
658
|
|
|
|
|
|
|
|
|
659
|
|
|
|
|
|
|
=item $name = "$entry" |
|
660
|
|
|
|
|
|
|
|
|
661
|
|
|
|
|
|
|
=item $name = $entry-Ename |
|
662
|
|
|
|
|
|
|
|
|
663
|
|
|
|
|
|
|
=item $name = "$f-E[$n]" |
|
664
|
|
|
|
|
|
|
|
|
665
|
|
|
|
|
|
|
=item $name = $f-E[$n]-Ename |
|
666
|
|
|
|
|
|
|
|
|
667
|
|
|
|
|
|
|
The C method on a entry object returns the field name. |
|
668
|
|
|
|
|
|
|
String conversion is overloaded to call this method. |
|
669
|
|
|
|
|
|
|
|
|
670
|
|
|
|
|
|
|
=cut |
|
671
|
|
|
|
|
|
|
|
|
672
|
62
|
|
|
62
|
|
9094
|
sub name { (shift)->[NAME] } |
|
673
|
|
|
|
|
|
|
|
|
674
|
|
|
|
|
|
|
=item $value = $entry-Evalue |
|
675
|
|
|
|
|
|
|
|
|
676
|
|
|
|
|
|
|
=item $value = $array[$n]-Evalue |
|
677
|
|
|
|
|
|
|
|
|
678
|
|
|
|
|
|
|
=item $value = $f-E[$n]-Evalue |
|
679
|
|
|
|
|
|
|
|
|
680
|
|
|
|
|
|
|
=item $entry-Evalue( $new_value ) |
|
681
|
|
|
|
|
|
|
|
|
682
|
|
|
|
|
|
|
=item $array[$n]-Evalue( $new_value ) |
|
683
|
|
|
|
|
|
|
|
|
684
|
|
|
|
|
|
|
=item $f-E[$n]-Evalue( $new_value ) |
|
685
|
|
|
|
|
|
|
|
|
686
|
|
|
|
|
|
|
The C method on a entry object returns the field value for this |
|
687
|
|
|
|
|
|
|
particular entry. Only a single scalar is returned, even if multiple |
|
688
|
|
|
|
|
|
|
entries share the same name. |
|
689
|
|
|
|
|
|
|
|
|
690
|
|
|
|
|
|
|
If given an argument, the C method replaces the value for this |
|
691
|
|
|
|
|
|
|
particular entry. The argument will be coerced to a string. |
|
692
|
|
|
|
|
|
|
|
|
693
|
|
|
|
|
|
|
=cut |
|
694
|
|
|
|
|
|
|
|
|
695
|
|
|
|
|
|
|
sub value { |
|
696
|
154
|
|
|
154
|
|
220
|
my $self = shift; |
|
697
|
|
|
|
|
|
|
|
|
698
|
154
|
100
|
|
|
|
272
|
if (scalar @_ == 0) { # get this value |
|
699
|
34
|
|
|
|
|
160
|
return $self->[VALUE]; |
|
700
|
|
|
|
|
|
|
} else { # update this value |
|
701
|
120
|
100
|
|
|
|
144
|
croak "attempt to modify read-only object" if ${$self->[TABLE]}->[IS_RO]; |
|
|
120
|
|
|
|
|
382
|
|
|
702
|
119
|
|
|
|
|
174
|
my $newval = shift; |
|
703
|
119
|
|
|
|
|
191
|
${$self->[TABLE]}->[VALUES]->[$self->[ROW]] = $self->[VALUE] = "$newval"; |
|
|
119
|
|
|
|
|
218
|
|
|
704
|
119
|
|
|
|
|
290
|
return (); # and return nothing |
|
705
|
|
|
|
|
|
|
} |
|
706
|
|
|
|
|
|
|
} |
|
707
|
|
|
|
|
|
|
|
|
708
|
|
|
|
|
|
|
=item $offset = $entry-Eoffset |
|
709
|
|
|
|
|
|
|
|
|
710
|
|
|
|
|
|
|
=item $offset = $array[$n]-Eoffset |
|
711
|
|
|
|
|
|
|
|
|
712
|
|
|
|
|
|
|
=item $offset = $f-E[$n]-Eoffset |
|
713
|
|
|
|
|
|
|
|
|
714
|
|
|
|
|
|
|
The C method on a entry object returns the position of this entry |
|
715
|
|
|
|
|
|
|
amongst multiple entries with the same field name. These positions are |
|
716
|
|
|
|
|
|
|
numbered from zero and are identical to the positions in the array |
|
717
|
|
|
|
|
|
|
reference returned for this entry's field name from the C method or |
|
718
|
|
|
|
|
|
|
the tied hash interface. |
|
719
|
|
|
|
|
|
|
|
|
720
|
|
|
|
|
|
|
=cut |
|
721
|
|
|
|
|
|
|
|
|
722
|
|
|
|
|
|
|
sub offset { |
|
723
|
21
|
|
|
21
|
|
36
|
my $self = shift; |
|
724
|
21
|
100
|
|
|
|
27
|
$self->[TABLE]->_rebuild_MVOFF unless defined ${$self->[TABLE]}->[MVOFF]; |
|
|
21
|
|
|
|
|
76
|
|
|
725
|
21
|
|
|
|
|
39
|
return ${$self->[TABLE]}->[MVOFF]->[$self->[ROW]]; |
|
|
21
|
|
|
|
|
115
|
|
|
726
|
|
|
|
|
|
|
} |
|
727
|
|
|
|
|
|
|
|
|
728
|
|
|
|
|
|
|
=back |
|
729
|
|
|
|
|
|
|
|
|
730
|
|
|
|
|
|
|
=cut |
|
731
|
|
|
|
|
|
|
|
|
732
|
|
|
|
|
|
|
} |
|
733
|
|
|
|
|
|
|
|
|
734
|
|
|
|
|
|
|
{ |
|
735
|
|
|
|
|
|
|
package WARC::Fields::TiedArray::LooseEntry; |
|
736
|
|
|
|
|
|
|
|
|
737
|
26
|
|
|
26
|
|
8967
|
use Carp; |
|
|
26
|
|
|
|
|
56
|
|
|
|
26
|
|
|
|
|
2164
|
|
|
738
|
|
|
|
|
|
|
|
|
739
|
|
|
|
|
|
|
BEGIN { $WARC::Fields::TiedArray::LooseEntry::{$_} = $WARC::Fields::{$_} |
|
740
|
26
|
|
|
26
|
|
927
|
for WARC::Fields::OBJECT_INDEX; } |
|
741
|
|
|
|
|
|
|
|
|
742
|
26
|
|
|
26
|
|
179
|
use constant { NAME => 0, VALUE => 1 }; |
|
|
26
|
|
|
|
|
58
|
|
|
|
26
|
|
|
|
|
1985
|
|
|
743
|
|
|
|
|
|
|
|
|
744
|
26
|
|
|
26
|
|
4615
|
BEGIN { our @ISA = qw(WARC::Fields::TiedArray::Entry) } |
|
745
|
|
|
|
|
|
|
|
|
746
|
|
|
|
|
|
|
# This is a special type of "entry object" that is not associated with a |
|
747
|
|
|
|
|
|
|
# table row, returned from POP, SHIFT, and SPLICE when needed. |
|
748
|
|
|
|
|
|
|
|
|
749
|
|
|
|
|
|
|
sub _new { |
|
750
|
24
|
|
|
24
|
|
397
|
my $class = shift; |
|
751
|
24
|
|
|
|
|
34
|
my $name = shift; |
|
752
|
24
|
|
|
|
|
29
|
my $value = shift; |
|
753
|
|
|
|
|
|
|
|
|
754
|
24
|
|
|
|
|
74
|
bless [$name, $value], $class; |
|
755
|
|
|
|
|
|
|
} |
|
756
|
|
|
|
|
|
|
|
|
757
|
18
|
|
|
18
|
|
892
|
sub name { return (shift)->[NAME] } |
|
758
|
|
|
|
|
|
|
|
|
759
|
|
|
|
|
|
|
sub value { |
|
760
|
17
|
|
|
17
|
|
25
|
my $self = shift; |
|
761
|
|
|
|
|
|
|
|
|
762
|
17
|
100
|
|
|
|
36
|
if (scalar @_ == 0) # get |
|
763
|
16
|
|
|
|
|
39
|
{ return $self->[VALUE] } |
|
764
|
|
|
|
|
|
|
else # set |
|
765
|
1
|
|
|
|
|
157
|
{ croak "Loose array entries are read-only." } |
|
766
|
|
|
|
|
|
|
} |
|
767
|
|
|
|
|
|
|
|
|
768
|
1
|
|
|
1
|
|
7
|
sub offset { return undef } |
|
769
|
|
|
|
|
|
|
} |
|
770
|
|
|
|
|
|
|
|
|
771
|
|
|
|
|
|
|
{ |
|
772
|
|
|
|
|
|
|
package WARC::Fields::TiedArray; |
|
773
|
|
|
|
|
|
|
|
|
774
|
26
|
|
|
26
|
|
196
|
use Carp; |
|
|
26
|
|
|
|
|
66
|
|
|
|
26
|
|
|
|
|
1975
|
|
|
775
|
|
|
|
|
|
|
|
|
776
|
|
|
|
|
|
|
BEGIN { $WARC::Fields::TiedArray::{$_} = $WARC::Fields::{$_} |
|
777
|
26
|
|
|
26
|
|
40949
|
for WARC::Fields::OBJECT_INDEX; } |
|
778
|
|
|
|
|
|
|
|
|
779
|
|
|
|
|
|
|
# The underlying object is a reference to a WARC::Fields object. |
|
780
|
|
|
|
|
|
|
|
|
781
|
|
|
|
|
|
|
sub FETCH { |
|
782
|
223
|
|
|
223
|
|
8869
|
my $self = shift; |
|
783
|
223
|
|
|
|
|
289
|
my $row = shift; |
|
784
|
223
|
|
|
|
|
642
|
return (ref($self).'::Entry')->_new($$self, $row); |
|
785
|
|
|
|
|
|
|
} |
|
786
|
|
|
|
|
|
|
|
|
787
|
|
|
|
|
|
|
sub STORE { |
|
788
|
13
|
|
|
13
|
|
464
|
my $self = shift; |
|
789
|
13
|
|
|
|
|
17
|
my $row = shift; |
|
790
|
13
|
|
|
|
|
18
|
my $name = shift; |
|
791
|
|
|
|
|
|
|
|
|
792
|
13
|
100
|
|
|
|
156
|
croak "attempt to modify read-only object" if $$$self->[IS_RO]; |
|
793
|
|
|
|
|
|
|
|
|
794
|
12
|
100
|
|
|
|
19
|
$self->STORESIZE($row + 1) if $#{$$$self->[NAMES]} < $row; |
|
|
12
|
|
|
|
|
32
|
|
|
795
|
|
|
|
|
|
|
|
|
796
|
12
|
100
|
|
|
|
59
|
if (UNIVERSAL::isa($name, ref($self).'::Entry')) { |
|
797
|
|
|
|
|
|
|
# copy entry |
|
798
|
8
|
100
|
|
|
|
17
|
croak "attempt to set invalid name" |
|
799
|
|
|
|
|
|
|
if $name->name !~ m/^$PARSE_RE__token$/o; |
|
800
|
7
|
|
|
|
|
36
|
$$$self->[NAMES]->[$row] = $name->name; |
|
801
|
7
|
|
|
|
|
14
|
$$$self->[VALUES]->[$row] = $name->value; |
|
802
|
|
|
|
|
|
|
} else { |
|
803
|
|
|
|
|
|
|
# set name |
|
804
|
4
|
100
|
|
|
|
342
|
croak "attempt to set invalid name" |
|
805
|
|
|
|
|
|
|
if "$name" !~ m/^$PARSE_RE__token$/o; |
|
806
|
2
|
|
|
|
|
8
|
$$$self->[NAMES]->[$row] = "$name"; |
|
807
|
|
|
|
|
|
|
} |
|
808
|
9
|
|
|
|
|
48
|
$$$self->[MVOFF] = undef; |
|
809
|
9
|
|
|
|
|
35
|
$$$self->[INDEX] = undef; |
|
810
|
|
|
|
|
|
|
} |
|
811
|
|
|
|
|
|
|
|
|
812
|
|
|
|
|
|
|
sub FETCHSIZE { |
|
813
|
38
|
|
|
38
|
|
5182
|
my $self = shift; |
|
814
|
38
|
|
|
|
|
51
|
return scalar @{$$$self->[NAMES]}; |
|
|
38
|
|
|
|
|
295
|
|
|
815
|
|
|
|
|
|
|
} |
|
816
|
|
|
|
|
|
|
|
|
817
|
|
|
|
|
|
|
sub STORESIZE { |
|
818
|
5
|
|
|
5
|
|
12
|
my $self = shift; |
|
819
|
5
|
|
|
|
|
7
|
my $count = shift; |
|
820
|
|
|
|
|
|
|
|
|
821
|
5
|
100
|
100
|
|
|
25
|
croak "attempt to modify read-only object" |
|
822
|
|
|
|
|
|
|
if $$$self->[IS_RO] && $count != $self->FETCHSIZE(); |
|
823
|
|
|
|
|
|
|
|
|
824
|
4
|
100
|
|
|
|
12
|
if ($count > $self->FETCHSIZE()) { |
|
|
|
100
|
|
|
|
|
|
|
825
|
2
|
|
|
|
|
5
|
my $needed = $count - $self->FETCHSIZE(); |
|
826
|
2
|
|
|
|
|
4
|
push @{$$$self->[NAMES]}, ('X-Undefined-Field-Name') x $needed; |
|
|
2
|
|
|
|
|
7
|
|
|
827
|
2
|
|
|
|
|
3
|
push @{$$$self->[VALUES]}, (undef) x $needed; |
|
|
2
|
|
|
|
|
9
|
|
|
828
|
|
|
|
|
|
|
} elsif ($count < $self->FETCHSIZE()) { |
|
829
|
1
|
|
|
|
|
2
|
splice @{$$$self->[NAMES]}, $count; |
|
|
1
|
|
|
|
|
4
|
|
|
830
|
1
|
|
|
|
|
2
|
splice @{$$$self->[VALUES]}, $count; |
|
|
1
|
|
|
|
|
4
|
|
|
831
|
1
|
|
|
|
|
5
|
$$$self->[INDEX] = undef; |
|
832
|
1
|
|
|
|
|
5
|
} else { return } # no actual change |
|
833
|
|
|
|
|
|
|
} |
|
834
|
|
|
|
|
|
|
|
|
835
|
|
|
|
0
|
|
|
sub EXTEND { |
|
836
|
|
|
|
|
|
|
# do nothing |
|
837
|
|
|
|
|
|
|
} |
|
838
|
|
|
|
|
|
|
|
|
839
|
|
|
|
|
|
|
sub EXISTS { |
|
840
|
14
|
|
|
14
|
|
2053
|
my $self = shift; |
|
841
|
14
|
|
|
|
|
22
|
my $row = shift; |
|
842
|
14
|
|
|
|
|
82
|
return defined $$$self->[VALUES]->[$row]; |
|
843
|
|
|
|
|
|
|
} |
|
844
|
|
|
|
|
|
|
|
|
845
|
|
|
|
|
|
|
sub DELETE { |
|
846
|
2
|
|
|
2
|
|
5
|
my $self = shift; |
|
847
|
2
|
|
|
|
|
4
|
my $row = shift; |
|
848
|
|
|
|
|
|
|
|
|
849
|
2
|
100
|
|
|
|
115
|
croak "attempt to modify read-only object" if $$$self->[IS_RO]; |
|
850
|
|
|
|
|
|
|
|
|
851
|
1
|
|
|
|
|
4
|
my $old_value = $$$self->[VALUES]->[$row]; |
|
852
|
1
|
|
|
|
|
3
|
$$$self->[VALUES]->[$row] = undef; |
|
853
|
1
|
|
|
|
|
2
|
$$$self->[MVOFF] = undef; |
|
854
|
1
|
|
|
|
|
5
|
$$$self->[INDEX] = undef; |
|
855
|
1
|
|
|
|
|
4
|
return $old_value; |
|
856
|
|
|
|
|
|
|
} |
|
857
|
|
|
|
|
|
|
|
|
858
|
|
|
|
|
|
|
sub CLEAR { |
|
859
|
3
|
|
|
3
|
|
6
|
my $self = shift; |
|
860
|
|
|
|
|
|
|
|
|
861
|
3
|
100
|
|
|
|
118
|
croak "attempt to modify read-only object" if $$$self->[IS_RO]; |
|
862
|
|
|
|
|
|
|
|
|
863
|
2
|
|
|
|
|
9
|
$$$self->[NAMES] = []; |
|
864
|
2
|
|
|
|
|
5
|
$$$self->[VALUES] = []; |
|
865
|
2
|
|
|
|
|
6
|
$$$self->[MVOFF] = undef; |
|
866
|
2
|
|
|
|
|
6
|
$$$self->[INDEX] = undef; |
|
867
|
2
|
|
|
|
|
6
|
return undef; |
|
868
|
|
|
|
|
|
|
} |
|
869
|
|
|
|
|
|
|
|
|
870
|
|
|
|
|
|
|
sub PUSH { |
|
871
|
7
|
|
|
7
|
|
769
|
my $self = shift; |
|
872
|
|
|
|
|
|
|
|
|
873
|
7
|
100
|
100
|
|
|
137
|
croak "attempt to modify read-only object" |
|
874
|
|
|
|
|
|
|
if $$$self->[IS_RO] && scalar @_; |
|
875
|
|
|
|
|
|
|
|
|
876
|
6
|
|
|
|
|
16
|
foreach my $item (@_) { |
|
877
|
103
|
|
|
|
|
141
|
my $name; my $value; |
|
878
|
103
|
100
|
|
|
|
332
|
if (UNIVERSAL::isa($item, ref($self).'::Entry')) |
|
879
|
2
|
|
|
|
|
6
|
{ $name = $item->name; $value = $item->value } |
|
|
2
|
|
|
|
|
7
|
|
|
880
|
|
|
|
|
|
|
else |
|
881
|
101
|
|
|
|
|
149
|
{ $name = "$item"; $value = undef } |
|
|
101
|
|
|
|
|
125
|
|
|
882
|
103
|
100
|
|
|
|
553
|
croak "attempt to set invalid name" if $name !~ m/^$PARSE_RE__token$/o; |
|
883
|
101
|
|
|
|
|
143
|
push @{$$$self->[NAMES]}, $name; |
|
|
101
|
|
|
|
|
550
|
|
|
884
|
101
|
|
|
|
|
150
|
push @{$$$self->[VALUES]}, $value; |
|
|
101
|
|
|
|
|
332
|
|
|
885
|
|
|
|
|
|
|
} |
|
886
|
4
|
|
|
|
|
10
|
$$$self->[MVOFF] = undef; |
|
887
|
4
|
|
|
|
|
119
|
$$$self->[INDEX] = undef; |
|
888
|
4
|
|
|
|
|
8
|
return scalar @{$$$self->[NAMES]}; |
|
|
4
|
|
|
|
|
16
|
|
|
889
|
|
|
|
|
|
|
} |
|
890
|
|
|
|
|
|
|
|
|
891
|
|
|
|
|
|
|
sub POP { |
|
892
|
5
|
|
|
5
|
|
10
|
my $self = shift; |
|
893
|
|
|
|
|
|
|
|
|
894
|
5
|
100
|
|
|
|
128
|
croak "attempt to modify read-only object" if $$$self->[IS_RO]; |
|
895
|
|
|
|
|
|
|
|
|
896
|
|
|
|
|
|
|
my $ret = WARC::Fields::TiedArray::LooseEntry->_new |
|
897
|
4
|
|
|
|
|
8
|
(pop @{$$$self->[NAMES]}, pop @{$$$self->[VALUES]}); |
|
|
4
|
|
|
|
|
9
|
|
|
|
4
|
|
|
|
|
15
|
|
|
898
|
4
|
100
|
|
|
|
14
|
pop @{$$$self->[MVOFF]} if defined $$$self->[MVOFF]; |
|
|
2
|
|
|
|
|
18
|
|
|
899
|
4
|
|
|
|
|
10
|
$$$self->[INDEX] = undef; |
|
900
|
|
|
|
|
|
|
|
|
901
|
4
|
|
|
|
|
14
|
return $ret; |
|
902
|
|
|
|
|
|
|
} |
|
903
|
|
|
|
|
|
|
|
|
904
|
|
|
|
|
|
|
sub SHIFT { |
|
905
|
2
|
|
|
2
|
|
5
|
my $self = shift; |
|
906
|
|
|
|
|
|
|
|
|
907
|
2
|
100
|
|
|
|
118
|
croak "attempt to modify read-only object" if $$$self->[IS_RO]; |
|
908
|
|
|
|
|
|
|
|
|
909
|
|
|
|
|
|
|
my $ret = WARC::Fields::TiedArray::LooseEntry->_new |
|
910
|
1
|
|
|
|
|
2
|
(shift @{$$$self->[NAMES]}, shift @{$$$self->[VALUES]}); |
|
|
1
|
|
|
|
|
4
|
|
|
|
1
|
|
|
|
|
5
|
|
|
911
|
1
|
|
|
|
|
4
|
$$$self->[MVOFF] = undef; |
|
912
|
1
|
|
|
|
|
4
|
$$$self->[INDEX] = undef; |
|
913
|
|
|
|
|
|
|
|
|
914
|
1
|
|
|
|
|
4
|
return $ret; |
|
915
|
|
|
|
|
|
|
} |
|
916
|
|
|
|
|
|
|
|
|
917
|
|
|
|
|
|
|
sub UNSHIFT { |
|
918
|
6
|
|
|
6
|
|
730
|
my $self = shift; |
|
919
|
|
|
|
|
|
|
|
|
920
|
6
|
100
|
100
|
|
|
170
|
croak "attempt to modify read-only object" |
|
921
|
|
|
|
|
|
|
if $$$self->[IS_RO] && scalar @_; |
|
922
|
|
|
|
|
|
|
|
|
923
|
5
|
|
|
|
|
12
|
foreach my $item (@_) { |
|
924
|
4
|
|
|
|
|
7
|
my $name; my $value; |
|
925
|
4
|
100
|
|
|
|
23
|
if (UNIVERSAL::isa($item, ref($self).'::Entry')) |
|
926
|
2
|
|
|
|
|
7
|
{ $name = $item->name; $value = $item->value } |
|
|
2
|
|
|
|
|
5
|
|
|
927
|
|
|
|
|
|
|
else |
|
928
|
2
|
|
|
|
|
4
|
{ $name = "$item"; $value = undef } |
|
|
2
|
|
|
|
|
4
|
|
|
929
|
4
|
100
|
|
|
|
286
|
croak "attempt to set invalid name" if $name !~ m/^$PARSE_RE__token$/o; |
|
930
|
2
|
|
|
|
|
3
|
unshift @{$$$self->[NAMES]}, $name; |
|
|
2
|
|
|
|
|
8
|
|
|
931
|
2
|
|
|
|
|
5
|
unshift @{$$$self->[VALUES]}, $value; |
|
|
2
|
|
|
|
|
7
|
|
|
932
|
|
|
|
|
|
|
} |
|
933
|
3
|
|
|
|
|
7
|
$$$self->[MVOFF] = undef; |
|
934
|
3
|
|
|
|
|
6
|
$$$self->[INDEX] = undef; |
|
935
|
3
|
|
|
|
|
8
|
return scalar @{$$$self->[NAMES]}; |
|
|
3
|
|
|
|
|
12
|
|
|
936
|
|
|
|
|
|
|
} |
|
937
|
|
|
|
|
|
|
|
|
938
|
|
|
|
|
|
|
sub SPLICE { |
|
939
|
10
|
|
|
10
|
|
777
|
my $self = shift; |
|
940
|
10
|
|
|
|
|
15
|
my $offset = shift; |
|
941
|
10
|
|
|
|
|
17
|
my $length = shift; |
|
942
|
|
|
|
|
|
|
|
|
943
|
10
|
100
|
|
|
|
29
|
$offset = 0 unless defined $offset; |
|
944
|
10
|
100
|
|
|
|
24
|
$length = $self->FETCHSIZE() - $offset unless defined $length; |
|
945
|
|
|
|
|
|
|
|
|
946
|
10
|
100
|
100
|
|
|
42
|
return () unless ($length != 0 || scalar @_); |
|
947
|
|
|
|
|
|
|
|
|
948
|
9
|
100
|
|
|
|
234
|
croak "attempt to modify read-only object" if $$$self->[IS_RO]; |
|
949
|
|
|
|
|
|
|
|
|
950
|
7
|
|
|
|
|
13
|
my @new_names = (); my @new_values = (); my @old_names; my @old_values; |
|
|
7
|
|
|
|
|
12
|
|
|
|
7
|
|
|
|
|
11
|
|
|
951
|
|
|
|
|
|
|
|
|
952
|
7
|
|
|
|
|
17
|
foreach my $item (@_) { |
|
953
|
17
|
100
|
|
|
|
62
|
if (UNIVERSAL::isa($item, ref($self).'::Entry')) { |
|
954
|
11
|
|
|
|
|
22
|
push @new_names, $item->name; |
|
955
|
11
|
|
|
|
|
19
|
push @new_values, $item->value; |
|
956
|
|
|
|
|
|
|
} else { |
|
957
|
6
|
|
|
|
|
15
|
push @new_names, "$item"; |
|
958
|
6
|
|
|
|
|
11
|
push @new_values, undef; |
|
959
|
|
|
|
|
|
|
} |
|
960
|
|
|
|
|
|
|
} |
|
961
|
|
|
|
|
|
|
|
|
962
|
|
|
|
|
|
|
croak "attempt to set invalid name" |
|
963
|
7
|
100
|
|
|
|
14
|
if grep { $_ !~ m/^$PARSE_RE__token$/o } @new_names; |
|
|
17
|
|
|
|
|
322
|
|
|
964
|
|
|
|
|
|
|
|
|
965
|
5
|
|
|
|
|
10
|
@old_names = splice @{$$$self->[NAMES]}, $offset, $length, @new_names; |
|
|
5
|
|
|
|
|
20
|
|
|
966
|
5
|
|
|
|
|
12
|
@old_values = splice @{$$$self->[VALUES]}, $offset, $length, @new_values; |
|
|
5
|
|
|
|
|
15
|
|
|
967
|
|
|
|
|
|
|
|
|
968
|
5
|
|
|
|
|
9
|
my @ret = (); |
|
969
|
|
|
|
|
|
|
|
|
970
|
5
|
|
|
|
|
15
|
for (my $i = 0; $i < scalar @old_names; $i++) |
|
971
|
18
|
|
|
|
|
40
|
{ push @ret, WARC::Fields::TiedArray::LooseEntry->_new |
|
972
|
|
|
|
|
|
|
($old_names[$i], $old_values[$i]) } |
|
973
|
|
|
|
|
|
|
|
|
974
|
5
|
|
|
|
|
12
|
$$$self->[MVOFF] = undef; |
|
975
|
5
|
|
|
|
|
10
|
$$$self->[INDEX] = undef; |
|
976
|
5
|
|
|
|
|
28
|
return @ret; |
|
977
|
|
|
|
|
|
|
} |
|
978
|
|
|
|
|
|
|
|
|
979
|
4
|
|
|
4
|
|
9
|
sub UNTIE { our $_total_untied; $_total_untied++ } |
|
|
4
|
|
|
|
|
17
|
|
|
980
|
|
|
|
|
|
|
|
|
981
|
4
|
|
|
4
|
|
10
|
sub DESTROY { our $_total_destroyed; $_total_destroyed++ } |
|
|
4
|
|
|
|
|
14
|
|
|
982
|
|
|
|
|
|
|
} |
|
983
|
|
|
|
|
|
|
|
|
984
|
|
|
|
|
|
|
=head2 Tied Hash Access |
|
985
|
|
|
|
|
|
|
|
|
986
|
|
|
|
|
|
|
The contents of a C object can be easily examined by tying a |
|
987
|
|
|
|
|
|
|
hash to the object. Reading or setting a hash key is equivalent to the |
|
988
|
|
|
|
|
|
|
C method, but the tied hash will iterate keys and values in the |
|
989
|
|
|
|
|
|
|
order in which each key B appears in the internal table. |
|
990
|
|
|
|
|
|
|
|
|
991
|
|
|
|
|
|
|
Like the tied array interface, the tied hash interface returns magical |
|
992
|
|
|
|
|
|
|
objects that internally refer back to the parent C object. |
|
993
|
|
|
|
|
|
|
These objects remain valid if the underlying C object is |
|
994
|
|
|
|
|
|
|
changed, but further use may produce surprising and unspecified results. |
|
995
|
|
|
|
|
|
|
|
|
996
|
|
|
|
|
|
|
The use of magical objects enables the values in a tied hash to B |
|
997
|
|
|
|
|
|
|
be arrays, even for keys that do not exist (the array will have zero |
|
998
|
|
|
|
|
|
|
elements) or that have only one value (the array will have a string |
|
999
|
|
|
|
|
|
|
conversion that produces that one value). This allows a tied hash to |
|
1000
|
|
|
|
|
|
|
support autovivification of an array value just as Perl's own hashes do. |
|
1001
|
|
|
|
|
|
|
|
|
1002
|
|
|
|
|
|
|
=cut |
|
1003
|
|
|
|
|
|
|
|
|
1004
|
|
|
|
|
|
|
sub TIEHASH { |
|
1005
|
68
|
|
|
68
|
|
117
|
my $class = shift; |
|
1006
|
68
|
|
|
|
|
98
|
my $ob = shift; |
|
1007
|
|
|
|
|
|
|
|
|
1008
|
|
|
|
|
|
|
# This method must ignore the given class to allow the "empty subclass" |
|
1009
|
|
|
|
|
|
|
# test to pass. If a subclass really wants, an override for TIEHASH |
|
1010
|
|
|
|
|
|
|
# itself can call SUPER::TIEHASH and re-bless the returned reference |
|
1011
|
|
|
|
|
|
|
# into the desired class. |
|
1012
|
68
|
|
|
|
|
87
|
$WARC::Fields::TiedHash::_total_tied++; |
|
1013
|
68
|
|
|
|
|
401
|
bless \ $ob, 'WARC::Fields::TiedHash'; |
|
1014
|
|
|
|
|
|
|
} |
|
1015
|
|
|
|
|
|
|
|
|
1016
|
|
|
|
|
|
|
{ |
|
1017
|
|
|
|
|
|
|
package WARC::Fields::TiedHash::ValueArray; |
|
1018
|
|
|
|
|
|
|
|
|
1019
|
26
|
|
|
26
|
|
216
|
use Carp; |
|
|
26
|
|
|
|
|
58
|
|
|
|
26
|
|
|
|
|
2146
|
|
|
1020
|
|
|
|
|
|
|
|
|
1021
|
|
|
|
|
|
|
BEGIN { $WARC::Fields::TiedHash::ValueArray::{$_} = $WARC::Fields::{$_} |
|
1022
|
26
|
|
|
26
|
|
973
|
for WARC::Fields::OBJECT_INDEX; } |
|
1023
|
|
|
|
|
|
|
|
|
1024
|
26
|
|
|
26
|
|
186
|
use constant { TABLE => 0, KEY => 1, KEYc => 2 }; # KEYc -- canonical KEY |
|
|
26
|
|
|
|
|
114
|
|
|
|
26
|
|
|
|
|
51820
|
|
|
1025
|
|
|
|
|
|
|
|
|
1026
|
|
|
|
|
|
|
sub TIEARRAY { |
|
1027
|
458
|
|
|
458
|
|
669
|
my $class = shift; |
|
1028
|
458
|
|
|
|
|
672
|
my $table = shift; |
|
1029
|
458
|
100
|
|
|
|
1021
|
$table->_rebuild_INDEX unless defined $$table->[INDEX]; |
|
1030
|
458
|
|
|
|
|
855
|
my $key = $table->_find_key(shift); # needs INDEX |
|
1031
|
|
|
|
|
|
|
|
|
1032
|
458
|
|
|
|
|
737
|
{ our $_total_tied; $_total_tied++ } |
|
|
458
|
|
|
|
|
556
|
|
|
|
458
|
|
|
|
|
588
|
|
|
1033
|
458
|
|
|
|
|
1571
|
bless [$table, $key, lc $key], $class; |
|
1034
|
|
|
|
|
|
|
} |
|
1035
|
|
|
|
|
|
|
|
|
1036
|
|
|
|
|
|
|
sub FETCH { |
|
1037
|
764
|
|
|
764
|
|
1630
|
my $self = shift; |
|
1038
|
764
|
|
|
|
|
959
|
my $offset = shift; |
|
1039
|
|
|
|
|
|
|
|
|
1040
|
764
|
100
|
|
|
|
907
|
$self->[TABLE]->_rebuild_INDEX unless defined ${$self->[TABLE]}->[INDEX]; |
|
|
764
|
|
|
|
|
1613
|
|
|
1041
|
|
|
|
|
|
|
|
|
1042
|
764
|
|
|
|
|
974
|
my $row = ${$self->[TABLE]}->[INDEX]{$self->[KEYc]}[$offset]; |
|
|
764
|
|
|
|
|
1387
|
|
|
1043
|
764
|
100
|
|
|
|
1243
|
return defined $row ? ${$self->[TABLE]}->[VALUES][$row] : undef; |
|
|
748
|
|
|
|
|
3277
|
|
|
1044
|
|
|
|
|
|
|
} |
|
1045
|
|
|
|
|
|
|
|
|
1046
|
|
|
|
|
|
|
sub STORE { |
|
1047
|
10
|
|
|
10
|
|
544
|
my $self = shift; |
|
1048
|
10
|
|
|
|
|
13
|
my $offset = shift; |
|
1049
|
10
|
|
|
|
|
16
|
my $value = shift; |
|
1050
|
|
|
|
|
|
|
|
|
1051
|
10
|
|
|
|
|
14
|
my $T = $self->[TABLE]; |
|
1052
|
|
|
|
|
|
|
|
|
1053
|
10
|
100
|
|
|
|
142
|
croak "attempt to modify read-only object" if $$T->[IS_RO]; |
|
1054
|
|
|
|
|
|
|
|
|
1055
|
9
|
100
|
|
|
|
21
|
$T->_rebuild_INDEX unless defined $$T->[INDEX]; |
|
1056
|
|
|
|
|
|
|
|
|
1057
|
|
|
|
|
|
|
$self->STORESIZE($offset + 1) |
|
1058
|
|
|
|
|
|
|
if not defined $$T->[INDEX]{$self->[KEYc]} |
|
1059
|
9
|
100
|
100
|
|
|
31
|
or $#{$$T->[INDEX]{$self->[KEYc]}} < $offset; |
|
|
8
|
|
|
|
|
31
|
|
|
1060
|
|
|
|
|
|
|
|
|
1061
|
9
|
|
|
|
|
50
|
$$T->[VALUES][$$T->[INDEX]{$self->[KEYc]}[$offset]] = "$value"; |
|
1062
|
|
|
|
|
|
|
} |
|
1063
|
|
|
|
|
|
|
|
|
1064
|
|
|
|
|
|
|
sub FETCHSIZE { |
|
1065
|
788
|
|
|
788
|
|
1659
|
my $self = shift; |
|
1066
|
|
|
|
|
|
|
|
|
1067
|
788
|
100
|
|
|
|
1004
|
$self->[TABLE]->_rebuild_INDEX unless defined ${$self->[TABLE]}->[INDEX]; |
|
|
788
|
|
|
|
|
1801
|
|
|
1068
|
|
|
|
|
|
|
|
|
1069
|
778
|
|
|
|
|
924
|
return scalar @{${$self->[TABLE]}->[INDEX]{$self->[KEYc]}} |
|
|
778
|
|
|
|
|
2844
|
|
|
1070
|
788
|
100
|
|
|
|
1015
|
if defined ${$self->[TABLE]}->[INDEX]{$self->[KEYc]}; |
|
|
788
|
|
|
|
|
1826
|
|
|
1071
|
10
|
|
|
|
|
82
|
return 0; # otherwise: key does not exist |
|
1072
|
|
|
|
|
|
|
} |
|
1073
|
|
|
|
|
|
|
|
|
1074
|
|
|
|
|
|
|
sub STORESIZE { |
|
1075
|
8
|
|
|
8
|
|
16
|
my $self = shift; |
|
1076
|
8
|
|
|
|
|
10
|
my $count = shift; |
|
1077
|
|
|
|
|
|
|
|
|
1078
|
8
|
|
|
|
|
13
|
my $T = $self->[TABLE]; |
|
1079
|
|
|
|
|
|
|
|
|
1080
|
8
|
100
|
100
|
|
|
28
|
croak "attempt to modify read-only object" |
|
1081
|
|
|
|
|
|
|
if $$T->[IS_RO] && $count != $self->FETCHSIZE(); |
|
1082
|
|
|
|
|
|
|
|
|
1083
|
7
|
100
|
|
|
|
21
|
$T->_rebuild_INDEX unless defined $$T->[INDEX]; |
|
1084
|
|
|
|
|
|
|
|
|
1085
|
7
|
|
|
|
|
14
|
my @new = (); |
|
1086
|
6
|
|
|
|
|
19
|
@new = @{$$T->[VALUES]}[@{$$T->[INDEX]{$self->[KEYc]}}] |
|
|
6
|
|
|
|
|
14
|
|
|
1087
|
7
|
100
|
|
|
|
25
|
if defined $$T->[INDEX]{$self->[KEYc]}; |
|
1088
|
7
|
100
|
|
|
|
16
|
if ($count > $self->FETCHSIZE()) |
|
|
|
100
|
|
|
|
|
|
|
1089
|
4
|
|
|
|
|
10
|
{ push @new, (undef) x ($count - $self->FETCHSIZE()) } |
|
1090
|
|
|
|
|
|
|
elsif ($count < $self->FETCHSIZE()) |
|
1091
|
2
|
|
|
|
|
9
|
{ @new = @new[0..($count-1)] } |
|
1092
|
1
|
|
|
|
|
4
|
else { return } # no actual change |
|
1093
|
6
|
|
|
|
|
16
|
$T->field($self->[KEY] => \@new); |
|
1094
|
|
|
|
|
|
|
} |
|
1095
|
|
|
|
|
|
|
|
|
1096
|
|
|
|
0
|
|
|
sub EXTEND { |
|
1097
|
|
|
|
|
|
|
# do nothing |
|
1098
|
|
|
|
|
|
|
} |
|
1099
|
|
|
|
|
|
|
|
|
1100
|
|
|
|
|
|
|
sub EXISTS { |
|
1101
|
5
|
|
|
5
|
|
553
|
my $self = shift; |
|
1102
|
5
|
|
|
|
|
8
|
my $offset = shift; |
|
1103
|
|
|
|
|
|
|
|
|
1104
|
5
|
100
|
|
|
|
8
|
$self->[TABLE]->_rebuild_INDEX unless defined ${$self->[TABLE]}->[INDEX]; |
|
|
5
|
|
|
|
|
21
|
|
|
1105
|
|
|
|
|
|
|
|
|
1106
|
5
|
|
|
|
|
11
|
return exists ${$self->[TABLE]}->[INDEX]{$self->[KEYc]}[$offset]; |
|
|
5
|
|
|
|
|
34
|
|
|
1107
|
|
|
|
|
|
|
} |
|
1108
|
|
|
|
|
|
|
|
|
1109
|
|
|
|
|
|
|
sub DELETE { |
|
1110
|
4
|
|
|
4
|
|
539
|
my $self = shift; |
|
1111
|
4
|
|
|
|
|
8
|
my $offset = shift; |
|
1112
|
|
|
|
|
|
|
|
|
1113
|
4
|
100
|
|
|
|
5
|
croak "attempt to modify read-only object" if ${$self->[TABLE]}->[IS_RO]; |
|
|
4
|
|
|
|
|
124
|
|
|
1114
|
|
|
|
|
|
|
|
|
1115
|
3
|
100
|
|
|
|
7
|
$self->[TABLE]->_rebuild_INDEX unless defined ${$self->[TABLE]}->[INDEX]; |
|
|
3
|
|
|
|
|
14
|
|
|
1116
|
|
|
|
|
|
|
|
|
1117
|
3
|
|
|
|
|
6
|
my $row = ${$self->[TABLE]}->[INDEX]{$self->[KEYc]}[$offset]; |
|
|
3
|
|
|
|
|
7
|
|
|
1118
|
3
|
|
|
|
|
6
|
my $old_value = ${$self->[TABLE]}->[VALUES][$row]; |
|
|
3
|
|
|
|
|
7
|
|
|
1119
|
3
|
|
|
|
|
5
|
${$self->[TABLE]}->[VALUES][$row] = undef; |
|
|
3
|
|
|
|
|
6
|
|
|
1120
|
3
|
|
|
|
|
12
|
return $old_value; |
|
1121
|
|
|
|
|
|
|
} |
|
1122
|
|
|
|
|
|
|
|
|
1123
|
|
|
|
|
|
|
sub CLEAR { |
|
1124
|
2
|
|
|
2
|
|
7
|
my $self = shift; |
|
1125
|
|
|
|
|
|
|
|
|
1126
|
2
|
100
|
|
|
|
3
|
croak "attempt to modify read-only object" if ${$self->[TABLE]}->[IS_RO]; |
|
|
2
|
|
|
|
|
115
|
|
|
1127
|
|
|
|
|
|
|
|
|
1128
|
1
|
|
|
|
|
5
|
$self->[TABLE]->field($self->[KEY] => []); |
|
1129
|
|
|
|
|
|
|
} |
|
1130
|
|
|
|
|
|
|
|
|
1131
|
|
|
|
|
|
|
sub PUSH { |
|
1132
|
11
|
|
|
11
|
|
21
|
my $self = shift; |
|
1133
|
|
|
|
|
|
|
|
|
1134
|
11
|
|
|
|
|
22
|
my $T = $self->[TABLE]; |
|
1135
|
|
|
|
|
|
|
|
|
1136
|
11
|
100
|
100
|
|
|
146
|
croak "attempt to modify read-only object" |
|
1137
|
|
|
|
|
|
|
if $$T->[IS_RO] && scalar @_; |
|
1138
|
|
|
|
|
|
|
|
|
1139
|
10
|
100
|
|
|
|
25
|
$T->_rebuild_INDEX unless defined $$T->[INDEX]; |
|
1140
|
|
|
|
|
|
|
|
|
1141
|
10
|
100
|
|
|
|
39
|
if (defined $$T->[INDEX]{$self->[KEYc]}) { |
|
1142
|
|
|
|
|
|
|
# key exists ==> extend table efficiently |
|
1143
|
5
|
|
|
|
|
13
|
my $last_row = $$T->[INDEX]{$self->[KEYc]}[-1]; |
|
1144
|
5
|
|
|
|
|
10
|
my $new_rows = scalar @_; |
|
1145
|
5
|
|
|
|
|
8
|
splice @{$$T->[NAMES]}, 1+$last_row, 0, |
|
|
5
|
|
|
|
|
20
|
|
|
1146
|
|
|
|
|
|
|
(($$T->[NAMES][$last_row]) x $new_rows); |
|
1147
|
5
|
|
|
|
|
9
|
splice @{$$T->[VALUES]}, 1+$last_row, 0, map {"$_"} @_; |
|
|
5
|
|
|
|
|
15
|
|
|
|
4
|
|
|
|
|
15
|
|
|
1148
|
5
|
|
|
|
|
17
|
$T->_update_INDEX($last_row, $new_rows); |
|
1149
|
5
|
|
|
|
|
9
|
push @{$$T->[INDEX]{$self->[KEYc]}}, 1+$last_row .. $last_row+$new_rows; |
|
|
5
|
|
|
|
|
15
|
|
|
1150
|
|
|
|
|
|
|
} else { |
|
1151
|
|
|
|
|
|
|
# make key ==> use existing setter |
|
1152
|
5
|
|
|
|
|
33
|
$T->_set_multiple_value($self->[KEY], [map {"$_"} @_]); |
|
|
9909
|
|
|
|
|
19171
|
|
|
1153
|
|
|
|
|
|
|
} |
|
1154
|
10
|
|
|
|
|
1349
|
$$T->[MVOFF] = undef; |
|
1155
|
|
|
|
|
|
|
} |
|
1156
|
|
|
|
|
|
|
|
|
1157
|
|
|
|
|
|
|
sub POP { |
|
1158
|
7
|
|
|
7
|
|
547
|
my $self = shift; |
|
1159
|
|
|
|
|
|
|
|
|
1160
|
7
|
|
|
|
|
12
|
my $T = $self->[TABLE]; |
|
1161
|
|
|
|
|
|
|
|
|
1162
|
7
|
100
|
|
|
|
128
|
croak "attempt to modify read-only object" if $$T->[IS_RO]; |
|
1163
|
|
|
|
|
|
|
|
|
1164
|
6
|
100
|
|
|
|
18
|
$T->_rebuild_INDEX unless defined $$T->[INDEX]; |
|
1165
|
|
|
|
|
|
|
|
|
1166
|
6
|
100
|
|
|
|
19
|
return undef unless defined $$T->[INDEX]{$self->[KEYc]}; |
|
1167
|
|
|
|
|
|
|
|
|
1168
|
5
|
|
|
|
|
11
|
my $rem_row = $$T->[INDEX]{$self->[KEYc]}[-1]; |
|
1169
|
5
|
|
|
|
|
10
|
my $value = $$T->[VALUES][$rem_row]; |
|
1170
|
|
|
|
|
|
|
|
|
1171
|
5
|
|
|
|
|
6
|
splice @{$$T->[NAMES]}, $rem_row, 1; |
|
|
5
|
|
|
|
|
12
|
|
|
1172
|
5
|
|
|
|
|
9
|
splice @{$$T->[VALUES]}, $rem_row, 1; |
|
|
5
|
|
|
|
|
12
|
|
|
1173
|
5
|
100
|
|
|
|
15
|
splice @{$$T->[MVOFF]}, $rem_row, 1 if defined $$T->[MVOFF]; |
|
|
1
|
|
|
|
|
4
|
|
|
1174
|
5
|
|
|
|
|
14
|
$T->_update_INDEX($rem_row, -1); |
|
1175
|
5
|
|
|
|
|
10
|
pop @{$$T->[INDEX]{$self->[KEYc]}}; |
|
|
5
|
|
|
|
|
12
|
|
|
1176
|
|
|
|
|
|
|
# special case: popped last value |
|
1177
|
|
|
|
|
|
|
delete $$T->[INDEX]{$self->[KEYc]} |
|
1178
|
5
|
100
|
|
|
|
8
|
if scalar @{$$T->[INDEX]{$self->[KEYc]}} == 0; |
|
|
5
|
|
|
|
|
17
|
|
|
1179
|
|
|
|
|
|
|
|
|
1180
|
5
|
|
|
|
|
27
|
return $value; |
|
1181
|
|
|
|
|
|
|
} |
|
1182
|
|
|
|
|
|
|
|
|
1183
|
|
|
|
|
|
|
sub SHIFT { |
|
1184
|
6
|
|
|
6
|
|
535
|
my $self = shift; |
|
1185
|
|
|
|
|
|
|
|
|
1186
|
6
|
|
|
|
|
12
|
my $T = $self->[TABLE]; |
|
1187
|
|
|
|
|
|
|
|
|
1188
|
6
|
100
|
|
|
|
125
|
croak "attempt to modify read-only object" if $$T->[IS_RO]; |
|
1189
|
|
|
|
|
|
|
|
|
1190
|
5
|
100
|
|
|
|
15
|
$T->_rebuild_INDEX unless defined $$T->[INDEX]; |
|
1191
|
|
|
|
|
|
|
|
|
1192
|
5
|
100
|
|
|
|
21
|
return undef unless defined $$T->[INDEX]{$self->[KEYc]}; |
|
1193
|
|
|
|
|
|
|
|
|
1194
|
4
|
|
|
|
|
10
|
my $rem_row = $$T->[INDEX]{$self->[KEYc]}[0]; |
|
1195
|
4
|
|
|
|
|
9
|
my $value = $$T->[VALUES][$rem_row]; |
|
1196
|
|
|
|
|
|
|
|
|
1197
|
4
|
|
|
|
|
7
|
splice @{$$T->[NAMES]}, $rem_row, 1; |
|
|
4
|
|
|
|
|
10
|
|
|
1198
|
4
|
|
|
|
|
8
|
splice @{$$T->[VALUES]}, $rem_row, 1; |
|
|
4
|
|
|
|
|
7
|
|
|
1199
|
4
|
|
|
|
|
10
|
$$T->[MVOFF] = undef; |
|
1200
|
4
|
|
|
|
|
11
|
$T->_update_INDEX($rem_row, -1); |
|
1201
|
4
|
|
|
|
|
6
|
shift @{$$T->[INDEX]{$self->[KEYc]}}; |
|
|
4
|
|
|
|
|
10
|
|
|
1202
|
|
|
|
|
|
|
# special case: shifted last value |
|
1203
|
|
|
|
|
|
|
delete $$T->[INDEX]{$self->[KEYc]} |
|
1204
|
4
|
100
|
|
|
|
7
|
if scalar @{$$T->[INDEX]{$self->[KEYc]}} == 0; |
|
|
4
|
|
|
|
|
19
|
|
|
1205
|
|
|
|
|
|
|
|
|
1206
|
4
|
|
|
|
|
28
|
return $value; |
|
1207
|
|
|
|
|
|
|
} |
|
1208
|
|
|
|
|
|
|
|
|
1209
|
|
|
|
|
|
|
sub UNSHIFT { |
|
1210
|
5
|
|
|
5
|
|
10
|
my $self = shift; |
|
1211
|
|
|
|
|
|
|
|
|
1212
|
5
|
|
|
|
|
11
|
my $T = $self->[TABLE]; |
|
1213
|
|
|
|
|
|
|
|
|
1214
|
5
|
100
|
100
|
|
|
127
|
croak "attempt to modify read-only object" |
|
1215
|
|
|
|
|
|
|
if $$T->[IS_RO] && scalar @_; |
|
1216
|
|
|
|
|
|
|
|
|
1217
|
4
|
100
|
|
|
|
15
|
$T->_rebuild_INDEX unless defined $$T->[INDEX]; |
|
1218
|
|
|
|
|
|
|
|
|
1219
|
4
|
100
|
|
|
|
18
|
if (defined $$T->[INDEX]{$self->[KEYc]}) { |
|
1220
|
|
|
|
|
|
|
# key exists ==> extend table efficiently |
|
1221
|
3
|
|
|
|
|
11
|
my $first_row = $$T->[INDEX]{$self->[KEYc]}[0]; |
|
1222
|
3
|
|
|
|
|
4
|
my $new_rows = scalar @_; |
|
1223
|
3
|
|
|
|
|
6
|
splice @{$$T->[NAMES]}, $first_row, 0, |
|
|
3
|
|
|
|
|
10
|
|
|
1224
|
|
|
|
|
|
|
(($$T->[NAMES][$first_row]) x $new_rows); |
|
1225
|
3
|
|
|
|
|
7
|
splice @{$$T->[VALUES]}, $first_row, 0, map {"$_"} @_; |
|
|
3
|
|
|
|
|
10
|
|
|
|
2
|
|
|
|
|
9
|
|
|
1226
|
3
|
|
|
|
|
11
|
$T->_update_INDEX($first_row - 1, $new_rows); |
|
1227
|
3
|
|
|
|
|
4
|
unshift @{$$T->[INDEX]{$self->[KEYc]}}, |
|
|
3
|
|
|
|
|
12
|
|
|
1228
|
|
|
|
|
|
|
$first_row .. $first_row-1+$new_rows; |
|
1229
|
|
|
|
|
|
|
} else { |
|
1230
|
|
|
|
|
|
|
# make key ==> use existing setter |
|
1231
|
1
|
|
|
|
|
4
|
$T->_set_multiple_value($self->[KEY], [map {"$_"} @_]); |
|
|
4
|
|
|
|
|
10
|
|
|
1232
|
|
|
|
|
|
|
} |
|
1233
|
4
|
|
|
|
|
17
|
$$T->[MVOFF] = undef; |
|
1234
|
|
|
|
|
|
|
} |
|
1235
|
|
|
|
|
|
|
|
|
1236
|
|
|
|
|
|
|
sub SPLICE { |
|
1237
|
9
|
|
|
9
|
|
15
|
my $self = shift; |
|
1238
|
9
|
|
|
|
|
12
|
my $offset = shift; |
|
1239
|
9
|
|
|
|
|
15
|
my $length = shift; |
|
1240
|
|
|
|
|
|
|
|
|
1241
|
9
|
100
|
|
|
|
23
|
$offset = 0 unless defined $offset; |
|
1242
|
9
|
100
|
|
|
|
20
|
$length = $self->FETCHSIZE() - $offset unless defined $length; |
|
1243
|
|
|
|
|
|
|
|
|
1244
|
9
|
100
|
100
|
|
|
32
|
return () unless ($length != 0 || scalar @_); |
|
1245
|
|
|
|
|
|
|
|
|
1246
|
8
|
|
|
|
|
15
|
my $T = $self->[TABLE]; |
|
1247
|
|
|
|
|
|
|
|
|
1248
|
8
|
100
|
|
|
|
233
|
croak "attempt to modify read-only object" if $$T->[IS_RO]; |
|
1249
|
|
|
|
|
|
|
|
|
1250
|
6
|
100
|
|
|
|
17
|
$T->_rebuild_INDEX unless defined $$T->[INDEX]; |
|
1251
|
|
|
|
|
|
|
|
|
1252
|
6
|
|
|
|
|
11
|
my @new = (); |
|
1253
|
5
|
|
|
|
|
17
|
@new = @{$$T->[VALUES]}[@{$$T->[INDEX]{$self->[KEYc]}}] |
|
|
5
|
|
|
|
|
12
|
|
|
1254
|
6
|
100
|
|
|
|
16
|
if defined $$T->[INDEX]{$self->[KEYc]}; |
|
1255
|
6
|
|
|
|
|
17
|
my @old = splice @new, $offset, $length, map {"$_"} @_; |
|
|
3
|
|
|
|
|
11
|
|
|
1256
|
6
|
|
|
|
|
22
|
$self->[TABLE]->field($self->[KEY] => \@new); |
|
1257
|
|
|
|
|
|
|
|
|
1258
|
6
|
|
|
|
|
42
|
return @old; |
|
1259
|
|
|
|
|
|
|
} |
|
1260
|
|
|
|
|
|
|
|
|
1261
|
458
|
|
|
458
|
|
587
|
sub UNTIE { our $_total_untied; $_total_untied++ } |
|
|
458
|
|
|
|
|
811
|
|
|
1262
|
|
|
|
|
|
|
|
|
1263
|
458
|
|
|
458
|
|
533
|
sub DESTROY { our $_total_destroyed; $_total_destroyed++ } |
|
|
458
|
|
|
|
|
1491
|
|
|
1264
|
|
|
|
|
|
|
} |
|
1265
|
|
|
|
|
|
|
|
|
1266
|
|
|
|
|
|
|
{ |
|
1267
|
|
|
|
|
|
|
package WARC::Fields::TiedHash::Value; |
|
1268
|
|
|
|
|
|
|
|
|
1269
|
|
|
|
|
|
|
# This package is a magical array that appears to be a string if it has |
|
1270
|
|
|
|
|
|
|
# only one value, otherwise string conversion gives the usual |
|
1271
|
|
|
|
|
|
|
# nearly-useless debugging value. |
|
1272
|
|
|
|
|
|
|
|
|
1273
|
|
|
|
|
|
|
# The actual underlying array is a tied array that forwards mutating |
|
1274
|
|
|
|
|
|
|
# operations to the original WARC::Fields object. |
|
1275
|
|
|
|
|
|
|
|
|
1276
|
26
|
|
|
26
|
|
209
|
use overload '""' => '_as_string', fallback => 1; |
|
|
26
|
|
|
|
|
51
|
|
|
|
26
|
|
|
|
|
125
|
|
|
1277
|
|
|
|
|
|
|
|
|
1278
|
26
|
|
|
26
|
|
2242
|
use Scalar::Util qw/refaddr reftype/; |
|
|
26
|
|
|
|
|
144
|
|
|
|
26
|
|
|
|
|
4988
|
|
|
1279
|
|
|
|
|
|
|
|
|
1280
|
|
|
|
|
|
|
sub _new { |
|
1281
|
458
|
|
|
458
|
|
670
|
my $class = shift; |
|
1282
|
458
|
|
|
|
|
578
|
my $parent = shift; |
|
1283
|
458
|
|
|
|
|
540
|
my $key = shift; |
|
1284
|
|
|
|
|
|
|
|
|
1285
|
458
|
|
|
|
|
603
|
my @values; |
|
1286
|
458
|
|
|
|
|
1441
|
tie @values, (ref($parent).'::ValueArray'), $$parent, $key; |
|
1287
|
|
|
|
|
|
|
|
|
1288
|
458
|
|
|
|
|
2497
|
bless \@values, $class; |
|
1289
|
|
|
|
|
|
|
} |
|
1290
|
|
|
|
|
|
|
|
|
1291
|
|
|
|
|
|
|
sub _as_string { |
|
1292
|
676
|
|
|
676
|
|
11590
|
my $self = shift; |
|
1293
|
|
|
|
|
|
|
|
|
1294
|
676
|
100
|
|
|
|
1968
|
return scalar @$self == 1 |
|
1295
|
|
|
|
|
|
|
? $self->[0] : sprintf ('%s(0x%x)', reftype $self, refaddr $self); |
|
1296
|
|
|
|
|
|
|
} |
|
1297
|
|
|
|
|
|
|
|
|
1298
|
458
|
|
|
458
|
|
2921
|
sub DESTROY { untie @{(shift)} } |
|
|
458
|
|
|
|
|
973
|
|
|
1299
|
|
|
|
|
|
|
} |
|
1300
|
|
|
|
|
|
|
|
|
1301
|
|
|
|
|
|
|
{ |
|
1302
|
|
|
|
|
|
|
package WARC::Fields::TiedHash; |
|
1303
|
|
|
|
|
|
|
|
|
1304
|
26
|
|
|
26
|
|
215
|
use Carp; |
|
|
26
|
|
|
|
|
160
|
|
|
|
26
|
|
|
|
|
2149
|
|
|
1305
|
|
|
|
|
|
|
|
|
1306
|
|
|
|
|
|
|
BEGIN { $WARC::Fields::TiedHash::{$_} = $WARC::Fields::{$_} |
|
1307
|
26
|
|
|
26
|
|
20806
|
for WARC::Fields::OBJECT_INDEX; } |
|
1308
|
|
|
|
|
|
|
|
|
1309
|
|
|
|
|
|
|
# The underlying object is a reference to a WARC::Fields object. |
|
1310
|
|
|
|
|
|
|
|
|
1311
|
|
|
|
|
|
|
sub FETCH { |
|
1312
|
461
|
|
|
461
|
|
1944
|
my $self = shift; |
|
1313
|
461
|
|
|
|
|
614
|
my $key = shift; |
|
1314
|
461
|
100
|
|
|
|
2289
|
croak "reference to invalid field name" if $key !~ m/^$PARSE_RE__token$/o; |
|
1315
|
458
|
|
|
|
|
1467
|
return (ref($self).'::Value')->_new($self, $key); |
|
1316
|
|
|
|
|
|
|
} |
|
1317
|
|
|
|
|
|
|
|
|
1318
|
|
|
|
|
|
|
sub STORE { |
|
1319
|
11
|
|
|
11
|
|
402
|
my $self = shift; |
|
1320
|
11
|
|
|
|
|
20
|
my $key = shift; |
|
1321
|
11
|
|
|
|
|
18
|
my $value = shift; |
|
1322
|
|
|
|
|
|
|
|
|
1323
|
11
|
100
|
|
|
|
246
|
croak "attempt to modify read-only object" if $$$self->[IS_RO]; |
|
1324
|
|
|
|
|
|
|
|
|
1325
|
9
|
|
|
|
|
25
|
$$self->field($key => $value); |
|
1326
|
|
|
|
|
|
|
} |
|
1327
|
|
|
|
|
|
|
|
|
1328
|
|
|
|
|
|
|
sub DELETE { |
|
1329
|
3
|
|
|
3
|
|
11
|
my $self = shift; |
|
1330
|
3
|
|
|
|
|
6
|
my $key = shift; |
|
1331
|
|
|
|
|
|
|
|
|
1332
|
3
|
100
|
|
|
|
124
|
croak "attempt to modify read-only object" if $$$self->[IS_RO]; |
|
1333
|
|
|
|
|
|
|
|
|
1334
|
2
|
|
|
|
|
9
|
$$self->field($key => []); |
|
1335
|
|
|
|
|
|
|
} |
|
1336
|
|
|
|
|
|
|
|
|
1337
|
|
|
|
|
|
|
sub CLEAR { |
|
1338
|
2
|
|
|
2
|
|
9
|
my $self = shift; |
|
1339
|
|
|
|
|
|
|
|
|
1340
|
2
|
100
|
|
|
|
120
|
croak "attempt to modify read-only object" if $$$self->[IS_RO]; |
|
1341
|
|
|
|
|
|
|
|
|
1342
|
1
|
|
|
|
|
4
|
$$$self->[NAMES] = []; |
|
1343
|
1
|
|
|
|
|
3
|
$$$self->[VALUES] = []; |
|
1344
|
1
|
|
|
|
|
3
|
$$$self->[MVOFF] = undef; |
|
1345
|
1
|
|
|
|
|
4
|
$$$self->[INDEX] = undef; |
|
1346
|
1
|
|
|
|
|
3
|
return undef; |
|
1347
|
|
|
|
|
|
|
} |
|
1348
|
|
|
|
|
|
|
|
|
1349
|
|
|
|
|
|
|
sub EXISTS { |
|
1350
|
15
|
|
|
15
|
|
38
|
my $self = shift; |
|
1351
|
15
|
|
|
|
|
23
|
my $key = shift; |
|
1352
|
|
|
|
|
|
|
|
|
1353
|
15
|
100
|
|
|
|
55
|
$$self->_rebuild_INDEX unless defined $$$self->[INDEX]; |
|
1354
|
15
|
|
|
|
|
39
|
return exists $$$self->[INDEX]->{$$self->_find_key($key)}; |
|
1355
|
|
|
|
|
|
|
} |
|
1356
|
|
|
|
|
|
|
|
|
1357
|
|
|
|
|
|
|
sub FIRSTKEY { |
|
1358
|
61
|
|
|
61
|
|
101
|
my $self = shift; |
|
1359
|
|
|
|
|
|
|
|
|
1360
|
61
|
|
|
|
|
219
|
return $$$self->[NAMES][0]; |
|
1361
|
|
|
|
|
|
|
} |
|
1362
|
|
|
|
|
|
|
|
|
1363
|
|
|
|
|
|
|
sub NEXTKEY { |
|
1364
|
307
|
|
|
307
|
|
462
|
my $self = shift; |
|
1365
|
307
|
|
|
|
|
383
|
my $from_key = shift; |
|
1366
|
|
|
|
|
|
|
|
|
1367
|
307
|
100
|
|
|
|
595
|
$$self->_rebuild_INDEX unless defined $$$self->[INDEX]; |
|
1368
|
|
|
|
|
|
|
|
|
1369
|
307
|
|
|
|
|
369
|
my $i; |
|
1370
|
307
|
|
100
|
|
|
541
|
for ($i = $$$self->[INDEX]{$$self->_find_key($from_key)}[0] + 1; |
|
1371
|
|
|
|
|
|
|
defined $$$self->[NAMES][$i] and |
|
1372
|
|
|
|
|
|
|
$i != $$$self->[INDEX]{$$self->_find_key($$$self->[NAMES][$i])}[0]; |
|
1373
|
|
|
|
|
|
|
$i++) {} |
|
1374
|
307
|
|
|
|
|
1096
|
return $$$self->[NAMES][$i]; |
|
1375
|
|
|
|
|
|
|
} |
|
1376
|
|
|
|
|
|
|
|
|
1377
|
|
|
|
|
|
|
sub SCALAR { |
|
1378
|
3
|
|
|
3
|
|
11
|
my $self = shift; |
|
1379
|
3
|
|
|
|
|
5
|
return scalar @{$$$self->[NAMES]}; |
|
|
3
|
|
|
|
|
18
|
|
|
1380
|
|
|
|
|
|
|
} |
|
1381
|
|
|
|
|
|
|
|
|
1382
|
68
|
|
|
68
|
|
90
|
sub UNTIE { our $_total_untied; $_total_untied++ } |
|
|
68
|
|
|
|
|
170
|
|
|
1383
|
|
|
|
|
|
|
|
|
1384
|
68
|
|
|
68
|
|
82
|
sub DESTROY { our $_total_destroyed; $_total_destroyed++ } |
|
|
68
|
|
|
|
|
128
|
|
|
1385
|
|
|
|
|
|
|
} |
|
1386
|
|
|
|
|
|
|
|
|
1387
|
|
|
|
|
|
|
=head2 Overloaded Dereference Operators |
|
1388
|
|
|
|
|
|
|
|
|
1389
|
|
|
|
|
|
|
The C class provides overloaded dereference operators for |
|
1390
|
|
|
|
|
|
|
array and hash dereferencing. The overloaded operators provide an |
|
1391
|
|
|
|
|
|
|
anonymous tied array or hash as needed, allowing the object itself to be |
|
1392
|
|
|
|
|
|
|
used as a reference to its tied array and hash interfaces. There is a |
|
1393
|
|
|
|
|
|
|
caveat, however, so read on. |
|
1394
|
|
|
|
|
|
|
|
|
1395
|
|
|
|
|
|
|
=cut |
|
1396
|
|
|
|
|
|
|
|
|
1397
|
|
|
|
|
|
|
sub _as_tied_array { |
|
1398
|
|
|
|
|
|
|
# To avoid confusing bugs due to typos producing overloaded dereferences |
|
1399
|
|
|
|
|
|
|
# instead of intended accesses to the internal object, this feature |
|
1400
|
|
|
|
|
|
|
# cannot be used from within this module. |
|
1401
|
168
|
100
|
|
168
|
|
7644
|
if (scalar caller =~ m/^WARC::Fields/) { |
|
1402
|
1
|
|
|
|
|
4
|
local $Carp::CarpLevel = 1; |
|
1403
|
1
|
|
|
|
|
148
|
confess "overloaded array dereference in internal code" |
|
1404
|
|
|
|
|
|
|
} |
|
1405
|
|
|
|
|
|
|
|
|
1406
|
167
|
|
|
|
|
245
|
my $self = shift; |
|
1407
|
|
|
|
|
|
|
|
|
1408
|
167
|
100
|
|
|
|
665
|
return $$self->[C_TA] if defined $$self->[C_TA]; |
|
1409
|
|
|
|
|
|
|
|
|
1410
|
3
|
|
|
|
|
5
|
my @array; $$self->[C_TA] = \@array; |
|
|
3
|
|
|
|
|
7
|
|
|
1411
|
3
|
|
|
|
|
5
|
Scalar::Util::weaken ${tie @array, ref $self, $self}; |
|
|
3
|
|
|
|
|
12
|
|
|
1412
|
3
|
|
|
|
|
12
|
return $$self->[C_TA]; |
|
1413
|
|
|
|
|
|
|
} |
|
1414
|
|
|
|
|
|
|
|
|
1415
|
|
|
|
|
|
|
sub _as_tied_hash { |
|
1416
|
|
|
|
|
|
|
# To avoid confusing bugs due to typos producing overloaded dereferences |
|
1417
|
|
|
|
|
|
|
# instead of intended accesses to the internal object, this feature |
|
1418
|
|
|
|
|
|
|
# cannot be used from within this module. |
|
1419
|
266
|
100
|
|
266
|
|
8333
|
if (scalar caller =~ m/^WARC::Fields/) { |
|
1420
|
1
|
|
|
|
|
3
|
local $Carp::CarpLevel = 1; |
|
1421
|
1
|
|
|
|
|
70
|
confess "overloaded hash dereference in internal code" |
|
1422
|
|
|
|
|
|
|
} |
|
1423
|
|
|
|
|
|
|
|
|
1424
|
265
|
|
|
|
|
429
|
my $self = shift; |
|
1425
|
|
|
|
|
|
|
|
|
1426
|
265
|
100
|
|
|
|
1127
|
return $$self->[C_TH] if defined $$self->[C_TH]; |
|
1427
|
|
|
|
|
|
|
|
|
1428
|
61
|
|
|
|
|
115
|
my %hash; $$self->[C_TH] = \%hash; |
|
|
61
|
|
|
|
|
105
|
|
|
1429
|
61
|
|
|
|
|
120
|
Scalar::Util::weaken ${tie %hash, ref $self, $self}; |
|
|
61
|
|
|
|
|
219
|
|
|
1430
|
61
|
|
|
|
|
289
|
return $$self->[C_TH]; |
|
1431
|
|
|
|
|
|
|
} |
|
1432
|
|
|
|
|
|
|
|
|
1433
|
|
|
|
|
|
|
=head3 Reference Count Trickery with Overloaded Dereference Operators |
|
1434
|
|
|
|
|
|
|
|
|
1435
|
|
|
|
|
|
|
To avoid problems, the underlying tied object is a reference to the parent |
|
1436
|
|
|
|
|
|
|
object. For ordinary use of C, this is a strong reference, however, |
|
1437
|
|
|
|
|
|
|
the anonymous tied array and hash are cached in the object to avoid having |
|
1438
|
|
|
|
|
|
|
to C a new object every time the dereference operators are used. |
|
1439
|
|
|
|
|
|
|
|
|
1440
|
|
|
|
|
|
|
To prevent memory leaks due to circular references, the overloaded |
|
1441
|
|
|
|
|
|
|
dereference operators tie a I reference to the parent object. The |
|
1442
|
|
|
|
|
|
|
tied aggregate always holds a strong reference to its object, but when the |
|
1443
|
|
|
|
|
|
|
dereference operators are used, that inner object is a I reference to |
|
1444
|
|
|
|
|
|
|
the actual C object. |
|
1445
|
|
|
|
|
|
|
|
|
1446
|
|
|
|
|
|
|
The caveat is thus: do not attempt to save a reference to the array or hash |
|
1447
|
|
|
|
|
|
|
produced by dereferencing a C object. The parent |
|
1448
|
|
|
|
|
|
|
C object must remain in scope for as long as any anonymous |
|
1449
|
|
|
|
|
|
|
tied aggregates exist. |
|
1450
|
|
|
|
|
|
|
|
|
1451
|
|
|
|
|
|
|
=cut |
|
1452
|
|
|
|
|
|
|
|
|
1453
|
|
|
|
|
|
|
1; |
|
1454
|
|
|
|
|
|
|
__END__ |