line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Spreadsheet::Compare::Reader; |
2
|
|
|
|
|
|
|
|
3
|
4
|
|
|
4
|
|
2907
|
use Mojo::Base -base, -signatures; |
|
4
|
|
|
|
|
11
|
|
|
4
|
|
|
|
|
29
|
|
4
|
4
|
|
|
4
|
|
1178
|
use Spreadsheet::Compare::Common; |
|
4
|
|
|
|
|
10
|
|
|
4
|
|
|
|
|
36
|
|
5
|
|
|
|
|
|
|
|
6
|
|
|
|
|
|
|
#<<< |
7
|
|
|
|
|
|
|
use Spreadsheet::Compare::Config { |
8
|
0
|
|
|
|
|
0
|
identity => sub { [] }, |
9
|
36
|
|
|
|
|
283
|
skip => sub { {} }, |
10
|
4
|
|
|
|
|
62
|
chunk => undef, |
11
|
|
|
|
|
|
|
has_header => undef, |
12
|
4
|
|
|
4
|
|
41
|
}, make_attributes => 1; |
|
4
|
|
|
|
|
10
|
|
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
# Read-only attributes shared by all reader backends.

# Whether this reader supports chunking. Defaults to 0 here; per the POD a
# reader module that supports chunking sets it to a true value.
has can_chunk => 0, ro => 1; |
15
|
|
|
|
|
|
|
# Per the POD: true once the reader has no more records to read.
has exhausted => undef, ro => 1; |
16
|
|
|
|
|
|
|
# Chunk name generator. A no-op sub by default; init() stores the generated
# sub under __ro__chunker (presumably the storage slot of this attribute).
has chunker => sub {}, ro => 1; |
17
|
|
|
|
|
|
|
# Record filter. A no-op sub by default; init() stores the generated sub
# under __ro__skipper (presumably the storage slot of this attribute).
has skipper => sub {}, ro => 1; |
18
|
|
|
|
|
|
|
# Per the POD: array reference with the header names, or the zero based
# column indexes when there is no named header.
has header => undef, ro => 1; |
19
|
|
|
|
|
|
|
# Per the POD: array reference with the currently read data after a fetch.
# Starts out as an empty array reference.
has result => sub { [] }, ro => 1; |
20
|
|
|
|
|
|
|
# 'right' when index is true, 'left' otherwise.
has side => sub { $_[0]->index ? 'right' : 'left' }, ro => 1; |
21
|
|
|
|
|
|
|
# Reporting name for the side; the default is computed exactly like side.
has side_name => sub { $_[0]->index ? 'right' : 'left' }, ro => 1; |
22
|
|
|
|
|
|
|
# Has no usable default: reading it without having set it croaks.
has index => sub { croak 'Parameter "index" not set' }, ro => 1; |
23
|
|
|
|
|
|
|
#>>> |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
# Lookup table from header name to zero based column position, built from
# the reader's header attribute. For duplicate names the last position wins.
has h2i => sub {
    my ($self) = @_;
    my $names = $self->header;

    my %position_of;
    for my $pos ( 0 .. $#$names ) {
        $position_of{ $names->[$pos] } = $pos;
    }

    return \%position_of;
};
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
|
31
|
72
|
|
|
72
|
0
|
119
|
sub init ($self) {

    # Build the record filter from the 'skip' option when one is set.
    if ( $self->skip ) {
        $self->{__ro__skipper} = _make_skipper( $self->skip );
    }

    # A 'chunk' option only takes effect for readers that declare chunking
    # support; for all other readers it is reported and otherwise ignored.
    if ( defined( $self->chunk ) ) {
        if ( $self->can_chunk ) {
            $self->{__ro__chunker} = _make_chunker( $self->chunk );
        }
        else {
            WARN 'chunking not supported by ', ref($self), "\n";
        }
    }

    # Return the invocant so that calls can be chained.
    return $self;
}
39
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
|
41
|
0
|
|
|
0
|
1
|
0
|
# Abstract method: concrete readers have to override it. The slurpy signature
# accepts the invocant (and anything else), so calling the unimplemented
# method reaches the croak below. An empty signature would reject the
# invocant with a "Too many arguments" error before the message is produced.
sub setup (@) { croak 'Method "setup" not implemented by subclass' }
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
42
|
|
|
|
|
|
|
|
43
|
0
|
|
|
0
|
1
|
0
|
# Abstract method: concrete readers have to override it. The slurpy signature
# accepts the invocant and the requested record count, so calling the
# unimplemented method reaches the croak below. An empty signature would
# reject the arguments with a "Too many arguments" error instead.
sub fetch (@) { croak 'Method "fetch" not implemented by subclass' }
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
44
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
# Returns reference to a subroutine that checks a given record |
47
|
|
|
|
|
|
|
# for being subject to a "skip record" according to the test definition. |
48
|
|
|
|
|
|
|
# Returns true, when the record should be skipped. |
49
|
|
|
|
|
|
|
#<<< |
50
|
72
|
|
|
72
|
|
414
|
sub _make_skipper ($skip) {

    # One rule per column: an optional leading '!' requests negation, the
    # remainder of the value is compiled as the regular expression.
    my %rule_for;
    for my $column ( keys %$skip ) {
        my ( $bang, $pattern ) = $skip->{$column} =~ /^(!?)(.+)$/;
        $rule_for{$column} = {
            negate => $bang ? 1 : 0,
            regex  => qr/$pattern/,
        };
    }

    # The generated check evaluates every rule against the record and
    # reports true as soon as at least one rule applies.
    return sub ($rec) {
        my @hits = grep {
            my $rule = $rule_for{$_};
            $rec->val($_) =~ /$rule->{regex}/ ^ $rule->{negate}
        } keys %rule_for;
        return !!@hits;
    };
}
62
|
|
|
|
|
|
|
#>>> |
63
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
# Returns reference to a subroutine that generates a chunk name for a given record |
66
|
|
|
|
|
|
|
# with the settings under 'chunk' in the test definition. |
67
|
20
|
|
|
20
|
|
170
|
sub _make_chunker ( $chunk ) {

    # $chunk is either a plain column (the chunk name is the column value)
    # or a hash reference { column => ..., regex => ... } (the chunk name is
    # the first capture of the regex applied to the column value).
    my ( $column, $regex );
    if ( ref($chunk) ) {
        $column = $chunk->{column};

        # Compile once here instead of once per record inside the closure.
        # An invalid expression is now reported when the chunker is built.
        $regex = qr/$chunk->{regex}/;
    }

    DEBUG "returning chunker";
    return sub ($rec) {
        my $chunk_name;
        if ( defined $regex ) {
            ($chunk_name) = $rec->val($column) =~ /$regex/;

            # Records that do not match end up in the chunk named ''.
            $chunk_name //= '';
        }
        else {
            $chunk_name = $rec->val($chunk);
        }

        DEBUG "Chunk name: $chunk_name";

        return $chunk_name;
    };
}
86
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
1; |
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
=head1 NAME |
91
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
Spreadsheet::Compare::Reader - Abstract Reader Base Class |
93
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
=head1 SYNOPSIS |
95
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
package Spreadsheet::Compare::MyReader; |
97
|
|
|
|
|
|
|
use Mojo::Base 'Spreadsheet::Compare::Reader'; |
98
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
sub setup {...} |
100
|
|
|
|
|
|
|
sub fetch {...} |
101
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
=head1 DESCRIPTION |
103
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
Spreadsheet::Compare::Reader is an abstract base class for spreadsheet reader backends. |
105
|
|
|
|
|
|
|
Available reader classes in this distribution are |
106
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
=over 4 |
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
=item * L<Spreadsheet::Compare::Reader::CSV> for CSV files |
110
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
=item * L<Spreadsheet::Compare::Reader::DB> for Databases |
112
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
=item * L<Spreadsheet::Compare::Reader::FIX> for fixed size column files |
114
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
=item * L<Spreadsheet::Compare::Reader::WB> for various spreadsheet formats like XLSX, ODS, ... |
116
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
=back |
118
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
This module defines the methods and attributes that are used by a Spreadsheet::Compare::Reader |
120
|
|
|
|
|
|
|
subclass. The methods setup and fetch have to be overridden by the derived class and will |
121
|
|
|
|
|
|
|
croak otherwise. |
122
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
When subclassing consider using L<Spreadsheet::Compare::Common> for convenience. |
124
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
=head1 ATTRIBUTES |
126
|
|
|
|
|
|
|
|
127
|
|
|
|
|
|
|
If not stated otherwise, read write attributes can be set as options from the config file |
128
|
|
|
|
|
|
|
passed to L<Spreadsheet::Compare> or L<spreadcomp>. |
129
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
=head2 can_chunk |
131
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
(B<readonly>) Will be set to a true value by the Reader module if the Reader supports |
133
|
|
|
|
|
|
|
chunking. |
134
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
=head2 chunk |
136
|
|
|
|
|
|
|
|
137
|
|
|
|
|
|
|
possible values: <column> |
138
|
|
|
|
|
|
|
or |
139
|
|
|
|
|
|
|
{ column => <column>, regex => <regex> }, |
140
|
|
|
|
|
|
|
default: undef |
141
|
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
Process the input in batches defined by the content of a column. When the |
143
|
|
|
|
|
|
|
regex form is used it has to have a capturing expression. The result will |
144
|
|
|
|
|
|
|
be used as identifier for the chunk. For example: |
145
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
chunk: |
147
|
|
|
|
|
|
|
column: RECORD_NBR |
148
|
|
|
|
|
|
|
regex: '(\d{2})$' |
149
|
|
|
|
|
|
|
|
150
|
|
|
|
|
|
|
will take the last two digits of the numbers in column RECORD_NBR, resulting |
151
|
|
|
|
|
|
|
in up to 100 batches. This is useful for very large files that do not fit |
152
|
|
|
|
|
|
|
entirely into memory (see L<Spreadsheet::Compare/LIMITING MEMORY USAGE>). |
153
|
|
|
|
|
|
|
Reading for each batch will be handled sequentially to save memory. |
154
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
All records will be read twice, first for creating the lookup info for the chunks |
156
|
|
|
|
|
|
|
and second for the actual data. This will significantly increase execution time. |
157
|
|
|
|
|
|
|
|
158
|
|
|
|
|
|
|
=head2 chunker |
159
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
(B<readonly>) A reference to a generated subroutine that returns the chunk name |
161
|
|
|
|
|
|
|
for a record based on the settings from L</chunk>. This will be called from the |
162
|
|
|
|
|
|
|
Reader subclasses. |
163
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
=head2 exhausted |
165
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
(B<readonly>) Will be true if the reader has no more records to read. |
167
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
=head2 has_header |
169
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
possible values: bool |
171
|
|
|
|
|
|
|
default: undefined |
172
|
|
|
|
|
|
|
|
173
|
|
|
|
|
|
|
Specify whether the file contains a header line. |
174
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
=head2 header |
176
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
(B<readonly>) A reference to an array with the header names or (in case there is no |
178
|
|
|
|
|
|
|
named header) the zero based indexes. |
179
|
|
|
|
|
|
|
|
180
|
|
|
|
|
|
|
=head2 identity |
181
|
|
|
|
|
|
|
|
182
|
|
|
|
|
|
|
possible values: <array of column numbers or names> |
183
|
|
|
|
|
|
|
default: [] |
184
|
|
|
|
|
|
|
|
185
|
|
|
|
|
|
|
Defines the identity to identify and match a single record. If L</has_header> is |
186
|
|
|
|
|
|
|
true, the header names can be used. If not, the column numbers (zero based) will |
187
|
|
|
|
|
|
|
be used as header names. |
188
|
|
|
|
|
|
|
|
189
|
|
|
|
|
|
|
examples for config file entries: |
190
|
|
|
|
|
|
|
|
191
|
|
|
|
|
|
|
identity: [rec_nbr, rec_type] |
192
|
|
|
|
|
|
|
|
193
|
|
|
|
|
|
|
identity: |
194
|
|
|
|
|
|
|
- rec_nbr |
195
|
|
|
|
|
|
|
- rec_type |
196
|
|
|
|
|
|
|
|
197
|
|
|
|
|
|
|
identity: [3, 4, 17] |
198
|
|
|
|
|
|
|
|
199
|
|
|
|
|
|
|
=head2 index |
200
|
|
|
|
|
|
|
|
201
|
|
|
|
|
|
|
(B<readonly>) 0 for the reader on the left and 1 for the reader on the right side of the comparison. |
202
|
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
=head2 result |
204
|
|
|
|
|
|
|
|
205
|
|
|
|
|
|
|
(B<readonly>) A reference to an array with the currently read data after a call to fetch |
206
|
|
|
|
|
|
|
|
207
|
|
|
|
|
|
|
=head2 side |
208
|
|
|
|
|
|
|
|
209
|
|
|
|
|
|
|
(B<readonly>) 'left' for the reader on the left and 'right' for the reader on the right side of the comparison. |
210
|
|
|
|
|
|
|
|
211
|
|
|
|
|
|
|
=head2 side_name |
212
|
|
|
|
|
|
|
|
213
|
|
|
|
|
|
|
possible values: <string> |
214
|
|
|
|
|
|
|
default: 'left' or 'right' (depending on L</index>) |
215
|
|
|
|
|
|
|
|
216
|
|
|
|
|
|
|
The name for the side of the comparison used for reporting. |
217
|
|
|
|
|
|
|
|
218
|
|
|
|
|
|
|
=head2 skip |
219
|
|
|
|
|
|
|
|
220
|
|
|
|
|
|
|
possible values: <key value pairs> |
221
|
|
|
|
|
|
|
default: {} |
222
|
|
|
|
|
|
|
|
223
|
|
|
|
|
|
|
Skip lines by column content. Keys must be column names (when the input has column |
224
|
|
|
|
|
|
|
headers, see L</has_header>) or numbers, the |
225
|
|
|
|
|
|
|
values are interpreted as regular expressions. A leading '!' negates the regex. |
226
|
|
|
|
|
|
|
|
227
|
|
|
|
|
|
|
Example: |
228
|
|
|
|
|
|
|
|
229
|
|
|
|
|
|
|
skip: |
230
|
|
|
|
|
|
|
Name: ^XYZ- |
231
|
|
|
|
|
|
|
Price: !\d |
232
|
|
|
|
|
|
|
|
233
|
|
|
|
|
|
|
=head2 skipper |
234
|
|
|
|
|
|
|
|
235
|
|
|
|
|
|
|
(B<readonly>) A reference to a generated subroutine that returns true or false |
236
|
|
|
|
|
|
|
depending on whether the record should be skipped according to the value of L</skip>. |
237
|
|
|
|
|
|
|
This will be called from the Reader subclasses. |
238
|
|
|
|
|
|
|
|
239
|
|
|
|
|
|
|
=head1 METHODS |
240
|
|
|
|
|
|
|
|
241
|
|
|
|
|
|
|
The methods L</setup> and L</fetch> have to be overridden by derived classes. |
242
|
|
|
|
|
|
|
|
243
|
|
|
|
|
|
|
=head2 fetch($size) |
244
|
|
|
|
|
|
|
|
245
|
|
|
|
|
|
|
Fetch $size records from the source. |
246
|
|
|
|
|
|
|
|
247
|
|
|
|
|
|
|
=head2 setup() |
248
|
|
|
|
|
|
|
|
249
|
|
|
|
|
|
|
Will be called by L<Spreadsheet::Compare::Single> at the start of a comparison. |
250
|
|
|
|
|
|
|
This is for setup tasks before handling the first fetch (e.g. opening a file, |
251
|
|
|
|
|
|
|
reading the header, ...) |
252
|
|
|
|
|
|
|
|
253
|
|
|
|
|
|
|
=cut |