File Coverage

blib/lib/Spreadsheet/Compare/Reader.pm

Criterion	Covered	Total	%
statement	43	54	79.6
branch	6	10	60.0
condition	5	8	62.5
subroutine	8	11	72.7
pod	2	3	66.6
total	64	86	74.4

line	stmt	bran	cond	sub	pod	time	code
1							package Spreadsheet::Compare::Reader;
2
3	4			4		2907	use Mojo::Base -base, -signatures;
	4					11
	4					29
4	4			4		1178	use Spreadsheet::Compare::Common;
	4					10
	4					36
5
6							#<<<
7							use Spreadsheet::Compare::Config {
8	0					0	identity => sub { [] },
9	36					283	skip => sub { {} },
10	4					62	chunk => undef,
11							has_header => undef,
12	4			4		41	}, make_attributes => 1;
	4					10
13
14							has can_chunk => 0, ro => 1;
15							has exhausted => undef, ro => 1;
16							has chunker => sub {}, ro => 1;
17							has skipper => sub {}, ro => 1;
18							has header => undef, ro => 1;
19							has result => sub { [] }, ro => 1;
20							has side => sub { $_[0]->index ? 'right' : 'left' }, ro => 1;
21							has side_name => sub { $_[0]->index ? 'right' : 'left' }, ro => 1;
22							has index => sub { croak 'Parameter "index" not set' }, ro => 1;
23							#>>>
24
25							has h2i => sub {
26							my $hd = $_[0]->header;
27							return { map { $hd->[$_] => $_ } 0 .. $#$hd };
28							};
29
30
31	72			72	0	119	sub init ($self) {
	72					110
	72					97
32	72	50				174	$self->{__ro__skipper} = _make_skipper( $self->skip ) if $self->skip;
33	72	50	66			240	WARN 'chunking not supported by ', ref($self), "\n"
34							if defined( $self->chunk ) && !$self->can_chunk;
35	72	100	66			438	$self->{__ro__chunker} = _make_chunker( $self->chunk )
36							if defined( $self->chunk ) && $self->can_chunk;
37	72					438	return $self;
38							}
39
40
41	0			0	1	0	sub setup () { croak 'Method "setup" not implemented by subclass' }
	0					0
	0					0
42
43	0			0	1	0	sub fetch () { croak 'Method "fetch" not implemented by subclass' }
	0					0
	0					0
44
45
46							# Returns reference to a subroutine that checks a given record
47							# for being subject to a "skip record" according to the test definition.
48							# Returns true, when the record should be skipped.
49							#<<<
50	72			72		414	sub _make_skipper ($skip) {
	72					122
	72					103
51							my %skip_info = pairmap {
52	0			0		0	my( $negate, $regex ) = $b =~ /^(!?)(.+)$/;
53	0	0				0	$a => {
54							negate => $negate ? 1 : 0,
55							regex => qr/$regex/
56							};
57	72					853	} %$skip;
58	9542			9542		13086	return sub ($rec) {
	9542					14487
	9542					13649
59	9542					92578	return any { $_ } pairgrep { $rec->val($a) =~ /$b->{regex}/ ^ $b->{negate} } %skip_info;
	0					0
	0					0
60	72					584	};
61							}
62							#>>>
63
64
65							# Returns reference to a subroutine that generates a chunk name for a given record
66							# with the settings under 'chunk' in the test definition.
67	20			20		170	sub _make_chunker ( $chunk ) {
	20					33
	20					29
68	20					66	DEBUG "returning chunker";
69	3042			3042		4178	return sub ($rec) {
	3042					12297
	3042					4527
70	3042					4476	my $chunk_name;
71	3042	100				6691	if ( ref($chunk) ) {
72	1274					2430	my $key = $chunk->{column};
73	1274					5185	my $regex = qr/$chunk->{regex}/;
74	1274					3785	($chunk_name) = $rec->val($key) =~ /$regex/;
75	1274		50			3957	$chunk_name //= '';
76							}
77							else {
78	1768					4745	$chunk_name = $rec->val($chunk);
79							}
80
81	3042					11960	DEBUG "Chunk name: $chunk_name";
82
83	3042					25032	return $chunk_name;
84	20					228	};
85							}
86
87
88							1;
89
90							=head1 NAME
91
92							Spreadsheet::Compare::Reader - Abstract Reader Base Class
93
94							=head1 SYNOPSIS
95
96							package Spreadsheet::Compare::MyReader;
97							use Mojo::Base 'Spreadsheet::Compare::Reader';
98
99							sub setup {...}
100							sub fetch {...}
101
102							=head1 DESCRIPTION
103
104							Spreadsheet::Compare::Reader is an abstract base class for spreadsheet reader backends.
105							Available reader classes in this distribution are
106
107							=over 4
108
109							=item * L<Spreadsheet::Compare::Reader::CSV> for CSV files
110
111							=item * L<Spreadsheet::Compare::Reader::DB> for Databases
112
113							=item * L<Spreadsheet::Compare::Reader::FIX> for fixed size column files
114
115							=item * L<Spreadsheet::Compare::Reader::WB> for various spreadsheet formats like XLSX, ODS, ...
116
117							=back
118
119							This module defines the methods and attributes that are used by a Spreadsheet::Compare::Reader
120							subclass. The methods setup and fetch have to be overridden by the derived class and will
121							croak otherwise.
122
123							When subclassing consider using L<Spreadsheet::Compare::Common> for convenience.
124
125							=head1 ATTRIBUTES
126
127							If not stated otherwise, read write attributes can be set as options from the config file
128							passed to L<Spreadsheet::Compare> or L<spreadcomp>.
129
130							=head2 can_chunk
131
132							(B<readonly>) Will be set to a true value by the Reader module if the Reader supports
133							chunking.
134
135							=head2 chunk
136
137							possible values: <column>
138							or
139							{ column => <column>, regex => <regex> },
140							default: undef
141
142							Process the input in batches defined by the content of a column. When the
143							regex form is used it has to have a capturing expression. The result will
144							be used as identifier for the chunk. For example:
145
146							chunk:
147							column: RECORD_NBR
148							regex: '(\d{2})$'
149
150							will take the last two digits of the numbers in column RECORD_NBR, resulting
151							in up to 100 batches. This is useful for very large files that do not fit
152							entirely into memory (see L<Spreadsheet::Compare/LIMITING MEMORY USAGE>).
153							Reading for each batch will be handled sequentially to save memory.
154
155							All records will be read twice, first for creating the lookup info for the chunks
156							and second for the actual data. This will significantly increase execution time.
157
158							=head2 chunker
159
160							(B<readonly>) A reference to a generated subroutine that returns the chunk name
161							for a record based on the settings from L</chunk>. This will be called from the
162							Reader subclasses.
163
164							=head2 exhausted
165
166							(B<readonly>) Will be true if the reader has no more records to read.
167
168							=head2 has_header
169
170							possible values: bool
171							default: undefined
172
173							Specify whether the file contains a header line.
174
175							=head2 header
176
177							(B<readonly>) A reference to an array with the header names or (in case there is no
178							named header) the zero based indexes.
179
180							=head2 identity
181
182							possible values: <array of column numbers or names>
183							default: []
184
185							Defines the identity used to identify and match a single record. If L</has_header> is
186							true, the header names can be used. If not, the column numbers (zero based) will
187							be used as header names.
188
189							examples for config file entries:
190
191							identity: [rec_nbr, rec_type]
192
193							identity:
194							- rec_nbr
195							- rec_type
196
197							identity: [3, 4, 17]
198
199							=head2 index
200
201							(B<readonly>) 0 for the reader on the left and 1 for the reader on the right side of the comparison.
202
203							=head2 result
204
205							(B<readonly>) A reference to an array with the currently read data after a call to fetch.
206
207							=head2 side
208
209							(B<readonly>) 'left' for the reader on the left and 'right' for the reader on the right side of the comparison.
210
211							=head2 side_name
212
213							possible values: <string>
214							default: 'left' or 'right', depending on L</index>
215
216							The name for the side of the comparison used for reporting.
217
218							=head2 skip
219
220							possible values: <key value pairs>
221							default: {} (empty, no records are skipped)
222
223							Skip lines by column content. Keys must be column names (when the input has column
224							headers, see L</has_header>) or numbers, the
225							values are interpreted as regular expressions. A leading '!' negates the regex.
226
227							Example:
228
229							skip:
230							Name: ^XYZ-
231							Price: !\d
232
233							=head2 skipper
234
235							(B<readonly>) A reference to a generated subroutine that returns true or false
236							depending on whether the record should be skipped according to the value of L</skip>.
237							This will be called from the Reader subclasses.
238
239							=head1 METHODS
240
241							The methods L</setup> and L</fetch> have to be overridden by derived classes.
242
243							=head2 fetch($size)
244
245							Fetch $size records from the source.
246
247							=head2 setup()
248
249							Will be called by L<Spreadsheet::Compare::Single> at the start of a comparison.
250							This is for setup tasks before handling the first fetch (e.g. opening a file,
251							reading the header, ...)
252
253							=cut