File Coverage

blib/lib/Spreadsheet/Compare/Reader.pm
Criterion Covered Total %
statement 43 54 79.6
branch 6 10 60.0
condition 5 8 62.5
subroutine 8 11 72.7
pod 2 3 66.6
total 64 86 74.4


line stmt bran cond sub pod time code
1             package Spreadsheet::Compare::Reader;
2              
3 4     4   2907 use Mojo::Base -base, -signatures;
  4         11  
  4         29  
4 4     4   1178 use Spreadsheet::Compare::Common;
  4         10  
  4         36  
5              
6             #<<<
7             use Spreadsheet::Compare::Config {
8 0         0 identity => sub { [] },
9 36         283 skip => sub { {} },
10 4         62 chunk => undef,
11             has_header => undef,
12 4     4   41 }, make_attributes => 1;
  4         10  
13              
14             has can_chunk => 0, ro => 1;
15             has exhausted => undef, ro => 1;
16             has chunker => sub {}, ro => 1;
17             has skipper => sub {}, ro => 1;
18             has header => undef, ro => 1;
19             has result => sub { [] }, ro => 1;
20             has side => sub { $_[0]->index ? 'right' : 'left' }, ro => 1;
21             has side_name => sub { $_[0]->index ? 'right' : 'left' }, ro => 1;
22             has index => sub { croak 'Parameter "index" not set' }, ro => 1;
23             #>>>
24              
25             has h2i => sub {
26             my $hd = $_[0]->header;
27             return { map { $hd->[$_] => $_ } 0 .. $#$hd };
28             };
29              
30              
31 72     72 0 119 sub init ($self) {
  72         110  
  72         97  
32 72 50       174 $self->{__ro__skipper} = _make_skipper( $self->skip ) if $self->skip;
33 72 50 66     240 WARN 'chunking not supported by ', ref($self), "\n"
34             if defined( $self->chunk ) && !$self->can_chunk;
35 72 100 66     438 $self->{__ro__chunker} = _make_chunker( $self->chunk )
36             if defined( $self->chunk ) && $self->can_chunk;
37 72         438 return $self;
38             }
39              
40              
41 0     0 1 0 sub setup () { croak 'Method "setup" not implemented by subclass' }
  0         0  
  0         0  
42              
43 0     0 1 0 sub fetch () { croak 'Method "fetch" not implemented by subclass' }
  0         0  
  0         0  
44              
45              
46             # Returns reference to a subroutine that checks a given record
47             # for being subject to a "skip record" according to the test definition.
48             # Returns true, when the record should be skipped.
49             #<<<
50 72     72   414 sub _make_skipper ($skip) {
  72         122  
  72         103  
51             my %skip_info = pairmap {
52 0     0   0 my( $negate, $regex ) = $b =~ /^(!?)(.+)$/;
53 0 0       0 $a => {
54             negate => $negate ? 1 : 0,
55             regex => qr/$regex/
56             };
57 72         853 } %$skip;
58 9542     9542   13086 return sub ($rec) {
  9542         14487  
  9542         13649  
59 9542         92578 return any { $_ } pairgrep { $rec->val($a) =~ /$b->{regex}/ ^ $b->{negate} } %skip_info;
  0         0  
  0         0  
60 72         584 };
61             }
62             #>>>
63              
64              
65             # Returns reference to a subroutine that generates a chunk name for a given record
66             # with the settings under 'chunk' in the test definition.
67 20     20   170 sub _make_chunker ( $chunk ) {
  20         33  
  20         29  
68 20         66 DEBUG "returning chunker";
69 3042     3042   4178 return sub ($rec) {
  3042         12297  
  3042         4527  
70 3042         4476 my $chunk_name;
71 3042 100       6691 if ( ref($chunk) ) {
72 1274         2430 my $key = $chunk->{column};
73 1274         5185 my $regex = qr/$chunk->{regex}/;
74 1274         3785 ($chunk_name) = $rec->val($key) =~ /$regex/;
75 1274   50     3957 $chunk_name //= '';
76             }
77             else {
78 1768         4745 $chunk_name = $rec->val($chunk);
79             }
80              
81 3042         11960 DEBUG "Chunk name: $chunk_name";
82              
83 3042         25032 return $chunk_name;
84 20         228 };
85             }
86              
87              
88             1;
89              
90             =head1 NAME
91              
92             Spreadsheet::Compare::Reader - Abstract Reader Base Class
93              
94             =head1 SYNOPSIS
95              
96             package Spreadsheet::Compare::MyReader;
97             use Mojo::Base 'Spreadsheet::Compare::Reader';
98              
99             sub setup {...}
100             sub fetch {...}
101              
102             =head1 DESCRIPTION
103              
104             Spreadsheet::Compare::Reader is an abstract base class for spreadsheet reader backends.
105             Available reader classes in this distribution are
106              
107             =over 4
108              
109             =item * L<Spreadsheet::Compare::Reader::CSV> for CSV files
110              
111             =item * L<Spreadsheet::Compare::Reader::DB> for Databases
112              
113             =item * L<Spreadsheet::Compare::Reader::FIX> for fixed size column files
114              
115             =item * L<Spreadsheet::Compare::Reader::WB> for various spreadsheet formats like XLSX, ODS, ...
116              
117             =back
118              
119             This module defines the methods and attributes that are used by a Spreadsheet::Compare::Reader
120             subclass. The methods setup and fetch have to be overridden by the derived class and will
121             croak otherwise.
122              
123             When subclassing consider using L<Spreadsheet::Compare::Common> for convenience.
124              
125             =head1 ATTRIBUTES
126              
127             If not stated otherwise, read write attributes can be set as options from the config file
128             passed to L<Spreadsheet::Compare> or L<spreadcomp>.
129              
130             =head2 can_chunk
131              
132             (B<readonly>) Will be set to a true value by the Reader module if the Reader supports
133             chunking.
134              
135             =head2 chunk
136              
137             possible values: <column>
138             or
139             { column => <column>, regex => <regex> },
140             default: undef
141              
142             Process the input in batches defined by the content of a column. When the
143             regex form is used it has to have a capturing expression. The result will
144             be used as identifier for the chunk. For example:
145              
146             chunk:
147             column: RECORD_NBR
148             regex: '(\d{2})$'
149              
150             will take the last two digits of the numbers in column RECORD_NBR, resulting
151             in up to 100 batches. This is useful for very large files that do not fit
152             entirely into memory (see L<Spreadsheet::Compare/LIMITING MEMORY USAGE>).
153             Reading for each batch will be handled sequentially to save memory.
154              
155             All records will be read twice, first for creating the lookup info for the chunks
156             and second for the actual data. This will significantly increase execution time.
157              
158             =head2 chunker
159              
160             (B<readonly>) A reference to a generated subroutine that returns the chunk name
161             for a record based on the settings from L</chunk>. This will be called from the
162             Reader subclasses.
163              
164             =head2 exhausted
165              
166             (B<readonly>) Will be true if the reader has no more records to read.
167              
168             =head2 has_header
169              
170             possible values: bool
171             default: undefined
172              
173             Specify whether the file contains a header line.
174              
175             =head2 header
176              
177             (B<readonly>) A reference to an array with the header names or (in case there is no
178             named header) the zero based indexes.
179              
180             =head2 identity
181              
182             possible values: <array of column numbers or names>
183             default: []
184              
185             Defines the identity used to identify and match a single record. If L</has_header> is
186             true, the header names can be used. If not, the column numbers (zero based) will
187             be used as header names.
188              
189             examples for config file entries:
190              
191             identity: [rec_nbr, rec_type]
192              
193             identity:
194             - rec_nbr
195             - rec_type
196              
197             identity: [3, 4, 17]
198              
199             =head2 index
200              
201             (B<readonly>) 0 for the reader on the left and 1 for the reader on the right side of the comparison.
202              
203             =head2 result
204              
205             (B<readonly>) A reference to an array with the currently read data after a call to fetch.
206              
207             =head2 side
208              
209             (B<readonly>) 'left' for the reader on the left and 'right' for the reader on the right side of the comparison.
210              
211             =head2 side_name
212              
213             possible values: <string>
214             default: 'left' or 'right', depending on L</index>
215              
216             The name for the side of the comparison used for reporting.
217              
218             =head2 skip
219              
220             possible values: <key value pairs>
221             default: {} (empty)
222              
223             Skip lines by column content. Keys must be column names (when the input has column
224             headers, see L</has_header>) or numbers, the
225             values are interpreted as regular expressions. A leading '!' negates the regex.
226              
227             Example:
228              
229             skip:
230             Name: ^XYZ-
231             Price: !\d
232              
233             =head2 skipper
234              
235             (B<readonly>) A reference to a generated subroutine that returns true or false
236             depending on whether the record should be skipped according to the value of L</skip>.
237             This will be called from the Reader subclasses.
238              
239             =head1 METHODS
240              
241             The methods L</setup> and L</fetch> have to be overridden by derived classes.
242              
243             =head2 fetch($size)
244              
245             Fetch $size records from the source.
246              
247             =head2 setup()
248              
249             Will be called by L<Spreadsheet::Compare::Single> at the start of a comparison.
250             This is for setup tasks before handling the first fetch (e.g. opening a file,
251             reading the header, ...)
252              
253             =cut