File Coverage

blib/lib/Spreadsheet/Compare/Single.pm
Criterion Covered Total %
statement 302 334 90.4
branch 112 182 61.5
condition 59 93 63.4
subroutine 14 23 60.8
pod 1 3 33.3
total 488 635 76.8


line stmt bran cond sub pod time code
1             package Spreadsheet::Compare::Single;
2              
3 14     14   111 use Mojo::Base 'Mojo::EventEmitter', -signatures;
  14         30  
  14         128  
4 14     14   4293 use Spreadsheet::Compare::Common;
  14         169  
  14         154  
5 14     14   7628 use Spreadsheet::Compare::Record;
  14         53  
  14         113  
6              
7             #<<<
8             use Spreadsheet::Compare::Config {
9             allow_duplicates => 0,
10             below_limit_is_equal => 0,
11             convert_numbers => 0,
12             decimal_separator => '.',
13             diff_relative => [],
14             digital_grouping_symbol => '',
15             fetch_size => 1000,
16             fetch_limit => 0,
17             ignore => [],
18             ignore_strings => 0,
19             is_sorted => 0,
20             left => 'left',
21             limit_abs => 0,
22             limit_rel => 0,
23             _numerical_regex => sub { #<<<
24 18         418 my $ds = $_[0]->_ds;
25 18         114 my $dgs = $_[0]->_dgs;
26 18         479 return qr/
27             ^
28             [-+]?
29             [0-9$dgs]*
30             $ds?
31             [0-9]+
32             ([eE][-+]?[0-9]+)?
33             $
34             /x;
35             }, #>>>
36 14         544 readers => [],
37             report_all_data => 0,
38             report_diff_row => 0,
39             right => 'right',
40             title => '',
41 14     14   2468 }, protected => 1, make_attributes => 1;
  14         32  
42             #>>>
43 14     14 0 102 sub counter_names { return qw/left right same diff limit miss add dup/ }
44              
45             has _ds => sub { quotemeta( $_[0]->decimal_separator ) };
46             has _dgs => sub { quotemeta( $_[0]->digital_grouping_symbol ) };
47              
48             my( $debug, $trace );
49              
50 18     18 0 36 sub init ($self) {
  18         77  
  18         90  
51              
52 18 50       69 croak 'The ignore parameter has to be an array reference'
53             unless ref( my $ignore = $self->ignore ) eq 'ARRAY';
54 18 50       119 croak 'The diff_relative parameter has to be an array reference'
55             unless ref( my $diffr = $self->diff_relative ) eq 'ARRAY';
56              
57             # speed up logging
58 18         139 ( $trace, $debug ) = get_log_settings();
59              
60 18         314 $self->diff_relative( { map { $_ => 1 } @$diffr } );
  0         0  
61              
62             # make sure certain attributes are evaluated and available as hash values
63 18         178 $self->$_ for qw(
64             convert_numbers diff_relative _dgs _ds ignore
65             limit_abs limit_rel _numerical_regex
66             );
67              
68 18         57 return $self;
69             }
70              
71              
72 18     18 1 37 sub compare ($self) { ## no critic (ProhibitExcessComplexity)
  18         49  
  18         33  
73              
74 18         76 my $readers = $self->readers;
75 18 50 33     247 croak "readers is not an array ref" unless $readers and ref($readers) eq 'ARRAY';
76 18         59 for my $idx ( 0, 1 ) {
77 36         101 my $r = $readers->[$idx];
78 36 50 33     304 croak "invalid reader at index $idx" unless ref($r) and $r->isa('Spreadsheet::Compare::Reader');
79 36         155 $r->setup();
80             }
81              
82 18 50   0   89 $debug and DEBUG "Header left :", sub { Dump( $readers->[0]->header ) };
  0         0  
83 18 50   0   56 $debug and DEBUG "Header right:", sub { Dump( $readers->[1]->header ) };
  0         0  
84              
85             # create internal lookups
86 18         99 $self->{_look}{hdr} = $readers->[0]->header;
87 18         161 $self->{_look}{h2i} = my $h2i = $readers->[0]->h2i;
88 18         690 $self->{_look}{i2h} = { reverse %$h2i };
89 18 50       104 $self->{_look}{ign} = { map { defined( $h2i->{$_} ) ? ( $h2i->{$_} => 1 ) : () } $self->ignore->@* };
  25         540  
90              
91 18         191 $self->emit( '_after_reader_setup', $self->{_look} );
92 18         222 $self->_set_limits( $self->{_look}{hdr} );
93              
94             # shortcuts to the result arrays
95 18         89 my $list_l = $readers->[0]->result();
96 18         62 my $list_r = $readers->[1]->result();
97              
98 18 50       66 $trace and TRACE "Array ref left:", $list_l;
99 18 50       55 $trace and TRACE "Array ref right:", $list_r;
100              
101 18         73 my @streams = qw/Differences Missing Additional Duplicates/;
102 18 50       70 push @streams, 'All' if $self->report_all_data;
103              
104             # emit events for each stream, this will be subscribed by reporters
105 18         148 for my $name (@streams) {
106 72         538 $self->emit( 'add_stream', $name );
107 72         551 $self->emit( 'write_header', $name );
108             }
109              
110 18         138 my @diff_columns;
111              
112             # fetch a configured number of records possibly sorted by the identity column
113             # and compare everything that is alphanumerically smaller than the
114             # last fetched identity value
115 18 100       65 my $size = $self->is_sorted ? $self->fetch_size : ~0;
116 18         140 my $last_pass = 0;
117 18         37 my $fetches = 0;
118 18         179 my %count = qw/left 0 right 0 same 0 diff 0 miss 0 add 0 dup 0 limit 0/;
119 18         69 while ( $last_pass == 0 ) {
120              
121 79         2861 my $done = 1;
122 79         233 for my $r (@$readers) {
123 158         1040 my $fnbr = $r->fetch($size);
124 158         841 INFO "Fetched $fnbr records from ", $r->side;
125 158   100     3188 $done &&= $r->exhausted;
126             }
127              
128 79   100     434 my $limit_reached =
129             $self->is_sorted && $self->fetch_size && $self->fetch_limit && ( ++$fetches >= $self->fetch_limit );
130 79   100     1216 $last_pass = $done || $limit_reached;
131              
132 79         457 $self->emit('after_fetch');
133              
134             # TODO: (issue) solve contradiction of chunks <> partial fetches
135             # chunks are always complete but can be unsorted, last_id makes no sense in that case
136             # this will mess up duplicate counting
137             # partial fetches are always sorted
138              
139 79 100 66     1314 $last_pass = 1 if $readers->[0]->exhausted and $readers->[1]->exhausted;
140 79         854 INFO "last_pass:$last_pass";
141              
142             $trace and TRACE "Ids on the left:", sub {
143 0     0   0 Dump( [ map { $_->id } @$list_l ] );
  0         0  
144 79 50       672 };
145             $trace and TRACE "Ids on the right:", sub {
146 0     0   0 Dump( [ map { $_->id } @$list_r ] );
  0         0  
147 79 50       241 };
148              
149 79 50       483 my $last_id_l = @$list_l ? $list_l->[-1]->id : '';
150 79 50       385 my $last_id_r = @$list_r ? $list_r->[-1]->id : '';
151              
152 79 50       335 unless ( $self->allow_duplicates ) {
153             # check for duplicates in the id column and
154             # put them into the 'Duplicates' sheet
155              
156 79         628 for my $rec ( $self->_check_duplicates( $list_l, $readers->[0]->side )->@* ) {
157 18         156 $self->emit( 'write_row', 'Duplicates', $rec );
158             }
159              
160 79         559 for my $rec ( $self->_check_duplicates( $list_r, $readers->[1]->side )->@* ) {
161 18         179 $self->emit( 'write_row', 'Duplicates', $rec );
162             }
163              
164 79         405 $count{dup} = $self->{_dup_sum};
165             }
166              
167             # generate lookup hash by identity column
168 79         198 my %look_r;
169 79         370 for my $i ( 0 .. $#$list_r ) {
170 4780         20725 my $r = $list_r->[$i];
171 4780   100     8475 $look_r{ $r->id } //= [];
172 4780         33297 push $look_r{ $r->id }->@*, $i;
173             }
174              
175 79 50       563 $debug and DEBUG scalar( keys %look_r ), " unique ids found on the right";
176              
177             # pass 1: loop over all records on the left
178             # if a corresponding record on the right is found
179             # ==> calculate the diffs
180             # ==> write to the corresponding output stream
181             # ==> mark record on the right side as DONE
182 79 50       225 $debug and DEBUG "Processing ", scalar(@$list_l), " records on the left";
183 79         297 LEFT_ROW: while ( my $rec = shift @$list_l ) {
184              
185 4780 100       155684 $self->emit( 'counters', \%count ) if $count{left} % $self->fetch_size == 0;
186              
187 4780         42632 my $id = $rec->id;
188              
189 4780 50       32698 $debug and DEBUG "left row with ID >>$id<<";
190 4780 50       15098 $self->emit( 'write_row', 'All', $rec )
191             if $self->report_all_data;
192              
193             # compare only those records that are safe and will not
194             # be fetched later
195 4780 100 100     29050 if (
      66        
      66        
196             $self->is_sorted
197             and not $last_pass
198             and ( $id eq $last_id_l
199             or $id eq $last_id_r )
200             ) {
201 9 50       127 $debug and DEBUG "reached end of last fetch on the left side";
202 9         44 unshift @$list_l, $rec;
203 9 50       47 if ( !$self->allow_duplicates ) {
204 9         114 $self->{_dup_seen}{ $readers->[0]->side }{$id}--;
205             }
206 9         106 last LEFT_ROW;
207             }
208              
209 4771         33685 $count{left}++;
210              
211             # get the corresponding record on the right side
212 4771         21533 my( $cor, $diff_rec ) = $self->_get_match( $rec, $list_r, $look_r{$id} );
213              
214             # add to missing sheet if no corresponding record is found
215 4771 50       13437 unless ($cor) {
216              
217 0 0       0 $debug and DEBUG "Missing right row for ID >>$id<<";
218              
219 0         0 $self->emit( 'write_row', 'Missing', $rec );
220 0         0 $count{miss}++;
221              
222 0 0       0 $self->emit(
223             'write_row',
224             'All',
225             Spreadsheet::Compare::Record->new(
226             rec => [],
227             reader => $readers->[1],
228             )
229             ) if $self->report_all_data;
230              
231 0         0 next LEFT_ROW;
232             }
233              
234 4771 50       11823 $debug and DEBUG "found record on the right side";
235              
236 4771         12586 $count{right}++;
237              
238 4771 50       13450 $self->emit( 'write_row', 'All', $cor )
239             if $self->report_all_data;
240              
241 4771         27638 my $diff = $diff_rec->diff_info;
242              
243             # skip record if no difference is found
244 4771 100 66     29908 if (
      66        
245             $diff->{equal}
246             or ( $diff->{limit}
247             and $self->below_limit_is_equal )
248             ) {
249 2087 50       3818 $debug and DEBUG "the records are identical";
250 2087         3654 $count{same}++;
251 2087         37998 next LEFT_ROW;
252             }
253              
254 2684 50       17172 $debug and DEBUG "difference found";
255              
256 2684         7305 $count{diff}++;
257 2684 100       8941 $count{limit}++ if $diff->{limit};
258              
259 2684         7738 for my $i ( 0 .. $diff_rec->limit_mask->$#* ) {
260 182384 100       691775 $diff_columns[$i]++ if $diff_rec->limit_mask->[$i] > 0;
261             }
262              
263 2684         20148 $self->emit( 'write_fmt_row', 'Differences', $rec );
264              
265 2684         25530 $self->emit( 'write_fmt_row', 'Differences', $cor );
266              
267 2684 50       26840 $self->emit( 'write_fmt_row', 'Differences', $diff_rec )
268             if $self->report_diff_row;
269              
270             } # pass 1 LEFT_ROW
271              
272 79         1809 $self->emit( 'counters', \%count );
273              
274 79 50       1123 $debug and DEBUG scalar( keys $self->{_matched_right}->%* ), " matched records on the right";
275              
276             # use not already matched records from the right
277 79 100       847 @$list_r = map { $self->{_matched_right}{$_} ? () : $list_r->[$_] } 0 .. $#$list_r;
  4780         111103  
278              
279 79 50       1197 $debug and DEBUG scalar(@$list_r), " records left after pass 1";
280              
281             # pass 2: loop over all remaining entries on the right
282             # and add them to the additional sheet
283 79         358 RIGHT_ROW: while ( my $rec = shift @$list_r ) {
284              
285 9         66 my $id = $rec->{id};
286             # compare only those records that are safe and will not
287             # be fetched later
288 9 50 33     103 if (
      33        
289             not $last_pass
290             and ( $id eq $last_id_l
291             or $id eq $last_id_r )
292             ) {
293 9 50       47 $debug and DEBUG "reached end of last fetch on the right side";
294 9         36 unshift @$list_r, $rec;
295 9 50       64 if ( !$self->allow_duplicates ) {
296 9         131 $self->{_dup_seen}{ $readers->[1]->side }{$id}--;
297             }
298 9         114 last RIGHT_ROW;
299             }
300              
301 0 0       0 $debug and DEBUG "found additional record on the right side $id";
302              
303 0         0 $count{right}++;
304              
305 0 0       0 if ( $self->report_all_data ) {
306 0         0 $self->emit(
307             'write_row',
308             'All',
309             Spreadsheet::Compare::Record->new(
310             rec => [],
311             reader => $readers->[0],
312             )
313             );
314 0         0 $self->emit( 'write_row', 'All', $rec );
315             }
316              
317 0         0 $self->emit( 'write_row', 'Additional', $rec );
318              
319 0         0 $count{add}++;
320             } # pass 2 RIGHT_ROW
321              
322 79         402 $self->emit( 'counters', \%count );
323              
324             } # fetch data
325              
326 18         1531 $self->emit( 'mark_header', 'Differences', \@diff_columns );
327              
328 18         169 $self->emit( 'final_counters', \%count );
329              
330 18 50   0   123 $debug and DEBUG "Counters:", sub { Dump( \%count ) };
  0         0  
331              
332 18         114 return \%count;
333             }
334              
335              
336             # Check a list of record-hashes for duplicates in the field(s)
337             # specified by the identity. Return all affected records.
338 158     158   1070 sub _check_duplicates ( $self, $list, $side ) {
  158         290  
  158         311  
  158         265  
  158         258  
339              
340 158 50       426 $debug and DEBUG "checking duplicates";
341              
342 158         279 my @dup_list;
343              
344 158         445 for my $rec (@$list) {
345              
346 9560         20029 my $id = $rec->id;
347              
348 9560 50       28941 $trace and TRACE "id:$id";
349 9560 100       19779 if ( $self->{_dup_seen}{$side}{$id} ) {
350 36 50       94 $debug and DEBUG "duplicate for id=$id, side $side";
351 36         118 push @dup_list, $rec;
352             }
353 9560         25254 $self->{_dup_seen}{$side}{$id}++;
354             }
355              
356 158         421 for my $side (qw/left right/) {
357 316         824 $self->{_dup_side}{$side} = 0;
358 316         24675 for my $id ( keys $self->{_dup_seen}{$side}->%* ) {
359 103067 100       220422 $self->{_dup_side}{$side}++ if $self->{_dup_seen}{$side}{$id} > 1;
360             }
361             }
362              
363 158         1245 $self->{_dup_sum} = max( $self->{_dup_side}{$side}, $self->{_dup_side}{$side} );
364              
365 158         646 return ( \@dup_list );
366             }
367              
368              
369             # compare two records $l and $r:
370             # - ignore fields in the ign lookup
371             # - mark records as different if the absolute or relative limits are exceeded
372             # - convert strings to numbers if configured
373 4786     4786   8085 sub _compare_record ( $self, $l, $r ) { ## no critic (ProhibitExcessComplexity)
  4786         8841  
  4786         9748  
  4786         7543  
  4786         6226  
374              
375 4786         8751 state $diff_default = {
376             ABS_FIELD => '',
377             ABS_VALUE => '',
378             REL_FIELD => '',
379             REL_VALUE => '',
380             limit => 0,
381             equal => 1,
382             };
383              
384 4786         35971 my %diff = %$diff_default;
385 4786         12612 my @limit_mask;
386             my @diff_rec;
387              
388             my $drec = Spreadsheet::Compare::Record->new(
389             diff_info => \%diff,
390             limit_mask => \@limit_mask,
391             rec => \@diff_rec,
392             side => 'diff',
393             side_name => 'diff',
394             h2i => $self->{_look}{h2i},
395 4786         31035 );
396              
397 4786         17530 $l->diff_info( \%diff );
398 4786         35128 $r->diff_info( \%diff );
399 4786         32073 $l->limit_mask( \@limit_mask );
400 4786         29155 $r->limit_mask( \@limit_mask );
401              
402 4786         27448 my $lrec = $l->rec;
403 4786         23414 my $rrec = $r->rec;
404              
405 4786 50   0   23068 $trace and TRACE "LREC", sub { Dump($lrec) };
  0         0  
406 4786 50   0   10379 $trace and TRACE "RREC", sub { Dump($rrec) };
  0         0  
407              
408 4786 100       11676 if ( $l->hash eq $r->hash ) {
409 474 50       1199 $debug and DEBUG "the record hashes are identical";
410 474         1233 @diff_rec = map { 'EQ_HASH' } @$lrec;
  36795         45896  
411 474         1263 @limit_mask = map { 0 } @$lrec;
  36795         43665  
412 474         1617 return $drec;
413             }
414              
415 4312         14889 my $i2h = $self->{_look}{i2h};
416 4312         8345 my $all_check = my $all_below = 0;
417 4312         14818 for my $idx ( 0 .. $#$lrec ) {
418              
419 271809         485861 my $key = $i2h->{$idx};
420              
421 271809         384341 $limit_mask[$idx] = 0;
422              
423 271809 100       470150 if ( $self->{_look}{ign}{$idx} ) {
424 14220         26066 $diff_rec[$idx] = 'IGNORED';
425 14220         23267 next;
426             }
427              
428 257589   50     481488 my $lorig = my $lval = $lrec->[$idx] // '';
429 257589   50     476133 my $rorig = my $rval = $rrec->[$idx] // '';
430              
431 257589 50       413068 if ( $self->{convert_numbers} ) {
432 0         0 $self->_convert_number($lval);
433 0         0 $self->_convert_number($rval);
434 0 0       0 $debug and DEBUG "$key: converted values >>$lval<< >>$rval<<";
435 0         0 $lrec->[$idx] = $lval;
436 0         0 $rrec->[$idx] = $rval;
437             }
438              
439 257589 100       433344 if ( $lorig eq $rorig ) {
440 246227 50       365477 $debug and DEBUG "$key: $lorig == $rorig in string comparison";
441 246227         353410 $diff_rec[$idx] = 'EQ_STR';
442 246227         378165 next;
443             }
444              
445 11362         20447 my $rxreal = $self->{_numerical_regex};
446 11362 50 33     30722 if ( $self->ignore_strings and $lorig !~ /$rxreal/ and $rorig !~ /$rxreal/ ) {
    100 33        
      66        
447 0 0       0 $debug and DEBUG "$key: skip string comparison because ignore_strings is set";
448 0         0 $diff_rec[$idx] = 'SKIP_STR';
449 0         0 next;
450             }
451             elsif ( $lorig !~ /$rxreal/ or $rorig !~ /$rxreal/ ) {
452 208 50       1609 $debug and DEBUG "$key: $lorig != $rorig in string comparison";
453 208         366 $diff_rec[$idx] = 'NEQ_STR';
454 208         308 $diff{equal} = 0;
455 208         290 $limit_mask[$idx] = 1;
456 208         367 next;
457             }
458              
459 14     14   228 no warnings qw/numeric/; ## no critic (ProhibitNoWarnings)
  14         38  
  14         18873  
460              
461 11154         189661 $self->_convert_number( $lval, 1 );
462 11154         26218 $self->_convert_number( $rval, 1 );
463              
464 11154 100       24133 if ( $lval == $rval ) {
465 3893 50       10353 $debug and DEBUG "$key: $lval == $rval in numerical comparison";
466 3893         11383 $diff_rec[$idx] = 'EQ_NUM';
467 3893         10877 next;
468             }
469              
470 7261 50       13716 $debug and DEBUG "$key: $lval != $rval in numerical comparison";
471              
472 7261         19244 my $limit_abs = $self->{limit_abs}{$key};
473 7261         16234 my $limit_rel = $self->{limit_rel}{$key};
474              
475 7261         15536 my $diff = abs( $rval - $lval );
476              
477 7261         15392 $diff_rec[$idx] = $diff;
478 7261         12054 $diff{equal} = 0;
479 7261         10392 $limit_mask[$idx] = 1;
480              
481 7261         10228 my $below = my $check = 0;
482 7261 50       15807 if ( $limit_abs ne 'none' ) {
483 7261         9800 $check++;
484 7261 100       18258 if ( $diff <= $limit_abs ) {
485 5968 50       10843 $debug and DEBUG "$key: diff $diff is below absolute limit ", $limit_abs;
486 5968         8270 $below++;
487             }
488 7261 100       20008 if ( $diff > $diff{ABS_VALUE} ) {
489 3463         6270 $diff{ABS_FIELD} = $key;
490 3463         7290 $diff{ABS_VALUE} = $diff;
491             }
492             }
493              
494 7261 100 100     28799 my $rdiff =
495             ( $rval == 0 or $lval == 0 )
496             ? 1
497             : ( $diff / abs($lval) );
498              
499 7261 50       17794 $diff_rec[$idx] = sprintf( '%.4f', $rdiff ) if $self->{diff_relative}{$key};
500              
501 7261 50       14913 if ( $limit_rel ne 'none' ) {
502 7261         11071 $check++;
503 7261 100       15182 if ( $rdiff <= $limit_rel ) {
504 6706 50       12682 $debug and DEBUG "$key: diff $rdiff is below relative limit ", $limit_rel;
505 6706         9313 $below++;
506             }
507 7261 100       16112 if ( $rdiff > $diff{REL_VALUE} ) {
508 4432         6899 $diff{REL_FIELD} = $key;
509 4432         6905 $diff{REL_VALUE} = $rdiff;
510             }
511             }
512              
513 7261 100 66     20914 if ( $check && ( $below == $check ) ) {
514 5956 50       12463 $debug and DEBUG "$key: mark as diff but below limit";
515 5956         8695 $limit_mask[$idx] = -1;
516             }
517              
518 7261         10052 $all_check += $check;
519 7261         15388 $all_below += $below;
520             }
521              
522 4312   100     18204 $diff{limit} = $all_check && ( $all_check == $all_below );
523              
524 4312         14871 return $drec;
525             }
526              
527              
528             # Try to find the best matching record on the right list and
529             # save the result in an internal lookup hash
530              
531 4771     4771   8224 sub _get_match ( $self, $lrec, $rrecs, $rindexes ) {
  4771         7860  
  4771         7411  
  4771         7495  
  4771         7659  
  4771         8558  
532              
533 4771         8331 my @found;
534 4771         7424 my $pos = 0;
535 4771         12460 for my $ridx (@$rindexes) {
536 4786         11980 my $rrec = $rrecs->[$ridx];
537              
538             # compare the records
539 4786         15297 my $diff_rec = $self->_compare_record( $lrec, $rrec );
540 4786         17529 my $limit_mask = $diff_rec->limit_mask;
541             my %found = (
542             ridx => $ridx,
543             drec => $diff_rec,
544             pos => $pos++,
545 308604         454894 below => scalar( grep { $_ == -1 } @$limit_mask ),
546 4786         33626 above => scalar( grep { $_ == 1 } @$limit_mask ),
  308604         466604  
547             );
548              
549 4786         16299 my $diff = $diff_rec->diff_info;
550 4786 100 66     39429 if ( $diff->{equal} or ( $diff->{limit} and $self->below_limit_is_equal ) ) {
      66        
551 2087 50       4310 $debug and DEBUG "found direct match";
552 2087         4838 @found = \%found;
553 2087         4785 last;
554             }
555             else {
556 2699         25087 push @found, \%found;
557             }
558             }
559              
560 4771 50       11996 return unless @found;
561              
562 4771 0       15686 my $match = ( sort { $a->{below} <=> $b->{below} || $a->{above} <=> $b->{above} } @found )[0];
  15         128  
563 4771         12860 splice( @$rindexes, $match->{pos}, 1 );
564 4771         12478 my $ridx = $match->{ridx};
565 4771         22225 $self->{_matched_right}{$ridx} = 1;
566              
567 4771         22015 return $rrecs->[$ridx], $match->{drec};
568             }
569              
570             # TODO: (issue) tests for individual limits
571              
572             # transform limit configuration into separate limits for each header column
573 18     18   46 sub _set_limits ( $self, $head ) {
  18         37  
  18         29  
  18         30  
574              
575 18         66 my $labs = $self->limit_abs;
576 18         116 my $lrel = $self->limit_rel;
577 18         131 my $def = config_defaults();
578              
579             my $limit_abs_def =
580             ref($labs)
581             ? ( $labs->{__default__} // $def->{limit_abs} )
582 18 100 33     109 : ( $labs // $def->{limit_abs} );
      33        
583              
584             my $limit_rel_def =
585             ref($lrel)
586             ? ( $lrel->{__default__} // $def->{limit_rel} )
587 18 100 33     82 : ( $lrel // $def->{limit_rel} );
      33        
588              
589 18 100       102 $self->limit_abs( $labs = {} ) if ref($labs) ne 'HASH';
590 18 100       166 $self->limit_rel( $lrel = {} ) if ref($lrel) ne 'HASH';
591 18         99 for my $key (@$head) {
592 521   100     1899 $labs->{$key} //= $limit_abs_def;
593 521   100     1519 $lrel->{$key} //= $limit_rel_def;
594             }
595              
596 18 50   0   52 $debug and DEBUG "absolute limits:", sub { Dump($labs) };
  0         0  
597 18 50   0   54 $debug and DEBUG "relative limits:", sub { Dump($lrel) };
  0         0  
598              
599 18         46 return 1;
600             }
601              
602              
603             # Converts $string to a numerical value. Unless $force is
604             # true conversion will only be performed if $string matches
605             # the internal representation for numerical strings (depending
606             # on the setings for decimal_separator and digital_grouping_symbol)
607 22308     22308   32451 sub _convert_number ( $self, $string, $force = '' ) { ## no critic (RequireArgUnpacking)
  22308         30369  
  22308         32354  
  22308         30623  
  22308         27029  
608 22308         33129 my $rir = $self->{_numerical_regex};
609 22308         33926 my $dgs = $self->{_dgs};
610 22308         32012 my $ds = $self->{_ds};
611              
612 22308 50 33     46648 return $self unless $force or $string =~ /^$rir$/;
613              
614 14     14   145 no warnings qw/numeric/; ## no critic (ProhibitNoWarnings)
  14         47  
  14         2554  
615 22308 100       37246 $_[1] =~ s/$dgs//g if $dgs;
616 22308 100 66     61240 $_[1] =~ s/$ds/\./ if $ds and $ds ne '\.';
617 22308         48805 $_[1] += 0;
618              
619 22308         34671 return $self;
620             }
621              
622              
623             1;
624              
625             =head1 NAME
626              
627             Spreadsheet::Compare::Single - Module for comparing two spreadsheet datasets
628              
629             =head1 SYNOPSIS
630              
631             use Spreadsheet::Compare::Single;
632             my $single = Spreadsheet::Compare::Single->new(%args);
633             my $result = $single->compare();
634              
635             =head1 DESCRIPTION
636              
637             Spreadsheet::Compare::Single analyses differences between two similar record sets
638             according to a defined configuration set.
639              
640             =head1 ATTRIBUTES
641              
642             All attributes return the object on setting and the current value if called without parameter.
643              
644             $single->attr($value);
645             my $value = $single->attr;
646              
647             They will usually be set by L<Spreadsheet::Compare>, after reading the values from a config file.
648              
649             =head2 allow_duplicates
650              
651             possible values: 0|1
652             default: 0
653              
654             Try to match identical records even when a unique identity cannot be
655             constructed. This can significantly increase compare times on large datasets.
656              
657             =head2 below_limit_is_equal
658              
659             possible values: 0|1
660             default: 0
661              
662             Normally differences that are inside configured limits will still be counted as
663             differences (only marked visually as low priority). Setting below_limit_is_equal
664             to a true value will result in the record counted as equal.
665              
666             =head2 convert_numbers
667              
668             possible values: 0|1
669             default: 0
670              
671             Convert content that is treated as a numerical value to an actual numeric
672             value (by simply adding 0). This is e.g. handy for having numerical data in
673             Excel output instead of strings that look like numbers. This will not affect
674             the optional 'All' report that can be created with the L<report_all_data> option.
675              
676             =head2 decimal_separator
677              
678             possible values: <string>
679             default: '.'
680              
681             Decimal separator for numerical values
682              
683             =head2 diff_relative
684              
685             possible values: <list of column names>
686             default: []
687              
688             Report the relative instead of the absolute difference if
689             L<Spreadsheet::Compare::Reporter/report_diff_row> is set to a true value.
690              
691             Example (as YAML config):
692              
693             diff_relative: [2,3,4]
694              
695             or
696              
697             diff_relative:
698             - Price
699             - Quantity
700              
701             =head2 digital_grouping_symbol
702              
703             possible values: <string>
704             default: ','
705              
706             Digital grouping symbol for numerical values
707              
708             =head2 fetch_size
709              
710             possible values: <integer>
711             default: 1000
712              
713             When L</is_sorted> is set, L</fetch_size> determines the number of records
714             fetched into memory at a time.
715              
716             =head2 fetch_limit
717              
718             possible values: <integer>
719             default: 0
720              
721             When L</is_sorted> is set, L</fetch_limit> determines the number of fetches
722             (of size L</fetch_size>) before the comparison stops. This is useful during
723             setup with large datasets where you may have columns that are different for
724             every row and that you better add to the ignore list. Just remember to unset
725             this value once you are done.
726              
727             =head2 ignore
728              
729             possible values: <list of columns>
730             default: empty list
731              
732             Columns to ignore while comparing data. If L<Spreadsheet::Compare::Reader/header> is set
733             the column names have to be used. Else use the zero based column number.
734              
735             =head2 ignore_strings
736              
737             possible values: 0|1
738             default: 0
739              
740             Only compare numerical data. This skips comparisons where both sides are not considered
741             to be numerical values. This depends on the setting for L</decimal_separator> and
742             L</digital_grouping_symbol>
743              
744             =head2 is_sorted
745              
746             possible values: 0|1
747             default: 0
748              
749             Assume data is sorted by identity. This is needed for fetching data in
750             smaller batches (see L</fetch_size>) to use less memory.
751              
752             =head2 left
753              
754             possible values: <string>
755             default: 'left'
756              
757             Name for the input on the left side of the comparison. Used for reporting.
758              
759             =head2 limit_abs
760              
761             possible values: <number or key/value pairs>
762             default: 0
763              
764             Single value or one entry per column for specifying absolute tolorance intervals.
765             Differences inside the tolerance interval will be counted and reported
766             separately from differences outside of it. The default value of 0 means no tolerance
767             limit, the value B<'none'> skips the limit check with the side effect that the deviation
768             will not be considered in statistics output (column with highest absolute deviation).
769             The special key B<'__default__'> can be used to set a default for all (numerical)
770             columns, and subsequently setting a different limit on selected columns.
771              
772             Example (as YAML config):
773              
774             limit_abs: 0.01
775              
776             or
777              
778             limit_abs:
779             __default__: 0.01
780             Price: 0.0001
781             Quantity: 1
782             Size: none
783              
784             =head2 limit_rel
785              
786             possible values: <number or keys/values>
787             default: undef
788              
789             Single value or one entry per column for specifying relative tolerance intervals
790             (decimal value, not a percentage). Differences inside the tolerance
791             interval will be counted and reported separately from differences outside of it.
792             The default value of 0 means no tolerance limit, the value B<'none'> skips the
793             limit check with the side effect that the deviation will not be considered in
794             statistics output (column with highest relative deviation). The special key
795             B<'__default__'> can be used to set a default for all (numerical) columns, and
796             subsequently setting a different limit on selected columns.
797              
798             limit_rel: 0.01
799              
800             or
801              
802             limit_rel:
803             __default__: 0.1
804             Price: 0.01
805             Quantity: 1
806             Size: none
807              
808              
809             =head2 readers
810              
811             This attribute cannot be set from a config file.
812              
813             possible values: <list of exactly 2 Reader objects>
814             default: []
815              
816             The readers have to be two objects of L<Spreadsheet::Compare::Reader> subclasses
817             representing the left and the right side of the comparison.
818              
819             =head2 report_all_data
820              
821             Add an additional output stream containing all data from both sides of the comparison.
822             This will slow down reporting on large datasets.
823              
824             =head2 right
825              
826             possible values: <string>
827             default: 'right'
828              
829             Name for the input on the right side of the comparison. Used for reporting.
830              
831             =head2 title
832              
833             possible values: <string>
834             default: ''
835              
836             A title for the comparison
837              
838             =head1 CONSTRUCTOR
839              
840             =head2 new
841              
842             my $single = Spreadsheet::Compare::Single->new(%attributes);
843             my $single = Spreadsheet::Compare::Single->new(\%attributes);
844              
845             or
846              
847             my $single = Spreadsheet::Compare::Single->new
848             ->title('Regression Test 1')
849             ->readers([$r_left, $r_right]);
850              
851             Construct a new Spreadsheet::Compare::Single object. All comparison attributes can be given to the
852             constructor or set individualy via their chainable set methods.
853              
854             =head1 METHODS
855              
856             =head2 compare
857              
858             Run all configured tests of the run configuration.
859             Return a hashref with counters:
860              
861             {
862             add => <number of additional records on the right>,
863             diff => <number of found differences>,
864             dup => <number of duplicate rows (maximum of left and right)>,
865             left => <number of records on the left>,
866             limit => <number of record with differences below set ste limits>,
867             miss => <number of records missing on the right>,
868             right => <number of records on the right>,
869             same => <number of identical records>,
870             }
871              
872             Before running compare the readers have to be set.
873              
874             =head1 EVENTS
875              
876             Spreadsheet::Compare::Single is a L<Mojo::EventEmitter>.
877              
878             The reporting events correspond to the methods that are implemented by
879             sublasses of L<Spreadsheet::Compare::Reporter>. L<Spreadsheet::Compare>
880             will subscribe to the events and call the methods.
881              
882             Spreadsheet::Compare::Single emits the following events
883              
884             =head2 add_stream
885              
886             $single->on(add_stream => sub ($obj, $name) {
887             say "new stream $name for ", $obj->title;
888             });
889              
890             Reporting event. Signaling that a new reporting stream should be created and will be later
891             referenced for reporting data lines.
892              
893             The possible stream names are 'Differences', 'Missing', 'Additional', 'Duplicates' and 'All'.
894              
895             =head2 after_fetch
896              
897             $single->on(after_fetch => sub ($obj) {
898             say "next fetch for ", $obj->title;
899             });
900              
901             Emitted directly after a fetch from the readers.
902              
903             =head2 counters
904              
905             require Data::Dumper;
906             $single->on(counters => sub ($obj, $counters) {
907             say "next fetch for ", $obj->title, ":", Dumper($counters);
908             });
909              
910             Emitted for every handled record. Don't rely on the numbers of calls, but on the content
911             of the %$counters hash if you want to know how many lines where actually read from the readers.
912             This can be used for progress reporting.
913              
914             =head2 final_counters
915              
916             require Data::Dumper;
917             $single->on(final_counters => sub ($obj, $counters) {
918             say "next fetch for $title:", Dumper($counters);
919             });
920              
921             Emitted after completing a single comparison.
922              
923             =head2 mark_header
924              
925             $single->on(mark_header => sub ($obj, 'Differences', $mask) {
926             # mark columns
927             });
928              
929             Reporting event. Emitted after completing a single comparison with a mask describing which columns
930             had differences (key:column_index, value:false/true)
931              
932             =head2 write_fmt_row
933              
934             $single->on(write_fmt_row => sub ($obj, 'Differences', $record) {
935             # write record to stream;
936             });
937              
938             Reporting event. Write a formatted record to the 'Differences' output stream
939             The record is an L<Spreadsheet::Compare::Record> and will contain information
940             about the differences found (see L<Spreadsheet::Compare::Record/limit_mask>).
941              
942             =head2 write_header
943              
944             $single->on(write_header => sub ($obj, $stream) {
945             # write header for stream;
946             });
947              
948             Reporting event. Write the header for an output stream.
949              
950             =head2 write_row
951              
952             $single->on(write_row => sub ($obj, $stream, $record) {
953             # write record to stream;
954             });
955              
956             Reporting event. Write a default formatted record to an output stream.
957             The record is an L<Spreadsheet::Compare::Record>
958              
959             =cut