File Coverage

blib/lib/Spreadsheet/Compare/Single.pm
Criterion Covered Total %
statement 302 334 90.4
branch 112 182 61.5
condition 59 93 63.4
subroutine 14 23 60.8
pod 1 3 33.3
total 488 635 76.8


line stmt bran cond sub pod time code
1             package Spreadsheet::Compare::Single;
2              
3 14     14   108 use Mojo::Base 'Mojo::EventEmitter', -signatures;
  14         33  
  14         112  
4 14     14   3890 use Spreadsheet::Compare::Common;
  14         176  
  14         135  
5 14     14   7504 use Spreadsheet::Compare::Record;
  14         42  
  14         172  
6              
7             #<<<
8             use Spreadsheet::Compare::Config {
9             allow_duplicates => 0,
10             below_limit_is_equal => 0,
11             convert_numbers => 0,
12             decimal_separator => '.',
13             diff_relative => [],
14             digital_grouping_symbol => '',
15             fetch_size => 1000,
16             fetch_limit => 0,
17             ignore => [],
18             ignore_strings => 0,
19             is_sorted => 0,
20             left => 'left',
21             limit_abs => 0,
22             limit_rel => 0,
23             _numerical_regex => sub { #<<<
24 18         396 my $ds = $_[0]->_ds;
25 18         104 my $dgs = $_[0]->_dgs;
26 18         470 return qr/
27             ^
28             [-+]?
29             [0-9$dgs]*
30             $ds?
31             [0-9]+
32             ([eE][-+]?[0-9]+)?
33             $
34             /x;
35             }, #>>>
36 14         411 readers => [],
37             report_all_data => 0,
38             report_diff_row => 0,
39             right => 'right',
40             title => '',
41 14     14   2526 }, protected => 1, make_attributes => 1;
  14         33  
42             #>>>
43 14     14 0 92 sub counter_names { return qw/left right same diff limit miss add dup/ }
44              
45             has _ds => sub { quotemeta( $_[0]->decimal_separator ) };
46             has _dgs => sub { quotemeta( $_[0]->digital_grouping_symbol ) };
47              
48             my( $debug, $trace );
49              
50 18     18 0 50 sub init ($self) {
  18         81  
  18         75  
51              
52 18 50       69 croak 'The ignore parameter has to be an array reference'
53             unless ref( my $ignore = $self->ignore ) eq 'ARRAY';
54 18 50       113 croak 'The diff_relative parameter has to be an array reference'
55             unless ref( my $diffr = $self->diff_relative ) eq 'ARRAY';
56              
57             # speed up logging
58 18         130 ( $trace, $debug ) = get_log_settings();
59              
60 18         319 $self->diff_relative( { map { $_ => 1 } @$diffr } );
  0         0  
61              
62             # make sure certain attributes are evaluated and available as hash values
63 18         188 $self->$_ for qw(
64             convert_numbers diff_relative _dgs _ds ignore
65             limit_abs limit_rel _numerical_regex
66             );
67              
68 18         53 return $self;
69             }
70              
71              
72 18     18 1 40 sub compare ($self) { ## no critic (ProhibitExcessComplexity)
  18         45  
  18         32  
73              
74 18         60 my $readers = $self->readers;
75 18 50 33     181 croak "readers is not an array ref" unless $readers and ref($readers) eq 'ARRAY';
76 18         56 for my $idx ( 0, 1 ) {
77 36         101 my $r = $readers->[$idx];
78 36 50 33     319 croak "invalid reader at index $idx" unless ref($r) and $r->isa('Spreadsheet::Compare::Reader');
79 36         152 $r->setup();
80             }
81              
82 18 50   0   62 $debug and DEBUG "Header left :", sub { Dump( $readers->[0]->header ) };
  0         0  
83 18 50   0   53 $debug and DEBUG "Header right:", sub { Dump( $readers->[1]->header ) };
  0         0  
84              
85             # create internal lookups
86 18         110 $self->{_look}{hdr} = $readers->[0]->header;
87 18         166 $self->{_look}{h2i} = my $h2i = $readers->[0]->h2i;
88 18         716 $self->{_look}{i2h} = { reverse %$h2i };
89 18 50       108 $self->{_look}{ign} = { map { defined( $h2i->{$_} ) ? ( $h2i->{$_} => 1 ) : () } $self->ignore->@* };
  25         153  
90              
91 18         202 $self->emit( '_after_reader_setup', $self->{_look} );
92 18         240 $self->_set_limits( $self->{_look}{hdr} );
93              
94             # shortcuts to the result arrays
95 18         97 my $list_l = $readers->[0]->result();
96 18         85 my $list_r = $readers->[1]->result();
97              
98 18 50       56 $trace and TRACE "Array ref left:", $list_l;
99 18 50       55 $trace and TRACE "Array ref right:", $list_r;
100              
101 18         70 my @streams = qw/Differences Missing Additional Duplicates/;
102 18 50       75 push @streams, 'All' if $self->report_all_data;
103              
104             # emit events for each stream, this will be subscribed by reporters
105 18         161 for my $name (@streams) {
106 72         523 $self->emit( 'add_stream', $name );
107 72         508 $self->emit( 'write_header', $name );
108             }
109              
110 18         140 my @diff_columns;
111              
112             # fetch a configured number of records possibly sorted by the identity column
113             # and compare everything that is alphanumerically smaller than the
114             # last fetched identity value
115 18 100       69 my $size = $self->is_sorted ? $self->fetch_size : ~0;
116 18         131 my $last_pass = 0;
117 18         40 my $fetches = 0;
118 18         170 my %count = qw/left 0 right 0 same 0 diff 0 miss 0 add 0 dup 0 limit 0/;
119 18         108 while ( $last_pass == 0 ) {
120              
121 79         2904 my $done = 1;
122 79         252 for my $r (@$readers) {
123 158         1127 my $fnbr = $r->fetch($size);
124 158         862 INFO "Fetched $fnbr records from ", $r->side;
125 158   100     3237 $done &&= $r->exhausted;
126             }
127              
128 79   100     452 my $limit_reached =
129             $self->is_sorted && $self->fetch_size && $self->fetch_limit && ( ++$fetches >= $self->fetch_limit );
130 79   100     1150 $last_pass = $done || $limit_reached;
131              
132 79         443 $self->emit('after_fetch');
133              
134             # TODO: (issue) solve contradiction of chunks <> partial fetches
135             # chunks are always complete but can be unsorted, last_id makes no sense in that case
136             # this will mess up duplicate counting
137             # partial fetches are always sorted
138              
139 79 100 66     1246 $last_pass = 1 if $readers->[0]->exhausted and $readers->[1]->exhausted;
140 79         802 INFO "last_pass:$last_pass";
141              
142             $trace and TRACE "Ids on the left:", sub {
143 0     0   0 Dump( [ map { $_->id } @$list_l ] );
  0         0  
144 79 50       710 };
145             $trace and TRACE "Ids on the right:", sub {
146 0     0   0 Dump( [ map { $_->id } @$list_r ] );
  0         0  
147 79 50       272 };
148              
149 79 50       462 my $last_id_l = @$list_l ? $list_l->[-1]->id : '';
150 79 50       494 my $last_id_r = @$list_r ? $list_r->[-1]->id : '';
151              
152 79 50       375 unless ( $self->allow_duplicates ) {
153             # check for duplicates in the id column and
154             # put them into the 'Duplicates' sheet
155              
156 79         746 for my $rec ( $self->_check_duplicates( $list_l, $readers->[0]->side )->@* ) {
157 18         216 $self->emit( 'write_row', 'Duplicates', $rec );
158             }
159              
160 79         714 for my $rec ( $self->_check_duplicates( $list_r, $readers->[1]->side )->@* ) {
161 18         265 $self->emit( 'write_row', 'Duplicates', $rec );
162             }
163              
164 79         459 $count{dup} = $self->{_dup_sum};
165             }
166              
167             # generate lookup hash by identity column
168 79         219 my %look_r;
169 79         372 for my $i ( 0 .. $#$list_r ) {
170 4780         21376 my $r = $list_r->[$i];
171 4780   100     8781 $look_r{ $r->id } //= [];
172 4780         34767 push $look_r{ $r->id }->@*, $i;
173             }
174              
175 79 50       609 $debug and DEBUG scalar( keys %look_r ), " unique ids found on the right";
176              
177             # pass 1: loop over all records on the left
178             # if a corresponding record on the right is found
179             # ==> calculate the diffs
180             # ==> write to the corresponding output stream
181             # ==> mark record on the right side as DONE
182 79 50       322 $debug and DEBUG "Processing ", scalar(@$list_l), " records on the left";
183 79         320 LEFT_ROW: while ( my $rec = shift @$list_l ) {
184              
185 4780 100       129624 $self->emit( 'counters', \%count ) if $count{left} % $self->fetch_size == 0;
186              
187 4780         39361 my $id = $rec->id;
188              
189 4780 50       32451 $debug and DEBUG "left row with ID >>$id<<";
190 4780 50       12894 $self->emit( 'write_row', 'All', $rec )
191             if $self->report_all_data;
192              
193             # compare only those records that are safe and will not
194             # be fetched later
195 4780 100 100     25735 if (
      66        
      66        
196             $self->is_sorted
197             and not $last_pass
198             and ( $id eq $last_id_l
199             or $id eq $last_id_r )
200             ) {
201 9 50       103 $debug and DEBUG "reached end of last fetch on the left side";
202 9         32 unshift @$list_l, $rec;
203 9 50       45 if ( !$self->allow_duplicates ) {
204 9         121 $self->{_dup_seen}{ $readers->[0]->side }{$id}--;
205             }
206 9         89 last LEFT_ROW;
207             }
208              
209 4771         30281 $count{left}++;
210              
211             # get the corresponding record on the right side
212 4771         20968 my( $cor, $diff_rec ) = $self->_get_match( $rec, $list_r, $look_r{$id} );
213              
214             # add to missing sheet if no corresponding record is found
215 4771 50       13578 unless ($cor) {
216              
217 0 0       0 $debug and DEBUG "Missing right row for ID >>$id<<";
218              
219 0         0 $self->emit( 'write_row', 'Missing', $rec );
220 0         0 $count{miss}++;
221              
222 0 0       0 $self->emit(
223             'write_row',
224             'All',
225             Spreadsheet::Compare::Record->new(
226             rec => [],
227             reader => $readers->[1],
228             )
229             ) if $self->report_all_data;
230              
231 0         0 next LEFT_ROW;
232             }
233              
234 4771 50       10369 $debug and DEBUG "found record on the right side";
235              
236 4771         11783 $count{right}++;
237              
238 4771 50       17662 $self->emit( 'write_row', 'All', $cor )
239             if $self->report_all_data;
240              
241 4771         26819 my $diff = $diff_rec->diff_info;
242              
243             # skip record if no difference is found
244 4771 100 66     26421 if (
      66        
245             $diff->{equal}
246             or ( $diff->{limit}
247             and $self->below_limit_is_equal )
248             ) {
249 2087 50       3773 $debug and DEBUG "the records are identical";
250 2087         4176 $count{same}++;
251 2087         37602 next LEFT_ROW;
252             }
253              
254 2684 50       15524 $debug and DEBUG "difference found";
255              
256 2684         6275 $count{diff}++;
257 2684 100       8079 $count{limit}++ if $diff->{limit};
258              
259 2684         6402 for my $i ( 0 .. $diff_rec->limit_mask->$#* ) {
260 182384 100       735806 $diff_columns[$i]++ if $diff_rec->limit_mask->[$i] > 0;
261             }
262              
263 2684         19415 $self->emit( 'write_fmt_row', 'Differences', $rec );
264              
265 2684         24565 $self->emit( 'write_fmt_row', 'Differences', $cor );
266              
267 2684 50       24616 $self->emit( 'write_fmt_row', 'Differences', $diff_rec )
268             if $self->report_diff_row;
269              
270             } # pass 1 LEFT_ROW
271              
272 79         1850 $self->emit( 'counters', \%count );
273              
274 79 50       1085 $debug and DEBUG scalar( keys $self->{_matched_right}->%* ), " matched records on the right";
275              
276             # use not already matched records from the right
277 79 100       775 @$list_r = map { $self->{_matched_right}{$_} ? () : $list_r->[$_] } 0 .. $#$list_r;
  4780         110509  
278              
279 79 50       1066 $debug and DEBUG scalar(@$list_r), " records left after pass 1";
280              
281             # pass 2: loop over all remaining entries on the right
282             # and add them to the additional sheet
283 79         356 RIGHT_ROW: while ( my $rec = shift @$list_r ) {
284              
285 9         69 my $id = $rec->{id};
286             # compare only those records that are safe and will not
287             # be fetched later
288 9 50 33     92 if (
      33        
289             not $last_pass
290             and ( $id eq $last_id_l
291             or $id eq $last_id_r )
292             ) {
293 9 50       34 $debug and DEBUG "reached end of last fetch on the right side";
294 9         33 unshift @$list_r, $rec;
295 9 50       61 if ( !$self->allow_duplicates ) {
296 9         117 $self->{_dup_seen}{ $readers->[1]->side }{$id}--;
297             }
298 9         79 last RIGHT_ROW;
299             }
300              
301 0 0       0 $debug and DEBUG "found additional record on the right side $id";
302              
303 0         0 $count{right}++;
304              
305 0 0       0 if ( $self->report_all_data ) {
306 0         0 $self->emit(
307             'write_row',
308             'All',
309             Spreadsheet::Compare::Record->new(
310             rec => [],
311             reader => $readers->[0],
312             )
313             );
314 0         0 $self->emit( 'write_row', 'All', $rec );
315             }
316              
317 0         0 $self->emit( 'write_row', 'Additional', $rec );
318              
319 0         0 $count{add}++;
320             } # pass 2 RIGHT_ROW
321              
322 79         402 $self->emit( 'counters', \%count );
323              
324             } # fetch data
325              
326 18         1449 $self->emit( 'mark_header', 'Differences', \@diff_columns );
327              
328 18         212 $self->emit( 'final_counters', \%count );
329              
330 18 50   0   120 $debug and DEBUG "Counters:", sub { Dump( \%count ) };
  0         0  
331              
332 18         111 return \%count;
333             }
334              
335              
336             # Check a list of record-hashes for duplicates in the field(s)
337             # specified by the identity. Return all affected records.
338 158     158   1209 sub _check_duplicates ( $self, $list, $side ) {
  158         336  
  158         329  
  158         263  
  158         279  
339              
340 158 50       468 $debug and DEBUG "checking duplicates";
341              
342 158         283 my @dup_list;
343              
344 158         454 for my $rec (@$list) {
345              
346 9560         20578 my $id = $rec->id;
347              
348 9560 50       31045 $trace and TRACE "id:$id";
349 9560 100       20577 if ( $self->{_dup_seen}{$side}{$id} ) {
350 36 50       113 $debug and DEBUG "duplicate for id=$id, side $side";
351 36         121 push @dup_list, $rec;
352             }
353 9560         25794 $self->{_dup_seen}{$side}{$id}++;
354             }
355              
356 158         410 for my $side (qw/left right/) {
357 316         976 $self->{_dup_side}{$side} = 0;
358 316         29366 for my $id ( keys $self->{_dup_seen}{$side}->%* ) {
359 103067 100       229073 $self->{_dup_side}{$side}++ if $self->{_dup_seen}{$side}{$id} > 1;
360             }
361             }
362              
363 158         1120 $self->{_dup_sum} = max( $self->{_dup_side}{$side}, $self->{_dup_side}{$side} );
364              
365 158         774 return ( \@dup_list );
366             }
367              
368              
369             # compare two records $l and $r:
370             # - ignore fields in the ign lookup
371             # - mark records as different if the absolute or relative limits are exceeded
372             # - convert strings to numbers if configured
373 4786     4786   7452 sub _compare_record ( $self, $l, $r ) { ## no critic (ProhibitExcessComplexity)
  4786         8499  
  4786         6782  
  4786         7057  
  4786         7384  
374              
375 4786         8409 state $diff_default = {
376             ABS_FIELD => '',
377             ABS_VALUE => '',
378             REL_FIELD => '',
379             REL_VALUE => '',
380             limit => 0,
381             equal => 1,
382             };
383              
384 4786         33354 my %diff = %$diff_default;
385 4786         11552 my @limit_mask;
386             my @diff_rec;
387              
388             my $drec = Spreadsheet::Compare::Record->new(
389             diff_info => \%diff,
390             limit_mask => \@limit_mask,
391             rec => \@diff_rec,
392             side => 'diff',
393             side_name => 'diff',
394             h2i => $self->{_look}{h2i},
395 4786         30425 );
396              
397 4786         15218 $l->diff_info( \%diff );
398 4786         36111 $r->diff_info( \%diff );
399 4786         35044 $l->limit_mask( \@limit_mask );
400 4786         28590 $r->limit_mask( \@limit_mask );
401              
402 4786         29173 my $lrec = $l->rec;
403 4786         23917 my $rrec = $r->rec;
404              
405 4786 50   0   25384 $trace and TRACE "LREC", sub { Dump($lrec) };
  0         0  
406 4786 50   0   10461 $trace and TRACE "RREC", sub { Dump($rrec) };
  0         0  
407              
408 4786 100       11325 if ( $l->hash eq $r->hash ) {
409 474 50       1607 $debug and DEBUG "the record hashes are identical";
410 474         1060 @diff_rec = map { 'EQ_HASH' } @$lrec;
  36795         51998  
411 474         1234 @limit_mask = map { 0 } @$lrec;
  36795         48303  
412 474         1762 return $drec;
413             }
414              
415 4312         14651 my $i2h = $self->{_look}{i2h};
416 4312         8927 my $all_check = my $all_below = 0;
417 4312         14767 for my $idx ( 0 .. $#$lrec ) {
418              
419 271809         478767 my $key = $i2h->{$idx};
420              
421 271809         400362 $limit_mask[$idx] = 0;
422              
423 271809 100       490670 if ( $self->{_look}{ign}{$idx} ) {
424 14220         24533 $diff_rec[$idx] = 'IGNORED';
425 14220         23944 next;
426             }
427              
428 257589   50     507008 my $lorig = my $lval = $lrec->[$idx] // '';
429 257589   50     492011 my $rorig = my $rval = $rrec->[$idx] // '';
430              
431 257589 50       434808 if ( $self->{convert_numbers} ) {
432 0         0 $self->_convert_number($lval);
433 0         0 $self->_convert_number($rval);
434 0 0       0 $debug and DEBUG "$key: converted values >>$lval<< >>$rval<<";
435 0         0 $lrec->[$idx] = $lval;
436 0         0 $rrec->[$idx] = $rval;
437             }
438              
439 257589 100       446930 if ( $lorig eq $rorig ) {
440 246227 50       394559 $debug and DEBUG "$key: $lorig == $rorig in string comparison";
441 246227         366466 $diff_rec[$idx] = 'EQ_STR';
442 246227         397275 next;
443             }
444              
445 11362         19465 my $rxreal = $self->{_numerical_regex};
446 11362 50 33     31909 if ( $self->ignore_strings and $lorig !~ /$rxreal/ and $rorig !~ /$rxreal/ ) {
    100 33        
      66        
447 0 0       0 $debug and DEBUG "$key: skip string comparison because ignore_strings is set";
448 0         0 $diff_rec[$idx] = 'SKIP_STR';
449 0         0 next;
450             }
451             elsif ( $lorig !~ /$rxreal/ or $rorig !~ /$rxreal/ ) {
452 208 50       1883 $debug and DEBUG "$key: $lorig != $rorig in string comparison";
453 208         401 $diff_rec[$idx] = 'NEQ_STR';
454 208         296 $diff{equal} = 0;
455 208         299 $limit_mask[$idx] = 1;
456 208         400 next;
457             }
458              
459 14     14   185 no warnings qw/numeric/; ## no critic (ProhibitNoWarnings)
  14         34  
  14         19177  
460              
461 11154         192856 $self->_convert_number( $lval, 1 );
462 11154         25319 $self->_convert_number( $rval, 1 );
463              
464 11154 100       24461 if ( $lval == $rval ) {
465 3893 50       10895 $debug and DEBUG "$key: $lval == $rval in numerical comparison";
466 3893         10798 $diff_rec[$idx] = 'EQ_NUM';
467 3893         10092 next;
468             }
469              
470 7261 50       13311 $debug and DEBUG "$key: $lval != $rval in numerical comparison";
471              
472 7261         18095 my $limit_abs = $self->{limit_abs}{$key};
473 7261         16130 my $limit_rel = $self->{limit_rel}{$key};
474              
475 7261         15002 my $diff = abs( $rval - $lval );
476              
477 7261         16746 $diff_rec[$idx] = $diff;
478 7261         12146 $diff{equal} = 0;
479 7261         11695 $limit_mask[$idx] = 1;
480              
481 7261         11248 my $below = my $check = 0;
482 7261 50       15697 if ( $limit_abs ne 'none' ) {
483 7261         10209 $check++;
484 7261 100       18828 if ( $diff <= $limit_abs ) {
485 5968 50       11458 $debug and DEBUG "$key: diff $diff is below absolute limit ", $limit_abs;
486 5968         9487 $below++;
487             }
488 7261 100       18909 if ( $diff > $diff{ABS_VALUE} ) {
489 3463         7040 $diff{ABS_FIELD} = $key;
490 3463         7434 $diff{ABS_VALUE} = $diff;
491             }
492             }
493              
494 7261 100 100     28821 my $rdiff =
495             ( $rval == 0 or $lval == 0 )
496             ? 1
497             : ( $diff / abs($lval) );
498              
499 7261 50       17186 $diff_rec[$idx] = sprintf( '%.4f', $rdiff ) if $self->{diff_relative}{$key};
500              
501 7261 50       14221 if ( $limit_rel ne 'none' ) {
502 7261         12118 $check++;
503 7261 100       17307 if ( $rdiff <= $limit_rel ) {
504 6706 50       12355 $debug and DEBUG "$key: diff $rdiff is below relative limit ", $limit_rel;
505 6706         9510 $below++;
506             }
507 7261 100       16425 if ( $rdiff > $diff{REL_VALUE} ) {
508 4432         7969 $diff{REL_FIELD} = $key;
509 4432         8813 $diff{REL_VALUE} = $rdiff;
510             }
511             }
512              
513 7261 100 66     23649 if ( $check && ( $below == $check ) ) {
514 5956 50       11008 $debug and DEBUG "$key: mark as diff but below limit";
515 5956         9027 $limit_mask[$idx] = -1;
516             }
517              
518 7261         10660 $all_check += $check;
519 7261         16617 $all_below += $below;
520             }
521              
522 4312   100     17237 $diff{limit} = $all_check && ( $all_check == $all_below );
523              
524 4312         14754 return $drec;
525             }
526              
527              
528             # Try to find the best matching record on the right list and
529             # save the result in an internal lookup hash
530              
531 4771     4771   8143 sub _get_match ( $self, $lrec, $rrecs, $rindexes ) {
  4771         8436  
  4771         6908  
  4771         8668  
  4771         8385  
  4771         7254  
532              
533 4771         7912 my @found;
534 4771         7800 my $pos = 0;
535 4771         12051 for my $ridx (@$rindexes) {
536 4786         10973 my $rrec = $rrecs->[$ridx];
537              
538             # compare the records
539 4786         12941 my $diff_rec = $self->_compare_record( $lrec, $rrec );
540 4786         18429 my $limit_mask = $diff_rec->limit_mask;
541             my %found = (
542             ridx => $ridx,
543             drec => $diff_rec,
544             pos => $pos++,
545 308604         478096 below => scalar( grep { $_ == -1 } @$limit_mask ),
546 4786         31178 above => scalar( grep { $_ == 1 } @$limit_mask ),
  308604         485115  
547             );
548              
549 4786         13224 my $diff = $diff_rec->diff_info;
550 4786 100 66     35748 if ( $diff->{equal} or ( $diff->{limit} and $self->below_limit_is_equal ) ) {
      66        
551 2087 50       4476 $debug and DEBUG "found direct match";
552 2087         4264 @found = \%found;
553 2087         4304 last;
554             }
555             else {
556 2699         22269 push @found, \%found;
557             }
558             }
559              
560 4771 50       11680 return unless @found;
561              
562 4771 0       14285 my $match = ( sort { $a->{below} <=> $b->{below} || $a->{above} <=> $b->{above} } @found )[0];
  15         138  
563 4771         12896 splice( @$rindexes, $match->{pos}, 1 );
564 4771         10607 my $ridx = $match->{ridx};
565 4771         19498 $self->{_matched_right}{$ridx} = 1;
566              
567 4771         21231 return $rrecs->[$ridx], $match->{drec};
568             }
569              
570             # TODO: (issue) tests for individual limits
571              
572             # transform limit configuration into separate limits for each header column
573 18     18   48 sub _set_limits ( $self, $head ) {
  18         41  
  18         33  
  18         28  
574              
575 18         59 my $labs = $self->limit_abs;
576 18         121 my $lrel = $self->limit_rel;
577 18         139 my $def = config_defaults();
578              
579             my $limit_abs_def =
580             ref($labs)
581             ? ( $labs->{__default__} // $def->{limit_abs} )
582 18 100 33     97 : ( $labs // $def->{limit_abs} );
      33        
583              
584             my $limit_rel_def =
585             ref($lrel)
586             ? ( $lrel->{__default__} // $def->{limit_rel} )
587 18 100 33     79 : ( $lrel // $def->{limit_rel} );
      33        
588              
589 18 100       97 $self->limit_abs( $labs = {} ) if ref($labs) ne 'HASH';
590 18 100       179 $self->limit_rel( $lrel = {} ) if ref($lrel) ne 'HASH';
591 18         107 for my $key (@$head) {
592 521   100     2124 $labs->{$key} //= $limit_abs_def;
593 521   100     1574 $lrel->{$key} //= $limit_rel_def;
594             }
595              
596 18 50   0   64 $debug and DEBUG "absolute limits:", sub { Dump($labs) };
  0         0  
597 18 50   0   55 $debug and DEBUG "relative limits:", sub { Dump($lrel) };
  0         0  
598              
599 18         44 return 1;
600             }
601              
602              
603             # Converts $string to a numerical value. Unless $force is
604             # true conversion will only be performed if $string matches
605             # the internal representation for numerical strings (depending
606             # on the setings for decimal_separator and digital_grouping_symbol)
607 22308     22308   32382 sub _convert_number ( $self, $string, $force = '' ) { ## no critic (RequireArgUnpacking)
  22308         31999  
  22308         34973  
  22308         31038  
  22308         27658  
608 22308         34455 my $rir = $self->{_numerical_regex};
609 22308         33300 my $dgs = $self->{_dgs};
610 22308         35054 my $ds = $self->{_ds};
611              
612 22308 50 33     46426 return $self unless $force or $string =~ /^$rir$/;
613              
614 14     14   145 no warnings qw/numeric/; ## no critic (ProhibitNoWarnings)
  14         36  
  14         2542  
615 22308 100       39094 $_[1] =~ s/$dgs//g if $dgs;
616 22308 100 66     67216 $_[1] =~ s/$ds/\./ if $ds and $ds ne '\.';
617 22308         49603 $_[1] += 0;
618              
619 22308         37367 return $self;
620             }
621              
622              
623             1;
624              
625             =head1 NAME
626              
627             Spreadsheet::Compare::Single - Module for comparing two spreadsheet datasets
628              
629             =head1 SYNOPSIS
630              
631             use Spreadsheet::Compare::Single;
632             my $single = Spreadsheet::Compare::Single->new(%args);
633             my $result = $single->compare();
634              
635             =head1 DESCRIPTION
636              
637             Spreadsheet::Compare::Single analyses differences between two similar record sets
638             according to a defined configuration set.
639              
640             =head1 ATTRIBUTES
641              
642             All attributes return the object on setting and the current value if called without parameter.
643              
644             $single->attr($value);
645             my $value = $single->attr;
646              
647             They will usually be set by L<Spreadsheet::Compare>, after reading the values from a config file.
648              
649             =head2 allow_duplicates
650              
651             possible values: 0|1
652             default: 0
653              
654             Try to match identical records even when a unique identity cannot be
655             constructed. This can significantly increase compare times on large datasets.
656              
657             =head2 below_limit_is_equal
658              
659             possible values: 0|1
660             default: 0
661              
662             Normally differences that are inside configured limits will still be counted as
663             differences (only marked visually as low priority). Setting below_limit_is_equal
664             to a true value will result in the record counted as equal.
665              
666             =head2 convert_numbers
667              
668             possible values: 0|1
669             default: 0
670              
671             Convert content that is treated as a numerical value to an actual numeric
672             value (by simply adding 0). This is e.g. handy for having numerical data in
673             Excel output instead of strings that look like numbers. This will not affect
674             the optional 'All' report that can be created with the L<report_all_data> option.
675              
676             =head2 decimal_separator
677              
678             possible values: <string>
679             default: '.'
680              
681             Decimal separator for numerical values
682              
683             =head2 diff_relative
684              
685             possible values: <list of column names>
686             default: []
687              
688             Report the relative instead of the absolute difference if
689             L<Spreadsheet::Compare::Reporter/report_diff_row> is set to a true value.
690              
691             Example (as YAML config):
692              
693             diff_relative: [2,3,4]
694              
695             or
696              
697             diff_relative:
698             - Price
699             - Quantity
700              
701             =head2 digital_grouping_symbol
702              
703             possible values: <string>
704             default: ','
705              
706             Digital grouping symbol for numerical values
707              
708             =head2 fetch_size
709              
710             possible values: <integer>
711             default: 1000
712              
713             When L</is_sorted> is set, L</fetch_size> determines the number of records
714             fetched into memory at a time.
715              
716             =head2 fetch_limit
717              
718             possible values: <integer>
719             default: 0
720              
721             When L</is_sorted> is set, L</fetch_limit> determines the number of fetches
722             (of size L</fetch_size>) before the comparison stops. This is useful during
723             setup with large datasets where you may have columns that are different for
724             every row and that you better add to the ignore list. Just remember to unset
725             this value once you are done.
726              
727             =head2 ignore
728              
729             possible values: <list of columns>
730             default: empty list
731              
732             Columns to ignore while comparing data. If L<Spreadsheet::Compare::Reader/header> is set
733             the column names have to be used. Else use the zero based column number.
734              
735             =head2 ignore_strings
736              
737             possible values: 0|1
738             default: 0
739              
740             Only compare numerical data. This skips comparisons where both sides are not considered
741             to be numerical values. This depends on the setting for L</decimal_separator> and
742             L</digital_grouping_symbol>
743              
744             =head2 is_sorted
745              
746             possible values: 0|1
747             default: 0
748              
749             Assume data is sorted by identity. This is needed for fetching data in
750             smaller batches (see L</fetch_size>) to use less memory.
751              
752             =head2 left
753              
754             possible values: <string>
755             default: 'left'
756              
757             Name for the input on the left side of the comparison. Used for reporting.
758              
759             =head2 limit_abs
760              
761             possible values: <number or key/value pairs>
762             default: 0
763              
764             Single value or one entry per column for specifying absolute tolorance intervals.
765             Differences inside the tolerance interval will be counted and reported
766             separately from differences outside of it. The default value of 0 means no tolerance
767             limit, the value B<'none'> skips the limit check with the side effect that the deviation
768             will not be considered in statistics output (column with highest absolute deviation).
769             The special key B<'__default__'> can be used to set a default for all (numerical)
770             columns, and subsequently setting a different limit on selected columns.
771              
772             Example (as YAML config):
773              
774             limit_abs: 0.01
775              
776             or
777              
778             limit_abs:
779             __default__: 0.01
780             Price: 0.0001
781             Quantity: 1
782             Size: none
783              
784             =head2 limit_rel
785              
786             possible values: <number or keys/values>
787             default: undef
788              
789             Single value or one entry per column for specifying relative tolerance intervals
790             (decimal value, not a percentage). Differences inside the tolerance
791             interval will be counted and reported separately from differences outside of it.
792             The default value of 0 means no tolerance limit, the value B<'none'> skips the
793             limit check with the side effect that the deviation will not be considered in
794             statistics output (column with highest relative deviation). The special key
795             B<'__default__'> can be used to set a default for all (numerical) columns, and
796             subsequently setting a different limit on selected columns.
797              
798             limit_rel: 0.01
799              
800             or
801              
802             limit_rel:
803             __default__: 0.1
804             Price: 0.01
805             Quantity: 1
806             Size: none
807              
808              
809             =head2 readers
810              
811             This attribute cannot be set from a config file.
812              
813             possible values: <list of exactly 2 Reader objects>
814             default: []
815              
816             The readers have to be two objects of L<Spreadsheet::Compare::Reader> subclasses
817             representing the left and the right side of the comparison.
818              
819             =head2 right
820              
821             possible values: <string>
822             default: 'right'
823              
824             Name for the input on the right side of the comparison. Used for reporting.
825              
826             =head2 title
827              
828             possible values: <string>
829             default: ''
830              
831             A title for the comparison
832              
833             =head1 CONSTRUCTOR
834              
835             =head2 new
836              
837             my $single = Spreadsheet::Compare::Single->new(%attributes);
838             my $single = Spreadsheet::Compare::Single->new(\%attributes);
839              
840             or
841              
842             my $single = Spreadsheet::Compare::Single->new
843             ->title('Regression Test 1')
844             ->readers([$r_left, $r_right]);
845              
846             Construct a new Spreadsheet::Compare::Single object. All comparison attributes can be given to the
847             constructor or set individualy via their chainable set methods.
848              
849             =head1 METHODS
850              
851             =head2 compare
852              
853             Run all configured tests of the run configuration.
854             Return a hashref with counters:
855              
856             {
857             add => <number of additional records on the right>,
858             diff => <number of found differences>,
859             dup => <number of duplicate rows (maximum of left and right)>,
860             left => <number of records on the left>,
861             limit => <number of record with differences below set ste limits>,
862             miss => <number of records missing on the right>,
863             right => <number of records on the right>,
864             same => <number of identical records>,
865             }
866              
867             Before running compare the readers have to be set.
868              
869             =head1 EVENTS
870              
871             Spreadsheet::Compare::Single is a L<Mojo::EventEmitter>.
872              
873             The reporting events correspond to the methods that are implemented by
874             sublasses of L<Spreadsheet::Compare::Reporter>. L<Spreadsheet::Compare>
875             will subscribe to the events and call the methods.
876              
877             Spreadsheet::Compare::Single emits the following events
878              
879             =head2 add_stream
880              
881             $single->on(add_stream => sub ($obj, $name) {
882             say "new stream $name for ", $obj->title;
883             });
884              
885             Reporting event. Signaling that a new reporting stream should be created and will be later
886             referenced for reporting data lines.
887              
888             The possible stream names are 'Differences', 'Missing', 'Additional', 'Duplicates' and 'All'.
889              
890             =head2 after_fetch
891              
892             $single->on(after_fetch => sub ($obj) {
893             say "next fetch for ", $obj->title;
894             });
895              
896             Emitted directly after a fetch from the readers.
897              
898             =head2 counters
899              
900             require Data::Dumper;
901             $single->on(counters => sub ($obj, $counters) {
902             say "next fetch for ", $obj->title, ":", Dumper($counters);
903             });
904              
905             Emitted for every handled record. Don't rely on the numbers of calls, but on the content
906             of the %$counters hash if you want to know how many lines where actually read from the readers.
907             This can be used for progress reporting.
908              
909             =head2 final_counters
910              
911             require Data::Dumper;
912             $single->on(final_counters => sub ($obj, $counters) {
913             say "next fetch for $title:", Dumper($counters);
914             });
915              
916             Emitted after completing a single comparison.
917              
918             =head2 mark_header
919              
920             $single->on(mark_header => sub ($obj, 'Differences', $mask) {
921             # mark columns
922             });
923              
924             Reporting event. Emitted after completing a single comparison with a mask describing which columns
925             had differences (key:column_index, value:false/true)
926              
927             =head2 write_fmt_row
928              
929             $single->on(write_fmt_row => sub ($obj, 'Differences', $record) {
930             # write record to stream;
931             });
932              
933             Reporting event. Write a formatted record to the 'Differences' output stream
934             The record is an L<Spreadsheet::Compare::Record> and will contain information
935             about the differences found (see L<Spreadsheet::Compare::Record/limit_mask>).
936              
937             =head2 write_header
938              
939             $single->on(write_header => sub ($obj, $stream) {
940             # write header for stream;
941             });
942              
943             Reporting event. Write the header for an output stream.
944              
945             =head2 write_row
946              
947             $single->on(write_row => sub ($obj, $stream, $record) {
948             # write record to stream;
949             });
950              
951             Reporting event. Write a default formatted record to an output stream.
952             The record is an L<Spreadsheet::Compare::Record>
953              
954             =cut