File Coverage

blib/lib/Struct/Diff.pm

Criterion	Covered	Total	%
statement	232	232	100.0
branch	150	150	100.0
condition	39	39	100.0
subroutine	15	15	100.0
pod	5	5	100.0
total	441	441	100.0

line	stmt	bran	cond	sub	pod	time	code
1							package Struct::Diff;
2
3	15			15		860990	use 5.006;
	15					144
4	15			15		85	use strict;
	15					26
	15					369
5	15			15		75	use warnings FATAL => 'all';
	15					37
	15					568
6	15			15		2263	use parent qw(Exporter);
	15					1459
	15					80
7
8	15			15		9782	use Algorithm::Diff qw(LCSidx);
	15					76735
	15					1050
9	15			15		143	use Carp qw(croak);
	15					32
	15					727
10	15			15		103	use Scalar::Util qw(looks_like_number);
	15					31
	15					822
11	15			15		8954	use Storable 2.05 qw(freeze);
	15					45558
	15					36316
12
13							our @EXPORT_OK = qw(
14							diff
15							list_diff
16							patch
17							split_diff
18							valid_diff
19							);
20
21							=head1 NAME
22
23							Struct::Diff - Recursive diff for nested perl structures
24
25							=begin html
26
27
28
29
30
31							=end html
32
33							=head1 VERSION
34
35							Version 0.97
36
37							=cut
38
39							our $VERSION = '0.97';
40
41							=head1 SYNOPSIS
42
43							use Struct::Diff qw(diff list_diff split_diff patch valid_diff);
44
45							$x = {one => [1,{two => 2}]};
46							$y = {one => [1,{two => 9}],three => 3};
47
48							$diff = diff($x, $y, noO => 1, noU => 1); # omit unchanged items and old values
49							# $diff == {D => {one => {D => [{D => {two => {N => 9}},I => 1}]},three => {A => 3}}}
50
51							@list_diff = list_diff($diff); # list (path and ref pairs) all diff entries
52							# @list_diff == ({K => ['one']},[1],{K => ['two']}],\{N => 9},[{K => ['three']}],\{A => 3})
53
54							$splitted = split_diff($diff);
55							# $splitted->{a} # does not exist
56							# $splitted->{b} == {one => [{two => 9}],three => 3}
57
58							patch($x, $diff); # $x now equal to $y by structure and data
59
60							@errors = valid_diff($diff);
61
62							=head1 EXPORT
63
64							Nothing is exported by default.
65
66							=head1 DIFF FORMAT
67
68							Diff is simply a HASH whose keys shows status for each item in passed
69							structures. Every status type (except C) may be omitted during the diff
70							calculation. Disabling some or other types produce different diffs: diff with
71							only unchanged items is also possible (when all other types disabled).
72
73							=over 4
74
75							=item A
76
77							Stands for 'added' (exist only in second structure), it's value - added item.
78
79							=item D
80
81							Means 'different' and contains subdiff. The only status type which can't be
82							disabled.
83
84							=item I
85
86							Index for array item, used only when prior item was omitted.
87
88							=item N
89
90							Is a new value for changed item.
91
92							=item O
93
94							Alike C, C is a changed item's old value.
95
96							=item R
97
98							Similar for C, but for removed items.
99
100							=item U
101
102							Represent unchanged items.
103
104							=back
105
106							Diff format: metadata alternates with data and, as a result, diff may
107							represent any structure of any data types. Simple types specified as is,
108							arrays and hashes contain subdiffs for their items with native for such types
109							addressing: indexes for arrays and keys for hashes.
110
111							Sample:
112
113							old: {one => [5,7]}
114							new: {one => [5],two => 2}
115							opts: {noU => 1} # omit unchanged items
116
117							diff:
118							{D => {one => {D => [{I => 1,R => 7}]},two => {A => 2}}}
119							\|\| \| \| \|\| \|\|\| \| \| \| \| \|\| \|
120							\|\| \| \| \|\| \|\|\| \| \| \| \| \|\| +- with value 2
121							\|\| \| \| \|\| \|\|\| \| \| \| \| \|+- key 'two' was added (A)
122							\|\| \| \| \|\| \|\|\| \| \| \| \| +- subdiff for it
123							\|\| \| \| \|\| \|\|\| \| \| \| +- another key from top-level hash
124							\|\| \| \| \|\| \|\|\| \| \| +- what it was (item's value: 7)
125							\|\| \| \| \|\| \|\|\| \| +- what happened to item (R - removed)
126							\|\| \| \| \|\| \|\|\| +- array item's actual index
127							\|\| \| \| \|\| \|\|+- prior item was omitted
128							\|\| \| \| \|\| \|+- subdiff for array item
129							\|\| \| \| \|\| +- it's value - ARRAY
130							\|\| \| \| \|+- it is deeply changed
131							\|\| \| \| +- subdiff for key 'one'
132							\|\| \| +- it has key 'one'
133							\|\| +- top-level thing is a HASH
134							\|+- changes somewhere deeply inside
135							+- diff is always a HASH
136
137							=head1 SUBROUTINES
138
139							=head2 diff
140
141							Returns recursive diff for two passed things.
142
143							$diff = diff($x, $y, %opts);
144							$patch = diff($x, $y, noU => 1, noO => 1, trimR => 1); # smallest diff
145
146							Beware changing diff: it's parts are references to substructures of passed
147							arguments.
148
149							=head3 Options
150
151							=over 4
152
153							=item freezer C<< _>>
154
155							Serializer callback (redefines default serializer). L is used
156							by default, see L for details.
157
158							=item noX C<< >>
159
160							Where X is a status (C, C, C, C, C); such status will be
161							omitted.
162
163							=item trimR C<< >>
164
165							Drop removed item's data.
166
167							=back
168
169							=cut
170
171							our $FREEZER = sub {
172							local $Storable::canonical = 1; # for equal snapshots for equal by data hashes
173							local $Storable::Deparse = 1; # for coderefs
174
175							freeze \$_[0];
176							};
177
178							sub diff($$;@) {
179	147			147	1	295567	my ($x, $y, %opts) = @_;
180
181	147	100				547	$opts{freezer} = $FREEZER unless (exists $opts{freezer});
182
183	147					472	_diff($x, $y, %opts);
184							}
185
186							sub _diff($$;@);
187							sub _diff($$;@) {
188	264			264		721	my ($x, $y, %opts) = @_;
189
190	264					468	my $d = {};
191	264					505	my $type = ref $x;
192
193	264	100	100			1920	if ($type ne ref $y) {
		100	100
		100	100
		100	100
194	10	100				78	$d->{O} = $x unless ($opts{noO});
195	10	100				32	$d->{N} = $y unless ($opts{noN});
196							} elsif ($type eq 'ARRAY' and $x != $y) {
197	86					247	my ($lcs, $stat) = _lcs_diff($x, $y, $opts{freezer});
198
199	86	100				162	if ($stat->{U} * 3 == @{$lcs}) {
	86					180
200	5	100				22	$d->{U} = $y unless ($opts{noU});
201							} else {
202	81					168	my ($I, $xi, $yi, $op, $sd) = 0;
203
204	81					123	while (@{$lcs}) {
	319					679
205	238					323	($op, $xi, $yi) = splice @{$lcs}, 0, 3;
	238					451
206
207	238	100				591	if ($op eq 'U') {
		100
		100
208	110	100				232	if ($opts{noU}) { $I++; next }
	44					63
	44					68
209	66					90	push @{$d->{D}}, { U => $y->[$yi] };
	66					191
210							} elsif ($op eq 'D') {
211	67					301	$sd = _diff($x->[$xi], $y->[$yi], %opts);
212	67	100				116	unless (keys %{$sd}) { $I++; next }
	67					198
	6					8
	6					11
213	61					90	push @{$d->{D}}, $sd;
	61					126
214							} elsif ($op eq 'A') {
215	32	100				100	if ($opts{noA}) { $I++; next }
	6					14
	6					9
216	26					50	push @{$d->{D}}, { A => $y->[$yi] };
	26					75
217							} else {
218	29	100				69	if ($opts{noR}) { $I++; next }
	4					9
	4					8
219	25	100				36	push @{$d->{D}}, { R => $opts{trimR} ? undef : $x->[$xi] };
	25					96
220							}
221
222	178	100				357	if ($I) {
223	32					60	$d->{D}->[-1]->{I} = $xi;
224	32					50	$I = 0;
225							}
226							}
227							}
228							} elsif ($type eq 'HASH' and $x != $y) {
229	65					114	my @keys = keys %{{ %{$x}, %{$y} }}; # uniq keys for both hashes
	65					87
	65					142
	65					266
230	65	100				207	return $opts{noU} ? {} : { U => {} } unless (@keys);
		100
231
232	63					131	for my $k (@keys) {
233	105	100	100			373	if (exists $x->{$k} and exists $y->{$k}) {
		100
234	75	100				175	if ($opts{freezer}($x->{$k}) eq $opts{freezer}($y->{$k})) {
235	25	100				606	$d->{U}->{$k} = $y->{$k} unless ($opts{noU});
236							} else {
237	50					1416	my $sd = _diff($x->{$k}, $y->{$k}, %opts);
238	50	100				77	$d->{D}->{$k} = $sd if (keys %{$sd});
	50					200
239							}
240							} elsif (exists $x->{$k}) {
241							$d->{D}->{$k}->{R} = $opts{trimR} ? undef : $x->{$k}
242	17	100				70	unless ($opts{noR});
		100
243							} else {
244	13	100				48	$d->{D}->{$k}->{A} = $y->{$k} unless ($opts{noA});
245							}
246							}
247
248	63	100	100			179	if (exists $d->{U} and exists $d->{D}) {
249	12					20	map { $d->{D}->{$_}->{U} = $d->{U}->{$_} } keys %{$d->{U}};
	15					46
	12					37
250	12					34	delete $d->{U};
251							}
252							} elsif ($type && $x == $y \|\| $opts{freezer}($x) eq $opts{freezer}($y)) {
253	18	100				1204	$d->{U} = $x unless ($opts{noU});
254							} else {
255	85	100				3513	$d->{O} = $x unless ($opts{noO});
256	85	100				234	$d->{N} = $y unless ($opts{noN});
257							}
258
259	262					956	return $d;
260							}
261
262							sub _lcs_diff {
263	86			86		285	my ($xm, $ym) = LCSidx(@_);
264	86					4817	my ($xi, $yi, @diff, %stat) = (0, 0);
265
266							# additional unchanged items to collect trailing non-matched
267	86					134	push @{$xm}, scalar @{$_[0]};
	86					141
	86					163
268	86					122	push @{$ym}, scalar @{$_[1]};
	86					130
	86					136
269
270	86					141	while (@{$xm}) {
	413					792
271	327	100	100			1255	if ($xi == $xm->[0] and $yi == $ym->[0]) {
		100	100
		100
272	199					306	push @diff, 'U', shift @{$xm}, shift @{$ym};
	199					287
	199					400
273	199					277	$xi++; $yi++;
	199					260
274	199					337	$stat{U}++;
275							} elsif ($xi < $xm->[0] and $yi < $ym->[0]) {
276	67					162	push @diff, 'D', $xi++, $yi++;
277	67					132	$stat{N}++;
278							} elsif ($xi < $xm->[0]) {
279	29					83	push @diff, 'R', $xi++, $yi;
280	29					59	$stat{R}++;
281							} else {
282	32					93	push @diff, 'A', $xi, $yi++;
283	32					63	$stat{A}++;
284							}
285							}
286
287	86	100				241	$stat{O} = $stat{N} if (exists $stat{N});
288
289							# remove added above trailing item
290	86					176	splice @diff, -3, 3;
291	86					139	$stat{U}--;
292
293	86					274	return \@diff, \%stat;
294							}
295
296							=head2 list_diff
297
298							List all pairs (path-to-subdiff, ref-to-subdiff) for provided diff. See
299							L for path format specification.
300
301							@list = list_diff($diff);
302
303							=head3 Options
304
305							=over 4
306
307							=item depth C<< >>
308
309							Don't dive deeper than defined number of levels; C used by default
310							(unlimited).
311
312							=item sort C<< >>
313
314							Defines how to handle hash subdiffs. Keys will be picked randomly (default
315							C behavior), sorted by provided subroutine (if value is a coderef) or
316							lexically sorted if set to some other true value.
317
318							=back
319
320							=cut
321
322							sub list_diff($;@) {
323	8			8	1	22	my @stack = ([], \shift); # init: (path, diff)
324	8					21	my %opts = @_;
325	8					12	my ($diff, @list, $path, $I);
326
327	8					17	while (@stack) {
328	36					66	($path, $diff) = splice @stack, -2, 2;
329
330	36	100	100			45	if (!exists ${$diff}->{D} or $opts{depth} and @{$path} >= $opts{depth}) {
	36	100	100			105
	5					15
331	18					44	unshift @list, $path, $diff;
332	18					41	} elsif (ref ${$diff}->{D} eq 'ARRAY') {
333	8					10	$I = 0;
334	8					13	for (@{${$diff}->{D}}) {
	8					9
	8					17
335	14	100				27	$I = $_->{I} if (exists $_->{I}); # use provided index
336	14					17	push @stack, [@{$path}, [$I]], \$_;
	14					28
337	14					25	$I++;
338							}
339							} else { # HASH
340							map {
341	14					33	push @stack, [@{$path}, {K => [$_]}], \${$diff}->{D}->{$_}
	14					37
	14					42
342							} $opts{sort}
343							? ref $opts{sort} eq 'CODE'
344	1					2	? $opts{sort}(keys %{${$diff}->{D}})
	1					4
345	1					1	: sort keys %{${$diff}->{D}}
	1					7
346	10	100				26	: keys %{${$diff}->{D}};
	8	100				10
	8					22
347							}
348							}
349
350	8					46	return @list;
351							}
352
353							=head2 split_diff
354
355							Divide diff to pseudo original structures.
356
357							$structs = split_diff(diff($x, $y));
358							# $structs->{a}: items from $x
359							# $structs->{b}: items from $y
360
361							=cut
362
363							sub split_diff($);
364							sub split_diff($) {
365	57			57	1	364	my $d = $_[0];
366	57					74	my (%out, $sd);
367
368	57	100				128	if (exists $d->{D}) {
		100
		100
		100
369	14	100				27	if (ref $d->{D} eq 'ARRAY') {
370	8					11	for (@{$d->{D}}) {
	8					16
371	20					38	$sd = split_diff($_);
372	20	100				40	push @{$out{a}}, $sd->{a} if (exists $sd->{a});
	19					35
373	20	100				38	push @{$out{b}}, $sd->{b} if (exists $sd->{b});
	19					34
374							}
375							} else { # HASH
376	6					8	for (keys %{$d->{D}}) {
	6					16
377	27					43	$sd = split_diff($d->{D}->{$_});
378	27	100				60	$out{a}->{$_} = $sd->{a} if (exists $sd->{a});
379	27	100				57	$out{b}->{$_} = $sd->{b} if (exists $sd->{b});
380							}
381							}
382							} elsif (exists $d->{U}) {
383	14					28	$out{a} = $out{b} = $d->{U};
384							} elsif (exists $d->{A}) {
385	6					9	$out{b} = $d->{A};
386							} elsif (exists $d->{R}) {
387	6					11	$out{a} = $d->{R};
388							} else {
389	17	100				40	$out{b} = $d->{N} if (exists $d->{N});
390	17	100				34	$out{a} = $d->{O} if (exists $d->{O});
391							}
392
393	57					126	return \%out;
394							}
395
396							=head2 patch
397
398							Apply diff.
399
400							patch($target, $diff);
401
402							=cut
403
404							sub patch($$) {
405	112			112	1	478855	my @stack = (\$_[0], $_[1]); # ref to alias - to be able to change passed scalar
406
407	112					400	while (@stack) {
408	169					425	my ($s, $d) = splice @stack, 0, 2; # struct, subdiff
409
410	169	100				616	if (exists $d->{D}) {
		100
411	83	100				108	croak "Structure does not match" unless (ref ${$s} eq ref $d->{D});
	83					505
412
413	82	100				204	if (ref $d->{D} eq 'ARRAY') {
414	50					90	my ($i, $j) = (0, 0); # target array idx, jitter
415
416	50					78	for (@{$d->{D}}) {
	50					115
417	122	100				236	$i = $_->{I} + $j if (exists $_->{I});
418
419	122	100	100			490	if (exists $_->{D} or exists $_->{N}) {
		100
		100
420	37					68	push @stack, \${$s}->[$i], $_;
	37					81
421							} elsif (exists $_->{A}) {
422	25					39	splice @{${$s}}, $i, 0, $_->{A};
	25					35
	25					59
423	25					46	$j++;
424							} elsif (exists $_->{R}) {
425	24					39	splice @{${$s}}, $i, 1;
	24					32
	24					49
426	24					45	$j--;
427	24					72	next; # don't increment $i
428							}
429
430	98					203	$i++;
431							}
432							} else { # HASH
433	32					44	while (my ($k, $v) = each %{$d->{D}}) {
	74					277
434	42	100	100			171	if (exists $v->{D} or exists $v->{N}) {
		100
		100
435	20					36	push @stack, \${$s}->{$k}, $v;
	20					52
436							} elsif (exists $v->{A}) {
437	6					12	${$s}->{$k} = $v->{A};
	6					16
438							} elsif (exists $v->{R}) {
439	10					12	delete ${$s}->{$k};
	10					27
440							}
441							}
442							}
443							} elsif (exists $d->{N}) {
444	61					114	${$s} = $d->{N};
	61					221
445							}
446							}
447							}
448
449							=head2 valid_diff
450
451							Validate diff structure. In scalar context returns C<1> for valid diff,
452							C otherwise. In list context returns list of pairs (path, type) for
453							each error. See L for path format
454							specification.
455
456							@errors_list = valid_diff($diff); # list context
457
458							or
459
460							$is_valid = valid_diff($diff); # scalar context
461
462							=cut
463
464							sub valid_diff($) {
465	143			143	1	566450	my @stack = ([], shift); # (path, diff)
466	143					326	my ($diff, @errs, $path);
467
468	143					425	while (@stack) {
469	342					697	($path, $diff) = splice @stack, 0, 2;
470
471	342	100				888	unless (ref $diff eq 'HASH') {
472	5	100				17	return undef unless wantarray;
473	4					9	push @errs, $path, 'BAD_DIFF_TYPE';
474	4					10	next;
475							}
476
477	337	100				745	if (exists $diff->{D}) {
478	104	100				288	if (ref $diff->{D} eq 'ARRAY') {
		100
479							map {
480	153					206	unshift @stack, [@{$path}, [$_]], $diff->{D}->[$_]
	153					436
481	62					106	} 0 .. $#{$diff->{D}};
	62					165
482							} elsif (ref $diff->{D} eq 'HASH') {
483							map {
484	46					74	unshift @stack, [@{$path}, {K => [$_]}], $diff->{D}->{$_}
	46					190
485	35					64	} sort keys %{$diff->{D}};
	35					125
486							} else {
487	7	100				22	return undef unless wantarray;
488	6					13	unshift @errs, $path, 'BAD_D_TYPE';
489							}
490							}
491
492	336	100				923	if (exists $diff->{I}) {
493	37	100	100			228	if (!looks_like_number($diff->{I}) or int($diff->{I}) != $diff->{I}) {
494	6	100				19	return undef unless wantarray;
495	5					13	unshift @errs, $path, 'BAD_I_TYPE';
496							}
497
498	36	100				55	if (keys %{$diff} < 2) {
	36					181
499	2	100				10	return undef unless wantarray;
500	1					4	unshift @errs, $path, 'LONESOME_I';
501							}
502							}
503							}
504
505	139	100				691	return wantarray ? @errs : 1;
506							}
507
508							=head1 CONFIGURATION VARIABLES
509
510							=over 4
511
512							=item $Struct::Diff::FREEZER
513
514							Contains reference to default serialization function (C rely on it
515							to determine data equivalency). L with enabled
516							C<$Storable::canonical> and C<$Storable::Deparse> opts used by default.
517
518							L is suitable for structures with regular expressions:
519
520							use Data::Dumper;
521
522							$Struct::Diff::FREEZER = sub {
523							local $Data::Dumper::Deparse = 1;
524							local $Data::Dumper::Sortkeys = 1;
525							local $Data::Dumper::Terse = 1;
526
527							return Dumper @_;
528							}
529
530							But comparing to L it has two another issues: speed and unability
531							to distinguish numbers from their string representations.
532
533							=back
534
535							=head1 LIMITATIONS
536
537							Only arrays and hashes traversed. All other types compared by reference
538							addresses and serialized content.
539
540							L (serializer used by default) will fail serializing compiled
541							regexps, so, consider to use other serializer if data contains regular
542							expressions. See L for details.
543
544							Struct::Diff will fail on structures with loops in references;
545							C from L can help to detect such
546							structures.
547
548							=head1 AUTHOR
549
550							Michael Samoglyadov, C<< >>
551
552							=head1 BUGS
553
554							Please report any bugs or feature requests to
555							C, or through the web interface at
556							L. I will be
557							notified, and then you'll automatically be notified of progress on your bug as
558							I make changes.
559
560							=head1 SUPPORT
561
562							You can find documentation for this module with the perldoc command.
563
564							perldoc Struct::Diff
565
566							You can also look for information at:
567
568							=over 4
569
570							=item * RT: CPAN's request tracker (report bugs here)
571
572							L
573
574							=item * AnnoCPAN: Annotated CPAN documentation
575
576							L
577
578							=item * CPAN Ratings
579
580							L
581
582							=item * Search CPAN
583
584							L
585
586							=back
587
588							=head1 SEE ALSO
589
590							L, L, L, L,
591							L, L, L
592
593							L, L, L
594
595							=head1 LICENSE AND COPYRIGHT
596
597							Copyright 2015-2018 Michael Samoglyadov.
598
599							This program is free software; you can redistribute it and/or modify it
600							under the terms of either: the GNU General Public License as published
601							by the Free Software Foundation; or the Artistic License.
602
603							See L for more information.
604
605							=cut
606
607							1; # End of Struct::Diff