File Coverage

blib/lib/Struct/Diff.pm

Criterion	Covered	Total	%
statement	236	236	100.0
branch	158	158	100.0
condition	39	39	100.0
subroutine	15	15	100.0
pod	5	5	100.0
total	453	453	100.0

line	stmt	bran	cond	sub	pod	time	code
1							package Struct::Diff;
2
3	15			15		849777	use 5.006;
	15					127
4	15			15		83	use strict;
	15					29
	15					357
5	15			15		72	use warnings FATAL => 'all';
	15					41
	15					536
6	15			15		2202	use parent qw(Exporter);
	15					1482
	15					72
7
8	15			15		9524	use Algorithm::Diff qw(LCSidx);
	15					76074
	15					1016
9	15			15		115	use Carp qw(croak);
	15					33
	15					676
10	15			15		100	use Scalar::Util qw(looks_like_number);
	15					28
	15					917
11	15			15		8799	use Storable 2.05 qw(freeze);
	15					45081
	15					37279
12
13							our @EXPORT_OK = qw(
14							diff
15							list_diff
16							patch
17							split_diff
18							valid_diff
19							);
20
21							=head1 NAME
22
23							Struct::Diff - Recursive diff for nested perl structures
24
25							=begin html
26
27
28
29
30
31							=end html
32
33							=head1 VERSION
34
35							Version 0.98
36
37							=cut
38
39							our $VERSION = '0.98';
40
41							=head1 SYNOPSIS
42
43							use Struct::Diff qw(diff list_diff split_diff patch valid_diff);
44
45							$x = {one => [1,{two => 2}]};
46							$y = {one => [1,{two => 9}],three => 3};
47
48							$diff = diff($x, $y, noO => 1, noU => 1); # omit unchanged items and old values
49							# $diff == {D => {one => {D => [{D => {two => {N => 9}},I => 1}]},three => {A => 3}}}
50
51							@list_diff = list_diff($diff); # list (path and ref pairs) all diff entries
52							# @list_diff == ({K => ['one']},[1],{K => ['two']}],\{N => 9},[{K => ['three']}],\{A => 3})
53
54							$splitted = split_diff($diff);
55							# $splitted->{a} # does not exist
56							# $splitted->{b} == {one => [{two => 9}],three => 3}
57
58							patch($x, $diff); # $x now equal to $y by structure and data
59
60							@errors = valid_diff($diff);
61
62							=head1 EXPORT
63
64							Nothing is exported by default.
65
66							=head1 DIFF FORMAT
67
68							Diff is simply a HASH whose keys shows status for each item in passed
69							structures. Every status type (except C) may be omitted during the diff
70							calculation. Disabling some or other types produce different diffs: diff with
71							only unchanged items is also possible (when all other types disabled).
72
73							=over 4
74
75							=item A
76
77							Stands for 'added' (exist only in second structure), it's value - added item.
78
79							=item D
80
81							Means 'different' and contains subdiff. The only status type which can't be
82							disabled.
83
84							=item I
85
86							Index for array item, used only when prior item was omitted.
87
88							=item N
89
90							Is a new value for changed item.
91
92							=item O
93
94							Alike C, C is a changed item's old value.
95
96							=item R
97
98							Similar for C, but for removed items.
99
100							=item U
101
102							Represent unchanged items.
103
104							=back
105
106							Diff format: metadata alternates with data and, as a result, diff may
107							represent any structure of any data types. Simple types specified as is,
108							arrays and hashes contain subdiffs for their items with native for such types
109							addressing: indexes for arrays and keys for hashes.
110
111							Sample:
112
113							old: {one => [5,7]}
114							new: {one => [5],two => 2}
115							opts: {noU => 1} # omit unchanged items
116
117							diff:
118							{D => {one => {D => [{I => 1,R => 7}]},two => {A => 2}}}
119							\|\| \| \| \|\| \|\|\| \| \| \| \| \|\| \|
120							\|\| \| \| \|\| \|\|\| \| \| \| \| \|\| +- with value 2
121							\|\| \| \| \|\| \|\|\| \| \| \| \| \|+- key 'two' was added (A)
122							\|\| \| \| \|\| \|\|\| \| \| \| \| +- subdiff for it
123							\|\| \| \| \|\| \|\|\| \| \| \| +- another key from top-level hash
124							\|\| \| \| \|\| \|\|\| \| \| +- what it was (item's value: 7)
125							\|\| \| \| \|\| \|\|\| \| +- what happened to item (R - removed)
126							\|\| \| \| \|\| \|\|\| +- array item's actual index
127							\|\| \| \| \|\| \|\|+- prior item was omitted
128							\|\| \| \| \|\| \|+- subdiff for array item
129							\|\| \| \| \|\| +- it's value - ARRAY
130							\|\| \| \| \|+- it is deeply changed
131							\|\| \| \| +- subdiff for key 'one'
132							\|\| \| +- it has key 'one'
133							\|\| +- top-level thing is a HASH
134							\|+- changes somewhere deeply inside
135							+- diff is always a HASH
136
137							=head1 SUBROUTINES
138
139							=head2 diff
140
141							Returns recursive diff for two passed things.
142
143							$diff = diff($x, $y, %opts);
144							$patch = diff($x, $y, noU => 1, noO => 1, trimR => 1); # smallest diff
145
146							Beware changing diff: it's parts are references to substructures of passed
147							arguments.
148
149							=head3 Options
150
151							=over 4
152
153							=item freezer C<< _>>
154
155							Serializer callback (redefines default serializer). L is used
156							by default, see L for details.
157
158							=item noX C<< >>
159
160							Where X is a status (C, C, C, C, C); such status will be
161							omitted.
162
163							=item trimR C<< >>
164
165							Drop removed item's data.
166
167							=back
168
169							=cut
170
171							our $FREEZER = sub {
172							local $Storable::canonical = 1; # for equal snapshots for equal by data hashes
173							local $Storable::Deparse = 1; # for coderefs
174
175							freeze \$_[0];
176							};
177
178							sub diff($$;@) {
179	151			151	1	301078	my ($x, $y, %opts) = @_;
180
181	151	100				529	$opts{freezer} = $FREEZER unless (exists $opts{freezer});
182
183	151					489	_diff($x, $y, %opts);
184							}
185
186							sub _diff($$;@);
187							sub _diff($$;@) {
188	268			268		710	my ($x, $y, %opts) = @_;
189
190	268					455	my $d = {};
191	268					546	my $type = ref $x;
192
193	268	100	100			1800	if ($type ne ref $y) {
		100	100
		100	100
		100	100
194	10	100				52	$d->{O} = $x unless ($opts{noO});
195	10	100				31	$d->{N} = $y unless ($opts{noN});
196							} elsif ($type eq 'ARRAY' and $x != $y) {
197	88					261	my ($lcs, $stat) = _lcs_diff($x, $y, $opts{freezer});
198
199	88	100				168	if ($stat->{U} * 3 == @{$lcs}) {
	88					184
200	5	100				23	$d->{U} = $y unless ($opts{noU});
201							} else {
202	83					171	my ($I, $xi, $yi, $op, $sd) = 0;
203
204	83					125	while (@{$lcs}) {
	323					648
205	240					315	($op, $xi, $yi) = splice @{$lcs}, 0, 3;
	240					438
206
207	240	100				556	if ($op eq 'U') {
		100
		100
208	110	100				214	if ($opts{noU}) { $I++; next }
	44					56
	44					66
209	66					87	push @{$d->{D}}, { U => $y->[$yi] };
	66					169
210							} elsif ($op eq 'D') {
211	67					264	$sd = _diff($x->[$xi], $y->[$yi], %opts);
212	67	100				106	unless (keys %{$sd}) { $I++; next }
	67					220
	6					10
	6					11
213	61					84	push @{$d->{D}}, $sd;
	61					136
214							} elsif ($op eq 'A') {
215	33	100				68	if ($opts{noA}) { $I++; next }
	6					11
	6					12
216	27					37	push @{$d->{D}}, { A => $y->[$yi] };
	27					73
217							} else {
218	30	100				67	if ($opts{noR}) { $I++; next }
	4					8
	4					9
219	26	100				40	push @{$d->{D}}, { R => $opts{trimR} ? undef : $x->[$xi] };
	26					92
220							}
221
222	180	100				352	if ($I) {
223	32					66	$d->{D}->[-1]->{I} = $xi;
224	32					46	$I = 0;
225							}
226							}
227							}
228							} elsif ($type eq 'HASH' and $x != $y) {
229	67					157	my @keys = keys %{{ %{$x}, %{$y} }}; # uniq keys for both hashes
	67					96
	67					161
	67					303
230	67	100				239	return $opts{noU} ? {} : { U => {} } unless (@keys);
		100
231
232	65					142	for my $k (@keys) {
233	107	100	100			386	if (exists $x->{$k} and exists $y->{$k}) {
		100
234	75	100				170	if ($opts{freezer}($x->{$k}) eq $opts{freezer}($y->{$k})) {
235	25	100				629	$d->{U}->{$k} = $y->{$k} unless ($opts{noU});
236							} else {
237	50					1504	my $sd = _diff($x->{$k}, $y->{$k}, %opts);
238	50	100				77	$d->{D}->{$k} = $sd if (keys %{$sd});
	50					217
239							}
240							} elsif (exists $x->{$k}) {
241							$d->{D}->{$k}->{R} = $opts{trimR} ? undef : $x->{$k}
242	18	100				75	unless ($opts{noR});
		100
243							} else {
244	14	100				49	$d->{D}->{$k}->{A} = $y->{$k} unless ($opts{noA});
245							}
246							}
247
248	65	100	100			196	if (exists $d->{U} and exists $d->{D}) {
249	12					18	map { $d->{D}->{$_}->{U} = $d->{U}->{$_} } keys %{$d->{U}};
	15					47
	12					44
250	12					35	delete $d->{U};
251							}
252							} elsif ($type && $x == $y \|\| $opts{freezer}($x) eq $opts{freezer}($y)) {
253	18	100				1168	$d->{U} = $x unless ($opts{noU});
254							} else {
255	85	100				3547	$d->{O} = $x unless ($opts{noO});
256	85	100				280	$d->{N} = $y unless ($opts{noN});
257							}
258
259	266					946	return $d;
260							}
261
262							sub _lcs_diff {
263	88			88		272	my ($xm, $ym) = LCSidx(@_);
264	88					4728	my ($xi, $yi, @diff, %stat) = (0, 0);
265
266							# additional unchanged items to collect trailing non-matched
267	88					133	push @{$xm}, scalar @{$_[0]};
	88					151
	88					226
268	88					152	push @{$ym}, scalar @{$_[1]};
	88					134
	88					143
269
270	88					123	while (@{$xm}) {
	419					791
271	331	100	100			1251	if ($xi == $xm->[0] and $yi == $ym->[0]) {
		100	100
		100
272	201					270	push @diff, 'U', shift @{$xm}, shift @{$ym};
	201					297
	201					437
273	201					307	$xi++; $yi++;
	201					246
274	201					303	$stat{U}++;
275							} elsif ($xi < $xm->[0] and $yi < $ym->[0]) {
276	67					164	push @diff, 'D', $xi++, $yi++;
277	67					132	$stat{N}++;
278							} elsif ($xi < $xm->[0]) {
279	30					75	push @diff, 'R', $xi++, $yi;
280	30					55	$stat{R}++;
281							} else {
282	33					72	push @diff, 'A', $xi, $yi++;
283	33					53	$stat{A}++;
284							}
285							}
286
287	88	100				217	$stat{O} = $stat{N} if (exists $stat{N});
288
289							# remove added above trailing item
290	88					189	splice @diff, -3, 3;
291	88					128	$stat{U}--;
292
293	88					259	return \@diff, \%stat;
294							}
295
296							=head2 list_diff
297
298							List all pairs (path-to-subdiff, ref-to-subdiff) for provided diff. See
299							L for path format specification.
300
301							@list = list_diff($diff);
302
303							=head3 Options
304
305							=over 4
306
307							=item depth C<< >>
308
309							Don't dive deeper than defined number of levels; C used by default
310							(unlimited).
311
312							=item sort C<< >>
313
314							Defines how to handle hash subdiffs. Keys will be picked randomly (default
315							C behavior), sorted by provided subroutine (if value is a coderef) or
316							lexically sorted if set to some other true value.
317
318							=back
319
320							=cut
321
322							sub list_diff($;@) {
323	8			8	1	23	my @stack = ([], \shift); # init: (path, diff)
324	8					20	my %opts = @_;
325	8					16	my ($diff, @list, $path, $I);
326
327	8					20	while (@stack) {
328	36					61	($path, $diff) = splice @stack, -2, 2;
329
330	36	100	100			53	if (!exists ${$diff}->{D} or $opts{depth} and @{$path} >= $opts{depth}) {
	36	100	100			113
	5					16
331	18					47	unshift @list, $path, $diff;
332	18					37	} elsif (ref ${$diff}->{D} eq 'ARRAY') {
333	8					15	$I = 0;
334	8					11	for (@{${$diff}->{D}}) {
	8					11
	8					18
335	14	100				27	$I = $_->{I} if (exists $_->{I}); # use provided index
336	14					19	push @stack, [@{$path}, [$I]], \$_;
	14					25
337	14					31	$I++;
338							}
339							} else { # HASH
340							map {
341	14					33	push @stack, [@{$path}, {K => [$_]}], \${$diff}->{D}->{$_}
	14					38
	14					43
342							} $opts{sort}
343							? ref $opts{sort} eq 'CODE'
344	1					2	? $opts{sort}(keys %{${$diff}->{D}})
	1					5
345	1					2	: sort keys %{${$diff}->{D}}
	1					6
346	10	100				26	: keys %{${$diff}->{D}};
	8	100				10
	8					23
347							}
348							}
349
350	8					45	return @list;
351							}
352
353							=head2 split_diff
354
355							Divide diff to pseudo original structures.
356
357							$structs = split_diff(diff($x, $y));
358							# $structs->{a}: items from $x
359							# $structs->{b}: items from $y
360
361							=cut
362
363							sub split_diff($);
364							sub split_diff($) {
365	65			65	1	382	my $d = $_[0];
366	65					87	my (%out, $sd);
367
368	65	100				150	if (exists $d->{D}) {
		100
		100
		100
369	18	100				39	if (ref $d->{D} eq 'ARRAY') {
370	10					13	for (@{$d->{D}}) {
	10					19
371	22					38	$sd = split_diff($_);
372	22	100				42	push @{$out{a}}, $sd->{a} if (exists $sd->{a});
	20					38
373	22	100				41	push @{$out{b}}, $sd->{b} if (exists $sd->{b});
	20					37
374							}
375
376	10	100				21	$out{a} = [] unless (exists $out{a});
377	10	100				22	$out{b} = [] unless (exists $out{b});
378							} else { # HASH
379	8					11	for (keys %{$d->{D}}) {
	8					21
380	29					48	$sd = split_diff($d->{D}->{$_});
381	29	100				61	$out{a}->{$_} = $sd->{a} if (exists $sd->{a});
382	29	100				62	$out{b}->{$_} = $sd->{b} if (exists $sd->{b});
383							}
384
385	8	100				18	$out{a} = {} unless (exists $out{a});
386	8	100				16	$out{b} = {} unless (exists $out{b});
387							}
388							} elsif (exists $d->{U}) {
389	14					26	$out{a} = $out{b} = $d->{U};
390							} elsif (exists $d->{A}) {
391	8					17	$out{b} = $d->{A};
392							} elsif (exists $d->{R}) {
393	8					17	$out{a} = $d->{R};
394							} else {
395	17	100				36	$out{b} = $d->{N} if (exists $d->{N});
396	17	100				35	$out{a} = $d->{O} if (exists $d->{O});
397							}
398
399	65					156	return \%out;
400							}
401
402							=head2 patch
403
404							Apply diff.
405
406							patch($target, $diff);
407
408							=cut
409
410							sub patch($$) {
411	112			112	1	487915	my @stack = (\$_[0], $_[1]); # ref to alias - to be able to change passed scalar
412
413	112					406	while (@stack) {
414	169					453	my ($s, $d) = splice @stack, 0, 2; # struct, subdiff
415
416	169	100				530	if (exists $d->{D}) {
		100
417	83	100				124	croak "Structure does not match" unless (ref ${$s} eq ref $d->{D});
	83					459
418
419	82	100				199	if (ref $d->{D} eq 'ARRAY') {
420	50					98	my ($i, $j) = (0, 0); # target array idx, jitter
421
422	50					66	for (@{$d->{D}}) {
	50					124
423	122	100				242	$i = $_->{I} + $j if (exists $_->{I});
424
425	122	100	100			456	if (exists $_->{D} or exists $_->{N}) {
		100
		100
426	37					57	push @stack, \${$s}->[$i], $_;
	37					78
427							} elsif (exists $_->{A}) {
428	25					34	splice @{${$s}}, $i, 0, $_->{A};
	25					27
	25					61
429	25					36	$j++;
430							} elsif (exists $_->{R}) {
431	24					33	splice @{${$s}}, $i, 1;
	24					32
	24					42
432	24					36	$j--;
433	24					75	next; # don't increment $i
434							}
435
436	98					200	$i++;
437							}
438							} else { # HASH
439	32					47	while (my ($k, $v) = each %{$d->{D}}) {
	74					270
440	42	100	100			167	if (exists $v->{D} or exists $v->{N}) {
		100
		100
441	20					32	push @stack, \${$s}->{$k}, $v;
	20					55
442							} elsif (exists $v->{A}) {
443	6					14	${$s}->{$k} = $v->{A};
	6					16
444							} elsif (exists $v->{R}) {
445	10					17	delete ${$s}->{$k};
	10					29
446							}
447							}
448							}
449							} elsif (exists $d->{N}) {
450	61					111	${$s} = $d->{N};
	61					227
451							}
452							}
453							}
454
455							=head2 valid_diff
456
457							Validate diff structure. In scalar context returns C<1> for valid diff,
458							C otherwise. In list context returns list of pairs (path, type) for
459							each error. See L for path format
460							specification.
461
462							@errors_list = valid_diff($diff); # list context
463
464							or
465
466							$is_valid = valid_diff($diff); # scalar context
467
468							=cut
469
470							sub valid_diff($) {
471	143			143	1	581285	my @stack = ([], shift); # (path, diff)
472	143					310	my ($diff, @errs, $path);
473
474	143					432	while (@stack) {
475	342					742	($path, $diff) = splice @stack, 0, 2;
476
477	342	100				855	unless (ref $diff eq 'HASH') {
478	5	100				17	return undef unless wantarray;
479	4					9	push @errs, $path, 'BAD_DIFF_TYPE';
480	4					10	next;
481							}
482
483	337	100				759	if (exists $diff->{D}) {
484	104	100				281	if (ref $diff->{D} eq 'ARRAY') {
		100
485							map {
486	153					230	unshift @stack, [@{$path}, [$_]], $diff->{D}->[$_]
	153					437
487	62					113	} 0 .. $#{$diff->{D}};
	62					150
488							} elsif (ref $diff->{D} eq 'HASH') {
489							map {
490	46					77	unshift @stack, [@{$path}, {K => [$_]}], $diff->{D}->{$_}
	46					181
491	35					54	} sort keys %{$diff->{D}};
	35					123
492							} else {
493	7	100				23	return undef unless wantarray;
494	6					12	unshift @errs, $path, 'BAD_D_TYPE';
495							}
496							}
497
498	336	100				867	if (exists $diff->{I}) {
499	37	100	100			208	if (!looks_like_number($diff->{I}) or int($diff->{I}) != $diff->{I}) {
500	6	100				17	return undef unless wantarray;
501	5					14	unshift @errs, $path, 'BAD_I_TYPE';
502							}
503
504	36	100				53	if (keys %{$diff} < 2) {
	36					127
505	2	100				10	return undef unless wantarray;
506	1					4	unshift @errs, $path, 'LONESOME_I';
507							}
508							}
509							}
510
511	139	100				721	return wantarray ? @errs : 1;
512							}
513
514							=head1 CONFIGURATION VARIABLES
515
516							=over 4
517
518							=item $Struct::Diff::FREEZER
519
520							Contains reference to default serialization function (C rely on it
521							to determine data equivalency). L with enabled
522							C<$Storable::canonical> and C<$Storable::Deparse> opts used by default.
523
524							L is suitable for structures with regular expressions:
525
526							use Data::Dumper;
527
528							$Struct::Diff::FREEZER = sub {
529							local $Data::Dumper::Deparse = 1;
530							local $Data::Dumper::Sortkeys = 1;
531							local $Data::Dumper::Terse = 1;
532
533							return Dumper @_;
534							}
535
536							But comparing to L it has two another issues: speed and unability
537							to distinguish numbers from their string representations.
538
539							=back
540
541							=head1 LIMITATIONS
542
543							Only arrays and hashes traversed. All other types compared by reference
544							addresses and serialized content.
545
546							L (serializer used by default) will fail serializing compiled
547							regexps, so, consider to use other serializer if data contains regular
548							expressions. See L for details.
549
550							Struct::Diff will fail on structures with loops in references;
551							C from L can help to detect such
552							structures.
553
554							=head1 AUTHOR
555
556							Michael Samoglyadov, C<< >>
557
558							=head1 BUGS
559
560							Please report any bugs or feature requests to
561							C, or through the web interface at
562							L. I will be
563							notified, and then you'll automatically be notified of progress on your bug as
564							I make changes.
565
566							=head1 SUPPORT
567
568							You can find documentation for this module with the perldoc command.
569
570							perldoc Struct::Diff
571
572							You can also look for information at:
573
574							=over 4
575
576							=item * RT: CPAN's request tracker (report bugs here)
577
578							L
579
580							=item * AnnoCPAN: Annotated CPAN documentation
581
582							L
583
584							=item * CPAN Ratings
585
586							L
587
588							=item * Search CPAN
589
590							L
591
592							=back
593
594							=head1 SEE ALSO
595
596							L, L, L, L,
597							L, L, L
598
599							L, L, L
600
601							=head1 LICENSE AND COPYRIGHT
602
603							Copyright 2015-2019 Michael Samoglyadov.
604
605							This program is free software; you can redistribute it and/or modify it
606							under the terms of either: the GNU General Public License as published
607							by the Free Software Foundation; or the Artistic License.
608
609							See L for more information.
610
611							=cut
612
613							1; # End of Struct::Diff