File Coverage

blib/lib/Test/Deep/Hashbag.pm

Criterion	Covered	Total	%
statement	157	161	97.5
branch	38	40	95.0
condition	17	20	85.0
subroutine	14	14	100.0
pod	2	7	28.5
total	228	242	94.2

line	stmt	bran	cond	sub	pod	time	code
1							package Test::Deep::Hashbag 0.002;
2							# ABSTRACT: A Test::Deep hash comparator ignoring hash keys
3
4	2			2		328818	use strict;
	2					5
	2					80
5	2			2		12	use warnings;
	2					5
	2					118
6
7	2			2		1059	use Test::Deep::Cmp;
	2					1871
	2					9
8
9	2			2		1485	use Data::Dumper;
	2					26049
	2					215
10	2			2		17	use Scalar::Util qw(blessed reftype);
	2					4
	2					119
11	2			2		1044	use Test::Deep::Hash ();
	2					3857
	2					74
12
13	2			2		16	use Exporter 'import';
	2					4
	2					4951
14							our @EXPORT = qw(hashbag superhashbagof);
15
16							sub init {
17	27			27	0	247	my $self = shift;
18	27					59	my $style = shift;
19	27					127	my @want = @_;
20
21	27	50	66			180	unless ($style eq 'hashbag' \|\| $style eq 'superhashbag') {
22	0					0	require Carp;
23	0					0	Carp::confess("Unknown style '$style' requested. How even?!");
24							}
25
26	27	100				125	unless (@want % 2 == 0) {
27	1					8	require Carp;
28	1					140	Carp::croak("hashbag needs an even list of pairs.");
29							}
30
31	26					54	my %seen;
32
33	26					134	for my $i (0 .. (@want / 2 - 1)) {
34	114					214	my $idx = $i * 2;
35	114					193	my $k = $want[$idx];
36
37							# Ignore ignore() and other things
38	114	100				241	if (ref $k) {
39	96	100	100			277	unless ((blessed($k) // "") eq 'Test::Deep::Ignore') {
40							# Prevent mistakes?
41	1					7	require Carp;
42	1					147	Carp::croak("hashbag keys must be simple scalars or a Test::Deep::Ignore (ignore()) object, got: " . reftype($k));
43							}
44
45	95					220	next;
46							}
47
48	18	100				76	if ($seen{$k}++) {
49	1					11	require Carp;
50	1					233	Carp::croak("Duplicate key '$k' passed to hashbag()");
51							}
52							}
53
54	24					141	$self->{val} = \@want;
55	24					59	$self->{style} = $style;
56
57	24					80	return;
58							}
59
60							sub descend {
61	26			26	0	7445	my $self = shift;
62	26					55	my $have = shift;
63
64	26	100				99	unless (ref $have eq 'HASH') {
65	1					6	my $got = Test::Deep::render_val($have);
66	1					24	$self->data->{diag} = <
67							got : $got
68							expect : A hashref
69							EOM
70
71	1					16	return 0;
72							}
73
74	25					43	my $want_count = (0 + @{$self->{val}}) / 2;
	25					92
75	25					50	my $have_count = keys %$have;
76
77	25					78	my %required;
78							my @unkeyed;
79
80							# Sort the incoming hashbag into a list of required keys/values, and values
81							# who's keys are ignore()
82	25					82	for my $i (0 .. $want_count - 1) {
83	114					185	my $idx = $i * 2;
84
85	114					185	my $k = $self->{val}->[$idx];
86	114					212	my $v = $self->{val}->[$idx + 1];
87
88	114	100				214	if (ref $k) {
89	99					235	push @unkeyed, $v;
90							} else {
91	15					41	$required{$k} = $v;
92							}
93							}
94
95							# Check all our required stuff first
96							my %got = map {
97	13					45	$_ => $have->{$_}
98							} grep {
99	25					84	exists $have->{$_}
	15					42
100							} keys %required;
101
102							# First check required keys/values simply
103	25					125	my $hcompare = Test::Deep::Hash->new(\%required);
104	25	100				551	return 0 unless $hcompare->descend(\%got);
105
106							# Now check every hash value that has an ignore() key
107							my @tocheck = map {
108							+{
109							k => $_,
110	100					335	v => $have->{$_}
111							}
112	21					33159	} grep { ! exists $required{$_} } keys %$have;
	107					245
113
114	21	100	100			246	if ($self->{style} eq 'hashbag' && @tocheck == 0) {
		100	100
		100	100
115							# hashbag() and no input keys left? We're good!
116	1					5	return 1;
117
118							} elsif ($self->{style} eq 'hashbag' && @tocheck != @unkeyed) {
119							# With hashbag(), we must have as many items left over as we have unkeyed
120							# matchers to check against
121	1					4	my $ecount = 0+@unkeyed;
122	1					2	my $gcount = 0+@tocheck;
123
124							# Turn keys into sorted list of keys with single quotes around them,
125							# escape \ and ' so "foo'bar" looks like 'foo\'bar'. This should make
126							# understanding output easier if we need to diag something.
127							my $tocheck_desc = join(", ",
128							map {
129	2					5	my $k = $_->{k};
130	2					8	$k =~ s/(\\\|')/\\$1/g;
131	2					8	"'$k'"
132	1					7	} sort { $a->{k} cmp $b->{k} } @tocheck
	1					6
133							);
134
135	1					7	$self->data->{diag} = <
136							We expected $ecount ignored() keys, but we found $gcount keys left?
137							Remaining keys: $tocheck_desc
138							EOM
139
140	1					18	return 0;
141
142							} elsif ($self->{style} eq 'superhashbag' && @unkeyed == 0) {
143							# superhashbagof() and no matchers left? We're good
144	1					6	return 1;
145							}
146
147	18					43	my %match_by_got;
148							my %match_by_want;
149
150							# Expensiveish ... check every expect against every got once
151	18					66	for my $i (0..$#unkeyed) {
152	96					8489	my $want = $unkeyed[$i];
153
154	96					307	for my $j (0..$#tocheck) {
155	1961	100				242660	if (Test::Deep::eq_deeply_cache($tocheck[$j]->{v}, $unkeyed[$i])) {
156	137					13407	$match_by_got{$j}{$i} = 1;
157	137					377	$match_by_want{$i}{$j} = 1;
158							}
159							}
160							}
161
162							# Now, imagine we have:
163							#
164							# cmp_deeply(
165							# {
166							# laksdjfaf => 'bob',
167							# xlaksdjfaf => 'bobby',
168							# },
169							# hashbag(
170							# ignore() => re('.*b'),
171							# ignore() => re('.b.bb'),
172							# ),
173							# 'got our matching resp',
174							# );
175							#
176							# %match_by_got might look like:
177							#
178							# {
179							# 0 => { # 0th got (bob)
180							# 0 => 1, # 1st want ('.*b')
181							# },
182							# 1 => { # 1st got (bobby)
183							# 0 => 1, # 0th want ('.*b')
184							# 1 => 1, # 1st want ('.b.bb')
185							# },
186							# }
187							#
188							# Sometimes, matches can match multiple things, and we need to be sure
189							# that each matcher is used only once. To do this we, we'll create a
190							# directed graph, and then use the Edmonds-Karp algorithm to find the
191							# maximum flow of the graph. If the maximum flow is equal to our number of
192							# items, we know we found a case where each item matched at least once.
193							#
194							# In the data above, our gots are g0 (bob) and g1 (bobby), and our matchers
195							# are m0 ('.b'), and m1 ('.b.*bb'). Our graph will look like
196							#
197							# -> g0
198							# / \
199							# source -> m0 --> sink
200							# \ / /
201							# -> g1 ---> m1
202
203	18					905	my $max_flow_found = 0;
204
205	18					54	my %matchers_used = map { $_ => 0 } 0..$#unkeyed;
	96					255
206
207	18	100				56	if (%match_by_got) {
208	16					27	my %graph;
209
210	16					52	for my $g (keys %match_by_got) {
211	92					264	$graph{source}{"g$g"} = 1;
212
213	92					121	for my $m (keys %{$match_by_got{$g}}) {
	92					205
214	137					341	$graph{"g$g"}{"m$m"} = 1;
215							}
216							}
217
218	16					47	for my $m (keys %match_by_want) {
219	90					223	$graph{"m$m"}{sink} = 1;
220							}
221
222							# Generate a flow graph where each edge from the source should have
223							# a weight of 0 if it was used
224	16					58	$max_flow_found = max_flow(\%graph);
225
226	16					60	for my $g (keys %match_by_got) {
227	92	100				186	if ($graph{source}{"g$g"} == 0) {
228							# Record that in our best case (highest flow) this key matched; to be
229							# used in diagnostics later
230	87					171	$tocheck[$g]{matched} = 1;
231							}
232							}
233
234	16					41	for my $m (keys %match_by_want) {
235	90	100				187	if ($graph{"m$m"}{sink} == 0) {
236							# Record that in our best case (highest flow) this matcher matched; to be
237							# used in diagnostics later
238	87					130	$matchers_used{$m} = 1;
239							}
240							}
241
242							# With hashbag() there are as many items to check as there are @unkeyed.
243							# With superhashbagof(), @unkeyed is the matchers we need to match, and
244							# there may be many more items to check against, but max flow can only
245							# go up to @unkeyed. In both cases, if max flow == unkeyed, we're good.
246	16	100				287	return 1 if $max_flow_found == @unkeyed;
247							}
248
249	9					26	my @keys_had_no_match = map { $_->{k} } grep { ! $_->{matched} } @tocheck;
	8					23
	49					68
250
251							# Turn keys into sorted list of keys with single quotes around them,
252							# escape \ and ' so "foo'bar" looks like 'foo\'bar'. This should make
253							# understanding output easier
254							my $keys_desc = join(", ",
255							map {
256	9					26	my $k = $_;
	8					17
257	8					29	$k =~ s/(\\\|')/\\$1/g;
258	8					31	"'$k'"
259							} sort @keys_had_no_match
260							);
261
262	9					69	my @matchers_had_no_match = map { $unkeyed[$_] } grep {
263	9					27	! $matchers_used{$_}
	50					67
264							} keys %matchers_used;
265
266	9					51	my $matchers_desc = "\n" . Dumper(\@matchers_had_no_match);
267
268	9					833	my $wanted_flow = @unkeyed;
269
270	9					88	$self->data->{diag} = <
271							Failed to find all required items in the remaining hash keys.
272							Expected to match $wanted_flow items, best case match was $max_flow_found.
273							Keys with no match: $keys_desc
274							Matchers that failed to match:$matchers_desc
275							EOM
276
277	9					211	return 0;
278							}
279
280							sub diagnostics {
281	9			9	0	5898	my ($self, $where, $last) = @_;
282	9					18	my $diag;
283
284	9	50				27	if ($self->data->{diag}) {
285	9					110	$diag = "Comparing $where\n" . $self->data->{diag};
286							} else {
287	0					0	$diag = $last->{diag};
288	0					0	$diag =~ s/\$data/$where what/;
289							}
290
291	9					139	return $diag;
292							}
293
294							sub hashbag {
295	19			19	1	488107	return Test::Deep::Hashbag->new('hashbag', @_);
296							}
297
298							sub superhashbagof {
299	8			8	1	444188	return Test::Deep::Hashbag->new('superhashbag', @_);
300							}
301
302							# Adapted https://en.wikipedia.org/wiki/Ford%E2%80%93Fulkerson_algorithm#Python_implementation_of_the_Edmonds%E2%80%93Karp_algorithm
303							sub bfs {
304	103			103	0	245	my ($graph, $source, $sink, $parent) = @_;
305
306	103					140	my %visited;
307
308	103					205	my @todo = $source;
309
310	103					185	while (@todo) {
311	4095					5531	my $item = pop @todo;
312
313	4095					8156	for my $v (keys $graph->{$item}->%*) {
314	11075	100				20594	next unless $graph->{$item}{$v};
315
316	7365	100				14658	next if $visited{$v}++;
317
318	3992					5716	$parent->{$v} = $item;
319
320	3992					6834	push @todo, $v;
321							}
322							}
323
324	103					694	return !! $visited{$sink};
325							}
326
327							sub max_flow {
328	16			16	0	37	my ($graph) = @_;
329
330	16					26	my $max_flow = 0;
331
332	16					32	my $parent = {};
333
334	16					56	while (bfs($graph, 'source', 'sink', $parent)) {
335	87					145	my $c = 'sink';
336
337							# No way we're hitting a flow this high
338	87					122	my $path_flow = 'Inf';
339
340							# Find our lowest flow
341	87		66			334	while ($c && $c ne 'source') {
342	267					420	my $pc = $parent->{$c};
343
344	267	100				661	$path_flow = $graph->{$pc}{$c} if $graph->{$pc}{$c} < $path_flow;
345
346	267					787	$c = $pc;
347							}
348
349	87					124	$max_flow += $path_flow;
350
351	87					116	$c = 'sink';
352
353							# Adjust flow bidirectionally from our found path
354	87		66			264	while ($c && $c ne 'source') {
355	267					400	my $pc = $parent->{$c};
356	267					440	$graph->{$pc}{$c} -= $path_flow;
357	267					460	$graph->{$c}{$pc} += $path_flow;
358	267					855	$c = $pc;
359							}
360							}
361
362	16					62	return $max_flow;
363							}
364
365							1;
366
367							=pod
368
369							=encoding UTF-8
370
371							=head1 NAME
372
373							Test::Deep::Hashbag - A Test::Deep hash comparator ignoring hash keys
374
375							=head1 VERSION
376
377							version 0.002
378
379							=head1 SYNOPSIS
380
381							use strict;
382							use warnings;
383
384							use Test::More;
385							use Test::Deep;
386							use Test::Deep::Hashbag;
387
388							cmp_deeply(
389							{
390							cat => 'meow',
391							dog => 'bark bark',
392							fish => 'blub',
393							},
394							hashbag(
395							ignore() => 'meow',
396							ignore() => re('.bark.'),
397							fish => 'blub',
398							),
399							'our animals sound about right',
400							);
401
402							done_testing;
403
404							=head1 DESCRIPTION
405
406							This module provides C and C, which are like
407							L's C and C, but for B.
408
409							The idea is it lets you test that a hash has certain B, but you don't
410							know or care what the keys are for those specific values.
411
412							=head1 EXPORTS
413
414							=head2 hashbag
415
416							cmp_deeply(\%got, hashbag(key => 'val', ignore() => 'val2', ...), $desc);
417
418							Takes a list of pairs that are expected to be keys and values. For any keys
419							that aren't C, those keys must exist and have the values provided
420							(this will be checked first).
421
422							The remaining values (where the keys are C) will then be checked
423							against the left over values in the input hash.
424
425							On failure, the diagnostics will show how many unkeyed items were expected to
426							match, and how many did match in the best possible case. Any keys that
427							matches could not be found for will be printed out, as will any matchers that
428							were not used in this best case.
429
430							=head2 superhashbagof
431
432							cmp_deeply(\%got, superhashbagof(k => 'v', ignore() => 'v2', ...), $desc);
433
434							Like C above, but C<%got> may have extra keys/values in it that we
435							don't care about.
436
437							=head1 NOTES
438
439							B
440
441							With complex matches, the printed information may seem misleading; it can
442							provide different lists of keys or matchers that didn't match on reruns of
443							the test. This indicates that some of the matchers can match multiple keys,
444							and during different test runs they did so in the best case scenario as the
445							matching order is not deterministic.
446
447							B
448
449							With larger and larger amounts of values to test, matching will get slower
450							and slower, due to how this module works (testing every expected element
451							against every input). In the future there will be changes to speed up the
452							simple best/worst cases, but there will always be inherent slowness with
453							large amounts of data. Use with caution.
454
455							=head1 SEE ALSO
456
457							L
458
459							=head1 THANKS
460
461							Thanks to rjbs for pointing out a better algorithm than what I had
462							originally, and to waltman for Graph::MaxFlow which implemented the harder
463							bits of it (until I replaced Graph / Graph::MaxFlow with my own implementation
464							to avoid dependencies :)).
465
466							=head1 AUTHOR
467
468							Matthew Horsfall
469
470							=head1 COPYRIGHT AND LICENSE
471
472							This software is copyright (c) 2025 by Fastmail Pty. Ltd.
473
474							This is free software; you can redistribute it and/or modify it under
475							the same terms as the Perl 5 programming language system itself.
476
477							=cut
478
479							__END__