File Coverage

blib/lib/Test/Deep/Hashbag.pm
Criterion Covered Total %
statement 157 161 97.5
branch 38 40 95.0
condition 17 20 85.0
subroutine 14 14 100.0
pod 2 7 28.5
total 228 242 94.2


line stmt bran cond sub pod time code
1             package Test::Deep::Hashbag 0.002;
2             # ABSTRACT: A Test::Deep hash comparator ignoring hash keys
3              
4 2     2   328818 use strict;
  2         5  
  2         80  
5 2     2   12 use warnings;
  2         5  
  2         118  
6              
7 2     2   1059 use Test::Deep::Cmp;
  2         1871  
  2         9  
8              
9 2     2   1485 use Data::Dumper;
  2         26049  
  2         215  
10 2     2   17 use Scalar::Util qw(blessed reftype);
  2         4  
  2         119  
11 2     2   1044 use Test::Deep::Hash ();
  2         3857  
  2         74  
12              
13 2     2   16 use Exporter 'import';
  2         4  
  2         4951  
14             our @EXPORT = qw(hashbag superhashbagof);
15              
16             sub init {
17 27     27 0 247 my $self = shift;
18 27         59 my $style = shift;
19 27         127 my @want = @_;
20              
21 27 50 66     180 unless ($style eq 'hashbag' || $style eq 'superhashbag') {
22 0         0 require Carp;
23 0         0 Carp::confess("Unknown style '$style' requested. How even?!");
24             }
25              
26 27 100       125 unless (@want % 2 == 0) {
27 1         8 require Carp;
28 1         140 Carp::croak("hashbag needs an even list of pairs.");
29             }
30              
31 26         54 my %seen;
32              
33 26         134 for my $i (0 .. (@want / 2 - 1)) {
34 114         214 my $idx = $i * 2;
35 114         193 my $k = $want[$idx];
36              
37             # Ignore ignore() and other things
38 114 100       241 if (ref $k) {
39 96 100 100     277 unless ((blessed($k) // "") eq 'Test::Deep::Ignore') {
40             # Prevent mistakes?
41 1         7 require Carp;
42 1         147 Carp::croak("hashbag keys must be simple scalars or a Test::Deep::Ignore (ignore()) object, got: " . reftype($k));
43             }
44              
45 95         220 next;
46             }
47              
48 18 100       76 if ($seen{$k}++) {
49 1         11 require Carp;
50 1         233 Carp::croak("Duplicate key '$k' passed to hashbag()");
51             }
52             }
53              
54 24         141 $self->{val} = \@want;
55 24         59 $self->{style} = $style;
56              
57 24         80 return;
58             }
59              
60             sub descend {
61 26     26 0 7445 my $self = shift;
62 26         55 my $have = shift;
63              
64 26 100       99 unless (ref $have eq 'HASH') {
65 1         6 my $got = Test::Deep::render_val($have);
66 1         24 $self->data->{diag} = <
67             got : $got
68             expect : A hashref
69             EOM
70              
71 1         16 return 0;
72             }
73              
74 25         43 my $want_count = (0 + @{$self->{val}}) / 2;
  25         92  
75 25         50 my $have_count = keys %$have;
76              
77 25         78 my %required;
78             my @unkeyed;
79              
80             # Sort the incoming hashbag into a list of required keys/values, and values
81             # who's keys are ignore()
82 25         82 for my $i (0 .. $want_count - 1) {
83 114         185 my $idx = $i * 2;
84              
85 114         185 my $k = $self->{val}->[$idx];
86 114         212 my $v = $self->{val}->[$idx + 1];
87              
88 114 100       214 if (ref $k) {
89 99         235 push @unkeyed, $v;
90             } else {
91 15         41 $required{$k} = $v;
92             }
93             }
94              
95             # Check all our required stuff first
96             my %got = map {
97 13         45 $_ => $have->{$_}
98             } grep {
99 25         84 exists $have->{$_}
  15         42  
100             } keys %required;
101              
102             # First check required keys/values simply
103 25         125 my $hcompare = Test::Deep::Hash->new(\%required);
104 25 100       551 return 0 unless $hcompare->descend(\%got);
105              
106             # Now check every hash value that has an ignore() key
107             my @tocheck = map {
108             +{
109             k => $_,
110 100         335 v => $have->{$_}
111             }
112 21         33159 } grep { ! exists $required{$_} } keys %$have;
  107         245  
113              
114 21 100 100     246 if ($self->{style} eq 'hashbag' && @tocheck == 0) {
    100 100        
    100 100        
115             # hashbag() and no input keys left? We're good!
116 1         5 return 1;
117              
118             } elsif ($self->{style} eq 'hashbag' && @tocheck != @unkeyed) {
119             # With hashbag(), we must have as many items left over as we have unkeyed
120             # matchers to check against
121 1         4 my $ecount = 0+@unkeyed;
122 1         2 my $gcount = 0+@tocheck;
123              
124             # Turn keys into sorted list of keys with single quotes around them,
125             # escape \ and ' so "foo'bar" looks like 'foo\'bar'. This should make
126             # understanding output easier if we need to diag something.
127             my $tocheck_desc = join(", ",
128             map {
129 2         5 my $k = $_->{k};
130 2         8 $k =~ s/(\\|')/\\$1/g;
131 2         8 "'$k'"
132 1         7 } sort { $a->{k} cmp $b->{k} } @tocheck
  1         6  
133             );
134              
135 1         7 $self->data->{diag} = <
136             We expected $ecount ignored() keys, but we found $gcount keys left?
137             Remaining keys: $tocheck_desc
138             EOM
139              
140 1         18 return 0;
141              
142             } elsif ($self->{style} eq 'superhashbag' && @unkeyed == 0) {
143             # superhashbagof() and no matchers left? We're good
144 1         6 return 1;
145             }
146              
147 18         43 my %match_by_got;
148             my %match_by_want;
149              
150             # Expensiveish ... check every expect against every got once
151 18         66 for my $i (0..$#unkeyed) {
152 96         8489 my $want = $unkeyed[$i];
153              
154 96         307 for my $j (0..$#tocheck) {
155 1961 100       242660 if (Test::Deep::eq_deeply_cache($tocheck[$j]->{v}, $unkeyed[$i])) {
156 137         13407 $match_by_got{$j}{$i} = 1;
157 137         377 $match_by_want{$i}{$j} = 1;
158             }
159             }
160             }
161              
162             # Now, imagine we have:
163             #
164             # cmp_deeply(
165             # {
166             # laksdjfaf => 'bob',
167             # xlaksdjfaf => 'bobby',
168             # },
169             # hashbag(
170             # ignore() => re('.*b'),
171             # ignore() => re('.*b.*bb'),
172             # ),
173             # 'got our matching resp',
174             # );
175             #
176             # %match_by_got might look like:
177             #
178             # {
179             # 0 => { # 0th got (bob)
180             # 0 => 1, # 1st want ('.*b')
181             # },
182             # 1 => { # 1st got (bobby)
183             # 0 => 1, # 0th want ('.*b')
184             # 1 => 1, # 1st want ('.*b.*bb')
185             # },
186             # }
187             #
188             # Sometimes, matches can match multiple things, and we need to be sure
189             # that each matcher is used only once. To do this we, we'll create a
190             # directed graph, and then use the Edmonds-Karp algorithm to find the
191             # maximum flow of the graph. If the maximum flow is equal to our number of
192             # items, we know we found a case where each item matched at least once.
193             #
194             # In the data above, our gots are g0 (bob) and g1 (bobby), and our matchers
195             # are m0 ('.*b'), and m1 ('.*b.*bb'). Our graph will look like
196             #
197             # -> g0
198             # / \
199             # source -> m0 --> sink
200             # \ / /
201             # -> g1 ---> m1
202              
203 18         905 my $max_flow_found = 0;
204              
205 18         54 my %matchers_used = map { $_ => 0 } 0..$#unkeyed;
  96         255  
206              
207 18 100       56 if (%match_by_got) {
208 16         27 my %graph;
209              
210 16         52 for my $g (keys %match_by_got) {
211 92         264 $graph{source}{"g$g"} = 1;
212              
213 92         121 for my $m (keys %{$match_by_got{$g}}) {
  92         205  
214 137         341 $graph{"g$g"}{"m$m"} = 1;
215             }
216             }
217              
218 16         47 for my $m (keys %match_by_want) {
219 90         223 $graph{"m$m"}{sink} = 1;
220             }
221              
222             # Generate a flow graph where each edge from the source *should* have
223             # a weight of 0 if it was used
224 16         58 $max_flow_found = max_flow(\%graph);
225              
226 16         60 for my $g (keys %match_by_got) {
227 92 100       186 if ($graph{source}{"g$g"} == 0) {
228             # Record that in our best case (highest flow) this key matched; to be
229             # used in diagnostics later
230 87         171 $tocheck[$g]{matched} = 1;
231             }
232             }
233              
234 16         41 for my $m (keys %match_by_want) {
235 90 100       187 if ($graph{"m$m"}{sink} == 0) {
236             # Record that in our best case (highest flow) this matcher matched; to be
237             # used in diagnostics later
238 87         130 $matchers_used{$m} = 1;
239             }
240             }
241              
242             # With hashbag() there are as many items to check as there are @unkeyed.
243             # With superhashbagof(), @unkeyed is the matchers we need to match, and
244             # there may be many more items to check against, but max flow can only
245             # go up to @unkeyed. In both cases, if max flow == unkeyed, we're good.
246 16 100       287 return 1 if $max_flow_found == @unkeyed;
247             }
248              
249 9         26 my @keys_had_no_match = map { $_->{k} } grep { ! $_->{matched} } @tocheck;
  8         23  
  49         68  
250              
251             # Turn keys into sorted list of keys with single quotes around them,
252             # escape \ and ' so "foo'bar" looks like 'foo\'bar'. This should make
253             # understanding output easier
254             my $keys_desc = join(", ",
255             map {
256 9         26 my $k = $_;
  8         17  
257 8         29 $k =~ s/(\\|')/\\$1/g;
258 8         31 "'$k'"
259             } sort @keys_had_no_match
260             );
261              
262 9         69 my @matchers_had_no_match = map { $unkeyed[$_] } grep {
263 9         27 ! $matchers_used{$_}
  50         67  
264             } keys %matchers_used;
265              
266 9         51 my $matchers_desc = "\n" . Dumper(\@matchers_had_no_match);
267              
268 9         833 my $wanted_flow = @unkeyed;
269              
270 9         88 $self->data->{diag} = <
271             Failed to find all required items in the remaining hash keys.
272             Expected to match $wanted_flow items, best case match was $max_flow_found.
273             Keys with no match: $keys_desc
274             Matchers that failed to match:$matchers_desc
275             EOM
276              
277 9         211 return 0;
278             }
279              
280             sub diagnostics {
281 9     9 0 5898 my ($self, $where, $last) = @_;
282 9         18 my $diag;
283              
284 9 50       27 if ($self->data->{diag}) {
285 9         110 $diag = "Comparing $where\n" . $self->data->{diag};
286             } else {
287 0         0 $diag = $last->{diag};
288 0         0 $diag =~ s/\$data/$where what/;
289             }
290              
291 9         139 return $diag;
292             }
293              
294             sub hashbag {
295 19     19 1 488107 return Test::Deep::Hashbag->new('hashbag', @_);
296             }
297              
298             sub superhashbagof {
299 8     8 1 444188 return Test::Deep::Hashbag->new('superhashbag', @_);
300             }
301              
302             # Adapted https://en.wikipedia.org/wiki/Ford%E2%80%93Fulkerson_algorithm#Python_implementation_of_the_Edmonds%E2%80%93Karp_algorithm
303             sub bfs {
304 103     103 0 245 my ($graph, $source, $sink, $parent) = @_;
305              
306 103         140 my %visited;
307              
308 103         205 my @todo = $source;
309              
310 103         185 while (@todo) {
311 4095         5531 my $item = pop @todo;
312              
313 4095         8156 for my $v (keys $graph->{$item}->%*) {
314 11075 100       20594 next unless $graph->{$item}{$v};
315              
316 7365 100       14658 next if $visited{$v}++;
317              
318 3992         5716 $parent->{$v} = $item;
319              
320 3992         6834 push @todo, $v;
321             }
322             }
323              
324 103         694 return !! $visited{$sink};
325             }
326              
327             sub max_flow {
328 16     16 0 37 my ($graph) = @_;
329              
330 16         26 my $max_flow = 0;
331              
332 16         32 my $parent = {};
333              
334 16         56 while (bfs($graph, 'source', 'sink', $parent)) {
335 87         145 my $c = 'sink';
336              
337             # No way we're hitting a flow this high
338 87         122 my $path_flow = 'Inf';
339              
340             # Find our lowest flow
341 87   66     334 while ($c && $c ne 'source') {
342 267         420 my $pc = $parent->{$c};
343              
344 267 100       661 $path_flow = $graph->{$pc}{$c} if $graph->{$pc}{$c} < $path_flow;
345              
346 267         787 $c = $pc;
347             }
348              
349 87         124 $max_flow += $path_flow;
350              
351 87         116 $c = 'sink';
352              
353             # Adjust flow bidirectionally from our found path
354 87   66     264 while ($c && $c ne 'source') {
355 267         400 my $pc = $parent->{$c};
356 267         440 $graph->{$pc}{$c} -= $path_flow;
357 267         460 $graph->{$c}{$pc} += $path_flow;
358 267         855 $c = $pc;
359             }
360             }
361              
362 16         62 return $max_flow;
363             }
364              
365             1;
366              
367             =pod
368              
369             =encoding UTF-8
370              
371             =head1 NAME
372              
373             Test::Deep::Hashbag - A Test::Deep hash comparator ignoring hash keys
374              
375             =head1 VERSION
376              
377             version 0.002
378              
379             =head1 SYNOPSIS
380              
381             use strict;
382             use warnings;
383              
384             use Test::More;
385             use Test::Deep;
386             use Test::Deep::Hashbag;
387              
388             cmp_deeply(
389             {
390             cat => 'meow',
391             dog => 'bark bark',
392             fish => 'blub',
393             },
394             hashbag(
395             ignore() => 'meow',
396             ignore() => re('.*bark.*'),
397             fish => 'blub',
398             ),
399             'our animals sound about right',
400             );
401              
402             done_testing;
403              
404             =head1 DESCRIPTION
405              
406             This module provides C and C, which are like
407             L's C and C, but for B.
408              
409             The idea is it lets you test that a hash has certain B, but you don't
410             know or care what the keys are for those specific values.
411              
412             =head1 EXPORTS
413              
414             =head2 hashbag
415              
416             cmp_deeply(\%got, hashbag(key => 'val', ignore() => 'val2', ...), $desc);
417              
418             Takes a list of pairs that are expected to be keys and values. For any keys
419             that aren't C, those keys must exist and have the values provided
420             (this will be checked first).
421              
422             The remaining values (where the keys are C) will then be checked
423             against the left over values in the input hash.
424              
425             On failure, the diagnostics will show how many unkeyed items were expected to
426             match, and how many did match in the best possible case. Any keys that
427             matches could not be found for will be printed out, as will any matchers that
428             were not used in this best case.
429              
430             =head2 superhashbagof
431              
432             cmp_deeply(\%got, superhashbagof(k => 'v', ignore() => 'v2', ...), $desc);
433              
434             Like C above, but C<%got> may have extra keys/values in it that we
435             don't care about.
436              
437             =head1 NOTES
438              
439             B
440              
441             With complex matches, the printed information may seem misleading; it can
442             provide different lists of keys or matchers that didn't match on reruns of
443             the test. This indicates that some of the matchers can match multiple keys,
444             and during different test runs they did so in the best case scenario as the
445             matching order is not deterministic.
446              
447             B
448              
449             With larger and larger amounts of values to test, matching will get slower
450             and slower, due to how this module works (testing every expected element
451             against every input). In the future there will be changes to speed up the
452             simple best/worst cases, but there will always be inherent slowness with
453             large amounts of data. Use with caution.
454              
455             =head1 SEE ALSO
456              
457             L
458              
459             =head1 THANKS
460              
461             Thanks to rjbs for pointing out a better algorithm than what I had
462             originally, and to waltman for Graph::MaxFlow which implemented the harder
463             bits of it (until I replaced Graph / Graph::MaxFlow with my own implementation
464             to avoid dependencies :)).
465              
466             =head1 AUTHOR
467              
468             Matthew Horsfall
469              
470             =head1 COPYRIGHT AND LICENSE
471              
472             This software is copyright (c) 2025 by Fastmail Pty. Ltd.
473              
474             This is free software; you can redistribute it and/or modify it under
475             the same terms as the Perl 5 programming language system itself.
476              
477             =cut
478              
479             __END__