File Coverage

blib/lib/Data/Rmap.pm
Criterion Covered Total %
statement 64 65 98.4
branch 26 28 92.8
condition 4 5 80.0
subroutine 20 21 95.2
pod 13 15 86.6
total 127 134 94.7


line stmt bran cond sub pod time code
1             package Data::Rmap;
2             our $VERSION = 0.65;
3              
4             =head1 NAME
5              
6             Data::Rmap - recursive map, apply a block to a data structure
7              
8             =head1 SYNOPSIS
9              
10             $ perl -MData::Rmap -e 'print rmap { $_ } 1, [2,3], \\4, "\n"'
11             1234
12              
13             $ perl -MData::Rmap=:all
14             rmap_all { print (ref($_) || "?") ,"\n" } \@array, \%hash, \*glob;
15              
16             # OUTPUT (Note: a GLOB always has a SCALAR, hence the last two items)
17             # ARRAY
18             # HASH
19             # GLOB
20             # SCALAR
21             # ?
22              
23              
24             # Upper-case your leaves in-place
25             $array = [ "a", "b", "c" ];
26             $hash = { key => "a value" };
27             rmap { $_ = uc $_; } $array, $hash;
28              
29             use Data::Dumper; $Data::Dumper::Terse=1; $Data::Dumper::Indent=0;
30             print Dumper($array), " ", Dumper($hash), "\n";
31              
32             # OUTPUT
33             # ['A','B','C'] {'key' => 'A VALUE'}
34              
35              
36             # Simple array dumper.
37             # Uses $self->recurse method to alter traversal order
38             ($dump) = rmap_to {
39              
40             return "'$_'" unless ref($_); # scalars are quoted and returned
41              
42             my $self = shift;
43             # use $self->recurse to grab results and wrap them
44             return '[ ' . join(', ', $self->recurse() ) . ' ]';
45              
46             } ARRAY|VALUE, [ 1, [ 2, [ [ 3 ], 4 ] ], 5 ];
47              
48             print "$dump\n";
49             # OUTPUT
50             # [ '1', [ '2', [ [ '3' ], '4' ] ], '5' ]
51              
52              
53             =head1 DESCRIPTION
54              
55             rmap BLOCK LIST
56              
57             Recursively evaluate a BLOCK over a list of data structures
58             (locally setting $_ to each element) and return the list composed
59             of the results of such evaluations. $_ can be used to modify
60             the elements.
61              
62             Data::Rmap currently traverses HASH, ARRAY, SCALAR and GLOB reference
63             types and ignores others. Depending on which rmap_* wrapper is used,
64             the BLOCK is called for only scalar values, arrays, hashes, references,
65             all elements or a customizable combination.
66              
67             The list of data structures is traversed pre-order in a depth-first fashion.
68             That is, the BLOCK is called for the container reference before is it called
69             for it's elements (although see "recurse" below for post-order).
70             The values of a hash are traversed in the usual "values" order which
71             may affect some applications.
72              
73             If the "cut" subroutine is called in the BLOCK then the traversal
74             stops for that branch, say if you "cut" an array then the code is
75             never called for it's elements (or their sub-elements).
76             To simultaneously return values and cut, simply pass the return list
77             to cut: C
78              
79             The first parameter to the BLOCK is an object which maintains the
80             state of the traversal. Methods available on this object are
81             described in L below.
82              
83             =head1 EXPORTS
84              
85             By default:
86              
87             rmap, rmap_all, cut
88              
89             Optionally:
90              
91             rmap_scalar rmap_hash rmap_array rmap_code rmap_ref rmap_to
92             :types => [ qw(NONE VALUE HASH ARRAY SCALAR REF CODE ALL) ],
93             :all => ... # everything
94              
95             =head1 Functions
96              
97             The various names are just wrappers which select when to call
98             the code BLOCK. rmap_all always calls it, the others are more
99             selective while rmap_to takes an extra parameter permitting you
100             to provide selection criteria. Furthermore, you can always
101             just rmap_all and skip nodes which are not of interest.
102              
103             =over 4
104              
105             =item rmap_to { ... } $want, @data_structures;
106              
107             Most general first.
108              
109             Recurse the @data_structures and apply the BLOCK to
110             elements selected by $want. The $want parameter is the
111             bitwise "or" of whatever types you choose (imported with :types):
112              
113             VALUE - non-reference scalar, eg. 1
114             HASH - hash reference
115             ARRAY - array reference
116             SCALAR - scalar refernce, eg. \1
117             REF - higher-level reference, eg. \\1, \\{}
118             B any reference type, see 's reftype:
119             perl -MScalar::Util=reftype -le 'print map reftype($_), \1, \\1'
120             GLOB - glob reference, eg. \*x
121             (scalar, hash and array recursed, code too as of 0.63)
122             ALL - all of the above (not CODE)
123             CODE - code references (as of 0.63)
124             NONE - none of the above
125              
126             So to call the block for arrays and scalar values do:
127              
128             use Data::Rmap ':all'; # or qw(:types rmap_to)
129             rmap { ... } ARRAY|VALUE, @data_structures;
130              
131             (ALL | CODE) and (ALL & !GLOB) might also be handy.
132              
133             The remainder of the wrappers are given in terms of the $want for rmap_to.
134              
135             =item rmap { ... } @list;
136              
137             Recurse and call the BLOCK on non-reference scalar values. $want = VALUE
138              
139             =item rmap_all BLOCK LIST
140              
141             Recurse and call the BLOCK on everything. $want = ALL
142              
143             =item rmap_scalar { ... } @list
144              
145             Recurse and call the BLOCK on non-collection scalars.
146             $want = VALUE|SCALAR|REF
147              
148             =item rmap_hash
149              
150             Recurse and call the BLOCK on hash refs. $want = HASH
151              
152             =item rmap_array
153              
154             Recurse and call the BLOCK on array refs. $want = ARRAY
155              
156             =item rmap_code
157              
158             Recurse and call the BLOCK on code refs. $want = CODE
159              
160             =item rmap_ref
161              
162             Recurse and call the BLOCK on all "normal" references:
163             $want = HASH|ARRAY|SCALAR|REF
164              
165             Note: rmap_ref isn't the same as rmap_to {} REF
166              
167             =item cut(@list)
168              
169             Don't traverse sub-elements and return the @list immediately.
170             For example, if $_ is an ARRAY reference, then the array's elements
171             are not traversed.
172              
173             If there's two paths to an element, both will need to be cut.
174              
175             =back
176              
177             =head1 State Object
178              
179             The first parameter to the BLOCK is an object which maintains
180             most of the traversal state (except current node, which is $_).
181             I.
182             The "recurse" method may be useful.
183             Other methods should only be used in throw away tools, see L
184              
185             Methods:
186              
187             =over 4
188              
189             =item recurse
190              
191             Process child nodes of $_ now and return the result.
192              
193             This makes it easier to perform post-order and in-order
194             processing of a structure. Note that since the same "seen list"
195             is used, the child nodes aren't reprocessed.
196              
197             =item code
198              
199             The code reference of the BLOCK itself. Possible useful in
200             some situations.
201              
202             =item seen
203              
204             Reference to the HASH used to track where we have visited.
205             You may want to modify it in some situations (though I haven't yet).
206             Beware circular references. The (current) convention used for the key
207             is in the source.
208              
209             =item want
210              
211             The $want state described in L.
212              
213             =back
214              
215             =head1 EXAMPLES
216              
217             # command-line play
218             $ perl -MData::Rmap -le 'print join ":", rmap { $_ } 1,2,[3..5],\\6'
219             1:2:3:4:5:6
220              
221              
222             # Linearly number questions on a set of pages
223             my $qnum = 1;
224             rmap_hash {
225             $_->{qnum} = $qnum++ if($_->{qn});
226             } @pages;
227              
228              
229             # Grep recursively, finding ALL objects
230             use Scalar::Util qw(blessed);
231             my @objects = rmap_ref {
232             blessed($_) ? $_ : ();
233             } $data_structure;
234              
235              
236             # Grep recursively, finding public objects (note the cut)
237             use Scalar::Util qw(blessed);
238             my @objects = rmap_ref {
239             blessed($_) ? cut($_) : ();
240             } $data_structure;
241              
242              
243             # Return a modified structure
244             # (result flattening means we must cheat by cloning then modifying)
245             use Storable qw(dclone);
246             use Lingua::EN::Numbers::Easy;
247              
248             $words = [ 1, \2, { key => 3 } ];
249             $nums = dclone $words;
250             rmap { $_ = $N{$_} || $_ } $nums;
251              
252              
253             # Make an assertion about a structure
254             use Data::Dump;
255             rmap_ref {
256             blessed($_) && $_->isa('Question') && defined($_->name)
257             or die "Question doesn't have a name:", dump($_);
258             } @pages;
259              
260              
261             # Traverse a tree using localize state
262             $tree = [
263             one =>
264             two =>
265             [
266             three_one =>
267             three_two =>
268             [
269             three_three_one =>
270             ],
271             three_four =>
272             ],
273             four =>
274             [
275             [
276             five_one_one =>
277             ],
278             ],
279             ];
280              
281             @path = ('q');
282             rmap_to {
283             if(ref $_) {
284             local(@path) = (@path, 1); # ARRAY adds a new level to the path
285             $_[0]->recurse(); # does stuff within local(@path)'s scope
286             } else {
287             print join('.', @path), " = $_ \n"; # show the scalar's path
288             }
289             $path[-1]++; # bump last element (even when it was an aref)
290             } ARRAY|VALUE, $tree;
291              
292             # OUTPUT
293             # q.1 = one
294             # q.2 = two
295             # q.3.1 = three_one
296             # q.3.2 = three_two
297             # q.3.3.1 = three_three_one
298             # q.3.4 = three_four
299             # q.4 = four
300             # q.5.1.1 = five_one_one
301              
302             # replace CODE with ""
303             $ perl -MData::Rmap=:all -E 'say join ":", rmap_code { "" } sub{},sub{}'
304             :
305              
306             # look inside code refs with PadWalker
307             $ perl -MData::Rmap=:all -MSub::Identify=:all -MPadWalker=:all -MSub::Name
308             use 5.10.0;
309             my $s = sub {}; sub A::a { $s };
310             say join ", ",
311             rmap_code {
312             sub_fullname($_), # name string
313             map { $_[0]->recurse } closed_over($_) # then recurse the sub innards
314             } \*A::a, subname b => sub { $s };
315             # A::a, main::__ANON__, main::b
316              
317             =head1 Troubleshooting
318              
319             Beware comma after block:
320              
321             rmap { print }, 1..3;
322             ^-------- bad news, you get an empty list:
323             rmap(sub { print $_; }), 1..3;
324              
325             If you don't import a function, perl's confusion may produce:
326              
327             $ perl -MData::Rmap -le 'rmap_scalar { print } 1'
328             Can't call method "rmap_scalar" without a package or object reference...
329              
330             $ perl -MData::Rmap -le 'rmap_scalar { $_++ } 1'
331             Can't call method "rmap_scalar" without a package or object reference...
332              
333             If there's two paths to an element, both will need to be cut.
334              
335             If there's two paths to an element, one will be taken randomly when
336             there is an intervening hash.
337              
338             Autovivification can lead to "Deep recursion" warnings if you test
339             C<< exists $_->{this}{that} >> instead of
340             C<< exists $_->{this} && exists $_->{this}{that} >>
341             as you may follow a long chain of "this"s
342             Alternatively use the "no autovivification" pragma to avoid this problem.
343              
344             =head1 TODO
345              
346             put for @_ in wrapper to allow parameters in a different wrapper,
347             solve localizing problem.
348              
349             Store custom localized data about the traversal.
350             Seems too difficult and ugly when compare to doing it at the call site.
351             Should support multiple reentrancy so avoid the symbol table.
352              
353             C form to pass parameters.
354             Could potentially help localizing needs. (Maybe only recurse last item)
355              
356             Benchmark. Use array based object and/or direct access internally.
357              
358             Think about permitting different callback for different types.
359             The prototype syntax is a bit too flaky....
360              
361             Ensure that no memory leaks are possible, leaking the closure.
362              
363             =head1 SEE ALSO
364              
365             map, grep, L's dclone, L's reftype and blessed
366              
367             Faint traces of treemap:
368              
369             http://www.perlmonks.org/index.pl?node_id=60829
370              
371             Update: various alternatives have appear over the years,
372             L has a list.
373              
374             =head1 AUTHOR
375              
376             Brad Bowman Ermap@bereft.netE
377              
378             =head1 LICENCE AND COPYRIGHT
379              
380             Copyright (c) 2004- Brad Bowman (Ermap@bereft.netE).
381             All rights reserved.
382              
383             This module is free software; you can redistribute it and/or
384             modify it under the same terms as Perl itself.
385             See L and L.
386              
387             This program is distributed in the hope that it will be useful,
388             but WITHOUT ANY WARRANTY; without even the implied warranty of
389             MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
390              
391             =cut
392              
393             # Early design discussion:
394             # http://www.perlmonks.org/index.pl?node_id=295642
395             # wantarray
396             # http://www.class-dbi.com/cgi-bin/wiki/index.cgi?AtomicUpdates
397              
398 1     1   25177 use warnings;
  1         1  
  1         33  
399 1     1   3 use strict;
  1         1  
  1         22  
400 1     1   4 use Carp qw(croak);
  1         4  
  1         64  
401 1     1   3 use Scalar::Util qw(blessed refaddr reftype);
  1         1  
  1         1066  
402              
403             require Exporter;
404             our @ISA = qw(Exporter);
405             our @EXPORT = qw(rmap rmap_all cut);
406             our %EXPORT_TAGS = (
407             types => [ qw(NONE VALUE HASH ARRAY SCALAR REF GLOB CODE ALL) ],
408             );
409             our @EXPORT_OK = ( qw(rmap_scalar rmap_hash rmap_array rmap_code rmap_ref rmap_to),
410             @{ $EXPORT_TAGS{types} } );
411              
412             $EXPORT_TAGS{all} = [ @EXPORT, @EXPORT_OK ];
413              
414              
415             # Uses stringifying instead of S::U::ref* b/c it's under control
416             my $cut = \do { my $thing }; # my = out of symbol table
417             sub cut {
418 9     9 1 43 die $cut = [@_]; # cut can return
419             }
420              
421             sub NONE() { 0 }
422             sub VALUE() { 1 }
423             sub HASH() { 2 }
424             sub ARRAY() { 4 }
425             sub SCALAR() { 8 }
426             sub REF() { 16 }
427             sub GLOB() { 32 }
428             sub CODE() { 64 }
429             sub ALL() { VALUE|HASH|ARRAY|SCALAR|REF|GLOB }
430             # Others like CODE, Regex, etc are ignored
431              
432             my %type_bits = (
433             HASH => HASH,
434             ARRAY => ARRAY,
435             SCALAR => SCALAR,
436             REF => REF,
437             GLOB => GLOB,
438             CODE => CODE,
439             # reftype actually returns undef for:
440             VALUE => VALUE,
441             );
442              
443             sub new {
444 33     33 0 138 bless { code => $_[1], want => $_[2], seen => $_[3] }, $_[0];
445             }
446 0     0 1 0 sub code { $_[0]->{code} }
447 331     331 1 554 sub want { $_[0]->{want} }
448 698     698 1 1194 sub seen { $_[0]->{seen} }
449 175     175 0 261 sub call { $_[0]->{code}->($_[0]) }
450              
451             sub recurse {
452             # needs to deref $_ and *then* run the code, enter _recurse directly
453 9     9 1 48 $_[0]->_recurse(); # cut not needed as seen remembers
454             }
455              
456             sub rmap (&@) {
457 16     16 1 6309 __PACKAGE__->new(shift, VALUE, {})->_rmap(@_);
458             }
459              
460             sub rmap_all (&@) {
461 3     3 1 1764 __PACKAGE__->new(shift, ALL, {})->_rmap(@_);
462             }
463              
464             sub rmap_scalar (&@) {
465 2     2 1 13 __PACKAGE__->new(shift, VALUE|SCALAR|REF, {})->_rmap(@_);
466             }
467              
468             sub rmap_hash (&@) {
469 2     2 1 13 __PACKAGE__->new(shift, HASH, {})->_rmap(@_);
470             }
471              
472             sub rmap_array (&@) {
473 3     3 1 17 __PACKAGE__->new(shift, ARRAY, {})->_rmap(@_);
474             }
475              
476             sub rmap_code (&@) {
477 1     1 1 4 __PACKAGE__->new(shift, CODE, {})->_rmap(@_);
478             }
479              
480             sub rmap_ref (&@) {
481 1     1 1 5 __PACKAGE__->new(shift, HASH|ARRAY|SCALAR|REF, {})->_rmap(@_);
482             }
483              
484             sub rmap_to (&@) {
485 5     5 1 465 __PACKAGE__->new(shift, shift, {})->_rmap(@_);
486             }
487              
488             sub _rmap {
489 298     298   220 my $self = shift;
490 298         231 my @return;
491              
492 298         350 for (@_) { # just one after the wrapper call
493 367         281 my ($key, $type);
494              
495 367 100       622 if($type = reftype($_)) {
496 219         261 $key = refaddr $_;
497 219 50       373 $type = $type_bits{$type} or next;
498             } else {
499 148         227 $key = "V:".refaddr(\$_); # prefix to distinguish from \$_
500 148         129 $type = VALUE;
501             }
502              
503 367 100       397 next if ( exists $self->seen->{$key} );
504 331         433 $self->seen->{$key} = undef;
505              
506             # Call the $code
507 331 100       368 if($self->want & $type) {
508 175         125 my $e; # local($@) and rethrow caused problems
509             my @got;
510             {
511 175         123 local ($@); # don't trample, cut impl. should be transparent
  175         141  
512             # call in array context. pass block for reentrancy
513 175         167 @got = eval { $self->call() };
  175         192  
514 175         571 $e = $@;
515             }
516              
517 175 100       244 if($e) {
518 14 100 66     49 if(ref($e) && $e == $cut) {
519 9         10 push @return, @$cut; # cut can add to return list
520 9         23 next; # they're cutting, don't recurse
521             } else {
522 5         27 die $e;
523             }
524             }
525 161         170 push @return, @got;
526             }
527              
528 317         373 push @return, $self->_recurse(); # process $_ node
529             }
530 288         447 return @return;
531             }
532              
533             sub _recurse {
534 326     326   251 my $self = shift;
535 326 50 100     963 my $type = $type_bits{reftype($_) || 'VALUE'} or return;
536 326         232 my @return;
537              
538             # Recurse appropriately, keeping $_ alias
539 326 100       667 if ($type & HASH) {
    100          
    100          
    100          
540 52         104 push @return, $self->_rmap($_) for values %$_;
541             } elsif ($type & ARRAY) {
542             # Does this change cut behaviour? No, cut is one scalar ref
543             #push @return, _rmap($code, $want, $seen, $_) for @$_;
544 60         92 push @return, $self->_rmap(@$_);
545             } elsif ($type & (SCALAR|REF) ) {
546 68         94 push @return, $self->_rmap($$_);
547             } elsif ($type & GLOB) {
548             # SCALAR is always there, undef may be unused or set to undef
549 10         20 push @return, $self->_rmap(*$_{SCALAR});
550             defined *$_{ARRAY} and
551 10 100       27 push @return, $self->_rmap(*$_{ARRAY});
552             defined *$_{HASH} and
553 10 100       25 push @return, $self->_rmap(*$_{HASH});
554             defined *$_{CODE} and
555 10 100       31 push @return, $self->_rmap(*$_{CODE});
556             # Is it always: *f{GLOB} == \*f ?
557             # Also PACKAGE NAME GLOB
558             }
559 321         437 return @return;
560             }
561              
562             1;