File Coverage

blib/lib/Data/Rmap.pm
Criterion Covered Total %
statement 62 63 98.4
branch 22 26 84.6
condition 4 5 80.0
subroutine 19 20 95.0
pod 12 14 85.7
total 119 128 92.9


line stmt bran cond sub pod time code
1             package Data::Rmap;
2             our $VERSION = 0.62;
3              
4             =head1 NAME
5              
6             Data::Rmap - recursive map, apply a block to a data structure
7              
8             =head1 SYNOPSIS
9              
10             $ perl -MData::Rmap -e 'print rmap { $_ } 1, [2,3], \\4, "\n"'
11             1234
12              
13             $ perl -MData::Rmap=:all
14             rmap_all { print (ref($_) || "?") ,"\n" } \@array, \%hash, \*glob;
15              
16             # OUTPUT (Note: a GLOB always has a SCALAR, hence the last two items)
17             # ARRAY
18             # HASH
19             # GLOB
20             # SCALAR
21             # ?
22              
23              
24             # Upper-case your leaves in-place
25             $array = [ "a", "b", "c" ];
26             $hash = { key => "a value" };
27             rmap { $_ = uc $_; } $array, $hash;
28              
29             use Data::Dumper; $Data::Dumper::Terse=1; $Data::Dumper::Indent=0;
30             print Dumper($array), " ", Dumper($hash), "\n";
31              
32             # OUTPUT
33             # ['A','B','C'] {'key' => 'A VALUE'}
34              
35              
36             # Simple array dumper.
37             # Uses $self->recurse method to alter traversal order
38             ($dump) = rmap_to {
39              
40             return "'$_'" unless ref($_); # scalars are quoted and returned
41              
42             my $self = shift;
43             # use $self->recurse to grab results and wrap them
44             return '[ ' . join(', ', $self->recurse() ) . ' ]';
45              
46             } ARRAY|VALUE, [ 1, [ 2, [ [ 3 ], 4 ] ], 5 ];
47              
48             print "$dump\n";
49             # OUTPUT
50             # [ '1', [ '2', [ [ '3' ], '4' ] ], '5' ]
51              
52              
53             =head1 DESCRIPTION
54              
55             rmap BLOCK LIST
56              
57             Recursively evaluate a BLOCK over a list of data structures
58             (locally setting $_ to each element) and return the list composed
59             of the results of such evaluations. $_ can be used to modify
60             the elements.
61              
62             Data::Rmap currently traverses HASH, ARRAY, SCALAR and GLOB reference
63             types and ignores others. Depending on which rmap_* wrapper is used,
64             the BLOCK is called for only scalar values, arrays, hashes, references,
65             all elements or a customizable combination.
66              
67             The list of data structures is traversed pre-order in a depth-first fashion.
68             That is, the BLOCK is called for the container reference before is it called
69             for it's elements (although see "recurse" below for post-order).
70             The values of a hash are traversed in the usual "values" order which
71             may affect some applications.
72              
73             If the "cut" subroutine is called in the BLOCK then the traversal
74             stops for that branch, say if you "cut" an array then the code is
75             never called for it's elements (or their sub-elements).
76             To simultaneously return values and cut, simply pass the return list
77             to cut: C
78              
79             The first parameter to the BLOCK is an object which maintains the
80             state of the traversal. Methods available on this object are
81             described in L below.
82              
83             =head1 EXPORTS
84              
85             By default:
86              
87             rmap, rmap_all, cut
88              
89             Optionally:
90              
91             rmap_scalar rmap_hash rmap_array rmap_ref rmap_to
92             :types => [ qw(NONE VALUE HASH ARRAY SCALAR REF OBJECT ALL) ],
93             :all => ... # everything
94              
95             =head1 Functions
96              
97             The various names are just wrappers which select when to call
98             the code BLOCK. rmap_all always calls it, the others are more
99             selective while rmap_to takes an extra parameter permitting you
100             to provide selection criteria. Furthermore, you can always
101             just rmap_all and skip nodes which are not of interest.
102              
103             =over 4
104              
105             =item rmap_to { ... } $want, @data_structures;
106              
107             Most general first.
108              
109             Recurse the @data_structures and apply the BLOCK to
110             elements selected by $want. The $want parameter is the
111             bitwise "or" of whatever types you choose (imported with :types):
112              
113             VALUE - non-reference scalar, eg. 1
114             HASH - hash reference
115             ARRAY - array reference
116             SCALAR - scalar refernce, eg. \1
117             REF - higher-level reference, eg. \\1, \\{}
118             B any reference type, see 's reftype:
119             perl -MScalar::Util=reftype -le 'print map reftype($_), \1, \\1'
120             GLOB - glob reference, eg. \*x
121             (scalar, hash and array recursed)
122             ALL - all of the above
123             NONE - none of the above
124              
125             So to call the block for arrays and scalar values do:
126              
127             use Data::Rmap ':all'; # or qw(:types rmap_to)
128             rmap { ... } ARRAY|VALUE, @data_structures;
129              
130             (ALL & !GLOB) might also be handy.
131              
132             The remainder of the wrappers are given in terms of the $want for rmap_to.
133              
134             =item rmap { ... } @list;
135              
136             Recurse and call the BLOCK on non-reference scalar values. $want = VALUE
137              
138             =item rmap_all BLOCK LIST
139              
140             Recurse and call the BLOCK on everything. $want = ALL
141              
142             =item rmap_scalar { ... } @list
143              
144             Recurse and call the BLOCK on non-collection scalars.
145             $want = VALUE|SCALAR|REF
146              
147             =item rmap_hash
148              
149             Recurse and call the BLOCK on hash refs. $want = HASH
150              
151             =item rmap_array
152              
153             Recurse and call the BLOCK on array refs. $want = ARRAY
154              
155             =item rmap_ref
156              
157             Recurse and call the BLOCK on all references (not GLOBS).
158             $want = HASH|ARRAY|SCALAR|REF
159              
160             Note: rmap_ref isn't the same as rmap_to {} REF
161              
162             =item cut(@list)
163              
164             Don't traverse sub-elements and return the @list immediately.
165             For example, if $_ is an ARRAY reference, then the array's elements
166             are not traversed.
167              
168             If there's two paths to an element, both will need to be cut.
169              
170             =back
171              
172             =head1 State Object
173              
174             The first parameter to the BLOCK is an object which maintains
175             most of the traversal state (except current node, which is $_).
176             I.
177             The "recurse" method may be useful.
178             Other methods should only be used in throw away tools, see L
179              
180             Methods:
181              
182             =over 4
183              
184             =item recurse
185              
186             Process child nodes of $_ now and return the result.
187              
188             This makes it easier to perform post-order and in-order
189             processing of a structure. Note that since the same "seen list"
190             is used, the child nodes aren't reprocessed.
191              
192             =item code
193              
194             The code reference of the BLOCK itself. Possible useful in
195             some situations.
196              
197             =item seen
198              
199             (Warning: I'm undecided whether this method should be public)
200              
201             Reference to the HASH used to track where we have visited.
202             You may want to modify it in some situations (though I haven't yet).
203             Beware circular references. The (current) convention used for the key
204             is in the source.
205              
206             =item want
207              
208             (Warning: I'm undecided whether this method should be public)
209              
210             The $want state described in L.
211              
212             =back
213              
214             =head1 EXAMPLES
215              
216             # command-line play
217             $ perl -MData::Rmap -le 'print join ":", rmap { $_ } 1,2,[3..5],\\6'
218             1:2:3:4:5:6
219              
220              
221             # Linearly number questions on a set of pages
222             my $qnum = 1;
223             rmap_hash {
224             $_->{qnum} = $qnum++ if($_->{qn});
225             } @pages;
226              
227              
228             # Grep recursively, finding ALL objects
229             use Scalar::Util qw(blessed);
230             my @objects = rmap_ref {
231             blessed($_) ? $_ : ();
232             } $data_structure;
233              
234              
235             # Grep recursively, finding public objects (note the cut)
236             use Scalar::Util qw(blessed);
237             my @objects = rmap_ref {
238             blessed($_) ? cut($_) : ();
239             } $data_structure;
240              
241              
242             # Return a modified structure
243             # (result flattening means we must cheat by cloning then modifying)
244             use Storable qw(dclone);
245             use Lingua::EN::Numbers::Easy;
246              
247             $words = [ 1, \2, { key => 3 } ];
248             $nums = dclone $words;
249             rmap { $_ = $N{$_} || $_ } $nums;
250              
251              
252             # Make an assertion about a structure
253             use Data::Dump;
254             rmap_ref {
255             blessed($_) && $_->isa('Question') && defined($_->name)
256             or die "Question doesn't have a name:", dump($_);
257             } @pages;
258              
259              
260             # Traverse a tree using localize state
261             $tree = [
262             one =>
263             two =>
264             [
265             three_one =>
266             three_two =>
267             [
268             three_three_one =>
269             ],
270             three_four =>
271             ],
272             four =>
273             [
274             [
275             five_one_one =>
276             ],
277             ],
278             ];
279              
280             @path = ('q');
281             rmap_to {
282             if(ref $_) {
283             local(@path) = (@path, 1); # ARRAY adds a new level to the path
284             $_[0]->recurse(); # does stuff within local(@path)'s scope
285             } else {
286             print join('.', @path), " = $_ \n"; # show the scalar's path
287             }
288             $path[-1]++; # bump last element (even when it was an aref)
289             } ARRAY|VALUE, $tree;
290              
291             # OUTPUT
292             # q.1 = one
293             # q.2 = two
294             # q.3.1 = three_one
295             # q.3.2 = three_two
296             # q.3.3.1 = three_three_one
297             # q.3.4 = three_four
298             # q.4 = four
299             # q.5.1.1 = five_one_one
300              
301             =head1 Troubleshooting
302              
303             Beware comma after block:
304              
305             rmap { print }, 1..3;
306             ^-------- bad news, you get and empty list:
307             rmap(sub { print $_; }), 1..3;
308              
309             If you don't import a function, perl's confusion may produce:
310              
311             $ perl -MData::Rmap -le 'rmap_scalar { print } 1'
312             Can't call method "rmap_scalar" without a package or object reference...
313              
314             $ perl -MData::Rmap -le 'rmap_scalar { $_++ } 1'
315             Can't call method "rmap_scalar" without a package or object reference...
316              
317             If there's two paths to an element, both will need to be cut.
318              
319             If there's two paths to an element, one will be taken randomly when
320             there is an intervening hash.
321              
322             Autovivification can lead to "Deep recursion" warnings if you test
323             C{this}{that}> instead of
324             C{this} && exists $_->{this}{that}>
325             as you may follow a long chain of "this"s
326              
327              
328             =head1 TODO
329              
330             put for @_ iin wrapper to allow parameters in a different wrapper,
331             solve localizing problem.
332              
333             Note that the package/class name of the L
334             is subject to change.
335              
336             The want and seen accessors may change or become useful
337             dynamic mutators.
338              
339             Store custom localized data about the traversal.
340             Seems too difficult and ugly when compare to doing it at the call site.
341             Should support multiple reentrancy so avoid the symbol table.
342              
343             C form to pass parameters.
344             Could potentially help localizing needs. (Maybe only recurse last item)
345              
346             Benchmark. Use array based object and/or direct access internally.
347              
348             rmap_objects shortcut for Scalar::Utils::blessed
349             (Let me know of other useful rmap_??? wrappers)
350              
351             Think about permitting different callback for different types.
352             The prototype syntax is a bit too flaky....
353              
354             Ensure that no memory leaks are possible, leaking the closure.
355              
356             Read http://www.cs.vu.nl/boilerplate/
357              
358             =head1 SEE ALSO
359              
360             map, grep, L's dclone, L's reftype and blessed
361              
362             Faint traces of treemap:
363              
364             http://www.perlmonks.org/index.pl?node_id=60829
365              
366             =head1 AUTHOR
367              
368             Brad Bowman Ermap@bereft.netE
369              
370             =head1 LICENCE AND COPYRIGHT
371            
372             Copyright (c) 2004-2008 Brad Bowman (Ermap@bereft.netE).
373             All rights reserved.
374            
375             This module is free software; you can redistribute it and/or
376             modify it under the same terms as Perl itself.
377             See L and L.
378            
379             This program is distributed in the hope that it will be useful,
380             but WITHOUT ANY WARRANTY; without even the implied warranty of
381             MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
382              
383             =cut
384              
385             # Early design discussion:
386             # http://www.perlmonks.org/index.pl?node_id=295642
387             # wantarray
388             # http://www.class-dbi.com/cgi-bin/wiki/index.cgi?AtomicUpdates
389              
390 1     1   26047 use warnings;
  1         3  
  1         32  
391 1     1   4 use strict;
  1         7  
  1         68  
392 1     1   7 use Carp qw(croak);
  1         1  
  1         82  
393 1     1   6 use Scalar::Util qw(blessed refaddr reftype);
  1         1  
  1         1486  
394              
395             require Exporter;
396             our @ISA = qw(Exporter);
397             our @EXPORT = qw(rmap rmap_all cut);
398             our %EXPORT_TAGS = (
399             types => [ qw(NONE VALUE HASH ARRAY SCALAR REF GLOB ALL) ],
400             );
401             our @EXPORT_OK = ( qw(rmap_scalar rmap_hash rmap_array rmap_ref rmap_to),
402             @{ $EXPORT_TAGS{types} } );
403              
404             $EXPORT_TAGS{all} = [ @EXPORT, @EXPORT_OK ];
405              
406              
407             # Uses stringifying instead of S::U::ref* b/c it's under control
408             my $cut = \do { my $thing }; # my = out of symbol table
409             sub cut {
410 9     9 1 49 die $cut = [@_]; # cut can return
411             }
412              
413             sub NONE() { 0 }
414             sub VALUE() { 1 }
415             sub HASH() { 2 }
416             sub ARRAY() { 4 }
417             sub SCALAR() { 8 }
418             sub REF() { 16 }
419             sub GLOB() { 32 }
420             sub ALL() { VALUE|HASH|ARRAY|SCALAR|REF|GLOB }
421             # Others like CODE, Regex, etc are ignored
422              
423             my %type_bits = (
424             HASH => HASH,
425             ARRAY => ARRAY,
426             SCALAR => SCALAR,
427             REF => REF,
428             GLOB => GLOB,
429             # reftype actually returns undef for:
430             VALUE => VALUE,
431             );
432              
433             sub new {
434 31     31 0 324 bless { code => $_[1], want => $_[2], seen => $_[3] }, $_[0];
435             }
436 0     0 1 0 sub code { $_[0]->{code} }
437 277     277 1 1979 sub want { $_[0]->{want} }
438 590     590 1 4786 sub seen { $_[0]->{seen} }
439 163     163 0 1857 sub call { $_[0]->{code}->($_[0]) }
440              
441             sub recurse {
442             # needs to deref $_ and *then* run the code, enter _recurse directly
443 9     9 1 56 $_[0]->_recurse(); # cut not needed as seen remembers
444             }
445              
446             sub rmap (&@) {
447 16     16 1 14069 __PACKAGE__->new(shift, VALUE, {})->_rmap(@_);
448             }
449              
450             sub rmap_all (&@) {
451 3     3 1 5467 __PACKAGE__->new(shift, ALL, {})->_rmap(@_);
452             }
453              
454             sub rmap_scalar (&@) {
455 2     2 1 17 __PACKAGE__->new(shift, VALUE|SCALAR|REF, {})->_rmap(@_);
456             }
457              
458             sub rmap_hash (&@) {
459 2     2 1 199 __PACKAGE__->new(shift, HASH, {})->_rmap(@_);
460             }
461              
462             sub rmap_array (&@) {
463 3     3 1 158 __PACKAGE__->new(shift, ARRAY, {})->_rmap(@_);
464             }
465              
466             sub rmap_ref (&@) {
467 1     1 1 4 __PACKAGE__->new(shift, HASH|ARRAY|SCALAR|REF, {})->_rmap(@_);
468             }
469              
470             sub rmap_to (&@) {
471 4     4 1 462 __PACKAGE__->new(shift, shift, {})->_rmap(@_);
472             }
473              
474             sub _rmap {
475 250     250   754 my $self = shift;
476 250         1054 my @return;
477              
478 250         2369 for (@_) { # just one after the wrapper call
479 313         520 my ($key, $type);
480              
481 313 100       1595 if($type = reftype($_)) {
482 171         813 $key = refaddr $_;
483 171 50       560 $type = $type_bits{$type} or next;
484             } else {
485 142         727 $key = "V:".refaddr(\$_); # prefix to distinguish from \$_
486 142         350 $type = VALUE;
487             }
488              
489 313 100       1032 next if ( exists $self->seen->{$key} );
490 277         808 $self->seen->{$key} = undef;
491              
492             # Call the $code
493 277 100       803 if($self->want & $type) {
494 163         603 my $e; # local($@) and rethrow caused problems
495             my @got;
496             {
497 163         241 local ($@); # don't trample, cut impl. should be transparent
  163         350  
498             # call in array context. pass block for reentrancy
499 163         581 @got = eval { $self->call() };
  163         823  
500 163         2396 $e = $@;
501             }
502              
503 163 100       408 if($e) {
504 14 100 66     164 if(ref($e) && $e == $cut) {
505 9         10 push @return, @$cut; # cut can add to return list
506 9         23 next; # they're cutting, don't recurse
507             } else {
508 5         40 die $e;
509             }
510             }
511 149         733 push @return, @got;
512             }
513              
514 263         2044 push @return, $self->_recurse(); # process $_ node
515             }
516 240         2608 return @return;
517             }
518              
519             sub _recurse {
520 272     272   1215 my $self = shift;
521 272 50 100     2122 my $type = $type_bits{reftype($_) || 'VALUE'} or return;
522 272         646 my @return;
523              
524             # Recurse appropriately, keeping $_ alias
525 272 100       2586 if ($type & HASH) {
    100          
    100          
    100          
526 41         576 push @return, $self->_rmap($_) for values %$_;
527             } elsif ($type & ARRAY) {
528             # Does this change cut behaviour? No, cut is one scalar ref
529             #push @return, _rmap($code, $want, $seen, $_) for @$_;
530 49         135 push @return, $self->_rmap(@$_);
531             } elsif ($type & (SCALAR|REF) ) {
532 62         134 push @return, $self->_rmap($$_);
533             } elsif ($type & GLOB) {
534             # SCALAR is always there, undef may be unused or set to undef
535 8         19 push @return, $self->_rmap(*$_{SCALAR});
536 8 50       20 defined *$_{ARRAY} and
537             push @return, $self->_rmap(*$_{ARRAY});
538 8 50       18 defined *$_{HASH} and
539             push @return, $self->_rmap(*$_{HASH});
540             # Is it always: *f{GLOB} == \*f ?
541             # Also CODE PACKAGE NAME GLOB
542             }
543 267         2081 return @return;
544             }
545              
546             1;