File Coverage

blib/lib/Data/Search.pm
Criterion Covered Total %
statement 9 68 13.2
branch 0 72 0.0
condition 0 60 0.0
subroutine 3 6 50.0
pod 1 2 50.0
total 13 208 6.2


line stmt bran cond sub pod time code
1             package Data::Search;
2 1     1   21744 use 5.005;
  1         3  
  1         36  
3 1     1   5 use strict;
  1         2  
  1         29  
4 1     1   4 use warnings;
  1         5  
  1         1164  
5             require Exporter;
6             our $VERSION = '0.03';
7             our @ISA = qw(Exporter);
8             our @EXPORT= qw(datasearch);
9              
10             =head1 NAME
11              
12             Data::Search - Data structure search
13              
14             =head1 SYNOPSIS
15              
16             use Data::Search;
17              
18             $data = { ... };
19             @results = datasearch( data => $data, search => 'values',
20             find => qr/string/, return => 'hashcontainer' );
21              
22             =head1 DESCRIPTION
23              
24             =head2 datasearch - Search data structures
25              
26             This function allows you to search arbitrarily large/complex
27             data structures for particular elements.
28             You can search hash keys, or hash/array values, for a number/string
29             or regular expression.
30             The datasearch function can return either the found hash keys, the found
31             values (which could be data structures themselves) or the container
32             of the key or value (which is always going to be a data structure)
33              
34             By default, hash keys are searched, and the corresponding values are returned.
35             To search hash or array values, specify SEARCH => 'values'.
36             To search both values and keys, specify SEARCH => 'all'.
37              
38             To find an exact match of a string, set FIND => 'string'. To use a regular
39             expression use the qr operator: FIND => qr/^name.*/i
40             FIND may also be a 2 element array, to search for a key-value pair.
41              
42             To return the hash keys found (or the hash keys corresponding to
43             searched values), specify RETURN => 'keys'.
44             To return both keys and values specify RETURN => 'all'.
45              
46             You can also return the data structure containing the found key/value.
47              
48             To do that, specify RETURN => 'container'. This will return the immediate
49             container, either a hash or an array reference. You can also choose to
50             get the closest hash container (even if the value was inside an array)
51             by specifying RETURN => 'hashcontainer'.
52              
53             Similarly, you can return the closest array container (even though the
54             value found was a hash value or hash key) by specifying
55             RETURN => 'arraycontainer'
56              
57             Also, you can get an outer container by doing RETURN => 'container:xyz'
58             in which case the container returned would be a structure pointed to
59             by key xyz (if found to contain the search element somewhere inside it).
60             Please see the examples at the end of this document.
61              
62             ARGUMENTS
63             The following arguments are accepted (case-insensitively).
64             The only mandatory arguments are DATA and FIND.
65              
66             data => Reference of structure to search
67             search => What elements to search: keys|values|all (default: keys)
68             find => Look for: string | qr/regex/ | [ key => value ]
69             return => What to return: keys|values|all|
70             container|hashcontainer|arraycontainer|container:key_name
71            
72             RETURN VALUES
73              
74             Returns a list of matching elements (could be strings or references
75             to internal parts (hashes/arrays) of the data structure.
76              
77             EXAMPLES
78              
79             my @results = datasearch( data => $ref, find => 'name' );
80             That will return all values pointed to by hash keys called 'name'
81              
82             my @results = datasearch( data => $ref, search => 'values',
83             find => qr/alex/i, return => 'key' );
84             That will return all keys that point to strings that match "alex"
85             case insensitively.
86              
87             my @results = datasearch( data => $ref, search => 'keys',
88             find => qr/_id$/, return => 'all' );
89             That will return all keys that end with "_id", and all values
90             pointed to by those keys.
91              
92             my @results = datasearch( data => $ref, return => 'container:myrecord',
93             find => [ suffix => 'Jr' ] )
94             That implies search=>'all', searches for a key 'suffix'
95             that has value 'Jr', and returns any matching hashes pointed to by a key
96             named myrecord (even if suffix is deep inside those hashes)
97              
98             =cut
99              
100             sub datasearch {
101 0     0 1   my $args = get_args( [qw(FIND SEARCH RETURN DATA)], @_ );
102              
103 0 0         die "FIND argument is required" unless defined $args->{FIND};
104 0 0         die "DATA argument is required" unless defined $args->{DATA};
105              
106 0 0 0       my $sk = 1 if !$args->{SEARCH} || $args->{SEARCH} =~ /key|all/
      0        
107             or ref($args->{FIND}) eq 'ARRAY';
108 0 0 0       my $sv = 1 if $args->{SEARCH} && $args->{SEARCH} =~ /value|all/;
109              
110 0 0 0       my $rv = 1 if !$args->{RETURN} || $args->{RETURN} =~ /value|all/;
111 0 0 0       my $rk = 1 if $args->{RETURN} && $args->{RETURN} =~ /key|all/;
112 0 0 0       my $rc = $args->{RETURN} && $args->{RETURN} =~ /container/
113             ? $args->{RETURN} : 0;
114              
115 0           my (@results, @refs, $container);
116 0           @results = _datasearch( $args->{DATA}, $args->{FIND}, $sk, $sv, $rv, $rk,
117             $rc, \@refs, undef, undef, undef, 0 );
118              
119 0           my @unique;
120 0           foreach my $p ( @results ) { # Weed out duplicate references
121 0 0 0       push @unique, $p unless ref($p) and grep { ref && $_ == $p } @unique;
  0 0          
122             }
123 0           return @unique;
124             } ## end sub datasearch
125              
126             # Internal recursive function called by datasearch
127             sub _datasearch {
128 0     0     my ($p, $f, $sk, $sv, $rv, $rk, $rc, $refs, $container, $key, $rr, $depth)
129             = @_;
130             # print "DEPTH IN=$depth\n";
131 0           my ($root) = $rc =~ /:(.+)/;
132 0 0         if ( ref($p) ) {
133 0 0         if ( grep { $p == $_ } @$refs ) {
  0            
134 0           warn "Skipping duplicate reference to $p";
135 0           return;
136             }
137 0           push @$refs, $p;
138             }
139              
140 0           my @results;
141 0 0 0       if ( ref($p) && $p =~ /HASH/ ) {
    0 0        
    0 0        
      0        
      0        
      0        
142 0 0         $container = $p unless $rc =~ /array/;
143 0           foreach my $k ( keys %$p ) {
144 0 0 0       $rr = $p->{$k} if $root && $root eq $k;
145 0 0         my ($f1, $f2) = ref($f) eq 'ARRAY' ? ($f->[0], $f->[1]) : $f;
146 0 0 0       if ( $sk and ref($f1) eq 'Regexp' && $k =~ /$f1/ || $k eq $f1 ) {
      0        
147 0 0 0       if ( ! defined $f2 or
      0        
      0        
148             ref($f2) eq 'Regexp' && $p->{$k} =~ /$f2/
149             || $p->{$k} eq $f2 ) {
150 0 0         if ( $rc ) {
151 0 0         if ( $root ) {
152 0 0         push @results, $rr if $rr;
153             } else {
154 0           push @results, $container;
155             }
156             } else {
157 0 0         push @results, $k if $rk;
158 0 0         push @results, $p->{$k} if $rv;
159             }
160             }
161             }
162 0 0         if ( my @r = _datasearch( $p->{$k}, $f, $sk, $sv, $rv, $rk,
163             $rc, $refs, $container, $k, $rr, $depth+1 ) ) {
164 0           push @results, @r;
165             }
166             }
167             } elsif ( ref($p) && $p =~ /ARRAY/ ) {
168 0 0         $container = $p unless $rc =~ /hash/;
169 0           foreach ( @$p ) {
170 0 0         if ( my @r = _datasearch( $_, $f, $sk, $sv, $rv, $rk,
171             $rc, $refs, $container, $key, $rr, $depth+1 ) ) {
172 0           push @results, @r;
173             }
174             }
175             } elsif ( !ref($p) && defined $p && $sv and
176             ref($f) eq 'Regexp' && $p =~ /$f/ || $p eq $f ) {
177 0 0         if ( $rc ) {
178 0 0         if ( $root ) {
179 0 0         push @results, $rr if $rr;
180             } else {
181 0           push @results, $container;
182             }
183             } else {
184 0 0         push @results, $p if $rv;
185 0 0 0       push @results, $key if $rk && defined $key;
186             }
187             }
188             # print "DEPTH OUT=$depth\n";
189 0           return @results;
190             } ## end sub datasearch
191              
192             # Return a hash of named parameters (keys converted to upper case)
193             sub get_args {
194             # Called as get_args(@_) or as get_args( [arg, arg2...], @_ )
195 0 0   0 0   my $valid_arg_list = ( ref($_[0]) eq 'ARRAY' ? shift : '' );
196              
197 0 0         die "get_args got odd number of arguments"
198             unless (@_/2 == int(@_/2));
199              
200 0           my $args;
201 0           for ( my $n = 0 ; $n < $#_ ; $n += 2 ) {
202 0           $args->{ uc $_[$n] } = $_[ $n + 1 ];
203             }
204              
205             # Do argument checking, if list of valid arguments was given
206 0 0         if ($valid_arg_list) {
207 0           foreach my $arg (keys %$args) {
208 0 0         die "get_args: Argument \"$arg\" is invalid"
209             unless grep (/^\Q$arg\E$/, @$valid_arg_list);
210             }
211             }
212 0           return $args;
213             }
214              
215             1;
216