File Coverage

blib/lib/Data/DPath/Flatten.pm
Criterion Covered Total %
statement 26 33 78.7
branch 13 22 59.0
condition n/a
subroutine 5 5 100.0
pod 1 1 100.0
total 45 61 73.7


line stmt bran cond sub pod time code
1             =head1 NAME
2            
3             Data::DPath::Flatten - Convert complex data structure into key/value pairs
4            
5             =head1 SYNOPSIS
6            
7             use Data::DPath::Flatten qw/flatten/;
8            
9             # Data can be arrays or hashes.
10             my $hash = flatten( \@record );
11             my $hash = flatten( \%record );
12            
13             # Aliases add more human readable field names.
14             my $hash = flatten( \@record );
15             my $hash = flatten( \%record );
16            
17             =head1 DESCRIPTION
18            
19             B transforms an arbitrary data structure into a hash of
20             key/value pairs.
21            
22             Why? To store raw data in an SQL database. L returns
23             arbitrary data structures. For example, Excel files return an array but XML
24             files a hash. B gives me a unique key for each field in
25             the file, regardless of the Perl data structure.
26            
27             Use B where you need key/value pairs from arbitrary data.
28             The module traverses nested data structures of any depth and converts into a
29             single dimension.
30            
31             =cut
32            
33             package Data::DPath::Flatten;
34            
35 2     2   76032 use 5.14.0;
  2         20  
36 2     2   11 use Carp;
  2         4  
  2         166  
37 2     2   12 use Exporter qw/import/;
  2         4  
  2         1023  
38            
39            
40             our @EXPORT = (qw/flatten/);
41             our $VERSION = '1.00';
42            
43            
44             =head1 FUNCTIONS
45            
46             =head3 flatten( $data )
47            
48             B takes an arbitrary data structure and converts into a one level array
49             reference. Essentially, it flattens out nested data structures.
50            
51             B returns a hash reference. The keys are L paths into the
52             original record. The value is the raw data value from the file.
53            
54             The parameter C<$data> is required. It is a reference to the input data
55             structure.
56            
57             # Recursively traverse arrays and hashes.
58             my $hash = flatten( \@fields );
59             my $hash = flatten( \%record );
60            
61             # Scalars work, but it's kind of pointless. These come out the same.
62             my $hash = flatten( $single );
63             my $hash = flatten( \$single );
64            
65             When B encounters a HASH or ARRAY reference, it recursively traverses
66             the nested structure. Circular references are traversed only once, to avoid
67             infinite loops.
68            
69             SCALAR references are dereferenced and the value stored.
70            
71             All other references and objects are stored as references.
72            
73             =cut
74            
75             sub flatten {
76 4     4 1 2651 my $data = shift;
77            
78             # Flatten the original data into a one level hash. Make sure I get a new
79             # reference for every call.
80 4         7 my $new = {};
81 4         10 _step( $data, $new, '', {} );
82            
83             # Return the flattened hash reference.
84 4         10 return $new;
85             }
86            
87            
88             #-------------------------------------------------------------------------------
89             # Internal subroutines.
90            
91             # Recursively traverse the data structure, building the path string as it goes.
92             # The initial path is an empty string. This code adds the leading "/".
93             #
94             # The $seen parameter stops circular references from causing infinite loops. We
95             # traverse any reference only once.
96             #
97             # I looked into using existing data traversal modules such as Data::Rmap,
98             # Data::Traverse, Data::Visitor, or Data::Walk. Simple recurrsion was so much
99             # easier. I would have to use all kinds of gloabl variables and conditionals
100             # just to build the correct paths. This works and handles circular references.
101             sub _step {
102 50     50   72 my ($from, $to, $path, $seen) = @_;
103            
104             # Process this node of the structure.
105 50 50       106 if (!defined( $from )) {
    100          
    50          
    100          
    50          
106             # No op!
107             } elsif (ref( $from ) eq '') {
108 24 50       30 if ($path eq '') { $to->{'/' } = $from; }
  0         0  
109 24         48 else { $to->{$path} = $from; }
110             } elsif (ref( $from) eq 'SCALAR') {
111 0 0       0 if ($path eq '') { $to->{'/' } = $$from; }
  0         0  
112 0         0 else { $to->{$path} = $$from; }
113             } elsif (ref( $from) eq 'HASH') {
114 16 100       28 unless (exists $seen->{$from}) {
115 15         26 $seen->{$from}++;
116 15         37 while (my ($key, $value) = each %$from) {
117 24 50       37 $key = "\"$key\"" if m/\.\.|\*|::ancestor(-or-self)?|\/\/|\[|\]/;
118 24         48 _step( $value, $to, "$path/$key", $seen );
119             }
120             }
121             } elsif (ref( $from ) eq 'ARRAY') {
122 10 100       22 unless (exists $seen->{$from}) {
123 9         14 $seen->{$from}++;
124 9         24 while (my ($index, $value) = each @$from) {
125 22         46 _step( $value, $to, "$path/*[$index]", $seen );
126             }
127             }
128             } else {
129 0 0       0 if ($path eq '') { $to->{'/' } = $from; }
  0         0  
130 0         0 else { $to->{$path} = $from; }
131             }
132            
133 50         114 return;
134             }
135            
136            
137             =head1 SEE ALSO
138            
139             L
140            
141             =head1 REPOSITORY
142            
143             L
144            
145             =head1 AUTHOR
146            
147             Robert Wohlfarth
148            
149             =head1 COPYRIGHT AND LICENSE
150            
151             Copyright (c) 2022 Robert Wohlfarth
152            
153             This module is free software; you can redistribute it and/or modify it
154             under the same terms as Perl 5.10.0. For details, see the full text of the
155             license in the file LICENSE.
156            
157             This program is distributed in the hope that it will be useful, but
158             without any warranty; without even the implied
159            
160             =cut
161            
162             # Required by Perl to load the module.
163             1;