File Coverage

blib/lib/ETL/Pipeline/Output/Memory.pm
Criterion Covered Total %
statement 26 26 100.0
branch 4 6 66.6
condition n/a
subroutine 8 8 100.0
pod 3 3 100.0
total 41 43 95.3


line stmt bran cond sub pod time code
1             =pod
2              
3             =head1 NAME
4              
5             ETL::Pipeline::Output::Memory - Store records in memory
6              
7             =head1 SYNOPSIS
8              
9             # Save the records into a giant list.
10             use ETL::Pipeline;
11             ETL::Pipeline->new( {
12             input => ['UnitTest'],
13             mapping => {First => 'Header1', Second => 'Header2'},
14             output => ['Memory']
15             } )->process;
16              
17             # Save the records into a hash, keyed by an identifier.
18             use ETL::Pipeline;
19             ETL::Pipeline->new( {
20             input => ['UnitTest'],
21             mapping => {First => 'Header1', Second => 'Header2'},
22             output => ['Memory', key => 'First']
23             } )->process;
24              
25             =head1 DESCRIPTION
26              
27             B<ETL::Pipeline::Output::Memory> writes the record into a Perl data structure,
28             in memory. The records can be accessed later in the same script. This output
29             destination comes in useful when processing multiple input files.
30              
31             B<ETL::Pipeline::Output::Memory> offers two ways of storing the records - in
32             a hash or in a list. B<ETL::Pipeline::Output::Memory> always puts records into
33             the list. If the L</key> attribute is set, then B<ETL::Pipeline::Output::Memory>
34             also saves records into the hash.
35              
36             The hash can be used for faster look-up. Use L</key> when the record contains an
37             identifier.
38              
39             =cut
40              
41             package ETL::Pipeline::Output::Memory;
42              
43 1     1   34 use 5.014000;
  1         4  
44 1     1   5 use warnings;
  1         2  
  1         36  
45              
46 1     1   5 use Moose;
  1         2  
  1         10  
47 1     1   6732 use String::Util qw/hascontent/;
  1         2  
  1         414  
48              
49              
50             our $VERSION = '2.00';
51              
52              
53             =head1 METHODS & ATTRIBUTES
54              
55             =head2 Arguments for L<ETL::Pipeline/output>
56              
57             =head3 key
58              
59             Optional. If you want to store the records in a hash, then this is the field
60             name whose value becomes the key. When set, records also go into L</hash>.
61              
62             If you don't specify a B<key>, then records are stored only in an unsorted array -
63             L</list>.
64              
65             =cut
66              
67             has 'key' => (
68             default => '',    # empty default: write() only uses the hash when the key has content
69             is => 'ro',
70             isa => 'Str',
71             );
72              
73              
74             =head2 Attributes
75              
76             =head3 hash
77              
78             Hash reference used when L</key> is set. The key is the value of the field
79             identified by L</key>. The value is an array reference. The array contains all
80             of the records with that same key.
81              
82             =cut
83              
84             has 'hash' => (
85             default => sub { {} },    # each object starts with its own empty hash
86             handles => {
87             _add_to_id => 'set',    # private: store an array reference under an identifier
88             number_of_ids => 'count',    # number of identifiers currently in the hash
89             with_id => 'get',    # array reference for an identifier; write() checks the result for undef
90             },
91             is => 'ro',
92             isa => 'HashRef[ArrayRef[HashRef[Any]]]',    # identifier => array of records, each record a hash reference
93             traits => [qw/Hash/],
94             );
95              
96              
97             =head3 list
98              
99             B<list> is an array reference that stores records. The records are saved in
100             the same order as they are read from the input source. Each list element is a
101             hash reference (the record).
102              
103             B<list> always has a complete set of records, whether L</key> is set or not.
104              
105             =cut
106              
107             has 'list' => (
108             default => sub { [] },    # each object starts with its own empty array
109             handles => {
110             _add_record => 'push',    # private: append one record; used by write()
111             number_of_records => 'count',    # number of records currently in the array
112             records => 'elements',    # the stored records as a list of hash references
113             },
114             is => 'ro',
115             isa => 'ArrayRef[HashRef[Any]]',    # each element is one record (a hash reference)
116             traits => [qw/Array/],
117             );
118              
119              
120             =head2 Methods
121              
122             =head3 close
123              
124             This method doesn't do anything. There's nothing to close or shut down.
125              
126             =cut
127              
128       2 1   sub close {}    # intentionally empty: in-memory storage has nothing to close or shut down
129              
130              
131             =head3 number_of_ids
132              
133             Count of unique identifiers. This may not be the same as the number of records.
134             One key may have multiple records.
135              
136             B<number_of_ids> only works if the L</key> attribute was set.
137              
138              
139             =cut
140              
141             # This method is defined by the "hash" attribute.
142              
143              
144             =head3 number_of_records
145              
146             Count of records currently in storage.
147              
148             =cut
149              
150             # This method is defined by the "list" attribute.
151              
152              
153             =head3 open
154              
155             This method doesn't do anything. There's nothing to open or set up.
156              
157             =cut
158              
159       2 1   sub open {}    # intentionally empty: in-memory storage has nothing to open or set up
160              
161              
162             =head3 records
163              
164             Returns a list of all the records currently in storage. The list contains hash
165             references - one reference for each record.
166              
167             =cut
168              
169             # This method is defined by the "list" attribute.
170              
171              
172             =head3 with_id
173              
174             B<with_id> returns the records for a given key. Pass in a value for the
175             key and B<with_id> returns an array reference of records, or C<undef> if no record has that key.
176              
177             B<with_id> only works if the L</key> attribute was set.
178              
179             =cut
180              
181             # This method is defined by the "hash" attribute.
182              
183              
184             =head3 write
185              
186             Save the current record into memory. Your script can access the records after
187             calling L<ETL::Pipeline/process> like this - C<< $etl->output->records >>.
188             Both L</records> and L</with_id> can be used.
189              
190             If L</key> is set, B<write> saves the record in both L</hash> and L</list>.
191             We're storing a reference, not a copy, so there's very little cost. And it
192             allows methods such as L</number_of_records> to work.
193              
194             B<WARNING:> This method stores a I<reference> to the original record. If the
195             input source re-uses the hash or embedded references, it will update all of the
196             currently stored values too. B<ETL::Pipeline::Output::Memory> does not make a
197             copy.
198              
199             =cut
200              
201             sub write {
202 4     4 1 8 my ($self, $etl, $record) = @_;
203 4         92 my $key = $self->key;
204              
205             # With a key field, the record is also filed in the hash under its identifier.
206             # Every record that gets past the check below is appended to the list.
207 4         5 my $list;
208 4 100       11 if (hascontent( $key )) {
209             # An undefined value (NULL) is an invalid key. Empty strings are okay, though.
210             # If the data has NULLs, then your script should translate them first.
211 2         17 my $id = $record->{$key};
212 2 50       5 return $etl->log( 'ERROR', "The field '$key' was undefined" )
213             unless defined $id;    # early return: the rejected record is not added to the list either
214              
215 2         65 $list = $self->with_id( $id );
216 2 50       4 unless (defined $list) {    # first record seen for this identifier
217 2         5 $list = [];
218 2         60 $self->_add_to_id( $id, $list );
219             }
220 2         4 push @$list, $record;    # same array reference that the hash holds for this identifier
221             }
222 4         164 $self->_add_record( $record );    # stores the reference itself, not a copy of the record
223             }
224              
225              
226             =head1 SEE ALSO
227              
228             L<ETL::Pipeline>, L<ETL::Pipeline::Output>
229              
230             =cut
231              
232             with 'ETL::Pipeline::Output';
233              
234              
235             =head1 AUTHOR
236              
237             Robert Wohlfarth <robert.j.wohlfarth@vumc.org>
238              
239             =head1 LICENSE
240              
241             Copyright 2021 (c) Vanderbilt University
242              
243             This program is free software; you can redistribute it and/or modify it under
244             the same terms as Perl itself.
245              
246             =cut
247              
248 1     1   8 no Moose;
  1         2  
  1         7  
249             __PACKAGE__->meta->make_immutable;