line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
=pod |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
=head1 NAME |
4
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
ETL::Pipeline::Output - Role for ETL::Pipeline output destinations |
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
=head1 SYNOPSIS |
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
use Moose; |
10
|
|
|
|
|
|
|
with 'ETL::Pipeline::Output'; |
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
sub write_record { |
13
|
|
|
|
|
|
|
# Add code to save your data here |
14
|
|
|
|
|
|
|
... |
15
|
|
|
|
|
|
|
} |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
=head1 DESCRIPTION |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
L<ETL::Pipeline> reads data from an input source, transforms it, and writes |
20
|
|
|
|
|
|
|
the information to an output destination. This role defines the required |
21
|
|
|
|
|
|
|
methods and attributes for output destinations. Every output destination |
22
|
|
|
|
|
|
|
B<must> implement B<ETL::Pipeline::Output>. |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
L<ETL::Pipeline> works by calling the methods defined in this role. The role |
25
|
|
|
|
|
|
|
presents a common interface. It works as a shim, tying database or file access |
26
|
|
|
|
|
|
|
modules with L<ETL::Pipeline>. For example, SQL databases may use L<DBI> or |
27
|
|
|
|
|
|
|
L<DBIx::Class>. |
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
=head2 Adding a new output destination |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
While L<ETL::Pipeline> provides a couple of generic output destinations, the real |
32
|
|
|
|
|
|
|
value of L<ETL::Pipeline> comes from adding your own, business specific, |
33
|
|
|
|
|
|
|
destinations... |
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
=over |
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
=item 1. Create a Perl module. Name it C<ETL::Pipeline::Output::...>. |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
=item 2. Make it a Moose object: C<use Moose;>. |
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
=item 3. Include the role: C<with 'ETL::Pipeline::Output';>. |
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
=item 4. Add the L</write_record> method: C<sub write_record { ... }>. |
44
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
=item 5. Add the L</set> method: C<sub set { ... }>. |
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
=item 6. Add the L</new_record> method: C<sub new_record { ... }>. |
48
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
=item 7. Add the L</configure> method: C<sub configure { ... }>. |
50
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
=item 8. Add the L</finish> method: C<sub finish { ... }>. |
52
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
=back |
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
Ta-da! Your output destination is ready to use: |
56
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
$etl->output( 'YourNewDestination' ); |
58
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
=head2 Provided out of the box |
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
L<ETL::Pipeline> comes with a couple of generic output destinations... |
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
=over |
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
=item L<ETL::Pipeline::Output::Hash> |
66
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
Stores records in a Perl hash. Useful for loading support files and tying |
68
|
|
|
|
|
|
|
them together later. |
69
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
=item L<ETL::Pipeline::Output::Perl> |
71
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
Executes a subroutine against the record. Useful for debugging data issues. |
73
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
=back |
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
=cut |
77
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
package ETL::Pipeline::Output; |
79
|
9
|
|
|
9
|
|
5057
|
use Moose::Role; |
|
9
|
|
|
|
|
12
|
|
|
9
|
|
|
|
|
55
|
|
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
our $VERSION = '2.00'; |
83
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
=head1 METHODS & ATTRIBUTES |
86
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
=head3 pipeline |
88
|
|
|
|
|
|
|
|
89
|
|
|
|
|
|
|
B<pipeline> returns the L<ETL::Pipeline> object using this output destination. You |
90
|
|
|
|
|
|
|
can access information about the pipeline inside the methods. |
91
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
L<ETL::Pipeline/output> automatically sets this attribute. |
93
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
=cut |
95
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
# Read-only reference to an ETL::Pipeline object. The attribute is required,
# so object construction fails when no pipeline is supplied.
has pipeline => (
    required => 1,
    isa      => 'ETL::Pipeline',
    is       => 'ro',
);
101
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
=head2 Arguments for L<ETL::Pipeline/output> |
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
B<Note:> This role defines no attributes that are set with the |
106
|
|
|
|
|
|
|
L<ETL::Pipeline/output> command. Each child class defines its own options. |
107
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
=head2 Called from L<ETL::Pipeline/process> |
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
=head3 set |
111
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
B<set> temporarily saves the value of an individual output field. |
113
|
|
|
|
|
|
|
L</write_record> will later copy these values to the correct destination. |
114
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
L<ETL::Pipeline/process> calls B<set> inside of a loop - once for each field. |
116
|
|
|
|
|
|
|
B<set> accepts two parameters: |
117
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
=over |
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
=item 1. The output field name. |
121
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
=item 2. The value for that field. |
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
=back |
125
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
There is no return value. |
127
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
=head4 Couldn't you just use a hash? |
129
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
B<set> allows your output destination to choose the in-memory storage that |
131
|
|
|
|
|
|
|
best fits. This might be a hash, a list, or an object of some type. B<set> |
132
|
|
|
|
|
|
|
merely provides a common interface for L<ETL::Pipeline>. |
133
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
=cut |
135
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
requires 'set'; |
137
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
=head3 write_record |
140
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
B<write_record> sends the current record to its final destination. |
142
|
|
|
|
|
|
|
L<ETL::Pipeline/process> calls this method once for each record. |
143
|
|
|
|
|
|
|
B<write_record> is the I<last> thing done with this record. |
144
|
|
|
|
|
|
|
|
145
|
|
|
|
|
|
|
B<write_record> returns a boolean flag. A I<true> value means success saving |
146
|
|
|
|
|
|
|
the record. A I<false> value indicates an error. |
147
|
|
|
|
|
|
|
|
148
|
|
|
|
|
|
|
When your code encounters an error, call the L</error> method like this... |
149
|
|
|
|
|
|
|
|
150
|
|
|
|
|
|
|
return $self->error( 'Error message here' ); |
151
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
L</error> returns a false value. The default L</error> does nothing. To save |
153
|
|
|
|
|
|
|
errors, override L</error> and add the new functionality. When overriding |
154
|
|
|
|
|
|
|
L</error>, it is not necessary to return anything. B<ETL::Pipeline::Output> |
155
|
|
|
|
|
|
|
ensures that L</error> I<always> returns false. |
156
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
For fatal errors, use the C<croak> command from L<Carp> instead. |
158
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
=cut |
160
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
requires 'write_record'; |
162
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
=head3 new_record |
165
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
Start a brand new, clean record. L</write_record> automatically calls |
167
|
|
|
|
|
|
|
B<new_record>, every time, after L</write_record> finishes. This means that |
168
|
|
|
|
|
|
|
even if the save failed, L</write_record> still calls B<new_record>. The |
169
|
|
|
|
|
|
|
original record with the error is lost. |
170
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
=cut |
172
|
|
|
|
|
|
|
|
173
|
|
|
|
|
|
|
requires 'new_record'; |
174
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
# Begin a fresh record each time "configure" returns and each time
# "write_record" returns. Only the invocant is used; "new_record" receives no
# additional arguments.
after configure => sub {
    my $self = shift;
    $self->new_record;
};
after write_record => sub {
    my $self = shift;
    $self->new_record;
};
177
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
|
179
|
|
|
|
|
|
|
=head3 configure |
180
|
|
|
|
|
|
|
|
181
|
|
|
|
|
|
|
B<configure> prepares the output destination. It can open files, make database |
182
|
|
|
|
|
|
|
connections, or anything else required before saving the first record. |
183
|
|
|
|
|
|
|
|
184
|
|
|
|
|
|
|
Why not do this in the class constructor? Some roles add automatic |
185
|
|
|
|
|
|
|
configuration. Those roles use the usual Moose method modifiers, which would |
186
|
|
|
|
|
|
|
not work with the constructor. |
187
|
|
|
|
|
|
|
|
188
|
|
|
|
|
|
|
This B<configure> - for the output destination - is called I<after> the |
189
|
|
|
|
|
|
|
L<ETL::Pipeline::Input/configure> of the input source. This method can expect |
190
|
|
|
|
|
|
|
that the input source is fully configured and ready for use. |
191
|
|
|
|
|
|
|
|
192
|
|
|
|
|
|
|
=cut |
193
|
|
|
|
|
|
|
|
194
|
|
|
|
|
|
|
requires 'configure'; |
195
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
|
197
|
|
|
|
|
|
|
=head3 finish |
198
|
|
|
|
|
|
|
|
199
|
|
|
|
|
|
|
B<finish> shuts down the output destination. It can close files, disconnect |
200
|
|
|
|
|
|
|
from the database, or anything else required to cleanly terminate the output. |
201
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
Why not do this in the class destructor? Some roles add automatic functionality |
203
|
|
|
|
|
|
|
via Moose method modifiers. This would not work with a destructor. |
204
|
|
|
|
|
|
|
|
205
|
|
|
|
|
|
|
This B<finish> - for the output destination - is called I<before> the |
206
|
|
|
|
|
|
|
L<ETL::Pipeline::Input/finish> of the input source. This method should expect |
207
|
|
|
|
|
|
|
that the input source has reached end-of-file by this point, but is not |
208
|
|
|
|
|
|
|
closed yet. |
209
|
|
|
|
|
|
|
|
210
|
|
|
|
|
|
|
=cut |
211
|
|
|
|
|
|
|
|
212
|
|
|
|
|
|
|
requires 'finish'; |
213
|
|
|
|
|
|
|
|
214
|
|
|
|
|
|
|
|
215
|
|
|
|
|
|
|
=head2 Other methods and attributes |
216
|
|
|
|
|
|
|
|
217
|
|
|
|
|
|
|
=head3 record_number |
218
|
|
|
|
|
|
|
|
219
|
|
|
|
|
|
|
The B<record_number> attribute tells you how many total records have been |
220
|
|
|
|
|
|
|
saved by L</write_record>. The first record is always B<1>. |
221
|
|
|
|
|
|
|
|
222
|
|
|
|
|
|
|
B<ETL::Pipeline::Output> automatically increments the counter after a successful |
223
|
|
|
|
|
|
|
L</write_record>. The L</write_record> method should not change |
224
|
|
|
|
|
|
|
B<record_number>. |
225
|
|
|
|
|
|
|
|
226
|
|
|
|
|
|
|
=head3 decrement_record_number |
227
|
|
|
|
|
|
|
|
228
|
|
|
|
|
|
|
This method decreases L</record_number> by one. It can be used to I<back out> |
229
|
|
|
|
|
|
|
header records from the count. |
230
|
|
|
|
|
|
|
|
231
|
|
|
|
|
|
|
=head3 increment_record_number |
232
|
|
|
|
|
|
|
|
233
|
|
|
|
|
|
|
This method increases L</record_number> by one. |
234
|
|
|
|
|
|
|
|
235
|
|
|
|
|
|
|
=cut |
236
|
|
|
|
|
|
|
|
237
|
|
|
|
|
|
|
# Integer counter built on the native "Counter" trait, starting at '0'. There
# is no public writer; the value changes only through the two delegated
# methods, which map onto the trait's "inc" and "dec".
has record_number => (
    traits  => ['Counter'],
    isa     => 'Int',
    is      => 'ro',
    default => '0',
    handles => {
        increment_record_number => 'inc',
        decrement_record_number => 'dec',
    },
);
247
|
|
|
|
|
|
|
|
248
|
|
|
|
|
|
|
# Wrap "write_record" so that the role keeps the record count up to date. The
# wrapped method is called in scalar context with the caller's arguments, its
# result is handed back unchanged, and the counter advances only when that
# result is true.
around write_record => sub {
    # Remove the wrapped code reference and the invocant; whatever is left in
    # @_ belongs to the wrapped method.
    my ($next, $self) = splice @_, 0, 2;

    my $saved = $self->$next( @_ );
    if ($saved) {
        $self->increment_record_number;
    }
    return $saved;
};
256
|
|
|
|
|
|
|
|
257
|
|
|
|
|
|
|
|
258
|
|
|
|
|
|
|
=head3 error |
259
|
|
|
|
|
|
|
|
260
|
|
|
|
|
|
|
B<error> handles errors from L</write_record>. The default B<error> discards |
261
|
|
|
|
|
|
|
any error messages. Override B<error> if you want to capture the messages |
262
|
|
|
|
|
|
|
and/or the record that caused it. |
263
|
|
|
|
|
|
|
|
264
|
|
|
|
|
|
|
B<error> I<always> returns a false value - even if you override it. |
265
|
|
|
|
|
|
|
|
266
|
|
|
|
|
|
|
=cut |
267
|
|
|
|
|
|
|
|
268
|
|
|
|
1
|
1
|
|
# Default error handler: deliberately does nothing and returns nothing.
sub error { return; }

# Call the wrapped "error" method with the caller's arguments, ignore whatever
# it returns, and always hand back 0 so the result is false.
around error => sub {
    my ($handler, $self) = splice @_, 0, 2;

    $self->$handler( @_ );
    return 0;
};
277
|
|
|
|
|
|
|
|
278
|
|
|
|
|
|
|
|
279
|
|
|
|
|
|
|
=head1 SEE ALSO |
280
|
|
|
|
|
|
|
|
281
|
|
|
|
|
|
|
L<ETL::Pipeline>, L<ETL::Pipeline::Input>, L<ETL::Pipeline::Output::Hash>, |
282
|
|
|
|
|
|
|
L<ETL::Pipeline::Output::Perl>, L<ETL::Pipeline::Output::UnitTest> |
283
|
|
|
|
|
|
|
|
284
|
|
|
|
|
|
|
=head1 AUTHOR |
285
|
|
|
|
|
|
|
|
286
|
|
|
|
|
|
|
Robert Wohlfarth <robert.j.wohlfarth@vanderbilt.edu> |
287
|
|
|
|
|
|
|
|
288
|
|
|
|
|
|
|
=head1 LICENSE |
289
|
|
|
|
|
|
|
|
290
|
|
|
|
|
|
|
Copyright 2016 (c) Vanderbilt University |
291
|
|
|
|
|
|
|
|
292
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify it under |
293
|
|
|
|
|
|
|
the same terms as Perl itself. |
294
|
|
|
|
|
|
|
|
295
|
|
|
|
|
|
|
=cut |
296
|
|
|
|
|
|
|
|
297
|
9
|
|
|
9
|
|
20027
|
# This package is a role (it loads Moose::Role, not Moose), so the keyword
# cleanup has to go through Moose::Role's own unimport.
no Moose::Role;
|
9
|
|
|
|
|
14
|
|
|
9
|
|
|
|
|
48
|
|
298
|
|
|
|
|
|
|
|
299
|
|
|
|
|
|
|
# Required by Perl to load the module. |
300
|
|
|
|
|
|
|
1; |