File Coverage

blib/lib/ETL/Pipeline/Output.pm
Criterion Covered Total %
statement 6 6 100.0
branch n/a
condition n/a
subroutine 3 3 100.0
pod 1 1 100.0
total 10 10 100.0


line stmt bran cond sub pod time code
1             =pod
2              
3             =head1 NAME
4              
5             ETL::Pipeline::Output - Role for ETL::Pipeline output destinations
6              
7             =head1 SYNOPSIS
8              
9             use Moose;
10             with 'ETL::Pipeline::Output';
11              
12             sub write_record {
13             # Add code to save your data here
14             ...
15             }
16              
17             =head1 DESCRIPTION
18              
19             L<ETL::Pipeline> reads data from an input source, transforms it, and writes
20             the information to an output destination. This role defines the required
21             methods and attributes for output destinations. Every output destination
22             B<must> implement B<ETL::Pipeline::Output>.
23              
24             L<ETL::Pipeline> works by calling the methods defined in this role. The role
25             presents a common interface. It works as a shim, tying database or file access
26             modules with L<ETL::Pipeline>. For example, SQL databases may use L<DBI> or
27             L<DBIx::Class>.
28              
29             =head2 Adding a new output destination
30              
31             While L<ETL::Pipeline> provides a couple of generic output destinations, the
32             real value of L<ETL::Pipeline> comes from adding your own business-specific
33             destinations...
34              
35             =over
36              
37             =item 1. Create a Perl module. Name it C<ETL::Pipeline::Output::...>.
38              
39             =item 2. Make it a Moose object: C<use Moose;>.
40              
41             =item 3. Include the role: C<with 'ETL::Pipeline::Output';>.
42              
43             =item 4. Add the L</write_record> method: C<sub write_record { ... }>.
44              
45             =item 5. Add the L</set> method: C<sub set { ... }>.
46              
47             =item 6. Add the L</new_record> method: C<sub new_record { ... }>.
48              
49             =item 7. Add the L</configure> method: C<sub configure { ... }>.
50              
51             =item 8. Add the L</finish> method: C<sub finish { ... }>.
52              
53             =back
54              
55             Ta-da! Your output destination is ready to use:
56              
57             $etl->output( 'YourNewDestination' );
58              
59             =head2 Provided out of the box
60              
61             L<ETL::Pipeline> comes with a couple of generic output destinations...
62              
63             =over
64              
65             =item L<ETL::Pipeline::Output::Hash>
66              
67             Stores records in a Perl hash. Useful for loading support files and tying
68             them together later.
69              
70             =item L<ETL::Pipeline::Output::Perl>
71              
72             Executes a subroutine against the record. Useful for debugging data issues.
73              
74             =back
75              
76             =cut
77              
78             package ETL::Pipeline::Output;
79 9     9   5057 use Moose::Role;
  9         12  
  9         55  
80              
81              
82             our $VERSION = '2.00';
83              
84              
85             =head1 METHODS & ATTRIBUTES
86              
87             =head3 pipeline
88              
89             B<pipeline> returns the L<ETL::Pipeline> object using this output destination.
90             You can access information about the pipeline inside the methods.
91              
92             L<ETL::Pipeline/output> automatically sets this attribute.
93              
94             =cut
95              
96             has pipeline => (
97             required => 1,               # must be passed to the constructor
98             isa      => 'ETL::Pipeline', # value must be an ETL::Pipeline object
99             is       => 'ro',            # read-only accessor, no setter
100             );
101              
102              
103             =head2 Arguments for L<ETL::Pipeline/output>
104              
105             B<Note:> This role defines no attributes that are set with the
106             L<ETL::Pipeline/output> command. Each child class defines its own options.
107              
108             =head2 Called from L<ETL::Pipeline/process>
109              
110             =head3 set
111              
112             B<set> temporarily saves the value of an individual output field.
113             L</write_record> will later copy these values to the correct destination.
114              
115             L<ETL::Pipeline/process> calls B<set> inside of a loop - once for each field.
116             B<set> accepts two parameters:
117              
118             =over
119              
120             =item 1. The output field name.
121              
122             =item 2. The value for that field.
123              
124             =back
125              
126             There is no return value.
127              
128             =head4 Couldn't you just use a hash?
129              
130             B<set> allows your output destination to choose the in-memory storage that
131             best fits. This might be a hash, a list, or an object of some type. B<set>
132             merely provides a common interface for L<ETL::Pipeline>.
133              
134             =cut
135              
136             requires( 'set' );    # consuming classes must supply their own set()
137              
138              
139             =head3 write_record
140              
141             B<write_record> sends the current record to its final destination.
142             L<ETL::Pipeline/process> calls this method once for each record.
143             B<write_record> is the I<last> thing done with this record.
144              
145             B<write_record> returns a boolean flag. A I<true> value means success saving
146             the record. A I<false> value indicates an error.
147              
148             When your code encounters an error, call the L</error> method like this...
149              
150             return $self->error( 'Error message here' );
151              
152             L</error> returns a false value. The default L</error> does nothing. To save
153             errors, override L</error> and add the new functionality. When overriding
154             L</error>, it is not necessary to return anything. B<ETL::Pipeline::Output>
155             ensures that L</error> I<always> returns false.
156              
157             For fatal errors, use the C<croak> command from L<Carp> instead.
158              
159             =cut
160              
161             requires( 'write_record' );    # consuming classes must supply write_record()
162              
163              
164             =head3 new_record
165              
166             Start a brand new, clean record. B<ETL::Pipeline::Output> automatically calls
167             B<new_record> once after L</configure> and again after every L</write_record>.
168             This means that even if the save failed, B<new_record> is still called. The
169             original record with the error is lost.
170              
171             =cut
172              
173             requires( 'new_record' );    # consuming classes must supply new_record()
174             # Start from a clean record after setup and again after every write attempt.
175             after configure    => sub { my $self = shift; $self->new_record };
176             after write_record => sub { my $self = shift; $self->new_record };
177              
178              
179             =head3 configure
180              
181             B<configure> prepares the output destination. It can open files, make database
182             connections, or anything else required before saving the first record.
183              
184             Why not do this in the class constructor? Some roles add automatic
185             configuration. Those roles use the usual Moose method modifiers, which would
186             not work with the constructor.
187              
188             This B<configure> - for the output destination - is called I<after> the
189             L<ETL::Pipeline::Input/configure> of the input source. This method can expect
190             that the input source is fully configured and ready for use.
191              
192             =cut
193              
194             requires( 'configure' );    # consuming classes must supply configure()
195              
196              
197             =head3 finish
198              
199             B<finish> shuts down the output destination. It can close files, disconnect
200             from the database, or anything else required to cleanly terminate the output.
201              
202             Why not do this in the class destructor? Some roles add automatic functionality
203             via Moose method modifiers. This would not work with a destructor.
204              
205             This B<finish> - for the output destination - is called I<before> the
206             L<ETL::Pipeline::Input/finish> of the input source. This method should expect
207             that the input source has reached end-of-file by this point, but is not
208             closed yet.
209              
210             =cut
211              
212             requires( 'finish' );    # consuming classes must supply finish()
213              
214              
215             =head2 Other methods and attributes
216              
217             =head3 record_number
218              
219             The B<record_number> attribute tells you how many total records have been
220             saved by L</write_record>. The first record is always B<1>.
221              
222             B<ETL::Pipeline::Output> automatically increments the counter after each
223             successful L</write_record>. The L</write_record> method should not change
224             B<record_number>.
225              
226             =head3 decrement_record_number
227              
228             This method decreases L</record_number> by one. It can be used to I<back out>
229             header records from the count.
230              
231             =head3 increment_record_number
232              
233             This method increases L</record_number> by one.
234              
235             =cut
236              
237             has record_number => (
238             traits  => ['Counter'],    # native counter trait supplies inc/dec
239             is      => 'ro',           # no public setter; change it via the delegates below
240             isa     => 'Int',
241             default => '0',            # nothing saved yet
242             handles => {
243             increment_record_number => 'inc',
244             decrement_record_number => 'dec',
245             },
246             );
247              
248             around write_record => sub {
249             # Peel off the wrapped method and the invocant; the rest of @_ passes through.
250             my ($next, $self) = splice @_, 0, 2;
251              
252             my $saved = $self->$next( @_ );
253             $saved and $self->increment_record_number;    # count successful saves only
254             return $saved;
255             };
256              
257              
258             =head3 error
259              
260             B<error> handles errors from L</write_record>. The default B<error> discards
261             any error messages. Override B<error> if you want to capture the messages
262             and/or the record that caused it.
263              
264             B<error> I<always> returns a false value - even if you override it.
265              
266             =cut
267              
268       1 1   sub error {}    # default handler: does nothing with the message
269              
270             around error => sub {
271             # Run whichever error handler is in effect, then force a false result.
272             my ($next, $self) = splice @_, 0, 2;
273              
274             $self->$next( @_ );
275             return 0;
276             };
277              
278              
279             =head1 SEE ALSO
280              
281             L<ETL::Pipeline>, L<ETL::Pipeline::Input>, L<ETL::Pipeline::Output::Hash>,
282             L<ETL::Pipeline::Output::Perl>, L<ETL::Pipeline::Output::UnitTest>
283              
284             =head1 AUTHOR
285              
286             Robert Wohlfarth <robert.j.wohlfarth@vanderbilt.edu>
287              
288             =head1 LICENSE
289              
290             Copyright 2016 (c) Vanderbilt University
291              
292             This program is free software; you can redistribute it and/or modify it under
293             the same terms as Perl itself.
294              
295             =cut
296              
297 9     9   20027 no Moose::Role;    # this package imports Moose::Role, so unimport that (not Moose)
  9         14  
  9         48  
298              
299             # Required by Perl to load the module.
300             1;