File Coverage

blib/lib/ETL/Pipeline/Input/FileListing.pm
Criterion Covered Total %
statement 45 45 100.0
branch 10 14 71.4
condition n/a
subroutine 11 11 100.0
pod 3 4 75.0
total 69 74 93.2


line stmt bran cond sub pod time code
1             =pod
2              
3             =head1 NAME
4              
5             ETL::Pipeline::Input::FileListing - Input source of a disk folder
6              
7             =head1 SYNOPSIS
8              
9             use ETL::Pipeline;
10             ETL::Pipeline->new( {
11             input => ['FileListing', from => 'Documents', name => qr/\.jpg$/i],
12             mapping => {FileName => 'File', FullPath => 'Path'},
13             output => ['UnitTest']
14             } )->process;
15              
16             =head1 DESCRIPTION
17              
18             B<ETL::Pipeline::Input::FileListing> defines an input source that reads a disk
19             directory. It returns information about each individual file. Use this input
20             source when you need information I<about> the files and not their content.
21              
22             =cut
23              
24             package ETL::Pipeline::Input::FileListing;
25 1     1   4 use Moose;
  1         1  
  1         7  
26              
27 1     1   4875 use 5.014000;
  1         2  
28 1     1   3 use Carp;
  1         1  
  1         74  
29 1     1   4 use MooseX::Types::Path::Class qw/Dir/;
  1         1  
  1         17  
30 1     1   837 use Path::Class;
  1         2  
  1         53  
31 1     1   6 use Path::Class::Rule;
  1         6  
  1         578  
32              
33              
34             our $VERSION = '2.00';
35              
36              
37             =head1 METHODS & ATTRIBUTES
38              
39             =head2 Arguments for L<ETL::Pipeline/input>
40              
41             =head3 from
42              
43             B<from> tells B<ETL::Pipeline::Input::FileListing> where to find the files. By
44             default, B<ETL::Pipeline::Input::FileListing> looks in
45             L<ETL::Pipeline/data_in>. B<from> tells the code to look in another place.
46              
47             If B<from> is a regular expression, the code finds the first directory whose
48             name matches. If B<from> is a relative path, it is expected to reside under
49             L<ETL::Pipeline/data_in>. An absolute path is exact.
50              
51             =cut
52              
53             has 'from' => (
54             init_arg => undef,
55             is => 'bare',
56             isa => Dir,
57             reader => '_get_from',
58             writer => '_set_from',
59             );
60              
61              
62             sub from {
63 52     52 1 2796 my $self = shift;
64              
65 52 100       113 if (scalar( @_ ) > 0) {
66 4         3 my $new = shift;
67 4 100       8 if (ref( $new ) eq 'Regexp') {
68 1         7 my $match = Path::Class::Rule->new
69             ->iname( $new )
70             ->max_depth( 1 )
71             ->directory
72             ->iter( $self->pipeline->data_in )
73             ->()
74             ;
75 1 50       1438 croak 'No matching directories' unless defined $match;
76 1         36 $self->_set_from( $match );
77             } else {
78 3         14 my $folder = dir( $new );
79 3 50       149 $folder = $folder->absolute( $self->pipeline->data_in )
80             if $folder->is_relative;
81 3         427 $self->_set_from( $folder );
82             }
83             }
84 52         1592 return $self->_get_from;
85             }
86              
87              
88             =head3 ...
89              
90             B<ETL::Pipeline::Input::FileListing> accepts any of the tests provided by
91             L<Path::Iterator::Rule>. The value of the argument is passed directly into the
92             test. For boolean tests (e.g. readable, exists, etc.), pass an C<undef> value.
93              
94             B<ETL::Pipeline::Input::FileListing> automatically applies the C<file> filter.
95             Do not pass C<file> through L<ETL::Pipeline/input>.
96              
97             C<name> is the most commonly used argument. It accepts a glob or regular
98             expression to match file names.
99              
100             =cut
101              
102             sub BUILD {
103 4     4 0 5 my $self = shift;
104 4         3 my $arguments = shift;
105              
106             # Set the top level directory.
107 4 100       8 if (defined $arguments->{from}) {
108 2         6 $self->from( $arguments->{from} );
109 2         4 } else { $self->from( '.' ); }
110              
111             # Configure the file search.
112             my @criteria = grep {
113 4 50       14 $_ ne 'file'
  7         110  
114             && !$self->meta->has_attribute( $_ )
115             } keys %$arguments;
116 4         103 my $search = Path::Class::Rule->new;
117 4         27 foreach my $name (@criteria) {
118 1         2 my $value = $arguments->{$name};
119 1         51 eval "\$search->$name( \$value )";
120 1 50       47 croak $@ unless $@ eq '';
121             }
122 4         12 $search->file;
123 4         73 $self->_set_iterator( $search->iter( $self->from ) );
124             }
125              
126              
127             =head2 Called from L<ETL::Pipeline/process>
128              
129             =head3 get
130              
131             B<get> retrieves one field about the currently selected file. B<get> can
132             also return methods from the L<Path::Class::File> object. Any additional
133             arguments for B<get> are passed directly into the method.
134              
135             # ETL::Pipeline::Input::FileListing fields.
136             $etl->get( 'Inside' );
137             $etl->get( 'File' );
138            
139             # Path::Class::File methods.
140             $etl->get( 'basename' );
141              
142             B<ETL::Pipeline::Input::FileListing> provides these fields...
143              
144             =over
145              
146             =item Extension
147              
148             The file extension, without a leading period.
149              
150             =item File
151              
152             The file name with the extension. No directory information.
153              
154             =item Folder
155              
156             The full directory where this file resides.
157              
158             =item Inside
159              
160             The relative directory name where this file resides. These are the directories
161             below L</from> where the file resides. You can use this to re-create the
162             directory structure.
163              
164             =item Path
165              
166             The complete path name of the file (directory, name, and extension). You can
167             use this to access the file contents.
168              
169             =item Relative
170              
171             The relative path name of the file. This is the part that comes after the
172             L</from> directory.
173              
174             =item Object
175              
176             The L<Path::Class::File> object for this entry.
177              
178             =back
179              
180             =cut
181              
182             sub get {
183             my ($self, $field, @arguments) = @_;
184              
185             my $record = $self->current;
186             if (exists $record->{$field}) {
187             return $record->{$field};
188             } else {
189             my $object = $record->{Object};
190             return eval "\$object->$field( \@arguments )";
191             }
192             }
193              
194              
195             =head3 next_record
196              
197             Read one record from the file for processing. B<next_record> returns a boolean.
198             I<True> means success. I<False> means it reached the end of the listing (aka
199             no more files).
200              
201             while ($input->next_record) {
202             ...
203             }
204              
205             =cut
206              
207             sub next_record {
208             my ($self) = @_;
209              
210             my $object = $self->_next_file;
211             if (defined $object) {
212             my @pieces = split( /\./, $object->basename);
213             $self->current( {
214             Extension => $pieces[-1],
215             File => $object->basename,
216             Folder => $object->dir->absolute( $self->from )->stringify,
217             Inside => $object->dir->relative( $self->from )->stringify,
218             Object => $object,
219             Path => $object->absolute( $self->from )->stringify,
220             Relative => $object->relative( $self->from )->stringify,
221             } );
222             return 1;
223             } else { return 0; }
224             }
225              
226              
227             =head3 configure
228              
229             B<configure> doesn't actually do anything. But it is required by
230             L<ETL::Pipeline/process>.
231              
232             =cut
233              
234       4 1   sub configure { }
235              
236              
237             =head3 finish
238              
239             B<finish> doesn't actually do anything. But it is required by
240             L<ETL::Pipeline/process>.
241              
242             =cut
243              
244       4 1   sub finish { }
245              
246              
247             =head2 Other Methods & Attributes
248              
249             =head3 current
250              
251             B<current> holds the current record as a hash reference.
252              
253             =cut
254              
255             has 'current' => (
256             is => 'rw',
257             isa => 'HashRef',
258             );
259              
260              
261             =head3 iterator
262              
263             L<Path::Class::Rule> creates an iterator that returns each file in turn.
264             B<iterator> holds it for L</next_record>.
265              
266             =cut
267              
268             has 'iterator' => (
269             handles => {_next_file => 'execute'},
270             is => 'ro',
271             isa => 'CodeRef',
272             traits => [qw/Code/],
273             writer => '_set_iterator',
274             );
275              
276              
277             =head1 SEE ALSO
278              
279             L<ETL::Pipeline>, L<ETL::Pipeline::Input>, L<Path::Class::File>,
280             L<Path::Class::Rule>, L<Path::Iterator::Rule>
281              
282             =cut
283              
284             with 'ETL::Pipeline::Input';
285              
286              
287             =head1 AUTHOR
288              
289             Robert Wohlfarth <robert.j.wohlfarth@vanderbilt.edu>
290              
291             =head1 LICENSE
292              
293             Copyright 2016 (c) Vanderbilt University Medical Center
294              
295             This program is free software; you can redistribute it and/or modify it under
296             the same terms as Perl itself.
297              
298             =cut
299              
300 1     1   4 no Moose;
  1         9  
  1         5  
301             __PACKAGE__->meta->make_immutable;