File Coverage

blib/lib/Text/CSV/Merge.pm

Criterion    Covered    Total        %
statement         31       68     45.5
branch             1       22      4.5
condition          0       12      0.0
subroutine        10       14     71.4
pod                2        2    100.0
total             44      118     37.2


line stmt bran cond sub pod time code
1             package Text::CSV::Merge;
2             {
3             $Text::CSV::Merge::VERSION = '0.05';
4             }
5             # ABSTRACT: Fill in gaps in a CSV file from another CSV file
6              
7 3     3   75005 use Modern::Perl '2010';
  3         81419  
  3         21  
8 3     3   60199 use autodie;
  3         68835  
  3         20  
9 3     3   28793 use utf8;
  3         44  
  3         18  
10              
11 3     3   4967 use Moo 1.001000;
  3         52649  
  3         40  
12 3     3   7402 use Carp;
  3         7  
  3         250  
13 3     3   15 use IO::File;
  3         8  
  3         484  
14 3     3   8657 use Text::CSV_XS;
  3         37682  
  3         318  
15 3     3   40558 use DBI; # for DBD::CSV
  3         74514  
  3         292  
16 3     3   4286 use Log::Dispatch;
  3         48265  
  3         5053  
17              
18              
19             has +logger => (
20             is => 'lazy',
21             builder => sub {
22 0     0   0 Log::Dispatch->new(
23             outputs => [
24             [ 'File', autoflush => 1, min_level => 'debug', filename => 'merge.log', newline => 1, mode => '>>' ],
25             #[ 'Screen', min_level => 'info', newline => 1 ],
26             ],
27             );
28             }
29             );
30              
31             has +csv_parser => (
32             is => 'lazy',
33             builder => sub {
34 0 0   0   0 Text::CSV_XS->new({ binary => 1, eol => $/ })
35             or croak("Cannot use module Text::CSV_XS: " . Text::CSV_XS->error_diag());
36             }
37             );
38              
39             has +dbh => (
40             is => 'lazy',
41             # must return only a code ref
42             builder => sub {
43 0 0   0   0 DBI->connect("dbi:CSV:", undef, undef, {
44             RaiseError => 1,
45             PrintError => 1,
46             f_ext => ".csv/r",
47             # Better performance with XS
48             csv_class => "Text::CSV_XS",
49             # csv_null => 1,
50             }) or croak("Cannot connect to CSV file via DBI: $DBI::errstr");
51             }
52             );
53              
54             has base_file => (
55             is => 'rw',
56             required => 1,
57             #allow external names to be different from class attribute
58             init_arg => 'base',
59             #validate it
60             #isa => sub {},
61             coerce => sub {
62             my $base_fh = IO::File->new( $_[0], '<' ) or croak("Open file: '$_[0]' failed: $!");
63             $base_fh->binmode(":utf8");
64            
65             return $base_fh;
66             }
67             );
68              
69             has merge_file => (
70             # We use only the raw file name/path and do not create a FH here, unlike base_file().
71             is => 'rw',
72             init_arg => 'merge',
73             required => 1
74             );
75              
76             has output_file => (
77             is => 'rw',
78             init_arg => 'output',
79             # an output file name is NOT required
80             required => 0,
81             default => 'merged_output.csv',
82             coerce => sub {
83             my $output_fh = IO::File->new( $_[0], '>' ) or croak("Open file: '$_[0]' failed: $!");
84             $output_fh->binmode(":utf8");
85            
86             return $output_fh;
87             }
88             );
89              
90             has columns => (
91             is => 'rw',
92             required => 1,
93             );
94              
95             has search_field => (
96             is => 'rw',
97             required => 1,
98             init_arg => 'search'
99             #,
100             #isa => sub {
101             # validate that search_field is one of the columns in the base file
102             #die "Search parameter: '$_[0]' is not one of the columns: @{$self->columns}";
103             # unless ( $_[0] ~~ @{$self->columns} );
104             #}
105             );
106              
107             has first_row_is_headers => (
108             is => 'rw',
109             required => 1,
110             #validate it
111             isa => sub {
112             # @TODO: there's got to be a better way to do this!
113             croak("Option 'first_row_is_headers' must be 1 or 0") unless ( $_[0] =~ m{'1'|'0'}x || $_[0] == 1 || $_[0] == 0 );
114             },
115             );
116              
117             sub merge {
118 0     0 1 0 my $self = shift;
119              
120             # validate that search_field is one of the columns in the base file
121 0         0 croak("Search parameter: '$self->search_field' is not one of the columns: @{$self->columns}")
  0         0  
122 0 0       0 unless ( scalar(grep { $_ eq $self->search_field } @{$self->columns}) );
  0         0  
123             # Use scalar() to force grep to return the number of matches;
124             # 0 -> false for the 'unless' statement.
125            
126 0         0 $self->csv_parser->column_names( $self->columns );
127            
128             # Loop through the base file to find missing data
129             #@TODO: make into $self->rows?
130 0         0 my @rows;
131            
132 0         0 while ( my $row = $self->csv_parser->getline_hr( $self->base_file ) ) {
133             # skip column names
134 0 0 0     0 next if ($. == 1 and $self->first_row_is_headers);
135              
136 0 0       0 if ( $self->csv_parser->parse($row) ) {
137             # keep a list of null columns in this row
138 0         0 my @nulls;
139              
140             # might be slightly more efficient to use while()
141 0         0 foreach my $key ( keys %{$row} ) {
  0         0  
142             # which fields is this row missing?
143 0 0 0     0 if ( $row->{$key} eq 'NULL' or $row->{$key} eq "" ) {
144 0         0 push @nulls, $key;
145              
146 0         0 $self->logger->info("Missing data in column: $key for '$row->{$self->search_field}'");
147             }
148             }
149              
150             # if this row has empty columns, query the merge file to fill them
151 0 0       0 if ( @nulls ) {
152             # search $merge_file for the missing data's row
153 0         0 local $" = ','; # reset the list separator for array interpolation to suit SQL
154            
155             # To get the original case for the columns, specify the column
156             # names rather than using SELECT *, since it normalizes to
157             # lowercase, per:
158             # http://stackoverflow.com/questions/3350775/dbdcsv-returns-header-in-lower-case
159 0 0       0 my $sth = $self->dbh->prepare(
160 0         0 "select @{$self->columns} from $self->{merge_file} where $self->{search_field} = ?"
161             ) or croak("Cannot prepare DBI statement: " . $self->dbh->errstr ());
162              
163 0         0 $sth->execute($row->{$self->search_field});
164            
165 0         0 while ( my $filler = $sth->fetchrow_hashref() ) {
166 0         0 foreach my $gap ( @nulls ) {
167 0 0 0     0 if (exists $filler->{$gap} and defined $filler->{$gap} and $filler->{$gap} ne "") {
      0        
168 0         0 $self->logger->info(
169             "Found Data: '$gap' = '$filler->{$gap}' for '$row->{$self->search_field}'"
170             );
171            
172 0         0 $row->{$gap} = $filler->{$gap};
173             } else {
174 0         0 $self->logger->info(
175             "Data not Found for column: '$gap' for '$row->{$self->search_field}' $self->{merge_file}"
176             );
177             }
178             }
179             }
180            
181             # Be efficient and neat!
182 0         0 $sth->finish();
183             }
184            
185             # insert the updated row as a reference; even if not processed, the
186             # row will still appear in the final output.
187 0         0 push @rows, $row;
188             } else {
189 0         0 my $err = $self->csv_parser->error_input;
190 0         0 $self->logger->error("Failed to parse line: $err");
191             }
192             }
193              
194             # Ensure we've processed to the end of the file
195 0 0       0 $self->csv_parser->eof or $self->csv_parser->error_diag();
196              
197             # print does NOT want an actual array! Use a hash slice, instead:
198             #$self->csv_parser->print($output_fh, [ @$_{@columns} ]) for @rows;
199             #
200             # Or, here I've switched to Text::CSV_XS's specific print_hr(), which
201             # is simply missing from the PP (Pure Perl) version.
202 0         0 $self->csv_parser->print_hr($self->output_file, $_) for @rows;
203            
204 0         0 return 1;
205             };
206              
207             sub DEMOLISH {
208 1     1 1 3050 my $self = shift;
209              
210             ## Clean up!
211 1         8 $self->base_file->close();
212 1 50       674 $self->output_file->close() or croak("Close 'output.csv' failed: $!");
213            
214 1         757 return;
215             }
216              
217              
218             1;
219              
220             __END__
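
For reference, the constructor arguments exercised above (init_args base, merge, output, columns, search, and first_row_is_headers) can be combined into a short driver script. This is a minimal sketch only: the file names and column list are hypothetical, and the exact form of the merge argument (a DBD::CSV table name resolved via f_ext => ".csv/r") should be confirmed against the module's POD.

    use strict;
    use warnings;
    use Text::CSV::Merge;

    # Hypothetical input files: contacts.csv has gaps, archive.csv supplies
    # the missing values. 'archive' is the DBD::CSV table name for archive.csv.
    my $merger = Text::CSV::Merge->new(
        base    => 'contacts.csv',        # opened read-only by the coercion in base_file
        merge   => 'archive',             # queried through DBI/DBD::CSV in merge()
        output  => 'merged_output.csv',   # same as the attribute default
        columns => [ qw(email name phone) ],
        search  => 'email',               # must be one of the columns
        first_row_is_headers => 1,
    );

    # Fills NULL/empty cells in the base file from the merge file and writes
    # every row (filled or not) to the output file.
    $merger->merge();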