File Coverage

lib/CSV/Reader.pm
Criterion Covered Total %
statement 136 174 78.1
branch 61 98 62.2
condition 14 29 48.2
subroutine 14 16 87.5
pod 7 7 100.0
total 232 324 71.6


line stmt bran cond sub pod time code
1             package CSV::Reader;
2 4     4   3676 use strict;
  4         9  
  4         134  
3 4     4   19 use Carp qw(carp croak);
  4         8  
  4         224  
4 4     4   2928 use Text::CSV ();
  4         85627  
  4         109  
5 4     4   2013 use Tie::IxHash ();
  4         17795  
  4         7723  
6             our $VERSION = 1.12;
7              
8             =head1 NAME
9              
10             CSV::Reader - CSV reader class
11              
12             =head1 DESCRIPTION
13              
14             Simple CSV reader class that uses Text::CSV internally.
15             The CSV files are expected to have a header row of column names.
16             This was designed with the idea of using an iterator interface, but Perl does not support interators (nor interfaces) yet :(
17              
18             =head1 SYNOPSIS
19              
20             use CSV::Reader ();
21             use open OUT => ':locale'; # optional; make perl aware of your terminal's encoding
22              
23             # Create reader from file name:
24             my $reader = new CSV::Reader('/path/to/file.csv');
25              
26             # Create reader from a file handle (GLOB):
27             open(my $h, '<', $filename) || die("Failed to open $filename: $!");
28             # or preferred method that can handle files having a UTF-8 BOM:
29             open(my $h, '<:via(File::BOM)', $filename) || die("Failed to open $filename: $!");
30             my $reader = new CSV::Reader($h);
31              
32             # Create reader from an IO::Handle based object:
33             my $io = IO::File->new(); # subclass of IO::Handle
34             $io->open($filename, '<:via(File::BOM)') || die("Failed to open $filename: $!");
35             my $reader = new CSV::Reader($io);
36              
37             # Create reader with advanced options:
38             my $reader = new CSV::Reader('/path/to/file.csv',
39             'delimiter' => ';',
40             'enclosure' => '',
41             'field_normalizer' => sub {
42             my $nameref = shift;
43             $$nameref = lc($$nameref); # lowercase
44             $$nameref =~ s/\s/_/g; # whitespace to underscore
45             },
46             'field_aliases' => {
47             'postal_code' => 'postcode', # applied after normalization
48             },
49             'mutators' => {
50             'postcode' => sub { # if postalcode is Dutch, then make sure it has no spaces and is in uppercase.
51             my $val_ref = shift;
52             my $row_ref = shift;
53             if (defined($$val_ref) && defined($row_ref->{'country'}) && ($row_ref->{'country'} eq 'NL')) {
54             $$val_ref =~ s/\s+//;
55             $$val_ref = uc($$val_ref);
56             }
57             },
58             'has_fiber_internet' => sub { # set a default for an empty (undef) value
59             my $val_ref = shift;
60             $$val_ref //= 0;
61             },
62             },
63             );
64              
65             # Show the field names found in the header row:
66             print 'Field names: ' . join("\n", $reader->fieldNames()) . "\n";
67              
68             # Iterate over the data rows:
69             while (my $row = $reader->nextRow()) {
70             # It's recommended to validate the $row hashref first with something such as Params::Validate.
71             # Now do whatever you want with the (validated) row hashref...
72             require Data::Dumper; local $Data::Dumper::Terse = 1;
73             print Data::Dumper::Dumper($row);
74             }
75              
76             =head1 PUBLIC STATIC METHODS
77              
78             =head2 new($file, %options)
79              
80             Constructor.
81              
82             $file can be a string file name, an open file handle (GLOB), or an IO::Handle based object (e.g. IO::File or IO::Scalar).
83             If a string file name is given, then the file is opened via File::BOM.
84              
85             The following %options are supported:
86              
87             - debug: boolean, if true, then debug messages are emitted using warn().
88             - field_aliases: hashref of case insensitive alias (in file) => real name (as expected in code) pairs.
89             - field_normalizer: callback that receives a field name by reference to normalize (e.g. make lowercase).
90             - include_fields: arrayref of field names to include. If given, then all other field names are excluded.
91             - delimiter: string, default ','
92             - enclosure: string, default '"'
93             - escape: string, default backslash
94             - mutators: hashref of field name => callback($value_ref, $row_ref) pairs.
95              
96             Note: the option field_aliases is processed after the option field_normalizer if given.
97              
98             Note: the callbacks given with the mutators option are called in their key order (which is an unpredictable order unless they're tied with Tie::IxHash).
99              
100             =cut
101              
102             sub new {
103 8     8 1 19907 my $proto = shift;
104 8         16 my $file = shift;
105 8         27 my %options = @_;
106 8         63 my $self = {
107             'h' => undef, # File handle.
108             'own_h' => undef, # Does this class own the file handle.
109             'field_cols' => {}, # Hashref of fieldname => column index pairs.
110             'row' => undef, # Current ReaderRow object.
111             'linenum' => 0, # Data row index.
112             'text_csv' => undef, # The Text::CSV object
113              
114             # Options:
115             'debug' => 0,
116             'delimiter' => ',',
117             'enclosure' => '"',
118             'escape' => '\\',
119             'mutators' => undef,
120             'skip_empty_lines' => 0, # TODO: implement this
121             };
122 8         17 tie(%{$self->{'field_cols'}}, 'Tie::IxHash');
  8         42  
123              
124 8 50 33     169 unless (defined($file) && length($file)) {
125 0         0 croak('Missing $file argument');
126             }
127 8 100       25 if (ref($file)) {
128 3 50 66     27 unless ((ref($file) eq 'GLOB') || UNIVERSAL::isa($file, 'IO::Handle')) {
129 0         0 croak(ref($file) . ' is not a legal file argument type');
130             }
131 3         7 $self->{'h'} = $file;
132 3         5 $self->{'own_h'} = 0;
133             }
134             else {
135 5         9 my $h;
136 5         11 eval {
137 5         1118 require File::BOM;
138             };
139 5 50       25517 my $mode = $@ ? '<' : '<:via(File::BOM)';
140 5 50       18 $options{'debug'} && warn(__PACKAGE__ . "::new file open mode is $mode\n");
141 5 50   2   117 open($h, $mode, $file) || croak('Failed to open "' . $file . '" for reading using mode "' . $mode . '": ' . $!);
  2         13  
  2         7  
  2         13  
142 5         1916 $self->{'h'} = $h;
143 5         18 $self->{'own_h'} = 1;
144             }
145              
146             # Get the options.
147 8         26 my %opt_field_aliases;
148             my $opt_field_normalizer;
149 8         0 my %opt_include_fields;
150 8         0 my %text_csv_options; # undocumented experimental feature; text_csv_*: avoid if possible; options with this prefix are passed as is (but without prefix) to the internal Text::CSV object.
151 8 50       21 if (%options) {
152 8         52 foreach my $key (keys %options) {
153 25         47 my $value = $options{$key};
154 25 50 33     162 if (($key eq 'debug') || ($key eq 'skip_empty_lines')) {
    100 66        
    100          
    100          
    100          
    100          
    50          
    0          
155 0         0 $self->{$key} = $value;
156             }
157             elsif (($key eq 'enclosure') || ($key eq 'escape')) {
158 7 50 33     27 if (!defined($value) || ref($value)) {
159 0         0 croak("The '$key' option must be a string");
160             }
161 7         15 $self->{$key} = $value;
162             }
163             elsif ($key eq 'delimiter') {
164 7 50 33     44 if (!defined($value) || ref($value) || !length($value)) {
      33        
165 0         0 croak("The '$key' option must be a non-empty string");
166             }
167 7         30 $self->{$key} = $value;
168             }
169              
170             elsif ($key eq 'include_fields') {
171 1 50       4 if (ref($value) ne 'ARRAY') {
172 0         0 croak("The '$key' option must be an arrayref");
173             }
174 1         3 %opt_include_fields = map { $_ => undef } @$value;
  2         6  
175             }
176             elsif ($key eq 'field_aliases') {
177 8 50       20 if (ref($value) ne 'HASH') {
178 0         0 croak("The '$key' option must be a hashref");
179             }
180 8         23 %opt_field_aliases = map { lc($_) => $value->{$_} } keys %$value;
  8         49  
181             }
182             elsif ($key eq 'field_normalizer') {
183 1 50       3 if (ref($value) ne 'CODE') {
184 0         0 croak("The '$key' option must be a code ref");
185             }
186 1         2 $opt_field_normalizer = $value;
187             }
188             elsif ($key eq 'mutators') {
189 1 50       3 if (ref($value) ne 'HASH') {
190 0         0 croak("The '$key' option must be a hashref of field name => code ref pairs");
191             }
192 1         5 foreach my $name (keys %$value) {
193 2         4 my $mutator = $options{$key}->{$name};
194 2 50       4 if (defined($mutator)) {
195 2 50       7 unless (ref($mutator) eq 'CODE') {
196 0         0 croak('The mutator for "' . $name . '" must be a CODE ref');
197             }
198             }
199             }
200 1         3 $self->{$key} = $value;
201             }
202             elsif ($key =~ /^(?:Text(?::|_)CSV|text_csv)[\._:](.+)$/) {
203 0         0 $text_csv_options{$1} = $value;
204             }
205             else {
206 0         0 croak("Unknown option '$key'");
207             }
208             }
209             }
210              
211             my $text_csv = $self->{'text_csv'} = $proto->_new_text_csv_object({
212             'auto_diag' => 1,
213             'binary' => 1,
214             'blank_is_undef' => 1,
215             'empty_is_undef' => 1,
216             'sep_char' => $self->{'delimiter'},
217             'escape_char' => $self->{'escape'},
218 8   50     77 'quote_char' => $self->{'enclosure'},
219             %text_csv_options, # undocumented experimental feature; consider overriding _new_text_csv_object() instead.
220             }) || die('Method _new_text_csv_object() did not return a Text::CSV object as expected');
221              
222             # Emulate the original Text::CSV error message format but without the LF and with the caller script/module.
223 8         23 if (0 && $text_csv->can('callbacks')) { # exists since Text::CSV_XS version 1.06
224             $text_csv->callbacks(
225             'error' => sub {
226 0     0   0 my ($err, $msg, $pos, $recno, $fldno) = @_; # This is dumb because the object itself is not given.
227 0 0       0 if ($err eq '2012') { # EOF
228 0         0 return;
229             }
230             #CSV_XS ERROR: 2021 - EIQ - NL char inside quotes, binary off @ rec 10 pos 51 field 6
231             #die 'error args: ' . Data::Dumper::Dumper(\@_);
232 0         0 local $Carp::CarpInternal{'Text::CSV'} = 1;
233 0         0 local $Carp::CarpInternal{'Text::CSV_PP'} = 1;
234 0         0 local $Carp::CarpInternal{'Text::CSV_XS'} = 1;
235 0         0 carp(ref($text_csv) . " ERROR: $err - $msg \@ rec $recno pos $pos field $fldno");
236 0         0 return;
237             },
238             );
239             }
240              
241             # Read header row.
242 8 50       330 if (my $row = $self->{'text_csv'}->getline($self->{'h'})) {
243             # Get the fieldname => column indices
244 8         2685 for (my $x = 0; $x < @$row; $x++) {
245 64         784 my $name = $row->[$x];
246 64 50       109 unless(defined($name)) {
247 0         0 next;
248             }
249 64         242 $name =~ s/^\s+|\s+$//g;
250 64 50       118 unless(length($name)) {
251 0         0 next;
252             }
253 64 100       108 if ($opt_field_normalizer) {
254 8         16 &$opt_field_normalizer(\$name);
255             }
256 64 50       147 if (%opt_field_aliases) {
257 64         103 my $key = lc($name);
258 64 100       138 if (defined($opt_field_aliases{$key})) {
259 8         14 $name = $opt_field_aliases{$key};
260             }
261             }
262 64 100 100     121 if (%opt_include_fields && !exists($opt_include_fields{$name})) {
263 6         14 next;
264             }
265 58 50       200 if (exists($self->{'field_cols'}->{$name})) {
266 0         0 croak('Duplicate field "' . $name . '" detected');
267             }
268 58         368 $self->{'field_cols'}->{$name} = $x;
269             }
270 8 50       105 unless(%{$self->{'field_cols'}}) {
  8         40  
271 0 0       0 croak(%opt_include_fields ? 'No fields found in header row to include' : 'No fields found in header row');
272             }
273             # If include_fields option was given, reorder keys of field_cols to match it.
274 8 100       117 if (%opt_include_fields) {
275 1         2 my %field_cols;
276             #{$self->{'field_cols'}}
277 1         5 tie(%field_cols, 'Tie::IxHash');
278 1         50 foreach my $key (@{$options{'include_fields'}}) {
  1         4  
279 2 50       28 if (exists($self->{'field_cols'}->{$key})) {
280 2         14 $field_cols{$key} = $self->{'field_cols'}->{$key};
281             }
282             }
283 1         22 $self->{'field_cols'} = \%field_cols;
284             }
285             }
286             else {
287 0         0 croak('No header line found in CSV');
288             }
289              
290             # Check that all the required header fields are present.
291 8 100       27 if (%opt_include_fields) {
292 1         2 my @missing;
293 1         3 foreach my $name (keys %opt_include_fields) {
294 2 50       10 if (!exists($self->{'field_cols'}->{$name})) {
295 0         0 push(@missing, $name);
296             }
297             }
298 1 50       7 if (@missing) {
299 0         0 croak('The following column headers are missing: ' . join(', ', @missing));
300             }
301             }
302              
303 8   33     44 bless($self, ref($proto) || $proto);
304 8         50 return $self;
305             }
306              
307              
308              
309              
310             =head2 DESTROY
311              
312             Closes the private file handle, if any.
313              
314             =cut
315              
316             sub DESTROY {
317 8     8   5081 my $self = shift;
318 8 100       72 if ($self->{'own_h'}) {
319 5         35 close($self->{'h'});
320             }
321             }
322              
323              
324              
325              
326              
327             =head1 PROTECTED STATIC METHODS
328              
329             =head2 _new_text_csv_object($args_hashref)
330              
331             Creates and returns a new Text::CSV object using the given arguments.
332             Child classes may override this method in order to tweak the arguments
333             and/or customize the creation of the internal Text::CSV object.
334              
335             =cut
336              
337             sub _new_text_csv_object {
338 8     8   15 my $proto = shift;
339 8         12 my $args = shift; # hashref
340 8         36 my $result = Text::CSV->new($args);
341 8 50       1669 unless ($result) {
342             # handle the bad practice of constructors that return undef instead of dieing.
343 0         0 my $epitaph = 'Failed to create a Text::CSV object';
344 0         0 require Data::Dumper; local $Data::Dumper::Terse = 1; $epitaph .= ' using args ' . Data::Dumper::Dumper($args);
  0         0  
  0         0  
345 0         0 die($epitaph);
346             }
347 8         23 return $result;
348             }
349              
350              
351              
352              
353              
354             =head1 PROTECTED OBJECT METHODS
355              
356             =head2 _read()
357              
358             Reads the next CSV data row and sets internal variables.
359              
360             =cut
361              
362             sub _read {
363 36     36   52 my $self = shift;
364 36 100       1019 if (my $csv_row = $self->{'text_csv'}->getline($self->{'h'})) {
365 27 50       1226 if ($self->{'debug'}) {
366 0         0 require Data::Dumper;
367 0         0 local $Data::Dumper::Terse = 1;
368 0         0 warn(__PACKAGE__ . '::_read ' . Data::Dumper::Dumper($csv_row));
369             }
370 27         114 tie(my %row, 'Tie::IxHash');
371 27         519 my $field_cols = $self->{'field_cols'}; # name to index map
372 27         70 foreach my $k ($self->fieldNames()) {
373 198         3555 my $i = $field_cols->{$k};
374 198         1257 my $v = $csv_row->[$i];
375 198 100       354 if (defined($v)) {
376 124         353 $v =~ s/^\s+|\s+$//g;
377 124 50       227 unless(length($v)) {
378 0         0 $v = undef;
379             }
380             }
381 198         545 $row{$k} = $v;
382             }
383              
384             # Call mutators if defined
385 27 100       414 if (my $mutators = $self->{'mutators'}) { # name to coderef map
386 3         9 foreach my $k (keys %$mutators) {
387 6 50       111 if (exists($row{$k})) {
388 6 50       32 if (my $mutator = $mutators->{$k}) {
389 6         18 &$mutator(\$row{$k}, \%row);
390             }
391             }
392             }
393             }
394              
395 27         149 $self->{'row'} = \%row;
396 27         62 $self->{'linenum'}++;
397             }
398             else {
399 9         794 $self->{'row'} = undef;
400 9         23 $self->{'linenum'} = 0;
401             }
402             }
403              
404              
405              
406              
407              
408              
409             =head1 PUBLIC OBJECT METHODS
410              
411             =head2 fieldNames()
412              
413             Returns the field names as an array.
414              
415             =cut
416              
417             sub fieldNames {
418 30     30 1 1965 my $self = shift;
419 30         40 return keys(%{$self->{'field_cols'}});
  30         161  
420             }
421              
422              
423              
424              
425              
426             =head2 current()
427              
428             Returns the current row.
429              
430             =cut
431              
432             sub current {
433 0     0 1 0 my $self = shift;
434 0         0 return $self->{'row'};
435             }
436              
437              
438              
439              
440              
441             =head2 linenum()
442              
443             Returns the current row index.
444              
445             =cut
446              
447             sub linenum {
448 9     9 1 201 my $self = shift;
449 9         45 return $self->{'linenum'};
450             }
451              
452              
453              
454              
455              
456             =head2 nextRow()
457              
458             Reads the next row.
459              
460             =cut
461              
462             sub nextRow {
463 36     36 1 18112 my $self = shift;
464 36         95 $self->_read();
465 36         105 return $self->{'row'};
466             }
467              
468              
469              
470              
471              
472             =head2 eof()
473              
474             Returns boolean
475              
476             =cut
477              
478             sub eof {
479 20     20 1 71 my $self = shift;
480 20         60 return $self->{'text_csv'}->eof();
481             }
482              
483              
484              
485              
486             =head2 rewind()
487              
488             Rewinds the file handle.
489              
490             =cut
491              
492             sub rewind {
493 1     1 1 5 my $self = shift;
494 1 50       4 seek($self->{'h'},0,0) || croak('Failed to rewind file handle');
495 1         49 $self->{'text_csv'}->getline($self->{'h'}); # skip the header row
496             }
497              
498              
499              
500             1;
501              
502              
503             __END__