File Coverage

blib/lib/File/Text/CSV.pm
Criterion Covered Total %
statement 67 133 50.3
branch 36 90 40.0
condition 4 10 40.0
subroutine 9 12 75.0
pod 6 6 100.0
total 122 251 48.6


line stmt bran cond sub pod time code
1             #! perl
2              
3             # File::Text::CSV -- Access to CSV data files
4             # Author : Johan Vromans
5             # Created On : Sun Feb 14 14:44:39 2016
6             # Last Modified By: Johan Vromans
7             # Last Modified On: Tue Feb 16 10:02:50 2016
8             # Update Count : 117
9             # Status : Unknown, Use with caution!
10              
11             =head1 NAME
12              
13             File::Text::CSV -- Easy access to CSV data files
14              
15             =head1 SYNOPSIS
16              
17             use File::Text::CSV;
18              
19             # Open a CSV file with headers.
20             my $fh = File::Text::CSV->open( "current.csv",
21             { header => 1 } );
22              
23             # Read the rows.
24             while ( my $row = $fh->read ) {
25             print( $row->{Time}, ": ", $row->{Amount}, "\n");
26             }
27              
28             # Create a new CSV file, with header row.
29             my $out = File::Text::CSV->create( "foo.csv",
30             { header => [ qw( Time User Amount ) ],
31             sep_char => ";" }
32             );
33              
34             # Print some.
35             $out->write( [ '13:21', 'root', 24 ] );
36             $out->write( { Time => '15:43', User => 'me', Amount => 42 } );
37             $out->close;
38              
39             =head1 DESCRIPTION
40              
41             File::Text::CSV is like many other CSV processing modules, but it
42             focuses on the file side.
43              
44             CSV data is a file data format, so in practice one has to work with a
45             file, reading lines, then unpacking the data from the lines using some
46             other module, and so on. This module combines all that.
47              
48             It uses Text::CSV_XS to handle the CSV details.
49              
50             File::Text::CSV requires all rows of the CSV data to have the same
51             number of columns.
52              
53             =cut
54              
55             package File::Text::CSV;
56              
57 11     11   75274 use strict;
  11         24  
  11         301  
58 11     11   53 use warnings;
  11         20  
  11         244  
59 11     11   53 use Carp;
  11         17  
  11         729  
60 11     11   5985 use Encode;
  11         168130  
  11         1067  
61              
62             our $VERSION = "0.02";
63              
64 11     11   79 use parent qw( Text::CSV_XS ); # it's safe to use Text::CSV instead
  11         22  
  11         62  
65              
66             =head1 METHODS
67              
68             =over
69              
70             =item open
71              
72             $csv = File::Text::CSV::->open( $file, $opts )
73              
74             B creates a new File::CSV object associated with an input file.
75              
76             The named file is opened and available for further processing.
77              
78             The second parameter is a hashref with options. You can pass all
79             Text::CSV options here.
80              
81             Additional options specific to this function:
82              
83             =over
84              
85             =item header
86              
87             If present, it must be either an arrayref with column names, or a
88             truth value. If the latter value is true, the column names are read
89             from the first row of the CSV file.
90              
91             =item encoding
92              
93             Encoding to open the file with. Default encoding is UTF-8, unless
94             header processing is enabled and the file starts with a byte order
95             mark (BOM).
96              
97             =item append
98              
99             If true, new records written will be appended to the file.
100              
101             =back
102              
103             =cut
104              
105             sub open {
106 10     10 1 7298 my ( $pkg, $file, $opts ) = @_;
107              
108             # Private options.
109 10         28 my $header = delete $opts->{header};
110 10         24 my $append = delete $opts->{append};
111 10         19 my $encoding = delete $opts->{encoding};
112              
113             # Default options.
114 10 50       43 $opts->{binary} = 1 unless exists $opts->{binary};
115              
116             # Create the object.
117 10         65 my $self = $pkg->SUPER::new( $opts );
118 10 50       1204 croak( $pkg->SUPER::error_diag ) unless $self;
119              
120             # Open the file.
121 10 50       33 if ( $file eq "-" ) {
122 0 0       0 croak("Cannot append to standard input") if $append;
123 0         0 $self->{_fh} = \*STDIN;
124             }
125             else {
126 10 50       29 my $mode = $append ? '+<' : '<';
127 10 50       435 CORE::open( $self->{_fh}, $mode, $file )
128             or croak( "$file: $!" );
129 10         65 $self->{_append} = $append;
130             }
131              
132             # If header is an aref, it should contain the fields.
133 10         17 my $encset;
134 10 100       31 if ( $header ) {
135 5 50       9 if ( eval { $header->[0] || 1 } ) {
  5 100       44  
    50          
136 2         5 $self->{_column_names} = $header;
137 2         4 $self->{_columns} = @$header;
138 2         18 $self->column_names( @$header );
139             }
140             # Otherwise, if set, a file header is mandatory.
141             elsif ( $encoding ) {
142 0         0 $self->{_fh}->binmode("encoding($encoding)");
143 0         0 $encset++;
144 0         0 my $res = $self->getline( $self->{_fh} );
145 0 0       0 croak( "Incomplete or missing header line" ) unless $res;
146 0 0 0     0 croak( "Incomplete or missing header line" )
147             if @$res == 1 && $res->[0] eq ''; # empty line
148 0         0 $self->{_column_names} = $res;
149 0         0 $self->{_columns} = @$res;
150 0         0 $self->column_names( @$res );
151             }
152             else {
153 3         96 my $line = readline($self->{_fh});
154 3 100       18 if ( $line ) {
155 2 50       20 if ( $line =~ /^\x{ff}\x{fe}\0\0(.*)/s ) {
    50          
    50          
    50          
    50          
156 0         0 $line = $1;
157 0         0 $encoding = 'UTF-32LE';
158             # Line end is "\n\0\0\0" - get rid of excess.
159 0         0 getc( $self->{_fh} );
160 0         0 getc( $self->{_fh} );
161 0         0 getc( $self->{_fh} );
162             }
163             elsif ( $line =~ /^\0\0\x{fe}\x{ff}(.*)/s ) {
164 0         0 $line = $1;
165 0         0 $encoding = 'UTF-32BE';
166             # Line end is "\0\0\0\n" - stopped at \n.
167             }
168             elsif ( $line =~ /^\x{ef}\x{bb}\x{ff}(.*)/s ) {
169 0         0 $line = $1;
170 0         0 $encoding = 'UTF-8';
171             }
172             elsif ( $line =~ /^\x{ff}\x{fe}(.*)/s ) {
173 0         0 $line = $1;
174 0         0 $encoding = 'UTF-16LE';
175             # Line end is "\n\0" - get rid of excess.
176 0         0 getc( $self->{_fh} );
177             }
178             elsif ( $line =~ /^\x{fe}\x{ff}(.*)/s ) {
179 0         0 $line = $1;
180 0         0 $encoding = 'UTF-16BE';
181             # Line end is "\0\n" - stopped at \n.
182             }
183             }
184 3 50 50     25 if ( $encoding ||= "UTF-8" ) {
185 3         31 $line = Encode::decode( $encoding, $line, 1 );
186             }
187 3         624 my $res = $self->parse($line);
188 3 100       379 croak( "Incomplete or missing header line" ) unless $res;
189 2         11 my @res = $self->fields;
190 2 50 33     70 croak( "Incomplete or missing header line" )
191             if @res == 1 && $res[0] eq ''; # empty line
192 2         6 $self->{_column_names} = \@res;
193 2         10 $self->{_columns} = @res;
194 2         13 $self->column_names( @res );
195             }
196             }
197              
198 9   100     181 $encoding ||= "UTF-8";
199 9 50       38 carp("Encoding set to $encoding") if $ENV{File_CSV_ENC_DEBUG};
200 9 50       797 $self->{_fh}->binmode("encoding($encoding)") unless $encset;
201              
202 9         28153 return $self;
203             }
204              
205             =item create
206              
207             $csv = File::Text::CSV::->create( $file, $opts )
208              
209             B creates a new File::Text::CSV object associated with an output file.
210              
211             The named file is created and available for further processing.
212              
213             The second parameter is a hashref with options. You can pass all
214             Text::CSV_XS options here.
215              
216             Additional options specific to this function:
217              
218             =over
219              
220             =item header
221              
222             If present, it must be a arrayref with column names. The column names
223             are written to the first row of the CSV file.
224              
225             =item encoding
226              
227             Encoding to create the file with. Default encoding is UTF-8.
228              
229             =back
230              
231             =cut
232              
233             sub create {
234 0     0 1 0 my ( $pkg, $file, $opts ) = @_;
235              
236             # Private options.
237 0         0 my $header = delete $opts->{header};
238              
239             # Default options.
240 0 0       0 $opts->{binary} = 1 unless exists $opts->{binary};
241              
242             # Create the object.
243 0         0 my $self = $pkg->SUPER::new( $opts );
244 0 0       0 croak( $pkg->SUPER::error_diag ) unless $self;
245              
246             # Open (create) the file.
247 0 0       0 if ( $file eq "-" ) {
248 0         0 $self->{_fh} = \*STDOUT;
249             }
250             else {
251 0         0 my $mode = '>';
252 0 0       0 $opts->{encoding} = "utf8" unless defined $opts->{encoding};
253 0 0       0 $mode .= ':' . $opts->{encoding} if $opts->{encoding};
254 0 0       0 CORE::open( $self->{_fh}, $mode, $file )
255             or croak( "$file: $!" );
256             }
257              
258             # If header is set, it must be an aref containing the fields.
259 0 0       0 if ( $header ) {
260 0         0 my $status = $self->print( $self->{_fh}, $header );
261 0 0       0 croak( $self->error_diag ) unless $status;
262 0         0 $self->{_fh}->print("\n");
263 0         0 $self->{_columns} = @$header;
264 0         0 $self->{_column_names} = $header;
265 0         0 $self->column_names( @$header );
266             }
267              
268 0         0 return $self;
269             }
270              
271             # Internal: Check (or set) the number of columns.
272              
273             sub _check_columns {
274 32     32   60 my ( $self, $n ) = @_;
275 32 100       93 unless ( defined $self->{_columns} ) {
276 4         8 $self->{_columns} = $n;
277 4         8 return;
278             }
279             croak( "Incorrect number of fields: $n (should be " .
280             $self->{_columns} . ")" )
281 28 100       489 unless $n == $self->{_columns};
282             }
283              
284             =item read
285              
286             $row = $csv->read
287              
288             B reads the next row from the file, parses it into columns, and
289             delivers the result.
290              
291             When column names have been specified upon object create time, this
292             method returns a hashref. Otherwise it behaves like B.
293              
294             =cut
295              
296             sub read {
297 26     26 1 24510 my ( $self ) = @_;
298 26 100       107 goto &read_arrayref unless $self->{_column_names};
299 10         43 my $res = $self->getline_hr( $self->{_fh} );
300 10 100       994 return if $self->eof;
301 8 50       59 croak( $self->error_diag ) unless $res;
302 8         30 $self->_check_columns(0+keys(%$res));
303 8         33 return $res;
304             }
305              
306             =item read_arrayref
307              
308             $row = $csv->read_arrayref
309              
310             B reads the next row from the file, parses it into
311             columns, and delivers the result as an arrayref.
312              
313             =cut
314              
315             sub read_arrayref {
316 31     31 1 14867 my ( $self ) = @_;
317 31         1021 my $res = $self->getline( $self->{_fh} );
318 31 100       1710 return if $self->eof;
319 24 50       180 croak( $self->error_diag ) unless $res;
320 24         78 $self->_check_columns(0+@$res);
321 22         104 return $res;
322             }
323              
324             =item write
325              
326             $row = $csv->write( @data )
327             $row = $csv->write( \@data )
328             $row = $csv->write( \%data )
329              
330             A new row of data is assembled using the content of the supplied hash
331             or array, and written to the file.
332              
333             =cut
334              
335             sub write {
336 0     0 1   my ( $self, @row ) = @_;
337              
338 0           my $status;
339 0 0         if ( !ref($row[0]) ) {
    0          
340 0           $self->_check_columns( 0+@row );
341             }
342 0 0         elsif ( eval { $row[0]->[0] || 1 } ) { # aref
343 0           @row = @{ $row[0]->[0] };
  0            
344 0           $self->_check_columns( 0+@row );
345             }
346             else { # hashref
347 0           my $row = $row[0];
348 0           @row = ();
349 0           $self->_check_columns( 0+keys(%$row) );
350 0           my %row = %$row;
351 0           foreach ( @{ $self->{_column_names} } ) {
  0            
352 0           push( @row, delete($row{$_}) );
353             }
354 0 0         croak("Unused column names: " . join(" ", keys(%row)))
355             if %row;
356             }
357 0 0         seek( $self->{_fh}, 0, 2 ) if $self->{_append};
358 0           $status = $self->print( $self->{_fh}, \@row );
359 0           $self->{_fh}->print("\n");
360 0 0         croak( $self->error_diag ) unless $status;
361             }
362              
363             =item close
364              
365             $csv->close
366              
367             Close the file.
368              
369             =cut
370              
371             sub close {
372 0     0 1   my ( $self ) = @_;
373 0 0         $self->{_fh}->close or croak("$!");
374 0           delete $self->{_fh};
375             }
376              
377             =back
378              
379             =head1 SUPPORT
380              
381             Bugs should be reported via the CPAN bug tracker at
382              
383             L
384              
385             For other issues, contact the author.
386              
387             =head1 AUTHOR
388              
389             Johan Vromans Ejv@cpan.orgE.
390              
391             =head1 SEE ALSO
392              
393             L, L.
394              
395             =head1 LICENSE
396              
397             Copyright (C) 2016, Johan Vromans,
398              
399             This module is free software. You can redistribute it and/or
400             modify it under the terms of the Artistic License 2.0.
401              
402             This program is distributed in the hope that it will be useful,
403             but without any warranty; without even the implied warranty of
404             merchantability or fitness for a particular purpose.
405              
406             =cut
407              
408             1;