File Coverage

blib/lib/Text/xSV.pm
Criterion Covered Total %
statement 165 230 71.7
branch 81 134 60.4
condition 14 24 58.3
subroutine 27 33 81.8
pod 20 23 86.9
total 307 444 69.1


line stmt bran cond sub pod time code
1             package Text::xSV;
2             $VERSION = 0.21;
3 1     1   1462 use strict;
  1         2  
  1         34  
4 1     1   5 use Carp;
  1         2  
  1         2885  
5              
6             sub alias {
7 1     1 1 364 my ($self, $from, $to) = @_;
8 1 50       4 my $field_pos = $self->{field_pos}
9             or return $self->error_handler(
10             "Can't call alias before headers are bound");
11 1 50       4 unless (exists $field_pos->{$from}) {
12 0         0 return $self->error_handler("'$from' is not available to alias");
13             }
14 1         4 $field_pos->{$to} = $field_pos->{$from};
15             }
16              
17             sub add_compute {
18 2     2 1 892 my ($self, $name, $compute) = @_;
19 2 50       8 my $field_pos = $self->{field_pos}
20             or return $self->error_handler(
21             "Can't call add_compute before headers are bound");
22 2 50       7 unless (UNIVERSAL::isa($compute, "CODE")) {
23 0         0 return $self->error_handler(
24             'Usage: $csv->add_compute("name", sub {FUNCTION});');
25             }
26 2         6 $field_pos->{$name} = $compute;
27             }
28              
29             sub bind_fields {
30 1     1 1 2 my $self = shift;
31 1         1 my %field_pos;
32 1         4 foreach my $i (0..$#_) {
33 3         7 $field_pos{$_[$i]} = $i;
34             }
35 1         4 $self->{field_pos} = \%field_pos;
36             }
37              
38             sub bind_header {
39 1     1 1 247 my $self = shift;
40 1         4 $self->bind_fields($self->get_row());
41             }
42              
43             *read_headers = \&bind_header;
44             *read_header = \&bind_header;
45              
46             sub delete {
47 1     1 0 221 my $self = shift;
48 1 50       5 my $field_pos = $self->{field_pos}
49             or return $self->error_handler(
50             "Can't call delete before headers are bound");
51 1         3 foreach my $field (@_) {
52 1 50       5 if (exists $field_pos->{$field}) {
53 1         3 delete $field_pos->{$field};
54             }
55             else {
56 0         0 $self->error_handler(
57             "Cannot delete field '$field': it doesn't exist");
58             }
59             }
60             }
61              
62             sub error_handler {
63 5     5 0 7 my $self = shift;
64 5         618 $self->{error_handler}->(@_);
65             }
66              
67             sub extract {
68 25     25 1 1919 my $self = shift;
69 25   100     69 my $cached_results = $self->{cached} ||= {};
70 25   100     56 my $in_compute = $self->{in_compute} ||= {};
71 25 50       51 my $row = $self->{row} or return $self->error_handler(
72             "No row found (did you call get_row())?");
73 25 50       40 my $lookup = $self->{field_pos}
74             or return $self->error_handler(
75             "Can't find field info (did you bind_fields or read_header?)");
76 25         22 my @data;
77 25         31 foreach my $field (@_) {
78 45 100       68 if (exists $lookup->{$field}) {
79 43         44 my $position_or_compute = $lookup->{$field};
80 43 100       68 if (not ref($position_or_compute)) {
    100          
    100          
81 35         62 push @data, $row->[$position_or_compute];
82             }
83             elsif (exists $cached_results->{$field}) {
84 1         5 push @data, $cached_results->{$field};
85             }
86             elsif ($in_compute->{$field}) {
87 2         8 $self->error_handler(
88             "Infinite recursion detected in computing '$field'");
89             }
90             else {
91             # Have to do compute
92 5         9 $in_compute->{$field} = 1;
93 5         13 $cached_results->{$field} = $position_or_compute->($self);
94 4         26 push @data, $cached_results->{$field};
95             }
96             }
97             else {
98 2         13 my @allowed = sort keys %$lookup;
99 2         13 $self->error_handler(
100             "Invalid field $field for file '$self->{filename}'.\n" .
101             "Valid fields are: (@allowed)\n"
102             );
103             }
104             }
105 21 100       173 return wantarray ? @data : \@data;
106             }
107              
108             sub extract_hash {
109 4     4 1 1443 my $self = shift;
110 4 100       22 my @fields = @_ ? @_ : $self->get_fields();
111 4         7 my %hash;
112 4         10 @hash{@fields} = $self->extract(@fields);
113 4 100       28 wantarray ? %hash : \%hash;
114             }
115              
116             sub fetchrow_hash {
117 2     2 1 509 my $self = shift;
118 2 100       7 return unless $self->get_row();
119 1         5 $self->extract_hash(@_);
120             }
121              
122             sub format_data {
123 0     0 1 0 my $self = shift;
124 0         0 my %data = @_;
125 0         0 my @row;
126 0 0       0 my $field_pos = $self->{field_pos} or $self->error_handler(
127             "Can't find field info (did you bind_fields or read_header?)"
128             );
129 0         0 while (my ($field, $value) = each %data) {
130 0         0 my $pos = $field_pos->{$field};
131 0 0       0 if (defined($pos)) {
132 0         0 $row[$pos] = $value;
133             }
134             else {
135 0         0 $self->warning_handler("Ignoring unknown field '$field'");
136             }
137             }
138 0         0 $self->{row} = \@row;
139 0 0       0 my $header = $self->{header}
140             or $self->error_handler("Cannot format_data when no header is set");
141 0         0 $self->format_row( $self->extract( @$header ));
142             }
143              
144             sub format_header {
145 0     0 1 0 my $self = shift;
146 0 0       0 if ($self->{header}) {
147 0         0 return $self->format_row(@{$self->{header}});
  0         0  
148             }
149             else {
150 0         0 $self->error_handler("Cannot format_header when no header is set");
151             }
152             }
153              
154             *format_headers = \&format_header;
155              
156             sub format_row {
157 4     4 1 5 my $self = shift;
158              
159 4         5 $self->{row_num}++;
160              
161 4 50       11 if ($self->{row_size_warning}) {
162 4 100       14 if (not exists $self->{row_size}) {
    50          
163 1         2 $self->{row_size} = @_;
164             }
165             elsif ( @_ != $self->{row_size}) {
166 0         0 my $count = @_;
167 0         0 $self->warning_handler(
168             "Formatting $count fields at row $self->{row_num}, "
169             . "expected $self->{row_size}"
170             );
171             }
172             }
173              
174 4         5 my $sep = $self->{sep};
175 4         3 my @row;
176 4         543 foreach my $value (@_) {
177 16 100       92 if (not defined($value)) {
    100          
    100          
178             # Empty fields are undef
179 3 100       7 push @row, $self->{quote_all} ? qq("") : "";
180             }
181             elsif ("" eq $value) {
182             # The empty string has to be quoted unless dont_quote is set
183 3 100       7 push @row, $self->{dont_quote} ? "" : qq{""};
184             }
185             elsif ($value =~ /\s|\Q$sep\E|"/) {
186             # quote it
187 3         4 local $_ = $value;
188 3         6 s/"/""/g;
189             # If dont_quote is set, just output the data element,
190             # otherwise follow the 'proper' CSV quoted format (that breaks
191             # MS SQL Server's bulk insert on date values)
192 3 100       14 push @row, $self->{dont_quote} ? $_ : qq{"$_"};
193             }
194             else {
195             # Unquoted is fine (that is, unless the quote_all option is set)
196 7 100       22 push @row, $self->{quote_all} ? qq("$value") : $value;
197             }
198             }
199 4         10 my $row = join $sep, @row;
200 4         18 return $row . "\n";
201             }
202              
203             sub get_fields {
204 4     4 1 446 my $self = shift;
205 4 50       14 my $field_pos = $self->{field_pos}
206             or return $self->error_handler(
207             "Can't call get_fields before headers are bound");
208 4         17 return keys %$field_pos;
209             }
210              
211             # Private block for shared variables in a small "parse engine".
212             # The concept here is to use pos to step through a string.
213             # This is the real engine, all else is syntactic sugar.
214             {
215             my ($self, $fh, $line, $is_error);
216              
217             sub get_row {
218 15     15 1 3131 $self = shift;
219 15         19 $is_error = 0;
220 15         38 delete $self->{row};
221 15         22 delete $self->{cached};
222 15         20 delete $self->{in_compute};
223 15 50 66     62 $fh = ($self->{fh}
224             ||= $self->{filename}
225             ? $self->open_file($self->{filename}, "<")
226             : ($self->{filename} = "ARGV", \*ARGV)
227             # Sorry for the above convoluted way to sneak in defining filename.
228             );
229 15 50       27 return unless $fh;
230 15 100       1714 defined($line = <$fh>) or return;
231 13 50       26 if ($self->{filter}) {
232 13         26 $line = $self->{filter}->($line);
233             }
234 13         23 chomp($line);
235 13         23 my @row = _get_row();
236 13 50       27 if ($is_error) {
237 0         0 return @row[0..$#row];
238             }
239 13 100       50 if (not exists $self->{row_size}) {
    50          
    50          
240 2         4 $self->{row_size} = @row;
241             }
242             elsif (not $self->{row_size_warning}) {
243             # The user asked not to get this warning, so don't issue it.
244             }
245             elsif ($self->{row_size} != @row) {
246 0         0 my $new = @row;
247 0         0 my $where = "Line $., file $self->{filename}";
248 0         0 $self->warning_handler(
249             "$where had $new fields, expected $self->{row_size}" );
250             }
251 13         21 $self->{row} = \@row;
252 13 100       67 return wantarray ? @row : [@row];
253             }
254              
255             sub _get_row {
256 13     13   14 my @row;
257 13         19 my $q_sep = quotemeta($self->{sep});
258 13         104 my $match_sep = qr/\G$q_sep/;
259 13         30 my $start_field = qr/\G(")/;
260 13         109 my $start_field_ms = qr/\G([^"$q_sep]*)/;
261              
262             # This loop is the heart of the engine
263 13   66     127 while ($line =~ /$start_field/gc or $line =~ /$start_field_ms/gc ) {
264 41 100       81 if ($1 eq '"') {
265 16         28 push @row, _get_quoted();
266             }
267             else {
268             # Needed for Microsoft compatibility
269 25 100       54 push @row, length($1) ? $1 : undef;
270             }
271 41         48 my $pos = pos($line);
272 41 100       236 if ($line !~ /$match_sep/g) {
273 13 50       23 if ($pos == length($line)) {
    0          
274 13         76 return @row;
275             }
276             elsif ($self->{strict}) {
277 0         0 my $expected = "Expected '$self->{sep}'";
278 0         0 $is_error = 1;
279 0         0 return $self->error_handler(
280             "$expected at $self->{filename}, line $., char $pos");
281             }
282             else {
283 0         0 TRY: {
284 0         0 my $expected = "Expected '$self->{sep}'";
285 0         0 $self->warning_handler(
286             "$expected at $self->{filename}, line $., char $pos");
287              
288             # Assume we are in non-strict mode and encountered a single "
289             # so we need to recover and finish my quoted field.
290 0         0 $row[-1] .= '"' . _get_quoted();
291 0         0 $pos = pos($line);
292 0 0       0 if ($line !~ /$match_sep/g) {
293 0 0       0 if ($pos == length($line)) {
294 0         0 return @row;
295             }
296             else {
297 0         0 redo TRY;
298             }
299             }
300             }
301             }
302             }
303             }
304 0         0 $is_error = 1;
305 0         0 $self->error_handler(
306             "I have no idea how parsing $self->{filename} left me here!");
307             }
308              
309             sub _get_quoted {
310 16     16   18 my $piece = "";
311 16         22 my $start_line = $.;
312 16         16 my $start_pos = pos($line);
313            
314 16         13 while(1) {
315 32 100       3903 if ($line =~ /\G([^"]+)/gc) {
    100          
    100          
316             # sequence of non-quote characters
317 14         858 $piece .= $1;
318             } elsif ($line =~ /\G""/gc) {
319             # replace "" with "
320 1         2 $piece .= '"';
321             } elsif ($line =~ /\G"/g) {
322             # closing quote
323 16         40 return $piece; # EXIT HERE
324             }
325             else {
326             # Must be at end of line
327 1         4 $piece .= $/;
328 1 50       4 unless(defined($line = <$fh>)) {
329 0         0 croak(
330             "File $self->{filename} ended inside a quoted field\n"
331             . "Field started at char $start_pos, line $start_line\n"
332             );
333             }
334 1 50       4 if ($self->{filter}) {
335 1         9 $line = $self->{filter}->($line);
336             }
337 1         3 chomp($line);
338             }
339             }
340 0         0 $is_error = 1;
341 0         0 $self->error_handler(
342             "I have no idea how parsing $self->{filename} left me here!");
343             }
344             }
345              
346             my @normal_accessors = qw(
347             close_fh error_handler warning_handler filename filter fh
348             row_size row_size_warning strict
349             );
350             foreach my $accessor (@normal_accessors) {
351 1     1   9 no strict 'refs';
  1         1  
  1         80  
352             *{"set_$accessor"} = sub {
353 25     25   712 $_[0]->{$accessor} = $_[1];
354             };
355             }
356              
357             # These two are mutually exclusive
358             foreach my $accessor (qw(dont_quote quote_all)) {
359 1     1   4 no strict 'refs';
  1         2  
  1         955  
360             *{"set_$accessor"} = sub {
361 3     3   12 my $self = shift;
362 3         7 $self->{$accessor} = shift;
363 3 50 66     13 if ($self->{dont_quote} and $self->{quote_all}) {
364 0         0 $self->error_handler("Can't set both dont_quote and quote_all");
365             }
366             };
367             }
368              
369             sub new {
370 3     3 1 778 my $self = bless ({}, shift);
371 45         82 my %allowed = map {
372 3         8 $_=>1
373             } @normal_accessors, qw(
374             header headers row sep dont_quote quote_all
375             );
376              
377 14     14   479 my %args = (
378             error_handler => \&confess,
379 14         27 filter => sub {my $line = shift; $line =~ s/\r$//; $line;},
  14         67  
380 3         33 sep => ",",
381             row_size_warning => 1,
382             close_fh => 0,
383             strict => 1,
384             @_
385             );
386             # Note, must set error_handler and warning_handler first because they
387             # might get called while processing the other args.
388 3         7 foreach my $arg ('error_handler', 'warning_handler', keys %args) {
389 27 50       53 unless (exists $allowed{$arg}) {
390 0         0 my @allowed = sort keys %allowed;
391 0         0 croak("Invalid argument '$arg', allowed args: (@allowed)");
392             }
393 27         38 my $method = "set_$arg";
394 27         61 $self->$method($args{$arg});
395             }
396 3         16 return $self;
397             }
398              
399             # Note the undocumented third argument for the mode. Most of the time this
400             # will do what is wanted without requiring Perl 5.6 or better. Users who
401             # supply their own metacharacters will also not be surprised at the result.
402             # Note the return of 0. I cannot assume that the user's error handler dies...
403             sub open_file {
404 3     3 1 3 my $self = shift;
405 3   50     9 my $file = $self->{filename} = shift || return $self->error_handler(
406             "No filename specified at open_file"
407             );
408 3 50 33     26 if ($file !~ /\||<|>/ and @_) {
409 3         4 my $mode = shift;
410 3         5 $file = "$mode $file";
411             }
412 3         5 my $fh = do {local *FH}; # Old trick, not needed in 5.6
  3         12  
413 3 50       202 unless (open ($fh, $file)) {
414 0         0 $self->error_handler("Cannot open '$file': $!");
415 0         0 return 0;
416             }
417 3         28 $self->{close_fh} = 1;
418 3         33 $self->{fh} = $fh;
419             }
420              
421             sub print {
422 4     4 1 5 my $self = shift;
423 4         5 $self->{row_out}++;
424 4 50 66     17 my $fh = ($self->{fh}
425             ||= $self->{filename}
426             ? $self->open_file($self->{filename}, ">")
427             : ($self->{filename} = "STDOUT", \*STDOUT)
428             # Sorry for the above convoluted way to sneak in defining filename.
429             );
430 4 50       7 return unless $fh;
431 4 50       21 print $fh @_ or $self->error_handler( "Print #$self->{row_out}: $!" );
432             }
433              
434             sub print_data {
435 0     0 1 0 my $self = shift;
436 0         0 $self->print($self->format_data(@_));
437             }
438              
439             sub print_header {
440 0     0 1 0 my $self = shift;
441 0         0 $self->print($self->format_header(@_));
442             }
443              
444             *print_headers = \&print_header;
445              
446             sub print_row {
447 4     4 1 472 my $self = shift;
448 4         13 $self->print($self->format_row(@_));
449             }
450              
451             sub set_header {
452 0     0 1 0 my $self = shift;
453 0 0 0     0 if (1 == @_ and UNIVERSAL::isa($_[0], 'ARRAY')) {
454 0         0 $self->{header} = $_[0];
455             }
456             else {
457 0         0 $self->{header} = \@_;
458             }
459 0 0       0 if (not exists $self->{field_pos}) {
460 0         0 $self->bind_fields(@{$self->{header}});
  0         0  
461             }
462             }
463              
464             *set_headers = \&set_header;
465              
466             sub set_sep {
467 5     5 1 1117 my $self = shift;
468 5         6 my $sep = shift;
469             # The reason for this limitation is so that $start_field in _get_row
470             # will do what it is supposed to. (I should use a negative lookahead,
471             # but I'm documenting this late at night and want some sleep.)
472 5 100       10 if (1 == length($sep)) {
473 4         14 $self->{sep} = $sep;
474             }
475             else {
476 1         8 $self->error_handler("The separator '$sep' is not of length 1");
477             }
478             }
479              
480             sub warning_handler {
481 0     0 0 0 my $self = shift;
482 0 0       0 if ($self->{warning_handler}) {
483 0         0 $self->{warning_handler}->(@_);
484             }
485             else {
486 0         0 eval { $self->{error_handler}->(@_) };
  0         0  
487 0 0       0 warn $@ if $@;
488             }
489             }
490              
491             sub DESTROY {
492 2     2   10 my $self = shift;
493 2 50       5 if ($self->{close_fh}) {
494 2 0       206 close($self->{fh}) or $self->error_handler(
    50          
495             $! ? "Cannot close '$self->{filename}': $!"
496             : "Exit status $? closing '$self->{filename}'"
497             );
498             }
499             }
500              
501             1;
502              
503             __END__