File Coverage

blib/lib/Text/AutoCSV.pm
Criterion Covered Total %
statement 1500 1761 85.1
branch 690 966 71.4
condition 188 291 64.6
subroutine 112 118 94.9
pod 43 43 100.0
total 2533 3179 79.6


line stmt bran cond sub pod time code
1             #!/usr/bin/perl
2             # ABSTRACT: helper module to automate the use of Text::CSV
3              
4             # vim:tw=80 :et
5              
6             # Text/AutoCSV.pm
7              
8             #
9             # Written by Sébastien Millet
10             # March, July, August, September 2016
11             # January - August 2017
12             #
13              
14             package Text::AutoCSV;
15             $Text::AutoCSV::VERSION = '1.2.0';
16 20     20   972116 use strict;
  20         51  
  20         556  
17 17     17   102 use warnings;
  17         43  
  17         1175  
18              
19             my $PKG = "Text::AutoCSV";
20              
21             require Exporter;
22             our @ISA = 'Exporter';
23             our @EXPORT_OK = qw(remove_accents);
24              
25 17     17   109 use Carp;
  17         39  
  17         1118  
26 17     17   6036 use Params::Validate qw(validate validate_pos :types);
  17         123272  
  17         3646  
27 17     17   5779 use List::MoreUtils qw(first_index indexes);
  17         118583  
  17         167  
28 17     17   11395 use Fcntl qw(SEEK_SET);
  17         36  
  17         812  
29 17     17   4997 use File::BOM;
  17         427647  
  17         956  
30 17     17   8115 use Text::CSV;
  17         243285  
  17         775  
31 17     17   9732 use DateTime;
  17         6851560  
  17         927  
32              
33             # DateTime::Format::Strptime 1.70 does not work properly with us.
34             # Actually all version as of 1.63 are fine, except 1.70.
35 17     17   8007 use DateTime::Format::Strptime 1.71;
  17         933433  
  17         169  
36 17     17   8184 use Class::Struct;
  17         26792  
  17         100  
37 17     17   7963 use Unicode::Normalize;
  17         27240  
  17         1060  
38              
39             # lock_keys is used to prevent accessing non existing keys
40             # Credits: 3381159 on http://stackoverflow.com
41             # "make perl shout when trying to access undefined hash key"
42 17     17   5522 use Hash::Util qw(lock_keys);
  17         35279  
  17         100  
43 17     17   7461 use Memoize;
  17         32077  
  17         14334  
44              
45             # FIXME
46             # Not needed in release -> should be always commented unless at dev time
47             #use feature qw(say);
48             #use Data::Dumper;
49             #$Data::Dumper::Sortkeys = 1;
50              
51             # Set to 1 if you wish to turn on debug without touching caller's code
52             our $ALWAYS_DEBUG = 0;
53              
54             # Keep it set to 0 unless you know what you're doing!
55             # Note
56             # Taken into account only if debug is set.
57             my $DEBUG_DATETIME_FORMATS = 0;
58              
59             # The below is taken into account only if $DEBUG_DATETIME_FORMATS is set.
60             # It becomes really MASSIVE debug output.
61             my $DEBUG_DATETIME_FORMATS_EVEN_MORE = 0;
62              
63             #
64             # Uncomment to replace carp and croak with cluck and confess, respectively
65             # Also reachable with perl option:
66             # -MCarp=verbose
67             # See 'perldoc Carp'.
68             #
69             #$Carp::Verbose = 1;
70              
71             # * *************** *
72             # * BEHAVIOR TUNING *
73             # * *************** *
74              
75             # * **************************************************** *
76             # * ALL THE VARIABLES BELOW ARE RATHER LOW LEVEL. *
77             # * IF YOU UPDATE IT, IT WILL LIKELY BREAK THE TEST PLAN *
78             # * **************************************************** *
79              
80             my $DEF_SEARCH_CASE = 0; # Case insensitive search by default
81             my $DEF_SEARCH_TRIM = 1; # Trim values by default
82             my $DEF_SEARCH_IGNORE_ACCENTS = 1; # Ignore accents
83             my $DEF_SEARCH_IGNORE_EMPTY = 1; # Ignore empty strings in searches by default
84             my $DEF_SEARCH_VALUE_IF_NOT_FOUND = undef; # If not found, returned field value is undef
85             my $DEF_SEARCH_VALUE_IF_AMBIGUOUS = undef; # If more than one record found by search (when a
86             # unique value is expected), return undef
87             my $DEF_SEARCH_IGNORE_AMBIGUOUS = 1; # By default, ignore the fact that multiple records are
88             # found by search and return the first record found
89              
90             my $DETECT_ENCODING = 1;
91              
92             my $DEFAULT_IN_ENCODING = 'UTF-8,latin1';
93              
94             # By default, input encoding detected is used for output.
95             # -> the constant below is used if and only if:
96             # Inbound encoding is unknown
97             # No providing of out_encoding attribute (out_encoding takes precedence when
98             # provided)
99             my $DEFAULT_OUT_ENCODING = 'UTF-8';
100              
101             my $DEFAULT_ESCAPE_CHAR = '\\';
102             my $DEFAULT_QUOTE_CHAR = '"';
103              
104             #
105             # The code that workarounds $FIX_PERLMONKS_823214 (see below) makes sense only
106             # under plain Windows.
107             #
108             # "Plain" Windows?
109             # This code MUST NOT be executed under cygwin because cygwin uses unix line
110             # breaks. This is why we detect /mswin/. Would we detect /win/, we'd catch
111             # cygwin, too, and we don't want that.
112             #
113             my $OS_IS_PLAIN_WINDOWS = !!( $^O =~ /mswin/i );
114              
115             #
116             # Shall we fix the issue reported as #823214 in PerlMonks? See
117             # http://www.perlmonks.org/?node_id=823214
118             #
119             # In brief (in case the link above would be broken one day):
120             # Under Windows, output mode set to UTF-16LE produces line breaks made of
121             # octets "0d 0a 00", whereas it should be "0d 00 0a 00".
122             #
123             # The code also fixes UTF-16BE (but it was not tested).
124             #
125             my $FIX_PERLMONKS_823214 = 1;
126              
127             # * **** *
128             # * CODE *
129             # * **** *
130              
131             sub ERR_UNKNOWN_FIELD() { 0 }
132              
133             # Store meta-data about each column
134             struct ColData => {
135             field_name => '$',
136             header_text => '$',
137             description => '$',
138             dt_format => '$',
139             dt_locale => '$',
140             multiline => '$'
141             };
142              
143             #
144             # Enumeration of ef_type member below
145             # Alternative:
146             # use enum (...)
147             #
148             # But it is not also by default on my distro and installing a package for 3
149             # constants, I find it a bit overkill!
150             #
151             my ( $EF_LINK, $EF_FUNC, $EF_COPY ) = 0 .. 2;
152             struct ExtraField => {
153             ef_type => '$',
154             self_name => '$',
155             description => '$',
156              
157             check_field_existence => '$',
158              
159             # For when ef_type is set to $EF_LINK
160              
161             link_self_search => '$',
162             link_remote_obj => '$',
163             link_remote_search => '$',
164             link_remote_read => '$',
165             link_vlookup_opts => '%',
166              
167             # For when ef_type is set to $EF_FUNC
168              
169             func_sub => '$',
170              
171             # For when ef_type is set to $EF_COPY
172              
173             copy_source => '$',
174             copy_sub => '$'
175              
176             };
177              
178             my $SEARCH_VALIDATE_OPTIONS = {
179             value_if_not_found => { type => UNDEF | SCALAR, optional => 1 },
180             value_if_found => { type => UNDEF | SCALAR, optional => 1 },
181             value_if_ambiguous => { type => UNDEF | SCALAR, optional => 1 },
182             ignore_ambiguous => { type => BOOLEAN, optional => 1 },
183             case => { type => BOOLEAN, optional => 1 },
184             trim => { type => BOOLEAN, optional => 1 },
185             ignore_empty => { type => BOOLEAN, optional => 1 },
186             ignore_accents => { type => BOOLEAN, optional => 1 }
187             };
188              
189             sub _is_utf8 {
190 424     424   758 my $e = shift;
191              
192 424 100       2836 return 1 if $e =~ m/^(utf-?8|ucs-?8)/i;
193 32         67 return 0;
194             }
195              
196             # To replace // in old perls: return the first non-undef value in provided list
197             sub _get_def {
198 12079     12079   21704 for (@_) {
199 18709 100       44202 return $_ if defined($_);
200             }
201 895         1739 return;
202             }
203              
204             sub _print {
205 15     15   19 my $self = shift;
206 15         20 my $t = shift;
207              
208 15         18 my $infoh = $self->{infoh};
209 15 50       31 return if ref $infoh ne 'GLOB';
210              
211 0         0 print( $infoh $t );
212             }
213              
214             sub _printf {
215 17     17   22 my $self = shift;
216              
217 17         23 my $infoh = $self->{infoh};
218 17 50       45 return if ref $infoh ne 'GLOB';
219              
220 0         0 printf( $infoh @_ );
221             }
222              
223             sub _print_warning {
224 67     67   104 my $self = shift;
225 67         109 my $warning_message = shift;
226 67         94 my $dont_wrap = shift;
227              
228 67 100       168 my $msg =
229             ( $dont_wrap ? $warning_message : "$PKG: warning: $warning_message" );
230 67 100       4487 carp $msg unless $self->{quiet};
231             }
232              
233             sub _close_inh {
234 316     316   516 my $self = shift;
235              
236 316 100       3003 close $self->{_inh} if $self->{_close_inh_when_finished};
237 316         1194 $self->{_inh} = undef;
238 316         577 $self->{_close_inh_when_finished} = undef;
239             }
240              
241             sub _close_outh {
242 127     127   193 my $self = shift;
243              
244             close $self->{outh}
245             if defined( $self->{outh} )
246 127 50 66     4759 and $self->{_close_outh_when_finished};
247 127         359 $self->{outh} = undef;
248 127         246 $self->{_close_outh_when_finished} = undef;
249             }
250              
251             sub _print_error {
252 81     81   340 my ( $self, $error_message, $dont_stop, $err_code, $err_extra ) = @_;
253              
254 81         196 my $msg = "$PKG: error: $error_message";
255              
256 81 100 100     263 if ( defined($err_code)
      100        
257             and ( !$self->{quiet} )
258             and $self->{croak_if_error} )
259             {
260 5 50       15 if ( $err_code == ERR_UNKNOWN_FIELD ) {
261 5         9 my %f = %{$err_extra};
  5         23  
262 5         10 my @cols;
263 5         16 for my $n ( keys %f ) {
264 15         31 $cols[ $f{$n} ] = $n;
265             }
266 5         17 $self->_print( $self->get_in_file_disp()
267             . " column - field name correspondance:\n" );
268 5         14 $self->_print("COL # FIELD\n");
269 5         13 $self->_print("----- -----\n");
270 5         13 for my $i ( 0 .. $#cols ) {
271 17 100       48 $self->_printf( "%05d %s\n", $i,
272             ( defined( $cols[$i] ) ? $cols[$i] : '' ) );
273             }
274             }
275             else {
276 0         0 confess "Unknown error code: '$err_code'\n";
277             }
278             }
279              
280 81 100 100     289 if ( $self->{croak_if_error} and ( !$dont_stop ) ) {
281 30         98 $self->_close_read(1);
282 30         92 $self->_close_inh();
283 30         87 $self->_close_outh();
284 30         113 $self->_status_reset(1);
285 30         4480 croak $msg;
286             }
287 51         129 $self->_print_warning( $msg, 1 );
288             }
289              
290             #
291             # Return the string passed in argument with all accents removed from characters.
292             # Do it in a rather general and reliable way, not tied to latin1.
293             # Tested on latin1 and latin2 character sets.
294             #
295             # Credits:
296             # http://stackoverflow.com/questions/17561839/remove-accents-from-accented-characters
297             #
298             sub remove_accents {
299 3163     3163 1 29098 validate_pos( @_, { type => SCALAR } );
300              
301 3163         9118 my $s = $_[0];
302 3163         12216 my $r = NFKD($s);
303 3163         7912 $r =~ s/\p{Nonspacing_Mark}//g;
304 3163         7972 return $r;
305             }
306              
307             sub _detect_csv_sep {
308 262     262   452 my $ST_OUTSIDE = 0;
309 262         423 my $ST_INSIDE = 1;
310              
311 262         634 my ( $self, $escape_char, $quote_char, $sep ) = @_;
312              
313 262         465 my $_debugh = $self->{_debugh};
314 262         391 my $inh = $self->{_inh};
315 262         434 my $_debug = $self->{_debug};
316              
317 262         409 delete $self->{_inh_header};
318              
319 262 100       647 $escape_char = $DEFAULT_ESCAPE_CHAR unless defined($escape_char);
320              
321 262 50       636 do {
322 0         0 $self->_print_error(
323             "illegal \$escape_char: '$escape_char' (length >= 2)");
324 0         0 return 0;
325             } if length($escape_char) >= 2;
326              
327 262 50       569 do {
328 0         0 $self->_print_error(
329             "$PKG: error: illegal \$quote_char '$quote_char' (length >= 2)");
330 0         0 return 0;
331             } if length($quote_char) >= 2;
332              
333 262 50       575 $escape_char = '--' if $escape_char eq '';
334 262 50       524 $quote_char = '--' if $quote_char eq '';
335              
336             # FIXME (?)
337             # Avoid inlined magic values for separator auto-detection.
338             # Issue is, as you can see below, the behavior is also hard-coded and not
339             # straightforward to render 'tunable' ("," and ";" take precedence over "\t").
340 262         877 my %Seps = (
341             ";" => 0,
342             "," => 0,
343             "\t" => 0
344             );
345              
346 262         2958 my $h = <$inh>;
347 262 50       2577 if ( $self->{inh_is_stdin} ) {
348 0         0 $self->{_inh_header} = $h;
349 0 0       0 print( $_debugh "Input is STDIN => saving header line to re-read it "
350             . "later (in-memory)\n" )
351             if $_debug;
352             }
353             else {
354 262         1760 seek $inh, 0, SEEK_SET;
355 262 50       755 print( $_debugh "Input is not STDIN => using seek function to rewind "
356             . "read head after header line reading\n" )
357             if $_debug;
358             }
359              
360 262         593 chomp $h;
361 262         409 my $status = $ST_OUTSIDE;
362 262         600 my $l = length($h);
363 262         406 my $c = 0;
364 262         636 while ( $c < $l ) {
365 4808         6459 my $ch = substr( $h, $c, 1 );
366 4808         5733 my $chnext = '';
367 4808 100       8310 $chnext = substr( $h, $c + 1, 1 ) if ( $c < $l - 1 );
368 4808 100       8025 if ( $status == $ST_INSIDE ) {
    50          
369 1521 50 66     2962 if ( $ch eq $escape_char and $chnext eq $quote_char ) {
    100          
370 0         0 $c += 2;
371             }
372             elsif ( $ch eq $quote_char ) {
373 197         230 $status = $ST_OUTSIDE;
374 197         331 $c++;
375             }
376             else {
377 1324         2043 $c++;
378             }
379             }
380             elsif ( $status == $ST_OUTSIDE ) {
381 3287 50 33     7523 if (
    100 66        
    100          
382             $ch eq $escape_char
383             and ( $chnext eq $quote_char
384             or exists $Seps{$chnext} )
385             )
386             {
387 0         0 $c += 2;
388             }
389             elsif ( exists $Seps{$ch} ) {
390 946         1181 $Seps{$ch}++;
391 946         1485 $c++;
392             }
393             elsif ( $ch eq $quote_char ) {
394 197         538 $status = $ST_INSIDE;
395 197         318 $c++;
396             }
397             else {
398 2144         3484 $c++;
399             }
400             }
401             }
402              
403 262 100 100     1383 if ( $Seps{";"} == 0 and $Seps{","} >= 1 ) {
    100 66        
    50 33        
      33        
404 147         277 $$sep = ",";
405 147         620 return 1;
406             }
407             elsif ( $Seps{","} == 0 and $Seps{";"} >= 1 ) {
408 106         222 $$sep = ";";
409 106         492 return 1;
410             }
411             elsif ( $Seps{","} == 0 and $Seps{";"} == 0 and $Seps{"\t"} >= 1 ) {
412 0         0 $$sep = "\t";
413 0         0 return 1;
414             }
415             else {
416              
417             # Check the case where there is one unique column, in which case,
418             # assume comma separator.
419 9         23 my $h_no_accnt = remove_accents($h);
420 9 100       58 if ( $h_no_accnt =~ m/^[[:alnum:]_]+$/i ) {
421 3         7 $$sep = ",";
422 3         13 return 1;
423             }
424              
425 6         11 $$sep = "";
426 6 50       12 if ($_debug) {
427 0         0 for my $k ( keys %Seps ) {
428 0         0 print( $_debugh "\$Seps{'$k'} = $Seps{$k}\n" );
429             }
430             }
431 6         23 return 0;
432             }
433             }
434              
435             sub _reopen_input {
436 652     652   948 my $self = shift;
437              
438 652         985 my $in_file = $self->{in_file};
439              
440 652         805 my $inh;
441 652 50       12642 if ( !open( $inh, "<", $in_file ) ) { ## no critic (InputOutput::RequireBriefOpen)
442 0         0 $self->_print_error("unable to open file '$in_file': $!");
443 0         0 return;
444             }
445 652 50       1833 if ( !$self->{_leave_encoding_alone} ) {
446              
447             confess "Oups! _inh_encoding_string undef?"
448 652 50       1310 unless defined( $self->{_inh_encoding_string} );
449              
450 652         4100 binmode $inh, $self->{_inh_encoding_string};
451             }
452              
453 652         29058 return $inh;
454             }
455              
456             # Abstraction layer, not useful Today, could bring added value when looking into
457             # Text::CSV I/O
458             sub _mygetline {
459 8399     8399   14351 my ( $csvobj, $fh ) = @_;
460              
461 8399         172425 return $csvobj->getline($fh);
462             }
463              
464             sub _detect_meta {
465 456     456   978 my ( $self, $quote_char, $sep_char ) = @_;
466              
467 456         765 my $in_file = $self->{in_file};
468 456         682 my $_debug = $self->{_debug};
469 456         702 my $_debugh = $self->{_debugh};
470              
471 456 100       955 return if $self->{_int_one_pass};
472 436 100       986 return if $self->{_detect_meta_done}; # Sans jeu de mot...
473              
474 300 100       682 if ( !defined( $self->{escape_char} ) ) {
475 298         727 $self->_register_pass("detect escape character");
476              
477 298         459 my $flag = 0;
478 298         642 my $inh = $self->_reopen_input();
479 298 50       756 if ( defined($inh) ) {
480 298         3897 while ( my $l = <$inh> ) {
481 3899         10878 chomp $l;
482              
483             # Very heuristic criteria...
484             # Tant pis.
485             # $flag = 1 if $l =~ m/(?<!$sep_char)$quote_char$quote_char(?!$sep_char)/;
486             # $flag = 1 if $l =~ m/(\\$quote_char|\\\\)/;
487 3899 100       14272 $flag = 1 if $l =~ m/(\\$quote_char)/;
488              
489             }
490 298         2040 close $inh;
491             }
492 298 100       1085 $self->{escape_char} = ( $flag ? '\\' : '"' );
493 298 50       1006 print( $_debugh " detected escape_char: '$self->{escape_char}'\n" )
494             if $_debug;
495             }
496              
497 300 50       647 print( $_debugh " using escape_char: '$self->{escape_char}' "
498             . "to further examine input (is_always_quoted, multiline)\n" )
499             if $_debug;
500              
501 300         437 my $is_always_quoted = 0;
502 300         724 my $inh = $self->_reopen_input();
503 300         501 my @multiline;
504 300 50       677 if ( defined($inh) ) {
505 300         758 $self->_register_pass("detect is_always_quoted and multiline");
506             my $csv = Text::CSV->new(
507             {
508             sep_char => $sep_char,
509             allow_whitespace => 1,
510             binary => 1,
511             auto_diag => 0,
512             quote_char => $quote_char,
513             escape_char => $self->{escape_char},
514 300         3169 keep_meta_info => 1,
515             allow_loose_escapes => 1
516             }
517             );
518 300         55576 my $nb_rows = 0;
519 300         494 $is_always_quoted = 1;
520 300         686 while ( my $ar = _mygetline( $csv, $inh ) ) {
521 3897         110314 $nb_rows++;
522              
523 3897         4637 my @aa = @{$ar};
  3897         9471  
524 3897         5790 my $e = $#aa;
525 3897         6876 for my $i ( 0 .. $e ) {
526 28577 100       47046 $is_always_quoted = 0 unless $csv->is_quoted($i);
527 28577 100       225458 $multiline[$i] = 1 if $aa[$i] =~ m/\n/;
528             }
529             }
530 300         8749 $self->{_nb_rows} = $nb_rows;
531 300         3349 close $inh;
532             }
533 300         1033 $self->{_multiline} = [@multiline];
534              
535 300 50       713 print( $_debugh " is_always_quoted: $is_always_quoted\n" ) if $_debug;
536 300         630 $self->{_is_always_quoted} = $is_always_quoted;
537              
538 300         1357 $self->{_detect_meta_done} = 1;
539             }
540              
541             sub _register_pass {
542 1269     1269   2350 my ( $self, $pass_name ) = @_;
543 1269         1967 my $_debug = $self->{_debug};
544 1269         2351 my $_debugh = $self->{_debugh};
545              
546 1269         2079 $self->{_pass_count}++;
547              
548 1269 50       3308 return unless $_debug;
549              
550 0         0 print( $_debugh "Pass #" . $self->{_pass_count} . " ($pass_name) done\n" );
551             }
552              
553             sub _update_in_mem_record_count {
554 510     510   1113 my ( $self, $nonexistent_arg ) = @_;
555 510         842 my $_debug = $self->{_debug};
556 510         811 my $_debugh = $self->{_debugh};
557              
558 510 50       1116 confess "Hey! what is this second argument?" if defined($nonexistent_arg);
559              
560 510         711 my $new_count = $#{ $self->{_flat} } + 1;
  510         1096  
561              
562 510         787 my $updated_max = 0;
563 510 100       1334 if ( $new_count > $self->get_max_in_mem_record_count() ) {
564 155         450 $self->_set_max_in_mem_record_count($new_count);
565 155         252 $updated_max = 1;
566             }
567              
568 510         1276 $self->{_in_mem_record_count} = $new_count;
569 510 50       1232 if ($_debug) {
570 0         0 print( $_debugh "_in_mem_record_count updated, set to $new_count" );
571 0 0       0 print( $_debugh " (also updated max)" ) if $updated_max;
572 0         0 print( $_debugh "\n" );
573             }
574             }
575              
576             sub _detect_inh_encoding {
577 320     320   1262 my ( $self, $enc, $via, $in_file, $detect_enc ) = @_;
578 320         685 my $_debug = $self->{_debug};
579 320         538 my $_debugh = $self->{_debugh};
580              
581 320 100 66     1074 $enc = $DEFAULT_IN_ENCODING if ( !defined $enc ) or $enc eq '';
582              
583 320         1995 my @encodings = split( /\s*,\s*/, $enc );
584              
585 320 50       916 confess "Oups! No encoding to try?" if $#encodings < 0;
586              
587 320 50       716 print( $_debugh "[ST] _detect_inh_encoding(): start\n" ) if $_debug;
588              
589 320         494 my $wrn = 0;
590 320         724 my $m;
591             my $m0;
592 320         0 my $ee;
593 320         626 for my $e (@encodings) {
594 332         516 $ee = $e;
595 332         696 my $viadef = _get_def( $via, '' );
596 332         931 $m = ":encoding($e)$viadef";
597 332 100       774 $m0 = $m unless defined($m0);
598              
599 332 100       705 last unless $detect_enc;
600              
601 320 50 33     1252 confess "Oups! in_file not defined?"
602             if ( !defined $in_file )
603             or $in_file eq '';
604              
605 320 50       621 print( $_debugh " Checking encoding '$e' / '$m'\n" ) if $_debug;
606 320         498 $wrn = 0;
607              
608 320         1093 $self->_register_pass("check $e encoding");
609              
610 320         493 my $utf8_bom = 0;
611 320 100       695 if ( _is_utf8($e) ) {
612 296 50       6370 if ( open my $fh, '<:raw', $in_file ) {
613 296         588 my $bom;
614 296         4148 read $fh, $bom, 3;
615 296 100 66     1476 if ( length($bom) == 3 and $bom eq "\xef\xbb\xbf" ) {
616 12 100       29 if ( !defined($via) ) {
617 10         25 $m .= ":via(File::BOM)";
618             }
619             }
620 296         2227 close $fh;
621             }
622             }
623              
624 320         602 my $inh;
625 320 50       4609 if ( !open( $inh, "<", $in_file ) ) { ## no critic (InputOutput::RequireBriefOpen)
626 0         0 $self->_print_error("unable to open file '$in_file': $!");
627 0         0 return ( $encodings[0], $m0 );
628             }
629 16     16   109 binmode $inh, $m;
  16         28  
  16         106  
  320         3265  
630              
631             # TURN OFF WARNINGS OUTPUT
632              
633             {
634 320         35455 local $SIG{__WARN__} = sub {
635 51     51   545 $wrn++;
636              
637             # Uncomment only for debug!
638             # Otherwise you'll get quite a good deal of output at each execution :-)
639             # print(STDERR @_);
640 320         2114 };
641 320         4069 while (<$inh>) { }
642             }
643              
644             # WARNINGS ARE BACK ON
645              
646 320         11980 close $inh;
647 320 50       949 print( $_debugh " '$m' counts $wrn warning(s)\n" ) if $_debug;
648              
649 320 100       1274 last if $wrn == 0;
650             }
651              
652 320 50       789 if ( $wrn >= 1 ) {
653 0         0 $self->_print_warning(
654             "encoding warnings encountered during initial check, "
655             . "using '$encodings[0]'" );
656 0         0 return ( $encodings[0], $m0 );
657             }
658              
659 320 50       689 confess "Oups! undef encoding string?" unless defined($m);
660              
661 320 50       622 print( $_debugh " Detected encoding string '$ee' / '$m'\n" ) if $_debug;
662 320         1330 return ( $ee, $m );
663             }
664              
665             #
666             # Each of these functions brings status to the next value (current status + 1).
667             # Each of these functions returns 0 if an error occured, 1 if all good
668             #
669             my @status_forward_functions = (
670             "_S1_init_input", # To go from S0 to S1
671             "_S2_init_fields_from_header", # To go form S1 to S2
672             "_S3_init_fields_extra", # To go from S2 to S3
673             "_S4_read_all_in_mem", # To go from S3 to S4
674             );
675              
676             sub _status_reset {
677 494     494   822 my $self = shift;
678              
679 494         3173 validate_pos( @_, { type => SCALAR, optional => 1 } );
680 494         1704 my $called_from_print_error = _get_def( $_[0], 0 );
681              
682 494 100 100     1745 if ( defined( $self->{_status} ) and $self->{_status} == 4 ) {
683 18 100       43 unless ($called_from_print_error) {
684 16         28 my $msg =
685             "in-memory CSV content discarded, will have to re-read input";
686 16         52 $self->_print_warning($msg);
687             }
688 18         176 $self->{_flat} = [];
689 18         54 $self->_update_in_mem_record_count();
690             }
691              
692 494         948 $self->{_status} = 0;
693 494 100       1032 return 0 if $called_from_print_error;
694 464         981 return $self->_status_forward('S1');
695             }
696              
697             sub _status_forward {
698 4085     4085   6172 my $self = shift;
699              
700 4085         8393 return $self->___status_move( @_, 1 );
701             }
702              
703             sub _status_backward {
704 139     139   209 my $self = shift;
705              
706 139         319 return $self->___status_move( @_, -1 );
707             }
708              
709             # You should not call ___status_move() in the code, that is why the name is
710             # prefixed with 3 underscores! Only _status_forward and _status_backward should
711             # call it.
712             sub ___status_move {
713 4224     4224   8425 my ( $self, $target, $step ) = @_;
714              
715 4224         7258 my $_debug = $self->{_debug};
716 4224         6124 my $_debugh = $self->{_debugh};
717              
718 4224 50 66     18528 if ( ( !defined $step ) or ( $step != -1 and $step != 1 ) ) {
      33        
719 0         0 confess "Oups! \$step has a wrong value: '$step'";
720             }
721              
722 4224         5740 my $n;
723 4224 50       18668 confess "Oups! illegal status string: '$target'"
724             unless ($n) = $target =~ m/^S(\d)$/;
725              
726 4224 100       9727 if ( $self->{_read_in_progress} ) {
727 1         4 $self->_print_error(
728             "illegal call while read is in progress, "
729             . "would lead to infinite recursion",
730             0
731             );
732 0         0 confess "Aborted.";
733             }
734              
735 4223 100       7936 if ( $step == -1 ) {
736 139 100       406 if ( $n < $self->{_status} ) {
737 19 100       52 if ( $self->{_status} == 4 ) {
738 16 50       42 print( $_debugh
739             "[ST] Requested status $n but will go to status 0\n" )
740             if $_debug;
741 16         63 return $self->_status_reset();
742             }
743 3         6 $self->{_status} = $n;
744 3 50       6 print( $_debugh "[ST] New status: " . $self->{_status} . "\n" )
745             if $_debug;
746             }
747 123         352 return 1;
748             }
749              
750 4084 100       9168 if ( $self->{_status} < $n ) {
751 869 50       1658 print( $_debugh "[ST] Current status: " . $self->{_status} . "\n" )
752             if $_debug;
753             }
754              
755 4084 100 100     10716 if ( $self->{_status} <= 1
      100        
      100        
756             and $n >= 2
757             and $self->{_int_one_pass}
758             and $self->get_pass_count() >= 1 )
759             {
760 12         24 my $msg = "one_pass set, unable to read input again";
761             do {
762 12         44 $self->_print_error($msg);
763 5         77 return 0;
764 12 50       32 } if $self->{one_pass};
765 0 0       0 $self->_print_warning($msg) if !$self->{one_pass};
766             }
767              
768 4072         8248 while ( $self->{_status} < $n ) {
769              
770 1347         2522 my $funcname = $status_forward_functions[ $self->{_status} ];
771 1347 50       2437 confess "Oups! Unknown status?" unless defined($funcname);
772              
773 1347 50       2577 print( $_debugh "[ST] Now executing $funcname\n" ) if $_debug;
774              
775 1347 50       5511 if ( my $member_function = $self->can($funcname) ) {
776 1347 100       3089 return 0 unless $self->$member_function();
777             }
778             else {
779 0         0 confess "Could not find method $funcname in $PKG!";
780             }
781              
782 1328         2519 $self->{_status} += $step;
783 1328 50       3999 print( $_debugh "[ST] New status: " . $self->{_status} . "\n" )
784             if $_debug;
785             }
786              
787 4053         10418 return 1;
788             }
789              
790             sub new {
791 333     333 1 247908 my ( $class, @args ) = @_;
792              
793 333         37965 @args = validate(
794             @args,
795             {
796             in_file => { type => SCALAR, optional => 1 },
797             infoh =>
798             { type => UNDEF | GLOBREF, default => \*STDERR, optional => 1 },
799             verbose => { type => BOOLEAN, default => 0, optional => 1 },
800             quiet => { type => BOOLEAN, optional => 1 },
801             croak_if_error => { type => BOOLEAN, default => 1, optional => 1 },
802             inh => { type => GLOBREF, optional => 1 },
803             in_csvobj => { type => OBJECT, optional => 1 },
804             sep_char => { type => SCALAR, optional => 1 },
805             quote_char => { type => SCALAR, optional => 1 },
806             escape_char => { type => SCALAR, optional => 1 },
807             has_headers => { type => BOOLEAN, default => 1, optional => 1 },
808             out_has_headers =>
809             { type => UNDEF | BOOLEAN, default => undef, optional => 1 },
810             fields_ar => { type => ARRAYREF, optional => 1 },
811             fields_hr => { type => HASHREF, optional => 1 },
812             fields_column_names => { type => ARRAYREF, optional => 1 },
813             search_case => { type => SCALAR, optional => 1 },
814             search_trim => { type => SCALAR, optional => 1 },
815             search_ignore_empty => { type => SCALAR, optional => 1 },
816             search_ignore_accents => { type => SCALAR, optional => 1 },
817             search_ignore_ambiguous => { type => SCALAR, optional => 1 },
818             search_value_if_not_found => { type => SCALAR, optional => 1 },
819             search_value_if_found => { type => SCALAR, optional => 1 },
820             search_value_if_ambiguous => { type => SCALAR, optional => 1 },
821             walker_hr => { type => CODEREF, optional => 1 },
822             walker_ar => { type => CODEREF, optional => 1 },
823             read_post_update_hr => { type => CODEREF, optional => 1 },
824             write_filter_hr => { type => CODEREF, optional => 1 },
825             out_filter => { type => CODEREF, optional => 1 },
826             write_fields => { type => ARRAYREF, optional => 1 },
827             out_orderby => { type => ARRAYREF, optional => 1 },
828             out_fields => { type => ARRAYREF, optional => 1 },
829             out_file => { type => SCALAR, optional => 1 },
830             out_always_quote => { type => BOOLEAN, optional => 1 },
831             out_sep_char => { type => SCALAR, optional => 1 },
832             out_quote_char => { type => SCALAR, optional => 1 },
833             out_escape_char => { type => SCALAR, optional => 1 },
834             out_dates_format => { type => SCALAR, optional => 1 },
835             out_dates_locale => { type => SCALAR, optional => 1 },
836             encoding => { type => SCALAR, optional => 1 },
837             via => { type => SCALAR, optional => 1 },
838             out_encoding => { type => SCALAR, optional => 1 },
839             dont_mess_with_encoding => { type => BOOLEAN, optional => 1 },
840             one_pass => { type => BOOLEAN, optional => 1 },
841             no_undef => { type => BOOLEAN, optional => 1 },
842             fields_dates => { type => ARRAYREF, optional => 1 },
843             fields_dates_auto => { type => BOOLEAN, optional => 1 },
844             fields_dates_auto_optimize => { type => BOOLEAN, optional => 1 },
845             dates_formats_to_try => { type => ARRAYREF, optional => 1 },
846             dates_formats_to_try_supp => { type => ARRAYREF, optional => 1 },
847             dates_ignore_trailing_chars => { type => BOOLEAN, optional => 1 },
848             dates_search_time => { type => BOOLEAN, optional => 1 },
849             dates_locales => { type => SCALAR, optional => 1 },
850             out_utf8_bom => { type => SCALAR, optional => 1 },
851             dates_zeros_ok => { type => SCALAR, default => 1, optional => 1 },
852             _debug => { type => BOOLEAN, default => 0, optional => 1 },
853             _debug_read => { type => BOOLEAN, default => 0, optional => 1 },
854             _debug_extra_fields => { type => BOOLEAN, optional => 1 },
855             _debugh => { type => UNDEF | GLOBREF, optional => 1 }
856             }
857             );
858              
859 329         7074 my $self = {@args};
860              
861 329         719 my @fields = keys %{$self};
  329         1202  
862              
863             # croak_if_error
864              
865 329         821 my $croak_if_error = $self->{croak_if_error};
866              
867             # verbose and _debug management
868              
869 329 50       1143 $self->{_debugh} = $self->{infoh} if !defined( $self->{_debugh} );
870 329 50       842 $self->{_debug} = 1 if $ALWAYS_DEBUG;
871 329         650 my $_debug = $self->{_debug};
872 329 50       682 $self->{verbose} = 1 if $_debug;
873 329         596 my $verbose = $self->{verbose};
874              
875 329         511 my $_debugh = $self->{_debugh};
876              
877 329         594 bless $self, $class;
878              
879             # fields_ar, fields_hr
880              
881 329 100       1083 if (
882             defined( $self->{fields_ar} ) +
883             defined( $self->{fields_hr} ) +
884             defined( $self->{fields_column_names} ) >= 2 )
885             {
886 1         4 $self->_print_error(
887             "mixed use of fields_ar, fields_hr and fields_column_names. "
888             . "Use one at a time." );
889             }
890 329 100 100     984 if ( defined( $self->{fields_ar} ) and ( !defined $self->{fields_hr} ) ) {
891 2         4 my @f = @{ $self->{fields_ar} };
  2         5  
892 2         4 my %h;
893 2         5 for my $e (@f) {
894 6         17 $h{$e} = "^$e\$";
895             }
896 2         6 $self->{fields_hr} = \%h;
897             }
898 329 100       739 if ( !$self->{has_headers} ) {
899 13 100       35 if ( defined( $self->{fields_ar} ) ) {
900 1         5 $self->_print_error(
901             "fields_ar irrelevant if CSV file has no headers");
902 1         104 return;
903             }
904 12 100       33 if ( defined( $self->{fields_hr} ) ) {
905 1         3 $self->_print_error(
906             "fields_hr irrelevant if CSV file has no headers");
907 1         6 return;
908             }
909             }
910              
911             # in_file or inh
912              
913 327         704 $self->{_flat} = [];
914              
915 327         646 $self->{_read_update_after_hr} = {};
916 327         635 $self->{_write_update_before_hr} = {};
917              
918 327         1042 $self->_update_in_mem_record_count();
919              
920 327 100       841 return unless $self->_status_reset();
921              
922 318 50       668 $self->_debug_show_members() if $_debug;
923              
924 318 100       741 if ( $self->{dates_zeros_ok} ) {
925             $self->{_refsub_is_datetime_empty} = sub {
926 3173     3173   4677 my $v = $_[0];
927 3173 100       9569 if ( $v !~ m/[1-9]/ ) {
928 975 100       2228 return 1 if $v =~ m/^[^0:]*0+[^0:]+0+[^0:]+0+/;
929             }
930 3170         8634 return 0;
931             }
932 316         1415 }
933              
934 318         5217 return $self;
935             }
936              
937             #
938             # Return 0 if error, 1 if all good
939             #
940             # Do all low level activities associated to input:
941             # I/O init
942             # Detect encoding
943             # Detect CSV separator
944             # Detect escape character
945             #
946             sub _S1_init_input {
947 470     470   829 my $self = shift;
948              
949 470         760 my $croak_if_error = $self->{croak_if_error};
950 470         692 my $_debug = $self->{_debug};
951 470         744 my $_debugh = $self->{_debugh};
952              
953 470 100       994 $self->{in_file} = '' unless defined( $self->{in_file} );
954 470         923 $self->{_close_inh_when_finished} = 0;
955              
956             $self->{_leave_encoding_alone} = $self->{dont_mess_with_encoding}
957 470 50       978 if defined( $self->{dont_mess_with_encoding} );
958              
959 470         1279 $self->{_int_one_pass} = _get_def( $self->{one_pass}, 0 );
960 470         880 my $in_file_disp;
961              
962             #
963             # LOW LEVEL INIT STEP 1 OF 4
964             #
965             # Manage I/O (= in most cases, open input file...)
966             #
967              
968 470 100       1077 if ( defined( $self->{inh} ) ) {
969             $self->{_leave_encoding_alone} = 1
970 4 50       10 unless defined( $self->{dont_mess_with_encoding} );
971 4         7 $in_file_disp = _get_def( $self->{in_file}, '<?>' );
972 4 50       10 $self->{_int_one_pass} = 1 unless defined( $self->{one_pass} );
973 4         6 $self->{_inh} = $self->{inh};
974             }
975             else {
976             $self->{_leave_encoding_alone} = 0
977 466 50       1124 unless defined( $self->{dont_mess_with_encoding} );
978 466         764 my $in_file = $self->{in_file};
979 466         642 my $inh;
980 466 50       956 if ( $in_file eq '' ) {
981 0         0 $inh = \*STDIN;
982 0         0 $self->{inh_is_stdin} = 1;
983 0 0       0 $self->{_int_one_pass} = 1 unless defined( $self->{one_pass} );
984 0         0 $in_file_disp = '<stdin>';
985             }
986             else {
987 466 100       14335 if ( !open( $inh, '<', $in_file ) ) { ## no critic (InputOutput::RequireBriefOpen)
988 3         50 $self->_print_error("unable to open file '$in_file': $!");
989 3         105 return 0;
990             }
991 463         1175 $in_file_disp = $in_file;
992 463         1009 $self->{_close_inh_when_finished} = 1;
993             }
994 463         1297 $self->{_inh} = $inh;
995             }
996              
997 467 50       983 confess "Oups! in_file_disp not defined?" unless defined($in_file_disp);
998 467         857 $self->{_in_file_disp} = $in_file_disp;
999              
1000             #
1001             # LOW LEVEL INIT STEP 2 OF 4
1002             #
1003             # "Detection" of encoding
1004             #
1005             # WARNING
1006             # As explained in the manual, it is a very partial and limited detection...
1007             #
1008              
1009 467 100       1030 unless ( $self->{_leave_encoding_alone} ) {
1010 463 100       1026 unless ( $self->{_init_input_already_called} ) {
1011             my ( $e, $m ) =
1012             $self->_detect_inh_encoding( $self->{encoding}, $self->{via},
1013             $self->{in_file},
1014 320 100       1697 ( $self->{_int_one_pass} ? 0 : $DETECT_ENCODING ) );
1015 320         1048 $self->{_inh_encoding} = $e;
1016 320         682 $self->{_inh_encoding_string} = $m;
1017             }
1018              
1019 463         3395 binmode $self->{_inh}, $self->{_inh_encoding_string};
1020             print( $_debugh "Input encoding: '"
1021             . $self->{_inh_encoding} . "' / '"
1022             . $self->{_inh_encoding_string}
1023 463 50       20771 . "'\n" )
1024             if $_debug;
1025              
1026             }
1027              
1028 467 100       1266 $self->{out_file} = '' unless defined( $self->{out_file} );
1029              
1030             #
1031             # LOW LEVEL INIT STEP 3 OF 4
1032             #
1033             # Detection of CSV separator and escape character
1034             #
1035              
1036 467         672 my $sep_char;
1037 467         799 my $escape_char = $self->{escape_char};
1038             $self->{quote_char} = $DEFAULT_QUOTE_CHAR
1039 467 100       1193 unless defined( $self->{quote_char} );
1040 467         771 my $quote_char = $self->{quote_char};
1041 467 100       1013 unless ( defined( $self->{in_csvobj} ) ) {
1042 462 100       916 if ( defined( $self->{sep_char} ) ) {
1043 200         324 $sep_char = $self->{sep_char};
1044 200 50       418 print( $_debugh "-- $in_file_disp: CSV separator set to \"" )
1045             if $_debug;
1046             }
1047             else {
1048             # The test below (on _init_input_already_called) shoud be useless.
1049             # Left for the sake of robustness.
1050 262 50       566 unless ( $self->{_init_input_already_called} ) {
1051 262 100       864 if (
1052             !$self->_detect_csv_sep(
1053             $escape_char, $quote_char, \$sep_char
1054             )
1055             )
1056             {
1057 6         24 $self->_print_error(
1058             "'$in_file_disp': cannot detect CSV separator");
1059 0         0 return 0;
1060             }
1061 256 50       636 print( $_debugh
1062             "-- $in_file_disp: CSV separator detected to \"" )
1063             if $_debug;
1064 256         596 $self->{sep_char} = $sep_char;
1065             }
1066             }
1067 456 50       941 print( $_debugh _render($sep_char) . "\"\n" ) if $_debug;
1068              
1069 456         1335 $self->_detect_meta( $quote_char, $sep_char );
1070              
1071             $self->{_in_csvobj} = Text::CSV->new(
1072             {
1073             sep_char => $sep_char,
1074             allow_whitespace => 1,
1075             binary => 1,
1076             auto_diag => 0,
1077             quote_char => $quote_char,
1078             escape_char => $self->{escape_char},
1079 456         3410 allow_loose_escapes => 1
1080             }
1081             );
1082 456 50       74413 unless ( defined( $self->{_in_csvobj} ) ) {
1083 0         0 $self->_print_error("error creating input Text::CSV object");
1084 0         0 return 0;
1085             }
1086              
1087             }
1088             else {
1089 5         9 $self->{_in_csvobj} = $self->{in_csvobj};
1090             }
1091              
1092 461         899 $self->{_init_input_already_called} = 1;
1093              
1094 461         1259 return 1;
1095             }
1096              
1097             sub get_in_file_disp {
1098 3663     3663 1 4957 my $self = shift;
1099              
1100 3663         18910 validate_pos(@_);
1101              
1102 3663         9196 my $in_file_disp = _get_def( $self->{_in_file_disp}, '?' );
1103 3663         6583 return $in_file_disp;
1104             }
1105              
1106             sub get_sep_char {
1107 0     0 1 0 my $self = shift;
1108              
1109 0         0 validate_pos(@_);
1110              
1111 0         0 return $self->{sep_char};
1112             }
1113              
1114             sub get_escape_char {
1115 9     9 1 50 my $self = shift;
1116              
1117 9         65 validate_pos(@_);
1118              
1119 9         52 return $self->{escape_char};
1120             }
1121              
1122             sub get_in_encoding {
1123 27     27 1 3324 my $self = shift;
1124              
1125 27         172 validate_pos(@_);
1126              
1127 27         81 return _get_def( $self->{_inh_encoding}, '' );
1128             }
1129              
1130             sub get_is_always_quoted {
1131 15     15 1 80 my $self = shift;
1132              
1133 15         96 validate_pos(@_);
1134              
1135 15         95 return $self->{_is_always_quoted};
1136             }
1137              
1138             sub get_pass_count {
1139 43     43 1 559 my $self = shift;
1140              
1141 43         286 validate_pos(@_);
1142              
1143 43         158 return _get_def( $self->{_pass_count}, 0 );
1144             }
1145              
1146             sub get_in_mem_record_count {
1147 0     0 1 0 my $self = shift;
1148              
1149 0         0 validate_pos(@_);
1150              
1151 0         0 return ( $self->{_in_mem_record_count}, 0 );
1152             }
1153              
1154             sub get_max_in_mem_record_count {
1155 513     513 1 1553 my $self = shift;
1156              
1157 513         3035 validate_pos(@_);
1158              
1159 513         2102 return _get_def( $self->{_max_in_mem_record_count}, 0 );
1160             }
1161              
1162             sub _set_max_in_mem_record_count {
1163 155     155   253 my $self = shift;
1164              
1165 155         1493 validate_pos( @_, { type => SCALAR } );
1166              
1167 155         525 $self->{_max_in_mem_record_count} = $_[0];
1168             }
1169              
1170             sub get_fields_names {
1171 19     19 1 2757 my $self = shift;
1172              
1173 19         134 validate_pos(@_);
1174              
1175 19 50       68 return () unless $self->_status_forward('S3');
1176 19         34 return @{ $self->{_columns} };
  19         82  
1177             }
1178              
1179             sub get_field_name {
1180 1     1 1 477 my $self = shift;
1181              
1182 1         16 validate_pos( @_, { type => SCALAR } );
1183              
1184 1         7 my ($n) = @_;
1185              
1186 1 50       5 return unless $self->_status_forward('S3');
1187 1         4 return $self->{_columns}->[$n];
1188             }
1189              
1190             sub get_coldata {
1191 1     1 1 2 my $self = shift;
1192              
1193 1         7 validate_pos(@_);
1194              
1195 1 50       3 return () unless $self->_status_forward('S3');
1196 1         2 my @ret;
1197 1         2 for ( @{ $self->{_coldata} } ) {
  1         3  
1198 4         123 push @ret,
1199             [
1200             $_->field_name, $_->header_text, $_->description,
1201             $_->dt_format, $_->dt_locale, $_->multiline
1202             ];
1203             }
1204              
1205 1         25 return @ret;
1206             }
1207              
1208             sub get_stats {
1209 2     2 1 9 my $self = shift;
1210              
1211 2         11 validate_pos(@_);
1212              
1213 2 50       6 return () unless defined( $self->{_stats} );
1214 2         4 return %{ $self->{_stats} };
  2         8  
1215             }
1216              
1217             sub get_nb_rows {
1218 1     1 1 2 my $self = shift;
1219              
1220 1         11 validate_pos(@_);
1221              
1222 1         6 return $self->{_nb_rows};
1223             }
1224              
1225             sub _debug_show_members {
1226 0     0   0 my ($self) = @_;
1227 0         0 my $_debugh = $self->{_debugh};
1228 0         0 my @aa;
1229 0 0       0 @aa = @{ $self->{fields_ar} } if defined( $self->{fields_ar} );
  0         0  
1230 0         0 my @c;
1231 0         0 @c = @{ $self->{fields_column_names} }
1232 0 0       0 if defined( $self->{fields_column_names} );
1233 0         0 my %h;
1234 0 0       0 %h = %{ $self->{fields_hr} } if defined( $self->{fields_hr} );
  0         0  
1235              
1236 0         0 print( $_debugh "-- _debug_show_members() start\n" );
1237 0         0 print( $_debugh " croak_if_error $self->{croak_if_error}\n" );
1238 0         0 print( $_debugh " verbose $self->{verbose}\n" );
1239 0         0 print( $_debugh " _debug $self->{_debug}\n" );
1240 0         0 print( $_debugh " _debug_read $self->{_debug_read}\n" );
1241 0         0 print( $_debugh " infoh $self->{infoh}\n" );
1242 0         0 print( $_debugh " _debugh $_debugh\n" );
1243 0         0 print( $_debugh " inh: $self->{_inh}\n" );
1244 0         0 print( $_debugh " in_file_disp " . $self->get_in_file_disp() . "\n" );
1245 0         0 print( $_debugh " _in_csvobj $self->{_in_csvobj}\n" );
1246 0         0 print( $_debugh " has_headers $self->{has_headers}\n" );
1247 0         0 print( $_debugh " fields_ar:\n" );
1248              
1249 0         0 for my $e (@aa) {
1250 0         0 print( $_debugh " '$e'\n" );
1251             }
1252 0         0 print( $_debugh " fields_hr:\n" );
1253 0         0 for my $e ( keys %h ) {
1254 0         0 print( $_debugh " '$e' => '$h{$e}'\n" );
1255             }
1256 0         0 print( $_debugh " fields_column_names:\n" );
1257 0         0 for my $e (@c) {
1258 0         0 print( $_debugh " '$e'\n" );
1259             }
1260 0         0 print( $_debugh "-- _debug_show_members() end\n" );
1261             }
1262              
1263             #
1264             # Check headers in CSV header line
1265             # Used to increase robustness by relying on header title rather than
1266             # column number.
1267             #
1268             # Return 1 if success (all fields found), 0 otherwise.
1269             #
1270             sub _process_header {
1271 12     12   22 my $self = shift;
1272 12         23 my @headers = @{ shift(@_) };
  12         39  
1273 12         22 my %fields_h = %{ shift(@_) };
  12         68  
1274 12         29 my $retval = shift;
1275              
1276 12         20 my @tmp = keys %{$retval};
  12         30  
1277              
1278 12         34 my $in_file_disp = $self->get_in_file_disp();
1279              
1280 12 50       35 confess '$_[4] must be an empty by-ref hash' if $#tmp >= 0;
1281              
1282 12         23 my $e = 0;
1283 12         38 for my $k ( keys %fields_h ) {
1284 48         85 my $v = $fields_h{$k};
1285              
1286 48     462   199 my @all_idx = indexes { /$v/i } @headers;
  462         2609  
1287 48 50       164 if ( $#all_idx >= 1 ) {
1288 0         0 $self->_print_error( "file $in_file_disp: "
1289             . "more than one column matches the criteria '$v'" );
1290 0         0 $e++;
1291             }
1292 48     258   179 my $idx = first_index { /$v/i } @headers;
  258         1630  
1293 48 50       211 if ( $idx < 0 ) {
1294 0         0 $self->_print_error(
1295             "file $in_file_disp: unable to find field '$v'");
1296 0         0 $e++;
1297             }
1298             else {
1299 48         121 $retval->{$k} = $idx;
1300             }
1301             }
1302              
1303 12 50       72 return ( $e >= 1 ? 0 : 1 );
1304             }
1305              
1306             sub set_walker_hr {
1307 2     2 1 1593 my $self = shift;
1308 2         25 validate_pos( @_, { type => UNDEF | CODEREF, optional => 1 } );
1309              
1310 2         7 my ($walker_hr) = @_;
1311              
1312 2 50       5 return unless $self->_status_forward('S2');
1313 2 50       6 return unless $self->_status_backward('S2');
1314 2         4 $self->{walker_hr} = $walker_hr;
1315              
1316 2         5 return $self;
1317             }
1318              
1319             sub set_walker_ar {
1320 2     2 1 1404 my $self = shift;
1321 2         22 validate_pos( @_, { type => UNDEF | CODEREF, optional => 1 } );
1322              
1323 2         8 my ($walker_ar) = @_;
1324              
1325 2 50       6 return unless $self->_status_forward('S2');
1326 2 50       5 return unless $self->_status_backward('S2');
1327 2         3 $self->{walker_ar} = $walker_ar;
1328              
1329 2         5 return $self;
1330             }
1331              
1332             # * *************************************** *
1333             # * BEGINNING OF DATE FORMAT DETECTION CODE *
1334             # * *************************************** *
1335              
1336             #
1337             # The '%m.%d.%y' is not at its "logical" location. It is done to make sure the
1338             # order in which entries are written does not impact the result.
1339             #
1340             # It could occur because there is some code that correlates an entry containing
1341             # %y with another one that would contain %Y. The %Y will be called the master,
1342             # the %y will be called the slave. It is important to match such entries,
1343             # otherwise an identified format with %y would always be ambiguous with the same
1344             # written with %Y.
1345             #
1346             # IMPORTANT
1347             # The list below is written almost as-is in the POD at the bottom of this
1348             # file.
1349             #
1350             my @DATES_DEFAULT_FORMATS_TO_TRY = (
1351             '',
1352             '%Y-%m-%d',
1353             '%Y.%m.%d',
1354             '%Y/%m/%d',
1355              
1356             '%m.%d.%y',
1357              
1358             '%m-%d-%Y',
1359             '%m.%d.%Y',
1360             '%m/%d/%Y',
1361             '%d-%m-%Y',
1362             '%d.%m.%Y',
1363             '%d/%m/%Y',
1364              
1365             '%m-%d-%y',
1366             '%m/%d/%y',
1367             '%d-%m-%y',
1368             '%d.%m.%y',
1369             '%d/%m/%y',
1370              
1371             '%Y%m%d%H%M%S',
1372              
1373             # Localizaed formats
1374              
1375             '%b %d, %Y',
1376             '%b %d %Y',
1377             '%b %d %T %Z %Y',
1378             '%d %b %Y',
1379             '%d %b, %Y'
1380             );
1381              
1382             #
1383             # IMPORTANT
1384             # Under Linux, $START is useless. Strptime will match a format exactly as it
1385             # is, and a tring like "01/01/16 13:00:00" won't match with "%T". Under
1386             # Windows, Strptime is capable of doing a match by ignoring characters at the
1387             # beginning, thus "01/01/2016 13:00:00" for example will return success when
1388             # matched against "%T".
1389             # Possibly it has to do with versionning of Strptime, not Linux versus Windows
1390             # as such. Any way, this difference had to be dealt with.
1391             #
1392             # The flexibility under Windows would screw the code logic so I had to add the
1393             # prefix string below, to avoid unexpected success on match.
1394             #
1395             my $START = '<';
1396              
1397             struct RecordCounter => {
1398             count_ok => '$',
1399             count_ko => '$',
1400             has_searched_time => '$',
1401              
1402             format => '$',
1403             locale => '$',
1404              
1405             has_found_time => '$',
1406             format_with_addition_of_time => '$',
1407             locale_with_addition_of_time => '$',
1408             parser_with_addition_of_time => '$'
1409             };
1410              
1411             struct Format => {
1412             id => '$',
1413             format => '$',
1414             locale => '$',
1415             parser => '$',
1416             index_slave => '$',
1417             index_master => '$'
1418             };
1419              
1420             sub _col_dispname {
1421 539     539   867 my ( $self, $n ) = @_;
1422              
1423 539         639 my $col;
1424              
1425             #
1426             # IMPORTANT
1427             #
1428             # We cannot execute here a command like
1429             # $self->_status_forward('S3');
1430             # (to ensure _columns is well defined) because _col_dispname is called by
1431             # _detect_dates_formats that is in turn called by _S3_init_fields_extra. A call
1432             # to _status_forward would trigger a never-ending call loop.
1433             #
1434 539         1103 my $cols = _get_def( $self->{'_columns'}, $self->{'_S2_columns'} );
1435              
1436 539 50       934 if ( $self->{has_headers} ) {
1437 539         929 $col = $cols->[$n];
1438 539 50       864 $col = "<UNDEF>" unless defined($col);
1439             }
1440             else {
1441 0         0 $col = "[$n]";
1442             }
1443 539         1034 return $col;
1444             }
1445              
1446             # Used by test plan only...
1447             sub _dds {
1448 45     45   917 my $self = shift;
1449              
1450 45 50       110 return unless $self->_status_forward('S3');
1451 43 50       121 return unless defined( $self->{_dates_detailed_status} );
1452 43         352 return $self->{_dates_detailed_status};
1453             }
1454              
1455             sub _detect_dates_formats {
1456 354     354   524 my $self = shift;
1457              
1458 354 100       868 return if $self->{_detect_dates_formats_has_run};
1459 287         564 $self->{_detect_dates_formats_has_run} = 1;
1460 287         383 my @fields_dates;
1461 11         35 @fields_dates = @{ $self->{fields_dates} }
1462 287 100       643 if defined( $self->{fields_dates} );
1463 287 100 100     1293 return unless @fields_dates or $self->{fields_dates_auto};
1464              
1465 54 50       151 if ( $self->{_int_one_pass} ) {
1466 0         0 $self->_print_error(
1467             "date format detection disallowed when one_pass is set");
1468 0         0 return;
1469             }
1470              
1471 54         100 my $_debug = $self->{_debug};
1472 54         85 my $_debugh = $self->{_debugh};
1473 54   33     127 my $debug_fmt = ( $_debug and $DEBUG_DATETIME_FORMATS );
1474              
1475 54         155 $self->_register_pass("detect date format");
1476              
1477             #
1478             # Why re-opening the input?
1479             # I tried two other ways that never worked on some OSes (like freebsd) and/or
1480             # with older perl versions.
1481             #
1482             # 1) The "tell" tactic
1483             # Recording at the beginning of the function the file position with
1484             # my $pos = tell($self->{inh});
1485             # ... and then recalling with a seek instruction is the most logical.
1486             # But it didn't work = sometimes, reading would go back to first row (the
1487             # headers) instead of the second row, could not figure out why (it would work
1488             # on my Ubuntu 16.04 / perl 5.22, but would fail with other OSes and/or perl
1489             # versions).
1490             #
1491             # 2) The "complete rewind" tactic
1492             # I then undertook to do (at the end of detection function):
1493             # seek $inh, 0, SEEK_SET;
1494             # $incsv->getline($inh) if $self->{has_headers};
1495             # based on the assumption that a seek to zero would behave differently from a
1496             # seek to an arbitrary position.
1497             # But still, it would sometimes fail....
1498             #
1499              
1500 54         119 my $inh = $self->_reopen_input();
1501 54         113 my $incsv = $self->{_in_csvobj};
1502 54 50       218 _mygetline( $incsv, $inh ) if $self->{has_headers};
1503              
1504 54         2401 my $formats_to_try = $self->{dates_formats_to_try};
1505 54         114 my $ignore_trailing_chars = $self->{dates_ignore_trailing_chars};
1506 54         133 my $search_time = $self->{dates_search_time};
1507 54         91 my $localizations = $self->{dates_locales};
1508              
1509 54         92 my %regular_named_fields = %{ $self->{_regular_named_fields} };
  54         423  
1510              
1511 54         144 my $refsub_is_datetime_empty = $self->{_refsub_is_datetime_empty};
1512              
1513 54         82 my @fields_to_detect_format;
1514 54 100       203 if ( defined( $self->{fields_dates} ) ) {
    50          
1515 11         20 my $count_field_not_found = 0;
1516 11         17 my %column_seen;
1517 11         32 for my $f ( @{ $self->{fields_dates} } ) {
  11         31  
1518 23 100       58 if ( !exists $regular_named_fields{$f} ) {
1519 1         9 $self->_print_error( "fields_dates: unknown field: '$f'",
1520             1, ERR_UNKNOWN_FIELD, {%regular_named_fields} );
1521 1         32 $count_field_not_found++;
1522 1         4 next;
1523             }
1524 22         34 my $n = $regular_named_fields{$f};
1525 22 50       52 if ( exists $column_seen{$n} ) {
1526 0         0 $self->_print_warning("field '$f' already seen");
1527 0         0 next;
1528             }
1529 22         43 $column_seen{$n} = 1;
1530 22         47 push @fields_to_detect_format, $n;
1531             }
1532 11 100       88 $self->_print_error("non existent field(s) encountered, aborted")
1533             if $count_field_not_found;
1534             }
1535             elsif ( $self->{fields_dates_auto} ) {
1536 43         174 my @k = keys %regular_named_fields;
1537 43         181 @fields_to_detect_format = ( 0 .. $#k );
1538             }
1539             else {
1540 0         0 confess "Hey! check this code, man";
1541             }
1542              
1543             #
1544             # FIXME?
1545             # Sort by column number of not?
1546             #
1547             # At this moment in time, the author inclines to answer "yes".
1548             # But I must admit it is rather arbitrary decision for now.
1549             #
1550 53         294 @fields_to_detect_format = sort { $a <=> $b } @fields_to_detect_format;
  753         898  
1551              
1552 53         86 my @dates_formats_supp;
1553 2         6 @dates_formats_supp = @{ $self->{dates_formats_to_try_supp} }
1554 53 100       149 if defined( $self->{dates_formats_to_try_supp} );
1555              
1556 53 100       346 $formats_to_try = [@DATES_DEFAULT_FORMATS_TO_TRY]
1557             unless defined($formats_to_try);
1558 53         94 $formats_to_try = [ @{$formats_to_try}, @dates_formats_supp ];
  53         217  
1559 53         146 my %seen;
1560 53         94 my $f2 = [];
1561 53         176 for ( @${formats_to_try} ) {
1562 978 50       1463 push @{$f2}, $_ unless exists( $seen{$_} );
  978         1353  
1563 978         1685 $seen{$_} = undef;
1564             }
1565 53         134 $formats_to_try = $f2;
1566              
1567 53 100       149 $ignore_trailing_chars = 1 unless defined($ignore_trailing_chars);
1568 53 100       187 $search_time = 1 unless defined($search_time);
1569              
1570 53 100       128 my $stop = ( $ignore_trailing_chars ? '' : '>' );
1571              
1572             #
1573             # The code below (from # AMB to # AMB-END) aims to remove ambiguity that comes
1574             # from %Y versus %y.
1575             # That is: provided you have (among others) the formats to try
1576             # '%d-%m-%Y'
1577             # and
1578             # '%d-%m-%y'
1579             # then if parsing 4-digit-year dates (like '31-12-2016'), the two formats will
1580             # work and you'll end up with an ambiguity. To be precise, there'll be no
1581             # ambiguity if the date is followed by a time, but if the date is alone, both
1582             # formats will work.
1583             #
1584             # Thanks to the below code, the member 'index_slave' (and its counterpart
1585             # index_master) is populated and later, if such an ambiguity is detected, the
1586             # upper case version (the one containing upper case '%Y') will be kept and the
1587             # other one will be discarded.
1588             #
1589             # NOTE
1590             # Such an ambiguity can exist only when ignore_trailing_chars is set.
1591             # Otherwise, the remaining two digits make the date parsing fail in the '%y'
1592             # case.
1593             #
1594             # The other members of the 'Format' object are used to work "normally",
1595             # independently from this ambiguity removal feature.
1596             #
1597              
1598             # WIP = Work In Progress...
1599 53         92 my @formats_wip;
1600             my @locales;
1601 53 100       147 @locales = split( /,\s*/, $localizations ) if defined($localizations);
1602 53         73 for my $f ( @{$formats_to_try} ) {
  53         129  
1603 978 100       2135 my $has_localized_item = ( $f =~ m/%a|%A|%b|%B|%c|%\+/ ? 1 : 0 );
1604 978 100 100     1765 unless ( @locales and $has_localized_item ) {
1605 968         1583 push @formats_wip, [ $f, '' ];
1606 968         1387 next;
1607             }
1608 10         34 push @formats_wip, [ $f, $_ ] foreach @locales;
1609             }
1610              
1611             # AMB
1612 53         95 my @formats;
1613             my %mates;
1614 53         151 for my $i ( 0 .. $#formats_wip ) {
1615 988         1734 my $fstr = $formats_wip[$i]->[0];
1616 988         1263 my $floc = $formats_wip[$i]->[1];
1617              
1618             # FIXME
1619             # Will not manage correctly a string like
1620             # '%%Y'
1621             # that means (when used with Strptime), the litteral string '%Y' with no
1622             # substitution.
1623             # Such cases will be complicated to fix, as it'll require to do a kind-of
1624             # Strptime-equivalent parsing of the string, and I find it a bit overkill.
1625             #
1626             # I prefer to push back in caller world saying
1627             # "Hey, if using constructs like '%%Y', you'll be in trouble."
1628 988         1181 my $m = $fstr;
1629 988         3181 $m =~ s/%y//ig;
1630 988         1608 $m .= $floc;
1631              
1632 988         1273 my $index_slave = -1;
1633 988         1119 my $index_master = -1;
1634 988 100       1757 if ( exists $mates{$m} ) {
1635 262         516 my $alt_fstr = $formats_wip[ $mates{$m} ]->[0];
1636 262 100       705 my $m_lower = ( $fstr =~ m/%y/ ? 1 : 0 );
1637 262 100       570 my $m_upper = ( $fstr =~ m/%Y/ ? 1 : 0 );
1638 262 100       492 my $a_lower = ( $alt_fstr =~ m/%y/ ? 1 : 0 );
1639 262 100       516 my $a_upper = ( $alt_fstr =~ m/%Y/ ? 1 : 0 );
1640              
1641             # We ignore the weird cases where we'd have both %y and %Y in a format string.
1642              
1643 262 100 66     1919 if ( ( !$m_lower ) and $m_upper and $a_lower and ( !$a_upper ) ) {
    50 66        
      33        
      33        
      33        
      33        
1644 45         101 $index_slave = $mates{$m};
1645 45         749 $formats[ $mates{$m} ]->index_master($i);
1646             }
1647             elsif ( $m_lower and ( !$m_upper ) and ( !$a_lower ) and $a_upper )
1648             {
1649 217         352 $index_master = $mates{$m};
1650 217         3444 $formats[ $mates{$m} ]->index_slave($i);
1651             }
1652              
1653             }
1654             else {
1655 726         1389 $mates{$m} = $i;
1656             }
1657              
1658 988         3597 my %strptime_opts = ( pattern => $START . $fstr . $stop );
1659 988 100       1936 $strptime_opts{locale} = $floc if $floc ne '';
1660 988 100       15300 my $format = Format->new(
1661             id => "$i",
1662             format => $fstr,
1663             locale => $floc,
1664             parser => (
1665             $fstr ne ''
1666             ? _build_strptime_parser(%strptime_opts)
1667             : undef
1668             ),
1669             index_slave => $index_slave,
1670             index_master => $index_master
1671             );
1672 988         247159 push @formats, $format;
1673             }
1674 53         201 for my $i ( 0 .. $#formats ) {
1675 988         11052 my $format = $formats[$i];
1676              
1677             # If a master could be itself the slave of another entry, that'd make it a
1678             # hierarchical relation tree with multiple levels. It is not possible, only a
1679             # direct, unique master-slave relation can be managed here.
1680 988 50 66     12683 confess "Inonsistent data, check this module's code urgently!"
1681             if $format->index_slave >= 0 and $format->index_master >= 0;
1682              
1683 988 100       22232 if ( $format->index_slave >= 0 ) {
1684 262         4781 my $mate = $formats[ $format->index_slave ];
1685 262 50 33     4634 if ( $mate->index_master != $i or $mate->index_slave != -1 ) {
1686 0         0 confess
1687             "Inonsistent data (2), check this module's code urgently!";
1688             }
1689             }
1690              
1691 988 100       21627 if ( $format->index_master >= 0 ) {
1692 262         4681 my $mate = $formats[ $format->index_master ];
1693 262 50 33     4439 if ( $mate->index_slave != $i or $mate->index_master != -1 ) {
1694 0         0 confess
1695             "Inonsistent data (3), check this module's code urgently!";
1696             }
1697             }
1698              
1699             }
1700 53 50       470 if ($debug_fmt) {
1701 0         0 for (@formats) {
1702 0         0 my ( $idx, $rel ) = ( -1, "" );
1703 0 0       0 $idx = $_->index_slave, $rel = "S: " if $_->index_slave >= 0;
1704 0 0       0 $idx = $_->index_master, $rel = "M: " if $_->index_master >= 0;
1705 0         0 printf( $_debugh "%-18s %s %2d",
1706             "'" . $_->format . "'",
1707             $rel, $idx
1708             );
1709 0 0       0 print( $_debugh ": '" . $formats[$idx]->format . "'" ) if $idx >= 0;
1710 0         0 print( $_debugh "\n" );
1711             }
1712             }
1713              
1714             # AMB-END
1715              
1716 53         127 my %records;
1717             my $record_number;
1718 53         95 my $count_gotit = 0;
1719 53         78 my $count_ambiguous = 0;
1720 53         90 my $count_nodate = 0;
1721 53         81 my $count_empty = 0;
1722 53         71 my $has_signaled_can_start_recording_data = 0;
1723 53         115 $self->{_line_after_which_recording_can_start} = 0;
1724              
1725             #
1726             # Seems a weird optimization here, but it is very important.
1727             # In some cases, divides execution time (to detect date format on big files
1728             # containing numerous fields) by 10.
1729             #
1730             # When evaluates to true, it means the input column has no identified date
1731             # format, meaning, no further check to do.
1732             #
1733 53         85 my @cache_nodate;
1734              
1735 53         144 while ( my $f = _mygetline( $incsv, $inh ) ) {
1736 1418         43503 $record_number++;
1737              
1738 1418 50       3439 if ($debug_fmt) {
1739 0         0 print( $_debugh "RECORD $record_number:\n" );
1740 0         0 for ( 0 .. @$f - 1 ) {
1741 0         0 printf( $_debugh " %02d: '%s'\n", $_, $f->[$_] );
1742             }
1743             }
1744              
1745 1418         2452 for my $n (@fields_to_detect_format) {
1746 15285 100       24502 next if $cache_nodate[$n];
1747              
1748 10444         13470 my $v = $f->[$n];
1749 10444 100       14980 $v = '' unless defined($v);
1750 10444 100       17038 next if $v eq '';
1751             next
1752 2942 100 100     8788 if defined($refsub_is_datetime_empty)
1753             and $refsub_is_datetime_empty->($v);
1754              
1755 2939 50       5069 if ($debug_fmt) {
1756 0         0 my $col = $self->_col_dispname($n);
1757 0         0 print( $_debugh "Line $record_number, column '$col':\n" );
1758             }
1759              
1760 2939         4647 for my $fmt (@formats) {
1761 50192         595888 my $fid = $fmt->id;
1762 50192         811538 my $fstr = $fmt->format;
1763              
1764             $self->_debug_output_fmt( '** pre ', $fmt,
1765 50192 50       304131 $records{$n}->{$fid} )
1766             if $debug_fmt;
1767              
1768             $records{$n}->{$fid} = RecordCounter->new(
1769             count_ok => 0,
1770             count_ko => 0,
1771             has_searched_time => 0,
1772              
1773             format => undef,
1774             locale => undef,
1775              
1776             has_found_time => 0,
1777             format_with_addition_of_time => undef,
1778             locale_with_addition_of_time => undef,
1779             parser_with_addition_of_time => undef
1780 50192 100       192820 ) unless defined( $records{$n}->{$fid} );
1781              
1782 50192 100       1068410 unless ( $records{$n}->{$fid}->count_ko ) {
1783             my $is_ok = &_try_parser(
1784             $fmt,
1785 11481         90500 $records{$n}->{$fid},
1786             $START . $v . $stop
1787             );
1788              
1789 11481 100       21489 if ( !$is_ok ) {
1790 7789         10535 my $give_up_time = 0;
1791 7789 100 66     99359 if ( $records{$n}->{$fid}->count_ko == 0
      100        
1792             and $records{$n}->{$fid}->has_searched_time
1793             and $records{$n}->{$fid}->has_found_time )
1794             {
1795 77   100     4346 $give_up_time = (
1796             defined( $fmt->parser ) and defined(
1797             $fmt->parser->parse_datetime(
1798             $START . $v . $stop
1799             )
1800             ) ? 1 : 0
1801             );
1802 77 100       42441 if ($give_up_time) {
1803 4         80 $records{$n}->{$fid}->has_found_time(0);
1804 4         33 $is_ok = 1;
1805             }
1806             }
1807             }
1808              
1809 11481 100 100     200084 if ( $is_ok or ( !$ignore_trailing_chars ) ) {
1810 6079   100     76756 my $incr =
1811             ( defined( $fmt->parser ) and $is_ok ? 1 : 0 );
1812              
1813 6079 100       115704 unless ( $records{$n}->{$fid}->has_searched_time ) {
1814 3156         55838 $records{$n}->{$fid}->has_searched_time(1);
1815              
1816             croak
1817             "Inconsistent status! Issue in module code not in caller's!"
1818 3156 50       51260 if $records{$n}->{$fid}->count_ok != 0;
1819              
1820 3156 100       21966 if ($search_time) {
    100          
1821              
1822 1980 50       3352 print( $_debugh
1823             " Search time in '$v', format '$fstr'\n"
1824             ) if $debug_fmt;
1825              
1826 1980         24125 my $t =
1827             $self->_guess_time_format( $fstr,
1828             $fmt->locale, $v, $stop );
1829 1980 100       34402 $records{$n}->{$fid}
1830             ->has_found_time( ( defined($t) ? 1 : 0 ) );
1831 1980 100       14908 if ( defined($t) ) {
    100          
1832 254         3696 $records{$n}->{$fid}
1833             ->format_with_addition_of_time( $t->[0] );
1834 254         4677 $records{$n}->{$fid}
1835             ->locale_with_addition_of_time( $t->[1] );
1836 254         4603 $records{$n}->{$fid}
1837             ->parser_with_addition_of_time( $t->[2] );
1838 254         1804 $incr = 1;
1839             }
1840             elsif ( $fstr eq '' ) {
1841 200         2676 $records{$n}->{$fid}->count_ko(1);
1842             }
1843             }
1844             elsif ( $fstr eq '' ) {
1845 78         973 $records{$n}->{$fid}->count_ko(1);
1846             }
1847              
1848             }
1849              
1850             $records{$n}->{$fid}
1851 6079         95074 ->count_ok( $records{$n}->{$fid}->count_ok + $incr );
1852              
1853             $records{$n}->{$fid}
1854 6079 100 100     94759 ->count_ko( $records{$n}->{$fid}->count_ko + 1 )
1855             if ( !$incr )
1856             and ( !$is_ok );
1857              
1858 6079 100       31004 if ($incr) {
1859              
1860             # We remove the slave if master is fine.
1861             # Depending on the order in which parsing got done, the master could
1862             # pop up first, or the slave, that is why we need manage both cases.
1863 2697 100 100     32947 if ( $fmt->index_slave >= 0
1864             or $fmt->index_master >= 0 )
1865             {
1866 1995 100       56452 my $has_slave =
1867             ( $fmt->index_slave >= 0 ? 1 : 0 );
1868 1995 100       33339 my $idx = (
1869             $has_slave
1870             ? $fmt->index_slave
1871             : $fmt->index_master
1872             );
1873 1995         32688 my $mate = $formats[$idx]->id;
1874 1995 100       15143 if ( exists $records{$n}->{$mate} ) {
1875 1841 100       3444 if ($has_slave) {
1876 623 100       7673 if ( $records{$n}->{$mate}->count_ko ==
1877             0 )
1878             {
1879             # I am the master: I remove the slave
1880 2         36 $records{$n}->{$mate}->count_ko(1);
1881             }
1882             }
1883             else {
1884 1218 50 66     15021 if (
      66        
1885             $records{$n}->{$mate}->count_ko == 0
1886             and
1887             $records{$n}->{$mate}->count_ok >= 1
1888             and
1889             $records{$n}->{$fid}->count_ko ==
1890             0 )
1891             {
1892 117         6262 $records{$n}->{$fid}->count_ko(1);
1893             }
1894             }
1895             }
1896             }
1897             }
1898              
1899             }
1900             else {
1901             $records{$n}->{$fid}
1902 5402         71277 ->count_ko( $records{$n}->{$fid}->count_ko + 1 );
1903             }
1904             }
1905              
1906             $self->_debug_output_fmt( ' post', $fmt,
1907 50192 50       346497 $records{$n}->{$fid} )
1908             if $debug_fmt;
1909              
1910             }
1911             }
1912              
1913 1418         2199 $count_gotit = 0;
1914 1418         1811 $count_ambiguous = 0;
1915 1418         2187 $count_empty = 0;
1916 1418         2563 for my $n (@fields_to_detect_format) {
1917 15285 100       24296 next if $cache_nodate[$n];
1918              
1919 10444         12135 my $candidate = 0;
1920 10444         10843 my $tt = 0;
1921 10444         13604 for my $fmt (@formats) {
1922 150840         1783376 my $fid = $fmt->id;
1923 150840         910867 my $rec = $records{$n}->{$fid};
1924 150840 100       247074 next unless defined($rec);
1925              
1926 92018         1059566 my $ok = $rec->count_ok;
1927 92018         1420139 my $ko = $rec->count_ko;
1928              
1929 92018 50 66     608679 confess "Oups. Check this module code urgently!"
1930             if $ok == 0 and $ko == 0;
1931 92018         103875 $tt += $ok + $ko;
1932              
1933 92018 100 100     177244 $candidate++ if $ok >= 1 and $ko == 0;
1934             }
1935 10444 100       19328 if ( $candidate == 1 ) {
    100          
    100          
1936 4754         6947 $count_gotit++;
1937             }
1938             elsif ( $candidate >= 2 ) {
1939 653         1242 $count_ambiguous++;
1940             }
1941             elsif ( $tt != 0 ) {
1942 230         289 $count_nodate++;
1943 230         466 $cache_nodate[$n] = 1;
1944             }
1945             else {
1946 4807         6378 $count_empty++;
1947             }
1948             }
1949              
1950 1418 50       2765 if ($debug_fmt) {
1951 0         0 print( $_debugh "\$count_gotit = $count_gotit\n" );
1952 0         0 print( $_debugh "\$count_ambiguous = $count_ambiguous\n" );
1953 0         0 print( $_debugh "\$count_nodate = $count_nodate\n" );
1954 0         0 print( $_debugh "\$count_empty = $count_empty\n" );
1955             }
1956              
1957 1418         2495 my $can_start_recording_data = 0;
1958 1418 100 100     6083 $can_start_recording_data = 1
      100        
1959             if $count_gotit + $count_ambiguous + $count_nodate >= 1
1960             and ( !$count_ambiguous )
1961             and ( !$count_empty );
1962              
1963 1418 100 100     8344 if ( $can_start_recording_data
1964             and ( !$has_signaled_can_start_recording_data ) )
1965             {
1966 31         54 $has_signaled_can_start_recording_data = 1;
1967              
1968 31 50       86 print( $_debugh
1969             "Can start recording (all dates formats detection closed) "
1970             . "after record #$record_number\n" )
1971             if $_debug;
1972              
1973 31         87 $self->{_line_after_which_recording_can_start} = $record_number;
1974 31 100       119 last unless $self->{fields_dates_auto};
1975 24 100       181 last if $self->{fields_dates_auto_optimize};
1976             }
1977             }
1978              
1979 53         2822 close $inh;
1980              
1981 53         168 my %dates_detailed_status;
1982             my @dates_formats;
1983 53         208 my $check_empty = 0;
1984 53         93 my $check_nodate = 0;
1985 53         117 my $check_ambiguous = 0;
1986 53         102 my $check_gotit = 0;
1987 53         284 for my $n (@fields_to_detect_format) {
1988 532         635 my @formats_ok;
1989 532         686 my $tt = 0;
1990 532         662 for my $fid ( sort keys %{ $records{$n} } ) {
  532         4539  
1991 8465         74146 my $rec = $records{$n}->{$fid};
1992 8465 100 100     100208 if ( $rec->count_ok >= 1 and $rec->count_ko == 0 ) {
1993              
1994 298         10002 my ( $fstr, $floc ) = ( $rec->format, $rec->locale );
1995 298 100       6546 ( $fstr, $floc ) = (
1996             $rec->format_with_addition_of_time,
1997             $rec->locale_with_addition_of_time
1998             ) if $rec->has_found_time;
1999              
2000 298         5779 push @formats_ok, [ $fstr, $floc ];
2001             }
2002 8465         141069 $tt += $rec->count_ok + $rec->count_ko;
2003             }
2004 532         5172 my $is_ok = 0;
2005 532         621 my $format;
2006 532         657 my $locale = '';
2007 532 100 100     1835 if ( $#formats_ok < 0 and $tt == 0 ) {
    100          
    100          
2008 16         36 $format = "Z";
2009 16         34 $check_empty++;
2010             }
2011             elsif ( $#formats_ok < 0 ) {
2012 230         304 $format = "N";
2013 230         279 $check_nodate++;
2014             }
2015             elsif ( $#formats_ok > 0 ) {
2016 12         25 $format = "A";
2017 12         16 $check_ambiguous++;
2018             }
2019             else {
2020 274         337 $is_ok = 1;
2021 274         438 $format = $formats_ok[0]->[0];
2022 274         345 $locale = $formats_ok[0]->[1];
2023 274         349 $check_gotit++;
2024             }
2025 532         1034 my $col = $self->_col_dispname($n);
2026              
2027             $dates_detailed_status{$col} = $format
2028 532 50       1586 unless exists $dates_detailed_status{$col};
2029 532 100 66     2047 $dates_formats[$n] = [ $format, $locale ]
2030             if $is_ok and ( !defined $dates_formats[$n] );
2031             }
2032             $dates_detailed_status{'.'} =
2033 53         187 $self->{_line_after_which_recording_can_start};
2034              
2035 53 50 66     440 if ( $check_empty != $count_empty
      66        
      33        
2036             or $check_nodate != $count_nodate
2037             or $check_ambiguous != $count_ambiguous
2038             or $check_gotit != $count_gotit )
2039             {
2040             # The below condition can happen with an empty CSV (empty file (no header) or
2041             # only a header line).
2042 1 50 33     71 if ( $count_empty
      33        
      33        
      33        
      33        
      33        
2043             or $check_nodate
2044             or $count_nodate
2045             or $check_ambiguous
2046             or $count_ambiguous
2047             or $check_gotit
2048             or $count_gotit )
2049             {
2050 0         0 print( STDERR "\$check_empty = $check_empty\n" );
2051 0         0 print( STDERR "\$count_empty = $count_empty\n" );
2052 0         0 print( STDERR "\$check_nodate = $check_nodate\n" );
2053 0         0 print( STDERR "\$count_nodate = $count_nodate\n" );
2054 0         0 print( STDERR "\$check_ambiguous = $check_ambiguous\n" );
2055 0         0 print( STDERR "\$count_ambiguous = $count_ambiguous\n" );
2056 0         0 print( STDERR "\$check_gotit = $check_gotit\n" );
2057 0         0 print( STDERR "\$count_gotit = $count_gotit\n" );
2058 0         0 confess "Oups! Check immediately this module code, man!";
2059             }
2060             }
2061              
2062 53 50       140 if ($debug_fmt) {
2063              
2064             # A very detailed debug output
2065 0         0 for my $n (@fields_to_detect_format) {
2066 0         0 my $col = $self->_col_dispname($n);
2067 0         0 print( $_debugh "$col\n" );
2068 0         0 printf( $_debugh " %-25s %3s %3s\n", "format", "OK", "KO" );
2069 0         0 for my $fid ( sort keys %{ $records{$n} } ) {
  0         0  
2070 0         0 my $rec = $records{$n}->{$fid};
2071 0         0 my $cc = '';
2072 0 0 0     0 $cc = "(" . $rec->locale . ")"
2073             if defined( $rec->locale )
2074             and $rec->locale ne '';
2075 0         0 printf( $_debugh " %-25s %3d %3d\n",
2076             $rec->format . $cc,
2077             $rec->count_ok, $rec->count_ko
2078             );
2079             }
2080             }
2081             }
2082              
2083             # Not a typo - displaying it IN ADDITION to debug output above is done on
2084             # purpose...
2085 53 50       140 if ($_debug) {
2086              
2087             # A shorter (as compared to above) output of outcome of DateTime detection
2088 0         0 print( $_debugh "Result of DateTime detection:\n" );
2089 0         0 printf( $_debugh "%-3s %-25s %-30s %s\n",
2090             '###', 'FIELD',
2091             'DATETIME FORMAT',
2092             'DATETIME LOCALE'
2093             );
2094 0         0 for my $n (@fields_to_detect_format) {
2095 0         0 my ( $fmt, $loc ) = ( '<undef>', '<undef>' );
2096 0 0       0 if ( defined( $dates_formats[$n] ) ) {
2097 0         0 ( $fmt, $loc ) = @{ $dates_formats[$n] }[ 0, 1 ];
  0         0  
2098             }
2099 0         0 printf( $_debugh "%03d %-25s %-30s %s\n",
2100             $n, $self->_col_dispname($n),
2101             $fmt, $loc
2102             );
2103             }
2104             }
2105              
2106 53 100       168 if ( !$self->{fields_dates_auto} ) {
2107 10         20 my $e = 0;
2108 10         23 for my $n (@fields_to_detect_format) {
2109 20 100       53 next if defined( $dates_formats[$n] );
2110 7         17 $self->_print_error(
2111             "unable to detect DateTime format of field '"
2112             . $self->_col_dispname($n) . "'",
2113             1
2114             );
2115 7         195 $e++;
2116             }
2117             $self->_print_error(
2118 10 100       44 "$e field(s) encountered with unknown DateTime format")
2119             if $e;
2120             }
2121              
2122 51         642 $self->{_dates_detailed_status} = {%dates_detailed_status};
2123 51         10202 $self->{_dates_formats} = [@dates_formats];
2124             }
2125              
2126             sub _debug_output_fmt {
2127 0     0   0 my ( $self, $prefix, $fmt, $rec ) = @_;
2128              
2129 0         0 my $_debugh = $self->{_debugh};
2130              
2131 0         0 my ( $fstr, $floc ) = ( $fmt->format, $fmt->locale );
2132 0 0 0     0 ( $fstr, $floc ) = (
2133             '<+T>' . $rec->format_with_addition_of_time,
2134             $rec->locale_with_addition_of_time
2135             )
2136             if defined($rec)
2137             and $rec->has_found_time;
2138              
2139 0         0 my $locstr = '';
2140 0 0 0     0 $locstr = "(" . $floc . ")" if defined($floc) and $floc ne '';
2141              
2142 0         0 my $tmpok;
2143 0 0       0 $tmpok = $rec->count_ok if defined($rec);
2144 0 0       0 $tmpok = '<undef>' unless defined($tmpok);
2145 0         0 my $tmpko;
2146 0 0       0 $tmpko = $rec->count_ko if defined($rec);
2147 0 0       0 $tmpko = '<undef>' unless defined($tmpko);
2148              
2149 0         0 print( $_debugh
2150             "$prefix (format '$fstr$locstr': OK = $tmpok, KO = $tmpko)\n" );
2151             }
2152              
2153             # When no parse can be done (parser to test is undef), return 1
2154             sub _try_parser {
2155 11481     11481   21108 my ( $fmt, $rec, $value_to_parse ) = @_;
2156              
2157 11481         142300 my $parser = $fmt->parser;
2158 11481 100       191018 $parser = $rec->parser_with_addition_of_time if $rec->has_found_time;
2159              
2160 11481         92946 my $is_ok = 1;
2161 11481 100       37082 $is_ok = ( defined( $parser->parse_datetime($value_to_parse) ) ? 1 : 0 )
    100          
2162             if $parser;
2163              
2164 11481 100       2283436 unless ( defined( $rec->format ) ) {
2165 8465         148944 $rec->format( $fmt->format );
2166 8465         263129 $rec->locale( $fmt->locale );
2167             }
2168              
2169 11481         205131 return $is_ok;
2170             }
2171              
2172             #
2173             # By checking Text::AutoCSV with NYTProf, I saw the t/02-dates2.t test was long
2174             # because of the time to create Strptime parsers all along execution (while
2175             # detecting formats of dates/times). I spent some time caching (manually, and
2176             # painfully) Strptime parsers for naked dates, but didn't afford to doing it for
2177             # the time detection.
2178             #
2179             # Then I came upon knowing of the Memoize module while reading
2180             # HIGHER-ORDER PERL
2181             # of
2182             # Mark Jason Dominus, editor: Morgan Kaufmann Publishers (Elsevier)
2183             # You can find here (I strongly recommend your reading this book):
2184             # http://hop.perl.plover.com/book/pdf/HigherOrderPerl.pdf
2185             #
2186             # TODO
2187             # Actually a code rewriting should be done, to discard all the caching work done
2188             # for naked dates and just rely on this memoization, that is just perfect
2189             # feature for this case.
2190             #
2191             memoize('_build_strptime_parser');
2192             sub _build_strptime_parser { DateTime::Format::Strptime->new(@_) }
2193              
2194             sub _guess_time_format {
2195              
2196             # IMPORTANT
2197             # Formats are tested in the order of the list below, and the first one that
2198             # succeeds stops the tests.
2199             # That makes the order of the elements important: %R would match any value
2200             # that'd also match %T, that'd cause to return %R whereas %T would be
2201             # possible. Same with AM/PM formats. Thus the longest patterns appear first,
2202             # and in particular, the patterns ending with "AM/PM" appear before all
2203             # others.
2204 1980     1980   15866 my @T = ( '%I:%M:%S %p', '%I:%M %p', '%I:%M:%S%p', '%I:%M%p', '%T', '%R' );
2205              
2206 1980         4053 my ( $self, $format, $locale, $v, $stop ) = @_;
2207              
2208 1980         3132 my $_debugh = $self->{_debugh};
2209 1980   33     4227 my $debug_fmt = ( $self->{_debug} and $DEBUG_DATETIME_FORMATS );
2210              
2211 1980 100       4136 return if $format =~ /:/;
2212              
2213 1976         2392 my $sep;
2214 1976 100       3441 if ( $format eq '' ) {
2215 325         599 $sep = '';
2216             }
2217             else {
2218 1651 100       8853 unless ( ( undef, $sep ) =
2219             $v =~ /(^|\d([^0-9:]+))(\d{1,2}):(\d{1,2})(\D|$)/ )
2220             {
2221 393 100       789 if ( $v =~ /\d{4}:\d{2}(\D|$)/ ) {
2222 29         59 $sep = '';
2223             }
2224             else {
2225              
2226 364 50       691 print( $_debugh
2227             "_guess_time_format(): separator candidate not found in '$v'\n"
2228             ) if $debug_fmt;
2229              
2230 364         867 return;
2231             }
2232             }
2233             }
2234 1612 100       3839 $sep = '' unless defined($sep);
2235              
2236             #
2237             # IMPORTANT
2238             #
2239             # The code below allows to successfully detect DateTime format when
2240             # the first lines contain things like:
2241             # Jan 20 2017 2:00AM
2242             # that could lead to a separator set to ' ' while actually it should be ' '. In
2243             # this case
2244             # if the double-space is kept, then a later value of
2245             # Jan 20 2017 10:00AM
2246             # won't be parsed.
2247             #
2248             # See t/11-bugfix.t, BUG 5, for an explanation of why the line below.
2249             #
2250 1612 100       3048 $sep = ' ' if $sep eq ' ';
2251              
2252 1612 50       2937 if ($debug_fmt) {
2253 0         0 print( $_debugh " _guess_time_format(): Searching time in '$v'\n" );
2254             }
2255              
2256 1612         2707 for my $t (@T) {
2257 9193         220786 my $increased_format = "$format$sep$t";
2258              
2259 9193 50       15009 print( $_debugh
2260             " _guess_time_format(): Trying format '$increased_format'\n" )
2261             if $debug_fmt;
2262              
2263 9193         20363 my %opts = ( pattern => $START . $increased_format . $stop );
2264 9193 100 66     27603 $opts{locale} = $locale if defined($locale) and $locale ne '';
2265 9193         133851 my $parser_of_increased_format = _build_strptime_parser(%opts);
2266             next
2267             unless defined(
2268 9193 100       1359577 $parser_of_increased_format->parse_datetime( $START . $v . $stop )
2269             );
2270              
2271 254 50       155067 if ($debug_fmt) {
2272 0         0 print( $_debugh " _guess_time_format(): found time in '$v'\n" );
2273 0         0 print( $_debugh " Initial format: '$format'\n" );
2274 0         0 print( $_debugh " Increased format: '$increased_format'\n" );
2275             }
2276              
2277 254         1360 return [ $increased_format, $locale, $parser_of_increased_format ];
2278             }
2279 1358         42918 return;
2280             }
2281              
2282             # * ********************************* *
2283             # * END OF DATE FORMAT DETECTION CODE *
2284             # * ********************************* *
2285              
2286             # Take the string of a header in $_ and replace it with the corresponding field
2287             # name
2288             sub _header_to_field_name {
2289 1265     1265   2011 $_ = remove_accents($_);
2290 1265         2483 s/[^[:alnum:]_]//gi;
2291 1265         6845 s/^.*$/\U$&/;
2292             }
2293              
2294             # Return 0 if error, 1 if all good
2295             sub _S2_init_fields_from_header {
2296 355     355   617 my $self = shift;
2297              
2298 355         613 my $has_headers = $self->{has_headers};
2299 355         613 my $_debug = $self->{_debug};
2300 355         570 my $_debugh = $self->{_debugh};
2301              
2302 355         827 my $in_file_disp = $self->get_in_file_disp();
2303              
2304 355         603 my $inh = $self->{_inh};
2305 355         572 my $incsv = $self->{_in_csvobj};
2306              
2307 355         626 $self->{_row_read} = 0;
2308              
2309 355         581 my @columns;
2310             my @headers;
2311 355 100       740 if ($has_headers) {
2312              
2313             print( $_debugh
2314             "$PKG: '$in_file_disp': will parse header line to get column names\n"
2315 344 50       793 ) if $self->{_debug_read};
2316              
2317 344         558 $self->{_row_read}++;
2318              
2319             print( $_debugh "$PKG: '$in_file_disp': will read line #"
2320             . $self->{_row_read}
2321             . "\n" )
2322 344 50       716 if $self->{_debug_read};
2323              
2324 344 50       790 if ( defined( $self->{_inh_header} ) ) {
2325 0         0 my $l = $self->{_inh_header};
2326 0         0 my $inmemh;
2327 0 0       0 if ( !open( $inmemh, '<', \$l ) ) { ## no critic (InputOutput::RequireBriefOpen)
2328 0         0 $self->_print_error(
2329             "can't open header line in-memory. CSV read aborted.");
2330 0         0 return 0;
2331             }
2332 0         0 @headers = @{ _mygetline( $incsv, $inmemh ) };
  0         0  
2333             }
2334             else {
2335 344         705 my $r = _mygetline( $incsv, $inh );
2336 342 50       15499 @headers = @{$r} if defined($r);
  342         1168  
2337             }
2338             print( $_debugh "Line "
2339             . $self->{_row_read}
2340             . ":\n--\n"
2341             . join( '::', @headers )
2342             . "\n--\n" )
2343 342 50       1068 if $self->{_debug_read};
2344             }
2345              
2346 353 100 100     1513 if ( $has_headers and ( !defined $self->{fields_column_names} ) ) {
2347 336         545 my %indexes;
2348 336 100       782 if ( defined( $self->{fields_hr} ) ) {
2349 12 50       56 if (
2350             !$self->_process_header(
2351             \@headers, $self->{fields_hr}, \%indexes
2352             )
2353             )
2354             {
2355 0         0 $self->_print_error("missing headers. CSV read aborted.");
2356 0         0 return 0;
2357             }
2358 12 50       34 if ($_debug) {
2359 0         0 print( $_debugh " \%indexes:\n" );
2360 0         0 for my $k ( sort keys %indexes ) {
2361 0         0 print( $_debugh " \t$k => $indexes{$k}\n" );
2362             }
2363             }
2364 12         68 for ( sort keys %indexes ) {
2365 48 50       90 next if $_ eq '';
2366 48         95 $columns[ $indexes{$_} ] = $_;
2367             }
2368             }
2369             else {
2370 324         772 @columns = @headers;
2371 324         632 map { _header_to_field_name } @columns;
  1265         7838  
2372             }
2373             }
2374              
2375 14         46 @columns = @{ $self->{fields_column_names} }
2376 353 100       1107 if defined( $self->{fields_column_names} );
2377              
2378             # Avoid undef in column names... I prefer empty strings
2379 353 100       685 @columns = map { defined($_) ? $_ : '' } @columns;
  1389         3204  
2380              
2381 353 50       841 if ($_debug) {
2382 0         0 print( $_debugh "-- CSV headers management\n" );
2383 0 0       0 if (@columns) {
2384 0         0 printf( $_debugh " %-3s %-40s %-40s\n",
2385             'COL', 'CSV Header', 'Hash Key' );
2386 0         0 for my $i ( 0 .. $#columns ) {
2387 0         0 my $h = '';
2388 0 0       0 $h = $headers[$i] if defined( $headers[$i] );
2389 0         0 printf( $_debugh " %03d %-40s %-40s\n",
2390             $i, "'$h'", "'$columns[$i]'" );
2391             }
2392             }
2393             else {
2394 0         0 print( $_debugh " No headers\n" );
2395             }
2396             }
2397              
2398 353         511 my %regular_named_fields;
2399 353         1027 for my $i ( 0 .. $#columns ) {
2400 1389 100 66     5341 $regular_named_fields{ $columns[$i] } = $i
2401             if defined( $columns[$i] )
2402             and $columns[$i] ne '';
2403             }
2404 353         1773 $self->{_regular_named_fields} = {%regular_named_fields};
2405 353         1243 $self->{_S2_columns} = [@columns];
2406 353 100       1256 $self->{_S2_headers} = [@headers] if $has_headers;
2407              
2408 353         1363 return 1;
2409             }
2410              
2411             sub out_header {
2412 8     8 1 17 my $self = shift;
2413 8         108 validate_pos( @_, { type => SCALAR }, { type => SCALAR } );
2414              
2415 8         34 my ( $field, $header ) = @_;
2416 8 100       25 $self->{_out_headers} = {} unless exists $self->{_out_headers};
2417              
2418             $self->_print_warning("out_header: field $field already set")
2419 8 50       23 if exists $self->{_out_headers}->{$field};
2420              
2421 8         21 $self->{_out_headers}->{$field} = $header;
2422              
2423 8         35 return $self;
2424             }
2425              
2426             # Return 0 if error, 1 if all good
2427             sub _S3_init_fields_extra {
2428 355     355   591 my $self = shift;
2429              
2430 355         633 my $_debug = $self->{_debug};
2431 355         591 my $_debugh = $self->{_debugh};
2432              
2433 355         564 my $verbose = $self->{verbose};
2434              
2435 355         546 my $has_headers = $self->{has_headers};
2436              
2437 355         471 my %named_fields = %{ $self->{_regular_named_fields} };
  355         1619  
2438 355         730 my @columns = @{ $self->{_S2_columns} };
  355         917  
2439 355         554 my @headers;
2440 355 100       798 @headers = @{ $self->{_S2_headers} } if $has_headers;
  344         856  
2441              
2442 355         629 my @extra_fields_indexes;
2443             my @extra_fields_definitions_list;
2444 36         70 @extra_fields_definitions_list = @{ $self->{_extra_fields} }
2445 355 100       808 if exists $self->{_extra_fields};
2446 355         579 my %extra_fields_definitions;
2447              
2448             my @multiline;
2449 355 100       843 @multiline = @{ $self->{_multiline} } if defined( $self->{_multiline} );
  340         641  
2450              
2451 355         502 my @coldata;
2452 355         819 for my $i ( 0 .. $#columns ) {
2453 1395         46899 my $col = $columns[$i];
2454 1395         1771 my $h;
2455 1395 100       2637 $h = $headers[$i] if $has_headers;
2456 1395 100       23846 push @coldata,
2457             ColData->new(
2458             field_name => $col,
2459             header_text => $h,
2460             description => '',
2461             multiline => ( $multiline[$i] ? 'm' : '1' )
2462             );
2463             }
2464              
2465 355         15094 for my $edef (@extra_fields_definitions_list) {
2466 92         3788 my $c = $edef->check_field_existence;
2467 92 100       612 if ( defined($c) ) {
2468 80 100       178 unless ( exists $named_fields{$c} ) {
2469 9         112 $self->_print_error(
2470             "unknown field '" . $edef->check_field_existence . "'",
2471             0, ERR_UNKNOWN_FIELD, {%named_fields} );
2472 8         21 next;
2473             }
2474             }
2475              
2476 83         139 my @e_eclated = $edef;
2477              
2478 83 100 100     970 if ( $edef->ef_type == $EF_LINK and $edef->link_remote_read eq '*' ) {
2479 2         68 my @cols = $edef->link_remote_obj->get_fields_names();
2480              
2481 2         4 @e_eclated = ();
2482 2         8 my %nf = %named_fields;
2483              
2484 2         5 for my $c (@cols) {
2485              
2486 4         59 my $ex_base = $edef->self_name . $c;
2487 4         27 my $ex_target = $ex_base;
2488 4         7 my $i = 1;
2489 4         11 while ( exists $nf{$ex_target} ) {
2490 1         2 $i++;
2491 1         5 $ex_target = $ex_base . '_' . $i;
2492             }
2493              
2494 4         50 my $e = ExtraField->new(
2495             ef_type => $EF_LINK,
2496             self_name => $ex_target,
2497             description => $edef->description . " ($c)",
2498              
2499             link_self_search => $edef->link_self_search,
2500             link_remote_obj => $edef->link_remote_obj,
2501             link_remote_search => $edef->link_remote_search,
2502             link_remote_read => $c,
2503              
2504             link_vlookup_opts => $edef->link_vlookup_opts
2505             );
2506 4         354 push @e_eclated, $e;
2507 4         14 $nf{$ex_target} = undef;
2508             }
2509             }
2510              
2511 83         1166 for my $e1 (@e_eclated) {
2512 85 100       1126 if ( exists $named_fields{ $e1->self_name } ) {
2513 6         101 $self->_print_error( "extra field: duplicate field name: '"
2514             . $e1->self_name
2515             . "'" );
2516 6         17 next;
2517             }
2518              
2519 79         523 my $index_of_new_element = $#columns + 1;
2520 79         117 push @extra_fields_indexes, $index_of_new_element;
2521 79         925 $columns[$index_of_new_element] = $e1->self_name;
2522 79         1304 $named_fields{ $e1->self_name } = $index_of_new_element;
2523 79         1248 $extra_fields_definitions{ $e1->self_name } = $e1;
2524              
2525 79 50       1325 push @headers, $e1->self_name if $has_headers;
2526 79         1244 push @coldata,
2527             ColData->new(
2528             field_name => $e1->self_name,
2529             header_text => $e1->self_name,
2530             description => $e1->description,
2531             multiline => '?'
2532             );
2533             }
2534              
2535             }
2536 354 100       3314 $self->{_headers} = [@headers] if $has_headers;
2537 354         770 $self->{_extra_fields_indexes} = [@extra_fields_indexes];
2538 354         1075 $self->{_columns} = [@columns];
2539 354         858 $self->{_extra_fields_definitions} = {%extra_fields_definitions};
2540              
2541 354         1651 $self->{_named_fields} = {%named_fields};
2542              
2543 354         1269 $self->_detect_dates_formats();
2544              
2545 351         1478 $self->{_read_update_after_ar} = [];
2546 351         931 $self->{_write_update_before_ar} = [];
2547 351         508 my @dates_formats;
2548 60         231 @dates_formats = @{ $self->{_dates_formats} }
2549 351 100       843 if defined( $self->{_dates_formats} );
2550 351         958 for my $i ( 0 .. $#columns ) {
2551 1455         2097 my $dt_format;
2552             my $dt_locale;
2553 1455 100       2652 if ( defined( $dates_formats[$i] ) ) {
2554 330         598 $dt_format = $dates_formats[$i]->[0];
2555 330         506 $dt_locale = $dates_formats[$i]->[1];
2556             }
2557 1455         21525 $coldata[$i]->dt_format($dt_format);
2558 1455         25988 $coldata[$i]->dt_locale($dt_locale);
2559              
2560 1455 100       9777 next unless defined($dt_format);
2561              
2562 330         449 my %opts_in;
2563 330 100 66     1077 $opts_in{locale} = $dt_locale
2564             if defined($dt_locale)
2565             and $dt_locale ne '';
2566              
2567 330         4295 my $obj_strptime_in =
2568             _build_strptime_parser( pattern => $dt_format, %opts_in );
2569              
2570 330         74967 my %opts_out;
2571             my $loc_out = (
2572             exists $self->{out_dates_locale}
2573             ? $self->{out_dates_locale}
2574 330 50       786 : $dt_locale
2575             );
2576 330 100 66     1285 $opts_out{locale} = $loc_out if defined($loc_out) and $loc_out ne '';
2577             my $obj_strptime_out = _build_strptime_parser(
2578             pattern => (
2579             exists $self->{out_dates_format}
2580             ? $self->{out_dates_format}
2581 330 50       4947 : $dt_format
2582             ),
2583             %opts_out
2584             );
2585              
2586 330         6947 my $refsub_is_datetime_empty = $self->{_refsub_is_datetime_empty};
2587 330         849 my $in_file_disp = $self->get_in_file_disp();
2588              
2589             $self->{_read_update_after_ar}->[$i] = sub {
2590             return
2591 352 100 66 352   1657 if ( !defined $_ )
      33        
      66        
2592             or $_ eq ''
2593             or ( defined($refsub_is_datetime_empty)
2594             and $refsub_is_datetime_empty->($_) );
2595              
2596 315         447 my $s = $_[0];
2597 315         603 my $field = _get_def( $_[1], '<?>' );
2598              
2599 315         905 my $dt = $obj_strptime_in->parse_datetime($_);
2600              
2601 315 0 33     167140 if ( $_debug
      33        
2602             and $DEBUG_DATETIME_FORMATS
2603             and $DEBUG_DATETIME_FORMATS_EVEN_MORE )
2604             {
2605 0 0       0 print( $_debugh "-- Record "
2606             . $s->get_recnum()
2607             . ", field '$field':\n String parsed: '$_'\n"
2608             . " Parse format: '$dt_format'\n"
2609             . " DateTime obj: <"
2610             . ( defined($dt) ? $dt . '' : 'undef' )
2611             . ">\n" );
2612             }
2613              
2614 315 100       643 if ( !defined($dt) ) {
2615 2         10 my $recnum = $s->get_recnum();
2616 2 50       6 if ($verbose) {
2617 0         0 $s->_print( "$PKG: "
2618             . "$in_file_disp: record $recnum: field $field: "
2619             . "unable to parse DateTime\n" );
2620 0         0 $s->_print("$PKG: field: '$_'\n");
2621 0         0 $s->_print("$PKG: format: '$dt_format'\n");
2622 0 0       0 $s->_print( "$PKG: "
2623             . "locale: '"
2624             . ( $dt_locale eq '' ? '<none>' : $dt_locale )
2625             . "'\n" );
2626 0         0 $s->_print( "$PKG: "
2627             . "Probable cause: when detecting DateTime format, "
2628             . "$PKG will stop reading\n" );
2629 0         0 $s->_print( "$PKG: "
2630             . "input as soon as the format is worked out. "
2631             . "If a value found later\n" );
2632 0         0 $s->_print( "$PKG: "
2633             . "turns out to use another DateTime format, "
2634             . "it'll generate a DateTime\n" );
2635 0         0 $s->_print("$PKG: parse error, as is the case now.\n");
2636 0         0 $s->_print_error("unable to parse DateTime");
2637             }
2638             else {
2639 2         18 $s->_print_error(
2640             "$in_file_disp: record $recnum: field $field: "
2641             . "unable to parse DateTime '$_'" );
2642             }
2643             }
2644              
2645 314         545 return $dt;
2646 330         2241 };
2647             $self->{_write_update_before_ar}->[$i] = sub {
2648 96 100   96   181 return '' unless defined($_);
2649 87 100       188 return $_ if !ref($_);
2650 81 50       231 return $_ unless $_->isa('DateTime');
2651              
2652 81         259 my $str = $obj_strptime_out->format_datetime($_);
2653              
2654 81 50       14831 if ( !defined($str) ) {
2655 0         0 my $s = $_[0];
2656 0         0 my $recnum = $s->get_recnum();
2657 0         0 my $field = _get_def( $_[1], '<?>' );
2658 0         0 $s->_print_error(
2659             "$in_file_disp: record $recnum: field $field: "
2660             . "unable to print DateTime '$_'" );
2661             }
2662              
2663 81         164 return $str;
2664 330         1479 };
2665             }
2666              
2667 351         1352 $self->{_coldata} = [@coldata];
2668              
2669 351         1321 my @loop = (
2670             [ '_read_update_after_hr', '_read_update_after_ar', 'read post' ],
2671             [ '_write_update_before_hr', '_write_update_before_ar', 'write pre' ]
2672             );
2673 351         955 for my $ii ( 0 .. $#loop ) {
2674 702         1074 my $l = $loop[$ii];
2675              
2676 702         1283 my $ht = $self->{ $l->[0] };
2677 702         907 my @subrefs = @{ $self->{ $l->[1] } };
  702         1421  
2678 702         953 for my $field ( keys %{$ht} ) {
  702         1503  
2679 70 50       136 unless ( exists $named_fields{$field} ) {
2680 0         0 $self->_print_error( $l->[2] . ": unknown field '$field'",
2681             0, ERR_UNKNOWN_FIELD, {%named_fields} );
2682 0         0 next;
2683             }
2684              
2685 70         93 my $i = $named_fields{$field};
2686              
2687 70         106 my @allsubs;
2688 70         86 push @allsubs, @{ $ht->{$field} };
  70         119  
2689 70 100       128 if ( defined( $subrefs[$i] ) ) {
2690 2 50       6 unshift @allsubs, $subrefs[$i] if $ii == 0;
2691 2 50       4 push @allsubs, $subrefs[$i] if $ii == 1;
2692             }
2693              
2694             my $finalsub = sub {
2695 196     196   253 for my $s (@allsubs) {
2696 262         802 $_ = $s->(@_);
2697             }
2698 189         3462 return $_;
2699 70         198 };
2700 70         138 $subrefs[$i] = $finalsub;
2701              
2702             }
2703 702         1859 $self->{ $l->[1] } = [@subrefs];
2704             }
2705              
2706 351         601 my $tmp;
2707 351         1337 $tmp = _get_def( $self->{out_fields}, $self->{write_fields} );
2708 351         826 my @wf;
2709 351 100       817 @wf = @$tmp if defined $tmp;
2710 351         571 my $count_field_not_found = 0;
2711 351         770 for (@wf) {
2712 16 100 66     102 next if ( !defined $_ ) or $_ eq '' or exists $named_fields{$_};
      100        
2713 3         6 $count_field_not_found++;
2714 3         25 $self->_print_error( "out_fields: unknown field '$_'",
2715             1, ERR_UNKNOWN_FIELD, {%named_fields} );
2716             }
2717 351 100       751 if ($count_field_not_found) {
2718 2         18 $self->_print_error("non existent field(s) encountered");
2719 1         10 delete $self->{out_fields};
2720 1         4 delete $self->{write_fields};
2721             }
2722              
2723 350         507 my %sh;
2724 350 100       819 %sh = %{ $self->{_out_headers} } if defined( $self->{_out_headers} );
  4         19  
2725 350         559 $count_field_not_found = 0;
2726 350         837 for ( keys %sh ) {
2727 8 100 33     47 next if ( !defined $_ ) or $_ eq '' or exists $named_fields{$_};
      66        
2728 2         3 $count_field_not_found++;
2729 2         13 $self->_print_error( "out_header: unknown field '$_'",
2730             1, ERR_UNKNOWN_FIELD, {%named_fields} );
2731             }
2732 350 100       668 $self->_print_error("non existent field(s) encountered")
2733             if $count_field_not_found;
2734              
2735 349         2311 return 1;
2736             }
2737              
2738             #
2739             # Return 0 if there's no more records (error or eof reached), 1 if a record got
2740             # read successfully.
2741             #
2742             # If return value is 1:
2743             # $$ref_ar and $$ref_hr are set to array ref and hash ref of the record,
2744             # respectively
2745             #
2746             # If return value is 0:
2747             # $$ref_ar and $$ref_hr are set to undef if an error occured
2748             # $$ref_ar and $$ref_hr are set to a scalar if eof reached
2749             #
2750             sub _read_one_record_from_input {
2751 2341     2341   4250 my ( $self, $ref_ar, $ref_row_hr ) = @_;
2752              
2753 2341         3549 my $_debug = $self->{_debug};
2754 2341         3369 my $_debug_extra_fields = $self->{_debug_extra_fields};
2755 2341         3050 my $_debugh = $self->{_debugh};
2756              
2757 2341         4328 my $in_file_disp = $self->get_in_file_disp();
2758              
2759 2341         3477 my $incsv = $self->{_in_csvobj};
2760 2341         2822 my $ar;
2761              
2762             print( $_debugh "$PKG: '$in_file_disp': will read line #"
2763             . ( $self->{_row_read} + 1 )
2764             . "\n" )
2765 2341 50       4048 if $self->{_debug_read};
2766              
2767 2341 100       4056 unless ( $ar = _mygetline( $incsv, $self->{_inh} ) ) {
2768 286 50       10007 if ( !$incsv->eof() ) {
2769 0         0 my ( $code, $str, $pos ) = $incsv->error_diag();
2770 0         0 $self->_print_error( "$code: $str, record "
2771             . $incsv->record_number
2772             . ", position $pos" );
2773 0         0 $$ref_ar = undef;
2774 0         0 $$ref_row_hr = undef;
2775             }
2776             else {
2777 286         1781 $$ref_ar = 1;
2778 286         683 $$ref_row_hr = 1;
2779             }
2780              
2781 286         861 $self->_close_inh();
2782              
2783 286         944 return 0;
2784             }
2785              
2786 2055         58344 $self->{_row_read}++;
2787              
2788 2055         2789 my %named_fields = %{ $self->{_named_fields} };
  2055         9074  
2789              
2790 2055 50       4801 if ( $self->{_debug_read} ) {
2791 0         0 print( $_debugh "Line " . $self->{_row_read} . ":\n--\n" );
2792 0         0 for ( sort keys %named_fields ) {
2793 0         0 my $c = _get_def( $ar->[ $named_fields{$_} ], '<undef>' );
2794 0         0 print( $_debugh " $_ => '" . $c . "'\n" );
2795             }
2796             }
2797              
2798 2055         2949 my $columns_ar = $self->{_columns};
2799              
2800 2055         2871 my $no_undef = $self->{no_undef};
2801 2055 100       3325 if ($no_undef) {
2802 30         37 for ( 0 .. $#{$columns_ar} ) {
  30         68  
2803 324 100       575 $ar->[$_] = '' unless defined( $ar->[$_] );
2804             }
2805             }
2806              
2807 2055         2988 my $row_hr = {};
2808             $row_hr->{$_} = $ar->[ $self->{_regular_named_fields}->{$_} ]
2809 2055         2751 foreach keys %{ $self->{_regular_named_fields} };
  2055         12825  
2810              
2811 2055         4016 my $rpost = $self->{_read_update_after_ar};
2812 2055         2729 for my $i ( 0 .. $#{$columns_ar} ) {
  2055         4431  
2813 10586         13242 my $subref = $rpost->[$i];
2814 10586 100       17051 next unless defined($subref);
2815              
2816 479         545 do {
2817 479         650 my $field = $columns_ar->[$i];
2818 479         746 local $_ = $ar->[$i];
2819 479         820 my $new_val = $subref->( $self, $field );
2820 474         820 $ar->[$i] = $new_val;
2821 474 50       1515 $row_hr->{$field} = $new_val if defined($field);
2822             }
2823              
2824             }
2825              
2826 2050         2815 for my $i ( @{ $self->{_extra_fields_indexes} } ) {
  2050         3742  
2827 442         599 my $name = $columns_ar->[$i];
2828 442         628 my $e = $self->{_extra_fields_definitions}->{$name};
2829              
2830 442 50       714 print( $_debugh "Extra field: #$i: $name\n" ) if $_debug_extra_fields;
2831              
2832 442         483 my $value;
2833 442 100       6481 if ( $e->ef_type == $EF_LINK ) {
    100          
    50          
2834              
2835 238 50       1592 print( $_debugh " linked field\n" ) if $_debug_extra_fields;
2836              
2837 238         2874 my $remobj = $e->link_remote_obj;
2838             $value =
2839             $remobj->vlookup( $e->link_remote_search,
2840 238         3799 $ar->[ $named_fields{ $e->link_self_search } ],
2841             $e->link_remote_read, $e->link_vlookup_opts );
2842              
2843             }
2844             elsif ( $e->ef_type == $EF_FUNC ) {
2845              
2846 51 50       1085 print( $_debugh " computed field\n" ) if $_debug_extra_fields;
2847              
2848 51         622 $value = $e->func_sub->( $name, $row_hr, $self->{_stats} );
2849              
2850             }
2851             elsif ( $e->ef_type == $EF_COPY ) {
2852              
2853 153 50       5323 print( $_debugh " copy field\n" ) if $_debug_extra_fields;
2854              
2855 153         1768 my $input = $row_hr->{ $e->copy_source };
2856 153 50 33     988 $input = '' if ( !defined $input ) and $no_undef;
2857 153 100       1709 if ( defined( $e->copy_sub ) ) {
2858 57         347 local $_ = $input;
2859 57         641 $value = $e->copy_sub->();
2860             }
2861             else {
2862 96         576 $value = $input;
2863             }
2864              
2865 153 50       868 print( $_debugh " in: '$input', out: '$value'\n" )
2866             if $_debug_extra_fields;
2867              
2868             }
2869             else {
2870 0         0 confess "Unknown ef_type '"
2871             . $e->ef_type
2872             . "', check this module' code urgently!";
2873             }
2874              
2875 440 100 100     1456 $value = '' if ( !defined $value ) and $no_undef;
2876 440         755 $ar->[$i] = $value;
2877 440         694 $row_hr->{$name} = $value;
2878              
2879 440 50       854 print( $_debugh " $name => '$value'\n" ) if $_debug_extra_fields;
2880              
2881             }
2882              
2883 2048 100       3762 if ( defined( $self->{read_post_update_hr} ) ) {
2884             $self->{read_post_update_hr}
2885 33         78 ->( $row_hr, $self->{_stats}, $self->get_recnum() );
2886 33         264 $ar->[ $named_fields{$_} ] = $row_hr->{$_} foreach keys %named_fields;
2887             }
2888              
2889 2048 100       5774 lock_keys(%$row_hr) if $self->{croak_if_error};
2890              
2891             $self->{walker_ar}->( $ar, $self->{_stats}, $self->get_recnum() )
2892 2048 100       13469 if defined( $self->{walker_ar} );
2893             $self->{walker_hr}->( $row_hr, $self->{_stats}, $self->get_recnum() )
2894 2048 100       4136 if defined( $self->{walker_hr} );
2895              
2896 2047         3249 $$ref_ar = $ar;
2897 2047         4093 $$ref_row_hr = $row_hr;
2898              
2899 2047         5928 return 1;
2900             }
2901              
2902             sub _open_read {
2903 297     297   476 my $self = shift;
2904              
2905 297         499 my $verbose = $self->{verbose};
2906 297         700 my $in_file_disp = $self->get_in_file_disp();
2907              
2908 297         687 $self->{_stats} = {};
2909 297         531 $self->{_read_in_progress} = 1;
2910              
2911 297 50       739 $self->_print("-- $in_file_disp reading start\n") if $verbose;
2912             }
2913              
2914             sub _close_read {
2915 316     316   490 my $self = shift;
2916 316         441 my $keep_quiet = shift;
2917              
2918 316         505 my $verbose = $self->{verbose};
2919 316         623 my $in_file_disp = $self->get_in_file_disp();
2920              
2921 316         568 $self->{_read_in_progress} = 0;
2922              
2923 316 50 33     721 if ( $verbose and ( !$keep_quiet ) ) {
2924             $self->_print( "-- $in_file_disp reading end: "
2925             . $self->{_row_read}
2926 0         0 . " row(s) read\n" );
2927 0         0 for my $k ( sort keys %{ $self->{_stats} } ) {
  0         0  
2928 0         0 $self->_printf( " %7d %s\n", $self->{_stats}->{$k}, $k );
2929             }
2930             }
2931              
2932 316         617 $self->{_nb_rows} = $self->{_row_read};
2933             }
2934              
2935             # Return 0 if error, 1 if all good
2936             sub _S4_read_all_in_mem {
2937 167     167   267 my $self = shift;
2938              
2939 167         469 $self->_register_pass("_S4_read_all_in_mem()");
2940              
2941 167         486 $self->_open_read();
2942              
2943 167         259 my $ar;
2944             my $row_hr;
2945 167         498 while ( $self->_read_one_record_from_input( \$ar, \$row_hr ) ) {
2946              
2947 1553         2124 push @{ $self->{_flat} }, $ar;
  1553         4335  
2948              
2949             }
2950              
2951 165 50       451 my $retcode = ( defined($ar) ? 1 : 0 );
2952 165         482 $self->_update_in_mem_record_count();
2953              
2954 165         456 $self->_close_read();
2955              
2956 165         409 return $retcode;
2957             }
2958              
2959             sub _chain_array {
2960 24     24   140 return split( /\s*->\s*/, $_[0] );
2961             }
2962              
2963             sub _chain_str {
2964 2     2   6 return join( '->', @_ );
2965             }
2966              
2967             sub field_add_link {
2968 25     25 1 4901 my $self = shift;
2969              
2970 25         756 validate_pos(
2971             @_,
2972             { type => UNDEF | SCALAR },
2973             { type => SCALAR },
2974             { type => SCALAR | OBJECT },
2975             { type => HASHREF, optional => 1 }
2976             );
2977              
2978 22         95 my ( $new_field, $chain, $obj, $param_opts ) = @_;
2979              
2980 22         39 my $croak_if_error = $self->{croak_if_error};
2981 22         36 my $_debug = $self->{_debug};
2982 22         34 my $_debugh = $self->{_debugh};
2983              
2984 22         51 my @c = _chain_array($chain);
2985 22 100       52 $new_field = $c[2] unless defined($new_field);
2986              
2987 22 50       47 print( $_debugh
2988             "Registering new linked field, new_field = '$new_field', chain = '$chain'\n"
2989             ) if $_debug;
2990              
2991 22 100 66     101 unless ( @c == 3 and $c[2] ne '' ) {
2992 1         7 $self->_print_error( "wrong links chain parameter: '$chain', "
2993             . "look for CHAIN in Text::AutoCSV manual for help" );
2994 1         9 return;
2995             }
2996              
2997 21 50       52 return unless $self->_status_forward('S2');
2998 21 50       52 return unless $self->_status_backward('S2');
2999              
3000 21         36 my @tmp;
3001 21 100       47 @tmp = %{$param_opts} if $param_opts;
  13         42  
3002 21         479 my %opts = validate( @tmp, $SEARCH_VALIDATE_OPTIONS );
3003              
3004 20         73 my $target_name = '';
3005 20 100       46 if ( ref $obj eq '' ) {
3006 19         27 my $in_file = $obj;
3007 19         27 $target_name = $in_file;
3008              
3009             #
3010             # TODO (?)
3011             #
3012             # Take into account the fact that the OS' file system is case insensitive. At
3013             # the moment, two different strings (even if identical in a case insensitive
3014             # comparison) will be managed as being distinct.
3015             # I put a question mark in this TO DO - after all, the user of this module had
3016             # better use same case when dealing with multiple links of the same file.
3017             #
3018             # Also, tuning this module' behavior depending on the OS' characteristics
3019             # would be not ideal, it'd add a level of complexity to understand how it
3020             # works and what to expect.
3021             #
3022 19 100 100     72 if ( exists $self->{_obj} and exists $self->{_obj}->{$in_file} ) {
3023              
3024 4 50       9 print( $_debugh
3025             "field_add_link: file '$in_file': re-using existing Text::AutoCSV object\n"
3026             ) if $_debug;
3027              
3028 4         7 $obj = $self->{_obj}->{$in_file};
3029             }
3030             else {
3031              
3032 15 50       31 print( $_debugh
3033             "field_add_link: file '$in_file': creating new Text::AutoCSV object\n"
3034             ) if $_debug;
3035              
3036 15 100       46 $self->{_obj} = {} unless exists $self->{_obj};
3037              
3038             #
3039             # The created Text::AutoCSV must be created with the same search options as what
3040             # is currently found in $self.
3041             #
3042             # Why?
3043             # Because the link is populated doing a vlookup on the remote object ($obj
3044             # below), not on $self. Therefore, if we don't "propagate" search options from
3045             # $self to $obj, search tunnings won't work as user would expect.
3046             #
3047 15         20 my %search_opts;
3048 15         30 for (
3049             qw(search_case search_trim search_ignore_empty
3050             search_ignore_accents search_value_if_not_found
3051             search_value_if_ambiguous search_ignore_ambiguous)
3052             )
3053             {
3054             # We assign depending on whether or not the attribute EXISTS - the definedness
3055             # is not appropriate, in case an attribute would have been assigned to undef.
3056 105 100       179 $search_opts{$_} = $self->{$_} if exists $self->{$_};
3057             }
3058              
3059             $obj = Text::AutoCSV->new(
3060             in_file => $in_file,
3061             verbose => $self->{verbose},
3062             infoh => $self->{infoh},
3063             _debug => $self->{debug},
3064             _debugh => $self->{debugh},
3065 15         95 %search_opts
3066             );
3067 15         55 $self->{_obj}->{$in_file} = $obj;
3068             }
3069             }
3070             else {
3071 1         57 $target_name = '(object)';
3072 1 50       5 print( $_debugh "field_add_link: Text::AutoCSV object provided\n" )
3073             if $_debug;
3074             }
3075              
3076 20 100       55 $self->{_extra_fields} = [] unless exists $self->{_extra_fields};
3077              
3078 20         30 push @{ $self->{_extra_fields} }, ExtraField->new(
  20         450  
3079             ef_type => $EF_LINK,
3080             self_name => $new_field,
3081             description => "link: $target_name, chain: $chain",
3082             check_field_existence => $c[0],
3083              
3084             link_self_search => $c[0],
3085             link_remote_obj => $obj,
3086             link_remote_search => $c[1],
3087             link_remote_read => $c[2],
3088              
3089             link_vlookup_opts => \%opts
3090             );
3091              
3092 20         2172 return $self;
3093             }
3094              
3095             sub links {
3096 2     2 1 5 my $self = shift;
3097              
3098 2         31 validate_pos(
3099             @_,
3100             { type => UNDEF | SCALAR },
3101             { type => SCALAR },
3102             { type => SCALAR | OBJECT },
3103             { type => HASHREF, optional => 1 }
3104             );
3105              
3106 2         7 my $prefix_field = shift;
3107 2         4 my $chain = shift;
3108 2         4 my ( $obj, $param_opts ) = @_;
3109              
3110 2         6 my @c = _chain_array($chain);
3111              
3112 2 50 33     57 if ( @c != 2 or $c[0] eq '' or $c[1] eq '' ) {
      33        
3113 0         0 $self->_print_error( "wrong links chain parameter: '$chain', "
3114             . "look for JOINCHAIN in Text::AutoCSV manual for help" );
3115 0         0 return;
3116             }
3117              
3118 2 100       8 $prefix_field = '' unless defined($prefix_field);
3119 2         6 my $chain2 = _chain_str( @c, '*' );
3120              
3121 2         8 return $self->field_add_link( $prefix_field, $chain2, @_ );
3122             }
3123              
3124             sub field_add_computed {
3125 7     7 1 2410 my $self = shift;
3126              
3127 7         202 validate_pos( @_, { type => SCALAR }, { type => CODEREF } );
3128 6         24 my ( $new_field, $func ) = @_;
3129              
3130 6         15 my $croak_if_error = $self->{croak_if_error};
3131              
3132 6         9 my $_debug = $self->{_debug};
3133 6         13 my $_debugh = $self->{_debugh};
3134              
3135 6 50       19 print( $_debugh
3136             "Registering new computed field, new_field = '$new_field'\n" )
3137             if $_debug;
3138              
3139 6 100       18 return unless $self->_status_forward('S2');
3140 5 50       14 return unless $self->_status_backward('S2');
3141              
3142 5         10 push @{ $self->{_extra_fields} }, ExtraField->new(
  5         108  
3143             ef_type => $EF_FUNC,
3144             self_name => $new_field,
3145             description => "computed",
3146              
3147             func_sub => $func
3148             );
3149              
3150 5         449 return $self;
3151             }
3152              
3153             sub field_add_copy {
3154 14     14 1 8161 my $self = shift;
3155              
3156 14         199 validate_pos(
3157             @_,
3158             { type => SCALAR },
3159             { type => SCALAR },
3160             { type => CODEREF, optional => 1 }
3161             );
3162 14         56 my ( $new_field, $copy_source, $func ) = @_;
3163              
3164 14         29 my $croak_if_error = $self->{croak_if_error};
3165              
3166 14         25 my $_debug = $self->{_debug};
3167 14         25 my $_debugh = $self->{_debugh};
3168              
3169 14 50       37 print( $_debugh
3170             "Registering field copy, new_field = '$new_field' copied from '$copy_source'\n"
3171             ) if $_debug;
3172              
3173 14 100       31 return unless $self->_status_forward('S2');
3174 12 50       45 return unless $self->_status_backward('S2');
3175              
3176 12 100       24 push @{ $self->{_extra_fields} }, ExtraField->new(
  12         284  
3177             ef_type => $EF_COPY,
3178             self_name => $new_field,
3179             description => "copy of $copy_source "
3180             . ( defined($func) ? '(with sub)' : '(no sub)' ),
3181             check_field_existence => $copy_source,
3182              
3183             copy_source => $copy_source,
3184             copy_sub => $func
3185             );
3186              
3187 12         1058 return $self;
3188             }
3189              
3190             sub in_map {
3191 15     15 1 1355 my $self = shift;
3192              
3193 15         36 return $self->read_update_after(@_);
3194             }
3195              
3196             sub read_update_after {
3197 16     16 1 25 my $self = shift;
3198 16         191 validate_pos( @_, { type => SCALAR }, { type => CODEREF } );
3199              
3200 16         52 my ( $field, $subref ) = @_;
3201              
3202 16         31 my $_debug = $self->{_debug};
3203 16         23 my $_debugh = $self->{_debugh};
3204              
3205 16 50       33 return unless $self->_status_forward('S2');
3206 16 50       36 return unless $self->_status_backward('S2');
3207              
3208 16 50       36 print( $_debugh "Registering read_post_update subref for field '$field'\n" )
3209             if $_debug;
3210              
3211             $self->{_read_update_after_hr}->{$field} = []
3212 16 100       50 unless defined( $self->{_read_update_after_hr}->{$field} );
3213              
3214 16         20 push @{ $self->{_read_update_after_hr}->{$field} }, $subref;
  16         42  
3215              
3216 16         169 return $self;
3217             }
3218              
3219             sub out_map {
3220 11     11 1 19 my $self = shift;
3221              
3222 11         27 return $self->write_update_before(@_);
3223             }
3224              
3225             sub write_update_before {
3226 12     12 1 17 my $self = shift;
3227 12         124 validate_pos( @_, { type => SCALAR }, { type => CODEREF } );
3228              
3229 12         42 my ( $field, $subref ) = @_;
3230              
3231 12         20 my $_debug = $self->{_debug};
3232 12         19 my $_debugh = $self->{_debugh};
3233              
3234 12 50       27 return unless $self->_status_forward('S2');
3235 12 50       23 return unless $self->_status_backward('S2');
3236              
3237 12 50       25 print( $_debugh "Registering write_pre_update subref for field '$field'\n" )
3238             if $_debug;
3239              
3240             $self->{_write_update_before_hr}->{$field} = []
3241 12 100       45 unless defined( $self->{_write_update_before_hr}->{$field} );
3242              
3243 12         19 push @{ $self->{_write_update_before_hr}->{$field} }, $subref;
  12         24  
3244              
3245 12         75 return $self;
3246             }
3247              
3248             sub reset_next_record_hr {
3249 189     189 1 310 my $self = shift;
3250              
3251 189         973 validate_pos(@_);
3252              
3253 189         535 $self->{_current_record} = undef;
3254              
3255 189         355 return $self;
3256             }
3257              
3258             sub _create_internal_column_name_from_its_number {
3259 1120     1120   2326 return sprintf( "__%04i__", $_[0] );
3260             }
3261              
3262             sub _ar_to_hr {
3263 2079     2079   2952 my $self = shift;
3264              
3265 2079         15865 validate_pos( @_, { type => ARRAYREF } );
3266              
3267 2079         5752 my ($ar) = @_;
3268 2079         2856 my $last_elem_index = scalar( @{$ar} ) - 1;
  2079         3828  
3269              
3270 2079         3679 my $nr = $self->{_named_fields};
3271 2079         3059 my %h;
3272             my %n_seen;
3273 2079         2700 for ( keys %{$nr} ) {
  2079         6083  
3274 9934         19949 $h{$_} = $ar->[ $nr->{$_} ];
3275 9934         18336 undef $n_seen{ $nr->{$_} };
3276             }
3277 2079         4919 for my $i ( 0 .. $last_elem_index ) {
3278 11001 100       19943 if ( !exists( $n_seen{$i} ) ) {
3279 1120         1575 my $k = _create_internal_column_name_from_its_number($i);
3280 1120 50       2895 $h{$k} = $ar->[$i] if !exists $h{$k};
3281             }
3282             }
3283              
3284 2079 100       6375 lock_keys(%h) if $self->{croak_if_error};
3285              
3286 2079         18557 return \%h;
3287             }
3288              
3289             sub get_next_record_hr {
3290 2001     2001 1 3288 my $self = shift;
3291              
3292 2001         14019 validate_pos( @_, { type => SCALARREF, optional => 1 } );
3293              
3294 2001         4712 my $refkey = $_[0];
3295              
3296 2001 50       3754 return unless $self->_status_forward('S4');
3297              
3298 1998 100       4304 if ( !defined( $self->{_current_record} ) ) {
3299 186         358 $self->{_current_record} = 0;
3300             }
3301             else {
3302 1812         2721 $self->{_current_record}++;
3303             }
3304              
3305 1998         3499 my $ar = $self->{_flat}->[ $self->{_current_record} ];
3306 1998 100       3623 if ( !defined($ar) ) {
3307 186         319 $self->{_current_record} = undef;
3308 186         342 $$refkey = undef;
3309 186         540 return;
3310             }
3311              
3312 1812         2768 $$refkey = $self->{_current_record};
3313              
3314 1812         3471 return $self->_ar_to_hr($ar);
3315             }
3316              
3317             sub read {
3318 60     60 1 20399 my $self = shift;
3319              
3320 60         399 validate_pos(@_);
3321              
3322 60 50       226 return unless $self->_status_backward('S3');
3323              
3324 60 50       151 if ( exists $self->{out_orderby} ) {
3325 0 0       0 return unless $self->_status_forward('S4');
3326 0         0 return $self;
3327             }
3328              
3329 60 100       129 return unless $self->_status_forward('S3');
3330              
3331 51         178 $self->_register_pass("read()");
3332              
3333 51         149 $self->_open_read();
3334              
3335 51         88 my $ar;
3336             my $row_hr;
3337 51         170 while ( $self->_read_one_record_from_input( \$ar, \$row_hr ) ) {
3338              
3339             # Ben oui quoi... qu'est-ce que l'on peut bien faire d'autre ?
3340             }
3341              
3342 49         166 $self->_close_read();
3343 49 50       126 return unless defined($ar);
3344              
3345 49 50       127 return unless $self->_status_reset();
3346              
3347 49         246 return $self;
3348             }
3349              
3350             #
3351             # Initially, _read_all_in_mem was intended for the test plan.
3352             #
3353             # Turned out to be sometimes useful for user, thus, is no longer private since
3354             # 1.1.5. Private version below is kept for compatibility.
3355             #
3356             sub read_all_in_mem {
3357 1     1 1 4 my $self = shift;
3358              
3359 1         6 return $self->_read_all_in_mem();
3360             }
3361              
3362             sub _read_all_in_mem {
3363 9     9   1173 my $self = shift;
3364              
3365 9 50       29 return 0 unless $self->_status_backward('S3');
3366 9 50       24 return 0 unless $self->_status_forward('S4');
3367              
3368 9         99 return $self;
3369             }
3370              
3371             sub _render {
3372 0     0   0 my $v = $_[0];
3373              
3374 0 0 0     0 if ( length($v) == 1 and ord($v) < 32 ) {
3375 0         0 my $n = ord($v);
3376 0 0       0 return '\n' if $n == 10;
3377 0 0       0 return '\r' if $n == 13;
3378 0 0       0 return '\t' if $n == 9;
3379 0 0       0 return '\f' if $n == 12;
3380 0 0       0 return '\b' if $n == 8;
3381 0 0       0 return '\a' if $n == 7;
3382 0 0       0 return '\e' if $n == 27;
3383 0         0 return '\0' . oct($n);
3384             }
3385 0         0 return $v;
3386             }
3387              
3388             sub print_id {
3389 0     0 1 0 my $self = shift;
3390              
3391 0         0 $self->_printf( "-- " . $self->get_in_file_disp() . ":\n" );
3392 0         0 $self->_printf(
3393             "sep_char: " . _render( $self->get_sep_char() ) . "\n" );
3394 0         0 $self->_printf(
3395             "escape_char: " . _render( $self->get_escape_char() ) . "\n" );
3396 0         0 $self->_printf(
3397             "in_encoding: " . _render( $self->get_in_encoding() ) . "\n" );
3398 0 0       0 $self->_printf( "is_always_quoted: "
3399             . ( $self->get_is_always_quoted() ? 'yes' : 'no' )
3400             . "\n" );
3401              
3402 0         0 my @coldata = $self->get_coldata();
3403 0         0 my @disp = [
3404             '#', 'FIELD',
3405             'HEADER', 'EXT DATA',
3406             'DATETIME FORMAT', 'DATETIME LOCALE',
3407             'ML'
3408             ];
3409 0         0 my $NCOLS = @{ $disp[0] };
  0         0  
3410 0         0 push @disp, [ map { my $s = $_; $s =~ s/./-/g; $s } @{ $disp[0] } ];
  0         0  
  0         0  
  0         0  
  0         0  
3411              
3412 0         0 for my $i ( 0 .. $#coldata ) {
3413 0         0 my $col = $coldata[$i];
3414              
3415 0         0 my @row = "$i";
3416             push @row, ( defined( $col->[$_] ) ? ( $col->[$_] . '' ) : '' )
3417 0 0       0 for 0 .. $NCOLS - 2;
3418 0         0 for (@row) {
3419 0         0 s/\n/\\n/g;
3420 0         0 s/\r/\\r/g;
3421 0         0 s/\t/\\t/g;
3422             }
3423 0         0 push @disp, [@row];
3424             }
3425 0         0 my $n = @{ $disp[-1] };
  0         0  
3426 0         0 my @max = (-1) x $n;
3427 0         0 for my $l (@disp) {
3428 0 0       0 do { $max[$_] = length( $l->[$_] ) if $max[$_] < length( $l->[$_] ) }
3429 0         0 for ( 0 .. $n - 1 );
3430             }
3431 0         0 my $s = join( ' ', map { "%-${_}s" } @max );
  0         0  
3432 0         0 $self->_print("\n");
3433 0         0 $self->_printf( "$s\n", @{$_} ) for (@disp);
  0         0  
3434             }
3435              
3436             sub set_out_file {
3437 2     2 1 3949 my $self = shift;
3438 2         23 validate_pos( @_, { type => SCALAR } );
3439              
3440 2         8 my ($out_file) = @_;
3441 2         4 $self->{out_file} = $out_file;
3442              
3443 2         9 return $self;
3444             }
3445              
3446             # Subrefs set with out_map
3447             sub _execute_write_update_before {
3448 367     367   654 my ( $self, $ar ) = @_;
3449              
3450 367         563 my $columns_ar = $self->{_columns};
3451              
3452 367         561 my $wpre = $self->{_write_update_before_ar};
3453 367         504 for my $i ( 0 .. $#{$columns_ar} ) {
  367         814  
3454 1177         1599 my $subref = $wpre->[$i];
3455 1177 100       2237 next unless defined($subref);
3456              
3457 159         174 do {
3458 159         230 local $_ = $ar->[$i];
3459 159         218 my $field = $columns_ar->[$i];
3460 159         280 my $new_val = $subref->( $self, $field );
3461 156         345 $ar->[$i] = $new_val;
3462             }
3463              
3464             }
3465             }
3466              
3467             # Take into account write_fields if it got set
3468             sub _apply_write_fields {
3469 466     466   808 my ( $self, $ar ) = @_;
3470              
3471 466         597 my @final;
3472              
3473 466         1461 my $tmp = _get_def( $self->{out_fields}, $self->{write_fields} );
3474 466         865 my @wf;
3475 466 100       823 @wf = @{$tmp} if defined($tmp);
  16         38  
3476              
3477 466 100       1047 return unless @wf;
3478              
3479 16         23 my %named_fields = %{ $self->{_named_fields} };
  16         67  
3480 16         103 for my $i ( 0 .. $#wf ) {
3481 40         75 my $field = $wf[$i];
3482 40         59 my $tmp;
3483 40 100 66     158 $tmp = $ar->[ $named_fields{$field} ]
3484             if defined($field)
3485             and $field ne '';
3486              
3487             # Put here any post-processing of value
3488             # WARNING
3489             # $tmp can be undef
3490             # ...
3491              
3492 40         101 $final[$i] = $tmp;
3493             }
3494 16         70 $_[1] = [@final];
3495             }
3496              
3497             sub write {
3498 109     109 1 24409 my $self = shift;
3499              
3500 109         737 validate_pos(@_);
3501              
3502 109 50       339 return unless $self->_status_forward('S3');
3503              
3504 104         223 my $verbose = $self->{verbose};
3505 104         199 my $_debug = $self->{_debug};
3506 104         198 my $_debugh = $self->{_debugh};
3507              
3508 104         191 my $out_file = $self->{out_file};
3509              
3510 104         151 my %stats;
3511              
3512 104 50       209 $self->_print("-- $out_file writing start\n") if $verbose;
3513 104         169 my $rows_written = 0;
3514              
3515 104         158 my $outh = $self->{outh};
3516              
3517 104         197 $self->{_close_outh_when_finished} = 0;
3518 104 50       236 unless ( defined($outh) ) {
3519 104 50       259 if ( $out_file eq '' ) {
3520 0         0 $outh = \*STDOUT;
3521             }
3522             else {
3523 104 50       163273 unless ( open( $outh, '>', $out_file ) ) { ## no critic (InputOutput::RequireBriefOpen)
3524 0         0 $self->_print_error("unable to open file '$out_file': $!");
3525 0         0 return;
3526             }
3527 104         435 $self->{_close_outh_when_finished} = 1;
3528             }
3529 104         260 $self->{outh} = $outh;
3530             }
3531              
3532 104 50       261 unless ( $self->{_leave_encoding_alone} ) {
3533             my $enc = (
3534             defined( $self->{_inh_encoding} )
3535             ? $self->{_inh_encoding}
3536 104 50       342 : $DEFAULT_OUT_ENCODING
3537             );
3538              
3539             # out_encoding option takes precedence
3540 104 100       295 $enc = $self->{out_encoding} if defined( $self->{out_encoding} );
3541 104         276 my $m = ":encoding($enc)";
3542 104 50 66     276 if ( _is_utf8($enc) and $self->{out_utf8_bom} ) {
3543 0         0 $m .= ':via(File::BOM)';
3544             }
3545              
3546 104 50 33     364 if ( $OS_IS_PLAIN_WINDOWS and $FIX_PERLMONKS_823214 ) {
3547              
3548             # Tested with UTF-16LE, NOT tested with UTF-16BE (it should be the same story)
3549 0 0       0 $m = ":raw:perlio:$m:crlf" if $enc =~ /^utf-?16/i;
3550             }
3551              
3552 104         970 binmode $outh, $m;
3553 104 50       9346 print( $_debugh "Encoding string used for output: $m\n" ) if $_debug;
3554             }
3555              
3556 104         214 my $escape_char = $self->{escape_char};
3557 104         205 my $quote_char = $self->{quote_char};
3558              
3559 104         164 my %opts;
3560 104         218 $opts{binary} = 1;
3561 104         198 $opts{eol} = "\n";
3562              
3563 104 50       346 $opts{sep_char} = $self->{sep_char} if defined( $self->{sep_char} );
3564 104 100       247 $opts{sep_char} = $self->{out_sep_char} if defined( $self->{out_sep_char} );
3565              
3566 104 50       298 $opts{quote_char} = $self->{quote_char} if defined( $self->{quote_char} );
3567             $opts{quote_char} = $self->{out_quote_char}
3568 104 50       237 if defined( $self->{out_quote_char} );
3569              
3570             $opts{escape_char} = $self->{escape_char}
3571 104 100       291 if defined( $self->{escape_char} );
3572             $opts{escape_char} = $self->{out_escape_char}
3573 104 100       237 if defined( $self->{out_escape_char} );
3574              
3575 104         205 $opts{always_quote} = $self->{_is_always_quoted};
3576             $opts{always_quote} = $self->{out_always_quote}
3577 104 100       249 if defined( $self->{out_always_quote} );
3578              
3579 104         744 my $csvout = Text::CSV->new( {%opts} );
3580 104 50       16140 if ( !defined($csvout) ) {
3581 0         0 $self->_print_error("error creating output Text::CSV object");
3582 0         0 return;
3583             }
3584              
3585             my $write_filter_hr =
3586 104         407 _get_def( $self->{out_filter}, $self->{write_filter_hr} );
3587              
3588 104 100 66     725 if (
      100        
      100        
3589             (
3590             $self->{has_headers}
3591             and (
3592             !(
3593             defined( $self->{out_has_headers} )
3594             and ( !$self->{out_has_headers} )
3595             )
3596             )
3597             )
3598             or $self->{out_has_headers}
3599             )
3600             {
3601 102         220 my $ar = [];
3602 102 100       255 if ( $self->{has_headers} ) {
3603 98         201 $ar = $self->{_headers};
3604             }
3605             else {
3606 4         4 my $nf = $self->{_named_fields};
3607 4         5 $ar->[ $nf->{$_} ] = $_ for ( keys %{$nf} );
  4         16  
3608             }
3609              
3610 102 100       255 if ( exists $self->{_out_headers} ) {
3611 3         8 my $h = $self->{_out_headers};
3612 3         5 for ( keys %{ $self->{_named_fields} } ) {
  3         16  
3613 12 100       31 if ( exists $h->{$_} ) {
3614 5         17 $ar->[ $self->{_named_fields}->{$_} ] = $h->{$_};
3615             }
3616             }
3617             }
3618              
3619 102         370 $self->_apply_write_fields($ar);
3620              
3621 102         1531 $csvout->print( $outh, $ar );
3622 102         1083 $rows_written++;
3623             }
3624              
3625 104         177 my $do_status_reset = 0;
3626              
3627             #
3628             # FIXME!!!
3629             #
3630             # Instead of this duplication of code, provide AutoCSV with a "create iterator
3631             # sub" feature to iterate over all records, whatever is going on behind the
3632             # scene (in-memory or read input).
3633             #
3634             # Such an iterator would also benefit to module users.
3635             #
3636              
3637 104 100       275 if ( exists $self->{out_orderby} ) {
3638 1 50       3 return unless $self->_status_forward('S4');
3639             }
3640              
3641 104 100       281 if ( $self->{_status} == 4 ) {
3642              
3643             #
3644             # The content is available in-memory: we write from what we have in-memory
3645             # then...
3646             #
3647              
3648 25         88 my @keys = $self->get_keys();
3649 25         64 my @ordered_keys = @keys;
3650 25 100       74 if ( exists $self->{'out_orderby'} ) {
3651 1         2 my @orderby = @{ $self->{'out_orderby'} };
  1         3  
3652             @ordered_keys = sort {
3653 1         5 for my $f (@orderby) {
  7         15  
3654 8         17 my $cmp =
3655             $self->get_cell( $a, $f ) cmp $self->get_cell( $b, $f );
3656 8 100       22 return $cmp if $cmp;
3657             }
3658 0         0 return 0;
3659             } @keys;
3660             }
3661              
3662             # for my $k ($self->get_keys()) {
3663 25         63 for my $k (@ordered_keys) {
3664 92         246 my $hr = $self->get_row_hr($k);
3665 92 50       274 if ( defined($write_filter_hr) ) {
3666 0 0       0 next unless $write_filter_hr->($hr);
3667             }
3668 92         151 my $ar = [ @{ $self->get_row_ar($k) } ];
  92         207  
3669              
3670 92         291 $self->_execute_write_update_before($ar);
3671 92         271 $self->_apply_write_fields($ar);
3672              
3673 92         662 $csvout->print( $outh, $ar );
3674 92         1032 $rows_written++;
3675             }
3676              
3677             }
3678             else {
3679              
3680             #
3681             # No in-memory content available: we read and write in parallel.
3682             #
3683              
3684 79         244 $self->_register_pass("write()");
3685              
3686 79         227 $self->_open_read();
3687 79         145 my $ar;
3688             my $row_hr;
3689 79         254 while ( $self->_read_one_record_from_input( \$ar, \$row_hr ) ) {
3690 301 100       543 if ( defined($write_filter_hr) ) {
3691             next
3692 46 100       91 unless $write_filter_hr->( $row_hr, \%stats,
3693             $self->get_recnum() );
3694             }
3695 275         470 $ar = [ @{$ar} ];
  275         642  
3696              
3697 275         798 $self->_execute_write_update_before($ar);
3698 272         580 $self->_apply_write_fields($ar);
3699              
3700 272         1441 $csvout->print( $outh, $ar );
3701 272         2199 $rows_written++;
3702             }
3703 72         223 $self->_close_read();
3704              
3705 72         124 $do_status_reset = 1;
3706             }
3707              
3708 97         281 $self->_close_outh();
3709              
3710 97 50       234 if ($verbose) {
3711 0         0 $self->_print(
3712             "-- $out_file writing end: $rows_written row(s) written\n");
3713 0         0 for my $k ( sort keys %stats ) {
3714 0         0 $self->_printf( " %7d %s\n", $stats{$k}, $k );
3715             }
3716             }
3717              
3718 97 100       216 if ($do_status_reset) {
3719 72 50       206 return unless $self->_status_reset();
3720             }
3721 97         1026 return $self;
3722             }
3723              
3724             #
3725             # * *** ************************************************************************
3726             # * *** ************************************************************************
3727             # * OBJ ************************************************************************
3728             # * *** ************************************************************************
3729             # * *** ************************************************************************
3730             #
3731              
3732             #
3733             # The subs below assume Text::AutoCSV can be in status S4 = all in memory.
3734             #
3735              
3736             sub get_keys {
3737 32     32 1 2171 my $self = shift;
3738 32         238 validate_pos(@_);
3739              
3740 32 50       122 return unless $self->_status_forward('S4');
3741              
3742 32         63 my $last_key = @{ $self->{_flat} } - 1;
  32         86  
3743 32         100 my @r = ( 0 .. $last_key );
3744              
3745 32         101 return @r;
3746             }
3747              
3748             sub get_row_ar {
3749 364     364 1 1100 my $self = shift;
3750 364         2759 validate_pos( @_, { type => SCALAR } );
3751 364         1032 my ($key) = @_;
3752              
3753 364 50       802 return unless $self->_status_forward('S4');
3754              
3755 364 50       806 unless ( defined($key) ) {
3756 0         0 $self->_print_error("get_row_ar(): \$key is not defined!");
3757 0         0 return;
3758             }
3759              
3760             $self->_print_error("unknown row '$key'")
3761 364 100       846 unless defined( $self->{_flat}->[$key] );
3762 364         885 return $self->{_flat}->[$key];
3763             }
3764              
3765             sub get_row_hr {
3766 269     269 1 461 my $self = shift;
3767 269         2443 validate_pos( @_, { type => SCALAR } );
3768 269         886 my ($key) = @_;
3769              
3770 269         696 my $ar = $self->get_row_ar($key);
3771 269 100       568 return unless defined($ar);
3772              
3773 267         606 return $self->_ar_to_hr($ar);
3774             }
3775              
3776             #
3777             # Could be made much more efficient (directly read $self->{_flat} instead of
3778             # calling get_row_hr that itself calls get_row_ar).
3779             # I leave it as is because get_hr_all is not good practice (it is not scalable),
3780             # it was primarily done to ease test plan.
3781             #
3782             # By the way I may make it one day not available by default, requesting caller
3783             # to tune some variable (like { $Text::AutoCSV::i_am_the_test_plan = 1 }) to
3784             # expose it.
3785             #
3786             sub get_hr_all {
3787 108     108 1 2130 my $self = shift;
3788 108         720 validate_pos(@_);
3789              
3790 108         226 my @resp;
3791 108         337 $self->reset_next_record_hr();
3792 108         272 while ( my $hr = $self->get_next_record_hr() ) {
3793 414         897 push @resp, $hr;
3794             }
3795 105         558 return @resp;
3796             }
3797              
3798             sub get_recnum {
3799 190     190 1 313 my $self = shift;
3800 190         923 validate_pos(@_);
3801              
3802 190 50       557 return -1 unless $self->{_read_in_progress};
3803 190         395 return _get_def( $self->{_row_read}, -1 );
3804             }
3805              
3806             sub _check_for_search {
3807 906     906   1978 my ( $self, $field ) = @_;
3808 906 50       1984 return unless $self->_status_forward('S4');
3809              
3810 905 100       3246 return 1 if exists $self->{_named_fields}->{$field};
3811             $self->_print_error( "search: unknown field '$field'",
3812 6         24 0, ERR_UNKNOWN_FIELD, $self->{_named_fields} );
3813             }
3814              
3815             sub get_cell {
3816 20     20 1 1353 my $self = shift;
3817 20         184 validate_pos( @_, { type => SCALAR }, { type => SCALAR } );
3818 20         60 my ( $key, $field ) = @_;
3819              
3820 20 50       39 return unless $self->_check_for_search($field);
3821 19         39 my $row = $self->get_row_hr($key);
3822 19 100       41 return $row unless defined($row);
3823 18         51 return $row->{$field};
3824             }
3825              
3826             sub get_values {
3827 9     9 1 2812 my $self = shift;
3828 9         102 validate_pos(
3829             @_,
3830             { type => SCALAR },
3831             { type => UNDEF | CODEREF, optional => 1 }
3832             );
3833 9         30 my ( $field, $filter_subref ) = @_;
3834              
3835 9 50       24 return unless $self->_check_for_search($field);
3836              
3837 9         15 my @values;
3838 9         25 $self->reset_next_record_hr();
3839 9         32 while ( my $hr = $self->get_next_record_hr() ) {
3840 53 100       91 if ( defined($filter_subref) ) {
3841 23         38 local $_ = $hr->{$field};
3842 23 100       41 next unless $filter_subref->();
3843             }
3844 42         150 push @values, $hr->{$field};
3845             }
3846 9         41 return @values;
3847             }
3848              
3849             sub _get_hash_and_projector {
3850 561     561   1243 my ( $self, $field, $arg_opts ) = @_;
3851              
3852 561         1015 my $_debug = $self->{_debug};
3853 561         933 my $_debugh = $self->{_debugh};
3854              
3855 561         813 my %opts;
3856 561 50       1192 %opts = %{$arg_opts} if defined($arg_opts);
  561         1576  
3857              
3858             my $opt_case =
3859 561         2285 _get_def( $opts{'case'}, $self->{search_case}, $DEF_SEARCH_CASE );
3860             my $opt_trim =
3861 561         2076 _get_def( $opts{'trim'}, $self->{search_trim}, $DEF_SEARCH_TRIM );
3862             my $opt_ignore_empty =
3863             _get_def( $opts{'ignore_empty'}, $self->{search_ignore_empty},
3864 561         1940 $DEF_SEARCH_IGNORE_EMPTY );
3865             my $opt_ignacc =
3866             _get_def( $opts{'ignore_accents'}, $self->{search_ignore_accents},
3867 561         2106 $DEF_SEARCH_IGNORE_ACCENTS );
3868              
3869 561         1819 my $opts_stringified =
3870             $opt_case . $opt_trim . $opt_ignore_empty . $opt_ignacc;
3871 561         1297 my $hash_name = "_h${field}_${opts_stringified}";
3872 561         1172 my $projector_name = "_p${field}_${opts_stringified}";
3873              
3874 561 100 66     3116 if ( exists $self->{$hash_name} and exists $self->{$projector_name} ) {
    50 33        
3875 489 50       9752 print( $_debugh
3876             "Search by key '$field': using existing hash and projector ("
3877             . "$hash_name, $projector_name)\n" )
3878             if $_debug;
3879 489         1971 return ( $hash_name, $projector_name );
3880             }
3881             elsif ( exists $self->{$hash_name} or exists $self->{$projector_name} ) {
3882 0         0 confess "Man, check your $PKG module code now!";
3883             }
3884              
3885 72 50       198 print( $_debugh "Search by key '$field': building hash\n" ) if $_debug;
3886              
3887             #
3888             # Projectors
3889             #
3890             # The projector contains subs to derivate the search key from the field value.
3891             # At the moment it is used to manage with case / without case searches and with
3892             # trim / without trim searches (meaning, ignoring spaces at beginning and end of
3893             # fields)
3894             #
3895             # Why naming it a projector?
3896             # Because if you run it twice on a value, the second run should produce the
3897             # same result, meaning: p(p(x)) = p(x) whatever x
3898             #
3899              
3900 72         134 my @projectors;
3901              
3902             # Add case removal in the projector function list
3903 1479     1479   4103 push @projectors, sub { return lc(shift); }
3904 72 100       401 unless $opt_case;
3905              
3906             # Add trim in the projector function list
3907 72 100       215 if ($opt_trim) {
3908             push @projectors, sub {
3909 1479     1479   7212 my $v = shift;
3910 1479         5181 $v =~ s/^\s+|\s+$//g;
3911 1479         3736 return $v;
3912 57         221 };
3913             }
3914              
3915             # Add remove_accents in the projector function list
3916 1886     1886   4272 push @projectors, sub { return remove_accents(shift); }
3917 72 100       373 if $opt_ignacc;
3918              
3919             my $projector = sub {
3920 1906     1906   3888 my $v = _get_def( $_[0], '' );
3921 1906         4499 $v = $_->($v) foreach (@projectors);
3922 1906         4218 return $v;
3923 72         295 };
3924              
3925             #
3926             # Filter
3927             #
3928             # As opposed to projectors above (where a search key is transformed), the idea
3929             # now is to ignore certain keys when doing a search.
3930             # At the moment, used to manage searches with / without empty values.
3931             #
3932             # That is to say: shall we use empty value as a regular value to search on, as
3933             # in
3934             # my @results = $self->search('FIELDNAME', '');
3935             # ?
3936             #
3937             # Right now we don't use an array-based construct, that'd allow to chain filters
3938             # with one another (as we now have only one filter to deal with), later, we may
3939             # use an array of filters, as done with projectors...
3940             #
3941              
3942 72         140 my $filter;
3943 72 100       241 if ($opt_ignore_empty) {
3944 1268     1268   2906 $filter = sub { return $_[0] ne ''; }
3945 61         195 }
3946             else {
3947 77     77   234 $filter = sub { return 1; }
3948 11         65 }
3949              
3950 72         162 my %h;
3951             my $k;
3952 72         408 $self->reset_next_record_hr();
3953 72         274 while ( my $hr = $self->get_next_record_hr( \$k ) ) {
3954 1345         2484 my $kv = $hr->{$field};
3955 1345         2412 my $p = $projector->($kv);
3956 1345 100       2403 unless ( $filter->($p) ) {
3957 76 50       195 print( $_debugh "Ignoring key value '$p' in hash build\n" )
3958             if $_debug;
3959 76         388 next;
3960             }
3961 1269         1869 push @{ $h{$p} }, $k;
  1269         6288  
3962             }
3963 72         507 for ( keys %h ) {
3964 1178         1582 @{ $h{$_} } = sort { $a <=> $b } @{ $h{$_} };
  1178         2478  
  103         315  
  1178         2405  
3965             }
3966              
3967 72         417 $self->{_hash_build_count}++;
3968 72         869 $self->{$hash_name} = {%h};
3969 72         310 $self->{$projector_name} = $projector;
3970 72         674 return ( $hash_name, $projector_name );
3971             }
3972              
3973             sub _get_hash_build_count {
3974 6     6   3379 my $self = shift;
3975              
3976 6         29 return _get_def( $self->{_hash_build_count}, 0 );
3977             }
3978              
3979             sub search {
3980 563     563 1 22942 my $self = shift;
3981 563         7217 validate_pos(
3982             @_,
3983             { type => SCALAR },
3984             { type => UNDEF | SCALAR },
3985             { type => UNDEF | HASHREF, optional => 1 }
3986             );
3987 563         2358 my ( $field, $value, $param_opts ) = @_;
3988              
3989 563         1354 my $croak_if_error = $self->{croak_if_error};
3990              
3991             #
3992             # FIXME?
3993             # A bit overkill to check options each time search is called...
3994             # To be thought about.
3995             #
3996              
3997 563         895 my @tmp;
3998 563 100       1455 @tmp = %{$param_opts} if $param_opts;
  421         1260  
3999 563         25596 my %opts = validate( @tmp, $SEARCH_VALIDATE_OPTIONS );
4000              
4001 562 50       2988 return unless $self->_check_for_search($field);
4002              
4003             # $self->_print_error("undef value in search call") if !defined($value);
4004 561 50       1289 $value = '' unless defined($value);
4005              
4006 561         1695 my ( $hash_name, $projector_name ) =
4007             $self->_get_hash_and_projector( $field, \%opts );
4008              
4009 561         1918 my $ret = $self->{$hash_name}->{ $self->{$projector_name}->($value) };
4010              
4011 561 100       2654 return $ret if defined($ret);
4012 185         621 return [];
4013             }
4014              
4015             sub search_1hr {
4016 22     22 1 15913 my $self = shift;
4017 22         534 validate_pos(
4018             @_,
4019             { type => SCALAR },
4020             { type => UNDEF | SCALAR },
4021             { type => UNDEF | HASHREF, optional => 1 }
4022             );
4023 22         148 my ( $field, $value, $arg_opts ) = @_;
4024              
4025 22         109 my $r = $self->search( $field, $value, $arg_opts );
4026              
4027 22 100       110 return unless defined( $r->[0] );
4028              
4029 20         87 my $opts = _get_def( $arg_opts, {} );
4030             my $opt_ignore_ambiguous = _get_def(
4031             $opts->{'ignore_ambiguous'},
4032 20         124 $self->{'search_ignore_ambiguous'},
4033             $DEF_SEARCH_IGNORE_AMBIGUOUS
4034             );
4035              
4036 20 100 100     65 return if @{$r} >= 2 and ( !$opt_ignore_ambiguous );
  20         155  
4037 14         86 return $self->get_row_hr( $r->[0] );
4038             }
4039              
4040             sub vlookup {
4041 317     317 1 35958 my $self = shift;
4042 317         4212 validate_pos(
4043             @_,
4044             { type => SCALAR },
4045             { type => UNDEF | SCALAR },
4046             { type => SCALAR },
4047             { type => UNDEF | HASHREF, optional => 1 }
4048             );
4049 317         1251 my ( $searched_field, $value, $target_field, $arg_opts ) = @_;
4050              
4051 317         739 my $r = $self->search( $searched_field, $value, $arg_opts );
4052 315 50       661 return unless $self->_check_for_search($target_field);
4053              
4054 314         652 my $opts = _get_def( $arg_opts, {} );
4055 314 100 66     720 unless ( defined( $r->[0] ) ) {
4056             return (
4057             exists $opts->{'value_if_not_found'}
4058             ? $opts->{'value_if_not_found'}
4059 143 100       505 : $self->{'search_value_if_not_found'}
4060             );
4061             }
4062             elsif ( @{$r} >= 2 ) {
4063             my $opt_ignore_ambiguous = _get_def(
4064             $opts->{'ignore_ambiguous'},
4065             $self->{'search_ignore_ambiguous'},
4066             $DEF_SEARCH_IGNORE_AMBIGUOUS
4067             );
4068             return (
4069             exists $opts->{'value_if_ambiguous'}
4070             ? $opts->{'value_if_ambiguous'}
4071             : $self->{'search_value_if_ambiguous'}
4072             ) if !$opt_ignore_ambiguous;
4073             }
4074              
4075 144 100       304 return $opts->{value_if_found} if exists $opts->{value_if_found};
4076             return $self->{search_value_if_found}
4077 140 50       255 if exists $opts->{search_value_if_found};
4078              
4079 140         313 my $hr = $self->get_row_hr( $r->[0] );
4080              
4081 140         574 return $hr->{$target_field};
4082             }
4083              
4084             1;
4085              
4086             __END__
4087              
4088             =pod
4089              
4090             =encoding UTF-8
4091              
4092             =head1 NAME
4093              
4094             Text::AutoCSV - helper module to automate the use of Text::CSV
4095              
4096             =head1 VERSION
4097              
4098             version 1.2.0
4099              
4100             =head1 SYNOPSIS
4101              
4102             By default, Text::AutoCSV will detect the following characteristics of the
4103             input:
4104              
4105             - The separator, among ",", ";" and "\t" (tab)
4106              
4107             - The escape character, among '"' (double-quote) and '\\' (backslash)
4108              
4109             - Try UTF-8 and if it fails, fall back on latin1
4110              
4111             - Read the header line and compute field names
4112              
4113             - If asked to (see L</fields_dates_auto>), detect any field that contains a
4114             DateTime value, trying 20 date formats, possibly followed by a time (6 time
4115             formats tested)
4116              
4117             - If asked to (see L</fields_dates>), detect DateTime format of certain fields,
4118             croak if no DateTime format can be worked out
4119              
4120             - Fields identified as containing a DateTime value (L</fields_dates_auto> or
4121             L</fields_dates>) are stored as DateTime objects by default
4122              
4123             Text::AutoCSV also provides methods to search on fields (using cached hash
4124             tables) and it can populate the value of "remote" fields, made from joining 2
4125             CSV files with a key-value search
4126              
4127             =head2 General
4128              
4129             use Text::AutoCSV;
4130              
4131             # Read CSV from std input, write to std output
4132             Text::AutoCSV->new()->write();
4133              
4134             # Read CSV data from f.csv, write to std output
4135             Text::AutoCSV->new(in_file => 'f.csv')->write();
4136              
4137             # Read CSV data from f.csv, write to g.csv
4138             Text::AutoCSV->new(in_file => 'f.csv', out_file => 'g.csv')->write();
4139              
4140             # "Rewrite" CSV file by printing out records as a list (separated by
4141             # line breaks) of field name followed by its value.
4142             my $csv = Text::AutoCSV->new(in_file => 'in.csv', walker_hr => \&walk);
4143             my @cols = $csv->get_fields_names();
4144             $csv->read();
4145             sub walk {
4146             my %rec = %{$_[0]};
4147             for (@cols) {
4148             next if $_ eq '';
4149             print("$_ => ", $rec{$_}, "\n");
4150             }
4151             print("\n");
4152             }
4153              
4154             =head2 OBJ-ish functions
4155              
4156             # Identify column internal names with more flexibility as the default
4157             # mechanism
4158             my $csv = Text::AutoCSV->new(in_file => 'zips.csv',
4159             fields_hr => {'CITY' => '^(city|town)', 'ZIPCODE' => '^zip(code)?$'});
4160             # Get zipcode of Claix
4161             my $z = $csv->vlookup('CITY', ' claix ', 'ZIPCODE');
4162              
4163             my $csv = Text::AutoCSV->new(in_file => 'zips.csv');
4164             # Get zipcode of Claix
4165             my $z = $csv->vlookup('CITY', ' claix ', 'ZIPCODE');
4166             # Same as above, but vlookup is strict for case and spaces around
4167             my $csv = Text::AutoCSV->new(in_file => 'zips.csv', search_case => 1,
4168             search_trim => 0);
4169             my $z = $csv->vlookup('CITY', 'Claix', 'ZIPCODE');
4170              
4171             # Create field 'MYCITY' made by taking pers.csv' ZIP column value, looking
4172             # it up in the ZIPCODE columns of zips.csv, taking CITY colmun value and
4173             # naming it 'MYCITY'. Output is written in std output. If a zipcode is
4174             # ambiguous, say it.
4175             Text::AutoCSV->new(in_file => 'pers.csv')
4176             ->field_add_link('MYCITY', 'ZIP->ZIPCODE->CITY', 'zips.csv',
4177             { ignore_ambiguous => 0,
4178             value_if_ambiguous => '<duplicate zipcode found!>' }
4179             )->write();
4180              
4181             # Note the above can also be written using Text::AutoCSV level attributes:
4182             Text::AutoCSV->new(in_file => 'pers.csv',
4183             search_ignore_ambiguous => 0,
4184             search_value_if_ambiguous => '<duplicate zipcode found!>')
4185             ->field_add_link('MYCITY', 'ZIP->ZIPCODE->CITY', 'zips.csv')->write();
4186              
4187             # Create 'MYCITY' field as above, then display some statistics
4188             my $nom_compose = 0;
4189             my $zip_not_found = 0;
4190             Text::AutoCSV->new(in_file => 'pers.csv', walker_hr => \&walk)
4191             ->field_add_link('MYCITY', 'ZIP->ZIPCODE->CITY', 'zips.csv')->read();
4192             sub walk {
4193             my $hr = shift;
4194             $nom_compose++ if $hr->{'NAME'} =~ m/[- ]/;
4195             $zip_not_found++ unless defined($hr->{'MYCITY'});
4196             }
4197             print("Number of persons with a multi-part name: $nom_compose\n");
4198             print("Number of persons with unknown zipcode: $zip_not_found\n");
4199              
4200             =head2 Updating
4201              
4202             Text::AutoCSV->new(in_file => 'names.csv', out_file => 'ucnames.csv',
4203             read_post_update_hr => \&updt)->write();
4204             sub updt { $_[0]->{'LASTNAME'} =~ s/^.*$/\U&/; }
4205              
4206             Text::AutoCSV->new(in_file => 'squares.csv',
4207             out_file => 'checkedsquares.csv',
4208             out_filter => \&wf)->write();
4209             sub wf { return ($_[0]->{'X'} ** 2 == $_[0]->{'SQUAREOFX'}); }
4210              
4211             # Add a field for the full name, made of the concatenation of the
4212             # first name and the last name.
4213             # Also display stats about empty full names.
4214             Text::AutoCSV->new(in_file => 'dirpeople.csv', out_file => 'dirwithfn.csv',
4215             verbose => 1)
4216             ->field_add_computed('FULLNAME', \&calc_fn)->write();
4217             sub calc_fn {
4218             my ($field, $hr, $stats) = @_;
4219             my $fn = $hr->{'FIRSTNAME'} . ' ' . uc($hr->{'LASTNAME'});
4220             $stats->{'empty full name'}++ if $fn eq ' ';
4221             return $fn;
4222             }
4223              
4224             # Read a file with a lot of columns and keep only 2 columns in output
4225             Text::AutoCSV->new(in_file => 'big.csv', out_file => 'addr.csv',
4226             out_fields => ['NAME', 'ADDRESS'])
4227             ->out_header('ADDRESS', 'Postal Address')
4228             ->write();
4229              
4230             =head2 Datetime management
4231              
4232             # Detect any field containing a DateTime value and convert it to yyyy-mm-dd
4233             # whatever the input format is.
4234             Text::AutoCSV->new(in_file => 'in.csv', out_file => 'out.csv',
4235             fields_dates_auto => 1, out_dates_format => '%F')->write();
4236              
4237             # Detect any field containing a DateTime value and convert it to a US
4238             # DateTime whatever the input format is.
4239             Text::AutoCSV->new(in_file => 'in.csv', out_file => 'out.csv',
4240             fields_dates_auto => 1,
4241             out_dates_format => '%b %d, %Y, %I:%M:%S %p',
4242             out_dates_locale => 'en')->write();
4243              
4244             # Find dates of specific formats and convert it into yyyy-mm-dd
4245             Text::AutoCSV->new(in_file => 'raw.csv', out_file => 'cooked.csv',
4246             dates_formats_to_try => ['%d_%m_%Y', '%m_%d_%Y', '%Y_%m_%d'],
4247             out_dates_format => '%F')->write();
4248              
4249             # Take the dates on columns 'LASTLOGIN' and 'CREATIONDATE' and convert it
4250             # into French dates (day/month/year).
4251             # Text::AutoCSV will croak if LASTLOGIN or CREATIONDATE do not contain a
4252             # DateTime format. By default, Text::AutoCSV will try 20 different formats.
4253             Text::AutoCSV->new(in_file => 'in.csv', out_file => 'out.csv',
4254             fields_dates => ['LASTLOGIN', 'CREATIONDATE'],
4255             out_dates_format => '%d/%m/%Y')->write();
4256              
4257             # Convert 2 DateTime fields into unix standard epoch
4258             # Write -1 if DateTime is empty.
4259             sub toepoch { return $_->epoch() if $_; -1; }
4260             Text::AutoCSV->new(in_file => 'stats.csv', out_file => 'stats-epoch.csv',
4261             fields_dates => ['ATIME', 'MTIME'])
4262             ->in_map('ATIME', \&toepoch)
4263             ->in_map('MTIME', \&toepoch)
4264             ->write();
4265              
4266             # Do the other way round from above: convert 2 fields containing unix
4267             # standard epoch into a string displaying a human-readable DateTime.
4268             my $formatter = DateTime::Format::Strptime->new(
4269             pattern => 'DATE=%F, TIME=%T'
4270             );
4271             sub fromepoch {
4272             return $formatter->format_datetime(DateTime->from_epoch(epoch => $_))
4273             if $_ >= 0;
4274             '';
4275             }
4276             $csv = Text::AutoCSV->new(in_file => 'stats-epoch.csv',
4277             out_file => 'stats2.csv'
4278             )
4279             ->in_map('ATIME', \&fromepoch)
4280             ->in_map('MTIME', \&fromepoch)
4281             ->write();
4282              
4283             =head2 Miscellaneous
4284              
4285             use Text::AutoCSV 'remove_accents';
4286             # Output 'Francais: etre elementaire, Tcheque: sluzba dum' followed by a new
4287             # line.
4288             print remove_accents("Français: être élémentaire, Tchèque: služba dům"),
4289             "\n";
4290              
4291             =head2 csvcopy.pl
4292              
4293             As of version 1.2.0, csvcopy.pl is installed along with Text::AutoCSV.
4294             It is a command-line wrapper to Text::AutoCSV, run
4295              
4296             csvcopy.pl --help
4297              
4298             to get help.
4299              
4300             =for Pod::Coverage ERR_UNKNOWN_FIELD
4301              
4302             =head1 NAME
4303              
4304             Text::AutoCSV - helper module to automate the use of Text::CSV
4305              
4306             =head1 METHODS
4307              
4308             =head2 new
4309              
4310             my $csv = Text::AutoCSV->new(%attr);
4311              
4312             (Class method) Returns a new instance of Text::AutoCSV. The object attributes
4313             are described by the hash C<%attr> (can be empty).
4314              
4315             Currently the following attributes are available:
4316              
4317             =over 4
4318              
4319             =item Preliminary note about L</fields_hr>, L</fields_ar> and L</fields_column_names> attributes
4320              
4321             By default, Text::AutoCSV assumes the input has a header and will use the field
4322             values of this first line (the header) to work out the column internal names.
4323             These internal names are used everywhere in Text::AutoCSV to designate columns.
4324              
4325             The values are transformed as follows:
4326              
4327             - All accents are removed using the exportable function L</remove_accents>.
4328              
4329             - Any non-alphanumeric character is removed (except underscore) and all letters
4330             are switched to upper case. The regex to do this is
4331              
4332             s/[^[:alnum:]_]//gi; s/^.*$/\U$&/;
4333              
4334             Thus a header line of
4335              
4336             'Office Number 1,Office_2,Personal Number'
4337              
4338             will produce the internal column names
4339              
4340             'OFFICENUMBER1' (first column)
4341              
4342             'OFFICE_2' (second column)
4343              
4344             'PERSONALNUMBER' (third column).
4345              
4346             The attribute L</fields_hr>, L</fields_ar> or L</fields_column_names> (only one
4347             of the three is useful at a time) allows to change this behavior.
4348              
4349             B<NOTE>
4350              
4351             The removal of accents is *not* a conversion to us-ascii, see L</remove_accents>
4352             for details.
4353              
4354             =item Preliminary note about fields reading
4355              
4356             Functions that are given a field name (L</get_cell>, L</vlookup>,
4357             L</field_add_copy>, ...) raise an error if the field requested does not exist.
4358              
4359             B<SO WILL THE HASHREFS GIVEN BY Text::AutoCSV:> when a function returns a
4360             hashref (L</search_1hr>, L</get_row_hr>, ...), the hash is locked with the
4361             C<lock_keys> function of C<Hash::Util>. Any attempt to read a non-existing key
4362             from the hash causes a croak. This feature is de-activated if you specified
4363             C<croak_if_error =E<gt> 0> when creating Text::AutoCSV object.
4364              
4365             =item in_file
4366              
4367             The name of the file to read CSV data from.
4368              
4369             If not specified or empty, read standard input.
4370              
4371             Example:
4372              
4373             my $csv = Text::AutoCSV->new(in_file => 'in.csv');
4374              
4375             =item inh
4376              
4377             File handle to read CSV data from.
4378             Normally you don't want to specify this attribute.
4379              
4380             C<inh> is useful if you don't like the way Text::AutoCSV opens the input file
4381             for you.
4382              
4383             Example:
4384              
4385             open my $inh, "producecsv.sh|";
4386             my $csv = Text::AutoCSV->new(inh => $inh);
4387              
4388             =item encoding
4389              
4390             Comma-separated list of encodings to try to read input.
4391              
4392             Note that finding the correct encoding of any given input is overkill. This
4393             script just tries encodings one after the other, and selects the first one that
4394             does not trigger a warning during reading of input. If all produce warnings,
4395             select the first one.
4396              
4397             The encoding chosen is used in output, unless attribute L</out_encoding> is
4398             specified.
4399              
4400             Value by default: 'UTF-8,latin1'
4401              
4402             B<IMPORTANT>
4403              
4404             If one tries something like C<encoding =E<gt> 'latin1,UTF-8'>, it'll almost
4405             never detect UTF-8 because latin1 rarely triggers warnings during reading. It
4406             tends to be also true with encodings like UTF-16 that can remain happy with
4407             various inputs (sometimes resulting in Western languages turned into Chinese
4408             text).
4409              
4410             Ultimately this attribute should be used with a unique value. The result when
4411             using more than one value can produce weird results and should be considered
4412             B<experimental>.
4413              
4414             Example:
4415              
4416             my $csv = Text::AutoCSV->new(in_file => 'w.csv', encoding => 'UTF-16');
4417              
4418             =item via
4419              
4420             Adds a C<via> to the file opening instruction performed by Text::AutoCSV. You
4421             don't want to use it under normal circumstances.
4422              
4423             The value should start with a ':' character (Text::AutoCSV won't add one for
4424             you).
4425              
4426             Value by default: none
4427              
4428             Example:
4429              
4430             my $csv = Text::AutoCSV->new(in_file => 'in.csv',
4431             via => ':raw:perlio:UTF-32:crlf');
4432              
4433             =item dont_mess_with_encoding
4434              
4435             If true, just ignore completely encoding and don't try to alter I/O operations
4436             with encoding considerations (using C<binmode> instruction). Note that if inh
4437             attribute is specified, then Text::AutoCSV will consider the caller manages
4438             encoding for himself and dont_mess_with_encoding will be automatically set, too.
4439              
4440             B<IMPORTANT>
4441              
4442             This attribute does not mean perl will totally ignore encoding and would
4443             consider character strings as bytes for example. The meaning of
4444             L</dont_mess_with_encoding> is that Text::AutoCSV itself will totally ignore
4445             encoding matters, and leave it entirely to Perl' default.
4446              
4447             Value by default:
4448              
4449             0 if inh attribute is not set
4450             1 if inh attribute is set
4451              
4452             Example:
4453              
4454             my $csv = Text::AutoCSV->new(in_file => 'in.csv',
4455             dont_mess_with_encoding => 1);
4456              
4457             =item sep_char
4458              
4459             Specify the CSV separator character. Turns off separator auto-detection. This
4460             attribute is passed as is to C<Text::CSV-E<gt>new()>.
4461              
4462             Example:
4463              
4464             my $csv = Text::AutoCSV->new(in_file => 'in.csv', sep_char => ';');
4465              
4466             =item quote_char
4467              
4468             Specify the field quote character. This attribute is passed as is to
4469             C<Text::CSV-E<gt>new()>.
4470              
4471             Value by default: double quote ('"')
4472              
4473             Example:
4474              
4475             my $csv = Text::AutoCSV->new(in_file => 'in.csv', quote_char => '\'');
4476              
4477             =item escape_char
4478              
4479             Specify the escape character. Turns off escape character auto-detection. This
4480             attribute is passed as is to C<Text::CSV-E<gt>new()>.
4481              
4482             Value by default: backslash ('\\')
4483              
4484             Example:
4485              
4486             my $csv = Text::AutoCSV->new(in_file => 'in.csv', escape_char => '"');
4487              
4488             =item in_csvobj
4489              
4490             Text::CSV object to use.
4491             Normally you don't want to specify this attribute.
4492              
4493             By default, Text::AutoCSV will manage creating such an object and will work hard
4494             to detect the parameters it requires.
4495              
4496             Defining C<in_csvobj> attribute turns off separator character and escape
4497             character auto-detection.
4498              
4499             Using this attribute workarounds Text::AutoCSV philosophy a bit, but you may
4500             need it in case Text::AutoCSV behavior is not suitable for Text::CSV creation.
4501              
4502             Example:
4503              
4504             my $tcsv = Text::CSV->new();
4505             my $acsv = Text::AutoCSV->new(in_file => 'in.csv', in_csvobj => $tcsv);
4506              
4507             =item has_headers
4508              
4509             If true, Text::AutoCSV assumes the input has a header line.
4510              
4511             Value by default: 1
4512              
4513             Example:
4514              
4515             my $csv = Text::AutoCSV->new(in_file => 'in.csv', has_headers => 0);
4516              
4517             =item fields_hr
4518              
4519             (Only if input has a header line) Hash ref that contains column internal names
4520             along with a regular expression to find it in the header line.
4521             For example if you have:
4522              
4523             my $csv = Text::AutoCSV->new(in_file => 'in.csv',
4524             fields_hr => {'PHONE OFFICE' => '^office phone nu',
4525             'PHONE PERSONAL' => '^personal phone nu'});
4526              
4527             And the header line is
4528              
4529             'Personal Phone Number,Office Phone Number'
4530              
4531             the column name 'PHONE OFFICE' will designate the second column and the column
4532             name 'PHONE PERSONAL' will designate the first column.
4533              
4534             You can choose column names like 'Phone Office' and 'Phone Personal' as well.
4535              
4536             The regex search is case insensitive.
4537              
4538             =item fields_ar
4539              
4540             (Only if input has a header line) Array ref that contains column internal names.
4541             The array is used to create a hash ref of the same kind as L</fields_hr>, by
4542             wrapping the column name in a regex. The names are surrounded by a leading '^'
4543             and a trailing '$', meaning, the name must match the entire field name.
4544              
4545             For example
4546              
4547             fields_ar => ['OFFICENUMBER', 'PERSONALNUMBER']
4548              
4549             is strictly equivalent to
4550              
4551             fields_hr => {'OFFICENUMBER' => '^officenumber$',
4552             'PERSONALNUMBER' = '^personalnumber$'}
4553              
4554             The regex search is case insensitive.
4555              
4556             C<fields_ar> is useful if the internal names are identical to the file column
4557             names. It avoids repeating the names over and over as would happen if using
4558             L</fields_hr> attribute.
4559              
4560             I<NOTE>
4561              
4562             You might wonder why using fields_ar as opposed to Text::AutoCSV default'
4563             mechanism. There are two reasons for that:
4564              
4565             1- Text::AutoCSV removes spaces from column names, and some people may want
4566             another behavior. A header name of 'Phone Number' will get an internal column
4567             name of 'PHONENUMBER' (default behavior, if none of fields_hr, fields_ar and
4568             fields_column_names attributes is specified), and one may prefer 'PHONE NUMBER'
4569             or 'phone number' or whatsoever.
4570              
4571             2- By specifying a list of columns using either of fields_hr or fields_ar, you
4572             not only map column names as found in the header line to internal column names:
4573             you also I<request> these columns to be available. If one of the requested
4574             columns cannot be found, Text::AutoCSV will croak (default) or print an error
4575             and return an undef object (if created with C<croak_if_error =E<gt> 0>).
4576              
4577             =item fields_column_names
4578              
4579             Array ref of column internal names, in the order of columns in file. This
4580             attribute works like the C<column_names> attribute of Text::CSV. It'll just
4581             assign names to columns one by one, regardless of what the header line contains.
4582             It'll work also if the file has no header line.
4583              
4584             Example:
4585              
4586             my $csv = Text::AutoCSV->new(in_file => 'in.csv',
4587             fields_column_names => ['My COL1', '', 'My COL3']);
4588              
4589             =item out_file
4590              
4591             Output file when executing the L</write> method.
4592              
4593             If not specified or empty, write to standard output.
4594              
4595             Example:
4596              
4597             my $csv = Text::AutoCSV->new(in_file => 'in.csv', out_file => 'out.csv');
4598              
4599             =item outh
4600              
4601             File handle to write CSV data to when executing the L</write> method.
4602             Normally you don't want to specify this attribute.
4603              
4604             C<outh> is useful if you don't like the way Text::AutoCSV opens the output file
4605             for you.
4606              
4607             Example:
4608              
4609             my $outh = open "myin.csv', ">>";
4610             my $csv = Text::AutoCSV->new(in_file => 'in.csv', has_headers => 0,
4611             outh => $outh);
4612              
4613             =item out_encoding
4614              
4615             Enforce the encoding of output.
4616              
4617             Value by default: input encoding
4618              
4619             Example:
4620              
4621             my $csv = Text::AutoCSV->new(in_file => 'in.csv', out_file => 'out.csv',
4622             out_encoding => 'UTF-16');
4623              
4624             =item out_utf8_bom
4625              
4626             Enforce BOM (Byte-Order-Mark) on output, when it is UTF8. If output encoding is
4627             not UTF-8, this attribute is ignored.
4628              
4629             B<NOTE>
4630              
4631             UTF-8 needs no BOM (there is no Byte-Order in UTF-8), and in practice,
4632             UTF8-encoded files rarely have a BOM.
4633              
4634             Using this attribute is not recommended. It is provided for the sake of
4635             completeness, and also to produce Unicode files Microsoft EXCEL will be happy to
4636             read.
4637              
4638             At first sight it would seem more logical to make EXCEL happy with something
4639             like this:
4640              
4641             out_encoding => 'UTF-16'
4642              
4643             But... While EXCEL will identify UTF-16 and read it as such, it will not take
4644             into account the BOM found at the beginning. In the end the first cell will have
4645             2 useless characters prepended. The only solution the author knows to workaround
4646             this issue if to use UTF-8 as output encoding, and enforce a BOM. That is, use:
4647              
4648             ..., out_encoding => 'UTF-8', out_utf8_bom => 1, ...
4649              
4650             =item out_sep_char
4651              
4652             Enforce the output CSV separator character.
4653              
4654             Value by default: input separator
4655              
4656             Example:
4657              
4658             my $csv = Text::AutoCSV->new(in_file => 'in.csv', out_file => 'out.csv',
4659             out_sep_char => ',');
4660              
4661             =item out_quote_char
4662              
4663             Enforce the output CSV quote character.
4664              
4665             Value by default: input quote character
4666              
4667             Example:
4668              
4669             my $csv = Text::AutoCSV->new(in_file => 'in.csv', out_file => 'out.csv',
4670             out_quote_char => '"');
4671              
4672             =item out_escape_char
4673              
4674             Enforce the output CSV escape character.
4675              
4676             Value by default: input escape character
4677              
4678             Example:
4679              
4680             my $csv = Text::AutoCSV->new(in_file => 'in.csv', out_file => 'out.csv',
4681             out_escape_char_char => '\\');
4682              
4683             =item out_always_quote
4684              
4685             If true, quote all fields of output (set always_quote of Text::CSV).
4686              
4687             If false, don't quote all fields of output (don't set C<always_quote> of
4688             Text::CSV).
4689              
4690             Value by default: same as what is found in input
4691              
4692             While reading input, Text::AutoCSV works out whether or not all fields were
4693             quoted. If yes, then the output Text::CSV object has the always_quote attribute
4694             set, if no, then the output Text::CSV object does not have this attribute set.
4695              
4696             Example:
4697              
4698             my $csv = Text::AutoCSV->new(in_file => 'in.csv', out_file => 'out.csv',
4699             out_always_quote => 1);
4700              
4701             =item out_has_headers
4702              
4703             If true, when writing output, write a header line on first line.
4704              
4705             If false, when writing output, don't write a header line on first line.
4706              
4707             Value by default: same as has_headers attribute
4708              
4709             Example 1
4710              
4711             Read standard input and write to standard output, removing the header line.
4712              
4713             Text::AutoCSV->new(out_has_headers => 0)->write();
4714              
4715             Example 2
4716              
4717             Read standard input and write to standard output, adding a header line.
4718              
4719             Text::AutoCSV->new(fields_column_names => ['MYCOL1', 'MYCOL2'],
4720             out_has_headers => 1)->write();
4721              
4722             =item no_undef
4723              
4724             If true, non-existent column values are set to an empty string instead of undef.
4725             It is also done on extra fields that happen to have an undef value (for example
4726             when the target of a linked field is not found).
4727              
4728             Note this attribute does not work on callback functions output set with
4729             L</in_map>: for example empty DateTime values (on fields identified as
4730             containing a date/time, see C<dates_*> attributes below) are set to C<undef>,
4731             even while C<no_undef> is set. Indeed setting it to an empty string while
4732             non-empty values would contain a Datetime object would not be clean. An empty
4733             value in a placeholder containing an object must be undef.
4734              
4735             Since version 1.1.5 of Text::AutoCSV, C<no_undef> is examined when sending
4736             parameter ($_) to L</in_map> callback: an undef value is now passed as is (as
4737             undef), unless C<no_undef> is set. If C<no_undef> is set, and field value is
4738             undef, then $_ is set to the empty string ('') when calling callback defined by
4739             L</in_map>. This new behavior was put in place to be consistent with what is
4740             being done with DateTime values.
4741              
4742             Value by default: 0
4743              
4744             Example:
4745              
4746             my $csv = Text::AutoCSV->new(in_file => 'in.csv', no_undef => 1);
4747              
4748             =item read_post_update_hr
4749              
4750             To be set to a ref sub. Each time a record is read from input, call
4751             C<read_post_update_hr> to update the hash ref of the record. The sub is called
4752             with 2 arguments: the hash ref to the record value and the hash ref to stats.
4753              
4754             The stats allow to count events and are printed in the end of reading in case
4755             Text::AutoCSV is called in verbose mode (C<verbose =E<gt> 1>).
4756              
4757             For example, the C<read_post_update_hr> below will turn column 'CITY' values in
4758             upper case and count occurences of empty cities in stat display:
4759              
4760             Text::AutoCSV->new(in_file => 'addresses.csv',
4761             read_post_update_hr => \&updt, verbose => 1)
4762             ->write();
4763             sub updt {
4764             my ($hr, $stats) = @_;
4765             $hr->{'CITY'} =~ s/^.*$/\U$&/;
4766             $stats->{'empty city encountered'}++ if $hr->{'CITY'} eq '';
4767             }
4768              
4769             B<IMPORTANT>
4770              
4771             You cannot create a field this way. To create a field, you have to use the
4772             member functions L</field_add_link>, L</field_add_copy> or
4773             L</field_add_computed>.
4774              
4775             B<NOTE>
4776              
4777             If you wish to manage some updates at field level, consider registering update
4778             functions with L</in_map> and L</out_map> member functions. These functions
4779             register callbacks that work at field level and with $_ variable (thus the
4780             callback function invoked is AutoCSV-agnostic).
4781              
4782             L</in_map> updates a field after read, L</out_map> updates the field content
4783             before writing it.
4784              
4785             =item walker_hr
4786              
4787             To set to a sub ref that'll be executed each time a record is read from input.
4788             It is executed after L</read_post_update_hr>. The sub is called with 2
4789             arguments: the hash ref to the record value and the hash ref to stats.
4790              
4791             Note L</read_post_update_hr> is meant for updating record fields just after
4792             reading, whereas L</walker_hr> is read-only.
4793              
4794             The stats allow to count events and are printed in the end of reading in case
4795             Text::AutoCSV is called in verbose mode (C<verbose =E<gt> 1>). If the
4796             L</verbose> attribute is not set, the stats are not displayed, however you can
4797             get stats by calling the get_stats function.
4798              
4799             The example below will count in the stats the number of records where the 'CITY'
4800             field is empty. Thanks to C<verbose =E<gt> 1> attribute, at the end of reading
4801             the stats are displayed.
4802              
4803             my $csv = Text::AutoCSV->new(in_file => 'addresses.csv',
4804             walker_hr => \&walk1, verbose => 1)->read();
4805             sub walk1 {
4806             my ($hr, $stats) = @_;
4807             $stats->{'empty city'}++ if $hr->{'CITY'} eq '';
4808             }
4809              
4810             =item walker_ar
4811              
4812             To set to a sub ref that'll be executed each time a record is read from input.
4813             It is executed after L</read_post_update_hr>. The sub is called with 2
4814             arguments: the array ref to the record value and the hash ref to stats.
4815              
4816             Note L</read_post_update_hr> is meant for updating record fields just after
4817             reading, whereas C<walker_hr> is read-only.
4818              
4819             The stats allow to count events and are printed in the end of reading in case
4820             Text::AutoCSV is called in verbose mode (C<verbose =E<gt> 1>). If the
4821             L</verbose> attribute is not set, the stats are lost.
4822              
4823             The array ref contains values in their natural order in the CSV. To be used with
4824             the column names, you have to use L</get_fields_names> member function.
4825              
4826             The example below will count in the stats the number of records where the 'CITY'
4827             field is empty. Thanks to C<verbose =E<gt> 1> attribute, at the end of reading
4828             the stats are displayed. It produces the exact same result as the example in
4829             walker_hr attribute, but it uses walker_ar.
4830              
4831             use List::MoreUtils qw(first_index);
4832             my $csv = Text::AutoCSV->new(in_file => 'addresses.csv',
4833             walker_ar => \&walk2, verbose => 1);
4834             my @cols = $csv->get_fields_names();
4835             my $idxCITY = first_index { /^city$/i } @cols;
4836             die "No city field!??" if $idxCITY < 0;
4837             $csv->read();
4838             sub walk2 {
4839             my ($ar, $stats) = @_;
4840             $stats->{'empty city'}++ if $ar->[$idxCITY] eq '';
4841             }
4842              
4843             =item write_filter_hr
4844              
4845             Alias of L</out_filter>.
4846              
4847             =item out_filter
4848              
4849             To be set to a ref sub. Before writing a record to output, C<out_filter> is
4850             called and the record gets writen only if C<out_filter> return value is true.
4851             The sub is called with 1 argument: the hash ref to the record value.
4852              
4853             For example, if you want to output only records where the 'CITY' column value is
4854             Grenoble:
4855              
4856             Text::AutoCSV->new(in_file => 'addresses.csv', out_file => 'grenoble.csv',
4857             out_filter => \&filt)->write();
4858             sub filt {
4859             my $hr = shift;
4860             return 1 if $hr->{'CITY'} =~ /^grenoble$/i;
4861             return 0;
4862             }
4863              
4864             =item write_fields
4865              
4866             Alias of L</out_fields>.
4867              
4868             =item out_fields
4869              
4870             Set to an array ref. List fields to write to output.
4871              
4872             Fields are written in their order in the array ref, the first CSV column being
4873             the first element in the array, and so on. Fields not listed in B<out_fields>
4874             are not written in output.
4875              
4876             You can use empty field names to have empty columns in output.
4877              
4878             Value by default: none, meaning, all fields are output in their natural order.
4879             What is natural order? It is the input order for fields that were read from
4880             input, and the order in which they got created for created fields.
4881              
4882             Example:
4883              
4884             Text::AutoCSV->new(in_file => 'allinfos.csv',
4885             out_file => 'only-addresses.csv', out_fields => [ 'NAME', 'ADDRESS' ]
4886             )->write();
4887              
4888             =item out_orderby
4889              
4890             Array reference to a list of fields to sort output with.
4891              
4892             At the moment this feature is a bit of a hack (no option to make sort descending
4893             or ascending, numeric or text, and it is not part of test plan).
4894              
4895             Example:
4896              
4897             Text::AutoCSV->new(in_file => 'names.csv', out_file => 'sortednames.csv',
4898             out_orderby => [ 'LASTNAME', 'FIRSTNAME']);
4899              
4900             =item search_case
4901              
4902             If true, searches are case sensitive by default. Searches are done by the member
4903             functions L</search>, L</search_1hr>, L</vlookup>, and linked fields
4904             (L</field_add_link>).
4905              
4906             The search functions can also be called with the option L</case>, that takes
4907             precedence over the object-level C<search_case> attribute value. See L</vlookup>
4908             help.
4909              
4910             Value by default: 0 (by default searches are case insensitive)
4911              
4912             Example:
4913              
4914             my $csv = Text::AutoCSV->new(in_file => 'in.csv', search_case => 1);
4915              
4916             =item search_trim
4917              
4918             If true, searches ignore the presence of leading or trailing spaces in values.
4919              
4920             The search functions can also be called with the option L</trim>, that takes
4921             precedence over the object-level C<search_trim> attribute value. See L</vlookup>
4922             help.
4923              
4924             Value by default: 1 (by default searches ignore leading and trailing spaces)
4925              
4926             Example:
4927              
4928             my $csv = Text::AutoCSV->new(in_file => 'in.csv', search_trim => 0);
4929              
4930             =item search_ignore_empty
4931              
4932             If true, empty fields are not included in the search indexes.
4933              
4934             The search functions can also be called with the option L</ignore_empty>, that
4935             takes precedence over the object-level C<search_ignore_empty> attribute value.
4936             See L</vlookup> help.
4937              
4938             Value by default: 1 (by default, search of the value '' will find nothing)
4939              
4940             Example:
4941              
4942             my $csv = Text::AutoCSV->new(in_file => 'in.csv', search_ignore_empty => 0);
4943              
4944             =item search_ignore_accents
4945              
4946             If true, accents are ignored by search indexes.
4947              
4948             The search functions can also be called with the option L</ignore_accents>, that
4949             takes precedence over the object-level C<search_ignore_accents> attribute value.
4950             See L</vlookup> help.
4951              
4952             Value by default: 1 (by default, accents are ignored by search functions)
4953              
4954             Example:
4955              
4956             my $csv = Text::AutoCSV->new(in_file => 'in.csv',
4957             search_ignore_accents => 0);
4958              
4959             =item search_value_if_not_found
4960              
4961             When a search is done with a unique value to return (field_add_link member
4962             function behavior or return value of vlookup), default value of option
4963             L</value_if_not_found>. See L</vlookup>.
4964              
4965             =item search_value_if_found
4966              
4967             When a search is done with a unique value to return (field_add_link member
4968             function behavior or return value of vlookup), default value of option
4969             L</value_if_found>. See L</vlookup>.
4970              
4971             B<IMPORTANT>
4972              
4973             This attribute is extremly unusual. Once you've provided it, all vlookups and
4974             the target field value of fields created with field_add_link will all be
4975             populated with the value provided with this option.
4976              
4977             Don't use it unless you know what you are doing.
4978              
4979             =item search_ignore_ambiguous
4980              
4981             When a search is done with a unique value to return (field_add_link member
4982             function behavior or return value of search_1hr and vlookup), default value of
4983             option L</ignore_ambiguous>. See L</vlookup>.
4984              
4985             =item search_value_if_ambiguous
4986              
4987             When a search is done with a unique value to return (field_add_link member
4988             function behavior or return value of vlookup), default value of option
4989             L</value_if_ambiguous>. See L</vlookup>.
4990              
4991             =item fields_dates
4992              
4993             Array ref of field names that contain a date.
4994              
4995             Once the formats of these fields is known (auto-detection by default), each of
4996             these fields will get a specific L</in_map> sub that converts the text in a
4997             DateTime object and a L</out_map> sub that converts back from DateTime to text.
4998              
4999             B<NOTE>
5000              
5001             The L</out_map> given to a DateTime field is "defensive code": normally,
5002             L</in_map> converts text into a DateTime object and L</out_map> does the
5003             opposite, it takes a DateTime object and converts it to text. If ever
5004             L</out_map> encounters a value that is not a DateTime object, it'll just
5005             stringify it (evaluation in a string context), without calling its DateTime
5006             formatter.
5007              
5008             If the format cannot be detected for a given field, output an error message and
5009             as always when an error occurs, croak (unless L</croak_if_error> got set to 0).
5010              
5011             Value by default: none
5012              
5013             Example:
5014              
5015             my $csv = Text::AutoCSV->new(in_file => 'logins.csv',
5016             fields_dates => ['LASTLOGIN', 'CREATIONDATE']);
5017              
5018             =item fields_dates_auto
5019              
5020             Boolean value. If set to 1, will detect dates formats on all fields. Fields in
5021             which a DateTime format got detected are then managed as if they had been being
5022             listed in L</fields_dates> attribute: they get an appropriate L</in_map> sub and
5023             a L</out_map> sub to convert to and from DateTime (see L</fields_dates>
5024             attribute above).
5025              
5026             C<fields_dates_auto> looks for DateTime on all fields, but it expects nothing:
5027             it won't raise an error if no field is found that contains DateTime.
5028              
5029             Value by default: 0
5030              
5031             Example:
5032              
5033             my $csv = Text::AutoCSV->new(in_file => 'logins.csv',
5034             fields_dates_auto => 1);
5035              
5036             =item fields_dates_auto_optimize
5037              
5038             Relevant only if L</fields_dates_auto> is set.
5039              
5040             Normally when L</fields_dates_auto> is set, the input is read completely to make
5041             sure auto-detection produces a reliable result. If C<fields_dates_auto_optimize>
5042             is set, this reading pass will stop as soon as there is no ambiguity left. That
5043             is, for every fields in input, the date format (or the fact that no date format
5044             is suitable) is known.
5045              
5046             Using this option is a bit risky because it could trigger a date format
5047             detection that later in the input, would turn out to be wrong. Should that be
5048             the case, strange errors will occur, that are not easy to understand. Use it at
5049             your own risk.
5050              
5051             Value by default: 0
5052              
5053             Example:
5054              
5055             my $csv = Text::AutoCSV->new(in_file => 'logins.csv',
5056             fields_dates_auto => 1, fields_dates_auto_optimize => 1);
5057              
5058             =item dates_formats_to_try
5059              
5060             Array ref of string formats.
5061              
5062             Text::AutoCSV has a default built-in list of 20 date formats to try and 6 time
5063             formats (also it'll combine any date format with any time format).
5064              
5065             C<dates_formats_to_try> will replace Text::AutoCSV default format-list will the
5066             one you specify, in case the default would not produce the results you expect.
5067              
5068             The formats are written in Strptime format.
5069              
5070             Value by default (see below about the role of the pseudo-format ''):
5071              
5072             [ '',
5073             '%Y-%m-%d',
5074             '%Y.%m.%d',
5075             '%Y/%m/%d',
5076             '%m.%d.%y',
5077             '%m-%d-%Y',
5078             '%m.%d.%Y',
5079             '%m/%d/%Y',
5080             '%d-%m-%Y',
5081             '%d.%m.%Y',
5082             '%d/%m/%Y',
5083             '%m-%d-%y',
5084             '%m/%d/%y',
5085             '%d-%m-%y',
5086             '%d.%m.%y',
5087             '%d/%m/%y',
5088             '%Y%m%d%H%M%S',
5089             '%b %d, %Y',
5090             '%b %d %Y',
5091             '%b %d %T %Z %Y',
5092             '%d %b %Y',
5093             '%d %b, %Y' ]
5094              
5095             B<IMPORTANT>
5096              
5097             The empty format (empty string) has a special meaning: when specified,
5098             Text::AutoCSV will be able to identify fields that contain only a time (not
5099             preceeded by a date).
5100              
5101             B<Note>
5102              
5103             Format identification is over only when there is no more ambiguity. So the usual
5104             pitfall of US versus French dates (month-day versus day-month) gets resolved
5105             only when a date is encountered that disambiguates it (a date of 13th of the
5106             month or later).
5107              
5108             Example with a weird format that uses underscores to separate elements, using
5109             either US (month, day, year), French (day, month, year), or international (year,
5110             month, day) order:
5111              
5112             my $csv = Text::AutoCSV->new(in_file => 'logins.csv',
5113             dates_formats_to_try => ['%d_%m_%Y', '%m_%d_%Y', '%Y_%m_%d']);
5114              
5115             =item dates_formats_to_try_supp
5116              
5117             Same as L</dates_formats_to_try> but instead of replacing the default list of
5118             formats used during detection, it is added to this default list.
5119              
5120             You want to use this attribute if you need a specific DateTime format while
5121             continuing to benefit from the default list.
5122              
5123             B<IMPORTANT>
5124              
5125             Text::AutoCSV will identify a given Datetime format only when there is no
5126             ambiguity, meaning, one unique Datetime format matches (all other failed).
5127             Adding a format that already exists in the default list will prevent the format
5128             from being identified, as it'll always be ambiguous. See
5129             L</dates_formats_to_try> for the default list of formats.
5130              
5131             Example:
5132              
5133             my $csv = Text::AutoCSV->new(in_file => 'logins.csv',
5134             dates_formats_to_try_supp => ['%d_%m_%Y', '%m_%d_%Y', '%Y_%m_%d']);
5135              
5136             =item dates_ignore_trailing_chars
5137              
5138             If set to 1, DateTime auto-detection will ignore trailing text that may follow
5139             detected DateTime-like text.
5140              
5141             Value by default: 1 (do ignore trailing chars)
5142              
5143             my $csv = Text::AutoCSV->new(in_file => 'logins.csv',
5144             dates_ignore_trailing_chars => 0);
5145              
5146             =item dates_search_time
5147              
5148             If set to 1, look for times when detecting DateTime format. That is, whenever a
5149             date format candidate is found, a longer candidate that also contains a time
5150             (after the date) is tested.
5151              
5152             Value by default: 1 (do look for times when auto-detecting DateTime formats)
5153              
5154             Example:
5155              
5156             my $csv = Text::AutoCSV->new(in_file => 'logins.csv',
5157             dates_search_time => 0);
5158              
5159             =item dates_locales
5160              
5161             Comma-separated string of locales to test when detecting DateTime formats.
5162             Ultimately, Text::AutoCSV will try all combinations of date formats, times and
5163             locales.
5164              
5165             Value by default: none (use perl default locale)
5166              
5167             Example:
5168              
5169             my $csv = Text::AutoCSV->new(in_file => 'logins.csv',
5170             dates_locales => 'fr,de,en');
5171              
5172             =item dates_zeros_ok
5173              
5174             Boolean. If true, a date made only of 0s is regarded as being empty.
5175              
5176             For example if C<dates_zeros_ok> is False, then a date like 0000-00-00 will be
5177             always incorrect (as the day and month are out of bounds), therefore a format
5178             like '%Y-%m-%d' will never match for the field.
5179              
5180             Conversely if C<dates_zeros_ok> is true, then a date like 0000-00-00 will be
5181             processed as if being the empty string, thus the detection of format will work
5182             and when parsed, this "full of zeros" dates will be processed the same way as
5183             the empty string (= value will be undef).
5184              
5185             B<IMPORTANT>
5186              
5187             "0s dates" are evaluated to undef when parsed, thus when converted back to text
5188             (out_map), they are set to an empty string, not to the original value.
5189              
5190             Value by default: 1
5191              
5192             Example:
5193              
5194             my $csv = Text::AutoCSV->new(in_file => 'in.csv', dates_zeros_ok => 0);
5195              
5196             =item out_dates_format
5197              
5198             Enforce the format of dates in output, for all fields that contain a DateTime
5199             value.
5200              
5201             The format is written in Strptime format.
5202              
5203             Value by default: none (by default, use format detected on input)
5204              
5205             Example:
5206              
5207             # Detect any field containing a DateTime value and convert it to
5208             # yyyy-mm-dd whatever the input format is.
5209             Text::AutoCSV->new(in_file => 'in.csv', out_file => 'out.csv',
5210             fields_dates_auto => 1, out_dates_format => '%F')->write();
5211              
5212             =item out_dates_locale
5213              
5214             Taken into account only if L</out_dates_format> is used.
5215              
5216             Sets the locale to apply on L</out_dates_format>.
5217              
5218             Value by default: none (by default, use the locale detected on input)
5219              
5220             Example:
5221              
5222             # Detect any field containing a DateTime value and convert it to a US
5223             # DateTime whatever the input format is.
5224             Text::AutoCSV->new(in_file => 'in.csv', out_file => 'out.csv',
5225             fields_dates_auto => 1, out_dates_format => '%b %d, %Y, %I:%M:%S %p',
5226             out_dates_locale => 'en')->write();
5227              
5228             =item croak_if_error
5229              
5230             If true, stops the program execution in case of error.
5231              
5232             B<IMPORTANT>
5233              
5234             Value by default: 1
5235              
5236             If set to zero (C<croak_if_error =E<gt> 0>), errors are displayed as warnings.
5237             This printing can then be affected by setting the L</quiet> attribute.
5238              
5239             =item verbose
5240              
5241             If true, get Text::AutoCSV to be a bit talkative instead of speaking only when
5242             warnings and errors occur. Verbose output is printed to STDERR by default, this
5243             can be tuned with the L</infoh> attribute.
5244              
5245             Value by default: 0
5246              
5247             Example:
5248              
5249             my $csv = Text::AutoCSV->new(in_file => 'in.csv', verbose => 1);
5250              
5251             =item infoh
5252              
5253             File handle to display program's verbose output. Has effect *mainly* with
5254             attribute C<verbose =E<gt> 1>.
5255              
5256             Note B<infoh> is used to display extra information in case of error (if a field
5257             does not exist, Text::AutoCSV will display the list of existing fields). If you
5258             don't want such output, you can set C<infoh> to undef.
5259              
5260             Value by default: \*STDERR
5261              
5262             Example:
5263              
5264             open my $infoh, ">", "log.txt";
5265             my $csv = Text::AutoCSV->new(in_file => 'in.csv', infoh => $infoh);
5266              
5267             =item quiet
5268              
5269             If true, don't display warnings and errors, unless croaking.
5270              
5271             If L</croak_if_error> attribute is set (as per default), still, Text::AutoCSV
5272             will produce output (on STDERR) when croaking miserably.
5273              
5274             When using C<croak_if_error =E<gt> 0>, errors are processed as warnings and if
5275             L</quiet> is set (in addition to L</croak_if_error> being set to 0), there'll be
5276             no output. Note this way of working is not recommended, as things can go wrong
5277             without any notice to the caller.
5278              
5279             Example:
5280              
5281             my $csv = Text::AutoCSV->new(in_file => 'in.csv', quiet => 1);
5282              
5283             =item one_pass
5284              
5285             If true, Text::AutoCSV will perform one reading of the input. If other readings
5286             are triggered, it'll raise an error and no reading will be done. Should that be
5287             the case (you ask Text::AutoCSV to do something that'll trigger more than one
5288             reading of input), Text::AutoCSV will croak as is always the case if an error
5289             occurs.
5290              
5291             Normally Text::AutoCSV will do multiple reads of input to work out certain
5292             characteristics of the CSV: guess of encoding and guess of escape character.
5293              
5294             Also if member functions like L</field_add_link>, L</field_add_copy>,
5295             L</field_add_computed>, L</read> or L</write> are called after input has already
5296             been read, it'll trigger further reads as needed.
5297              
5298             If one wishes a unique read of the input to occur, one_pass attribute is to be
5299             set.
5300              
5301             When true, encoding will be assumed to be the first one in the provided list
5302             (L</encoding> attribute), if no encoding attribute is provided, it'll be the
5303             first one in the default list, to date, it is UTF-8.
5304              
5305             When true, and if attribute L</escape_char> is not set, escape_char will be
5306             assumed to be '\\' (backslash).
5307              
5308             By default, one_pass is set if inh attribute is set (caller provides the input
5309             file handle of input) or if input file is stdin (in_file attribute not set or
5310             set to an empty string).
5311              
5312             Value by default:
5313              
5314             0 if inh attribute is not set and in_file attribute is set to a non empty
5315             string
5316             1 if inh attribute is set or in_file is not set or set to an empty string
5317              
5318             Example:
5319              
5320             my $csv = Text::AutoCSV->new(in_file => 'in.csv', one_pass => 1);
5321              
5322             =back
5323              
5324             =head2 read
5325              
5326             $csv->read();
5327              
5328             Read input entirely.
5329              
5330             B<Return value>
5331              
5332             Returns the object itself in case of success.
5333             Returns undef if error.
5334              
5335             Callback functions (when defined) are invoked, in the following order:
5336              
5337             L</read_post_update_hr>, intended to do updates on fields values after each
5338             record read
5339              
5340             L</walker_ar>, called after each record read, with an array ref of fields values
5341              
5342             L</walker_hr>, called after each record read, with a hash ref of fields values
5343              
5344             Example:
5345              
5346             # Do nothing - just check CSV can be read successfully
5347             Text::AutoCSV->new(in_file => 'in.csv')->read();
5348              
5349             =head2 read_all_in_mem
5350              
5351             $csv->read_all_in_mem();
5352              
5353             Created in version 1.1.5. Before, existed only as _read_all_in_mem, meaning, was
5354             private.
5355              
5356             Read input entirely, as with L</read> function, but enforcing content to be kept
5357             in-memory.
5358              
5359             Having the content kept in-memory is implied by search functions (L</vlookup>
5360             for example). With C<read_all_in_mem> you can enforce this behavior without
5361             doing a fake search.
5362              
5363             =head2 reset_next_record_hr
5364              
5365             $csv->reset_next_record_hr();
5366              
5367             Reset the internal status to start from the beginning with
5368             L</get_next_record_hr>. Used in conjunction with L</get_next_record_hr>.
5369              
5370             =head2 get_next_record_hr
5371              
5372             my $hr = $csv->get_next_record_hr(\$opt_key);
5373              
5374             Get the next record content as a hash ref. C<$hr> is undef when the end of
5375             records has been reached.
5376              
5377             When specified, C<$opt_key> is set to the current (returned) record key.
5378              
5379             B<NOTE>
5380              
5381             You do not need to call L</reset_next_record_hr> once before using
5382             C<get_next_record_hr>.
5383              
5384             Therefore L</reset_next_record_hr> is useful only if you wish to restart from
5385             the beginning before you've reached the end of the records.
5386              
5387             B<NOTE bis>
5388              
5389             L</walker_hr> allows to execute some code each time a record is read, and it
5390             better fits with Text::AutoCSV philosophy. Using a loop with
5391             C<get_next_record_hr> is primarily meant for Text::AutoCSV internal usage. Also
5392             when using this mechanism, you get very close to original Text::CSV logic, that
5393             makes Text::AutoCSV less useful.
5394              
5395             B<Return value>
5396              
5397             A hashref of the record, or undef once there's no more record to return.
5398              
5399             Example:
5400              
5401             while (my $hr = $csv->get_next_record_hr()) {
5402             say Dumper($hr);
5403             }
5404              
5405             =head2 write
5406              
5407             $csv->write();
5408              
5409             Write input into output.
5410              
5411             B<Return value>
5412              
5413             Returns the object itself in case of success.
5414             Returns undef if error.
5415              
5416             - If the content is not in-memory at the time write() is called:
5417              
5418             Each record is read (with call of L</read_post_update_hr>, L</walker_ar> and
5419             L</walker_hr>) and then written. The read-and-write is done in sequence, each
5420             record is written to output before the next record is read from input.
5421              
5422             - If the content is in-memory at the time write() is called:
5423              
5424             No L</read> operation is performed, instead, records are directly written to
5425             output.
5426              
5427             If defined, L</out_filter> is called for each record. If the return value of
5428             L</out_filter> is false, the record is not written.
5429              
5430             Example:
5431              
5432             # Copy input to output.
5433             # As CSV is parsed in-between, this copy also checks a number of
5434             # characteristics about the input, as opposed to a plain file copy
5435             # operation.
5436             Text::AutoCSV->new(in_file => 'in.csv', out_file => 'out.csv')->write();
5437              
5438             =head2 out_header
5439              
5440             $csv->out_header($field, $header);
5441              
5442             Set the header text of C<$field> to C<$header>.
5443              
5444             By default, the input header value is rewritten as is to output. C<out_header>
5445             allows you to change it.
5446              
5447             B<Return value>
5448              
5449             Returns the object itself.
5450              
5451             Example:
5452              
5453             Text::AutoCSV->new(in_file => 'in.csv', out_file => 'out.csv')
5454             ->out_header('LOGIN', 'Login')
5455             ->out_header('FULLNAME', 'Full Name')
5456             ->write();
5457              
5458             =head2 print_id
5459              
5460             $csv->print_id();
5461              
5462             Print out a description of input. Write to \*STDERR by default or to L</infoh>
5463             attribute if set.
5464              
5465             The description consists in a list of a few characteristics (CSV separator and
5466             the like) followed by the list of columns with the details of each.
5467              
5468             Example of output:
5469              
5470             If you go to the C<utils> directory of this module and execute the following:
5471              
5472             ./csvcopy.pl -i f1.csv -l "1:,A->B,f2.csv" --id
5473              
5474             You will get this output:
5475              
5476             -- f1.csv:
5477             sep_char: ,
5478             escape_char: \
5479             in_encoding: UTF-8
5480             is_always_quoted: no
5481              
5482             # FIELD HEADER EXT DATA DATETIME FORMAT DATETIME LOCALE
5483             - ----- ------ -------- --------------- ---------------
5484             0 TIMESTAMP timestamp %Y%m%d%H%M%S
5485             1 A a
5486             2 B b
5487             3 C c
5488             4 D d %d/%m/%Y
5489             5 1:SITE 1:SITE link: f2.csv, chain: A->B->* (SITE)
5490             6 1:B 1:B link: f2.csv, chain: A->B->* (B)
5491              
5492             =head2 field_add_computed
5493              
5494             $csv->field_add_computed($new_field, $subref);
5495              
5496             C<$new_field> is the name of the created field.
5497              
5498             C<$subref> is a reference to a sub that'll calculate the new field value.
5499              
5500             B<Return value>
5501              
5502             Returns the object itself in case of success.
5503             Returns undef if error.
5504              
5505             Add a field calculated from other fields values. The subref runs like this:
5506              
5507             sub func {
5508             # $new_field is the name of the field (allows to use one subref for
5509             # more than one field calculation).
5510             # $hr is a hash ref of fields values.
5511             # $stats is a hash ref that gets printed (if Text::AutoCSV is
5512             # created with verbose => 1) in the end.
5513             my ($new_field, $hr, $stats) = @_;
5514              
5515             my $field_value;
5516             # ... compute $field_value
5517              
5518             return $field_value;
5519             }
5520              
5521             Example:
5522              
5523             # Add a field for the full name, made of the concatenation of the first
5524             # name and the last name.
5525             Text::AutoCSV->new(in_file => 'dirpeople.csv', out_file => 'dirwithfn.csv',
5526             verbose => 1)
5527             ->field_add_computed('FULLNAME', \&calc_fn)->write();
5528             sub calc_fn {
5529             my ($new_field, $hr, $stats) = @_;
5530             die "Man, you are in serious trouble!" unless $new_field eq 'FULLNAME';
5531             my $fn = $hr->{'FIRSTNAME'} . ' ' . uc($hr->{'LASTNAME'});
5532             $stats->{'empty full name'}++ if $fn eq ' ';
5533             return $fn;
5534             }
5535              
5536             =head2 field_add_copy
5537              
5538             $csv->field_add_copy($new_field, $src_field, $opt_subref);
5539              
5540             C<$new_field> if the name of the new field.
5541              
5542             C<$src_field> is the name of the field being copied.
5543              
5544             C<$opt_subref> is optional. It is a reference to a sub that takes one string
5545             (the value of C<$src_field>) and returns a string (the value assigned to
5546             C<$new_field>).
5547              
5548             B<Return value>
5549              
5550             Returns the object itself in case of success.
5551             Returns undef if error.
5552              
5553             C<field_add_copy> is a special case of L</field_add_computed>. The advantage of
5554             C<field_add_copy> is that it relies on a sub that is Text::AutoCSV "unaware",
5555             just taking one string as input and returning another string as output.
5556              
5557             B<IMPORTANT>
5558              
5559             The current field value is passed to C<field_add_copy> in $_.
5560              
5561             A call to
5562              
5563             $csv->field_add_copy($new_field, $src_field, $subref);
5564              
5565             is equivalent to
5566              
5567             $csv->field_add_computed($new_field, \&subref2);
5568             sub subref2 {
5569             my (undef, $hr) = @_;
5570             local $_ = $hr->{$src_field};
5571             return $subref->();
5572             }
5573              
5574             Example of a field copy + pass copied field in upper case and surround content
5575             with <<>>:
5576              
5577             my $csv = Text::AutoCSV->new(in_file => 'dirpeople.csv',
5578             out_file => 'd2.csv');
5579             $csv->field_add_copy('UCLAST', 'LASTNAME', \&myfunc);
5580             $csv->write();
5581             sub myfunc { s/^.*$/<<\U$&>>/; $_; }
5582              
5583             Note that the calls can be chained as most member functions return the object
5584             itself upon success. The example above is equivalent to:
5585              
5586             Text::AutoCSV->new(in_file => 'dirpeople.csv', out_file => 'd2.csv')
5587             ->field_add_copy('UCLAST', 'LASTNAME', \&myfunc)
5588             ->write();
5589             sub myfunc { s/^.*$/<<\U$&>>/; $_; }
5590              
5591             =head2 field_add_link
5592              
5593             $csv->field_add_link($new_field, $chain, $linked_file, \%opts);
5594              
5595             C<$new_field> is the name of the new field.
5596              
5597             C<$chain> is the CHAIN of the link, that is: 'LOCAL->REMOTE->PICK' where:
5598              
5599             C<LOCAL> is the field name to read the value from.
5600              
5601             C<REMOTE> is the linked field to find the value in. This field belongs to
5602             $linked_file.
5603              
5604             C<PICK> is the field from which to read the value of, in the record found by the
5605             search. This field belongs to $linked_file.
5606              
5607             If $new_field is undef, the new field name is the name of the third field of
5608             $chain (PICK).
5609              
5610             C<$linked_file> is the name of the linked file, that gets read in a
5611             Text::AutoCSV object created on-the-fly to do the search on. C<$linked_file> can
5612             also be a Text::AutoCSV object that you created yourself, allowing for more
5613             flexibility. Example:
5614              
5615             my $lcsv = Text::AutoCSV->new(in_file => 'logins.csv', case => 1);
5616             $csv->field_add_link('NAME', 'ID->LOGIN->DISPLAYNAME', $lcsv);
5617              
5618             C<\%opts> is a hash ref of optional attributes. The same values can be provided
5619             as with vlookup.
5620              
5621             =over 4
5622              
5623             =item trim
5624              
5625             If set to 1, searches will ignore leading and trailing spaces. That is, a
5626             C<LOCAL> value of ' x ' will match with a C<REMOTE> value of 'x'.
5627              
5628             If option is not present, use L</search_value_if_not_found> attribute of object
5629             (default value: 1).
5630              
5631             Example:
5632              
5633             $csv->field_add_link('NAME', 'ID->LOGIN->DISPLAYNAME', 'logins.csv',
5634             { trim => 0 });
5635              
5636             =item case
5637              
5638             If set to 1, searches will take the case into account. That is, a C<LOCAL> value
5639             of 'X' will B<not> match with a C<REMOTE> value of 'x'.
5640              
5641             If option is not present, use L</search_case> attribute of object (default
5642             value: 0).
5643              
5644             Example:
5645              
5646             $csv->field_add_link('NAME', 'ID->LOGIN->DISPLAYNAME', 'logins.csv',
5647             { case => 1 });
5648              
5649             =item ignore_empty
5650              
5651             If set to 1, empty values won't match. That is, a C<LOCAL> value of '' will not
5652             match with a C<REMOTE> value of ''.
5653              
5654             If option is not present, use L</search_ignore_empty> attribute of object
5655             (default value: 1).
5656              
5657             Example:
5658              
5659             $csv->field_add_link('NAME', 'ID->LOGIN->DISPLAYNAME', 'logins.csv',
5660             { ignore_empty => 0 });
5661              
5662             =item value_if_not_found
5663              
5664             If the searched value is not found, the value of the field is undef, that
5665             produces an empty string at write time. Instead, you can specify the value.
5666              
5667             If option is not present, use L</search_value_if_not_found> attribute of object
5668             (default value: undef).
5669              
5670             Example:
5671              
5672             $csv->field_add_link('NAME', 'ID->LOGIN->DISPLAYNAME', 'logins.csv',
5673             { value_if_not_found => '<not found!>' });
5674              
5675             =item value_if_found
5676              
5677             If the searched value is found, you can specify the value to return.
5678              
5679             If option is not present, use L</search_value_if_found> attribute of object
5680             (default value: none).
5681              
5682             B<NOTE>
5683              
5684             Although the C<PICK> field is ignored when using this option, you must specify
5685             it any way.
5686              
5687             Example:
5688              
5689             $csv->field_add_link('NAME', 'ID->LOGIN->DISPLAYNAME', 'logins.csv',
5690             { value_if_not_found => '0', value_if_found => '1' });
5691              
5692             =item value_if_ambiguous
5693              
5694             If the searched value is found in more than one record, the value of the field
5695             is undef, that produces an empty string at write time. Instead, you can specify
5696             the value.
5697              
5698             If option is not present, use L</search_value_if_ambiguous> attribute of object
5699             (default value: undef).
5700              
5701             Example:
5702              
5703             $csv->field_add_link('NAME', 'ID->LOGIN->DISPLAYNAME', 'logins.csv',
5704             { value_if_ambiguous => '<ambiguous!>' });
5705              
5706             =item ignore_ambiguous
5707              
5708             Boolean value. If ignore_ambiguous is true and the searched value is found in
5709             more than one record, then, silently fall back on returning the value of the
5710             first record. Obviously if C<ignore_ambiguous> is true, then the value of
5711             L</value_if_ambiguous> is ignored.
5712              
5713             If option is not present, use L</search_ignore_ambiguous> attribute of object
5714             (default value: 1).
5715              
5716             Example:
5717              
5718             $csv->field_add_link('NAME', 'ID->LOGIN->DISPLAYNAME', 'logins.csv',
5719             { ignore_ambiguous => 1 });
5720              
5721             Example with multiple options:
5722              
5723             $csv->field_add_link('NAME', 'ID->LOGIN->DISPLAYNAME', 'logins.csv',
5724             { value_if_not_found => '?', ignore_ambiguous => 1 });
5725              
5726             =back
5727              
5728             B<Return value>
5729              
5730             Returns the object itself in case of success.
5731             Returns undef if error.
5732              
5733             Example of field_add_link usage:
5734              
5735             my $nom_compose = 0;
5736             my $zip_not_found = 0;
5737             Text::AutoCSV->new(in_file => 'pers.csv', walker_hr => \&walk)
5738             ->field_add_link('MYCITY', 'ZIP->ZIPCODE->CITY', 'zips.csv')->read();
5739             sub walk {
5740             my $hr = shift;
5741             $nom_compose++ if $hr->{'NAME'} =~ m/[- ]/;
5742             $zip_not_found++ unless defined($hr->{'MYCITY'});
5743             }
5744             print("Number of persons with a multi-part name: $nom_compose\n");
5745             print("Number of persons with unknown zipcode: $zip_not_found\n");
5746              
5747             =head2 links
5748              
5749             $csv->links($prefix, $chain, $linked_file, \%opts);
5750              
5751             C<$prefix> is the name to add to joined fields
5752              
5753             C<$chain> is the JOINCHAIN of the link, that is: 'LOCAL->REMOTE' where:
5754              
5755             C<LOCAL> is the field name to read the value from.
5756              
5757             C<REMOTE> is the linked field to find the value in. This field belongs to
5758             $linked_file.
5759              
5760             As opposed to L</field_add_link>, there is no C<PICK> part, as all fields of
5761             target are read.
5762              
5763             As opposed to Text::AutoCSV habits of croaking whenever a field name is
5764             duplicate, here, the duplicates are resolved by appending _2 to the joined field
5765             name if it already exists. If _2 already exists, too, then _3 is appended
5766             instead, and so on, until a non-duplicate is found. This mechanism is executed
5767             given the difficulty to control all field names when joining CSVs.
5768              
5769             C<$linked_file> and C<\%opts> work exactly the same way as for
5770             L</field_add_link>, see L</field_add_link> for help.
5771              
5772             B<Return value>
5773              
5774             Returns the object itself in case of success.
5775             Returns undef if error.
5776              
5777             B<NOTE>
5778              
5779             This function used to be called C<join> but got renamed to avoid clash with
5780             perl' builtin C<join>.
5781              
5782             Example:
5783              
5784             Text::AutoCSV->new(in_file => 'pers.csv', out_file => 'pers_with_city.csv')
5785             ->links('Read from zips.csv:', 'ZIP->ZIPCODE', 'zips.csv')->write();
5786              
5787             =head2 get_in_encoding
5788              
5789             my $enc = $csv->get_in_encoding();
5790              
5791             Return the string of input encoding, for example 'latin2' or 'UTF-8', etc.
5792              
5793             =head2 get_in_file_disp
5794              
5795             my $f = $csv->get_in_file_disp();
5796              
5797             Return the printable name of in_file.
5798              
5799             =head2 get_sep_char
5800              
5801             my $s = $csv->get_sep_char();
5802              
5803             Return the string of the input CSV separator character, for example ',' or ';'.
5804              
5805             =head2 get_escape_char
5806              
5807             my $e = $csv->get_escape_char();
5808              
5809             Return the string of the input escape character, for example '"' or '\\'.
5810              
5811             =head2 get_is_always_quoted
5812              
5813             my $a = $csv->get_is_always_quoted();
5814              
5815             Return 1 if all fields of input are always quoted, 0 otherwise.
5816              
5817             =head2 get_coldata
5818              
5819             my @cd = get_coldata();
5820              
5821             Return an array that describes each column, from the first one (column 0) to the
5822             last.
5823              
5824             Each element of the array is itself an array ref that contains 5 elements:
5825              
5826             0: Name of the field (as accessed in *_hr functions)
5827             1: Content of the field in the header line (if input has a header line)
5828             2: Column content type, shows some meta-data of fields created with
5829             field_add_* functions
5830             3: Datetime format detected, if ever, in the format Strptime
5831             4: Locale of DateTime format detected, if ever
5832             5: Multiline field: '1' if not, 'm' if newlines encountered in the field
5833              
5834             =head2 get_pass_count
5835              
5836             my $n = $csv->get_pass_count();
5837              
5838             Return the number of input readings done. Useful only if you're interested in
5839             Text::AutoCSV internals.
5840              
5841             =head2 get_in_mem_record_count
5842              
5843             my $m = $csv->get_in_mem_record_count();
5844              
5845             Return the number of records currently stored in-memory. Useful only if you're
5846             interested in Text::AutoCSV internals.
5847              
5848             =head2 get_max_in_mem_record_count
5849              
5850             my $mm = $csv->get_max_in_mem_record_count();
5851              
5852             Return the maximum number of records ever stored in-memory. Indeed this number
5853             can decrease: certain functions like field_add* member-functions discard
5854             in-memory content. Useful only if you're interested in Text::AutoCSV internals.
5855              
5856             =head2 get_fields_names
5857              
5858             my @f = $csv->get_fields_names();
5859              
5860             Return an array of the internal names of the columns.
5861              
5862             =head2 get_field_name
5863              
5864             my $name = $csv->get_field_name($n);
5865              
5866             Return the C<$n>-th column name, the first column being number 0.
5867              
5868             Example:
5869              
5870             # Get the field name of the third column
5871             my $col = $csv->get_field_name(2);
5872              
5873             =head2 get_stats
5874              
5875             my %stats = $csv->get_stats();
5876              
5877             Certain callback functions provide a parameter to record event count:
5878             L</field_add_computed>, L</read_post_update_hr>, L</walker_ar> and
5879             L</walker_hr>. By default, these stats are displayed if Text::AutoCSV got
5880             created with attribute C<verbose =E<gt> 1>. get_stats() returns the statistics
5881             hash of the object.
5882              
5883             B<IMPORTANT>
5884              
5885             As opposed to most functions that trigger input reading automatically (search
5886             functions and other get_* functions), C<get_stats> just returns you the stats as
5887             it is, regardless of whether some execution already occured.
5888              
5889             =head2 get_nb_rows
5890              
5891             my $nb_rows = $csv->get_nb_rows();
5892              
5893             Gives the number of rows of the input. Does not trigger any reading - just
5894             provides the number of rows as known at the moment of the call. If unknown,
5895             return undef. Typically, the number of rows is known after doing the initial
5896             detection of CSV options (escape character, etc.), or, after doing one complete
5897             reading.
5898              
5899             The header line counts for one row.
5900              
5901             B<IMPORTANT>
5902              
5903             As some fields can contain new lines, this number is not necessarily identical
5904             to the number of lines.
5905              
5906             =head2 set_walker_ar
5907              
5908             $csv->set_walker_ar($subref);
5909              
5910             Normally one wants to define it at object creation time using L</walker_ar>
5911             attribute. C<set_walker_ar> allows to assign the attribute walker_ar after
5912             object creation.
5913              
5914             See attribute L</walker_ar> for help about the way C<$subref> should work.
5915              
5916             B<Return value>
5917              
5918             Returns the object itself in case of success.
5919             Returns undef if error.
5920              
5921             Example:
5922              
5923             # Calculate the total of the two first columns, the first column being
5924             # money in and the second one being money out.
5925             my ($actif, $passif) = (0, 0);
5926             $csv->set_walker_ar(
5927             sub {
5928             my $ar = $_[0]; $actif += $ar->[0]; $passif += $ar->[1];
5929             }
5930             )
5931             ->read();
5932             print("Actif = $actif\n");
5933             print("Passif = $passif\n");
5934              
5935             =head2 set_walker_hr
5936              
5937             $csv->set_walker_hr($subref);
5938              
5939             Normally one wants to define it at object creation time using L</walker_hr>
5940             attribute. C<set_walker_hr> allows to assign the attribute L</walker_hr> after
5941             object creation.
5942              
5943             See attribute L</walker_hr> for help about the way C<$subref> should work.
5944              
5945             B<Return value>
5946              
5947             Returns the object itself in case of success.
5948             Returns undef if error.
5949              
5950             Example:
5951              
5952             my $csv = Text::AutoCSV->new(in_file => 'directory.csv', verbose => 1);
5953              
5954             # ...
5955              
5956             $csv->set_walker_hr(
5957             sub {
5958             my ($hr, $stat) = @_;
5959             $stat{'not capital name'}++, return
5960             if $hr->{'NAME'} ne uc($hr->{'NAME'});
5961             $stat{'name is capital letters'}++;
5962             }
5963             )->read();
5964              
5965             =head2 set_out_file
5966              
5967             $csv->set_out_file($out_file);
5968              
5969             Normally one wants to define it at object creation time using L</out_file>
5970             attribute. C<set_out_file> allows to assign the attribute L</out_file> after
5971             object creation. It is set to C<$out_file> value.
5972              
5973             B<Return value>
5974              
5975             Returns the object itself in case of success.
5976             Returns undef if error.
5977              
5978             Example:
5979              
5980             $csv->set_out_file('mycopy.csv')->write();
5981              
5982             =head2 get_keys
5983              
5984             my @allkeys = $csv->get_keys();
5985              
5986             Returns an array of all the record keys of input. A record key is a unique
5987             identifier that designates the record.
5988              
5989             At the moment it is just an integer being the record number, the first one (that
5990             comes after the header line) being of number 0. For example if $csv input is
5991             made of one header line and 3 records (that is, a 4-line file typically, if no
5992             record contains a line break), $csv->get_keys() returns:
5993              
5994             (0, 1, 2)
5995              
5996             B<IMPORTANT>
5997              
5998             If not yet done, this function causes the input to be read entirely and stored
5999             in-memory.
6000              
6001             =head2 get_hr_all
6002              
6003             my @allin = $csv->get_hr_all();
6004              
6005             Returns an array of all record contents of the input, each record being a hash
6006             ref.
6007              
6008             B<IMPORTANT>
6009              
6010             If not yet done, this function causes the input to be read entirely and stored
6011             in-memory.
6012              
6013             =head2 get_row_ar
6014              
6015             my $row_ar = $csv->get_row_ar($record_key);
6016              
6017             Returns an array ref of the record designated by C<$record_key>.
6018              
6019             Example:
6020              
6021             # Get content (as array ref) of last record
6022             my @allkeys = $csv->get_keys();
6023             my $lastk = $allkeys[-1];
6024             my $lastrec_ar = $csv->get_row_ar($lastk);
6025              
6026             B<IMPORTANT>
6027              
6028             If not yet done, this function causes the input to be read entirely and stored
6029             in-memory.
6030              
6031             =head2 get_row_hr
6032              
6033             my $row_hr = $csv->get_row_hr($record_key);
6034              
6035             Returns a hash ref of the record designated by C<$record_key>.
6036              
6037             Example:
6038              
6039             # Get content (as hash ref) of first record
6040             my @allkeys = $csv->get_keys();
6041             my $firstk = $allkeys[0];
6042             my $firstrec_hr = $csv->get_row_hr($firstk);
6043              
6044             B<IMPORTANT>
6045              
6046             If not yet done, this function causes the input to be read entirely and stored
6047             in-memory.
6048              
6049             =head2 get_cell
6050              
6051             my $val = $csv->get_cell($record_key, $field_name);
6052              
6053             Return the value of the cell designated by its record key (C<$record_key>) and
6054             field name (C<$field_name>).
6055              
6056             Example:
6057              
6058             my @allkeys = $csv->get_keys();
6059             my $midk = $allkeys[int($#allkeys / 2)];
6060             my $midname = $csv->get_cell($midk, 'NAME');
6061              
6062             Note the above example is equivalent to:
6063              
6064             my @allkeys = $csv->get_keys();
6065             my $midk = $allkeys[int($#allkeys / 2)];
6066             my $midrec_hr = $csv->get_row_hr($midk);
6067             my $midname = $midrec_hr->{'NAME'};
6068              
6069             B<IMPORTANT>
6070              
6071             If not yet done, this function causes the input to be read entirely and stored
6072             in-memory.
6073              
6074             =head2 get_values
6075              
6076             my @vals = $csv->get_values($field_name, $opt_filter_subref);
6077              
6078             Return an array made of the values of the given field name (C<$field_name>), for
6079             every records, in the order of the records.
6080              
6081             C<$opt_filter_subref> is an optional subref. If defined, it is called with every
6082             values in turn (one call per value) and only values for which
6083             C<$opt_filter_subref> returned True are included in the returned array. Call to
6084             C<$opt_filter_subref> is done with $_ to pass the value.
6085              
6086             Example:
6087              
6088             my @logins = $csv->get_values('LOGIN");
6089              
6090             This is equivalent to:
6091              
6092             my @allkeys = $csv->get_keys();
6093             my @logins;
6094             push @logins, $csv->get_cell($_, 'LOGIN') for (@allkeys);
6095              
6096             Example bis
6097              
6098             # @badlogins is the list of logins that contain non alphanumeric
6099             # characters
6100             my @badlogins = Text::AutoCSV->new(in_file => 'logins.csv')
6101             ->get_values('LOGIN', sub { m/[^a-z0-9]/ });
6102              
6103             This is equivalent to:
6104              
6105             # @badlogins is the list of logins that contain non alphanumeric
6106             # characters This method leads to carrying all values of a given field
6107             # across function calls...
6108             my @badlogins = grep { m/[^a-z0-9]/ } (
6109             Text::AutoCSV->new(in_file => 'logins.csv')->get_values('LOGIN')
6110             );
6111              
6112             B<IMPORTANT>
6113              
6114             If not yet done, this function causes the input to be read entirely and stored
6115             in-memory.
6116              
6117             =head2 get_recnum
6118              
6119             my $r = $csv->get_recnum();
6120              
6121             Returns the current record identifier, if a reading is in progress. If no read
6122             is in progress, return undef.
6123              
6124             =head2 in_map
6125              
6126             See L</read_update_after> below.
6127              
6128             =head2 read_update_after
6129              
6130             C<read_update_after> is an alias of C<in_map>.
6131              
6132             $csv->in_map($field, $subref);
6133              
6134             After reading a record from input, update C<$field> by calling C<$subref>. The
6135             value is put in C<$_>. Then the field value is set to the return value of
6136             C<$subref>.
6137              
6138             This feature is originally meant to manage DateTime fields: the input and output
6139             CSVs carry text content, and in-between, the values dealt with are DateTime
6140             objects.
6141              
6142             See L</out_map> for an example.
6143              
6144             =head2 out_map
6145              
6146             See L</write_update_before> below.
6147              
6148             =head2 write_update_before
6149              
6150             C<write_update_before> is an alias of C<out_map>.
6151              
6152             $csv->out_map($field, $subref);
6153              
6154             Before writing C<$field> field content into the output file, pass it through
6155             C<out_map>. The value is put in C<$_>. Then the return value of C<$subref> is
6156             written in the output.
6157              
6158             Example:
6159              
6160             Suppose you have a CSV file with the convention that a number surrounded by
6161             parenthesis is negative. You can register corresponding L</in_map> and
6162             L</out_map> functions. During the processing of data, the field content will be
6163             just a number (positive or negative), while in input and in output, it'll follow
6164             the "negative under parenthesis" convention.
6165              
6166             In the below example, we rely on convention above and add a new field converted
6167             from the original one, that follows the same convention.
6168              
6169             sub in_updt {
6170             return 0 if !defined($_) or $_ eq '';
6171             my $i;
6172             return -$i if ($i) = $_ =~ m/^\((.*)\)$/;
6173             $_;
6174             }
6175             sub out_updt {
6176             return '' unless defined($_);
6177             return '(' . (-$_) . ')' if $_ < 0;
6178             $_;
6179             }
6180             sub convert {
6181             return ;
6182             }
6183             Text::AutoCSV->new(in_file => 'trans-euros.csv',
6184             out_file => 'trans-devises.csv')
6185             ->in_map('EUROS', \&in_updt)
6186             ->out_map('EUROS', \&out_updt)
6187             ->out_map('DEVISE', \&out_updt)
6188             ->field_add_copy('DEVISE', 'EUROS',
6189             sub { sprintf("%.2f", $_ * 1.141593); } )
6190             ->write();
6191              
6192             =head2 search
6193              
6194             my $found_ar = $csv->search($field_name, $value, \%opts);
6195              
6196             Returns an array ref of all records keys where the field C<$field_name> has the
6197             value C<$value>.
6198              
6199             C<\%opts> is an optional hash ref of options for the search. See help of
6200             L</vlookup> about options.
6201              
6202             B<IMPORTANT>
6203              
6204             An unsuccessful search returns an empty array ref, that is, [ ]. Thus you
6205             B<cannot> check for definedness of C<search> return value to know whether or not
6206             the search found something.
6207              
6208             On the other hand, you can always examine the value C<search(...)-E<gt>[0]>, as
6209             search is always an array ref. If the search found nothing, then,
6210             C<search(...)-E<gt>[0]> is not defined.
6211              
6212             B<IMPORTANT bis>
6213              
6214             If not yet done, this function causes the input to be read entirely and stored
6215             in-memory.
6216              
6217             Example:
6218              
6219             my $linux_os_keys_ar = $csv->search('OS', 'linux');
6220              
6221             =head2 search_1hr
6222              
6223             my $found_hr = $csv->search_1hr($field_name, $value, \%opts);
6224              
6225             Returns a hash ref of the first record where the field C<$field_name> has the
6226             value C<$value>.
6227              
6228             C<\%opts> is an optional hash ref of options for the search. See help of
6229             L</vlookup> about options.
6230              
6231             Note the options L</value_if_not_found> and L</value_if_ambiguous> are ignored.
6232             If not found, return undef. If the result is ambiguous (more than one record
6233             found) and ignore_ambiguous is set to a false value, return undef.
6234              
6235             The other options are taken into account as for any search:
6236             L</ignore_ambiguous>, L</trim>, L</case>, L</ignore_empty>.
6237              
6238             B<IMPORTANT>
6239              
6240             As opposed to L</search>, an unsuccessful C<search_1hr> will return C<undef>.
6241              
6242             B<IMPORTANT bis>
6243              
6244             If not yet done, this function causes the input to be read entirely and stored
6245             in-memory.
6246              
6247             Example:
6248              
6249             my $hr = $csv->search_1hr('LOGIN', $login);
6250             my $full_name = $hr->{'FIRSTNAME'} . ' ' . $hr->{'LASTNAME'};
6251              
6252             =head2 vlookup
6253              
6254             my $val = $csv->vlookup($searched_field, $value, $target_field, \%opts);
6255              
6256             Find the first record where C<$searched_field> contains C<$value> and out of
6257             this record, returns the value of C<$target_field>.
6258              
6259             C<\%opts> is optional. It is a hash of options for C<vlookup>:
6260              
6261             =over 4
6262              
6263             =item trim
6264              
6265             If true, ignore spaces before and after the values to search.
6266              
6267             If option is not present, use L</search_trim> attribute of object (default
6268             value: 1).
6269              
6270             =item case
6271              
6272             If true, do case sensitive searches.
6273              
6274             If option is not present, use L</search_case> attribute of object (default
6275             value: 0).
6276              
6277             =item ignore_empty
6278              
6279             If true, ignore empty values in the search. The consequence is that you won't be
6280             able to find empty values by searching it.
6281              
6282             If option is not present, use L</search_ignore_empty> attribute of object
6283             (default value: 1).
6284              
6285             =item ignore_accents
6286              
6287             If true, ignore accents in searches. For exampe, if C<ignore_accents> is set, a
6288             string like "élémentaire" will match "elementaire".
6289              
6290             If option is not present, use L</search_ignore_accents> attribute of object
6291             (default value: 1).
6292              
6293             B<NOTE>
6294              
6295             This option uses the function L</remove_accents> to build its internal hash
6296             tables. See L</remove_accents> help for more details.
6297              
6298             =item value_if_not_found
6299              
6300             Return value if vlookup finds nothing.
6301              
6302             If option is not present, use L</search_value_if_not_found> attribute of object
6303             (default value: undef).
6304              
6305             =item value_if_found
6306              
6307             Return value if vlookup finds something.
6308              
6309             If option is not present, use L</search_value_if_found> attribute of object
6310             (default value: none).
6311              
6312             This option is to just check whether a value exists, regardless of the target
6313             value found.
6314              
6315             B<NOTE>
6316              
6317             Although the B<$target_field> is ignored when using this option, you must
6318             specify it any way.
6319              
6320             =item value_if_ambiguous
6321              
6322             Return value if vlookup find more than one result. Tune it only if
6323             ignore_ambiguous is unset.
6324              
6325             If option is not present, use L</search_value_if_ambiguous> attribute of object
6326             (default value: undef).
6327              
6328             =item ignore_ambiguous
6329              
6330             If true, then if more than one result is found, silently return the first one.
6331              
6332             If option is not present, use L</search_ignore_ambiguous> attribute of object
6333             (default value: 1).
6334              
6335             =back
6336              
6337             B<IMPORTANT>
6338              
6339             If not yet done, this function causes the input to be read entirely and stored
6340             in-memory.
6341              
6342             Example:
6343              
6344             my $name = $csv->vlookup('LOGIN', $id, 'NAME',
6345             { value_if_not_found => '<login not found>' });
6346              
6347             =head2 remove_accents
6348              
6349             my $t = $csv->remove_accents($s);
6350              
6351             Take the string C<$s> as argument and return the string without accents. Uses a
6352             Unicode decomposition followed by removal of every characters that have the
6353             Unicode property C<Nonspacing_Mark>.
6354              
6355             B<NOTE>
6356              
6357             Only accents are removed. It is not a C<whatever-encoding -E<gt> us-ascii>
6358             conversion. For example, the French B<Å“> character (o followed by e) or the
6359             German B<ß> (eszett) are kept as is.
6360              
6361             B<NOTE bis>
6362              
6363             Tested with some latin1 and latin2 characters.
6364              
6365             B<NOTE ter>
6366              
6367             There is no language-level transformation during accents removal. For example
6368             B<Jürgen> is returned as B<Jurgen>, not B<Juergen>.
6369              
6370             This function is not exported by default.
6371              
6372             Example:
6373              
6374             use Text::AutoCSV qw(remove_accents);
6375             my $s = remove_accents("Français: être élémentaire, Tchèque: služba dům");
6376             die "This script will never die"
6377             if $s ne 'Francais: etre elementaire, Tcheque: sluzba dum';
6378              
6379             =head1 AUTHOR
6380              
6381             Sébastien Millet <milletseb@laposte.net>
6382              
6383             =head1 COPYRIGHT AND LICENSE
6384              
6385             This software is copyright (c) 2016, 2017 by Sébastien Millet.
6386              
6387             This is free software; you can redistribute it and/or modify it under
6388             the same terms as the Perl 5 programming language system itself.
6389              
6390             =cut