File Coverage

blib/lib/Text/AutoCSV.pm

Criterion	Covered	Total	%
statement	1455	1700	85.5
branch	672	930	72.2
condition	188	288	65.2
subroutine	109	115	94.7
pod	42	42	100.0
total	2466	3075	80.2

line	stmt	bran	cond	sub	pod	time	code
1							#!/usr/bin/perl
2							# ABSTRACT: helper module to automate the use of Text::CSV
3
4							# vim:tw=100
5
6							# Text/AutoCSV.pm
7
8							#
9							# Written by SÃ©bastien Millet
10							# March, July, August, September 2016
11							# January, February 2017
12							#
13
14							package Text::AutoCSV;
15							$Text::AutoCSV::VERSION = '1.1.8';
16							my $PKG = "Text::AutoCSV";
17
18	19			19		952532	use strict;
	19					48
	19					468
19	16			16		80	use warnings;
	16					32
	16					851
20
21							require Exporter;
22							our @ISA = 'Exporter';
23							our @EXPORT_OK = qw(remove_accents);
24
25	16			16		88	use Carp;
	16					41
	16					877
26	16			16		7773	use Params::Validate qw(validate validate_pos :types);
	16					126107
	16					3315
27	16			16		7961	use List::MoreUtils qw(first_index indexes);
	16					117145
	16					173
28	16			16		12604	use Fcntl qw(SEEK_SET);
	16					43
	16					828
29	16			16		7530	use File::BOM;
	16					421354
	16					1003
30	16			16		10630	use Text::CSV;
	16					243550
	16					827
31	16			16		12023	use DateTime;
	16					7394925
	16					869
32							# DateTime::Format::Strptime 1.70 does not work properly with us.
33							# Actually all version as of 1.63 are fine, except 1.70.
34	16			16		10948	use DateTime::Format::Strptime 1.71;
	16					956979
	16					145
35	16			16		11087	use Class::Struct;
	16					25547
	16					114
36	16			16		10377	use Unicode::Normalize;
	16					28388
	16					1065
37							# lock_keys is used to prevent accessing non existing keys
38							# Credits: 3381159 on http://stackoverflow.com
39							# "make perl shout when trying to access undefined hash key"
40	16			16		10330	use Hash::Util qw(lock_keys);
	16					58558
	16					106
41
42							# FIXME
43							# Not needed in release -> should be always commented unless at dev time
44							#use feature qw(say);
45							#use Data::Dumper;
46							#$Data::Dumper::Sortkeys = 1;
47
48							# Set to 1 if you wish to turn on debug without touching caller's code
49							our $ALWAYS_DEBUG = 0;
50
51							# Keep it set to 0 unless you know what you're doing!
52							# Note
53							# Taken into account only if debug is set.
54							my $DEBUG_DATETIME_FORMATS = 0;
55							# The below is taken into account only if $DEBUG_DATETIME_FORMATS is set.
56							# It'll resqult in even more debug output. It becomes really MASSIVE debug output.
57							my $DEBUG_DATETIME_FORMATS_EVEN_MORE = 0;
58
59							#
60							# Uncomment to replace carp and croak with cluck and confess, respectively
61							# Also reachable with perl option:
62							# -MCarp=verbose
63							# See 'perldoc Carp'.
64							#
65							#$Carp::Verbose = 1;
66
67
68							# * *************** *
69							# * BEHAVIOR TUNING *
70							# * *************** *
71
72
73							# * **************************************************** *
74							# * ALL THE VARIABLES BELOW ARE RATHER LOW LEVEL. *
75							# * IF YOU UPDATE IT, IT WILL LIKELY BREAK THE TEST PLAN *
76							# * **************************************************** *
77
78							my $DEF_SEARCH_CASE = 0; # Case insensitive search by default
79							my $DEF_SEARCH_TRIM = 1; # Trim values by default
80							my $DEF_SEARCH_IGNORE_ACCENTS = 1; # Ignore accents
81							my $DEF_SEARCH_IGNORE_EMPTY = 1; # Ignore empty strings in searches by default
82							my $DEF_SEARCH_VALUE_IF_NOT_FOUND = undef; # If not found, returned field value is undef
83							my $DEF_SEARCH_VALUE_IF_AMBIGUOUS = undef; # If more than one record found by search (when a
84							# unique value is expected), return undef
85							my $DEF_SEARCH_IGNORE_AMBIGUOUS = 1; # By default, ignore the fact that multiple records are
86							# found by search and return the first record found
87
88							my $DETECT_ENCODING = 1;
89
90							my $DEFAULT_IN_ENCODING = 'UTF-8,latin1';
91
92							# By default, input encoding detected is used for output.
93							# -> the constant below is used if and only if:
94							# Inbound encoding is unknown
95							# No providing of out_encoding attribute (out_encoding takes precedence when provided)
96							my $DEFAULT_OUT_ENCODING = 'UTF-8';
97
98							my $DEFAULT_ESCAPE_CHAR = '\\';
99							my $DEFAULT_QUOTE_CHAR = '"';
100
101							#
102							# The code that workarounds $FIX_PERLMONKS_823214 (see below) makes sense only under plain
103							# Windows.
104							#
105							# "Plain" Windows?
106							# This code MUST NOT be executed under cygwin because cygwin uses unix line breaks. This is
107							# why we detect /mswin/. Would we detect /win/, we'd catch cygwin, too, and we don't want
108							# that.
109							#
110							my $OS_IS_PLAIN_WINDOWS = !! ($^O =~ /mswin/i);
111
112							#
113							# Shall we fix the issue reported as #823214 in PerlMonks? See
114							# http://www.perlmonks.org/?node_id=823214
115							#
116							# In brief (in case the link above would be broken one day):
117							# Under Windows, output mode set to UTF-16LE produces line breaks made of octets "0d 0a 00",
118							# whereas it should be "0d 00 0a 00".
119							#
120							# The code also fixes UTF-16BE (but it was not tested).
121							#
122							my $FIX_PERLMONKS_823214 = 1;
123
124
125							# * **** *
126							# * CODE *
127							# * **** *
128
129
130							sub ERR_UNKNOWN_FIELD() { 0 }
131
132							# Store meta-data about each column
133							struct ColData => {
134							field_name => '$',
135							header_text => '$',
136							description => '$',
137							dt_format => '$',
138							dt_locale => '$'
139							};
140
141							#
142							# Enumeration of ef_type member below
143							# Alternative:
144							# use enum (...)
145							#
146							# But it is not also by default on my distro and installing a package for 3 constants, I find it
147							# a bit overkill!
148							#
149							my ($EF_LINK, $EF_FUNC, $EF_COPY) = 0..2;
150							struct ExtraField => {
151							ef_type => '$',
152							self_name => '$',
153							description => '$',
154
155							check_field_existence => '$',
156
157							# For when ef_type is set to $EF_LINK
158
159							link_self_search => '$',
160							link_remote_obj => '$',
161							link_remote_search => '$',
162							link_remote_read => '$',
163							link_vlookup_opts => '%',
164
165							# For when ef_type is set to $EF_FUNC
166
167							func_sub => '$',
168
169							# For when ef_type is set to $EF_COPY
170
171							copy_source => '$',
172							copy_sub => '$'
173
174							};
175
176							my $SEARCH_VALIDATE_OPTIONS = {
177							value_if_not_found => {type => UNDEF \| SCALAR, optional => 1},
178							value_if_found => {type => UNDEF \| SCALAR, optional => 1},
179							value_if_ambiguous => {type => UNDEF \| SCALAR, optional => 1},
180							ignore_ambiguous => {type => BOOLEAN, optional => 1},
181							case => {type => BOOLEAN, optional => 1},
182							trim => {type => BOOLEAN, optional => 1},
183							ignore_empty => {type => BOOLEAN, optional => 1},
184							ignore_accents => {type => BOOLEAN, optional => 1}
185							};
186
187							sub _is_utf8 {
188	416			416		1007	my $e = shift;
189
190	416	100				3836	return 1 if $e =~ m/^(utf-?8\|ucs-?8)/i;
191	32					101	return 0;
192							}
193
194							# To replace // in old perls: return the first non-undef value in provided list
195							sub _get_def {
196	12004			12004		25387	for (@_) {
197	18591	100				53806	return $_ if defined($_);
198							}
199	882					1896	return undef;
200							}
201
202							sub _print {
203	15			15		32	my $self = shift;
204	15					27	my $t = shift;
205
206	15					26	my $infoh = $self->{infoh};
207	15	50				55	return if ref $infoh ne 'GLOB';
208
209	0					0	print($infoh $t);
210							}
211
212							sub _printf {
213	17			17		31	my $self = shift;
214
215	17					32	my $infoh = $self->{infoh};
216	17	50				64	return if ref $infoh ne 'GLOB';
217
218	0					0	printf($infoh @_);
219							}
220
221							sub _print_warning {
222	67			67		145	my $self = shift;
223	67					140	my $warning_message = shift;
224	67					129	my $dont_wrap = shift;
225
226	67	100				232	my $msg = ($dont_wrap ? $warning_message : "$PKG: warning: $warning_message");
227	67	100				4925	carp $msg unless $self->{quiet};
228							}
229
230							sub _close_inh {
231	314			314		621	my $self = shift;
232
233	314	100				6037	close $self->{_inh} if $self->{_close_inh_when_finished};
234	314					1094	$self->{_inh} = undef;
235	314					1277	$self->{_close_inh_when_finished} = undef;
236							}
237
238							sub _close_outh {
239	126			126		278	my $self = shift;
240
241	126	50	66			18873	close $self->{outh} if defined($self->{outh}) and $self->{_close_outh_when_finished};
242	126					464	$self->{outh} = undef;
243	126					327	$self->{_close_outh_when_finished} = undef;
244							}
245
246							sub _print_error {
247	81			81		636	my ($self, $error_message, $dont_stop, $err_code, $err_extra) = @_;
248
249	81					294	my $msg = "$PKG: error: $error_message";
250
251	81	100	100			432	if (defined($err_code) and !$self->{quiet} and $self->{croak_if_error}) {
			100
252	5	50				21	if ($err_code == ERR_UNKNOWN_FIELD) {
253	5					11	my %f = %{$err_extra};
	5					29
254	5					14	my @cols;
255	5					20	for my $n (keys %f) {
256	15					41	$cols[$f{$n}] = $n;
257							}
258	5					19	$self->_print($self->get_in_file_disp() . " column - field name correspondance:\n");
259	5					18	$self->_print("COL # FIELD\n");
260	5					15	$self->_print("----- -----\n");
261	5					16	for my $i (0..$#cols) {
262	17	100				58	$self->_printf("%05d %s\n", $i, (defined($cols[$i]) ? $cols[$i] : ''));
263							}
264							} else {
265	0					0	confess "Unknown error code: '$err_code'\n";
266							}
267							}
268
269	81	100	100			380	if ($self->{croak_if_error} and !$dont_stop) {
270	30					119	$self->_close_read(1);
271	30					123	$self->_close_inh();
272	30					101	$self->_close_outh();
273	30					160	$self->_status_reset(1);
274	30					5202	croak $msg;
275							}
276	51					196	$self->_print_warning($msg, 1);
277							}
278
279							#
280							# Return the string passed in argument with all accents removed from characters.
281							# Do it in a rather general and reliable way, not tied to latin1.
282							# Tested on latin1 and latin2 character sets.
283							#
284							# Credits:
285							# http://stackoverflow.com/questions/17561839/remove-accents-from-accented-characters
286							#
287							sub remove_accents {
288	3144			3144	1	29323	validate_pos(@_, {type => SCALAR});
289
290	3144					8564	my $s = $_[0];
291	3144					13584	my $r = NFKD($s);
292	3144					8753	$r =~ s/\p{Nonspacing_Mark}//g;
293	3144					7763	return $r;
294							}
295
296							sub _detect_csv_sep {
297	255			255		572	my $ST_OUTSIDE = 0;
298	255					500	my $ST_INSIDE = 1;
299
300	255					779	my ($self, $escape_char, $quote_char, $sep) = @_;
301
302	255					553	my $_debugh = $self->{_debugh};
303	255					559	my $inh = $self->{_inh};
304	255					502	my $_debug = $self->{_debug};
305
306	255					526	delete $self->{_inh_header};
307
308	255	100				823	$escape_char = $DEFAULT_ESCAPE_CHAR unless defined($escape_char);
309
310	255	50				785	$self->_print_error("illegal \$escape_char: '$escape_char' (length >= 2)"), return 0
311							if length($escape_char) >= 2;
312
313	255	50				781	$self->_print_error("$PKG: error: illegal \$quote_char '$quote_char' (length >= 2)"), return 0
314							if length($quote_char) >= 2;
315
316	255	50				782	$escape_char = '--' if $escape_char eq '';
317	255	50				714	$quote_char = '--' if $quote_char eq '';
318
319							# FIXME (?)
320							# Avoid inlined magic values for separator auto-detection.
321							# Issue is, as you can see below, the behavior is also hard-coded and not straightforward to
322							# render 'tunable' ("," and ";" take precedence over "\t").
323	255					1286	my %Seps = (
324							";" => 0,
325							"," => 0,
326							"\t" => 0
327							);
328
329	255					3210	my $h = <$inh>;
330	255	50				2860	if ($self->{inh_is_stdin}) {
331	0					0	$self->{_inh_header} = $h;
332	0	0				0	print($_debugh "Input is STDIN => saving header line to re-read it " .
333							"later (in-memory)\n") if $_debug;
334							} else {
335	255					1800	seek $inh, 0, SEEK_SET;
336	255	50				860	print($_debugh "Input is not STDIN => using seek function to rewind " .
337							"read head after header line reading\n") if $_debug;
338							}
339
340	255					708	chomp $h;
341	255					520	my $status = $ST_OUTSIDE;
342	255					653	my $l = length($h);
343	255					481	my $c = 0;
344	255					782	while ($c < $l) {
345	4769					7927	my $ch = substr($h, $c, 1);
346	4769					6711	my $chnext = '';
347	4769	100				10870	$chnext = substr($h, $c + 1, 1) if ($c < $l - 1);
348	4769	100				10683	if ($status == $ST_INSIDE) {
		50
349	1521	50	66			4169	if ($ch eq $escape_char and $chnext eq $quote_char) {
		100
350	0					0	$c += 2;
351							} elsif ($ch eq $quote_char) {
352	197					307	$status = $ST_OUTSIDE;
353	197					435	$c++;
354							} else {
355	1324					2585	$c++;
356							}
357							} elsif ($status == $ST_OUTSIDE) {
358	3248	50	33			10777	if ($ch eq $escape_char and ($chnext eq $quote_char or
		100	66
		100
359							exists $Seps{$chnext})) {
360	0					0	$c += 2;
361							} elsif (exists $Seps{$ch}) {
362	930					1420	$Seps{$ch}++;
363	930					1990	$c++;
364							} elsif ($ch eq $quote_char) {
365	197					296	$status = $ST_INSIDE;
366	197					425	$c++;
367							} else {
368	2121					4746	$c++;
369							}
370							}
371							}
372
373	255	100	100			1852	if ($Seps{";"} == 0 and $Seps{","} >= 1) {
		100	66
		50	33
			33
374	144					348	$$sep = ",";
375	144					761	return 1;
376							} elsif ($Seps{","} == 0 and $Seps{";"} >= 1) {
377	102					220	$$sep = ";";
378	102					499	return 1;
379							} elsif ($Seps{","} == 0 and $Seps{";"} == 0 and $Seps{"\t"} >= 1) {
380	0					0	$$sep = "\t";
381	0					0	return 1;
382							} else {
383
384							# Check the case where there is one unique column, in which case,
385							# assume comma separator.
386	9					32	my $h_no_accnt = remove_accents($h);
387	9	100				64	if ($h_no_accnt =~ m/^[[:alnum:]_]+$/i) {
388	3					8	$$sep = ",";
389	3					15	return 1;
390							}
391
392	6					14	$$sep = "";
393	6	50				14	if ($_debug) {
394	0					0	for my $k (keys %Seps) {
395	0					0	print($_debugh "\$Seps{'$k'} = $Seps{$k}\n");
396							}
397							}
398	6					31	return 0;
399							}
400							}
401
402							sub _reopen_input {
403	633			633		1161	my $self = shift;
404
405	633					1191	my $in_file = $self->{in_file};
406
407	633					972	my $inh;
408	633	50				15399	if (!open($inh, "<", $in_file)) {
409	0					0	$self->_print_error("unable to open file '$in_file': $!");
410	0					0	return undef;
411							}
412	633	50				2252	if (!$self->{_leave_encoding_alone}) {
413
414							confess "Oups! _inh_encoding_string undef?"
415	633	50				1690	unless defined($self->{_inh_encoding_string});
416
417	633					4315	binmode $inh, $self->{_inh_encoding_string};
418							}
419
420	633					30469	return $inh;
421							}
422
423							# Abstraction layer, not useful Today, could bring added value when looking into Text::CSV I/O
424							sub _mygetline {
425	9078			9078		19491	my ($csvobj, $fh) = @_;
426
427	9078					210971	return $csvobj->getline($fh);
428							}
429
430							sub _detect_escape_char {
431	304			304		960	my ($self, $quote_char, $sep_char, $ref_escape_char, $ref_is_always_quoted) = @_;
432
433	304					670	my $in_file = $self->{in_file};
434	304					683	my $_debug = $self->{_debug};
435	304					579	my $_debugh = $self->{_debugh};
436
437	304					707	$$ref_escape_char = $DEFAULT_ESCAPE_CHAR;
438	304					582	$$ref_is_always_quoted = undef;
439
440	304	100				862	if ($self->{_int_one_pass}) {
441	13					37	return;
442							}
443
444	291					881	$self->_register_pass("detect escape character");
445
446	291					534	my $qesc = 0;
447	291					909	my $inh = $self->_reopen_input();
448	291	50				951	if (defined($inh)) {
449	291					4363	while (my $l = <$inh>) {
450	7088					17838	chomp $l;
451
452							# Very heuristic criteria...
453							# Tant pis.
454	7088	100				31105	$qesc = 1 if $l =~ m/(?<!$sep_char)$quote_char$quote_char(?!$sep_char)/;
455
456							}
457	291					2732	close $inh;
458							}
459	291	100				992	if ($qesc) {
460	7					20	$$ref_escape_char = '"';
461							} else {
462	284					735	$$ref_escape_char = '\\' ;
463							}
464
465	291					567	my $is_always_quoted = 0;
466	291					842	$inh = $self->_reopen_input();
467	291	50				1212	if (defined($inh)) {
468	291					3694	my $csv = Text::CSV->new({sep_char => $sep_char,
469							allow_whitespace => 1, binary => 1, auto_diag => 0,
470							quote_char => $quote_char, escape_char => $$ref_escape_char,
471							keep_meta_info => 1,
472							allow_loose_escapes => 1});
473	291					65381	$is_always_quoted = 1;
474	291					1004	while (my $ar = _mygetline($csv, $inh)) {
475	1628					57281	my @a = @{$ar};
	1628					4629
476	1628					2907	my $e = $#a;
477	1628					3473	for my $i (0..$e) {
478	11018	100				91320	$is_always_quoted = 0 unless $csv->is_quoted($i);
479							}
480	1628	100				17517	last unless $is_always_quoted;
481							}
482	291	100				2473	my $is_ok = ($csv->eof() ? 1 : 0);
483	291					5639	close $inh;
484							}
485
486	291	50				1229	print($_debugh " is_always_quoted: $is_always_quoted\n") if $_debug;
487	291					616	$$ref_is_always_quoted = $is_always_quoted;
488
489	291					1317	return;
490							}
491
492							sub _register_pass {
493	950			950		2193	my ($self, $pass_name) = @_;
494	950					1756	my $_debug = $self->{_debug};
495	950					1642	my $_debugh = $self->{_debugh};
496
497	950					1916	$self->{_pass_count}++;
498
499	950	50				2781	return unless $_debug;
500
501	0					0	print($_debugh "Pass #" . $self->{_pass_count} . " ($pass_name) done\n");
502							}
503
504							sub _update_in_mem_record_count {
505	501			501		1407	my ($self, $nonexistent_arg) = @_;
506	501					1062	my $_debug = $self->{_debug};
507	501					1061	my $_debugh = $self->{_debugh};
508
509	501	50				1427	confess "Hey! what is this second argument?" if defined($nonexistent_arg);
510
511	501					910	my $new_count = $#{$self->{_flat}} + 1;
	501					1381
512
513	501					1002	my $updated_max = 0;
514	501	100				1776	if ($new_count > $self->get_max_in_mem_record_count()) {
515	153					637	$self->_set_max_in_mem_record_count($new_count);
516	153					310	$updated_max = 1;
517							}
518
519	501					1528	$self->{_in_mem_record_count} = $new_count;
520	501	50				1687	if ($_debug) {
521	0					0	print($_debugh "_in_mem_record_count updated, set to $new_count");
522	0	0				0	print($_debugh " (also updated max)") if $updated_max;
523	0					0	print($_debugh "\n");
524							}
525							}
526
527							sub _detect_inh_encoding {
528	313			313		1377	my ($self, $enc, $via, $in_file, $detect_enc) = @_;
529	313					785	my $_debug = $self->{_debug};
530	313					665	my $_debugh = $self->{_debugh};
531
532	313	100	66			1312	$enc = $DEFAULT_IN_ENCODING if !defined($enc) or $enc eq '';
533
534	313					2405	my @encodings = split(/\s,\s/, $enc);
535
536	313	50				1158	confess "Oups! No encoding to try?" if $#encodings < 0;
537
538	313	50				972	print($_debugh "[ST] _detect_inh_encoding(): start\n") if $_debug;
539
540	313					582	my $wrn = 0;
541	313					944	my $m;
542							my $m0;
543	313					0	my $ee;
544	313					808	for my $e (@encodings) {
545	325					693	$ee = $e;
546	325					856	my $viadef = _get_def($via, '');
547	325					1113	$m = ":encoding($e)$viadef";
548	325	100				1081	$m0 = $m unless defined($m0);
549
550	325	100				905	last unless $detect_enc;
551
552	313	50	33			1843	confess "Oups! in_file not defined?" if !defined($in_file) or $in_file eq '';
553
554	313	50				871	print($_debugh " Checking encoding '$e' / '$m'\n") if $_debug;
555	313					617	$wrn = 0;
556
557	313					1426	$self->_register_pass("check $e encoding");
558
559	313					643	my $utf8_bom = 0;
560	313	100				976	if (_is_utf8($e)) {
561	289	50				7500	if (open my $fh, '<:raw', $in_file) {
562	289					732	my $bom;
563	289					5712	read $fh, $bom, 3;
564	289	100	66			1963	if (length($bom) == 3 and $bom eq "\xef\xbb\xbf") {
565	12	100				84	if (!defined($via)) {
566	10					35	$m .= ":via(File::BOM)";
567							}
568							}
569	289					3955	close $fh;
570							}
571							}
572
573	313					753	my $inh;
574	313	50				6191	if (!open($inh, "<", $in_file)) {
575	0					0	$self->_print_error("unable to open file '$in_file': $!");
576	0					0	return ($encodings[0], $m0);
577							}
578	15			15		116	binmode $inh, $m;
	15					32
	15					102
	313					4070
579
580							# TURN OFF WARNINGS OUTPUT
581
582							{
583	313					42788	local $SIG{__WARN__} = sub {
584	51			51		746	$wrn++;
585							# Uncomment only for debug!
586							# Otherwise you'll get quite a good deal of output at each execution :-)
587							# print(STDERR @_);
588	313					2604	};
589	313					4928	while (<$inh>) { }
590							}
591
592							# WARNINGS ARE BACK ON
593
594	313					18097	close $inh;
595	313	50				1212	print($_debugh " '$m' counts $wrn warning(s)\n") if $_debug;
596
597	313	100				1608	last if $wrn == 0;
598							}
599
600	313	50				1086	if ($wrn >= 1) {
601	0					0	$self->_print_warning("encoding warnings encountered during initial check, " .
602							"using '$encodings[0]'");
603	0					0	return ($encodings[0], $m0);
604							}
605
606	313	50				958	confess "Oups! undef encoding string?" unless defined($m);
607
608	313	50				873	print($_debugh " Detected encoding string '$ee' / '$m'\n") if $_debug;
609	313					1561	return ($ee, $m);
610							}
611
612							#
613							# Each of these functions brings status to the next value (current status + 1).
614							# Each of these functions returns 0 if an error occured, 1 if all good
615							#
616							my @status_forward_functions = (
617							"_S1_init_input", # To go from S0 to S1
618							"_S2_init_fields_from_header", # To go form S1 to S2
619							"_S3_init_fields_extra", # To go from S2 to S3
620							"_S4_read_all_in_mem", # To go from S3 to S4
621							);
622
623							sub _status_reset {
624	487			487		1010	my $self = shift;
625
626	487					3754	validate_pos(@_, {type => SCALAR, optional => 1});
627	487					2042	my $called_from_print_error = _get_def($_[0], 0);
628
629	487	100	100			2351	if (defined($self->{_status}) and $self->{_status} == 4) {
630	18	100				65	unless ($called_from_print_error) {
631	16					39	my $msg = "in-memory CSV content discarded, will have to re-read input";
632	16					65	$self->_print_warning($msg);
633							}
634	18					93	$self->{_flat} = [ ];
635	18					173	$self->_update_in_mem_record_count();
636							}
637
638	487					1193	$self->{_status} = 0;
639	487	100				1446	return 0 if $called_from_print_error;
640	457					1328	return $self->_status_forward('S1');
641							}
642
643							sub _status_forward {
644	4017			4017		6996	my $self = shift;
645
646	4017					10067	return $self->___status_move(@_, 1);
647							}
648
649							sub _status_backward {
650	139			139		269	my $self = shift;
651
652	139					399	return $self->___status_move(@_, -1);
653							}
654
655							# You should not call ___status_move() in the code, that is why the name is prefixed with 3
656							# underscores! Only _status_forward and _status_backward should call it.
657							sub ___status_move {
658	4156			4156		9007	my ($self, $target, $step) = @_;
659
660	4156					7854	my $_debug = $self->{_debug};
661	4156					7283	my $_debugh = $self->{_debugh};
662
663	4156	50	66			25252	if (!defined($step) or ($step != -1 and $step != 1)) {
			33
664	0					0	confess "Oups! \$step has a wrong value: '$step'";
665							}
666
667	4156					6741	my $n;
668	4156	50				20288	confess "Oups! illegal status string: '$target'" unless ($n) = $target =~ m/^S(\d)$/;
669
670	4156	100				11469	if ($self->{_read_in_progress}) {
671	1					6	$self->_print_error("illegal call while read is in progress, " .
672							"would lead to infinite recursion", 0);
673	0					0	confess "Aborted.";
674							}
675
676	4155	100				10067	if ($step == -1) {
677	139	100				527	if ($n < $self->{_status}) {
678	19	100				76	if ($self->{_status} == 4) {
679	16	50				72	print($_debugh "[ST] Requested status $n but will go to status 0\n") if $_debug;
680	16					63	return $self->_status_reset();
681							}
682	3					6	$self->{_status} = $n ;
683	3	50				9	print($_debugh "[ST] New status: ". $self->{_status} . "\n") if $_debug;
684							}
685	123					446	return 1;
686							}
687
688	4016	100				11991	if ($self->{_status} < $n) {
689	856	50				2278	print($_debugh "[ST] Current status: ". $self->{_status} . "\n") if $_debug;
690							}
691
692	4016	100	100			13962	if ($self->{_status} <= 1 and $n >= 2 and $self->{_int_one_pass} and
			100
			100
693							$self->get_pass_count() >= 1) {
694	12					26	my $msg = "one_pass set, unable to read input again";
695	12	50				54	$self->_print_error($msg), return 0 if $self->{one_pass};
696	0	0				0	$self->_print_warning($msg) if !$self->{one_pass};
697							}
698
699	4004					10264	while ($self->{_status} < $n) {
700
701	1326					3229	my $funcname = $status_forward_functions[$self->{_status}];
702	1326	50				3316	confess "Oups! Unknown status?" unless defined($funcname);
703
704	1326	50				3175	print($_debugh "[ST] Now executing $funcname\n") if $_debug;
705
706	1326	50				6403	if (my $member_function = $self->can($funcname)) {
707	1326	100				3958	return 0 unless $self->$member_function();
708							} else {
709	0					0	confess "Could not find method $funcname in $PKG!";
710							}
711
712	1307					3121	$self->{_status} += $step;
713	1307	50				5235	print($_debugh "[ST] New status: ". $self->{_status} . "\n") if $_debug;
714							}
715
716	3985					12473	return 1;
717							}
718
719							sub new {
720	326			326	1	317556	my ($class, @args) = @_;
721
722	326					42533	@args = validate(@args,
723							{ in_file => {type => SCALAR, optional => 1},
724							infoh => {type => UNDEF \| GLOBREF, default => \*STDERR, optional => 1},
725							verbose => {type => BOOLEAN, default => 0, optional => 1},
726							quiet => {type => BOOLEAN, optional => 1},
727							croak_if_error => {type => BOOLEAN, default => 1, optional => 1},
728							inh => {type => GLOBREF, optional => 1},
729							in_csvobj => {type => OBJECT, optional => 1},
730							sep_char => {type => SCALAR, optional => 1},
731							quote_char => {type => SCALAR, optional => 1},
732							escape_char => {type => SCALAR, optional => 1},
733							has_headers => {type => BOOLEAN, default => 1, optional => 1},
734							out_has_headers => {type => UNDEF \| BOOLEAN, default => undef, optional => 1},
735							fields_ar => {type => ARRAYREF, optional => 1},
736							fields_hr => {type => HASHREF, optional => 1},
737							fields_column_names => {type => ARRAYREF, optional => 1},
738							search_case => {type => SCALAR, optional => 1},
739							search_trim => {type => SCALAR, optional => 1},
740							search_ignore_empty => {type => SCALAR, optional => 1},
741							search_ignore_accents => {type => SCALAR, optional => 1},
742							search_ignore_ambiguous => {type => SCALAR, optional => 1},
743							search_value_if_not_found => {type => SCALAR, optional => 1},
744							search_value_if_found => {type => SCALAR, optional => 1},
745							search_value_if_ambiguous => {type => SCALAR, optional => 1},
746							walker_hr => {type => CODEREF, optional => 1},
747							walker_ar => {type => CODEREF, optional => 1},
748							read_post_update_hr => {type => CODEREF, optional => 1},
749							write_filter_hr => {type => CODEREF, optional => 1},
750							out_filter => {type => CODEREF, optional => 1},
751							write_fields => {type => ARRAYREF, optional => 1},
752							out_fields => {type => ARRAYREF, optional => 1},
753							out_file => {type => SCALAR, optional => 1},
754							out_always_quote => {type => BOOLEAN, optional => 1},
755							out_sep_char => {type => SCALAR, optional => 1},
756							out_quote_char => {type => SCALAR, optional => 1},
757							out_escape_char => {type => SCALAR, optional => 1},
758							out_dates_format => {type => SCALAR, optional => 1},
759							out_dates_locale => {type => SCALAR, optional => 1},
760							encoding => {type => SCALAR, optional => 1},
761							via => {type => SCALAR, optional => 1},
762							out_encoding => {type => SCALAR, optional => 1},
763							dont_mess_with_encoding => {type => BOOLEAN, optional => 1},
764							one_pass => {type => BOOLEAN, optional => 1},
765							no_undef => {type => BOOLEAN, optional => 1},
766							fields_dates => {type => ARRAYREF, optional => 1},
767							fields_dates_auto => {type => BOOLEAN, optional => 1},
768							dates_formats_to_try => {type => ARRAYREF, optional => 1},
769							dates_formats_to_try_supp => {type => ARRAYREF, optional => 1},
770							dates_ignore_trailing_chars => {type => BOOLEAN, optional => 1},
771							dates_search_time => {type => BOOLEAN, optional => 1},
772							dates_locales => {type => SCALAR, optional => 1},
773							out_utf8_bom => {type => SCALAR, optional => 1},
774							dates_zeros_ok => {type => SCALAR, default => 1, optional => 1},
775							_debug => {type => BOOLEAN, default => 0, optional => 1},
776							_debug_read => {type => BOOLEAN, default => 0, optional => 1},
777							_debug_extra_fields => {type => BOOLEAN, optional => 1},
778							_debugh => {type => UNDEF \| GLOBREF, optional => 1}
779							}
780							);
781
782	322					7777	my $self = { @args };
783
784	322					916	my @fields = keys %{$self};
	322					1651
785
786							# croak_if_error
787
788	322					1050	my $croak_if_error = $self->{croak_if_error};
789
790							# verbose and _debug management
791
792	322	50				1471	$self->{_debugh} = $self->{infoh} if !defined($self->{_debugh});
793	322	50				1109	$self->{_debug} = 1 if $ALWAYS_DEBUG;
794	322					701	my $_debug = $self->{_debug};
795	322	50				909	$self->{verbose} = 1 if $_debug;
796	322					690	my $verbose = $self->{verbose};
797
798	322					704	my $_debugh = $self->{_debugh};
799
800	322					763	bless $self, $class;
801
802							# fields_ar, fields_hr
803
804	322	100				1373	if (defined($self->{fields_ar}) +
805							defined($self->{fields_hr}) +
806							defined($self->{fields_column_names})
807							>= 2) {
808	1					4	$self->_print_error("mixed use of fields_ar, fields_hr and fields_column_names. " .
809							"Use one at a time.");
810							}
811	322	100	100			1316	if (defined($self->{fields_ar}) and !defined($self->{fields_hr})) {
812	2					4	my @f = @{$self->{fields_ar}};
	2					6
813	2					5	my %h;
814	2					5	for my $e (@f) {
815	6					18	$h{$e} = "^$e\$";
816							}
817	2					6	$self->{fields_hr} = \%h;
818							}
819	322	100				1020	if (!$self->{has_headers}) {
820	13	100				38	if (defined($self->{fields_ar})) {
821	1					4	$self->_print_error("fields_ar irrelevant if CSV file has no headers");
822	1					8	return undef;
823							}
824	12	100				35	if (defined($self->{fields_hr})) {
825	1					3	$self->_print_error("fields_hr irrelevant if CSV file has no headers");
826	1					10	return undef;
827							}
828							}
829
830							# in_file or inh
831
832	320					921	$self->{_flat} = [ ];
833
834	320					803	$self->{_read_update_after_hr} = { };
835	320					794	$self->{_write_update_before_hr} = { };
836
837	320					1364	$self->_update_in_mem_record_count();
838
839	320	100				1070	return undef unless $self->_status_reset();
840
841	311	50				899	$self->_debug_show_members() if $_debug;
842
843	311	100				1062	if ($self->{dates_zeros_ok}) {
844							$self->{_refsub_is_datetime_empty} = sub {
845	9626			9626		18500	my $v = $_[0];
846	9626	100				36790	if ($v !~ m/[1-9]/) {
847	4201	100				12200	return 1 if $v =~ m/^[^0:]*0+[^0:]+0+[^0:]+0+/;
848							}
849	9623					37148	return 0;
850							}
851	309					1937	}
852
853	311					2190	return $self;
854							};
855
856							#
857							# Return 0 if error, 1 if all good
858							#
859							# Do all low level activities associated to input:
860							# I/O init
861							# Detect encoding
862							# Detect CSV separator
863							# Detect escape character
864							#
865							sub _S1_init_input {
866	463			463		1087	my $self = shift;
867
868	463					1011	my $croak_if_error = $self->{croak_if_error};
869	463					914	my $_debug = $self->{_debug};
870	463					917	my $_debugh = $self->{_debugh};
871
872	463	100				1368	$self->{in_file} = '' unless defined($self->{in_file});
873	463					1160	$self->{_close_inh_when_finished} = 0;
874
875							$self->{_leave_encoding_alone} = $self->{dont_mess_with_encoding}
876	463	50				1342	if defined($self->{dont_mess_with_encoding});
877
878	463					1596	$self->{_int_one_pass} = _get_def($self->{one_pass}, 0);
879	463					1118	my $in_file_disp;
880
881
882							#
883							# LOW LEVEL INIT STEP 1 OF 4
884							#
885							# Manage I/O (= in most cases, open input file...)
886							#
887
888	463	100				1259	if (defined($self->{inh})) {
889	4	50				11	$self->{_leave_encoding_alone} = 1 unless defined($self->{dont_mess_with_encoding});
890	4					10	$in_file_disp = _get_def($self->{in_file}, '<?>');
891	4	50				13	$self->{_int_one_pass} = 1 unless defined($self->{one_pass});
892	4					6	$self->{_inh} = $self->{inh};
893							} else {
894	459	50				1568	$self->{_leave_encoding_alone} = 0 unless defined($self->{dont_mess_with_encoding});
895	459					1013	my $in_file = $self->{in_file};
896	459					769	my $inh;
897	459	50				1190	if ($in_file eq '') {
898	0					0	$inh = \*STDIN;
899	0					0	$self->{inh_is_stdin} = 1;
900	0	0				0	$self->{_int_one_pass} = 1 unless defined($self->{one_pass});
901	0					0	$in_file_disp = '<stdin>';
902							} else {
903	459	100				21694	if (!open($inh, '<', $in_file)) {
904	3					42	$self->_print_error("unable to open file '$in_file': $!");
905	3					48	return 0;
906							}
907	456					1412	$in_file_disp = $in_file;
908	456					1173	$self->{_close_inh_when_finished} = 1;
909							}
910	456					1226	$self->{_inh} = $inh;
911							}
912
913	460	50				1262	confess "Oups! in_file_disp not defined?" unless defined($in_file_disp);
914	460					1228	$self->{_in_file_disp} = $in_file_disp;
915
916
917							#
918							# LOW LEVEL INIT STEP 2 OF 4
919							#
920							# "Detection" of encoding
921							#
922							# WARNING
923							# As explained in the manual, it is a very partial and limited detection...
924							#
925
926	460	100				1405	unless ($self->{_leave_encoding_alone}) {
927	456	100				1323	unless ($self->{_init_input_already_called}) {
928							my ($e, $m) = $self->_detect_inh_encoding($self->{encoding}, $self->{via},
929	313	100				2098	$self->{in_file}, ($self->{_int_one_pass} ? 0 : $DETECT_ENCODING));
930	313					1203	$self->{_inh_encoding} = $e;
931	313					859	$self->{_inh_encoding_string} = $m;
932							}
933
934	456					3848	binmode $self->{_inh}, $self->{_inh_encoding_string};
935							print($_debugh "Input encoding: '" . $self->{_inh_encoding} . "' / '" .
936	456	50				23439	$self->{_inh_encoding_string} . "'\n") if $_debug;
937
938							}
939
940	460	100				1707	$self->{out_file} = '' unless defined($self->{out_file});
941
942
943							#
944							# LOW LEVEL INIT STEP 3 OF 4
945							#
946							# Detection of CSV separator and escape character
947							#
948
949	460					872	my $sep_char;
950	460					1047	my $escape_char = $self->{escape_char};
951	460	100				1535	$self->{quote_char} = $DEFAULT_QUOTE_CHAR unless defined($self->{quote_char});
952	460					968	my $quote_char = $self->{quote_char};
953	460	100				1362	unless (defined($self->{in_csvobj})) {
954	455	100				1383	if (defined($self->{sep_char})) {
955	200					442	$sep_char = $self->{sep_char};
956	200	50				568	print($_debugh "-- $in_file_disp: CSV separator set to \"") if $_debug;
957							} else {
958							# The test below (on _init_input_already_called) shoud be useless.
959							# Left for the sake of robustness.
960	255	50				770	unless ($self->{_init_input_already_called}) {
961	255	100				1099	if (!$self->_detect_csv_sep($escape_char, $quote_char, \$sep_char)) {
962	6					29	$self->_print_error("'$in_file_disp': cannot detect CSV separator");
963	0					0	return 0;
964							}
965	249	50				746	print($_debugh "-- $in_file_disp: CSV separator detected to \"") if $_debug;
966	249					686	$self->{sep_char} = $sep_char;
967							}
968							}
969	449	0				1168	print($_debugh ($sep_char eq "\t" ? '\t' : $sep_char) . "\"\n") if $_debug;
		50
970
971	449					796	my $is_always_quoted;
972	449	100				1316	unless (defined($self->{escape_char})) {
973	304					1346	$self->_detect_escape_char($quote_char, $sep_char, \$escape_char, \$is_always_quoted);
974	304					1177	$self->{escape_char} = $escape_char;
975	304					751	$self->{_is_always_quoted} = $is_always_quoted;
976							}
977
978	449					3655	$self->{_in_csvobj} = Text::CSV->new({sep_char => $sep_char,
979							allow_whitespace => 1, binary => 1, auto_diag => 0,
980							quote_char => $quote_char, escape_char => $escape_char,
981							allow_loose_escapes => 1});
982	449	50				88376	unless (defined($self->{_in_csvobj})) {
983	0					0	$self->_print_error("error creating input Text::CSV object");
984	0					0	return 0;
985							}
986
987							} else {
988	5					9	$self->{_in_csvobj} = $self->{in_csvobj};
989							}
990
991	454					1272	$self->{_init_input_already_called} = 1;
992
993	454					1598	return 1;
994							}
995
996							sub get_in_file_disp {
997	3640			3640	1	6004	my $self = shift;
998
999	3640					21415	validate_pos(@_);
1000
1001	3640					10961	my $in_file_disp = _get_def($self->{_in_file_disp}, '?');
1002	3640					7890	return $in_file_disp;
1003							}
1004
1005							sub get_sep_char {
1006	0			0	1	0	my $self = shift;
1007
1008	0					0	validate_pos(@_);
1009
1010	0					0	return $self->{sep_char};
1011							}
1012
1013							sub get_escape_char {
1014	9			9	1	255	my $self = shift;
1015
1016	9					79	validate_pos(@_);
1017
1018	9					84	return $self->{escape_char};
1019							}
1020
1021							sub get_in_encoding {
1022	27			27	1	6035	my $self = shift;
1023
1024	27					208	validate_pos(@_);
1025
1026	27					97	return _get_def($self->{_inh_encoding}, '');
1027							}
1028
1029							sub get_is_always_quoted {
1030	15			15	1	435	my $self = shift;
1031
1032	15					138	validate_pos(@_);
1033
1034	15					130	return $self->{_is_always_quoted};
1035							}
1036
1037							sub get_pass_count {
1038	43			43	1	1156	my $self = shift;
1039
1040	43					275	validate_pos(@_);
1041
1042	43					170	return _get_def($self->{_pass_count}, 0);
1043							}
1044
1045							sub get_in_mem_record_count {
1046	0			0	1	0	my $self = shift;
1047
1048	0					0	validate_pos(@_);
1049
1050	0					0	return ($self->{_in_mem_record_count}, 0);
1051							}
1052
1053							sub get_max_in_mem_record_count {
1054	504			504	1	2270	my $self = shift;
1055
1056	504					3481	validate_pos(@_);
1057
1058	504					2620	return _get_def($self->{_max_in_mem_record_count}, 0);
1059							}
1060
1061							sub _set_max_in_mem_record_count {
1062	153			153		341	my $self = shift;
1063
1064	153					1697	validate_pos(@_, {type => SCALAR});
1065
1066	153					619	$self->{_max_in_mem_record_count} = $_[0];
1067							}
1068
1069							sub get_fields_names {
1070	19			19	1	3909	my $self = shift;
1071
1072	19					162	validate_pos(@_);
1073
1074	19	50				73	return () unless $self->_status_forward('S3');
1075	19					39	return @{$self->{_columns}};
	19					109
1076							}
1077
1078							sub get_field_name {
1079	1			1	1	609	my $self = shift;
1080
1081	1					12	validate_pos(@_, {type => SCALAR});
1082
1083	1					3	my ($n) = @_;
1084
1085	1	50				4	return undef unless $self->_status_forward('S3');
1086	1					5	return $self->{_columns}->[$n];
1087							}
1088
1089							sub get_coldata {
1090	0			0	1	0	my $self = shift;
1091
1092	0					0	validate_pos(@_);
1093
1094	0	0				0	return () unless $self->_status_forward('S3');
1095	0					0	my @ret;
1096	0					0	for (@{$self->{_coldata}}) {
	0					0
1097	0					0	push @ret, [
1098							$_->field_name,
1099							$_->header_text,
1100							$_->description,
1101							$_->dt_format,
1102							$_->dt_locale];
1103							}
1104
1105	0					0	return @ret;
1106							}
1107
1108							sub get_stats {
1109	2			2	1	12	my $self = shift;
1110
1111	2					13	validate_pos(@_);
1112
1113	2	50				9	return () unless defined($self->{_stats});
1114	2					3	return %{$self->{_stats}};
	2					10
1115							}
1116
1117							sub _debug_show_members {
1118	0			0		0	my ($self) = @_;
1119	0					0	my $_debugh = $self->{_debugh};
1120	0	0				0	my @a = @{$self->{fields_ar}} if defined($self->{fields_ar});
	0					0
1121	0	0				0	my @c = @{$self->{fields_column_names}} if defined($self->{fields_column_names});
	0					0
1122	0	0				0	my %h = %{$self->{fields_hr}} if defined($self->{fields_hr});
	0					0
1123
1124	0					0	print($_debugh "-- _debug_show_members() start\n");
1125	0					0	print($_debugh " croak_if_error $self->{croak_if_error}\n");
1126	0					0	print($_debugh " verbose $self->{verbose}\n");
1127	0					0	print($_debugh " _debug $self->{_debug}\n");
1128	0					0	print($_debugh " _debug_read $self->{_debug_read}\n");
1129	0					0	print($_debugh " infoh $self->{infoh}\n");
1130	0					0	print($_debugh " _debugh $_debugh\n");
1131	0					0	print($_debugh " inh: $self->{_inh}\n");
1132	0					0	print($_debugh " in_file_disp " . $self->get_in_file_disp() . "\n");
1133	0					0	print($_debugh " _in_csvobj $self->{_in_csvobj}\n");
1134	0					0	print($_debugh " has_headers $self->{has_headers}\n");
1135	0					0	print($_debugh " fields_ar:\n");
1136	0					0	for my $e (@a) {
1137	0					0	print($_debugh " '$e'\n");
1138							}
1139	0					0	print($_debugh " fields_hr:\n");
1140	0					0	for my $e (keys %h) {
1141	0					0	print($_debugh " '$e' => '$h{$e}'\n");
1142							}
1143	0					0	print($_debugh " fields_column_names:\n");
1144	0					0	for my $e (@c) {
1145	0					0	print($_debugh " '$e'\n");
1146							}
1147	0					0	print($_debugh "-- _debug_show_members() end\n");
1148							}
1149
1150							#
1151							# Check headers in CSV header line
1152							# Used to increase robustness by relying on header title rather than
1153							# column number.
1154							#
1155							# Return 1 if success (all fields found), 0 otherwise.
1156							#
1157							sub _process_header {
1158	12			12		27	my $self = shift;
1159	12					23	my @headers = @{shift(@_)};
	12					45
1160	12					25	my %fields_h = %{shift(@_)};
	12					80
1161	12					31	my $retval = shift;
1162
1163	12					22	my @tmp = keys %{$retval};
	12					30
1164
1165	12					38	my $in_file_disp = $self->get_in_file_disp();
1166
1167	12	50				46	confess '$_[4] must be an empty by-ref hash' if $#tmp >= 0;
1168
1169	12					23	my $e = 0;
1170	12					45	for my $k (keys %fields_h) {
1171	48					105	my $v = $fields_h{$k};
1172
1173	48			462		252	my @all_idx = indexes { /$v/i } @headers;
	462					3507
1174	48	50				210	if ($#all_idx >= 1) {
1175	0					0	$self->_print_error("file $in_file_disp: " .
1176							"more than one column matches the criteria '$v'");
1177	0					0	$e++;
1178							}
1179	48			258		223	my $idx = first_index { /$v/i } @headers;
	258					2172
1180	48	50				313	if ($idx < 0) {
1181	0					0	$self->_print_error("file $in_file_disp: unable to find field '$v'");
1182	0					0	$e++;
1183							} else {
1184	48					146	$retval->{$k} = $idx;
1185							}
1186							}
1187
1188	12	50				82	return ($e >= 1 ? 0 : 1);
1189							}
1190
1191							sub set_walker_hr {
1192	2			2	1	1928	my $self = shift;
1193	2					22	validate_pos(@_, {type => UNDEF \| CODEREF, optional => 1});
1194
1195	2					7	my ($walker_hr) = @_;
1196
1197	2	50				6	return undef unless $self->_status_forward('S2');
1198	2	50				6	return undef unless $self->_status_backward('S2');
1199	2					6	$self->{walker_hr} = $walker_hr;
1200
1201	2					5	return $self;
1202							}
1203
1204							sub set_walker_ar {
1205	2			2	1	3245	my $self = shift;
1206	2					31	validate_pos(@_, {type => UNDEF \| CODEREF, optional => 1});
1207
1208	2					8	my ($walker_ar) = @_;
1209
1210	2	50				8	return undef unless $self->_status_forward('S2');
1211	2	50				7	return undef unless $self->_status_backward('S2');
1212	2					6	$self->{walker_ar} = $walker_ar;
1213
1214	2					7	return $self;
1215							}
1216
1217
1218							# * *************************************** *
1219							# * BEGINNING OF DATE FORMAT DETECTION CODE *
1220							# * *************************************** *
1221
1222
1223							#
1224							# The '%m.%d.%y' is not at its "logical" location. It is done to make sure the order in which
1225							# entries are written does not impact the result.
1226							#
1227							# It could occur because there is some code that correlates an entry containing %y with another
1228							# one that would contain %Y. The %Y will be called the master, the %y will be called the slave.
1229							# It is important to match such entries, otherwise an identified format with %y would always be
1230							# ambiguous with the same written with %Y.
1231							#
1232							# IMPORTANT
1233							# The list below is written almost as-is in the POD at the bottom of this file.
1234							#
1235							my @DATES_DEFAULT_FORMATS_TO_TRY = (
1236							'',
1237							'%Y-%m-%d',
1238							'%Y.%m.%d',
1239							'%Y/%m/%d',
1240
1241							'%m.%d.%y',
1242
1243							'%m-%d-%Y',
1244							'%m.%d.%Y',
1245							'%m/%d/%Y',
1246							'%d-%m-%Y',
1247							'%d.%m.%Y',
1248							'%d/%m/%Y',
1249
1250							'%m-%d-%y',
1251							'%m/%d/%y',
1252							'%d-%m-%y',
1253							'%d.%m.%y',
1254							'%d/%m/%y',
1255
1256							'%Y%m%d%H%M%S',
1257
1258							# Localizaed formats
1259
1260							'%b %d, %Y',
1261							'%b %d %Y',
1262							'%b %d %T %Z %Y',
1263							'%d %b %Y',
1264							'%d %b, %Y'
1265							);
1266
1267							#
1268							# IMPORTANT
1269							# Under Linux, $START is useless. Strptime will match a format exactly as it is, and a tring
1270							# like "01/01/16 13:00:00" won't match with "%T". Under Windows, Strptime is capable of doing
1271							# a match by ignoring characters at the beginning, thus "01/01/2016 13:00:00" for example will
1272							# return success when matched against "%T".
1273							# Possibly it has to do with versionning of Strptime, not Linux versus Windows as such. Any
1274							# way, this difference had to be dealt with.
1275							#
1276							# The flexibility under Windows would screw the code logic so I had to add the prefix string
1277							# below, to avoid unexpected success on match.
1278							#
1279							my $START = '<';
1280
1281							struct RecordCounter => {
1282							count_ok => '$',
1283							count_ko => '$',
1284							has_searched_time => '$',
1285
1286							format => '$',
1287							locale => '$',
1288
1289							has_found_time => '$',
1290							format_with_addition_of_time => '$',
1291							locale_with_addition_of_time => '$',
1292							parser_with_addition_of_time => '$'
1293							};
1294
1295							struct Format => {
1296							id => '$',
1297							format => '$',
1298							locale => '$',
1299							parser => '$',
1300							index_slave => '$',
1301							index_master => '$'
1302							};
1303
1304							sub _col_dispname {
1305	530			530		1035	my ($self, $n) = @_;
1306
1307	530					778	my $col;
1308
1309							#
1310							# IMPORTANT
1311							#
1312							# We cannot execute here a command like
1313							# $self->_status_forward('S3');
1314							# (to ensure _columns is well defined) because _col_dispname is called by
1315							# _detect_dates_formats that is in turn called by _S3_init_fields_extra. A call to
1316							# _status_forward would trigger a never-ending call loop.
1317							#
1318	530					1478	my $cols = _get_def($self->{'_columns'}, $self->{'_S2_columns'});
1319
1320	530	50				1272	if ($self->{has_headers}) {
1321	530					1033	$col = $cols->[$n];
1322	530	50				1225	$col = "<UNDEF>" unless defined($col);
1323							} else {
1324	0					0	$col = "[$n]";
1325							}
1326	530					1243	return $col;
1327							}
1328
1329							# Used by test plan only...
1330							sub _dds {
1331	42			42		1909	my $self = shift;
1332
1333	42	50				115	return undef unless $self->_status_forward('S3');
1334	40	50				145	return undef unless defined($self->{_dates_detailed_status});
1335	40					341	return $self->{_dates_detailed_status};
1336							}
1337
1338							sub _detect_dates_formats {
1339	348			348		698	my $self = shift;
1340
1341	348	100				1150	return if $self->{_detect_dates_formats_has_run};
1342	281					785	$self->{_detect_dates_formats_has_run} = 1;
1343	281	100				894	my @fields_dates = @{$self->{fields_dates}} if defined($self->{fields_dates});
	11					75
1344	281	100	100			1755	return unless @fields_dates or $self->{fields_dates_auto};
1345
1346	51	50				214	if ($self->{_int_one_pass}) {
1347	0					0	$self->_print_error("date format detection disallowed when one_pass is set");
1348	0					0	return;
1349							}
1350
1351	51					113	my $_debug = $self->{_debug};
1352	51					105	my $_debugh = $self->{_debugh};
1353	51		33			153	my $debug_fmt = ($_debug and $DEBUG_DATETIME_FORMATS);
1354
1355	51					186	$self->_register_pass("detect date format");
1356
1357							#
1358							# Why re-opening the input?
1359							# I tried two other ways that never worked on some OSes (like freebsd) and/or with older perl
1360							# versions.
1361							#
1362							# 1) The "tell" tactic
1363							# Recording at the beginning of the function the file position with
1364							# my $pos = tell($self->{inh});
1365							# ... and then recalling with a seek instruction is the most logical.
1366							# But it didn't work = sometimes, reading would go back to first row (the headers) instead
1367							# of the second row, could not figure out why (it would work on my Ubuntu 16.04 / perl 5.22, but
1368							# would fail with other OSes and/or perl versions).
1369							#
1370							# 2) The "complete rewind" tactic
1371							# I then undertook to do (at the end of detection function):
1372							# seek $inh, 0, SEEK_SET;
1373							# $incsv->getline($inh) if $self->{has_headers};
1374							# based on the assumption that a seek to zero would behave differently from a seek to an
1375							# arbitrary position.
1376							# But still, it would sometimes fail....
1377							#
1378
1379	51					135	my $inh = $self->_reopen_input();
1380	51					112	my $incsv = $self->{_in_csvobj};
1381	51	50				249	_mygetline($incsv, $inh) if $self->{has_headers};
1382
1383	51					2412	my $formats_to_try = $self->{dates_formats_to_try};
1384	51					112	my $ignore_trailing_chars = $self->{dates_ignore_trailing_chars};
1385	51					115	my $search_time = $self->{dates_search_time};
1386	51					111	my $localizations = $self->{dates_locales};
1387
1388	51					103	my %regular_named_fields = %{$self->{_regular_named_fields}};
	51					438
1389
1390	51					203	my $refsub_is_datetime_empty = $self->{_refsub_is_datetime_empty};
1391
1392	51					88	my @fields_to_detect_format;
1393	51	100				211	if (defined($self->{fields_dates})) {
		50
1394	11					23	my $count_field_not_found = 0;
1395	11					25	my %column_seen;
1396	11					25	for my $f (@{$self->{fields_dates}}) {
	11					43
1397	23	100				74	if (!exists $regular_named_fields{$f}) {
1398	1					17	$self->_print_error("fields_dates: unknown field: '$f'",
1399							1, ERR_UNKNOWN_FIELD, { %regular_named_fields } );
1400	1					64	$count_field_not_found++;
1401	1					5	next;
1402							}
1403	22					42	my $n = $regular_named_fields{$f};
1404	22	50				67	if (exists $column_seen{$n}) {
1405	0					0	$self->_print_warning("field '$f' already seen");
1406	0					0	next;
1407							}
1408	22					49	$column_seen{$n} = 1;
1409	22					52	push @fields_to_detect_format, $n;
1410							}
1411	11	100				50	$self->_print_error("non existent field(s) encountered, aborted") if $count_field_not_found;
1412							} elsif ($self->{fields_dates_auto}) {
1413	40					281	my @k = keys %regular_named_fields;
1414	40					228	@fields_to_detect_format = (0..$#k);
1415							} else {
1416	0					0	confess "Hey! check this code, man";
1417							}
1418
1419							#
1420							# FIXME?
1421							# Sort by column number of not?
1422							#
1423							# At this moment in time, the author inclines to answer "yes".
1424							# But I must admit it is rather arbitrary decision for now.
1425							#
1426	50					342	@fields_to_detect_format = sort { $a <=> $b } @fields_to_detect_format;
	744					1054
1427
1428	2					8	my @dates_formats_supp = @{$self->{dates_formats_to_try_supp}}
1429	50	100				172	if defined($self->{dates_formats_to_try_supp});
1430
1431	50	100				344	$formats_to_try = [ @DATES_DEFAULT_FORMATS_TO_TRY ] unless defined($formats_to_try);
1432	50					100	$formats_to_try = [ @{$formats_to_try}, @dates_formats_supp ];
	50					212
1433	50					138	my %seen;
1434	50					102	my $f2 = [ ];
1435	50					139	for (@${formats_to_try}) {
1436	912	50				1854	push @{$f2}, $_ unless exists($seen{$_});
	912					1468
1437	912					1843	$seen{$_} = undef;
1438							}
1439	50					103	$formats_to_try = $f2;
1440
1441	50	100				190	$ignore_trailing_chars = 1 unless defined($ignore_trailing_chars);
1442	50	100				145	$search_time = 1 unless defined($search_time);
1443
1444	50	100				146	my $stop = ($ignore_trailing_chars ? '' : '>');
1445
1446							#
1447							# The code below (from # AMB to # AMB-END) aims to remove ambiguity that comes from %Y versus %y.
1448							# That is: provided you have (among others) the formats to try
1449							# '%d-%m-%Y'
1450							# and
1451							# '%d-%m-%y'
1452							# then if parsing 4-digit-year dates (like '31-12-2016'), the two formats will work and you'll end
1453							# up with an ambiguity. To be precise, there'll be no ambiguity if the date is followed by a time,
1454							# but if the date is alone, both formats will work.
1455							#
1456							# Thanks to the below code, the member 'index_slave' (and its counterpart index_master) is populated
1457							# and later, if such an ambiguity is detected, the upper case version (the one containing upper case
1458							# '%Y') will be kept and the other one will be discarded.
1459							#
1460							# NOTE
1461							# Such an ambiguity can exist only when ignore_trailing_chars is set. Otherwise, the remaining two
1462							# digits make the date parsing fail in the '%y' case.
1463							#
1464							# The other members of the 'Format' object are used to work "normally", independently from this
1465							# ambiguity removal feature.
1466							#
1467
1468							# WIP = Work In Progress...
1469	50					88	my @formats_wip;
1470	50	100				438	my @locales = split(/,\s*/, $localizations) if defined($localizations);
1471	50					101	for my $f (@{$formats_to_try}) {
	50					149
1472	912	100				2392	my $has_localized_item = ($f =~ m/%a\|%A\|%b\|%B\|%c\|%\+/ ? 1 : 0);
1473	912	100	100			2189	unless (@locales and $has_localized_item) {
1474	902					1822	push @formats_wip, [$f, ''];
1475	902					1484	next;
1476							}
1477	10					36	push @formats_wip, [$f, $_] foreach @locales;
1478							}
1479
1480							# AMB
1481	50					108	my @formats;
1482							my %mates;
1483	50					177	for my $i (0..$#formats_wip) {
1484	922					2353	my $fstr = $formats_wip[$i]->[0];
1485	922					1580	my $floc = $formats_wip[$i]->[1];
1486
1487							# FIXME
1488							# Will not manage correctly a string like
1489							# '%%Y'
1490							# that means (when used with Strptime), the litteral string '%Y' with no substitution.
1491							# Such cases will be complicated to fix, as it'll require to do a kind-of
1492							# Strptime-equivalent parsing of the string, and I find it a bit overkill.
1493							#
1494							# I prefer to push back in caller world saying
1495							# "Hey, if using constructs like '%%Y', you'll be in trouble."
1496	922					1581	my $m = $fstr;
1497	922					3903	$m =~ s/%y//ig;
1498	922					1887	$m .= $floc;
1499
1500	922					1478	my $index_slave = -1;
1501	922					1428	my $index_master = -1;
1502	922	100				2317	if (exists $mates{$m}) {
1503	244					640	my $alt_fstr = $formats_wip[$mates{$m}]->[0];
1504	244	100				859	my $m_lower = ($fstr =~ m/%y/ ? 1 : 0);
1505	244	100				677	my $m_upper = ($fstr =~ m/%Y/ ? 1 : 0);
1506	244	100				639	my $a_lower = ($alt_fstr =~ m/%y/ ? 1 : 0);
1507	244	100				621	my $a_upper = ($alt_fstr =~ m/%Y/ ? 1 : 0);
1508
1509							# We ignore the weird cases where we'd have both %y and %Y in a format string.
1510
1511	244	100	66			2819	if (!$m_lower and $m_upper and $a_lower and !$a_upper) {
		50	66
			33
			33
			33
			33
1512	42					104	$index_slave = $mates{$m};
1513	42					720	$formats[$mates{$m}]->index_master($i);
1514							} elsif ($m_lower and !$m_upper and !$a_lower and $a_upper) {
1515	202					422	$index_master = $mates{$m};
1516	202					3565	$formats[$mates{$m}]->index_slave($i);
1517							}
1518
1519							} else {
1520	678					1660	$mates{$m} = $i;
1521							}
1522
1523	922					4366	my %strptime_opts = (pattern => $START . $fstr . $stop);
1524	922	100				2748	$strptime_opts{locale} = $floc if $floc ne '';
1525	922	100				6003	my $format = Format->new(
1526							id => "$i",
1527							format => $fstr,
1528							locale => $floc,
1529							parser => ($fstr ne '' ?
1530							DateTime::Format::Strptime->new(%strptime_opts) :
1531							undef),
1532							index_slave => $index_slave,
1533							index_master => $index_master
1534							);
1535	922					1130237	push @formats, $format;
1536							}
1537	50					242	for my $i (0..$#formats) {
1538	922					11421	my $format = $formats[$i];
1539
1540							# If a master could be itself the slave of another entry, that'd make it a hierarchical
1541							# relation tree with multiple levels. It is not possible, only a direct, unique
1542							# master-slave relation can be managed here.
1543	922	50	66			11660	confess "Inonsistent data, check this module's code urgently!"
1544							if $format->index_slave >= 0 and $format->index_master >= 0;
1545
1546	922	100				21329	if ($format->index_slave >= 0) {
1547	244					4429	my $mate = $formats[$format->index_slave];
1548	244	50	33			4186	if ($mate->index_master != $i or $mate->index_slave != -1) {
1549	0					0	confess "Inonsistent data (2), check this module's code urgently!"
1550							}
1551							}
1552
1553	922	100				21239	if ($format->index_master >= 0) {
1554	244					4486	my $mate = $formats[$format->index_master];
1555	244	50	33			4217	if ($mate->index_slave != $i or $mate->index_master != -1) {
1556	0					0	confess "Inonsistent data (3), check this module's code urgently!"
1557							}
1558							}
1559
1560							}
1561	50	50				574	if ($debug_fmt) {
1562	0					0	for (@formats) {
1563	0					0	my ($idx, $rel) = (-1, "");
1564	0	0				0	$idx = $_->index_slave, $rel = "S: " if $_->index_slave >= 0;
1565	0	0				0	$idx = $_->index_master, $rel = "M: " if $_->index_master >= 0;
1566	0					0	printf($_debugh "%-18s %s %2d", "'" . $_->format . "'", $rel, $idx);
1567	0	0				0	print($_debugh ": '" . $formats[$idx]->format . "'") if $idx >= 0;
1568	0					0	print($_debugh "\n");
1569							}
1570							}
1571							# AMB-END
1572
1573	50					151	my %records;
1574							my $record_number;
1575	50					104	my $count_gotit = 0;
1576	50					107	my $count_ambiguous = 0;
1577	50					105	my $count_nodate = 0;
1578	50					98	my $count_empty = 0;
1579	50					99	my $has_signaled_can_start_recording_data = 0;
1580	50					154	$self->{_line_after_which_recording_can_start} = 0;
1581
1582							#
1583							# Seems a weird optimization here, but it is very important.
1584							# In some cases, divides execution time (to detect date format on big files
1585							# containing numerous fields) by 10.
1586							#
1587							# When evaluates to true, it means the input column has no identified date format, meaning,
1588							# no further check to do.
1589							#
1590	50					122	my @cache_nodate;
1591
1592	50					208	while (my $f = _mygetline($incsv, $inh)) {
1593	4645					147657	$record_number++;
1594
1595	4645	50				10989	if ($debug_fmt) {
1596	0					0	print($_debugh "RECORD $record_number:\n");
1597	0					0	for (0 .. @$f - 1) { printf($_debugh " %02d: '%s'\n", $_, $f->[$_]); }
	0					0
1598							}
1599
1600	4645					9460	for my $n (@fields_to_detect_format) {
1601	31430	100				71512	next if $cache_nodate[$n];
1602
1603	16897					30214	my $v = $f->[$n];
1604	16897	100				35622	$v = '' unless defined($v);
1605	16897	100				39587	next if $v eq '';
1606	9395	100	100			34623	next if defined($refsub_is_datetime_empty) and $refsub_is_datetime_empty->($v);
1607
1608	9392	50				22468	if ($debug_fmt) {
1609	0					0	my $col = $self->_col_dispname($n);
1610	0					0	print($_debugh "Line $record_number, column '$col':\n");
1611							}
1612
1613	9392					18512	for my $fmt (@formats) {
1614	158222					2011791	my $fid = $fmt->id;
1615	158222					2724809	my $fstr = $fmt->format;
1616
1617	158222	50				1152556	$self->_debug_output_fmt('** pre ', $fmt, $records{$n}->{$fid}) if $debug_fmt;
1618
1619							$records{$n}->{$fid} = RecordCounter->new(
1620							count_ok => 0,
1621							count_ko => 0,
1622							has_searched_time => 0,
1623
1624							format => undef,
1625							locale => undef,
1626
1627							has_found_time => 0,
1628							format_with_addition_of_time => undef,
1629							locale_with_addition_of_time => undef,
1630							parser_with_addition_of_time => undef
1631	158222	100				501871	) unless defined($records{$n}->{$fid});
1632
1633	158222	100				2537923	unless ($records{$n}->{$fid}->count_ko) {
1634	19361					181657	my $is_ok = &_try_parser($fmt, $records{$n}->{$fid}, $START . $v . $stop);
1635
1636	19361	100				51956	if (!$is_ok) {
1637	7604					12924	my $give_up_time = 0;
1638	7604	100	66			98468	if ($records{$n}->{$fid}->count_ko == 0 and
			100
1639							$records{$n}->{$fid}->has_searched_time and
1640							$records{$n}->{$fid}->has_found_time) {
1641	77		100			4598	$give_up_time = (defined($fmt->parser) and
1642							defined($fmt->parser->parse_datetime($START . $v . $stop))
1643							?
1644							1 : 0);
1645	77	100				47156	if ($give_up_time) {
1646	4					92	$records{$n}->{$fid}->has_found_time(0);
1647	4					39	$is_ok = 1;
1648							}
1649							}
1650							}
1651
1652	19361	100	100			252839	if ($is_ok or !$ignore_trailing_chars) {
1653	14144		100			188133	my $incr = (defined($fmt->parser) and $is_ok ? 1: 0);
1654
1655	14144	100				306619	unless ($records{$n}->{$fid}->has_searched_time) {
1656	3141					58701	$records{$n}->{$fid}->has_searched_time(1);
1657
1658							croak "Inconsistent status! Issue in module code not in caller's!"
1659	3141	50				53907	if $records{$n}->{$fid}->count_ok != 0;
1660
1661	3141	100				27660	if ($search_time) {
		100
1662
1663	1965	50				5577	print($_debugh " Search time in '$v', format '$fstr'\n")
1664							if $debug_fmt;
1665
1666	1965					25839	my $t = $self->_guess_time_format($fstr, $fmt->locale, $v, $stop);
1667	1965	100				48455	$records{$n}->{$fid}->has_found_time((defined($t) ? 1 : 0));
1668	1965	100				22083	if (defined($t)) {
		100
1669	254					4087	$records{$n}->{$fid}->format_with_addition_of_time($t->[0]);
1670	254					5234	$records{$n}->{$fid}->locale_with_addition_of_time($t->[1]);
1671	254					5218	$records{$n}->{$fid}->parser_with_addition_of_time($t->[2]);
1672	254					2308	$incr = 1;
1673							} elsif ($fstr eq '') {
1674	191					3066	$records{$n}->{$fid}->count_ko(1);
1675							}
1676							} elsif ($fstr eq '') {
1677	78					1003	$records{$n}->{$fid}->count_ko(1);
1678							}
1679
1680							}
1681
1682	14144					258579	$records{$n}->{$fid}->count_ok($records{$n}->{$fid}->count_ok + $incr);
1683
1684	14144	100	100			212481	$records{$n}->{$fid}->count_ko($records{$n}->{$fid}->count_ko + 1)
1685							if !$incr and !$is_ok;
1686
1687	14144	100				57896	if ($incr) {
1688							# We remove the slave if master is fine.
1689							# Depending on the order in which parsing got done, the master could
1690							# pop up first, or the slave, that is why we need manage both cases.
1691	9155	100	100			120488	if ($fmt->index_slave >= 0 or $fmt->index_master >= 0) {
1692	6837	100				218610	my $has_slave = ($fmt->index_slave >= 0 ? 1 : 0);
1693	6837	100				129995	my $idx = ($has_slave ? $fmt->index_slave : $fmt->index_master);
1694	6837					121500	my $mate = $formats[$idx]->id;
1695	6837	100				60566	if (exists $records{$n}->{$mate}) {
1696	6686	100				16791	if ($has_slave) {
1697	2239	100				30513	if ($records{$n}->{$mate}->count_ko == 0) {
1698							# I am the master: I remove the slave
1699	2					50	$records{$n}->{$mate}->count_ko(1);
1700							}
1701							} else {
1702	4447	50	66			60285	if ($records{$n}->{$mate}->count_ko == 0 and
			66
1703							$records{$n}->{$mate}->count_ok >= 1 and
1704							$records{$n}->{$fid}->count_ko == 0) {
1705	114					6722	$records{$n}->{$fid}->count_ko(1);
1706							}
1707							}
1708							}
1709							}
1710							}
1711
1712							} else {
1713	5217					70104	$records{$n}->{$fid}->count_ko($records{$n}->{$fid}->count_ko + 1);
1714							}
1715							}
1716
1717	158222	50				1339039	$self->_debug_output_fmt(' post', $fmt, $records{$n}->{$fid}) if $debug_fmt;
1718
1719							}
1720							}
1721
1722	4645					8710	$count_gotit = 0;
1723	4645					7516	$count_ambiguous = 0;
1724	4645					7526	$count_empty = 0;
1725	4645					10816	for my $n (@fields_to_detect_format) {
1726	31430	100				71592	next if $cache_nodate[$n];
1727
1728	16897					25711	my $candidate = 0;
1729	16897					24086	my $tt = 0;
1730	16897					28237	for my $fmt (@formats) {
1731	258870					3239194	my $fid = $fmt->id;
1732	258870					1821426	my $rec = $records{$n}->{$fid};
1733	258870	100				574678	next unless defined($rec);
1734
1735	200048					2451503	my $ok = $rec->count_ok;
1736	200048					3426576	my $ko = $rec->count_ko;
1737
1738	200048	50	66			1752815	confess "Oups. Check this module code urgently!" if $ok == 0 and $ko == 0;
1739	200048					301631	$tt += $ok + $ko;
1740
1741	200048	100	100			549775	$candidate++ if $ok >= 1 and $ko == 0;
1742							}
1743	16897	100				42993	if ($candidate == 1) {
		100
		100
1744	9599					18745	$count_gotit++;
1745							} elsif ($candidate >= 2) {
1746	2269					5566	$count_ambiguous++;
1747							} elsif ($tt != 0) {
1748	222					417	$count_nodate++;
1749	222					572	$cache_nodate[$n] = 1;
1750							} else {
1751	4807					8012	$count_empty++;
1752							}
1753							}
1754
1755	4645	50				11977	if ($debug_fmt) {
1756	0					0	print($_debugh "\$count_gotit = $count_gotit\n");
1757	0					0	print($_debugh "\$count_ambiguous = $count_ambiguous\n");
1758	0					0	print($_debugh "\$count_nodate = $count_nodate\n");
1759	0					0	print($_debugh "\$count_empty = $count_empty\n");
1760							}
1761
1762	4645					7395	my $can_start_recording_data = 0;
1763	4645	100	100			27636	$can_start_recording_data = 1
			100
1764							if $count_gotit + $count_ambiguous + $count_nodate >= 1 and
1765							!$count_ambiguous and !$count_empty;
1766
1767	4645	100	100			32531	if ($can_start_recording_data and !$has_signaled_can_start_recording_data) {
1768	28					64	$has_signaled_can_start_recording_data = 1;
1769
1770	28	50				97	print($_debugh "Can start recording (all dates formats detection closed) " .
1771							"after record #$record_number\n") if $_debug;
1772
1773	28					98	$self->{_line_after_which_recording_can_start} = $record_number;
1774	28	100				559	last unless $self->{fields_dates_auto};
1775							}
1776							}
1777
1778	50					5167	close $inh;
1779
1780	50					190	my %dates_detailed_status;
1781							my @dates_formats;
1782	50					141	my $check_empty = 0;
1783	50					101	my $check_nodate = 0;
1784	50					104	my $check_ambiguous = 0;
1785	50					114	my $check_gotit = 0;
1786	50					145	for my $n (@fields_to_detect_format) {
1787	523					833	my @formats_ok;
1788	523					798	my $tt = 0;
1789	523					798	for my $fid (sort keys %{$records{$n}}) {
	523					4886
1790	8267					84350	my $rec = $records{$n}->{$fid};
1791	8267	100	100			99253	if ($rec->count_ok >= 1 and $rec->count_ko == 0) {
1792
1793	297					10743	my ($fstr, $floc) = ($rec->format, $rec->locale);
1794	297	100				6871	($fstr, $floc) = (
1795							$rec->format_with_addition_of_time,
1796							$rec->locale_with_addition_of_time
1797							) if $rec->has_found_time;
1798
1799	297					6572	push @formats_ok, [$fstr, $floc];
1800							}
1801	8267					147918	$tt += $rec->count_ok + $rec->count_ko;
1802							}
1803	523					5830	my $is_ok = 0;
1804	523					803	my $format;
1805	523					834	my $locale = '';
1806	523	100	100			2432	if ($#formats_ok < 0 and $tt == 0) {
		100
		100
1807	16					32	$format = "Z";
1808	16					30	$check_empty++;
1809							} elsif ($#formats_ok < 0) {
1810	222					378	$format = "N";
1811	222					318	$check_nodate++;
1812							} elsif ($#formats_ok > 0) {
1813	12					23	$format = "A";
1814	12					24	$check_ambiguous++;
1815							} else {
1816	273					457	$is_ok = 1;
1817	273					509	$format = $formats_ok[0]->[0];
1818	273					529	$locale = $formats_ok[0]->[1];
1819	273					580	$check_gotit++;
1820							}
1821	523					1318	my $col = $self->_col_dispname($n);
1822
1823	523	50				1922	$dates_detailed_status{$col} = $format unless exists $dates_detailed_status{$col};
1824	523	100	66			2593	$dates_formats[$n] = [ $format, $locale ] if $is_ok and !defined($dates_formats[$n]);
1825							}
1826	50					213	$dates_detailed_status{'.'} = $self->{_line_after_which_recording_can_start};
1827
1828	50	50	66			591	if ($check_empty != $count_empty or $check_nodate != $count_nodate or
			66
			33
1829							$check_ambiguous != $count_ambiguous or $check_gotit != $count_gotit) {
1830							# The below condition can happen with an empty CSV (empty file (no header) or
1831							# only a header line).
1832	1	50	33			21	unless (!$count_empty and !$check_nodate and !$count_nodate and
			33
			33
			33
			33
			33
1833							!$check_ambiguous and !$count_ambiguous and !$check_gotit and !$count_gotit) {
1834	0					0	print(STDERR "\$check_empty = $check_empty\n");
1835	0					0	print(STDERR "\$count_empty = $count_empty\n");
1836	0					0	print(STDERR "\$check_nodate = $check_nodate\n");
1837	0					0	print(STDERR "\$count_nodate = $count_nodate\n");
1838	0					0	print(STDERR "\$check_ambiguous = $check_ambiguous\n");
1839	0					0	print(STDERR "\$count_ambiguous = $count_ambiguous\n");
1840	0					0	print(STDERR "\$check_gotit = $check_gotit\n");
1841	0					0	print(STDERR "\$count_gotit = $count_gotit\n");
1842	0					0	confess "Oups! Check immediately this module code, man!";
1843							}
1844							}
1845
1846	50	50				148	if ($debug_fmt) {
1847							# A very detailed debug output
1848	0					0	for my $n (@fields_to_detect_format) {
1849	0					0	my $col = $self->_col_dispname($n);
1850	0					0	print($_debugh "$col\n");
1851	0					0	printf($_debugh " %-25s %3s %3s\n", "format", "OK", "KO");
1852	0					0	for my $fid (sort keys %{$records{$n}}) {
	0					0
1853	0					0	my $rec = $records{$n}->{$fid};
1854	0					0	my $cc = '';
1855	0	0	0			0	$cc = "(" . $rec->locale . ")" if defined($rec->locale) and $rec->locale ne '';
1856	0					0	printf($_debugh " %-25s %3d %3d\n",
1857							$rec->format . $cc, $rec->count_ok, $rec->count_ko);
1858							}
1859							}
1860							}
1861							# Not a typo - displaying it IN ADDITION to debug output above is done on purpose...
1862	50	50				202	if ($_debug) {
1863							# A shorter (as compared to above) output of outcome of DateTime detection
1864	0					0	print($_debugh "Result of DateTime detection:\n");
1865	0					0	printf($_debugh "%-3s %-25s %-30s %s\n", '###', 'FIELD', 'DATETIME FORMAT',
1866							'DATETIME LOCALE');
1867	0					0	for my $n (@fields_to_detect_format) {
1868	0					0	my ($fmt, $loc) = ('<undef>', '<undef>');
1869	0	0				0	if (defined($dates_formats[$n])) {
1870	0					0	($fmt, $loc) = @{$dates_formats[$n]}[0, 1];
	0					0
1871							}
1872	0					0	printf($_debugh "%03d %-25s %-30s %s\n", $n, $self->_col_dispname($n), $fmt, $loc);
1873							}
1874							}
1875
1876	50	100				178	if (!$self->{fields_dates_auto}) {
1877	10					28	my $e = 0;
1878	10					32	for my $n (@fields_to_detect_format) {
1879	20	100				69	next if defined($dates_formats[$n]);
1880	7					27	$self->_print_error("unable to detect DateTime format of field '" .
1881							$self->_col_dispname($n) . "'", 1);
1882	7					281	$e++;
1883							}
1884	10	100				57	$self->_print_error("$e field(s) encountered with unknown DateTime format") if $e;
1885							}
1886
1887	48					774	$self->{_dates_detailed_status} = { %dates_detailed_status };
1888	48					79659	$self->{_dates_formats} = [ @dates_formats ];
1889							}
1890
1891							sub _debug_output_fmt {
1892	0			0		0	my ($self, $prefix, $fmt, $rec) = @_;
1893
1894	0					0	my $_debugh = $self->{_debugh};
1895
1896	0					0	my ($fstr, $floc) = ($fmt->format, $fmt->locale);
1897	0	0	0			0	($fstr, $floc) = (
1898							'<+T>' . $rec->format_with_addition_of_time,
1899							$rec->locale_with_addition_of_time
1900							) if defined($rec) and $rec->has_found_time;
1901
1902	0					0	my $locstr = '';
1903	0	0	0			0	$locstr = "(" . $floc . ")" if defined($floc) and $floc ne '';
1904
1905	0	0				0	my $tmpok = $rec->count_ok if defined($rec);
1906	0	0				0	$tmpok = '<undef>' unless defined($tmpok);
1907	0	0				0	my $tmpko = $rec->count_ko if defined($rec);
1908	0	0				0	$tmpko = '<undef>' unless defined($tmpko);
1909
1910	0					0	print($_debugh "$prefix (format '$fstr$locstr': OK = $tmpok, KO = $tmpko)\n");
1911							}
1912
1913							# When no parse can be done (parser to test is undef), return 1
1914							sub _try_parser {
1915	19361			19361		41937	my ($fmt, $rec, $value_to_parse) = @_;
1916
1917	19361					248204	my $parser = $fmt->parser;
1918	19361	100				360345	$parser = $rec->parser_with_addition_of_time if $rec->has_found_time;
1919
1920	19361					219429	my $is_ok = 1;
1921	19361	100				81838	$is_ok = (defined($parser->parse_datetime($value_to_parse)) ? 1 : 0) if $parser;
		100
1922
1923	19361	100				7599175	unless (defined($rec->format)) {
1924	8267					153643	$rec->format($fmt->format);
1925	8267					265471	$rec->locale($fmt->locale);
1926							}
1927
1928	19361					296678	return $is_ok;
1929							}
1930
1931							sub _guess_time_format {
1932
1933							# IMPORTANT
1934							# Formats are tested in the order of the list below, and the first one that succeeds stops the
1935							# tests.
1936							# That makes the order of the elements important: %R would match any value that'd also match
1937							# %T, that'd cause to return %R whereas %T would be possible. Same with AM/PM formats. Thus
1938							# the longest patterns appear first.
1939	1965			1965		18953	my @T = (
1940							'%I:%M:%S %p',
1941							'%I:%M %p',
1942							'%I:%M:%S%p',
1943							'%I:%M%p',
1944							'%T',
1945							'%R'
1946							);
1947
1948	1965					5684	my ($self, $format, $locale, $v, $stop) = @_;
1949
1950	1965					4895	my $_debugh = $self->{_debugh};
1951	1965		33			6387	my $debug_fmt = ($self->{_debug} and $DEBUG_DATETIME_FORMATS);
1952
1953	1965	100				6396	return undef if $format =~ /:/;
1954
1955	1961					3296	my $sep;
1956	1961	100				6636	if ($format eq '') {
1957	316					663	$sep = '';
1958							} else {
1959	1645	100				12926	unless ((undef, $sep) = $v =~ /(^\|\d([^0-9:]+))(\d{1,2}):(\d{1,2})(\D\|$)/) {
1960	387	100				1046	if ($v =~ /\d{4}:\d{2}(\D\|$)/) {
1961	29					73	$sep = '';
1962							} else {
1963
1964	358	50				862	print($_debugh "_guess_time_format(): separator candidate not found in '$v'\n")
1965							if $debug_fmt;
1966
1967	358					945	return undef;
1968							}
1969							}
1970							}
1971	1603	100				5468	$sep = '' unless defined($sep);
1972
1973							#
1974							# IMPORTANT
1975							#
1976							# The code below allows to successfully detect DateTime format when
1977							# the first lines contain things like:
1978							# Jan 20 2017 2:00AM
1979							# that could lead to a separator set to ' ' while actually it should be ' '. In this case
1980							# if the double-space is kept, then a later value of
1981							# Jan 20 2017 10:00AM
1982							# won't be parsed.
1983							#
1984							# See t/11-bugfix.t, BUG 5, for an explanation of why the line below.
1985							#
1986
1987							# More generic code, but will also break some separators like ' ' (4 spaces)
1988							# $sep = substr($sep, 0, length($sep) - 1) if length($sep) >= 2 and substr($sep, -2) eq ' ';
1989	1603	100				4677	$sep = ' ' if $sep eq ' ';
1990
1991	1603	50				4046	if ($debug_fmt) {
1992	0					0	print($_debugh " _guess_time_format(): Searching time in '$v'\n");
1993							}
1994
1995	1603					3624	for my $t (@T) {
1996	9139					599709	my $increased_format = "$format$sep$t";
1997
1998	9139	50				22295	print($_debugh " _guess_time_format(): Trying format '$increased_format'\n") if $debug_fmt;
1999
2000	9139					31763	my %opts = (pattern => $START . $increased_format . $stop);
2001	9139	100	66			46360	$opts{locale} = $locale if defined($locale) and $locale ne '';
2002	9139					40199	my $parser_of_increased_format = DateTime::Format::Strptime->new(%opts);
2003	9139	100				11636796	next unless defined($parser_of_increased_format->parse_datetime($START . $v . $stop));
2004
2005	254	50				179457	if ($debug_fmt) {
2006	0					0	print($_debugh " _guess_time_format(): found time in '$v'\n");
2007	0					0	print($_debugh " Initial format: '$format'\n");
2008	0					0	print($_debugh " Increased format: '$increased_format'\n");
2009							}
2010
2011	254					1623	return [$increased_format, $locale, $parser_of_increased_format];
2012							}
2013	1349					113176	return undef;
2014							}
2015
2016
2017							# * ********************************* *
2018							# * END OF DATE FORMAT DETECTION CODE *
2019							# * ********************************* *
2020
2021
2022							# Take the string of a header in $_ and replace it with the corresponding field name
2023							sub _header_to_field_name {
2024	1246			1246		2749	$_ = remove_accents($_);
2025	1246					2756	s/[^[:alnum:]_]//gi;
2026	1246					7630	s/^.*$/\U$&/;
2027							}
2028
2029							# Return 0 if error, 1 if all good
2030							sub _S2_init_fields_from_header {
2031	349			349		737	my $self = shift;
2032
2033	349					804	my $has_headers = $self->{has_headers};
2034	349					734	my $_debug = $self->{_debug};
2035	349					756	my $_debugh = $self->{_debugh};
2036
2037	349					1108	my $in_file_disp = $self->get_in_file_disp();
2038
2039	349					739	my $inh = $self->{_inh};
2040	349					758	my $incsv = $self->{_in_csvobj};
2041
2042	349					990	$self->{_row_read} = 0;
2043
2044	349					754	my @columns;
2045							my @headers;
2046	349	100				1042	if ($has_headers) {
2047
2048							print($_debugh "$PKG: '$in_file_disp': will parse header line to get column names\n")
2049	338	50				1055	if $self->{_debug_read};
2050
2051	338					704	$self->{_row_read}++;
2052
2053							print($_debugh "$PKG: '$in_file_disp': will read line #" . $self->{_row_read} . "\n")
2054	338	50				1003	if $self->{_debug_read};
2055
2056	338	50				1067	if (defined($self->{_inh_header})) {
2057	0					0	my $l = $self->{_inh_header};
2058	0					0	my $inmemh;
2059	0	0				0	if (!open ($inmemh, '<', \$l)) {
2060	0					0	$self->_print_error("can't open header line in-memory. CSV read aborted.");
2061	0					0	return 0;
2062							}
2063	0					0	@headers = @{_mygetline($incsv, $inmemh)};
	0					0
2064							} else {
2065	338					893	my $r = _mygetline($incsv, $inh);
2066	336	50				17668	@headers = @{$r} if defined($r);
	336					1414
2067							}
2068							print($_debugh "Line " . $self->{_row_read} . ":\n--\n" . join('::', @headers) . "\n--\n")
2069	336	50				1322	if $self->{_debug_read};
2070							}
2071
2072	347	100	100			2083	if ($has_headers and !defined($self->{fields_column_names})) {
2073	330					677	my %indexes;
2074	330	100				978	if (defined($self->{fields_hr})) {
2075	12	50				65	if (!$self->_process_header(\@headers, $self->{fields_hr}, \%indexes)) {
2076	0					0	$self->_print_error("missing headers. CSV read aborted.");
2077	0					0	return 0;
2078							}
2079	12	50				40	if ($_debug) {
2080	0					0	print($_debugh " \%indexes:\n");
2081	0					0	for my $k (sort keys %indexes) {
2082	0					0	print($_debugh " \t$k => $indexes{$k}\n");
2083							}
2084							}
2085	12					75	for (sort keys %indexes) {
2086	48	50				122	next if $_ eq '';
2087	48					113	$columns[$indexes{$_}] = $_;
2088							}
2089							} else {
2090	318					1075	@columns = @headers;
2091	318					778	map { _header_to_field_name } @columns;
	1246					9060
2092							}
2093							}
2094
2095	347	100				1266	@columns = @{$self->{fields_column_names}} if defined($self->{fields_column_names});
	14					40
2096
2097							# Avoid undef in column names... I prefer empty strings
2098	347	100				1255	@columns = map { defined($_) ? $_ : '' } @columns;
	1370					3967
2099
2100	347	50				1185	if ($_debug) {
2101	0					0	print($_debugh "-- CSV headers management\n");
2102	0	0				0	if (@columns) {
2103	0					0	printf($_debugh " %-3s %-40s %-40s\n", 'COL', 'CSV Header', 'Hash Key');
2104	0					0	for my $i (0..$#columns) {
2105	0					0	my $h = '';
2106	0	0				0	$h = $headers[$i] if defined($headers[$i]);
2107	0					0	printf($_debugh " %03d %-40s %-40s\n", $i, "'$h'", "'$columns[$i]'");
2108							}
2109							} else {
2110	0					0	print($_debugh " No headers\n");
2111							}
2112							}
2113
2114	347					695	my %regular_named_fields;
2115	347					1170	for my $i (0..$#columns) {
2116	1370	100	66			6865	$regular_named_fields{$columns[$i]} = $i if defined($columns[$i]) and $columns[$i] ne '';
2117							}
2118	347					2023	$self->{_regular_named_fields} = { %regular_named_fields };
2119	347					1446	$self->{_S2_columns} = [ @columns ];
2120	347	100				1635	$self->{_S2_headers} = [ @headers ] if $has_headers;
2121
2122	347					1730	return 1;
2123							}
2124
2125							sub out_header {
2126	8			8	1	15	my $self = shift;
2127	8					131	validate_pos(@_, {type => SCALAR}, {type => SCALAR});
2128
2129	8					28	my ($field, $header) = @_;
2130	8	100				28	$self->{_out_headers} = { } unless exists $self->{_out_headers};
2131
2132							$self->_print_warning("out_header: field $field already set")
2133	8	50				19	if exists $self->{_out_headers}->{$field};
2134
2135	8					17	$self->{_out_headers}->{$field} = $header;
2136
2137	8					27	return $self;
2138							}
2139
2140							# Return 0 if error, 1 if all good
2141							sub _S3_init_fields_extra {
2142	349			349		738	my $self = shift;
2143
2144	349					783	my $_debug = $self->{_debug};
2145	349					721	my $_debugh = $self->{_debugh};
2146
2147	349					760	my $verbose = $self->{verbose};
2148
2149	349					676	my $has_headers = $self->{has_headers};
2150
2151	349					610	my %named_fields = %{$self->{_regular_named_fields}};
	349					1857
2152	349					821	my @columns = @{$self->{_S2_columns}};
	349					1176
2153	349	100				1019	my @headers = @{$self->{_S2_headers}} if $has_headers;
	338					1066
2154
2155	349					664	my @extra_fields_indexes;
2156	349	100				1173	my @extra_fields_definitions_list = @{$self->{_extra_fields}} if exists $self->{_extra_fields};
	36					135
2157	349					754	my %extra_fields_definitions;
2158
2159							my @coldata;
2160	349					1096	for my $i (0..$#columns) {
2161	1376					47229	my $col = $columns[$i];
2162	1376	100				3304	my $h = $headers[$i] if $has_headers;
2163	1376					24745	push @coldata, ColData->new(
2164							field_name => $col,
2165							header_text => $h,
2166							description => ''
2167							);
2168							}
2169
2170	349					16487	for my $edef (@extra_fields_definitions_list) {
2171	92					6463	my $c = $edef->check_field_existence;
2172	92	100				1234	if (defined($c)) {
2173	80	100				276	unless (exists $named_fields{$c}) {
2174	9					173	$self->_print_error("unknown field '" . $edef->check_field_existence . "'",
2175							0, ERR_UNKNOWN_FIELD, { %named_fields } );
2176	8					37	next;
2177							}
2178							}
2179
2180	83					247	my @e_eclated = $edef;
2181
2182	83	100	100			1621	if ($edef->ef_type == $EF_LINK and $edef->link_remote_read eq '*') {
2183	2					102	my @cols = $edef->link_remote_obj->get_fields_names();
2184
2185	2					7	@e_eclated = ();
2186	2					10	my %nf = %named_fields;
2187
2188	2					7	for my $c (@cols) {
2189
2190	4					84	my $ex_base = $edef->self_name . $c;
2191	4					40	my $ex_target = $ex_base;
2192	4					9	my $i = 1;
2193	4					19	while (exists $nf{$ex_target}) {
2194	1					3	$i++;
2195	1					61	$ex_target = $ex_base . '_' . $i;
2196							}
2197
2198	4					77	my $e = ExtraField->new(
2199							ef_type => $EF_LINK,
2200							self_name => $ex_target,
2201							description => $edef->description . " ($c)",
2202
2203							link_self_search => $edef->link_self_search,
2204							link_remote_obj => $edef->link_remote_obj,
2205							link_remote_search => $edef->link_remote_search,
2206							link_remote_read => $c,
2207
2208							link_vlookup_opts => $edef->link_vlookup_opts
2209							);
2210	4					522	push @e_eclated, $e;
2211	4					19	$nf{$ex_target} = undef;
2212							}
2213							}
2214
2215	83					2010	for my $e1 (@e_eclated) {
2216	85	100				1756	if (exists $named_fields{$e1->self_name}) {
2217	6					184	$self->_print_error("extra field: duplicate field name: '" . $e1->self_name . "'");
2218	6					36	next;
2219							}
2220
2221	79					905	my $index_of_new_element = $#columns + 1;
2222	79					193	push @extra_fields_indexes, $index_of_new_element;
2223	79					1463	$columns[$index_of_new_element] = $e1->self_name;
2224	79					2011	$named_fields{$e1->self_name} = $index_of_new_element;
2225	79					1987	$extra_fields_definitions{$e1->self_name} = $e1;
2226
2227	79	50				2104	push @headers, $e1->self_name if $has_headers;
2228	79					2057	push @coldata, ColData->new(
2229							field_name => $e1->self_name,
2230							header_text => $e1->self_name,
2231							description => $e1->description
2232							);
2233							}
2234
2235							}
2236	348	100				4319	$self->{_headers} = [ @headers ] if $has_headers;
2237	348					1295	$self->{_extra_fields_indexes} = [ @extra_fields_indexes ];
2238	348					1375	$self->{_columns} = [ @columns ];
2239	348					1229	$self->{_extra_fields_definitions} = { %extra_fields_definitions };
2240
2241	348					2011	$self->{_named_fields} = { %named_fields };
2242
2243	348					1694	$self->_detect_dates_formats();
2244
2245	345					1046	$self->{_read_update_after_ar} = [ ];
2246	345					3959	$self->{_write_update_before_ar} = [ ];
2247	345	100				3326	my @dates_formats = @{$self->{_dates_formats}} if defined($self->{_dates_formats});
	57					286
2248	345					1224	for my $i (0..$#columns) {
2249	1436					2591	my $dt_format;
2250							my $dt_locale;
2251	1436	100				3506	if (defined($dates_formats[$i])) {
2252	329					674	$dt_format = $dates_formats[$i]->[0];
2253	329					607	$dt_locale = $dates_formats[$i]->[1];
2254							}
2255	1436					23769	$coldata[$i]->dt_format($dt_format);
2256	1436					27651	$coldata[$i]->dt_locale($dt_locale);
2257
2258	1436	100				11583	next unless defined($dt_format);
2259
2260	329					540	my %opts_in;
2261	329	100	66			1663	$opts_in{locale} = $dt_locale if defined($dt_locale) and $dt_locale ne '';
2262
2263	329					1443	my $obj_strptime_in = DateTime::Format::Strptime->new(pattern => $dt_format, %opts_in);
2264
2265	329					379557	my %opts_out;
2266	329	50				1038	my $loc_out = (exists $self->{out_dates_locale} ? $self->{out_dates_locale} : $dt_locale);
2267	329	100	66			1922	$opts_out{locale} = $loc_out if defined($loc_out) and $loc_out ne '';
2268							my $obj_strptime_out = DateTime::Format::Strptime->new(
2269	329	50				1534	pattern => (exists $self->{out_dates_format} ? $self->{out_dates_format} :$dt_format),
2270							%opts_out
2271							);
2272
2273	329					356151	my $refsub_is_datetime_empty = $self->{_refsub_is_datetime_empty};
2274	329					1123	my $in_file_disp = $self->get_in_file_disp();
2275
2276							$self->{_read_update_after_ar}->[$i] = sub {
2277	352	100	66	352		2215	return undef if !defined($_) or $_ eq '' or
			33
			66
2278							(defined($refsub_is_datetime_empty) and $refsub_is_datetime_empty->($_));
2279
2280	315					585	my $s = $_[0];
2281	315					741	my $field = _get_def($_[1], '<?>');
2282
2283	315					1028	my $dt = $obj_strptime_in->parse_datetime($_);
2284
2285	315	0	33			175356	if ($_debug and $DEBUG_DATETIME_FORMATS and $DEBUG_DATETIME_FORMATS_EVEN_MORE) {
			33
2286	0	0				0	print($_debugh "-- Record " . $s->get_recnum() .
2287							", field '$field':\n String parsed: '$_'\n Parse format: '$dt_format'\n" .
2288							" DateTime obj: <" . (defined($dt) ? $dt . '' : 'undef') . ">\n");
2289							}
2290
2291	315	100				753	if (!defined($dt)) {
2292	2					11	my $recnum = $s->get_recnum();
2293	2	50				8	if ($verbose) {
2294	0					0	$s->_print("$PKG: " .
2295							"$in_file_disp: record $recnum: field $field: unable to parse DateTime\n");
2296	0					0	$s->_print("$PKG: field: '$_'\n");
2297	0					0	$s->_print("$PKG: format: '$dt_format'\n");
2298	0	0				0	$s->_print("$PKG: " .
2299							"locale: '" . ($dt_locale eq '' ? '<none>' : $dt_locale) . "'\n");
2300	0					0	$s->_print("$PKG: " .
2301							"Probable cause: when detecting DateTime format, $PKG will stop reading\n");
2302	0					0	$s->_print("$PKG: " .
2303							"input as soon as the format is worked out. If a value found later\n");
2304	0					0	$s->_print("$PKG: " .
2305							"turns out to use another DateTime format, it'll generate a DateTime\n");
2306	0					0	$s->_print("$PKG: parse error, as is the case now.\n");
2307	0					0	$s->_print_error("unable to parse DateTime");
2308							} else {
2309	2					18	$s->_print_error("$in_file_disp: record $recnum: field $field: " .
2310							"unable to parse DateTime '$_'");
2311							}
2312							}
2313
2314	314					651	return $dt;
2315	329					2387	};
2316							$self->{_write_update_before_ar}->[$i] = sub {
2317	96	100		96		240	return '' unless defined($_);
2318	87	100				211	return $_ if !ref($_);
2319	81	50				297	return $_ unless $_->isa('DateTime');
2320
2321	81					268	my $str = $obj_strptime_out->format_datetime($_);
2322
2323	81	50				16151	if (!defined($str)) {
2324	0					0	my $s = $_[0];
2325	0					0	my $recnum = $s->get_recnum();
2326	0					0	my $field = _get_def($_[1], '<?>');
2327	0					0	$s->_print_error("$in_file_disp: record $recnum: field $field: " .
2328							"unable to print DateTime '$_'")
2329							}
2330
2331	81					188	return $str;
2332	329					1696	};
2333							}
2334
2335	345					1234	$self->{_coldata} = [ @coldata ];
2336
2337	345					2110	my @loop = (
2338							['_read_update_after_hr', '_read_update_after_ar', 'read post'],
2339							['_write_update_before_hr', '_write_update_before_ar', 'write pre']
2340							);
2341	345					1143	for my $ii (0..$#loop) {
2342	690					1360	my $l = $loop[$ii];
2343
2344	690					1583	my $ht = $self->{$l->[0]};
2345	690					1112	my @subrefs = @{$self->{$l->[1]}};
	690					1821
2346	690					1171	for my $field (keys %{$ht}) {
	690					1776
2347	70	50				192	unless (exists $named_fields{$field}) {
2348	0					0	$self->_print_error($l->[2] . ": unknown field '$field'",
2349							0, ERR_UNKNOWN_FIELD, { %named_fields } );
2350	0					0	next;
2351							}
2352
2353	70					120	my $i = $named_fields{$field};
2354
2355	70					96	my @allsubs;
2356	70					108	push @allsubs, @{$ht->{$field}};
	70					135
2357	70	100				174	if (defined($subrefs[$i])) {
2358	2	50				8	unshift @allsubs, $subrefs[$i] if $ii == 0;
2359	2	50				6	push @allsubs, $subrefs[$i] if $ii == 1;
2360							}
2361
2362							my $finalsub = sub {
2363	196			196		349	for my $s (@allsubs) {
2364	262					979	$_ = $s->(@_);
2365							}
2366	189					6359	return $_;
2367	70					248	};
2368	70					157	$subrefs[$i] = $finalsub;
2369
2370							}
2371	690					2132	$self->{$l->[1]} = [ @subrefs ];
2372							}
2373
2374	345					1684	my $tmp = _get_def($self->{out_fields}, $self->{write_fields});
2375	345	100				1326	my @wf = @{$tmp} if defined($tmp);
	6					14
2376	345					748	my $count_field_not_found = 0;
2377	345					850	for (@wf) {
2378	16	100	66			106	next if !defined($_) or $_ eq '' or exists $named_fields{$_};
			100
2379	3					6	$count_field_not_found++;
2380	3					18	$self->_print_error("out_fields: unknown field '$_'",
2381							1, ERR_UNKNOWN_FIELD, { %named_fields } );
2382							}
2383	345	100				1022	if ($count_field_not_found) {
2384	2					5	$self->_print_error("non existent field(s) encountered");
2385	1					59	delete $self->{out_fields};
2386	1					4	delete $self->{write_fields};
2387							}
2388
2389	344	100				1068	my %sh = %{$self->{_out_headers}} if defined($self->{_out_headers});
	4					16
2390	344					682	$count_field_not_found = 0;
2391	344					988	for (keys %sh) {
2392	8	100	33			50	next if !defined($_) or $_ eq '' or exists $named_fields{$_};
			66
2393	2					4	$count_field_not_found++;
2394	2					12	$self->_print_error("out_header: unknown field '$_'",
2395							1, ERR_UNKNOWN_FIELD, { %named_fields } );
2396							}
2397	344	100				937	$self->_print_error("non existent field(s) encountered") if $count_field_not_found;
2398
2399	343					2544	return 1;
2400							}
2401
2402							#
2403							# Return 0 if there's no more records (error or eof reached), 1 if a record got read
2404							# successfully.
2405							#
2406							# If return value is 1:
2407							# $$ref_ar and $$ref_hr are set to array ref and hash ref of the record, respectively
2408							#
2409							# If return value is 0:
2410							# $$ref_ar and $$ref_hr are set to undef if an error occured
2411							# $$ref_ar and $$ref_hr are set to a scalar if eof reached
2412							#
2413							sub _read_one_record_from_input {
2414	2329			2329		4935	my ($self, $ref_ar, $ref_row_hr) = @_;
2415
2416	2329					4355	my $_debug = $self->{_debug};
2417	2329					3688	my $_debug_extra_fields = $self->{_debug_extra_fields};
2418	2329					3707	my $_debugh = $self->{_debugh};
2419
2420	2329					5212	my $in_file_disp = $self->get_in_file_disp();
2421
2422	2329					3981	my $incsv = $self->{_in_csvobj};
2423	2329					3539	my $ar;
2424
2425							print($_debugh "$PKG: '$in_file_disp': will read line #" . ($self->{_row_read} + 1) . "\n")
2426	2329	50				5342	if $self->{_debug_read};
2427
2428	2329	100				5116	unless ($ar = _mygetline($incsv, $self->{_inh})) {
2429	284	50				11520	if (!$incsv->eof()) {
2430	0					0	my ($code, $str, $pos) = $incsv->error_diag();
2431	0					0	$self->_print_error("$code: $str, record " . $incsv->record_number . ", position $pos");
2432	0					0	$$ref_ar = undef;
2433	0					0	$$ref_row_hr = undef;
2434							} else {
2435	284					2104	$$ref_ar = 1;
2436	284					677	$$ref_row_hr = 1;
2437							}
2438
2439	284					1443	$self->_close_inh();
2440
2441	284					1141	return 0;
2442							}
2443
2444	2045					63747	$self->{_row_read}++;
2445
2446	2045					3385	my %named_fields = %{$self->{_named_fields}};
	2045					10002
2447
2448	2045	50				5756	if ($self->{_debug_read}) {
2449	0					0	print($_debugh "Line " . $self->{_row_read} . ":\n--\n");
2450	0					0	for (sort keys %named_fields) {
2451	0					0	my $c = _get_def($ar->[$named_fields{$_}], '<undef>');
2452	0					0	print($_debugh " $_ => '" . $c . "'\n");
2453							}
2454							}
2455
2456	2045					3630	my $columns_ar = $self->{_columns};
2457
2458	2045					3399	my $no_undef = $self->{no_undef};
2459	2045	100				4499	if ($no_undef) {
2460	30					63	for (0..$#{$columns_ar}) {
	30					103
2461	324	100				1043	$ar->[$_] = '' unless defined($ar->[$_]);
2462							}
2463							}
2464
2465	2045					3621	my $row_hr = { };
2466							$row_hr->{$_} = $ar->[$self->{_regular_named_fields}->{$_}]
2467	2045					3224	foreach keys %{$self->{_regular_named_fields}};
	2045					14072
2468
2469	2045					4523	my $rpost = $self->{_read_update_after_ar};
2470	2045					3118	for my $i (0..$#{$columns_ar}) {
	2045					4791
2471	10556					15404	my $subref = $rpost->[$i];
2472	10556	100				23234	next unless defined($subref);
2473
2474	479					750	do {
2475	479					817	my $field = $columns_ar->[$i];
2476	479					845	local $_ = $ar->[$i];
2477	479					1080	my $new_val = $subref->($self, $field);
2478	474					940	$ar->[$i] = $new_val;
2479	474	50				1726	$row_hr->{$field} = $new_val if defined($field);
2480							}
2481
2482							}
2483
2484	2040					3337	for my $i (@{$self->{_extra_fields_indexes}}) {
	2040					4099
2485	442					1039	my $name = $columns_ar->[$i];
2486	442					1023	my $e = $self->{_extra_fields_definitions}->{$name};
2487
2488	442	50				1186	print($_debugh "Extra field: #$i: $name\n") if $_debug_extra_fields;
2489
2490	442					852	my $value;
2491	442	100				9646	if ($e->ef_type == $EF_LINK) {
		100
		50
2492
2493	238	50				2639	print($_debugh " linked field\n") if $_debug_extra_fields;
2494
2495	238					4405	my $remobj = $e->link_remote_obj;
2496							$value = $remobj->vlookup(
2497							$e->link_remote_search,
2498	238					5846	$ar->[$named_fields{$e->link_self_search}],
2499							$e->link_remote_read,
2500							$e->link_vlookup_opts
2501							);
2502
2503							} elsif ($e->ef_type == $EF_FUNC) {
2504
2505	51	50				1616	print($_debugh " computed field\n") if $_debug_extra_fields;
2506
2507	51					889	$value = $e->func_sub->($name, $row_hr, $self->{_stats});
2508
2509							} elsif ($e->ef_type == $EF_COPY) {
2510
2511	153	50				8580	print($_debugh " copy field\n") if $_debug_extra_fields;
2512
2513	153					2850	my $input = $row_hr->{$e->copy_source};
2514	153	50	33			1664	$input = '' if !defined($input) and $no_undef;
2515	153	100				3878	if (defined($e->copy_sub)) {
2516	57					595	local $_ = $input;
2517	57					995	$value = $e->copy_sub->();
2518							} else {
2519	96					971	$value = $input;
2520							}
2521
2522	153	50				1448	print($_debugh " in: '$input', out: '$value'\n") if $_debug_extra_fields;
2523
2524							} else {
2525	0					0	confess "Unknown ef_type '" . $e->ef_type . "', check this module' code urgently!";
2526							}
2527
2528	440	100	100			2431	$value = '' if !defined($value) and $no_undef;
2529	440					1151	$ar->[$i] = $value;
2530	440					1217	$row_hr->{$name} = $value;
2531
2532	440	50				1369	print($_debugh " $name => '$value'\n") if $_debug_extra_fields;
2533
2534							}
2535
2536	2038	100				5070	if (defined($self->{read_post_update_hr})) {
2537	33					100	$self->{read_post_update_hr}->($row_hr, $self->{_stats}, $self->get_recnum());
2538	33					318	$ar->[$named_fields{$_}] = $row_hr->{$_} foreach keys %named_fields;
2539							}
2540
2541	2038	100				7468	lock_keys(%$row_hr) if $self->{croak_if_error};
2542
2543							$self->{walker_ar}->($ar, $self->{_stats}, $self->get_recnum())
2544	2038	100				15211	if defined($self->{walker_ar});
2545							$self->{walker_hr}->($row_hr, $self->{_stats}, $self->get_recnum())
2546	2038	100				5428	if defined($self->{walker_hr});
2547
2548	2037					3705	$$ref_ar = $ar;
2549	2037					3314	$$ref_row_hr = $row_hr;
2550
2551	2037					8424	return 1;
2552							}
2553
2554							sub _open_read {
2555	295			295		626	my $self = shift;
2556
2557	295					619	my $verbose = $self->{verbose};
2558	295					1195	my $in_file_disp = $self->get_in_file_disp();
2559
2560	295					849	$self->{_stats} = { };
2561	295					677	$self->{_read_in_progress} = 1;
2562
2563	295	50				895	$self->_print("-- $in_file_disp reading start\n") if $verbose;
2564							}
2565
2566							sub _close_read {
2567	314			314		712	my $self = shift;
2568	314					600	my $keep_quiet = shift;
2569
2570	314					657	my $verbose = $self->{verbose};
2571	314					849	my $in_file_disp = $self->get_in_file_disp();
2572
2573	314					772	$self->{_read_in_progress} = 0;
2574
2575	314	50	33			1196	if ($verbose and !$keep_quiet) {
2576	0					0	$self->_print("-- $in_file_disp reading end: " . $self->{_row_read} . " row(s) read\n");
2577	0					0	for my $k (sort keys %{$self->{_stats}}) {
	0					0
2578	0					0	$self->_printf(" %7d %s\n", $self->{_stats}->{$k}, $k);
2579							}
2580							}
2581							}
2582
2583							# Return 0 if error, 1 if all good
2584							sub _S4_read_all_in_mem {
2585	165			165		377	my $self = shift;
2586
2587	165					588	$self->_register_pass("_S4_read_all_in_mem()");
2588
2589	165					633	$self->_open_read();
2590
2591	165					374	my $ar;
2592							my $row_hr;
2593	165					697	while ($self->_read_one_record_from_input(\$ar, \$row_hr)) {
2594
2595	1543					2566	push @{$self->{_flat}}, $ar;
	1543					4903
2596
2597							}
2598
2599	163	50				549	my $retcode = (defined($ar) ? 1 : 0);
2600	163					645	$self->_update_in_mem_record_count();
2601
2602	163					616	$self->_close_read();
2603
2604	163					513	return $retcode;
2605							}
2606
2607							sub _chain_array {
2608	24			24		194	return split(/\s->\s/, $_[0]);
2609							}
2610
2611							sub _chain_str {
2612	2			2		11	return join('->', @_);
2613							}
2614
2615							sub field_add_link {
2616	25			25	1	5483	my $self = shift;
2617
2618	25					821	validate_pos(@_, {type => UNDEF \| SCALAR}, {type => SCALAR}, {type => SCALAR \| OBJECT},
2619							{type => HASHREF, optional => 1});
2620
2621	22					109	my ($new_field, $chain, $obj, $param_opts) = @_;
2622
2623	22					51	my $croak_if_error = $self->{croak_if_error};
2624	22					52	my $_debug = $self->{_debug};
2625	22					44	my $_debugh = $self->{_debugh};
2626
2627	22					75	my @c = _chain_array($chain);
2628	22	100				85	$new_field = $c[2] unless defined($new_field);
2629
2630	22	50				65	print($_debugh "Registering new linked field, new_field = '$new_field', chain = '$chain'\n")
2631							if $_debug;
2632
2633	22	100	66			164	unless (@c == 3 and $c[2] ne '') {
2634	1					5	$self->_print_error("wrong links chain parameter: '$chain', " .
2635							"look for CHAIN in Text::AutoCSV manual for help");
2636	1					8	return undef;
2637							}
2638
2639	21	50				68	return undef unless $self->_status_forward('S2');
2640	21	50				76	return undef unless $self->_status_backward('S2');
2641
2642	21	100				73	my @tmp = %{$param_opts} if $param_opts;
	13					52
2643	21					585	my %opts = validate(@tmp, $SEARCH_VALIDATE_OPTIONS);
2644
2645	20					92	my $target_name = '';
2646	20	100				69	if (ref $obj eq '') {
2647	19					38	my $in_file = $obj;
2648	19					39	$target_name = $in_file;
2649
2650							#
2651							# TODO (?)
2652							#
2653							# Take into account the fact that the OS' file system is case insensitive. At the
2654							# moment, two different strings (even if identical in a case insensitive comparison)
2655							# will be managed as being distinct.
2656							# I put a question mark in this TO DO - after all, the user of this module had better
2657							# use same case when dealing with multiple links of the same file.
2658							#
2659							# Also, tuning this module' behavior depending on the OS' characteristics would be not
2660							# ideal, it'd add a level of complexity to understand how it works and what to expect.
2661							#
2662	19	100	100			95	if (exists $self->{_obj} and exists $self->{_obj}->{$in_file}) {
2663
2664	4	50				11	print(
2665							$_debugh
2666							"field_add_link: file '$in_file': re-using existing Text::AutoCSV object\n"
2667							) if $_debug;
2668
2669	4					8	$obj = $self->{_obj}->{$in_file};
2670							} else {
2671
2672	15	50				49	print($_debugh "field_add_link: file '$in_file': creating new Text::AutoCSV object\n")
2673							if $_debug;
2674
2675	15	100				64	$self->{_obj} = { } unless exists $self->{_obj};
2676
2677							#
2678							# The created Text::AutoCSV must be created with the same search options as what is
2679							# currently found in $self.
2680							#
2681							# Why?
2682							# Because the link is populated doing a vlookup on the remote object ($obj below),
2683							# not on $self. Therefore, if we don't "propagate" search options from $self to
2684							# $obj, search tunnings won't work as user would expect.
2685							#
2686	15					36	my %search_opts;
2687	15					41	for (qw(search_case search_trim search_ignore_empty search_ignore_accents
2688							search_value_if_not_found search_value_if_ambiguous search_ignore_ambiguous)) {
2689							# We assign depending on whether or not the attribute EXISTS - the definedness
2690							# is not appropriate, in case an attribute would have been assigned to undef.
2691	105	100				267	$search_opts{$_} = $self->{$_} if exists $self->{$_};
2692							}
2693
2694							$obj = Text::AutoCSV->new(
2695							in_file => $in_file,
2696							verbose => $self->{verbose},
2697							infoh => $self->{infoh},
2698							_debug => $self->{debug},
2699							_debugh => $self->{debugh},
2700	15					130	%search_opts
2701							);
2702	15					83	$self->{_obj}->{$in_file} = $obj;
2703							}
2704							} else {
2705	1					3	$target_name = '(object)';
2706	1	50				6	print($_debugh "field_add_link: Text::AutoCSV object provided\n") if $_debug;
2707							}
2708
2709	20	100				90	$self->{_extra_fields} = [ ] unless exists $self->{_extra_fields};
2710
2711	20					45	push @{$self->{_extra_fields}}, ExtraField->new(
	20					596
2712							ef_type => $EF_LINK,
2713							self_name => $new_field,
2714							description => "link: $target_name, chain: $chain",
2715							check_field_existence => $c[0],
2716
2717							link_self_search => $c[0],
2718							link_remote_obj => $obj,
2719							link_remote_search => $c[1],
2720							link_remote_read => $c[2],
2721
2722							link_vlookup_opts => \%opts
2723							);
2724
2725	20					2778	return $self;
2726							}
2727
2728							sub links {
2729	2			2	1	6	my $self = shift;
2730
2731	2					44	validate_pos(@_, {type => UNDEF \| SCALAR}, {type => SCALAR}, {type => SCALAR \| OBJECT},
2732							{type => HASHREF, optional => 1});
2733
2734	2					10	my $prefix_field = shift;
2735	2					6	my $chain = shift;
2736	2					5	my ($obj, $param_opts) = @_;
2737
2738	2					9	my @c = _chain_array($chain);
2739
2740	2	50	33			25	if (@c != 2 or $c[0] eq '' or $c[1] eq '') {
			33
2741	0					0	$self->_print_error("wrong links chain parameter: '$chain', " .
2742							"look for JOINCHAIN in Text::AutoCSV manual for help");
2743	0					0	return undef;
2744							}
2745
2746	2	100				10	$prefix_field = '' unless defined($prefix_field);
2747	2					7	my $chain2 = _chain_str(@c, '*');
2748
2749	2					8	return $self->field_add_link($prefix_field, $chain2, @_);
2750							}
2751
2752							sub field_add_computed {
2753	7			7	1	2517	my $self = shift;
2754
2755	7					209	validate_pos(@_, {type => SCALAR}, {type => CODEREF});
2756	6					23	my ($new_field, $func) = @_;
2757
2758	6					17	my $croak_if_error = $self->{croak_if_error};
2759
2760	6					14	my $_debug = $self->{_debug};
2761	6					13	my $_debugh = $self->{_debugh};
2762
2763	6	50				20	print($_debugh "Registering new computed field, new_field = '$new_field'\n") if $_debug;
2764
2765	6	100				17	return undef unless $self->_status_forward('S2');
2766	5	50				20	return undef unless $self->_status_backward('S2');
2767
2768	5					14	push @{$self->{_extra_fields}}, ExtraField->new(
	5					133
2769							ef_type => $EF_FUNC,
2770							self_name => $new_field,
2771							description => "computed",
2772
2773							func_sub => $func
2774							);
2775
2776	5					644	return $self;
2777							}
2778
2779							sub field_add_copy {
2780	14			14	1	13785	my $self = shift;
2781
2782	14					244	validate_pos(@_, {type => SCALAR}, {type => SCALAR}, {type => CODEREF, optional => 1});
2783	14					72	my ($new_field, $copy_source, $func) = @_;
2784
2785	14					42	my $croak_if_error = $self->{croak_if_error};
2786
2787	14					33	my $_debug = $self->{_debug};
2788	14					33	my $_debugh = $self->{_debugh};
2789
2790	14	50				49	print($_debugh "Registering field copy, new_field = '$new_field' copied from '$copy_source'\n")
2791							if $_debug;
2792
2793	14	100				47	return undef unless $self->_status_forward('S2');
2794	12	50				47	return undef unless $self->_status_backward('S2');
2795
2796	12	100				36	push @{$self->{_extra_fields}}, ExtraField->new(
	12					528
2797							ef_type => $EF_COPY,
2798							self_name => $new_field,
2799							description => "copy of $copy_source " . (defined($func) ? '(with sub)' : '(no sub)'),
2800							check_field_existence => $copy_source,
2801
2802							copy_source => $copy_source,
2803							copy_sub => $func
2804							);
2805
2806	12					1680	return $self;
2807							}
2808
2809							sub in_map {
2810	15			15	1	1349	my $self = shift;
2811
2812	15					48	return $self->read_update_after(@_);
2813							}
2814
2815							sub read_update_after {
2816	16			16	1	31	my $self = shift;
2817	16					194	validate_pos(@_, {type => SCALAR}, {type => CODEREF});
2818
2819	16					60	my ($field, $subref) = @_;
2820
2821	16					33	my $_debug = $self->{_debug};
2822	16					32	my $_debugh = $self->{_debugh};
2823
2824	16	50				45	return undef unless $self->_status_forward('S2');
2825	16	50				45	return undef unless $self->_status_backward('S2');
2826
2827	16	50				42	print($_debugh "Registering read_post_update subref for field '$field'\n") if $_debug;
2828
2829							$self->{_read_update_after_hr}->{$field} = [ ]
2830	16	100				65	unless defined($self->{_read_update_after_hr}->{$field});
2831
2832	16					26	push @{$self->{_read_update_after_hr}->{$field}}, $subref;
	16					44
2833
2834	16					140	return $self;
2835							}
2836
2837							sub out_map {
2838	11			11	1	17	my $self = shift;
2839
2840	11					32	return $self->write_update_before(@_);
2841							}
2842
2843							sub write_update_before {
2844	12			12	1	19	my $self = shift;
2845	12					108	validate_pos(@_, {type => SCALAR}, {type => CODEREF});
2846
2847	12					36	my ($field, $subref) = @_;
2848
2849	12					23	my $_debug = $self->{_debug};
2850	12					20	my $_debugh = $self->{_debugh};
2851
2852	12	50				26	return undef unless $self->_status_forward('S2');
2853	12	50				27	return undef unless $self->_status_backward('S2');
2854
2855	12	50				32	print($_debugh "Registering write_pre_update subref for field '$field'\n") if $_debug;
2856
2857							$self->{_write_update_before_hr}->{$field} = [ ]
2858	12	100				47	unless defined($self->{_write_update_before_hr}->{$field});
2859
2860	12					18	push @{$self->{_write_update_before_hr}->{$field}}, $subref;
	12					29
2861
2862	12					70	return $self;
2863							}
2864
2865							sub reset_next_record_hr {
2866	187			187	1	418	my $self = shift;
2867
2868	187					995	validate_pos(@_);
2869
2870	187					684	$self->{_current_record} = undef;
2871
2872	187					378	return $self;
2873							}
2874
2875							sub _create_internal_column_name_from_its_number {
2876	1120			1120		2425	return sprintf("__%04i__", $_[0]);
2877							}
2878
2879							sub _ar_to_hr {
2880	2048			2048		3221	my $self = shift;
2881
2882	2048					15471	validate_pos(@_, {type => ARRAYREF});
2883
2884	2048					5758	my ($ar) = @_;
2885	2048					3191	my $last_elem_index = scalar(@{$ar}) - 1;
	2048					4209
2886
2887	2048					3472	my $nr = $self->{_named_fields};
2888	2048					3125	my %h;
2889							my %n_seen;
2890	2048					3017	for (keys %{$nr}) {
	2048					6236
2891	9841					21307	$h{$_} = $ar->[$nr->{$_}];
2892	9841					19241	undef $n_seen{$nr->{$_}};
2893							}
2894	2048					4913	for my $i (0..$last_elem_index) {
2895	10908	100				24216	if (!exists($n_seen{$i})) {
2896	1120					2010	my $k = _create_internal_column_name_from_its_number($i);
2897	1120	50				3403	$h{$k} = $ar->[$i] if !exists $h{$k};
2898							}
2899							}
2900
2901	2048	100				7180	lock_keys(%h) if $self->{croak_if_error};
2902
2903	2048					18621	return \%h;
2904							}
2905
2906							sub get_next_record_hr {
2907	1989			1989	1	3527	my $self = shift;
2908
2909	1989					13767	validate_pos(@_, {type => SCALARREF, optional => 1});
2910
2911	1989					4830	my $refkey = $_[0];
2912
2913	1989	50				4396	return undef unless $self->_status_forward('S4');
2914
2915	1986	100				4581	if (!defined($self->{_current_record})) {
2916	184					425	$self->{_current_record} = 0;
2917							} else {
2918	1802					2884	$self->{_current_record}++;
2919							}
2920
2921	1986					3798	my $ar = $self->{_flat}->[$self->{_current_record}];
2922	1986	100				4196	if (!defined($ar)) {
2923	184					354	$self->{_current_record} = undef;
2924	184					359	$$refkey = undef;
2925	184					606	return undef;
2926							}
2927
2928	1802					2987	$$refkey = $self->{_current_record};
2929
2930	1802					3971	return $self->_ar_to_hr($ar);
2931							}
2932
2933							sub read {
2934	60			60	1	24762	my $self = shift;
2935
2936	60					459	validate_pos(@_);
2937
2938	60	50				225	return undef unless $self->_status_backward('S3');
2939	60	100				187	return undef unless $self->_status_forward('S3');
2940
2941	51					209	$self->_register_pass("read()");
2942
2943	51					192	$self->_open_read();
2944
2945	51					113	my $ar;
2946							my $row_hr;
2947	51					230	while ($self->_read_one_record_from_input(\$ar, \$row_hr)) {
2948							# Ben oui quoi... qu'est-ce que l'on peut bien faire d'autre ?
2949							}
2950
2951	49					202	$self->_close_read();
2952	49	50				147	return undef unless defined($ar);
2953
2954	49	50				158	return undef unless $self->_status_reset();
2955
2956	49					228	return $self;
2957							}
2958
2959							#
2960							# Initially, _read_all_in_mem was intended for the test plan.
2961							#
2962							# Turned out to be sometimes useful for user, thus, is no longer private since 1.1.5.
2963							# Private version below is kept for compatibility.
2964							#
2965							sub read_all_in_mem {
2966	1			1	1	3	my $self = shift;
2967
2968	1					5	return $self->_read_all_in_mem();
2969							}
2970
2971							sub _read_all_in_mem {
2972	9			9		1281	my $self = shift;
2973
2974	9	50				31	return 0 unless $self->_status_backward('S3');
2975	9	50				32	return 0 unless $self->_status_forward('S4');
2976
2977	9					49	return $self;
2978							}
2979
2980							sub print_id {
2981	0			0	1	0	my $self = shift;
2982
2983	0					0	$self->_printf("-- " . $self->get_in_file_disp() . ":\n");
2984	0					0	$self->_printf("sep_char: " . $self->get_sep_char() . "\n");
2985	0					0	$self->_printf("escape_char: " . $self->get_escape_char() . "\n");
2986	0					0	$self->_printf("in_encoding: " . $self->get_in_encoding() . "\n");
2987	0	0				0	$self->_printf("is_always_quoted: " . ($self->get_is_always_quoted() ? 'yes' : 'no') . "\n");
2988
2989	0					0	my @coldata = $self->get_coldata();
2990	0					0	my @disp;
2991	0					0	push @disp, [ '#', 'FIELD', 'HEADER', 'EXT DATA', 'DATETIME FORMAT', 'DATETIME LOCALE' ];
2992	0					0	push @disp, [ map { my $s = $_; $s =~ s/./-/g; $s } @{$disp[0]} ];
	0					0
	0					0
	0					0
	0					0
2993	0					0	for my $i (0..$#coldata) {
2994	0					0	my $col = $coldata[$i];
2995
2996	0					0	my @row;
2997	0					0	push @row, "$i";
2998	0	0				0	push @row, (defined($col->[$_]) ? ($col->[$_] . '') : '') for (0..4);
2999	0					0	map { s/\n/\\n/g; s/\r/\\r/g; s/\t/\\t/g } @row;
	0					0
	0					0
	0					0
3000	0					0	push @disp, [ @row ];
3001							}
3002	0					0	my $n = @{$disp[-1]};
	0					0
3003	0					0	my @max = (-1) x $n;
3004	0					0	for my $l (@disp) {
3005	0	0				0	do { $max[$_] = length($l->[$_]) if $max[$_] < length($l->[$_]) } for (0 .. $n - 1);
	0					0
3006							}
3007	0					0	my $s = join(' ', map { "%-${_}s" } @max);
	0					0
3008	0					0	$self->_print("\n");
3009	0					0	$self->_printf("$s\n", @{$_}) for (@disp);
	0					0
3010							}
3011
3012							sub set_out_file {
3013	2			2	1	8983	my $self = shift;
3014	2					50	validate_pos(@_, {type => SCALAR});
3015
3016	2					11	my ($out_file) = @_;
3017	2					152	$self->{out_file} = $out_file;
3018
3019	2					18	return $self;
3020							}
3021
3022							# Subrefs set with out_map
3023							sub _execute_write_update_before {
3024	362			362		763	my ($self, $ar) = @_;
3025
3026	362					705	my $columns_ar = $self->{_columns};
3027
3028	362					666	my $wpre = $self->{_write_update_before_ar};
3029	362					641	for my $i (0..$#{$columns_ar}) {
	362					949
3030	1162					1984	my $subref = $wpre->[$i];
3031	1162	100				3022	next unless defined($subref);
3032
3033	159					245	do {
3034	159					273	local $_ = $ar->[$i];
3035	159					263	my $field = $columns_ar->[$i];
3036	159					338	my $new_val = $subref->($self, $field);
3037	156					390	$ar->[$i] = $new_val;
3038							}
3039
3040							}
3041							}
3042
3043							# Take into account write_fields if it got set
3044							sub _apply_write_fields {
3045	460			460		948	my ($self, $ar) = @_;
3046
3047	460					792	my @final;
3048
3049	460					1745	my $tmp = _get_def($self->{out_fields}, $self->{write_fields});
3050	460	100				1480	my @wf = @{$tmp} if defined($tmp);
	16					36
3051
3052	460	100				1388	return unless @wf;
3053
3054	16					22	my %named_fields = %{$self->{_named_fields}};
	16					50
3055	16					38	for my $i (0..$#wf) {
3056	40					58	my $field = $wf[$i];
3057	40	100	66			158	my $tmp = $ar->[$named_fields{$field}] if defined($field) and $field ne '';
3058
3059							# Put here any post-processing of value
3060							# WARNING
3061							# $tmp can be undef
3062							# ...
3063
3064	40					79	$final[$i] = $tmp;
3065							}
3066	16					51	$_[1] = [ @final ];
3067							}
3068
3069							sub write {
3070	108			108	1	27763	my $self = shift;
3071
3072	108					850	validate_pos(@_);
3073
3074	108	50				416	return undef unless $self->_status_forward('S3');
3075
3076	103					260	my $verbose = $self->{verbose};
3077	103					209	my $_debug = $self->{_debug};
3078	103					255	my $_debugh = $self->{_debugh};
3079
3080	103					228	my $out_file = $self->{out_file};
3081
3082	103					212	my %stats;
3083
3084	103	50				297	$self->_print("-- $out_file writing start\n") if $verbose;
3085	103					207	my $rows_written = 0;
3086
3087	103					210	my $outh = $self->{outh};
3088
3089	103					241	$self->{_close_outh_when_finished} = 0;
3090	103	50				331	unless (defined($outh)) {
3091	103	50				335	if ($out_file eq '') {
3092	0					0	$outh = \*STDOUT;
3093							} else {
3094	103	50				10569	unless (open($outh, '>', $out_file)) {
3095	0					0	$self->_print_error("unable to open file '$out_file': $!");
3096	0					0	return undef;
3097							}
3098	103					457	$self->{_close_outh_when_finished} = 1;
3099							}
3100	103					280	$self->{outh} = $outh;
3101							}
3102
3103	103	50				383	unless ($self->{_leave_encoding_alone}) {
3104							my $enc = (defined($self->{_inh_encoding}) ?
3105							$self->{_inh_encoding} :
3106	103	50				408	$DEFAULT_OUT_ENCODING);
3107
3108							# out_encoding option takes precedence
3109	103	100				390	$enc = $self->{out_encoding} if defined($self->{out_encoding});
3110	103					350	my $m = ":encoding($enc)";
3111	103	50	66			345	if (_is_utf8($enc) and $self->{out_utf8_bom}) {
3112	0					0	$m .= ':via(File::BOM)';
3113							}
3114
3115	103	50	33			440	if ($OS_IS_PLAIN_WINDOWS and $FIX_PERLMONKS_823214) {
3116							# Tested with UTF-16LE, NOT tested with UTF-16BE (it should be the same story)
3117	0	0				0	$m = ":raw:perlio:$m:crlf" if $enc =~ /^utf-?16/i;
3118							}
3119
3120	103					1044	binmode $outh, $m;
3121	103	50				11702	print($_debugh "Encoding string used for output: $m\n") if $_debug;
3122							}
3123
3124	103					266	my $escape_char = $self->{escape_char};
3125	103					231	my $quote_char = $self->{quote_char};
3126
3127	103					205	my %opts;
3128	103					322	$opts{binary} = 1;
3129	103					541	$opts{eol} = "\n";
3130
3131	103	50				437	$opts{sep_char} = $self->{sep_char} if defined($self->{sep_char});
3132	103	100				346	$opts{sep_char} = $self->{out_sep_char} if defined($self->{out_sep_char});
3133
3134	103	50				389	$opts{quote_char} = $self->{quote_char} if defined($self->{quote_char});
3135	103	50				323	$opts{quote_char} = $self->{out_quote_char} if defined($self->{out_quote_char});
3136
3137	103	50				383	$opts{escape_char} = $self->{escape_char} if defined($self->{escape_char});
3138	103	100				306	$opts{escape_char} = $self->{out_escape_char} if defined($self->{out_escape_char});
3139
3140	103					264	$opts{always_quote} = $self->{_is_always_quoted};
3141	103	100				311	$opts{always_quote} = $self->{out_always_quote} if defined($self->{out_always_quote});
3142
3143	103					927	my $csvout = Text::CSV->new({ %opts });
3144	103	50				19593	if (!defined($csvout)) {
3145	0					0	$self->_print_error("error creating output Text::CSV object");
3146	0					0	return undef;
3147							}
3148
3149	103					520	my $write_filter_hr = _get_def($self->{out_filter}, $self->{write_filter_hr});
3150
3151	103	100	66			915	if (($self->{has_headers} and
			100
			100
3152							!(defined($self->{out_has_headers}) and !$self->{out_has_headers}))
3153							or $self->{out_has_headers}) {
3154	101					253	my $ar = [ ];
3155	101	100				366	if ($self->{has_headers}) {
3156	97					251	$ar = $self->{_headers};
3157							} else {
3158	4					8	my $nf = $self->{_named_fields};
3159	4					7	$ar->[$nf->{$_}] = $_ for (keys %{$nf});
	4					25
3160							}
3161
3162	101	100				343	if (exists $self->{_out_headers}) {
3163	3					5	my $h = $self->{_out_headers};
3164	3					6	for (keys %{$self->{_named_fields}}) {
	3					11
3165	12	100				30	if (exists $h->{$_}) {
3166	5					13	$ar->[$self->{_named_fields}->{$_}] = $h->{$_};
3167							}
3168							}
3169							}
3170
3171	101					445	$self->_apply_write_fields($ar);
3172
3173	101					1848	$csvout->print($outh, $ar);
3174	101					1337	$rows_written++;
3175							}
3176
3177	103					233	my $do_status_reset = 0;
3178
3179
3180							#
3181							# FIXME!!!
3182							#
3183							# Instead of this duplication of code, provide AutoCSV with a "create iterator sub" feature to
3184							# iterate over all records, whatever is going on behind the scene (in-memory or read input).
3185							#
3186							# Such an iterator would also benefit to module users.
3187							#
3188
3189
3190	103	100				351	if ($self->{_status} == 4) {
3191
3192							#
3193							# The content is available in-memory: we write from what we have in-memory then...
3194							#
3195
3196	24					105	for my $k ($self->get_keys()) {
3197	87					253	my $hr = $self->get_row_hr($k);
3198	87	50				251	if (defined($write_filter_hr)) {
3199	0	0				0	next unless $write_filter_hr->($hr);
3200							}
3201	87					154	my $ar = [ @{$self->get_row_ar($k)} ];
	87					208
3202
3203	87					344	$self->_execute_write_update_before($ar);
3204	87					278	$self->_apply_write_fields($ar);
3205
3206	87					628	$csvout->print($outh, $ar);
3207	87					989	$rows_written++;
3208							}
3209
3210							} else {
3211
3212							#
3213							# No in-memory content available: we read and write in parallel.
3214							#
3215
3216	79					317	$self->_register_pass("write()");
3217
3218	79					335	$self->_open_read();
3219	79					173	my $ar;
3220							my $row_hr;
3221	79					339	while ($self->_read_one_record_from_input(\$ar, \$row_hr)) {
3222	301	100				812	if (defined($write_filter_hr)) {
3223	46	100				112	next unless $write_filter_hr->($row_hr, \%stats, $self->get_recnum());
3224							}
3225	275					610	$ar = [ @{$ar} ];
	275					803
3226
3227	275					1138	$self->_execute_write_update_before($ar);
3228	272					800	$self->_apply_write_fields($ar);
3229
3230	272					1914	$csvout->print($outh, $ar);
3231	272					2864	$rows_written++;
3232							}
3233	72					330	$self->_close_read();
3234
3235	72					177	$do_status_reset = 1
3236							}
3237
3238	96					400	$self->_close_outh();
3239
3240	96	50				323	if ($verbose) {
3241	0					0	$self->_print("-- $out_file writing end: $rows_written row(s) written\n");
3242	0					0	for my $k (sort keys %stats) {
3243	0					0	$self->_printf(" %7d %s\n", $stats{$k}, $k);
3244							}
3245							}
3246
3247	96	100				303	if ($do_status_reset) {
3248	72	50				310	return undef unless $self->_status_reset();
3249							}
3250	96					1094	return $self;
3251							}
3252
3253
3254
3255							#
3256							# * * *************************************************************************
3257							# * * *************************************************************************
3258							# * OBJ ***************************************************************************
3259							# * * *************************************************************************
3260							# * * *************************************************************************
3261							#
3262
3263							#
3264							# The subs below assume Text::AutoCSV can be in status S4 = all in memory.
3265							#
3266
3267
3268							sub get_keys {
3269	31			31	1	2672	my $self = shift;
3270	31					242	validate_pos(@_);
3271
3272	31	50				113	return undef unless $self->_status_forward('S4');
3273
3274	31					63	my $last_key = @{$self->{_flat}} - 1;
	31					85
3275	31					106	my @r = (0..$last_key);
3276
3277	31					108	return @r;
3278							}
3279
3280							sub get_row_ar {
3281	338			338	1	1307	my $self = shift;
3282	338					2565	validate_pos(@_, {type => SCALAR});
3283	338					1052	my ($key) = @_;
3284
3285	338	50				912	return undef unless $self->_status_forward('S4');
3286
3287	338	50				983	unless (defined($key)) {
3288	0					0	$self->_print_error("get_row_ar(): \$key is not defined!");
3289	0					0	return undef;
3290							}
3291
3292	338	100				997	$self->_print_error("unknown row '$key'") unless defined($self->{_flat}->[$key]);
3293	338					882	return $self->{_flat}->[$key];
3294							}
3295
3296							sub get_row_hr {
3297	248			248	1	795	my $self = shift;
3298	248					2558	validate_pos(@_, {type => SCALAR});
3299	248					881	my ($key) = @_;
3300
3301	248					747	my $ar = $self->get_row_ar($key);
3302	248	100				719	return undef unless defined($ar);
3303
3304	246					790	return $self->_ar_to_hr($ar);
3305							}
3306
3307							#
3308							# Could be made much more efficient (directly read $self->{_flat} instead of calling get_row_hr
3309							# that itself calls get_row_ar).
3310							# I leave it as is because get_hr_all is not good practice (it is not scalable), it was
3311							# primarily done to ease test plan.
3312							#
3313							# By the way I may make it one day not available by default, requesting caller to tune some
3314							# variable (like { $Text::AutoCSV::i_am_the_test_plan = 1 }) to expose it.
3315							#
3316							sub get_hr_all {
3317	106			106	1	5160	my $self = shift;
3318	106					859	validate_pos(@_);
3319
3320	106					290	my @resp;
3321	106					448	$self->reset_next_record_hr();
3322	106					387	while (my $hr = $self->get_next_record_hr()) {
3323	404					1187	push @resp, $hr;
3324							}
3325	103					476	return @resp;
3326							}
3327
3328							sub get_recnum {
3329	190			190	1	350	my $self = shift;
3330	190					1102	validate_pos(@_);
3331
3332	190	50				642	return -1 unless $self->{_read_in_progress};
3333	190					458	return _get_def($self->{_row_read}, -1);
3334							}
3335
3336							sub _check_for_search {
3337	890			890		2116	my ($self, $field) = @_;
3338	890	50				2331	return undef unless $self->_status_forward('S4');
3339
3340	889	100				3707	return 1 if exists $self->{_named_fields}->{$field};
3341							$self->_print_error("search: unknown field '$field'",
3342	6					30	0, ERR_UNKNOWN_FIELD, $self->{_named_fields});
3343							}
3344
3345							sub get_cell {
3346	4			4	1	671	my $self = shift;
3347	4					48	validate_pos(@_, {type => SCALAR}, {type => SCALAR});
3348	4					15	my ($key, $field) = @_;
3349
3350	4	50				13	return undef unless $self->_check_for_search($field);
3351	3					10	my $row = $self->get_row_hr($key);
3352	3	100				19	return $row unless defined($row);
3353	2					12	return $row->{$field};
3354							}
3355
3356							sub get_values {
3357	9			9	1	4150	my $self = shift;
3358	9					132	validate_pos(@_, {type => SCALAR}, {type => UNDEF \| CODEREF, optional => 1});
3359	9					44	my ($field, $filter_subref) = @_;
3360
3361	9	50				34	return undef unless $self->_check_for_search($field);
3362
3363	9					18	my @values;
3364	9					35	$self->reset_next_record_hr();
3365	9					31	while (my $hr = $self->get_next_record_hr()) {
3366	53	100				118	if (defined($filter_subref)) {
3367	23					41	local $_ = $hr->{$field};
3368	23	100				51	next unless $filter_subref->();
3369							}
3370	42					181	push @values, $hr->{$field};
3371							}
3372	9					43	return @values;
3373							}
3374
3375							sub _get_hash_and_projector {
3376	561			561		1318	my ($self, $field, $arg_opts) = @_;
3377
3378	561					1105	my $_debug = $self->{_debug};
3379	561					1042	my $_debugh = $self->{_debugh};
3380
3381	561	50				1415	my %opts = %{$arg_opts} if defined($arg_opts);
	561					1726
3382
3383	561					2351	my $opt_case = _get_def($opts{'case'}, $self->{search_case}, $DEF_SEARCH_CASE);
3384	561					2184	my $opt_trim = _get_def($opts{'trim'}, $self->{search_trim}, $DEF_SEARCH_TRIM);
3385							my $opt_ignore_empty = _get_def($opts{'ignore_empty'}, $self->{search_ignore_empty},
3386	561					2058	$DEF_SEARCH_IGNORE_EMPTY);
3387							my $opt_ignacc = _get_def($opts{'ignore_accents'}, $self->{search_ignore_accents},
3388	561					2042	$DEF_SEARCH_IGNORE_ACCENTS);
3389
3390	561					1868	my $opts_stringified = $opt_case . $opt_trim . $opt_ignore_empty . $opt_ignacc;
3391	561					1423	my $hash_name = "_h${field}_${opts_stringified}";
3392	561					1188	my $projector_name = "_p${field}_${opts_stringified}";
3393
3394	561	100	66			2959	if (exists $self->{$hash_name} and exists $self->{$projector_name}) {
		50	33
3395	489	50				1268	print($_debugh "Search by key '$field': using existing hash and projector (" .
3396							"$hash_name, $projector_name)\n") if $_debug;
3397	489					1906	return ($hash_name, $projector_name);
3398							} elsif (exists $self->{$hash_name} or exists $self->{$projector_name}) {
3399	0					0	confess "Man, check your $PKG module code now!";
3400							}
3401
3402	72	50				237	print($_debugh "Search by key '$field': building hash\n") if $_debug;
3403
3404							#
3405							# Projectors
3406							#
3407							# The projector contains subs to derivate the search key from the field value.
3408							# At the moment it is used to manage with case / without case searches and with trim / without trim
3409							# searches (meaning, ignoring spaces at beginning and end of fields)
3410							#
3411							# Why naming it a projector?
3412							# Because if you run it twice on a value, the second run should produce the same result, meaning:
3413							# p(p(x)) = p(x) whatever x
3414							#
3415
3416	72					145	my @projectors;
3417
3418							# Add case removal in the projector function list
3419	72	100		1479		398	push @projectors, sub { return lc(shift); } unless $opt_case;
	1479					4573
3420
3421							# Add trim in the projector function list
3422	72	100				219	if ($opt_trim) {
3423							push @projectors,
3424							sub {
3425	1479			1479		8134	my $v = shift;
3426	1479					5239	$v =~ s/^\s+\|\s+$//g;
3427	1479					3808	return $v;
3428	57					213	};
3429							}
3430
3431							# Add remove_accents in the projector function list
3432	72	100		1886		328	push @projectors, sub { return remove_accents(shift); } if $opt_ignacc;
	1886					4174
3433
3434							my $projector = sub {
3435	1906			1906		4245	my $v = _get_def($_[0], '');
3436	1906					4688	$v = $_->($v) foreach (@projectors);
3437	1906					4083	return $v;
3438	72					251	};
3439
3440							#
3441							# Filter
3442							#
3443							# As opposed to projectors above (where a search key is transformed), the idea now is to ignore
3444							# certain keys when doing a search.
3445							# At the moment, used to manage searches with / without empty values.
3446							#
3447							# That is to say: shall we use empty value as a regular value to search on, as in
3448							# my @results = $self->search('FIELDNAME', '');
3449							# ?
3450							#
3451							# Right now we don't use an array-based construct, that'd allow to chain filters with one another
3452							# (as we now have only one filter to deal with), later, we may use an array of filters, as done with
3453							# projectors...
3454							#
3455
3456	72					130	my $filter;
3457	72	100				175	if ($opt_ignore_empty) {
3458	1268			1268		3588	$filter = sub { return $_[0] ne ''; }
3459	61					191	} else {
3460	77			77		183	$filter = sub { return 1; }
3461	11					48	}
3462
3463	72					154	my %h;
3464							my $k;
3465	72					286	$self->reset_next_record_hr();
3466	72					320	while (my $hr = $self->get_next_record_hr(\$k)) {
3467	1345					2470	my $kv = $hr->{$field};
3468	1345					2567	my $p = $projector->($kv);
3469	1345	100				2723	unless ($filter->($p)) {
3470	76	50				184	print($_debugh "Ignoring key value '$p' in hash build\n") if $_debug;
3471	76					256	next;
3472							}
3473	1269					2012	push @{$h{$p}}, $k;
	1269					6049
3474							}
3475	72					486	for (keys %h) {
3476	1178					1638	@{$h{$_}} = sort { $a <=> $b } @{$h{$_}};
	1178					2405
	103					273
	1178					2368
3477							}
3478
3479	72					247	$self->{_hash_build_count}++;
3480	72					853	$self->{$hash_name} = { %h };
3481	72					260	$self->{$projector_name} = $projector;
3482	72					564	return ($hash_name, $projector_name);
3483							}
3484
3485							sub _get_hash_build_count {
3486	6			6		1488	my $self = shift;
3487
3488	6					33	return _get_def($self->{_hash_build_count}, 0);
3489							}
3490
3491							sub search {
3492	563			563	1	14062	my $self = shift;
3493	563					6299	validate_pos(@_,
3494							{type => SCALAR}, {type => UNDEF \| SCALAR}, {type => UNDEF \| HASHREF, optional => 1});
3495	563					2234	my ($field, $value, $param_opts) = @_;
3496
3497	563					1201	my $croak_if_error = $self->{croak_if_error};
3498
3499							#
3500							# FIXME?
3501							# A bit overkill to check options each time search is called...
3502							# To be thought about.
3503							#
3504
3505	563	100				1548	my @tmp = %{$param_opts} if $param_opts;
	421					1572
3506	563					9833	my %opts = validate(@tmp, $SEARCH_VALIDATE_OPTIONS);
3507
3508	562	50				2778	return undef unless $self->_check_for_search($field);
3509
3510							# $self->_print_error("undef value in search call") if !defined($value);
3511	561	50				1448	$value = '' unless defined($value);
3512
3513	561					1661	my ($hash_name, $projector_name) = $self->_get_hash_and_projector($field, \%opts);
3514
3515	561					1925	my $ret = $self->{$hash_name}->{$self->{$projector_name}->($value)};
3516
3517	561	100				2643	return $ret if defined($ret);
3518	185					724	return [ ];
3519							}
3520
3521							sub search_1hr {
3522	22			22	1	10058	my $self = shift;
3523	22					332	validate_pos(@_,
3524							{type => SCALAR}, {type => UNDEF \| SCALAR}, {type => UNDEF \| HASHREF, optional => 1});
3525	22					96	my ($field, $value, $arg_opts) = @_;
3526
3527	22					72	my $r = $self->search($field, $value, $arg_opts);
3528
3529	22	100				74	return undef unless defined($r->[0]);
3530
3531	20					48	my $opts = _get_def($arg_opts, { });
3532							my $opt_ignore_ambiguous = _get_def($opts->{'ignore_ambiguous'},
3533	20					72	$self->{'search_ignore_ambiguous'}, $DEF_SEARCH_IGNORE_AMBIGUOUS);
3534
3535	20	100	100			38	return undef if @{$r} >= 2 and !$opt_ignore_ambiguous;
	20					107
3536	14					50	return $self->get_row_hr($r->[0]);
3537							}
3538
3539							sub vlookup {
3540	317			317	1	33785	my $self = shift;
3541	317					5554	validate_pos(@_, {type => SCALAR}, {type => UNDEF \| SCALAR}, {type => SCALAR},
3542							{type => UNDEF \| HASHREF, optional => 1});
3543	317					1630	my ($searched_field, $value, $target_field, $arg_opts) = @_;
3544
3545	317					991	my $r = $self->search($searched_field, $value, $arg_opts);
3546	315	50				945	return undef unless $self->_check_for_search($target_field);
3547
3548	314					924	my $opts = _get_def($arg_opts, { });
3549	314	100	66			989	unless (defined($r->[0])) {
3550							return (exists $opts->{'value_if_not_found'} ? $opts->{'value_if_not_found'} :
3551	143	100				682	$self->{'search_value_if_not_found'});
3552							} elsif (@{$r} >= 2) {
3553							my $opt_ignore_ambiguous = _get_def($opts->{'ignore_ambiguous'},
3554							$self->{'search_ignore_ambiguous'}, $DEF_SEARCH_IGNORE_AMBIGUOUS);
3555							return (exists $opts->{'value_if_ambiguous'} ? $opts->{'value_if_ambiguous'} :
3556							$self->{'search_value_if_ambiguous'}) if !$opt_ignore_ambiguous;
3557							}
3558
3559	144	100				458	return $opts->{value_if_found} if exists $opts->{value_if_found};
3560	140	50				394	return $self->{search_value_if_found} if exists $opts->{search_value_if_found};
3561
3562	140					449	my $hr = $self->get_row_hr($r->[0]);
3563
3564	140					730	return $hr->{$target_field};
3565							}
3566
3567							1;
3568
3569							__END__
3570
3571							=pod
3572
3573							=encoding UTF-8
3574
3575							=head1 NAME
3576
3577							Text::AutoCSV - helper module to automate the use of Text::CSV
3578
3579							=head1 VERSION
3580
3581							version 1.1.8
3582
3583							=head1 SYNOPSIS
3584
3585							By default, Text::AutoCSV will detect the following characteristics of the input:
3586
3587							- The separator, among ",", ";" and "\t" (tab)
3588
3589							- The escape character, among '"' (double-quote) and '\\' (backslash)
3590
3591							- Try UTF-8 and if it fails, fall back on latin1
3592
3593							- Read the header line and compute field names
3594
3595							- If asked to (see L</fields_dates_auto>), detect any field that contains a DateTime value, trying
3596							20 date formats, possibly followed by a time (6 time formats tested)
3597
3598							- If asked to (see L</fields_dates>), detect DateTime format of certain fields, croak if no DateTime
3599							format can be worked out
3600
3601							- Fields identified as containing a DateTime value (L</fields_dates_auto> or L</fields_dates>) are
3602							stored as DateTime objects by default
3603
3604							Text::AutoCSV also provides methods to search on fields (using cached hash tables) and it can
3605							populate the value of "remote" fields, made from joining 2 CSV files with a key-value search
3606
3607							=head2 General
3608
3609							use Text::AutoCSV;
3610
3611							Text::AutoCSV->new()->write(); # Read CSV data from std input, write to std output
3612
3613							Text::AutoCSV->new(in_file => 'f.csv')->write(); # Read CSV data from f.csv, write to std output
3614
3615							# Read CSV data from f.csv, write to g.csv
3616							Text::AutoCSV->new(in_file => 'f.csv', out_file => 'g.csv')->write();
3617
3618							# "Rewrite" CSV file by printing out records as a list (separated by line breaks) of field
3619							# name followed by its value.
3620							my $csv = Text::AutoCSV->new(in_file => 'in.csv', walker_hr => \&walk);
3621							my @cols = $csv->get_fields_names();
3622							$csv->read();
3623							sub walk {
3624							my %rec = %{$_[0]};
3625							for (@cols) {
3626							next if $_ eq '';
3627							print("$_ => ", $rec{$_}, "\n");
3628							}
3629							print("\n");
3630							}
3631
3632							=head2 OBJ-ish functions
3633
3634							# Identify column internal names with more flexibility as the default mechanism
3635							my $csv = Text::AutoCSV->new(in_file => 'zips.csv',
3636							fields_hr => {'CITY' => '^(city\|town)', 'ZIPCODE' => '^zip(code)?$'});
3637							# Get zipcode of Claix
3638							my $z = $csv->vlookup('CITY', ' claix ', 'ZIPCODE');
3639
3640							my $csv = Text::AutoCSV->new(in_file => 'zips.csv');
3641							# Get zipcode of Claix
3642							my $z = $csv->vlookup('CITY', ' claix ', 'ZIPCODE');
3643							# Same as above, but vlookup is strict for case and spaces around
3644							my $csv = Text::AutoCSV->new(in_file => 'zips.csv', search_case => 1, search_trim => 0);
3645							my $z = $csv->vlookup('CITY', 'Claix', 'ZIPCODE');
3646
3647							# Create field 'MYCITY' made by taking pers.csv' ZIP column value, looking it up in the
3648							# ZIPCODE columns of zips.csv, taking CITY colmun value and naming it 'MYCITY'. Output is
3649							# written in std output.
3650							# If a zipcode is ambiguous, say it.
3651							Text::AutoCSV->new(in_file => 'pers.csv')
3652							->field_add_link('MYCITY', 'ZIP->ZIPCODE->CITY', 'zips.csv',
3653							{ ignore_ambiguous => 0, value_if_ambiguous => '<duplicate zipcode found!>' })->write();
3654
3655							# Note the above can also be written using Text::AutoCSV level attributes:
3656							Text::AutoCSV->new(in_file => 'pers.csv',
3657							search_ignore_ambiguous => 0, search_value_if_ambiguous => '<duplicate zipcode found!>')
3658							->field_add_link('MYCITY', 'ZIP->ZIPCODE->CITY', 'zips.csv')->write();
3659
3660							# Create 'MYCITY' field as above, then display some statistics
3661							my $nom_compose = 0;
3662							my $zip_not_found = 0;
3663							Text::AutoCSV->new(in_file => 'pers.csv', walker_hr => \&walk)
3664							->field_add_link('MYCITY', 'ZIP->ZIPCODE->CITY', 'zips.csv')->read();
3665							sub walk {
3666							my $hr = shift;
3667							$nom_compose++ if $hr->{'NAME'} =~ m/[- ]/;
3668							$zip_not_found++ unless defined($hr->{'MYCITY'});
3669							}
3670							print("Number of persons with a multi-part name: $nom_compose\n");
3671							print("Number of persons with unknown zipcode: $zip_not_found\n");
3672
3673							=head2 Updating
3674
3675							Text::AutoCSV->new(in_file => 'names.csv', out_file => 'ucnames.csv',
3676							read_post_update_hr => \&updt)->write();
3677							sub updt { $_[0]->{'LASTNAME'} =~ s/^.*$/\U&/; }
3678
3679							Text::AutoCSV->new(in_file => 'squares.csv', out_file => 'checkedsquares.csv',
3680							out_filter => \&wf)->write();
3681							sub wf { return ($_[0]->{'X'} ** 2 == $_[0]->{'SQUAREOFX'}); }
3682
3683							# Add a field for the full name, made of the concatenation of the
3684							# first name and the last name.
3685							# Also display stats about empty full names.
3686							Text::AutoCSV->new(in_file => 'dirpeople.csv', out_file => 'dirwithfn.csv', verbose => 1)
3687							->field_add_computed('FULLNAME', \&calc_fn)->write();
3688							sub calc_fn {
3689							my ($field, $hr, $stats) = @_;
3690							my $fn = $hr->{'FIRSTNAME'} . ' ' . uc($hr->{'LASTNAME'});
3691							$stats->{'empty full name'}++ if $fn eq ' ';
3692							return $fn;
3693							}
3694
3695							# Read a file with a lot of columns and keep only 2 columns in output
3696							Text::AutoCSV->new(in_file => 'big.csv', out_file => 'addr.csv',
3697							out_fields => ['NAME', 'ADDRESS'])
3698							->out_header('ADDRESS', 'Postal Address')
3699							->write();
3700
3701							=head2 Datetime management
3702
3703							# Detect any field containing a DateTime value and convert it to yyyy-mm-dd whatever the
3704							# input format is.
3705							Text::AutoCSV->new(in_file => 'in.csv', out_file => 'out.csv', fields_dates_auto => 1,
3706							out_dates_format => '%F')->write();
3707
3708							# Detect any field containing a DateTime value and convert it to a US DateTime whatever the
3709							# input format is.
3710							Text::AutoCSV->new(in_file => 'in.csv', out_file => 'out.csv', fields_dates_auto => 1,
3711							out_dates_format => '%b %d, %Y, %I:%M:%S %p', out_dates_locale => 'en')->write();
3712
3713							# Find dates of specific formats and convert it into yyyy-mm-dd
3714							Text::AutoCSV->new(in_file => 'raw.csv', out_file => 'cooked.csv',
3715							dates_formats_to_try => ['%d_%m_%Y', '%m_%d_%Y', '%Y_%m_%d'],
3716							out_dates_format => '%F')->write();
3717
3718							# Take the dates on columns 'LASTLOGIN' and 'CREATIONDATE' and convert it into French dates
3719							# (day/month/year).
3720							# Text::AutoCSV will croak if LASTLOGIN or CREATIONDATE do not contain a DateTime format.
3721							# By default, Text::AutoCSV will try 20 different formats.
3722							Text::AutoCSV->new(in_file => 'in.csv', out_file => 'out.csv',
3723							fields_dates => ['LASTLOGIN', 'CREATIONDATE'], out_dates_format => '%d/%m/%Y')->write();
3724
3725							# Convert 2 DateTime fields into unix standard epoch
3726							# Write -1 if DateTime is empty.
3727							sub toepoch { return $_->epoch() if $_; -1; }
3728							Text::AutoCSV->new(in_file => 'stats.csv', out_file => 'stats-epoch.csv',
3729							fields_dates => ['ATIME', 'MTIME'])
3730							->in_map('ATIME', \&toepoch)
3731							->in_map('MTIME', \&toepoch)
3732							->write();
3733
3734							# Do the other way round from above: convert 2 fields containing unix standard epoch into a
3735							# string displaying a human-readable DateTime.
3736							my $formatter = DateTime::Format::Strptime->new(pattern => 'DATE=%F, TIME=%T');
3737							sub fromepoch {
3738							return $formatter->format_datetime(DateTime->from_epoch(epoch => $_)) if $_ >= 0;
3739							'';
3740							}
3741							$csv = Text::AutoCSV->new(in_file => 'stats-epoch.csv', out_file => 'stats2.csv')
3742							->in_map('ATIME', \&fromepoch)
3743							->in_map('MTIME', \&fromepoch)
3744							->write();
3745
3746							=head2 Miscellaneous
3747
3748							use Text::AutoCSV 'remove_accents';
3749							# Output 'Francais: etre elementaire, Tcheque: sluzba dum' followed by a new line.
3750							print remove_accents("FranÃ§ais: Ãªtre Ã©lÃ©mentaire, TchÃ¨que: sluÅ¾ba dÅ¯m"), "\n";
3751
3752							=for Pod::Coverage ERR_UNKNOWN_FIELD
3753
3754							=head1 NAME
3755
3756							Text::AutoCSV - helper module to automate the use of Text::CSV
3757
3758							=head1 METHODS
3759
3760							=head2 new
3761
3762							my $csv = Text::AutoCSV->new(%attr);
3763
3764							(Class method) Returns a new instance of Text::AutoCSV. The object attributes are described by the
3765							hash C<%attr> (can be empty).
3766
3767							Currently the following attributes are available:
3768
3769							=over 4
3770
3771							=item Preliminary note about L</fields_hr>, L</fields_ar> and L</fields_column_names> attributes
3772
3773							By default, Text::AutoCSV assumes the input has a header and will use the field values of this first
3774							line (the header) to work out the column internal names. These internal names are used everywhere in
3775							Text::AutoCSV to designate columns.
3776
3777							The values are transformed as follows:
3778
3779							- All accents are removed using the exportable function L</remove_accents>.
3780
3781							- Any non-alphanumeric character is removed (except underscore) and all letters are switched to
3782							upper case. The regex to do this is
3783
3784							s/[^[:alnum:]_]//gi; s/^.*$/\U$&/;
3785
3786							Thus a header line of
3787
3788							'Office Number 1,Office_2,Personal Number'
3789
3790							will produce the internal column names
3791
3792							'OFFICENUMBER1' (first column)
3793
3794							'OFFICE_2' (second column)
3795
3796							'PERSONALNUMBER' (third column).
3797
3798							The attribute L</fields_hr>, L</fields_ar> or L</fields_column_names> (only one of the three is
3799							useful at a time) allows to change this behavior.
3800
3801							B<NOTE>
3802
3803							The removal of accents is not a conversion to us-ascii, see L</remove_accents> for details.
3804
3805							=item Preliminary note about fields reading
3806
3807							Functions that are given a field name (L</get_cell>, L</vlookup>, L</field_add_copy>, ...) raise an
3808							error if the field requested does not exist.
3809
3810							B<SO WILL THE HASHREFS GIVEN BY Text::AutoCSV:> when a function returns a hashref (L</search_1hr>,
3811							L</get_row_hr>, ...), the hash is locked with the C<lock_keys> function of C<Hash::Util>. Any
3812							attempt to read a non-existing key from the hash causes a croak. This feature is de-activated if you
3813							specified C<croak_if_error =E<gt> 0> when creating Text::AutoCSV object.
3814
3815							=item in_file
3816
3817							The name of the file to read CSV data from.
3818
3819							If not specified or empty, read standard input.
3820
3821							Example:
3822
3823							my $csv = Text::AutoCSV->new(in_file => 'in.csv');
3824
3825							=item inh
3826
3827							File handle to read CSV data from.
3828							Normally you don't want to specify this attribute.
3829
3830							C<inh> is useful if you don't like the way Text::AutoCSV opens the input file for you.
3831
3832							Example:
3833
3834							open my $inh, "producecsv.sh\|";
3835							my $csv = Text::AutoCSV->new(inh => $inh);
3836
3837							=item encoding
3838
3839							Comma-separated list of encodings to try to read input.
3840
3841							Note that finding the correct encoding of any given input is overkill. This script just tries
3842							encodings one after the other, and selects the first one that does not trigger a warning during
3843							reading of input. If all produce warnings, select the first one.
3844
3845							The encoding chosen is used in output, unless attribute L</out_encoding> is specified.
3846
3847							Value by default: 'UTF-8,latin1'
3848
3849							B<IMPORTANT>
3850
3851							If one tries something like C<encoding =E<gt> 'latin1,UTF-8'>, it'll almost never detect UTF-8
3852							because latin1 rarely triggers warnings during reading. It tends to be also true with encodings like
3853							UTF-16 that can remain happy with various inputs (sometimes resulting in Western languages turned
3854							into Chinese text).
3855
3856							Ultimately this attribute should be used with a unique value. The result when using more than one
3857							value can produce weird results and should be considered B<experimental>.
3858
3859							Example:
3860
3861							my $csv = Text::AutoCSV->new(in_file => 'w.csv', encoding => 'UTF-16');
3862
3863							=item via
3864
3865							Adds a C<via> to the file opening instruction performed by Text::AutoCSV. You don't want to use it
3866							under normal circumstances.
3867
3868							The value should start with a ':' character (Text::AutoCSV won't add one for you).
3869
3870							Value by default: none
3871
3872							Example:
3873
3874							my $csv = Text::AutoCSV->new(in_file => 'in.csv', via => ':raw:perlio:UTF-32:crlf');
3875
3876							=item dont_mess_with_encoding
3877
3878							If true, just ignore completely encoding and don't try to alter I/O operations with encoding
3879							considerations (using C<binmode> instruction). Note that if inh attribute is specified, then
3880							Text::AutoCSV will consider the caller manages encoding for himself and dont_mess_with_encoding will
3881							be automatically set, too.
3882
3883							B<IMPORTANT>
3884
3885							This attribute does not mean perl will totally ignore encoding and would consider character strings
3886							as bytes for example. The meaning of L</dont_mess_with_encoding> is that Text::AutoCSV itself will
3887							totally ignore encoding matters, and leave it entirely to Perl' default.
3888
3889							Value by default:
3890
3891							0 if inh attribute is not set
3892							1 if inh attribute is set
3893
3894							Example:
3895
3896							my $csv = Text::AutoCSV->new(in_file => 'in.csv', dont_mess_with_encoding => 1);
3897
3898							=item sep_char
3899
3900							Specify the CSV separator character. Turns off separator auto-detection. This attribute is passed as
3901							is to C<Text::CSV-E<gt>new()>.
3902
3903							Example:
3904
3905							my $csv = Text::AutoCSV->new(in_file => 'in.csv', sep_char => ';');
3906
3907							=item quote_char
3908
3909							Specify the field quote character. This attribute is passed as is to C<Text::CSV-E<gt>new()>.
3910
3911							Value by default: double quote ('"')
3912
3913							Example:
3914
3915							my $csv = Text::AutoCSV->new(in_file => 'in.csv', quote_char => '\'');
3916
3917							=item escape_char
3918
3919							Specify the escape character. Turns off escape character auto-detection. This attribute is passed as
3920							is to C<Text::CSV-E<gt>new()>.
3921
3922							Value by default: backslash ('\\')
3923
3924							Example:
3925
3926							my $csv = Text::AutoCSV->new(in_file => 'in.csv', escape_char => '"');
3927
3928							=item in_csvobj
3929
3930							Text::CSV object to use.
3931							Normally you don't want to specify this attribute.
3932
3933							By default, Text::AutoCSV will manage creating such an object and will work hard to detect the
3934							parameters it requires.
3935
3936							Defining C<in_csvobj> attribute turns off separator character and escape character auto-detection.
3937
3938							Using this attribute workarounds Text::AutoCSV philosophy a bit, but you may need it in case
3939							Text::AutoCSV behavior is not suitable for Text::CSV creation.
3940
3941							Example:
3942
3943							my $tcsv = Text::CSV->new();
3944							my $acsv = Text::AutoCSV->new(in_file => 'in.csv', in_csvobj => $tcsv);
3945
3946							=item has_headers
3947
3948							If true, Text::AutoCSV assumes the input has a header line.
3949
3950							Value by default: 1
3951
3952							Example:
3953
3954							my $csv = Text::AutoCSV->new(in_file => 'in.csv', has_headers => 0);
3955
3956							=item fields_hr
3957
3958							(Only if input has a header line) Hash ref that contains column internal names along with a regular
3959							expression to find it in the header line.
3960							For example if you have:
3961
3962							my $csv = Text::AutoCSV->new(in_file => 'in.csv',
3963							fields_hr => {'PHONE OFFICE' => '^office phone nu',
3964							'PHONE PERSONAL' => '^personal phone nu'});
3965
3966							And the header line is
3967
3968							'Personal Phone Number,Office Phone Number'
3969
3970							the column name 'PHONE OFFICE' will designate the second column and the column name 'PHONE PERSONAL'
3971							will designate the first column.
3972
3973							You can choose column names like 'Phone Office' and 'Phone Personal' as well.
3974
3975							The regex search is case insensitive.
3976
3977							=item fields_ar
3978
3979							(Only if input has a header line) Array ref that contains column internal names. The array is used
3980							to create a hash ref of the same kind as L</fields_hr>, by wrapping the column name in a regex. The
3981							names are surrounded by a leading '^' and a trailing '$', meaning, the name must match the entire
3982							field name.
3983
3984							For example
3985
3986							fields_ar => ['OFFICENUMBER', 'PERSONALNUMBER']
3987
3988							is strictly equivalent to
3989
3990							fields_hr => {'OFFICENUMBER' => '^officenumber$', 'PERSONALNUMBER' = '^personalnumber$'}
3991
3992							The regex search is case insensitive.
3993
3994							C<fields_ar> is useful if the internal names are identical to the file column names. It avoids
3995							repeating the names over and over as would happen if using L</fields_hr> attribute.
3996
3997							I<NOTE>
3998
3999							You might wonder why using fields_ar as opposed to Text::AutoCSV default' mechanism. There are two
4000							reasons for that:
4001
4002							1- Text::AutoCSV removes spaces from column names, and some people may want another behavior. A
4003							header name of 'Phone Number' will get an internal column name of 'PHONENUMBER' (default behavior,
4004							if none of fields_hr, fields_ar and fields_column_names attributes is specified), and one may prefer
4005							'PHONE NUMBER' or 'phone number' or whatsoever.
4006
4007							2- By specifying a list of columns using either of fields_hr or fields_ar, you not only map column
4008							names as found in the header line to internal column names: you also I<request> these columns to be
4009							available. If one of the requested columns cannot be found, Text::AutoCSV will croak (default) or
4010							print an error and return an undef object (if created with C<croak_if_error =E<gt> 0>).
4011
4012							=item fields_column_names
4013
4014							Array ref of column internal names, in the order of columns in file. This attribute works like the
4015							C<column_names> attribute of Text::CSV. It'll just assign names to columns one by one, regardless of
4016							what the header line contains. It'll work also if the file has no header line.
4017
4018							Example:
4019
4020							my $csv = Text::AutoCSV->new(in_file => 'in.csv',
4021							fields_column_names => ['My COL1', '', 'My COL3']);
4022
4023							=item out_file
4024
4025							Output file when executing the L</write> method.
4026
4027							If not specified or empty, write to standard output.
4028
4029							Example:
4030
4031							my $csv = Text::AutoCSV->new(in_file => 'in.csv', out_file => 'out.csv');
4032
4033							=item outh
4034
4035							File handle to write CSV data to when executing the L</write> method.
4036							Normally you don't want to specify this attribute.
4037
4038							C<outh> is useful if you don't like the way Text::AutoCSV opens the output file for you.
4039
4040							Example:
4041
4042							my $outh = open "myin.csv', ">>";
4043							my $csv = Text::AutoCSV->new(in_file => 'in.csv', has_headers => 0, outh => $outh);
4044
4045							=item out_encoding
4046
4047							Enforce the encoding of output.
4048
4049							Value by default: input encoding
4050
4051							Example:
4052
4053							my $csv = Text::AutoCSV->new(in_file => 'in.csv', out_file => 'out.csv',
4054							out_encoding => 'UTF-16');
4055
4056							=item out_utf8_bom
4057
4058							Enforce BOM (Byte-Order-Mark) on output, when it is UTF8. If output encoding is not UTF-8, this
4059							attribute is ignored.
4060
4061							B<NOTE>
4062
4063							UTF-8 needs no BOM (there is no Byte-Order in UTF-8), and in practice, UTF8-encoded files rarely
4064							have a BOM.
4065
4066							Using this attribute is not recommended. It is provided for the sake of completeness, and also to
4067							produce Unicode files Microsoft EXCEL will be happy to read.
4068
4069							At first sight it would seem more logical to make EXCEL happy with something like this:
4070
4071							out_encoding => 'UTF-16'
4072
4073							But... While EXCEL will identify UTF-16 and read it as such, it will not take into account the BOM
4074							found at the beginning. In the end the first cell will have 2 useless characters prepended. The only
4075							solution the author knows to workaround this issue if to use UTF-8 as output encoding, and enforce a
4076							BOM. That is, use:
4077
4078							..., out_encoding => 'UTF-8', out_utf8_bom => 1, ...
4079
4080							=item out_sep_char
4081
4082							Enforce the output CSV separator character.
4083
4084							Value by default: input separator
4085
4086							Example:
4087
4088							my $csv = Text::AutoCSV->new(in_file => 'in.csv', out_file => 'out.csv', out_sep_char => ',');
4089
4090							=item out_quote_char
4091
4092							Enforce the output CSV quote character.
4093
4094							Value by default: input quote character
4095
4096							Example:
4097
4098							my $csv = Text::AutoCSV->new(in_file => 'in.csv', out_file => 'out.csv', out_quote_char => '"');
4099
4100							=item out_escape_char
4101
4102							Enforce the output CSV escape character.
4103
4104							Value by default: input escape character
4105
4106							Example:
4107
4108							my $csv = Text::AutoCSV->new(in_file => 'in.csv', out_file => 'out.csv',
4109							out_escape_char_char => '\\');
4110
4111							=item out_always_quote
4112
4113							If true, quote all fields of output (set always_quote of Text::CSV).
4114
4115							If false, don't quote all fields of output (don't set C<always_quote> of Text::CSV).
4116
4117							Value by default: same as what is found in input
4118
4119							While reading input, Text::AutoCSV works out whether or not all fields were quoted. If yes, then the
4120							output Text::CSV object has the always_quote attribute set, if no, then the output Text::CSV object
4121							does not have this attribute set.
4122
4123							Example:
4124
4125							my $csv = Text::AutoCSV->new(in_file => 'in.csv', out_file => 'out.csv', out_always_quote => 1);
4126
4127							=item out_has_headers
4128
4129							If true, when writing output, write a header line on first line.
4130
4131							If false, when writing output, don't write a header line on first line.
4132
4133							Value by default: same as has_headers attribute
4134
4135							Example 1
4136
4137							Read standard input and write to standard output, removing the header line.
4138
4139							Text::AutoCSV->new(out_has_headers => 0)->write();
4140
4141							Example 2
4142
4143							Read standard input and write to standard output, adding a header line.
4144
4145							Text::AutoCSV->new(fields_column_names => ['MYCOL1', 'MYCOL2'], out_has_headers => 1)->write();
4146
4147							=item no_undef
4148
4149							If true, non-existent column values are set to an empty string instead of undef. It is also done on
4150							extra fields that happen to have an undef value (for example when the target of a linked field is
4151							not found).
4152
4153							Note this attribute does not work on callback functions output set with L</in_map>: for example
4154							empty DateTime values (on fields identified as containing a date/time, see C<dates_*> attributes
4155							below) are set to C<undef>, even while C<no_undef> is set. Indeed setting it to an empty string
4156							while non-empty values would contain a Datetime object would not be clean. An empty value in a
4157							placeholder containing an object must be undef.
4158
4159							Since version 1.1.5 of Text::AutoCSV, C<no_undef> is examined when sending parameter ($_) to
4160							L</in_map> callback: an undef value is now passed as is (as undef), unless C<no_undef> is set. If
4161							C<no_undef> is set, and field value is undef, then $_ is set to the empty string ('') when calling
4162							callback defined by L</in_map>. This new behavior was put in place to be consistent with what is
4163							being done with DateTime values.
4164
4165							Value by default: 0
4166
4167							Example:
4168
4169							my $csv = Text::AutoCSV->new(in_file => 'in.csv', no_undef => 1);
4170
4171							=item read_post_update_hr
4172
4173							To be set to a ref sub. Each time a record is read from input, call C<read_post_update_hr> to update
4174							the hash ref of the record. The sub is called with 2 arguments: the hash ref to the record value and
4175							the hash ref to stats.
4176
4177							The stats allow to count events and are printed in the end of reading in case Text::AutoCSV is
4178							called in verbose mode (C<verbose =E<gt> 1>).
4179
4180							For example, the C<read_post_update_hr> below will turn column 'CITY' values in upper case and count
4181							occurences of empty cities in stat display:
4182
4183							Text::AutoCSV->new(in_file => 'addresses.csv', read_post_update_hr => \&updt, verbose => 1)
4184							->write();
4185							sub updt {
4186							my ($hr, $stats) = @_;
4187							$hr->{'CITY'} =~ s/^.*$/\U$&/;
4188							$stats->{'empty city encountered'}++ if $hr->{'CITY'} eq '';
4189							}
4190
4191							B<IMPORTANT>
4192
4193							You cannot create a field this way. To create a field, you have to use the member functions
4194							L</field_add_link>, L</field_add_copy> or L</field_add_computed>.
4195
4196							B<NOTE>
4197
4198							If you wish to manage some updates at field level, consider registering update functions with
4199							L</in_map> and L</out_map> member functions. These functions register callbacks that work at field
4200							level and with $_ variable (thus the callback function invoked is AutoCSV-agnostic).
4201
4202							L</in_map> updates a field after read, L</out_map> updates the field content before writing it.
4203
4204							=item walker_hr
4205
4206							To set to a sub ref that'll be executed each time a record is read from input. It is executed after
4207							L</read_post_update_hr>. The sub is called with 2 arguments: the hash ref to the record value and
4208							the hash ref to stats.
4209
4210							Note L</read_post_update_hr> is meant for updating record fields just after reading, whereas
4211							L</walker_hr> is read-only.
4212
4213							The stats allow to count events and are printed in the end of reading in case Text::AutoCSV is
4214							called in verbose mode (C<verbose =E<gt> 1>). If the L</verbose> attribute is not set, the stats are
4215							not displayed, however you can get stats by calling the get_stats function.
4216
4217							The example below will count in the stats the number of records where the 'CITY' field is empty.
4218							Thanks to C<verbose =E<gt> 1> attribute, at the end of reading the stats are displayed.
4219
4220							my $csv = Text::AutoCSV->new(in_file => 'addresses.csv', walker_hr => \&walk1,
4221							verbose => 1)->read();
4222							sub walk1 {
4223							my ($hr, $stats) = @_;
4224							$stats->{'empty city'}++ if $hr->{'CITY'} eq '';
4225							}
4226
4227							=item walker_ar
4228
4229							To set to a sub ref that'll be executed each time a record is read from input. It is executed after
4230							L</read_post_update_hr>. The sub is called with 2 arguments: the array ref to the record value and
4231							the hash ref to stats.
4232
4233							Note L</read_post_update_hr> is meant for updating record fields just after reading, whereas
4234							C<walker_hr> is read-only.
4235
4236							The stats allow to count events and are printed in the end of reading in case Text::AutoCSV is
4237							called in verbose mode (C<verbose =E<gt> 1>). If the L</verbose> attribute is not set, the stats are
4238							lost.
4239
4240							The array ref contains values in their natural order in the CSV. To be used with the column names,
4241							you have to use L</get_fields_names> member function.
4242
4243							The example below will count in the stats the number of records where the 'CITY' field is empty.
4244							Thanks to C<verbose =E<gt> 1> attribute, at the end of reading the stats are displayed. It produces
4245							the exact same result as the example in walker_hr attribute, but it uses walker_ar.
4246
4247							use List::MoreUtils qw(first_index);
4248							my $csv = Text::AutoCSV->new(in_file => 'addresses.csv', walker_ar => \&walk2, verbose => 1);
4249							my @cols = $csv->get_fields_names();
4250							my $idxCITY = first_index { /^city$/i } @cols;
4251							die "No city field!??" if $idxCITY < 0;
4252							$csv->read();
4253							sub walk2 {
4254							my ($ar, $stats) = @_;
4255							$stats->{'empty city'}++ if $ar->[$idxCITY] eq '';
4256							}
4257
4258							=item write_filter_hr
4259
4260							Alias of L</out_filter>.
4261
4262							=item out_filter
4263
4264							To be set to a ref sub. Before writing a record to output, C<out_filter> is called and the record
4265							gets writen only if C<out_filter> return value is true. The sub is called with 1 argument: the hash
4266							ref to the record value.
4267
4268							For example, if you want to output only records where the 'CITY' column value is Grenoble:
4269
4270							Text::AutoCSV->new(in_file => 'addresses.csv', out_file => 'grenoble.csv',
4271							out_filter => \&filt)->write();
4272							sub filt {
4273							my $hr = shift;
4274							return 1 if $hr->{'CITY'} =~ /^grenoble$/i;
4275							return 0;
4276							}
4277
4278							=item write_fields
4279
4280							Alias of L</out_fields>.
4281
4282							=item out_fields
4283
4284							Set to an array ref. List fields to write to output.
4285
4286							Fields are written in their order in the array ref, the first CSV column being the first element in
4287							the array, and so on. Fields not listed in B<out_fields> are not written in output.
4288
4289							You can use empty field names to have empty columns in output.
4290
4291							Example:
4292
4293							Text::AutoCSV->new(in_file => 'allinfos.csv', out_file => 'only-addresses.csv',
4294							out_fields => [ 'NAME', 'ADDRESS' ] )->write();
4295
4296							=item search_case
4297
4298							If true, searches are case sensitive by default. Searches are done by the member functions
4299							L</search>, L</search_1hr>, L</vlookup>, and linked fields (L</field_add_link>).
4300
4301							The search functions can also be called with the option L</case>, that takes precedence over the
4302							object-level C<search_case> attribute value. See L</vlookup> help.
4303
4304							Value by default: 0 (by default searches are case insensitive)
4305
4306							Example:
4307
4308							my $csv = Text::AutoCSV->new(in_file => 'in.csv', search_case => 1);
4309
4310							=item search_trim
4311
4312							If true, searches ignore the presence of leading or trailing spaces in values.
4313
4314							The search functions can also be called with the option L</trim>, that takes precedence over the
4315							object-level C<search_trim> attribute value. See L</vlookup> help.
4316
4317							Value by default: 1 (by default searches ignore leading and trailing spaces)
4318
4319							Example:
4320
4321							my $csv = Text::AutoCSV->new(in_file => 'in.csv', search_trim => 0);
4322
4323							=item search_ignore_empty
4324
4325							If true, empty fields are not included in the search indexes.
4326
4327							The search functions can also be called with the option L</ignore_empty>, that takes precedence over
4328							the object-level C<search_ignore_empty> attribute value. See L</vlookup> help.
4329
4330							Value by default: 1 (by default, search of the value '' will find nothing)
4331
4332							Example:
4333
4334							my $csv = Text::AutoCSV->new(in_file => 'in.csv', search_ignore_empty => 0);
4335
4336							=item search_ignore_accents
4337
4338							If true, accents are ignored by search indexes.
4339
4340							The search functions can also be called with the option L</ignore_accents>, that takes precedence
4341							over the object-level C<search_ignore_accents> attribute value. See L</vlookup> help.
4342
4343							Value by default: 1 (by default, accents are ignored by search functions)
4344
4345							Example:
4346
4347							my $csv = Text::AutoCSV->new(in_file => 'in.csv', search_ignore_accents => 0);
4348
4349							=item search_value_if_not_found
4350
4351							When a search is done with a unique value to return (field_add_link member function behavior or
4352							return value of vlookup), default value of option L</value_if_not_found>. See L</vlookup>.
4353
4354							=item search_value_if_found
4355
4356							When a search is done with a unique value to return (field_add_link member function behavior or
4357							return value of vlookup), default value of option L</value_if_found>. See L</vlookup>.
4358
4359							B<IMPORTANT>
4360
4361							This attribute is extremly unusual. Once you've provided it, all vlookups and the target field value
4362							of fields created with field_add_link will all be populated with the value provided with this
4363							option.
4364
4365							Don't use it unless you know what you are doing.
4366
4367							=item search_ignore_ambiguous
4368
4369							When a search is done with a unique value to return (field_add_link member function behavior or
4370							return value of search_1hr and vlookup), default value of option L</ignore_ambiguous>. See
4371							L</vlookup>.
4372
4373							=item search_value_if_ambiguous
4374
4375							When a search is done with a unique value to return (field_add_link member function behavior or
4376							return value of vlookup), default value of option L</value_if_ambiguous>. See L</vlookup>.
4377
4378							=item fields_dates
4379
4380							Array ref of field names that contain a date.
4381
4382							Once the formats of these fields is known (auto-detection by default), each of these fields will get
4383							a specific L</in_map> sub that converts the text in a DateTime object and a L</out_map> sub that
4384							converts back from DateTime to text.
4385
4386							B<NOTE>
4387
4388							The L</out_map> given to a DateTime field is "defensive code": normally, L</in_map> converts text
4389							into a DateTime object and L</out_map> does the opposite, it takes a DateTime object and converts it
4390							to text. If ever L</out_map> encounters a value that is not a DateTime object, it'll just stringify
4391							it (evaluation in a string context), without calling its DateTime formatter.
4392
4393							If the format cannot be detected for a given field, output an error message and as always when an
4394							error occurs, croak (unless L</croak_if_error> got set to 0).
4395
4396							Value by default: none
4397
4398							Example:
4399
4400							my $csv = Text::AutoCSV->new(in_file => 'logins.csv',
4401							fields_dates => ['LASTLOGIN', 'CREATIONDATE']);
4402
4403							=item fields_dates_auto
4404
4405							Boolean value. If set to 1, will detect dates formats on all fields. Fields in which a DateTime
4406							format got detected are then managed as if they had been being listed in L</fields_dates> attribute:
4407							they get an appropriate L</in_map> sub and a L</out_map> sub to convert to and from DateTime (see
4408							L</fields_dates> attribute above).
4409
4410							C<fields_dates_auto> looks for DateTime on all fields, but it expects nothing: it won't raise an
4411							error if no field is found that contains DateTime.
4412
4413							Value by default: 0
4414
4415							Example:
4416
4417							my $csv = Text::AutoCSV->new(in_file => 'logins.csv', fields_dates_auto => 1);
4418
4419							=item dates_formats_to_try
4420
4421							Array ref of string formats.
4422
4423							Text::AutoCSV has a default built-in list of 20 date formats to try and 6 time formats (also it'll
4424							combine any date format with any time format).
4425
4426							C<dates_formats_to_try> will replace Text::AutoCSV default format-list will the one you specify, in
4427							case the default would not produce the results you expect.
4428
4429							The formats are written in Strptime format.
4430
4431							Value by default (see below about the role of the pseudo-format ''):
4432
4433							[ '',
4434							'%Y-%m-%d',
4435							'%Y.%m.%d',
4436							'%Y/%m/%d',
4437							'%m.%d.%y',
4438							'%m-%d-%Y',
4439							'%m.%d.%Y',
4440							'%m/%d/%Y',
4441							'%d-%m-%Y',
4442							'%d.%m.%Y',
4443							'%d/%m/%Y',
4444							'%m-%d-%y',
4445							'%m/%d/%y',
4446							'%d-%m-%y',
4447							'%d.%m.%y',
4448							'%d/%m/%y',
4449							'%Y%m%d%H%M%S',
4450							'%b %d, %Y',
4451							'%b %d %Y',
4452							'%b %d %T %Z %Y',
4453							'%d %b %Y',
4454							'%d %b, %Y' ]
4455
4456							B<IMPORTANT>
4457
4458							The empty format (empty string) has a special meaning: when specified, Text::AutoCSV will be able to
4459							identify fields that contain only a time (not preceeded by a date).
4460
4461							B<Note>
4462
4463							Format identification is over only when there is no more ambiguity. So the usual pitfall of US
4464							versus French dates (month-day versus day-month) gets resolved only when a date is encountered that
4465							disambiguates it (a date of 13th of the month or later).
4466
4467							Example with a weird format that uses underscores to separate elements, using either US (month, day,
4468							year), French (day, month, year), or international (year, month, day) order:
4469
4470							my $csv = Text::AutoCSV->new(in_file => 'logins.csv',
4471							dates_formats_to_try => ['%d_%m_%Y', '%m_%d_%Y', '%Y_%m_%d']);
4472
4473							=item dates_formats_to_try_supp
4474
4475							Same as L</dates_formats_to_try> but instead of replacing the default list of formats used during
4476							detection, it is added to this default list.
4477
4478							You want to use this attribute if you need a specific DateTime format while continuing to benefit
4479							from the default list.
4480
4481							B<IMPORTANT>
4482
4483							Text::AutoCSV will identify a given Datetime format only when there is no ambiguity, meaning, one
4484							unique Datetime format matches (all other failed). Adding a format that already exists in the
4485							default list will prevent the format from being identified, as it'll always be ambiguous. See
4486							L</dates_formats_to_try> for the default list of formats.
4487
4488							Example:
4489
4490							my $csv = Text::AutoCSV->new(in_file => 'logins.csv',
4491							dates_formats_to_try_supp => ['%d_%m_%Y', '%m_%d_%Y', '%Y_%m_%d']);
4492
4493							=item dates_ignore_trailing_chars
4494
4495							If set to 1, DateTime auto-detection will ignore trailing text that may follow detected
4496							DateTime-like text.
4497
4498							Value by default: 1 (do ignore trailing chars)
4499
4500							my $csv = Text::AutoCSV->new(in_file => 'logins.csv', dates_ignore_trailing_chars => 0);
4501
4502							=item dates_search_time
4503
4504							If set to 1, look for times when detecting DateTime format. That is, whenever a date format
4505							candidate is found, a longer candidate that also contains a time (after the date) is tested.
4506
4507							Value by default: 1 (do look for times when auto-detecting DateTime formats)
4508
4509							Example:
4510
4511							my $csv = Text::AutoCSV->new(in_file => 'logins.csv', dates_search_time => 0);
4512
4513							=item dates_locales
4514
4515							Comma-separated string of locales to test when detecting DateTime formats. Ultimately, Text::AutoCSV
4516							will try all combinations of date formats, times and locales.
4517
4518							Value by default: none (use perl default locale)
4519
4520							Example:
4521
4522							my $csv = Text::AutoCSV->new(in_file => 'logins.csv', dates_locales => 'fr,de,en');
4523
4524							=item dates_zeros_ok
4525
4526							Boolean. If true, a date made only of 0s is regarded as being empty.
4527
4528							For example if C<dates_zeros_ok> is False, then a date like 0000-00-00 will be always incorrect (as
4529							the day and month are out of bounds), therefore a format like '%Y-%m-%d' will never match for the
4530							field.
4531
4532							Conversely if C<dates_zeros_ok> is true, then a date like 0000-00-00 will be processed as if being
4533							the empty string, thus the detection of format will work and when parsed, this "full of zeros" dates
4534							will be processed the same way as the empty string (= value will be undef).
4535
4536							B<IMPORTANT>
4537
4538							"0s dates" are evaluated to undef when parsed, thus when converted back to text (out_map), they are
4539							set to an empty string, not to the original value.
4540
4541							Value by default: 1
4542
4543							Example:
4544
4545							my $csv = Text::AutoCSV->new(in_file => 'in.csv', dates_zeros_ok => 0);
4546
4547							=item out_dates_format
4548
4549							Enforce the format of dates in output, for all fields that contain a DateTime value.
4550
4551							The format is written in Strptime format.
4552
4553							Value by default: none (by default, use format detected on input)
4554
4555							Example:
4556
4557							# Detect any field containing a DateTime value and convert it to yyyy-mm-dd whatever the
4558							# input format is.
4559							Text::AutoCSV->new(in_file => 'in.csv', out_file => 'out.csv', fields_dates_auto => 1,
4560							out_dates_format => '%F')->write();
4561
4562							=item out_dates_locale
4563
4564							Taken into account only if L</out_dates_format> is used.
4565
4566							Sets the locale to apply on L</out_dates_format>.
4567
4568							Value by default: none (by default, use the locale detected on input)
4569
4570							Example:
4571
4572							# Detect any field containing a DateTime value and convert it to a US DateTime whatever the
4573							# input format is.
4574							Text::AutoCSV->new(in_file => 'in.csv', out_file => 'out.csv', fields_dates_auto => 1,
4575							out_dates_format => '%b %d, %Y, %I:%M:%S %p', out_dates_locale => 'en')->write();
4576
4577							=item croak_if_error
4578
4579							If true, stops the program execution in case of error.
4580
4581							B<IMPORTANT>
4582
4583							Value by default: 1
4584
4585							If set to zero (C<croak_if_error =E<gt> 0>), errors are displayed as warnings. This printing can
4586							then be affected by setting the L</quiet> attribute.
4587
4588							=item verbose
4589
4590							If true, get Text::AutoCSV to be a bit talkative instead of speaking only when warnings and errors
4591							occur. Verbose output is printed to STDERR by default, this can be tuned with the L</infoh>
4592							attribute.
4593
4594							Value by default: 0
4595
4596							Example:
4597
4598							my $csv = Text::AutoCSV->new(in_file => 'in.csv', verbose => 1);
4599
4600							=item infoh
4601
4602							File handle to display program's verbose output. Has effect mainly with attribute
4603							C<verbose =E<gt> 1>.
4604
4605							Note B<infoh> is used to display extra information in case of error (if a field does not exist,
4606							Text::AutoCSV will display the list of existing fields). If you don't want such output, you can set
4607							C<infoh> to undef.
4608
4609							Value by default: \*STDERR
4610
4611							Example:
4612
4613							open my $infoh, ">", "log.txt";
4614							my $csv = Text::AutoCSV->new(in_file => 'in.csv', infoh => $infoh);
4615
4616							=item quiet
4617
4618							If true, don't display warnings and errors, unless croaking.
4619
4620							If L</croak_if_error> attribute is set (as per default), still, Text::AutoCSV will produce output
4621							(on STDERR) when croaking miserably.
4622
4623							When using C<croak_if_error =E<gt> 0>, errors are processed as warnings and if L</quiet> is set (in
4624							addition to L</croak_if_error> being set to 0), there'll be no output. Note this way of working is
4625							not recommended, as things can go wrong without any notice to the caller.
4626
4627							Example:
4628
4629							my $csv = Text::AutoCSV->new(in_file => 'in.csv', quiet => 1);
4630
4631							=item one_pass
4632
4633							If true, Text::AutoCSV will perform one reading of the input. If other readings are triggered, it'll
4634							raise an error and no reading will be done. Should that be the case (you ask Text::AutoCSV to do
4635							something that'll trigger more than one reading of input), Text::AutoCSV will croak as is always the
4636							case if an error occurs.
4637
4638							Normally Text::AutoCSV will do multiple reads of input to work out certain characteristics of the
4639							CSV: guess of encoding and guess of escape character.
4640
4641							Also if member functions like L</field_add_link>, L</field_add_copy>, L</field_add_computed>,
4642							L</read> or L</write> are called after input has already been read, it'll trigger further reads as
4643							needed.
4644
4645							If one wishes a unique read of the input to occur, one_pass attribute is to be set.
4646
4647							When true, encoding will be assumed to be the first one in the provided list (L</encoding>
4648							attribute), if no encoding attribute is provided, it'll be the first one in the default list, to
4649							date, it is UTF-8.
4650
4651							When true, and if attribute L</escape_char> is not set, escape_char will be assumed to be '\\'
4652							(backslash).
4653
4654							By default, one_pass is set if inh attribute is set (caller provides the input file handle of input)
4655							or if input file is stdin (in_file attribute not set or set to an empty string).
4656
4657							Value by default:
4658
4659							0 if inh attribute is not set and in_file attribute is set to a non empty string
4660							1 if inh attribute is set or in_file is not set or set to an empty string
4661
4662							Example:
4663
4664							my $csv = Text::AutoCSV->new(in_file => 'in.csv', one_pass => 1);
4665
4666							=back
4667
4668							=head2 read
4669
4670							$csv->read();
4671
4672							Read input entirely.
4673
4674							B<Return value>
4675
4676							Returns the object itself in case of success.
4677							Returns undef if error.
4678
4679							Callback functions (when defined) are invoked, in the following order:
4680
4681							L</read_post_update_hr>, intended to do updates on fields values after each record read
4682
4683							L</walker_ar>, called after each record read, with an array ref of fields values
4684
4685							L</walker_hr>, called after each record read, with a hash ref of fields values
4686
4687							Example:
4688
4689							# Do nothing - just check CSV can be read successfully
4690							Text::AutoCSV->new(in_file => 'in.csv')->read();
4691
4692							=head2 read_all_in_mem
4693
4694							$csv->read_all_in_mem();
4695
4696							Created in version 1.1.5. Before, existed only as _read_all_in_mem, meaning, was private.
4697
4698							Read input entirely, as with L</read> function, but enforcing content to be kept in-memory.
4699
4700							Having the content kept in-memory is implied by search functions (L</vlookup> for example). With
4701							C<read_all_in_mem> you can enforce this behavior without doing a fake search.
4702
4703							=head2 reset_next_record_hr
4704
4705							$csv->reset_next_record_hr();
4706
4707							Reset the internal status to start from the beginning with L</get_next_record_hr>. Used in
4708							conjunction with L</get_next_record_hr>.
4709
4710							=head2 get_next_record_hr
4711
4712							my $hr = $csv->get_next_record_hr(\$opt_key);
4713
4714							Get the next record content as a hash ref. C<$hr> is undef when the end of records has been reached.
4715
4716							When specified, C<$opt_key> is set to the current (returned) record key.
4717
4718							B<NOTE>
4719
4720							You do not need to call L</reset_next_record_hr> once before using C<get_next_record_hr>.
4721
4722							Therefore L</reset_next_record_hr> is useful only if you wish to restart from the beginning before
4723							you've reached the end of the records.
4724
4725							B<NOTE bis>
4726
4727							L</walker_hr> allows to execute some code each time a record is read, and it better fits with
4728							Text::AutoCSV philosophy. Using a loop with C<get_next_record_hr> is primarily meant for
4729							Text::AutoCSV internal usage. Also when using this mechanism, you get very close to original
4730							Text::CSV logic, that makes Text::AutoCSV less useful.
4731
4732							B<Return value>
4733
4734							A hashref of the record, or undef once there's no more record to return.
4735
4736							Example:
4737
4738							while (my $hr = $csv->get_next_record_hr()) {
4739							say Dumper($hr);
4740							}
4741
4742							=head2 write
4743
4744							$csv->write();
4745
4746							Write input into output.
4747
4748							B<Return value>
4749
4750							Returns the object itself in case of success.
4751							Returns undef if error.
4752
4753							- If the content is not in-memory at the time write() is called:
4754
4755							Each record is read (with call of L</read_post_update_hr>, L</walker_ar> and L</walker_hr>) and then
4756							written. The read-and-write is done in sequence, each record is written to output before the next
4757							record is read from input.
4758
4759							- If the content is in-memory at the time write() is called:
4760
4761							No L</read> operation is performed, instead, records are directly written to output.
4762
4763							If defined, L</out_filter> is called for each record. If the return value of L</out_filter> is
4764							false, the record is not written.
4765
4766							Example:
4767
4768							# Copy input to output.
4769							# As CSV is parsed in-between, this copy also checks a number of characteristics about the
4770							# input, as opposed to a plain file copy operation.
4771							Text::AutoCSV->new(in_file => 'in.csv', out_file => 'out.csv')->write();
4772
4773							=head2 out_header
4774
4775							$csv->out_header($field, $header);
4776
4777							Set the header text of C<$field> to C<$header>.
4778
4779							By default, the input header value is rewritten as is to output. C<out_header> allows you to change
4780							it.
4781
4782							B<Return value>
4783
4784							Returns the object itself.
4785
4786							Example:
4787
4788							Text::AutoCSV->new(in_file => 'in.csv', out_file => 'out.csv')
4789							->out_header('LOGIN', 'Login')
4790							->out_header('FULLNAME', 'Full Name')
4791							->write();
4792
4793							=head2 print_id
4794
4795							$csv->print_id();
4796
4797							Print out a description of input. Write to \*STDERR by default or to L</infoh> attribute if set.
4798
4799							The description consists in a list of a few characteristics (CSV separator and the like) followed by
4800							the list of columns with the details of each.
4801
4802							Example of output:
4803
4804							If you go to the C<utils> directory of this module and execute the following:
4805
4806							./csvcopy.pl -i f1.csv -l "1:,A->B,f2.csv" --id
4807
4808							You will get this output:
4809
4810							-- f1.csv:
4811							sep_char: ,
4812							escape_char: \
4813							in_encoding: UTF-8
4814							is_always_quoted: no
4815
4816							# FIELD HEADER EXT DATA DATETIME FORMAT DATETIME LOCALE
4817							- ----- ------ -------- --------------- ---------------
4818							0 TIMESTAMP timestamp %Y%m%d%H%M%S
4819							1 A a
4820							2 B b
4821							3 C c
4822							4 D d %d/%m/%Y
4823							5 1:SITE 1:SITE link: f2.csv, chain: A->B->* (SITE)
4824							6 1:B 1:B link: f2.csv, chain: A->B->* (B)
4825
4826							=head2 field_add_computed
4827
4828							$csv->field_add_computed($new_field, $subref);
4829
4830							C<$new_field> is the name of the created field.
4831
4832							C<$subref> is a reference to a sub that'll calculate the new field value.
4833
4834							B<Return value>
4835
4836							Returns the object itself in case of success.
4837							Returns undef if error.
4838
4839							Add a field calculated from other fields values. The subref runs like this:
4840
4841							sub func {
4842							# $new_field is the name of the field (allows to use one subref for more than one field
4843							# calculation).
4844							# $hr is a hash ref of fields values.
4845							# $stats is a hash ref that gets printed (if Text::AutoCSV is created with verbose => 1)
4846							# in the end.
4847							my ($new_field, $hr, $stats) = @_;
4848
4849							my $field_value;
4850							# ... compute $field_value
4851
4852							return $field_value;
4853							}
4854
4855							Example:
4856
4857							# Add a field for the full name, made of the concatenation of the
4858							# first name and the last name.
4859							Text::AutoCSV->new(in_file => 'dirpeople.csv', out_file => 'dirwithfn.csv', verbose => 1)
4860							->field_add_computed('FULLNAME', \&calc_fn)->write();
4861							sub calc_fn {
4862							my ($new_field, $hr, $stats) = @_;
4863							die "Man, you are in serious trouble!" unless $new_field eq 'FULLNAME';
4864							my $fn = $hr->{'FIRSTNAME'} . ' ' . uc($hr->{'LASTNAME'});
4865							$stats->{'empty full name'}++ if $fn eq ' ';
4866							return $fn;
4867							}
4868
4869							=head2 field_add_copy
4870
4871							$csv->field_add_copy($new_field, $src_field, $opt_subref);
4872
4873							C<$new_field> if the name of the new field.
4874
4875							C<$src_field> is the name of the field being copied.
4876
4877							C<$opt_subref> is optional. It is a reference to a sub that takes one string (the value of
4878							C<$src_field>) and returns a string (the value assigned to C<$new_field>).
4879
4880							B<Return value>
4881
4882							Returns the object itself in case of success.
4883							Returns undef if error.
4884
4885							C<field_add_copy> is a special case of L</field_add_computed>. The advantage of C<field_add_copy> is
4886							that it relies on a sub that is Text::AutoCSV "unaware", just taking one string as input and
4887							returning another string as output.
4888
4889							B<IMPORTANT>
4890
4891							The current field value is passed to C<field_add_copy> in $_.
4892
4893							A call to
4894
4895							$csv->field_add_copy($new_field, $src_field, $subref);
4896
4897							is equivalent to
4898
4899							$csv->field_add_computed($new_field, \&subref2);
4900							sub subref2 {
4901							my (undef, $hr) = @_;
4902							local $_ = $hr->{$src_field};
4903							return $subref->();
4904							}
4905
4906							Example of a field copy + pass copied field in upper case and surround content with <<>>:
4907
4908							my $csv = Text::AutoCSV->new(in_file => 'dirpeople.csv', out_file => 'd2.csv');
4909							$csv->field_add_copy('UCLAST', 'LASTNAME', \&myfunc);
4910							$csv->write();
4911							sub myfunc { s/^.*$/<<\U$&>>/; $_; }
4912
4913							Note that the calls can be chained as most member functions return the object itself upon success.
4914							The example above is equivalent to:
4915
4916							Text::AutoCSV->new(in_file => 'dirpeople.csv', out_file => 'd2.csv')
4917							->field_add_copy('UCLAST', 'LASTNAME', \&myfunc)
4918							->write();
4919							sub myfunc { s/^.*$/<<\U$&>>/; $_; }
4920
4921							=head2 field_add_link
4922
4923							$csv->field_add_link($new_field, $chain, $linked_file, \%opts);
4924
4925							C<$new_field> is the name of the new field.
4926
4927							C<$chain> is the CHAIN of the link, that is: 'LOCAL->REMOTE->PICK' where:
4928
4929							C<LOCAL> is the field name to read the value from.
4930
4931							C<REMOTE> is the linked field to find the value in. This field belongs to $linked_file.
4932
4933							C<PICK> is the field from which to read the value of, in the record found by the search. This field
4934							belongs to $linked_file.
4935
4936							If $new_field is undef, the new field name is the name of the third field of $chain (PICK).
4937
4938							C<$linked_file> is the name of the linked file, that gets read in a Text::AutoCSV object created
4939							on-the-fly to do the search on. C<$linked_file> can also be a Text::AutoCSV object that you created
4940							yourself, allowing for more flexibility. Example:
4941
4942							my $lcsv = Text::AutoCSV->new(in_file => 'logins.csv', case => 1);
4943							$csv->field_add_link('NAME', 'ID->LOGIN->DISPLAYNAME', $lcsv);
4944
4945							C<\%opts> is a hash ref of optional attributes. The same values can be provided as with vlookup.
4946
4947							=over 4
4948
4949							=item trim
4950
4951							If set to 1, searches will ignore leading and trailing spaces. That is, a C<LOCAL> value of ' x '
4952							will match with a C<REMOTE> value of 'x'.
4953
4954							If option is not present, use L</search_value_if_not_found> attribute of object (default value: 1).
4955
4956							Example:
4957
4958							$csv->field_add_link('NAME', 'ID->LOGIN->DISPLAYNAME', 'logins.csv',
4959							{ trim => 0 });
4960
4961							=item case
4962
4963							If set to 1, searches will take the case into account. That is, a C<LOCAL> value of 'X' will B<not>
4964							match with a C<REMOTE> value of 'x'.
4965
4966							If option is not present, use L</search_case> attribute of object (default value: 0).
4967
4968							Example:
4969
4970							$csv->field_add_link('NAME', 'ID->LOGIN->DISPLAYNAME', 'logins.csv',
4971							{ case => 1 });
4972
4973							=item ignore_empty
4974
4975							If set to 1, empty values won't match. That is, a C<LOCAL> value of '' will not match with a
4976							C<REMOTE> value of ''.
4977
4978							If option is not present, use L</search_ignore_empty> attribute of object (default value: 1).
4979
4980							Example:
4981
4982							$csv->field_add_link('NAME', 'ID->LOGIN->DISPLAYNAME', 'logins.csv',
4983							{ ignore_empty => 0 });
4984
4985							=item value_if_not_found
4986
4987							If the searched value is not found, the value of the field is undef, that produces an empty string
4988							at write time. Instead, you can specify the value.
4989
4990							If option is not present, use L</search_value_if_not_found> attribute of object (default value:
4991							undef).
4992
4993							Example:
4994
4995							$csv->field_add_link('NAME', 'ID->LOGIN->DISPLAYNAME', 'logins.csv',
4996							{ value_if_not_found => '<not found!>' });
4997
4998							=item value_if_found
4999
5000							If the searched value is found, you can specify the value to return.
5001
5002							If option is not present, use L</search_value_if_found> attribute of object (default value: none).
5003
5004							B<NOTE>
5005
5006							Although the C<PICK> field is ignored when using this option, you must specify it any way.
5007
5008							Example:
5009
5010							$csv->field_add_link('NAME', 'ID->LOGIN->DISPLAYNAME', 'logins.csv',
5011							{ value_if_not_found => '0', value_if_found => '1' });
5012
5013							=item value_if_ambiguous
5014
5015							If the searched value is found in more than one record, the value of the field is undef, that
5016							produces an empty string at write time. Instead, you can specify the value.
5017
5018							If option is not present, use L</search_value_if_ambiguous> attribute of object (default value:
5019							undef).
5020
5021							Example:
5022
5023							$csv->field_add_link('NAME', 'ID->LOGIN->DISPLAYNAME', 'logins.csv',
5024							{ value_if_ambiguous => '<ambiguous!>' });
5025
5026							=item ignore_ambiguous
5027
5028							Boolean value. If ignore_ambiguous is true and the searched value is found in more than one record,
5029							then, silently fall back on returning the value of the first record. Obviously if
5030							C<ignore_ambiguous> is true, then the value of L</value_if_ambiguous> is ignored.
5031
5032							If option is not present, use L</search_ignore_ambiguous> attribute of object (default value: 1).
5033
5034							Example:
5035
5036							$csv->field_add_link('NAME', 'ID->LOGIN->DISPLAYNAME', 'logins.csv',
5037							{ ignore_ambiguous => 1 });
5038
5039							Example with multiple options:
5040
5041							$csv->field_add_link('NAME', 'ID->LOGIN->DISPLAYNAME', 'logins.csv',
5042							{ value_if_not_found => '?', ignore_ambiguous => 1 });
5043
5044							=back
5045
5046							B<Return value>
5047
5048							Returns the object itself in case of success.
5049							Returns undef if error.
5050
5051							Example of field_add_link usage:
5052
5053							my $nom_compose = 0;
5054							my $zip_not_found = 0;
5055							Text::AutoCSV->new(in_file => 'pers.csv', walker_hr => \&walk)
5056							->field_add_link('MYCITY', 'ZIP->ZIPCODE->CITY', 'zips.csv')->read();
5057							sub walk {
5058							my $hr = shift;
5059							$nom_compose++ if $hr->{'NAME'} =~ m/[- ]/;
5060							$zip_not_found++ unless defined($hr->{'MYCITY'});
5061							}
5062							print("Number of persons with a multi-part name: $nom_compose\n");
5063							print("Number of persons with unknown zipcode: $zip_not_found\n");
5064
5065							=head2 links
5066
5067							$csv->links($prefix, $chain, $linked_file, \%opts);
5068
5069							C<$prefix> is the name to add to joined fields
5070
5071							C<$chain> is the JOINCHAIN of the link, that is: 'LOCAL->REMOTE' where:
5072
5073							C<LOCAL> is the field name to read the value from.
5074
5075							C<REMOTE> is the linked field to find the value in. This field belongs to $linked_file.
5076
5077							As opposed to L</field_add_link>, there is no C<PICK> part, as all fields of target are read.
5078
5079							As opposed to Text::AutoCSV habits of croaking whenever a field name is duplicate, here, the
5080							duplicates are resolved by appending _2 to the joined field name if it already exists. If _2 already
5081							exists, too, then _3 is appended instead, and so on, until a non-duplicate is found. This mechanism
5082							is executed given the difficulty to control all field names when joining CSVs.
5083
5084							C<$linked_file> and C<\%opts> work exactly the same way as for L</field_add_link>, see
5085							L</field_add_link> for help.
5086
5087							B<Return value>
5088
5089							Returns the object itself in case of success.
5090							Returns undef if error.
5091
5092							B<NOTE>
5093
5094							This function used to be called C<join> but got renamed to avoid clash with perl' builtin C<join>.
5095
5096							Example:
5097
5098							Text::AutoCSV->new(in_file => 'pers.csv', out_file => 'pers_with_city.csv')
5099							->links('Read from zips.csv:', 'ZIP->ZIPCODE', 'zips.csv')->write();
5100
5101							=head2 get_in_encoding
5102
5103							my $enc = $csv->get_in_encoding();
5104
5105							Return the string of input encoding, for example 'latin2' or 'UTF-8', etc.
5106
5107							=head2 get_in_file_disp
5108
5109							my $f = $csv->get_in_file_disp();
5110
5111							Return the printable name of in_file.
5112
5113							=head2 get_sep_char
5114
5115							my $s = $csv->get_sep_char();
5116
5117							Return the string of the input CSV separator character, for example ',' or ';'.
5118
5119							=head2 get_escape_char
5120
5121							my $e = $csv->get_escape_char();
5122
5123							Return the string of the input escape character, for example '"' or '\\'.
5124
5125							=head2 get_is_always_quoted
5126
5127							my $a = $csv->get_is_always_quoted();
5128
5129							Return 1 if all fields of input are always quoted, 0 otherwise.
5130
5131							=head2 get_coldata
5132
5133							my @cd = get_coldata();
5134
5135							Return an array that describes each column, from the first one (column 0) to the last.
5136
5137							Each element of the array is itself an array ref that contains 5 elements:
5138
5139							0: Name of the field (as accessed in *_hr functions)
5140							1: Content of the field in the header line (if input has a header line)
5141							2: Column content type, shows some meta-data of fields created with field_add_* functions
5142							3: Datetime format detected, if ever, in the format Strptime
5143							4: Locale of DateTime format detected, if ever
5144
5145							=head2 get_pass_count
5146
5147							my $n = $csv->get_pass_count();
5148
5149							Return the number of input readings done. Useful only if you're interested in Text::AutoCSV
5150							internals.
5151
5152							=head2 get_in_mem_record_count
5153
5154							my $m = $csv->get_in_mem_record_count();
5155
5156							Return the number of records currently stored in-memory. Useful only if you're interested in
5157							Text::AutoCSV internals.
5158
5159							=head2 get_max_in_mem_record_count
5160
5161							my $mm = $csv->get_max_in_mem_record_count();
5162
5163							Return the maximum number of records ever stored in-memory. Indeed this number can decrease: certain
5164							functions like field_add* member-functions discard in-memory content. Useful only if you're
5165							interested in Text::AutoCSV internals.
5166
5167							=head2 get_fields_names
5168
5169							my @f = $csv->get_fields_names();
5170
5171							Return an array of the internal names of the columns.
5172
5173							=head2 get_field_name
5174
5175							my $name = $csv->get_field_name($n);
5176
5177							Return the C<$n>-th column name, the first column being number 0.
5178
5179							Example:
5180
5181							# Get the field name of the third column
5182							my $col = $csv->get_field_name(2);
5183
5184							=head2 get_stats
5185
5186							my %stats = $csv->get_stats();
5187
5188							Certain callback functions provide a parameter to record event count: L</field_add_computed>,
5189							L</read_post_update_hr>, L</walker_ar> and L</walker_hr>. By default, these stats are displayed if
5190							Text::AutoCSV got created with attribute C<verbose =E<gt> 1>. get_stats() returns the statistics
5191							hash of the object.
5192
5193							B<IMPORTANT>
5194
5195							As opposed to most functions that trigger input reading automatically (search functions and other
5196							get_* functions), C<get_stats> just returns you the stats as it is, regardless of whether some
5197							execution already occured.
5198
5199							=head2 set_walker_ar
5200
5201							$csv->set_walker_ar($subref);
5202
5203							Normally one wants to define it at object creation time using L</walker_ar> attribute.
5204							C<set_walker_ar> allows to assign the attribute walker_ar after object creation.
5205
5206							See attribute L</walker_ar> for help about the way C<$subref> should work.
5207
5208							B<Return value>
5209
5210							Returns the object itself in case of success.
5211							Returns undef if error.
5212
5213							Example:
5214
5215							# Calculate the total of the two first columns, the first column being money in and the
5216							# second one being money out.
5217							my ($actif, $passif) = (0, 0);
5218							$csv->set_walker_ar(sub { my $ar = $_[0]; $actif += $ar->[0]; $passif += $ar->[1]; })->read();
5219							print("Actif = $actif\n");
5220							print("Passif = $passif\n");
5221
5222							=head2 set_walker_hr
5223
5224							$csv->set_walker_hr($subref);
5225
5226							Normally one wants to define it at object creation time using L</walker_hr> attribute.
5227							C<set_walker_hr> allows to assign the attribute L</walker_hr> after object creation.
5228
5229							See attribute L</walker_hr> for help about the way C<$subref> should work.
5230
5231							B<Return value>
5232
5233							Returns the object itself in case of success.
5234							Returns undef if error.
5235
5236							Example:
5237
5238							my $csv = Text::AutoCSV->new(in_file => 'directory.csv', verbose => 1);
5239
5240							# ...
5241
5242							$csv->set_walker_hr(
5243							sub {
5244							my ($hr, $stat) = @_;
5245							$stat{'not capital name'}++, return if $hr->{'NAME'} ne uc($hr->{'NAME'});
5246							$stat{'name is capital letters'}++;
5247							}
5248							)->read();
5249
5250							=head2 set_out_file
5251
5252							$csv->set_out_file($out_file);
5253
5254							Normally one wants to define it at object creation time using L</out_file> attribute.
5255							C<set_out_file> allows to assign the attribute L</out_file> after object creation. It is set to
5256							C<$out_file> value.
5257
5258							B<Return value>
5259
5260							Returns the object itself in case of success.
5261							Returns undef if error.
5262
5263							Example:
5264
5265							$csv->set_out_file('mycopy.csv')->write();
5266
5267							=head2 get_keys
5268
5269							my @allkeys = $csv->get_keys();
5270
5271							Returns an array of all the record keys of input. A record key is a unique identifier that
5272							designates the record.
5273
5274							At the moment it is just an integer being the record number, the first one (that comes after the
5275							header line) being of number 0. For example if $csv input is made of one header line and 3 records
5276							(that is, a 4-line file typically, if no record contains a line break), $csv->get_keys() returns
5277
5278							(0, 1, 2)
5279
5280							B<IMPORTANT>
5281
5282							If not yet done, this function causes the input to be read entirely and stored in-memory.
5283
5284							=head2 get_hr_all
5285
5286							my @allin = $csv->get_hr_all();
5287
5288							Returns an array of all record contents of the input, each record being a hash ref.
5289
5290							B<IMPORTANT>
5291
5292							If not yet done, this function causes the input to be read entirely and stored in-memory.
5293
5294							=head2 get_row_ar
5295
5296							my $row_ar = $csv->get_row_ar($record_key);
5297
5298							Returns an array ref of the record designated by C<$record_key>.
5299
5300							Example:
5301
5302							# Get content (as array ref) of last record
5303							my @allkeys = $csv->get_keys();
5304							my $lastk = $allkeys[-1];
5305							my $lastrec_ar = $csv->get_row_ar($lastk);
5306
5307							B<IMPORTANT>
5308
5309							If not yet done, this function causes the input to be read entirely and stored in-memory.
5310
5311							=head2 get_row_hr
5312
5313							my $row_hr = $csv->get_row_hr($record_key);
5314
5315							Returns a hash ref of the record designated by C<$record_key>.
5316
5317							Example:
5318
5319							# Get content (as hash ref) of first record
5320							my @allkeys = $csv->get_keys();
5321							my $firstk = $allkeys[0];
5322							my $firstrec_hr = $csv->get_row_hr($firstk);
5323
5324							B<IMPORTANT>
5325
5326							If not yet done, this function causes the input to be read entirely and stored in-memory.
5327
5328							=head2 get_cell
5329
5330							my $val = $csv->get_cell($record_key, $field_name);
5331
5332							Return the value of the cell designated by its record key (C<$record_key>) and field name
5333							(C<$field_name>).
5334
5335							Example:
5336
5337							my @allkeys = $csv->get_keys();
5338							my $midk = $allkeys[int($#allkeys / 2)];
5339							my $midname = $csv->get_cell($midk, 'NAME');
5340
5341							Note the above example is equivalent to:
5342
5343							my @allkeys = $csv->get_keys();
5344							my $midk = $allkeys[int($#allkeys / 2)];
5345							my $midrec_hr = $csv->get_row_hr($midk);
5346							my $midname = $midrec_hr->{'NAME'};
5347
5348							B<IMPORTANT>
5349
5350							If not yet done, this function causes the input to be read entirely and stored in-memory.
5351
5352							=head2 get_values
5353
5354							my @vals = $csv->get_values($field_name, $opt_filter_subref);
5355
5356							Return an array made of the values of the given field name (C<$field_name>), for every records, in
5357							the order of the records.
5358
5359							C<$opt_filter_subref> is an optional subref. If defined, it is called with every values in turn (one
5360							call per value) and only values for which C<$opt_filter_subref> returned True are included in the
5361							returned array. Call to C<$opt_filter_subref> is done with $_ to pass the value.
5362
5363							Example:
5364
5365							my @logins = $csv->get_values('LOGIN");
5366
5367							This is equivalent to:
5368
5369							my @allkeys = $csv->get_keys();
5370							my @logins;
5371							push @logins, $csv->get_cell($_, 'LOGIN') for (@allkeys);
5372
5373							Example bis
5374
5375							# @badlogins is the list of logins that contain non alphanumeric characters
5376							my @badlogins = Text::AutoCSV->new(in_file => 'logins.csv')
5377							->get_values('LOGIN', sub { m/[^a-z0-9]/ });
5378
5379							This is equivalent to:
5380
5381							# @badlogins is the list of logins that contain non alphanumeric characters
5382							# This method leads to carrying all values of a given field across function calls...
5383							my @badlogins = grep { m/[^a-z0-9]/ } (
5384							Text::AutoCSV->new(in_file => 'logins.csv')->get_values('LOGIN')
5385							);
5386
5387							B<IMPORTANT>
5388
5389							If not yet done, this function causes the input to be read entirely and stored in-memory.
5390
5391							=head2 get_recnum
5392
5393							my $r = $csv->get_recnum();
5394
5395							Returns the current record identifier, if a reading is in progress. If no read is in progress,
5396							return undef.
5397
5398							=head2 in_map
5399
5400							=head2 read_update_after
5401
5402							C<read_update_after> is an alias of C<in_map>.
5403
5404							$csv->in_map($field, $subref);
5405
5406							After reading a record from input, update C<$field> by calling C<$subref>. The value is put in
5407							C<$_>. Then the field value is set to the return value of C<$subref>.
5408
5409							This feature is originally meant to manage DateTime fields: the input and output CSVs carry text
5410							content, and in-between, the values dealt with are DateTime objects.
5411
5412							See L</out_map> for an example.
5413
5414							=head2 out_map
5415
5416							=head2 write_update_before
5417
5418							C<write_update_before> is an alias of C<out_map>.
5419
5420							$csv->out_map($field, $subref);
5421
5422							Before writing C<$field> field content into the output file, pass it through C<out_map>. The value
5423							is put in C<$_>. Then the return value of C<$subref> is written in the output.
5424
5425							Example:
5426
5427							Suppose you have a CSV file with the convention that a number surrounded by parenthesis is negative.
5428							You can register corresponding L</in_map> and L</out_map> functions. During the processing of data,
5429							the field content will be just a number (positive or negative), while in input and in output, it'll
5430							follow the "negative under parenthesis" convention.
5431
5432							In the below example, we rely on convention above and add a new field converted from the original
5433							one, that follows the same convention.
5434
5435							sub in_updt {
5436							return 0 if !defined($_) or $_ eq '';
5437							my $i;
5438							return -$i if ($i) = $_ =~ m/^$(.*)$$/;
5439							$_;
5440							}
5441							sub out_updt {
5442							return '' unless defined($_);
5443							return '(' . (-$_) . ')' if $_ < 0;
5444							$_;
5445							}
5446							sub convert {
5447							return ;
5448							}
5449							Text::AutoCSV->new(in_file => 'trans-euros.csv', out_file => 'trans-devises.csv')
5450							->in_map('EUROS', \&in_updt)
5451							->out_map('EUROS', \&out_updt)
5452							->out_map('DEVISE', \&out_updt)
5453							->field_add_copy('DEVISE', 'EUROS', sub { sprintf("%.2f", $_ * 1.141593); } )
5454							->write();
5455
5456							=head2 search
5457
5458							my $found_ar = $csv->search($field_name, $value, \%opts);
5459
5460							Returns an array ref of all records keys where the field C<$field_name> has the value C<$value>.
5461
5462							C<\%opts> is an optional hash ref of options for the search. See help of L</vlookup> about options.
5463
5464							B<IMPORTANT>
5465
5466							An unsuccessful search returns an empty array ref, that is, [ ]. Thus you B<cannot> check for
5467							definedness of C<search> return value to know whether or not the search found something.
5468
5469							On the other hand, you can always examine the value C<search(...)-E<gt>[0]>, as search is always an
5470							array ref. If the search found nothing, then, C<search(...)-E<gt>[0]> is not defined.
5471
5472							B<IMPORTANT bis>
5473
5474							If not yet done, this function causes the input to be read entirely and stored in-memory.
5475
5476							Example:
5477
5478							my $linux_os_keys_ar = $csv->search('OS', 'linux');
5479
5480							=head2 search_1hr
5481
5482							my $found_hr = $csv->search_1hr($field_name, $value, \%opts);
5483
5484							Returns a hash ref of the first record where the field C<$field_name> has the value C<$value>.
5485
5486							C<\%opts> is an optional hash ref of options for the search. See help of L</vlookup> about options.
5487
5488							Note the options L</value_if_not_found> and L</value_if_ambiguous> are ignored. If not found, return
5489							undef. If the result is ambiguous (more than one record found) and ignore_ambiguous is set to a
5490							false value, return undef.
5491
5492							The other options are taken into account as for any search: L</ignore_ambiguous>, L</trim>,
5493							L</case>, L</ignore_empty>.
5494
5495							B<IMPORTANT>
5496
5497							As opposed to L</search>, an unsuccessful C<search_1hr> will return C<undef>.
5498
5499							B<IMPORTANT bis>
5500
5501							If not yet done, this function causes the input to be read entirely and stored in-memory.
5502
5503							Example:
5504
5505							my $hr = $csv->search_1hr('LOGIN', $login);
5506							my $full_name = $hr->{'FIRSTNAME'} . ' ' . $hr->{'LASTNAME'};
5507
5508							=head2 vlookup
5509
5510							my $val = $csv->vlookup($searched_field, $value, $target_field, \%opts);
5511
5512							Find the first record where C<$searched_field> contains C<$value> and out of this record, returns
5513							the value of C<$target_field>.
5514
5515							C<\%opts> is optional. It is a hash of options for C<vlookup>:
5516
5517							=over 4
5518
5519							=item trim
5520
5521							If true, ignore spaces before and after the values to search.
5522
5523							If option is not present, use L</search_trim> attribute of object (default value: 1).
5524
5525							=item case
5526
5527							If true, do case sensitive searches.
5528
5529							If option is not present, use L</search_case> attribute of object (default value: 0).
5530
5531							=item ignore_empty
5532
5533							If true, ignore empty values in the search. The consequence is that you won't be able to find
5534							empty values by searching it.
5535
5536							If option is not present, use L</search_ignore_empty> attribute of object (default value: 1).
5537
5538							=item ignore_accents
5539
5540							If true, ignore accents in searches. For exampe, if C<ignore_accents> is set, a string like
5541							"Ã©lÃ©mentaire" will match "elementaire".
5542
5543							If option is not present, use L</search_ignore_accents> attribute of object (default value: 1).
5544
5545							B<NOTE>
5546
5547							This option uses the function L</remove_accents> to build its internal hash tables. See
5548							L</remove_accents> help for more details.
5549
5550							=item value_if_not_found
5551
5552							Return value if vlookup finds nothing.
5553
5554							If option is not present, use L</search_value_if_not_found> attribute of object (default value:
5555							undef).
5556
5557							=item value_if_found
5558
5559							Return value if vlookup finds something.
5560
5561							If option is not present, use L</search_value_if_found> attribute of object (default value: none).
5562
5563							This option is to just check whether a value exists, regardless of the target value found.
5564
5565							B<NOTE>
5566
5567							Although the B<$target_field> is ignored when using this option, you must specify it any way.
5568
5569							=item value_if_ambiguous
5570
5571							Return value if vlookup find more than one result. Tune it only if ignore_ambiguous is unset.
5572
5573							If option is not present, use L</search_value_if_ambiguous> attribute of object (default value:
5574							undef).
5575
5576							=item ignore_ambiguous
5577
5578							If true, then if more than one result is found, silently return the first one.
5579
5580							If option is not present, use L</search_ignore_ambiguous> attribute of object (default value: 1).
5581
5582							=back
5583
5584							B<IMPORTANT>
5585
5586							If not yet done, this function causes the input to be read entirely and stored in-memory.
5587
5588							Example:
5589
5590							my $name = $csv->vlookup('LOGIN', $id, 'NAME', { value_if_not_found => '<login not found>' });
5591
5592							=head2 remove_accents
5593
5594							my $t = $csv->remove_accents($s);
5595
5596							Take the string C<$s> as argument and return the string without accents. Uses a Unicode
5597							decomposition followed by removal of every characters that have the Unicode property
5598							C<Nonspacing_Mark>.
5599
5600							B<NOTE>
5601
5602							Only accents are removed. It is not a C<whatever-encoding -E<gt> us-ascii> conversion. For example,
5603							the French B<Å“> character (o followed by e) or the German B<ÃŸ> (eszett) are kept as is.
5604
5605							B<NOTE bis>
5606
5607							Tested with some latin1 and latin2 characters.
5608
5609							B<NOTE ter>
5610
5611							There is no language-level transformation during accents removal. For example B<JÃ¼rgen> is returned
5612							as B<Jurgen>, not B<Juergen>.
5613
5614							This function is not exported by default.
5615
5616							Example:
5617
5618							use Text::AutoCSV qw(remove_accents);
5619							my $s = remove_accents("FranÃ§ais: Ãªtre Ã©lÃ©mentaire, TchÃ¨que: sluÅ¾ba dÅ¯m");
5620							die "This script will never die" if $s ne 'Francais: etre elementaire, Tcheque: sluzba dum';
5621
5622							=head1 AUTHOR
5623
5624							SÃ©bastien Millet <milletseb@laposte.net>
5625
5626							=head1 COPYRIGHT AND LICENSE
5627
5628							This software is copyright (c) 2016, 2017 by SÃ©bastien Millet.
5629
5630							This is free software; you can redistribute it and/or modify it under
5631							the same terms as the Perl 5 programming language system itself.
5632
5633							=cut