File Coverage

blib/lib/Spreadsheet/Read.pm

Criterion	Covered	Total	%
statement	458	539	84.9
branch	394	528	74.6
condition	156	224	69.6
subroutine	46	46	100.0
pod	13	13	100.0
total	1067	1350	79.0

line	stmt	bran	cond	sub	pod	time	code
1							#!/pro/bin/perl
2
3							package Spreadsheet::Read;
4
5							=head1 NAME
6
7							Spreadsheet::Read - Read the data from a spreadsheet
8
9							=head1 SYNOPSIS
10
11							use Spreadsheet::Read;
12							my $book = ReadData ("test.csv", sep => ";");
13							my $book = ReadData ("test.sxc");
14							my $book = ReadData ("test.ods");
15							my $book = ReadData ("test.xls");
16							my $book = ReadData ("test.xlsx");
17							my $book = ReadData ($fh, parser => "xls");
18
19							Spreadsheet::Read::add ($book, "sheet.csv");
20
21							my $sheet = $book->[1]; # first datasheet
22							my $cell = $book->[1]{A3}; # content of field A3 of sheet 1
23							my $cell = $book->[1]{cell}[1][3]; # same, unformatted
24
25							# OO API
26							my $book = Spreadsheet::Read->new ("file.csv");
27							my $sheet = $book->sheet (1);
28							my $cell = $sheet->cell ("A3");
29							my $cell = $sheet->cell (1, 3);
30
31							$book->add ("test.xls");
32
33							=cut
34
35	53			53		5239645	use 5.8.1;
	53					714
36	53			53		297	use strict;
	53					109
	53					1255
37	53			53		281	use warnings;
	53					120
	53					3448
38
39							our $VERSION = "0.81";
40	1			1	1	131	sub Version { $VERSION }
41
42	53			53		400	use Carp;
	53					152
	53					3471
43	53			53		379	use Exporter;
	53					113
	53					4949
44							our @ISA = qw( Exporter );
45							our @EXPORT = qw( ReadData cell2cr cr2cell );
46							our @EXPORT_OK = qw( parses rows cellrow row add );
47
48	53			53		29800	use Encode qw( decode );
	53					826256
	53					4470
49	53			53		47041	use File::Temp qw( );
	53					1145819
	53					1811
50	53			53		33881	use Data::Dumper;
	53					332531
	53					56844
51
52							my @parsers = (
53							[ csv => "Text::CSV_XS", "0.71" ],
54							[ csv => "Text::CSV_PP", "1.17" ],
55							[ csv => "Text::CSV", "1.17" ],
56							[ ods => "Spreadsheet::ReadSXC", "0.20" ],
57							[ sxc => "Spreadsheet::ReadSXC", "0.20" ],
58							[ xls => "Spreadsheet::ParseExcel", "0.34" ],
59							[ xlsx => "Spreadsheet::ParseXLSX", "0.24" ],
60							[ xlsx => "Spreadsheet::XLSX", "0.13" ],
61							[ prl => "Spreadsheet::Perl", "" ],
62
63							# Helper modules
64							[ ios => "IO::Scalar", "" ],
65							[ dmp => "Data::Peek", "" ],
66							);
67							my %can = ( supports => { map { $_->[1] => $_->[2] } @parsers });
68							foreach my $p (@parsers) {
69							my $format = $p->[0];
70							$can{$format} and next;
71							$can{$format} = "";
72							my $preset = $ENV{"SPREADSHEET_READ_\U$format"} or next;
73							my $min_version = $can{supports}{$preset};
74							unless ($min_version) {
75							# Catch weirdness like $SPREADSHEET_READ_XLSX = "DBD::Oracle"
76							$can{$format} = "!$preset is not supported for the $format format";
77							next;
78							}
79							if (eval "local \$_; require $preset" and not $@) {
80							# forcing a parser should still check the version
81							my $ok;
82							my $has = $preset->VERSION;
83							$has =~ s/_[0-9]+$//; # Remove beta-part
84							if ($min_version =~ m/^v([0-9.]+)/) { # clumsy versions
85							my @min = split m/\./ => $1;
86							$has =~ s/^v//;
87							my @has = split m/\./ => $has;
88							$ok = (($has[0] * 1000 + $has[1]) * 1000 + $has[2]) >=
89							(($min[0] * 1000 + $min[1]) * 1000 + $min[2]);
90							}
91							else { # normal versions
92							$ok = $has >= $min_version;
93							}
94							$ok or $preset = "!$preset";
95							}
96							else {
97							$preset = "!$preset";
98							}
99							$can{$format} = $preset;
100							}
101							delete $can{supports};
102							for (@parsers) {
103							my ($flag, $mod, $vsn) = @$_;
104							$can{$flag} and next;
105							eval "require $mod; \$vsn and ${mod}->VERSION (\$vsn); \$can{\$flag} = '$mod'" or
106							$_->[0] = "! Cannot use $mod version $vsn: $@";
107							}
108							$can{sc} = __PACKAGE__; # SquirelCalc is built-in
109
110							defined $Spreadsheet::ParseExcel::VERSION && $Spreadsheet::ParseExcel::VERSION < 0.61 and
111							*Spreadsheet::ParseExcel::Workbook::get_active_sheet = sub { undef; };
112
113							my $debug = 0;
114							my %def_opts = (
115							rc => 1,
116							cells => 1,
117							attr => 0,
118							clip => undef, # $opt{cells};
119							strip => 0,
120							pivot => 0,
121							dtfmt => "yyyy-mm-dd", # Format 14
122							debug => 0,
123							passwd => undef,
124							parser => undef,
125							sep => undef,
126							quote => undef,
127							label => undef,
128							);
129							my @def_attr = (
130							type => "text",
131							fgcolor => undef,
132							bgcolor => undef,
133							font => undef,
134							size => undef,
135							format => undef,
136							halign => "left",
137							valign => "top",
138							bold => 0,
139							italic => 0,
140							uline => 0,
141							wrap => 0,
142							merged => 0,
143							hidden => 0,
144							locked => 0,
145							enc => "utf-8", # $ENV{LC_ALL} // $ENV{LANG} // ...
146							formula => undef,
147							);
148
149							# Helper functions
150
151							sub _dump {
152	7			7		19	my ($label, $ref) = @_;
153	7	50				23	if ($can{dmp}) {
154	7					48	print STDERR Data::Peek::DDumper ({ $label => $ref });
155							}
156							else {
157	0					0	print STDERR Data::Dumper->Dump ([$ref], [$label]);
158							}
159							} # _dump
160
161							sub _parser {
162	342	100		342		1460	my $type = shift or return "";
163	100					308	$type = lc $type;
164							# Aliases and fullnames
165	100	100				343	$type eq "excel" and return "xls";
166	99	50				302	$type eq "excel2007" and return "xlsx";
167	99	100				307	$type eq "oo" and return "sxc";
168	98	100				296	$type eq "ods" and return "sxc";
169	96	100				292	$type eq "openoffice" and return "sxc";
170	95	50				291	$type eq "libreoffice" and return "sxc";
171	95	100				265	$type eq "perl" and return "prl";
172	94	50				261	$type eq "squirelcalc" and return "sc";
173	94	100				558	return exists $can{$type} ? $type : "";
174							} # _parser
175
176							sub new {
177	12			12	1	6892	my $class = shift;
178	12					49	my $r = ReadData (@_);
179	12	100				76	unless ($r) {
180	2	50				17	@_ and return; # new with arguments failed to open resource
181	0					0	$r = [{
182							parsers => [],
183							error => undef,
184							sheets => 0,
185							sheet => { },
186							}];
187							}
188	10					212	bless $r => $class;
189							} # new
190
191							# Spreadsheet::Read::parses ("csv") or die "Cannot parse CSV"
192							sub parses {
193	63	100		63	1	145584	ref $_[0] eq __PACKAGE__ and shift;
194	63	100				277	my $type = _parser (shift) or return 0;
195	61	100				407	if ($can{$type} =~ m/^!\s(.)/) {
196	8					32	$@ = $1;
197	8					28	return 0;
198							}
199	53					183	return $can{$type};
200							} # parses
201
202							sub sheets {
203	4			4	1	14	my $ctrl = shift->[0];
204	4					6	my %s = %{$ctrl->{sheet}};
	4					19
205	4	100				35	wantarray ? sort { $s{$a} <=> $s{$b} } keys %s : $ctrl->{sheets};
	4					26
206							} # sheets
207
208							# col2label (4) => "D"
209							sub col2label {
210	15697	100		15697	1	32919	ref $_[0] eq __PACKAGE__ and shift;
211	15697					20483	my $c = shift;
212	15697	100	100			41042	defined $c && $c > 0 or return "";
213	15693					21423	my $cell = "";
214	15693					26123	while ($c) {
215	53			53		30000	use integer;
	53					836
	53					301
216
217	15710					29729	substr $cell, 0, 0, chr (--$c % 26 + ord "A");
218	15710					27647	$c /= 26;
219							}
220	15693					42026	$cell;
221							} # col2label
222
223							# cr2cell (4, 18) => "D18"
224							# No prototype to allow 'cr2cell (@rowcol)'
225							sub cr2cell {
226	15495	100		15495	1	189546	ref $_[0] eq __PACKAGE__ and shift;
227	15495					25206	my ($c, $r) = @_;
228	15495	100	100			72297	defined $c && defined $r && $c > 0 && $r > 0 or return "";
			100
			100
229	15489					24027	col2label ($c) . $r;
230							} # cr2cell
231
232							# cell2cr ("D18") => (4, 18)
233							sub cell2cr {
234	604	100		604	1	610707	ref $_[0] eq __PACKAGE__ and shift;
235	604	100	100			5633	my ($cc, $r) = (uc ($_[0]\|\|"") =~ m/^([A-Z]+)([0-9]+)$/) or return (0, 0);
236	600					1241	my $c = 0;
237	600					2499	while ($cc =~ s/^([A-Z])//) {
238	608					2497	$c = 26 * $c + 1 + ord ($1) - ord ("A");
239							}
240	600					2112	($c, $r);
241							} # cell2cr
242
243							# my @row = cellrow ($book->[1], 1);
244							# my @row = $book->cellrow (1, 1);
245							sub cellrow {
246	10	100		10	1	69	my $sheet = ref $_[0] eq __PACKAGE__ ? (shift)->[shift] : shift or return;
		100
247	9	100	66			65	ref $sheet eq "HASH" && exists $sheet->{cell} or return;
248	7	50	33			39	exists $sheet->{maxcol} && exists $sheet->{maxrow} or return;
249	7	100				26	my $row = shift or return;
250	6	100	100			39	$row > 0 && $row <= $sheet->{maxrow} or return;
251	4					12	my $s = $sheet->{cell};
252	4					17	map { $s->[$_][$row] } 1..$sheet->{maxcol};
	76					147
253							} # cellrow
254
255							# my @row = row ($book->[1], 1);
256							# my @row = $book->row (1, 1);
257							sub row {
258	10	100		10	1	3508	my $sheet = ref $_[0] eq __PACKAGE__ ? (shift)->[shift] : shift or return;
		100
259	9	100	66			65	ref $sheet eq "HASH" && exists $sheet->{cell} or return;
260	7	50	33			39	exists $sheet->{maxcol} && exists $sheet->{maxrow} or return;
261	7	100				48	my $row = shift or return;
262	6	100	100			40	$row > 0 && $row <= $sheet->{maxrow} or return;
263	4					23	map { $sheet->{cr2cell ($_, $row)} } 1..$sheet->{maxcol};
	76					134
264							} # row
265
266							# Convert {cell}'s [column][row] to a [row][column] list
267							# my @rows = rows ($book->[1]);
268							sub rows {
269	33	50		33	1	19369	my $sheet = ref $_[0] eq __PACKAGE__ ? (shift)->[shift] : shift or return;
		100
270	30	100	100			169	ref $sheet eq "HASH" && exists $sheet->{cell} or return;
271	26	100	100			120	exists $sheet->{maxcol} && exists $sheet->{maxrow} or return;
272	5					15	my $s = $sheet->{cell};
273
274							map {
275	20					33	my $r = $_;
276	20					36	[ map { $s->[$_][$r] } 1..$sheet->{maxcol} ];
	80					205
277	5					28	} 1..$sheet->{maxrow};
278							} # rows
279
280							sub sheet {
281	23			23	1	650	my ($book, $sheet) = @_;
282	23	100	100			116	$book && $sheet or return;
283	21					43	my $class = "Spreadsheet::Read::Sheet";
284							$sheet =~ m/^[0-9]+$/ && $sheet >= 1 && $sheet <= $book->[0]{sheets} and
285	21	100	66			301	return bless $book->[$sheet] => $class;
			100
286							exists $book->[0]{sheet}{$sheet} and
287	6	100				26	return bless $book->[$book->[0]{sheet}{$sheet}] => $class;
288	5					17	foreach my $idx (1 .. $book->[0]{sheets}) {
289	6	100				24	$book->[$idx]{label} eq $sheet and
290							return bless $book->[$idx] => $class;
291							}
292	4					19	return;
293							} # sheet
294
295							# If option "clip" is set, remove the trailing rows and
296							# columns in each sheet that contain no visible data
297							sub _clipsheets {
298	227			227		616	my ($opt, $ref) = @_;
299
300	227	50				766	$ref->[0]{sheets} or return $ref;
301
302	227					610	my ($rc, $cl) = ($opt->{rc}, $opt->{cells});
303	227					646	my ($oc, $os, $oa) = ($opt->{clip}, $opt->{strip}, $opt->{attr});
304
305							# Strip leading/trailing spaces
306	227	100	100			1020	if ($os \|\| $oc) {
307	212					703	foreach my $sheet (1 .. $ref->[0]{sheets}) {
308	254					659	$ref->[$sheet]{indx} = $sheet;
309	254					448	my $ss = $ref->[$sheet];
310	254	100	66			1331	$ss->{maxrow} && $ss->{maxcol} or next;
311	247					587	my ($mc, $mr) = (0, 0);
312	247					656	foreach my $row (1 .. $ss->{maxrow}) {
313	1954					3372	foreach my $col (1 .. $ss->{maxcol}) {
314	16636	100				26429	if ($rc) {
315	14738	100				26967	defined $ss->{cell}[$col][$row] or next;
316	5881	100				10285	$os & 2 and $ss->{cell}[$col][$row] =~ s/\s+$//;
317	5881	100				9764	$os & 1 and $ss->{cell}[$col][$row] =~ s/^\s+//;
318	5881	100				10671	if (length $ss->{cell}[$col][$row]) {
319	4417	100				7531	$col > $mc and $mc = $col;
320	4417	100				7463	$row > $mr and $mr = $row;
321							}
322							}
323	7779	100				12465	if ($cl) {
324	7560					11687	my $cell = cr2cell ($col, $row);
325	7560	100				15679	defined $ss->{$cell} or next;
326	6233	100				11280	$os & 2 and $ss->{$cell} =~ s/\s+$//;
327	6233	100				10084	$os & 1 and $ss->{$cell} =~ s/^\s+//;
328	6233	100				11221	if (length $ss->{$cell}) {
329	4757	100				7763	$col > $mc and $mc = $col;
330	4757	100				9090	$row > $mr and $mr = $row;
331							}
332							}
333							}
334							}
335
336	247	100	66			1430	$oc && ($mc < $ss->{maxcol} \|\| $mr < $ss->{maxrow}) or next;
			66
337
338							# Remove trailing empty columns
339	106					277	foreach my $col (($mc + 1) .. $ss->{maxcol}) {
340	243	100				627	$rc and undef $ss->{cell}[$col];
341	243	100				518	$oa and undef $ss->{attr}[$col];
342	243	100				462	$cl or next;
343	193					333	my $c = col2label ($col);
344	193					1424	delete $ss->{"$c$_"} for 1 .. $ss->{maxrow};
345							}
346
347							# Remove trailing empty rows
348	106					285	foreach my $row (($mr + 1) .. $ss->{maxrow}) {
349	139					271	foreach my $col (1 .. $mc) {
350	64	50				143	$cl and delete $ss->{cr2cell ($col, $row)};
351	64	50				183	$rc and undef $ss->{cell} [$col][$row];
352	64	50				152	$oa and undef $ss->{attr} [$col][$row];
353							}
354							}
355
356	106					330	($ss->{maxrow}, $ss->{maxcol}) = ($mr, $mc);
357							}
358							}
359
360	227	100				657	if ($opt->{pivot}) {
361	1					5	foreach my $sheet (1 .. $ref->[0]{sheets}) {
362	1					4	my $ss = $ref->[$sheet];
363	1	0	33			4	$ss->{maxrow} \|\| $ss->{maxcol} or next;
364	1	50				6	my $mx = $ss->{maxrow} > $ss->{maxcol} ? $ss->{maxrow} : $ss->{maxcol};
365	1					5	foreach my $row (2 .. $mx) {
366	18					40	foreach my $col (1 .. ($row - 1)) {
367							$opt->{rc} and
368							($ss->{cell}[$col][$row], $ss->{cell}[$row][$col]) =
369	171	50				527	($ss->{cell}[$row][$col], $ss->{cell}[$col][$row]);
370							$opt->{cells} and
371							($ss->{cr2cell ($col, $row)}, $ss->{cr2cell ($row, $col)}) =
372	171	50				352	($ss->{cr2cell ($row, $col)}, $ss->{cr2cell ($col, $row)});
373							}
374							}
375	1					9	($ss->{maxcol}, $ss->{maxrow}) = ($ss->{maxrow}, $ss->{maxcol});
376							}
377							}
378
379	227					5657	$ref;
380							} # _clipsheets
381
382							# Convert a single color (index) to a color
383							sub _xls_color {
384	1742			1742		21196	my $clr = shift;
385	1742	100				3558	defined $clr or return undef;
386	850	50				1532	$clr eq "#000000" and return undef;
387	850	50				1689	$clr =~ m/^#[0-9a-fA-F]+$/ and return lc $clr;
388	850	100	66			2901	$clr == 0 \|\| $clr == 32767 and return undef; # Default fg color
389	468					1142	return "#" . lc Spreadsheet::ParseExcel->ColorIdxToRGB ($clr);
390							} # _xls_color
391
392							# Convert a fill [ $pattern, $front_color, $back_color ] to a single background
393							sub _xls_fill {
394	1526			1526		2743	my ($p, $fg, $bg) = @_;
395	1526	50				2702	defined $p or return undef;
396	1526	50				2759	$p == 32767 and return undef; # Default fg color
397	1526	100	100			17004	$p == 0 && !defined $bg and return undef; # No fill bg color
398	634	100				1152	$p == 1 and return _xls_color ($fg);
399	418	50	33			6280	$bg < 8 \|\| $bg > 63 and return undef; # see Workbook.pm#106
400	0					0	return _xls_color ($bg);
401							} # _xls_fill
402
403							sub ReadData {
404	279	100		279	1	301347	my $txt = shift or return;
405
406	276					637	my %opt;
407	276	100				856	if (@_) {
408	221	100				1175	if (ref $_[0] eq "HASH") { %opt = %{shift @_} }
	20	50				35
	20					86
409	201					836	elsif (@_ % 2 == 0) { %opt = @_ }
410							}
411
412	276	100				1205	exists $opt{rc} or $opt{rc} = $def_opts{rc};
413	276	100				968	exists $opt{cells} or $opt{cells} = $def_opts{cells};
414	276	100				899	exists $opt{attr} or $opt{attr} = $def_opts{attr};
415	276	100				796	exists $opt{clip} or $opt{clip} = $opt{cells};
416	276	100				839	exists $opt{strip} or $opt{strip} = $def_opts{strip};
417	276	100				954	exists $opt{dtfmt} or $opt{dtfmt} = $def_opts{dtfmt};
418
419							# $debug = $opt{debug} // 0;
420	276	100				808	$debug = defined $opt{debug} ? $opt{debug} : $def_opts{debug};
421	276	100				745	$debug > 4 and _dump (Options => \%opt);
422
423	31					105	my %parser_opts = map { $_ => $opt{$_} }
424	276					2651	grep { !exists $def_opts{$_} }
	1764					3624
425							keys %opt;
426
427	276					1162	my $_parser = _parser ($opt{parser});
428
429	276	100				1146	my $io_ref = ref ($txt) =~ m/GLOB\|IO/ ? $txt : undef;
430							my $io_fil = $io_ref ? 0 : $txt =~ m/\0/ ? 0
431	53	100		53		115311	: do { no warnings "newline"; -f $txt };
	53	100				175
	53					282478
	276					1062
	255					6142
432	276	100	100			1781	my $io_txt = $io_ref \|\| $io_fil ? 0 : 1;
433
434	276	100	100			3276	$io_fil && ! -s $txt and do { $@ = "$txt is empty"; return };
	11					60
	11					65
435	265	100	100			1540	$io_ref && eof $txt and do { $@ = "Empty stream"; return };
	5					20
	5					86
436
437	260	100	100			2194	if ($opt{parser} ? $_parser eq "csv" : ($io_fil && $txt =~ m/\.(csv)$/i)) {
		100
438	106	50				378	$can{csv} or croak "CSV parser not installed";
439
440	106	100				410	my $label = defined $opt{label} ? $opt{label} : $io_fil ? $txt : "IO";
		100
441
442	106	50				411	$debug and print STDERR "Opening CSV $label using $can{csv}-", $can{csv}->VERSION, "\n";
443
444							my @data = (
445							{ type => "csv",
446							parser => $can{csv},
447							version => $can{csv}->VERSION,
448							parsers => [ {
449							type => "csv",
450							parser => $can{csv},
451							version => $can{csv}->VERSION,
452	106					3423	}],
453							error => undef,
454							quote => '"',
455							sepchar => ',',
456							sheets => 1,
457							sheet => { $label => 1 },
458							},
459							{ parser => 0,
460							label => $label,
461							maxrow => 0,
462							maxcol => 0,
463							cell => [],
464							attr => [],
465							merged => [],
466							active => 1,
467							},
468							);
469
470	106					560	my ($sep, $quo, $in) = (",", '"');
471	106	100				332	defined $opt{sep} and $sep = $opt{sep};
472	106	100				281	defined $opt{quote} and $quo = $opt{quote};
473	106	50				311	$debug > 8 and _dump (debug => {
474							data => \@data, txt => $txt, io_ref => $io_ref, io_fil => $io_fil });
475	106	100				322	if ($io_fil) {
		100
		100
		50
476	94	100	100			306	unless (defined $opt{quote} && defined $opt{sep}) {
477	87	50				3736	open $in, "<", $txt or return;
478	87					1831	my $l1 = <$in>;
479
480	87	100				439	$quo = defined $opt{quote} ? $opt{quote} : '"';
481							$sep = # If explicitly set, use it
482							defined $opt{sep} ? $opt{sep} :
483							# otherwise start auto-detect with quoted strings
484	87	50				913	$l1 =~ m/["0-9];["0-9;]/ ? ";" :
		100
		100
		50
		50
		50
		100
		100
		50
485							$l1 =~ m/["0-9],["0-9,]/ ? "," :
486							$l1 =~ m/["0-9]\t["0-9,]/ ? "\t" :
487							$l1 =~ m/["0-9]\\|["0-9,]/ ? "\|" :
488							# If neither, then for unquoted strings
489							$l1 =~ m/\w;[\w;]/ ? ";" :
490							$l1 =~ m/\w,[\w,]/ ? "," :
491							$l1 =~ m/\w\t[\w,]/ ? "\t" :
492							$l1 =~ m/\w\\|[\w,]/ ? "\|" :
493							"," ;
494	87					1348	close $in;
495							}
496	94	50				2882	open $in, "<", $txt or return;
497							}
498							elsif ($io_ref) {
499	10					23	$in = $txt;
500							}
501							elsif (ref $txt eq "SCALAR") {
502	1	50				13	open $in, "<", $txt or croak "Cannot open input: $!";
503							}
504							elsif ($txt =~ m/[\r\n,;]/) {
505	1	50		2		54	open $in, "<", \$txt or croak "Cannot open input: $!";
	2					23
	2					5
	2					18
506							}
507							else {
508	0					0	warn "Input type ", ref $txt,
509							" might not be supported. Please file a ticket\n";
510	0					0	$in = $txt; # Now pray ...
511							}
512	106	50				1409	$debug > 1 and print STDERR "CSV sep_char '$sep', quote_char '$quo'\n";
513							my $csv = $can{csv}->new ({
514							%parser_opts,
515
516							sep_char => ($data[0]{sepchar} = $sep),
517	106	50				1555	quote_char => ($data[0]{quote} = $quo),
518							keep_meta_info => 1,
519							binary => 1,
520							auto_diag => 1,
521							}) or croak "Cannot create a csv ('$sep', '$quo') parser!";
522
523	106					23943	while (my $row = $csv->getline ($in)) {
524	413	50				70451	my @row = @$row or last;
525
526	413					948	my $r = ++$data[1]{maxrow};
527	413	100				1070	@row > $data[1]{maxcol} and $data[1]{maxcol} = @row;
528	413					1121	foreach my $c (0 .. $#row) {
529	2427					3465	my $val = $row[$c];
530	2427					4442	my $cell = cr2cell ($c + 1, $r);
531	2427	100				6865	$opt{rc} and $data[1]{cell}[$c + 1][$r] = $val;
532	2427	100				6275	$opt{cells} and $data[1]{$cell} = $val;
533	2427	50				12353	$opt{attr} and $data[1]{attr}[$c + 1][$r] = { @def_attr };
534							}
535							}
536	106	50				14838	$csv->eof () or $data[0]{error} = [ $csv->error_diag ];
537	106					1659	close $in;
538
539	106					367	for (@{$data[1]{cell}}) {
	106					547
540	734	100				1679	defined or $_ = [];
541							}
542	106					571	return _clipsheets \%opt, [ @data ];
543							}
544
545	154	100				466	if ($io_txt) { # && $_parser !~ m/^xlsx?$/) {
546	29	100	66			435	if ( # /etc/magic: Microsoft Office Document
		50	66
		100	66
547							$txt =~ m{\A(\376\067\0\043
548							\|\320\317\021\340\241\261\032\341
549							\|\333\245-\0\0\0)}x
550							# /usr/share/misc/magic
551							\|\| $txt =~ m{\A.{2080}Microsoft Excel 5.0 Worksheet}
552							\|\| $txt =~ m{\A\x09\x04\x06\x00\x00\x00\x10\x00}
553							) {
554	1	50				7	$can{xls} or croak "Spreadsheet::ParseExcel not installed";
555	1					2	my $tmpfile;
556	1	50				4	if ($can{ios}) { # Do not use a temp file if IO::Scalar is available
557	1					3	$tmpfile = \$txt;
558							}
559							else {
560	0					0	$tmpfile = File::Temp->new (SUFFIX => ".xls", UNLINK => 1);
561	0					0	binmode $tmpfile;
562	0					0	print $tmpfile $txt;
563	0					0	close $tmpfile;
564							}
565	1	50				59	open $io_ref, "<", $tmpfile or do { $@ = $!; return };
	0					0
	0					0
566	1					1005	$io_txt = 0;
567	1					6	$_parser = _parser ($opt{parser} = "xls");
568							}
569							elsif ( # /usr/share/misc/magic
570							$txt =~ m{\APK\003\004.{4,30}(?:\[Content_Types\]\.xml\|_rels/\.rels)}
571							) {
572	0	0				0	$can{xlsx} or croak "XLSX parser not installed";
573	0					0	my $tmpfile;
574	0	0				0	if ($can{ios}) { # Do not use a temp file if IO::Scalar is available
575	0					0	$tmpfile = \$txt;
576							}
577							else {
578	0					0	$tmpfile = File::Temp->new (SUFFIX => ".xlsx", UNLINK => 1);
579	0					0	binmode $tmpfile;
580	0					0	print $tmpfile $txt;
581	0					0	close $tmpfile;
582							}
583	0	0				0	open $io_ref, "<", $tmpfile or do { $@ = $!; return };
	0					0
	0					0
584	0					0	$io_txt = 0;
585	0					0	$_parser = _parser ($opt{parser} = "xlsx");
586							}
587							elsif (!$io_ref && $txt =~ m/\.xlsx?$/i) {
588	3					17	$@ = "Cannot open $txt as file";
589	3					50	return;
590							}
591							}
592	151	100	66			1561	if ($opt{parser} ? $_parser =~ m/^xlsx?$/
		100
593							: ($io_fil && $txt =~ m/\.(xlsx?)$/i && ($_parser = $1))) {
594	75	100				388	my $parse_type = $_parser =~ m/x$/i ? "XLSX" : "XLS";
595	75	50				350	my $parser = $can{lc $parse_type} or
596							croak "Parser for $parse_type is not installed";
597							$debug and print STDERR "Opening $parse_type ", $io_ref ? "<REF>" : $txt,
598	75	50				390	" using $parser-", $can{lc $parse_type}->VERSION, "\n";
		100
599	75	50				260	$opt{passwd} and $parser_opts{Password} = $opt{passwd};
600	75					265	my $oBook = eval {
601							$io_ref
602							? $parse_type eq "XLSX"
603							? $can{xlsx} =~ m/::XLSX$/
604							? $parser->new ($io_ref)
605							: $parser->new (%parser_opts)->parse ($io_ref)
606							: $parser->new (%parser_opts)->Parse ($io_ref)
607							: $parse_type eq "XLSX"
608	75	0				991	? $can{xlsx} =~ m/::XLSX$/
		50
		50
		100
		100
609							? $parser->new ($txt)
610							: $parser->new (%parser_opts)->parse ($txt)
611							: $parser->new (%parser_opts)->Parse ($txt);
612							};
613	75	50				2298301	unless ($oBook) {
614							# cleanup will fail on folders with spaces.
615	0					0	(my $msg = $@) =~ s/ at \S+ line \d+.*//s;
616	0					0	croak "$parse_type parser cannot parse data: $msg";
617							}
618	75	50				319	$debug > 8 and _dump (oBook => $oBook);
619
620							# WorkBook keys:
621							# aColor _CurSheet Format SheetCount
622							# ActiveSheet _CurSheet_ FormatStr _skip_chart
623							# Author File NotSetCell _string_contin
624							# BIFFVersion Flg1904 Object Version
625							# _buffer FmtClass PkgStr Worksheet
626							# CellHandler Font _previous_info
627
628							my @data = ( {
629							type => lc $parse_type,
630							parser => $can{lc $parse_type},
631							version => $can{lc $parse_type}->VERSION,
632							parsers => [{
633							type => lc $parse_type,
634							parser => $can{lc $parse_type},
635							version => $can{lc $parse_type}->VERSION,
636							}],
637							error => undef,
638	75		50			3035	sheets => $oBook->{SheetCount} \|\| 0,
639							sheet => {},
640							} );
641							# $debug and $data[0]{_parser} = $oBook;
642							# Overrule the default date format strings
643							my %def_fmt = (
644							0x0E => lc $opt{dtfmt}, # m-d-yy
645	75					718	0x0F => "d-mmm-yyyy", # d-mmm-yy
646							0x11 => "mmm-yyyy", # mmm-yy
647							0x16 => "yyyy-mm-dd hh:mm", # m-d-yy h:mm
648							);
649	75					627	$oBook->{FormatStr}{$_} = $def_fmt{$_} for keys %def_fmt;
650							my $oFmt = $parse_type eq "XLSX"
651	75	50				823	? $can{xlsx} =~ m/::XLSX$/
		100
652							? Spreadsheet::XLSX::Fmt2007->new
653							: Spreadsheet::ParseExcel::FmtDefault->new
654							: Spreadsheet::ParseExcel::FmtDefault->new;
655
656	75	100				904	$debug and print STDERR "\t$data[0]{sheets} sheets\n";
657							my $active_sheet = $oBook->get_active_sheet
658							\|\| $oBook->{ActiveSheet}
659	75		33			485	\|\| $oBook->{SelectedSheet};
660	75					999	my $current_sheet = 0;
661	75					169	foreach my $oWkS (@{$oBook->{Worksheet}}) {
	75					264
662	121					291	$current_sheet++;
663	121	100	100			682	$opt{clip} and !defined $oWkS->{Cells} and next; # Skip empty sheets
664							my %sheet = (
665							parser => 0,
666							label => $oWkS->{Name},
667	115					918	maxrow => 0,
668							maxcol => 0,
669							cell => [],
670							attr => [],
671							merged => [],
672							active => 0,
673							);
674							# $debug and $sheet{_parser} = $oWkS;
675	115	50				441	defined $sheet{label} or $sheet{label} = "-- unlabeled --";
676	115	100				435	exists $oWkS->{MinRow} and $sheet{minrow} = $oWkS->{MinRow} + 1;
677	115	100				388	exists $oWkS->{MaxRow} and $sheet{maxrow} = $oWkS->{MaxRow} + 1;
678	115	100				370	exists $oWkS->{MinCol} and $sheet{mincol} = $oWkS->{MinCol} + 1;
679	115	100				348	exists $oWkS->{MaxCol} and $sheet{maxcol} = $oWkS->{MaxCol} + 1;
680							$sheet{merged} = [
681	12					34	map { $_->[0] }
682	4					21	sort { $a->[1] cmp $b->[1] }
683	12					94	map {[ $_, pack "NNNN", @$_ ]}
684	12					84	map {[ map { $_ + 1 } @{$_}[1,0,3,2] ]}
	48					109
	12					41
685	115	100				261	@{$oWkS->get_merged_areas \|\| []}];
	115					516
686	115					1195	my $sheet_idx = 1 + @data;
687	115	100				375	$debug and print STDERR "\tSheet $sheet_idx '$sheet{label}' $sheet{maxrow} x $sheet{maxcol}\n";
688	115	100				361	if (defined $active_sheet) {
689							# _SheetNo is 0-based
690	7	50				21	my $sheet_no = defined $oWkS->{_SheetNo} ? $oWkS->{_SheetNo} : $current_sheet - 1;
691	7	100				31	$sheet_no eq $active_sheet and $sheet{active} = 1;
692							}
693							# Sheet keys:
694							# _Book FooterMargin MinCol RightMargin
695							# BottomMargin FooterMergin MinRow RightMergin
696							# BottomMergin HCenter Name RowHeight
697							# Cells Header NoColor RowHidden
698							# ColFmtNo HeaderMargin NoOrient Scale
699							# ColHidden HeaderMergin NoPls SheetHidden
700							# ColWidth Kind Notes _SheetNo
701							# Copis Landscape PageFit SheetType
702							# DefColWidth LeftMargin PageStart SheetVersion
703							# DefRowHeight LeftMergin PaperSize TopMargin
704							# Draft LeftToRight _Pos TopMergin
705							# FitHeight MaxCol PrintGrid UsePage
706							# FitWidth MaxRow PrintHeaders VCenter
707							# Footer MergedArea Res VRes
708	115	100				341	if (exists $oWkS->{MinRow}) {
709	110		50			477	my $hiddenRows = $oWkS->{RowHidden} \|\| [];
710	110		50			463	my $hiddenCols = $oWkS->{ColHidden} \|\| [];
711	110	100				314	if ($opt{clip}) {
712	91					242	my ($mr, $mc) = (-1, -1);
713	91					355	foreach my $r ($oWkS->{MinRow} .. $sheet{maxrow}) {
714	516					949	foreach my $c ($oWkS->{MinCol} .. $sheet{maxcol}) {
715	2908	100				5626	my $oWkC = $oWkS->{Cells}[$r][$c] or next;
716	1701	50				3285	defined (my $val = $oWkC->{Val}) or next;
717	1701	100				3180	$val eq "" and next;
718	1679	100				2734	$r > $mr and $mr = $r;
719	1679	100				2999	$c > $mc and $mc = $c;
720							}
721							}
722	91					350	($sheet{maxrow}, $sheet{maxcol}) = ($mr + 1, $mc + 1);
723							}
724	110					352	foreach my $r ($oWkS->{MinRow} .. $sheet{maxrow}) {
725	597					1280	foreach my $c ($oWkS->{MinCol} .. $sheet{maxcol}) {
726	3132	100				12249	my $oWkC = $oWkS->{Cells}[$r][$c] or next;
727							#defined (my $val = $oWkC->{Val}) or next;
728	1802					2951	my $val = $oWkC->{Val};
729	1802	50	33			6154	if (defined $val and my $enc = $oWkC->{Code}) {
730	0	0				0	$enc eq "ucs2" and $val = decode ("utf-16be", $val);
731							}
732	1802					4147	my $cell = cr2cell ($c + 1, $r + 1);
733	1802	100				5604	$opt{rc} and $sheet{cell}[$c + 1][$r + 1] = $val; # Original
734
735	1802					2466	my $fmt;
736	1802					2652	my $FmT = $oWkC->{Format};
737	1802	100				3375	if ($FmT) {
738	952	100				1928	unless (ref $FmT) {
739	130					171	$fmt = $FmT;
740	130					222	$FmT = {};
741							}
742							}
743							else {
744	850					1395	$FmT = {};
745							}
746	1802					3572	foreach my $attr (qw( AlignH AlignV FmtIdx Hidden Lock
747							Wrap )) {
748	10812	100				21191	exists $FmT->{$attr} or $FmT->{$attr} = 0;
749							}
750	1802	100				3758	exists $FmT->{Fill} or $FmT->{Fill} = [ 0 ];
751	1802	100				4029	exists $FmT->{Font} or $FmT->{Font} = undef;
752
753	1802	100				3403	unless (defined $fmt) {
754							$fmt = $FmT->{FmtIdx}
755							? $oBook->{FormatStr}{$FmT->{FmtIdx}}
756	1672	100				3215	: undef;
757							}
758	1802	100				3465	if ($oWkC->{Type} eq "Numeric") {
759							# Fixed in 0.33 and up
760							# see Spreadsheet/ParseExcel/FmtDefault.pm
761							$FmT->{FmtIdx} == 0x0e \|\|
762							$FmT->{FmtIdx} == 0x0f \|\|
763							$FmT->{FmtIdx} == 0x10 \|\|
764							$FmT->{FmtIdx} == 0x11 \|\|
765							$FmT->{FmtIdx} == 0x16 \|\|
766							(defined $fmt && $fmt =~ m{^[dmy][-\\/dmy]*$}) and
767	370	100	33			3315	$oWkC->{Type} = "Date";
			33
			33
			33
			100
			66
768							$FmT->{FmtIdx} == 0x09 \|\|
769							$FmT->{FmtIdx} == 0x0a \|\|
770							(defined $fmt && $fmt =~ m{^0+\.0+%$}) and
771	370	100	100			2129	$oWkC->{Type} = "Percentage";
			100
			100
772							}
773	1802	100				3451	defined $fmt and $fmt =~ s/\\//g;
774							$opt{cells} and # Formatted value
775							$sheet{$cell} = defined $val ? $FmT && exists $def_fmt{$FmT->{FmtIdx}}
776	1802	100	66			9920	? $oFmt->ValFmt ($oWkC, $oBook)
		50
		100
777							: $oWkC->Value : undef;
778	1802	100				15284	if ($opt{attr}) {
779	1526					2303	my $FnT = $FmT->{Font};
780							my $fmi = $FmT->{FmtIdx}
781							? $oBook->{FormatStr}{$FmT->{FmtIdx}}
782	1526	100				2672	: undef;
783	1526	100				2611	$fmi and $fmi =~ s/\\//g;
784							$sheet{attr}[$c + 1][$r + 1] = {
785							@def_attr,
786
787							type => lc $oWkC->{Type},
788							enc => $oWkC->{Code},
789							merged => (defined $oWkC->{Merged} ? $oWkC->{Merged} : $oWkC->is_merged) \|\| 0,
790							hidden => ($hiddenRows->[$r] \|\| $hiddenCols->[$c] ? 1 :
791							defined $oWkC->{Hidden} ? $oWkC->{Hidden} : $FmT->{Hidden}) \|\| 0,
792							locked => $FmT->{Lock} \|\| 0,
793							format => $fmi,
794							halign => [ undef, qw( left center right
795							fill justify ), undef,
796							"equal_space" ]->[$FmT->{AlignH}],
797							valign => [ qw( top center bottom justify
798							equal_space )]->[$FmT->{AlignV}],
799							wrap => $FmT->{Wrap},
800							font => $FnT->{Name},
801							size => $FnT->{Height},
802							bold => $FnT->{Bold},
803							italic => $FnT->{Italic},
804							uline => $FnT->{Underline},
805							fgcolor => _xls_color ($FnT->{Color}),
806	1526					6194	bgcolor => _xls_fill (@{$FmT->{Fill}}),
807							formula => $oWkC->{Formula},
808	1526		100			4888	};
			100
			100
809							#_dump "cell", $sheet{attr}[$c + 1][$r + 1];
810							}
811							}
812							}
813							}
814	115					206	for (@{$sheet{cell}}) {
	115					380
815	409	100				1068	defined or $_ = [];
816							}
817	115					2012	push @data, { %sheet };
818							# $data[0]{sheets}++;
819	115	50				614	if ($sheet{label} eq "-- unlabeled --") {
820	0					0	$sheet{label} = "";
821							}
822							else {
823	115					783	$data[0]{sheet}{$sheet{label}} = $#data;
824							}
825							}
826	75					497	return _clipsheets \%opt, [ @data ];
827							}
828
829	76	100				430	if ($opt{parser} ? _parser ($opt{parser}) eq "sc"
		100
		100
830							: $io_fil
831							? $txt =~ m/\.sc$/
832							: $txt =~ m/^# .*SquirrelCalc/) {
833	46	100				129	if ($io_ref) {
		100
834	2					9	local $/;
835	2					56	my $x = <$txt>;
836	2					12	$txt = $x;
837							}
838							elsif ($io_fil) {
839	42					171	local $/;
840	42	50				1606	open my $sc, "<", $txt or return;
841	42					1836	$txt = <$sc>;
842	42					702	close $sc;
843							}
844	46	50				302	$txt =~ m/\S/ or return;
845	46	50				170	my $label = defined $opt{label} ? $opt{label} : "sheet";
846	46					646	my @data = (
847							{ type => "sc",
848							parser => "Spreadsheet::Read",
849							version => $VERSION,
850							parsers => [{
851							type => "sc",
852							parser => "Spreadsheet::Read",
853							version => $VERSION,
854							}],
855							error => undef,
856							sheets => 1,
857							sheet => { $label => 1 },
858							},
859							{ parser => 0,
860							label => $label,
861							maxrow => 0,
862							maxcol => 0,
863							cell => [],
864							attr => [],
865							merged => [],
866							active => 1,
867							},
868							);
869
870	46					9715	for (split m/\s[\r\n]\s/, $txt) {
871	13478	100				22369	if (m/^dimension.of ([0-9]+) rows.of ([0-9]+) columns/i) {
872	46					89	@{$data[1]}{qw(maxrow maxcol)} = ($1, $2);
	46					274
873	46					99	next;
874							}
875	13432	100				33232	s/^r([0-9]+)c([0-9]+)\s=\s// or next;
876	3542					6147	my ($c, $r) = map { $_ + 1 } $2, $1;
	7084					15254
877	3542	100	66			15837	if (m/.* \{(.)}$/ or m/"(.)"/) {
878	2714					5250	my $cell = cr2cell ($c, $r);
879	2714	100				8551	$opt{rc} and $data[1]{cell}[$c][$r] = $1;
880	2714	50				7580	$opt{cells} and $data[1]{$cell} = $1;
881	2714	100				6438	$opt{attr} and $data[1]{attr}[$c + 1][$r] = { @def_attr };
882	2714					4661	next;
883							}
884							# Now only formula's remain. Ignore for now
885							# r67c7 = [P2L] 2(1000r67c5-60)
886							}
887	46					795	for (@{$data[1]{cell}}) {
	46					167
888	360	100				756	defined or $_ = [];
889							}
890	46					172	return _clipsheets \%opt, [ @data ];
891							}
892
893	30	50	66			364	if ($opt{parser} ? _parser ($opt{parser}) eq "sxc"
		100
894							: ($txt =~ m/^<\?xml/ or -f $txt)) {
895	7	50				996	$can{sxc} or croak "Spreadsheet::ReadSXC not installed";
896	0	0				0	ref $txt and
897							croak ("Sorry, references as input are not (yet) supported by Spreadsheet::ReadSXC");
898
899	0					0	my $using = "using $can{sxc}-" . $can{sxc}->VERSION;
900	0					0	my $sxc_options = { %parser_opts, OrderBySheet => 1 }; # New interface 0.20 and up
901	0					0	my $sxc;
902	0	0	0			0	if ($txt =~ m/\.(sxc\|ods)$/i) {
		0
		0
903	0	0				0	$debug and print STDERR "Opening \U$1\E $txt $using\n";
904	0	0				0	$sxc = Spreadsheet::ReadSXC::read_sxc ($txt, $sxc_options) or return;
905							}
906							elsif ($txt =~ m/\.xml$/i) {
907	0	0				0	$debug and print STDERR "Opening XML $txt $using\n";
908	0	0				0	$sxc = Spreadsheet::ReadSXC::read_xml_file ($txt, $sxc_options) or return;
909							}
910							# need to test on pattern to prevent stat warning
911							# on filename with newline
912							elsif ($txt !~ m/^<\?xml/i and -f $txt) {
913	0	0				0	$debug and print STDERR "Opening XML $txt $using\n";
914	0	0				0	open my $f, "<", $txt or return;
915	0					0	local $/;
916	0					0	$txt = <$f>;
917	0					0	close $f;
918							}
919	0	0	0			0	!$sxc && $txt =~ m/^<\?xml/i and
920							$sxc = Spreadsheet::ReadSXC::read_xml_string ($txt, $sxc_options);
921	0	0				0	$debug > 8 and _dump (sxc => $sxc);
922	0	0				0	if ($sxc) {
923	0					0	my @data = ( {
924							type => "sxc",
925							parser => "Spreadsheet::ReadSXC",
926							version => $Spreadsheet::ReadSXC::VERSION,
927							parsers => [{
928							type => "sxc",
929							parser => "Spreadsheet::ReadSXC",
930							version => $Spreadsheet::ReadSXC::VERSION,
931							}],
932							error => undef,
933							sheets => 0,
934							sheet => {},
935							} );
936							my @sheets = ref $sxc eq "HASH" # < 0.20
937							? map {
938							{ label => $_,
939	0					0	data => $sxc->{$_},
940							}
941							} keys %$sxc
942	0	0				0	: @{$sxc};
	0					0
943	0					0	foreach my $sheet (@sheets) {
944	0					0	my @sheet = @{$sheet->{data}};
	0					0
945							my %sheet = (
946							parser => 0,
947							label => $sheet->{label},
948	0					0	maxrow => scalar @sheet,
949							maxcol => 0,
950							cell => [],
951							attr => [],
952							merged => [],
953							active => 0,
954							);
955	0					0	my $sheet_idx = 1 + @data;
956	0	0				0	$debug and print STDERR "\tSheet $sheet_idx '$sheet{label}' $sheet{maxrow} rows\n";
957	0					0	foreach my $r (0 .. $#sheet) {
958	0	0				0	my @row = @{$sheet[$r]} or next;
	0					0
959	0					0	foreach my $c (0 .. $#row) {
960	0	0				0	defined (my $val = $row[$c]) or next;
961	0					0	my $C = $c + 1;
962	0	0				0	$C > $sheet{maxcol} and $sheet{maxcol} = $C;
963	0					0	my $cell = cr2cell ($C, $r + 1);
964	0	0				0	$opt{rc} and $sheet{cell}[$C][$r + 1] = $val;
965	0	0				0	$opt{cells} and $sheet{$cell} = $val;
966	0	0				0	$opt{attr} and $sheet{attr}[$C][$r + 1] = { @def_attr };
967							}
968							}
969	0					0	for (@{$sheet{cell}}) {
	0					0
970	0	0				0	defined or $_ = [];
971							}
972	0	0				0	$debug and print STDERR "\tSheet $sheet_idx '$sheet{label}' $sheet{maxrow} x $sheet{maxcol}\n";
973	0					0	push @data, { %sheet };
974	0					0	$data[0]{sheets}++;
975	0					0	$data[0]{sheet}{$sheet->{label}} = $#data;
976							}
977	0					0	return _clipsheets \%opt, [ @data ];
978							}
979							}
980
981	23	100	100			138	if (!ref $txt and $txt =~ m/\.\w+$/) {
982							# Return (localized) system message
983	5	50				120	open my $fh, "<", $txt and
984							croak "I can open file $txt, but I do not know how to parse it\n";
985
986	5					91	$@ = $!;
987							}
988
989	23					127	return;
990							} # ReadData
991
992							sub add {
993	2			2	1	7	my $book = shift;
994	2	50				9	my $r = ReadData (@_) or return;
995							$book && (ref $book eq "ARRAY" \|\|
996	2	50	33			37	ref $book eq __PACKAGE__) && $book->[0]{sheets} or return $r;
			33
			33
997
998	2					5	my $c1 = $book->[0];
999	2					5	my $c2 = $r->[0];
1000
1001	2	50				9	unless ($c1->{parsers}) {
1002	0					0	$c1->{parsers}[0]{$_} = $c1->{$_} for qw( type parser version );
1003	0					0	$book->[$_]{parser} = 0 for 1 .. $c1->{sheets};
1004							}
1005	2					8	my ($pidx) = (grep { my $p = $c1->{parsers}[$_];
1006							$p->{type} eq $c2->{type} &&
1007							$p->{parser} eq $c2->{parser} &&
1008	2	50	33			5	$p->{version} eq $c2->{version} } 0 .. $#{$c1->{parsers}});
	2					77
	2					7
1009	2	50				9	unless (defined $pidx) {
1010	0					0	$pidx = scalar @{$c1->{parsers}};
	0					0
1011	0					0	$c1->{parsers}[$pidx]{$_} = $c2->{$_} for qw( type parser version );
1012							}
1013
1014	2					7	foreach my $sn (sort { $c2->{sheet}{$a} <=> $c2->{sheet}{$b} } keys %{$c2->{sheet}}) {
	0					0
	2					14
1015	2					5	my $s = $sn;
1016	2					4	my $v = 2;
1017	2					9	while (exists $c1->{sheet}{$s}) {
1018	1					7	$s = $sn."[".$v++."]";
1019							}
1020	2					18	$c1->{sheet}{$s} = $c1->{sheets} + $c2->{sheet}{$sn};
1021	2					11	$r->[$c2->{sheet}{$sn}]{parser} = $pidx;
1022	2					9	push @$book, $r->[$c2->{sheet}{$sn}];
1023							}
1024	2					5	$c1->{sheets} += $c2->{sheets};
1025
1026	2					19	return $book;
1027							} # add
1028
1029							package Spreadsheet::Read::Attribute;
1030
1031	53			53		614	use Carp;
	53					194
	53					4624
1032	53			53		472	use vars qw( $AUTOLOAD );
	53					128
	53					64261
1033
1034							sub AUTOLOAD {
1035	6			6		788	my $self = shift;
1036	6					42	(my $attr = $AUTOLOAD) =~ s/.*:://;
1037	6					249	$self->{$attr};
1038							} # AUTOLOAD
1039
1040							package Spreadsheet::Read::Sheet;
1041
1042							sub cell {
1043	55			55		17964	my ($sheet, @id) = @_;
1044							@id == 2 && $id[0] =~ m/^[0-9]+$/ && $id[1] =~ m/^[0-9]+$/ and
1045	55	50	66			542	return $sheet->{cell}[$id[0]][$id[1]];
			66
1046							@id && $id[0] && exists $sheet->{$id[0]} and
1047	27	50	33			296	return $sheet->{$id[0]};
			33
1048							} # cell
1049
1050							sub attr {
1051	8			8		23	my ($sheet, @id) = @_;
1052	8					15	my $class = "Spreadsheet::Read::Attribute";
1053							@id == 2 && $id[0] =~ m/^[0-9]+$/ && $id[1] =~ m/^[0-9]+$/ and
1054	8	50	66			70	return bless $sheet->{attr}[$id[0]][$id[1]] => $class;
			66
1055	5	50	33			34	if (@id && $id[0] && exists $sheet->{$id[0]}) {
			33
1056	5					17	my ($c, $r) = $sheet->cell2cr ($id[0]);
1057	5					54	return bless $sheet->{attr}[$c][$r] => $class;
1058							}
1059	0					0	undef;
1060							} # attr
1061
1062							sub maxrow {
1063	2			2		5	my $sheet = shift;
1064	2					9	return $sheet->{maxrow};
1065							} # maxrow
1066
1067							sub maxcol {
1068	2			2		5	my $sheet = shift;
1069	2					8	return $sheet->{maxcol};
1070							} # maxrow
1071
1072							sub col2label {
1073	1	50		1		9	$_[0] =~ m/::/ and shift; # class unused
1074	1					22	return Spreadsheet::Read::col2label (@_);
1075							} # col2label
1076
1077							sub cr2cell {
1078	24	50		24		90	$_[0] =~ m/::/ and shift; # class unused
1079	24					89	return Spreadsheet::Read::cr2cell (@_);
1080							} # cr2cell
1081
1082							sub cell2cr {
1083	21	50		21		11124	$_[0] =~ m/::/ and shift; # class unused
1084	21					64	return Spreadsheet::Read::cell2cr (@_);
1085							} # cell2cr
1086
1087							sub label {
1088	2			2		6	my ($sheet, $label) = @_;
1089	2	100				10	defined $label and $sheet->{label} = $label;
1090	2					8	return $sheet->{label};
1091							} # label
1092
1093							sub active {
1094	1			1		3	my $sheet = shift;
1095	1					4	return $sheet->{active};
1096							} # label
1097
1098							# my @row = $sheet->cellrow (1);
1099							sub cellrow {
1100	4			4		11	my ($sheet, $row) = @_;
1101	4	100	66			33	defined $row && $row > 0 && $row <= $sheet->{maxrow} or return;
			100
1102	1					3	my $s = $sheet->{cell};
1103	1					6	map { $s->[$_][$row] } 1..$sheet->{maxcol};
	19					42
1104							} # cellrow
1105
1106							# my @row = $sheet->row (1);
1107							sub row {
1108	4			4		9	my ($sheet, $row) = @_;
1109	4	100	66			37	defined $row && $row > 0 && $row <= $sheet->{maxrow} or return;
			100
1110	1					5	map { $sheet->{$sheet->cr2cell ($_, $row)} } 1..$sheet->{maxcol};
	19					48
1111							} # row
1112
1113							# my @col = $sheet->cellcolumn (1);
1114							sub cellcolumn {
1115	4			4		10	my ($sheet, $col) = @_;
1116	4	100	66			37	defined $col && $col > 0 && $col <= $sheet->{maxcol} or return;
			100
1117	1					3	my $s = $sheet->{cell};
1118	1					4	map { $s->[$col][$_] } 1..$sheet->{maxrow};
	5					20
1119							} # cellcolumn
1120
1121							# my @col = $sheet->column (1);
1122							sub column {
1123	4			4		13	my ($sheet, $col) = @_;
1124	4	100	66			32	defined $col && $col > 0 && $col <= $sheet->{maxcol} or return;
			100
1125	1					4	map { $sheet->{$sheet->cr2cell ($col, $_)} } 1..$sheet->{maxrow};
	5					13
1126							} # column
1127
1128							# Convert {cell}'s [column][row] to a [row][column] list
1129							# my @rows = $sheet->rows ();
1130							sub rows {
1131	1			1		4	my $sheet = shift;
1132	1					2	my $s = $sheet->{cell};
1133
1134							map {
1135	5					8	my $r = $_;
1136	5					10	[ map { $s->[$_][$r] } 1..$sheet->{maxcol} ];
	95					199
1137	1					5	} 1..$sheet->{maxrow};
1138							} # rows
1139
1140							1;
1141
1142							__END__
1143							=head1 DESCRIPTION
1144
1145							Spreadsheet::Read tries to transparently read any spreadsheet and
1146							return its content in a universal manner independent of the parsing
1147							module that does the actual spreadsheet scanning.
1148
1149							For OpenOffice and/or LibreOffice this module uses
1150							L<Spreadsheet::ReadSXC\|https://metacpan.org/release/Spreadsheet-ReadSXC>
1151
1152							For Microsoft Excel this module uses
1153							L<Spreadsheet::ParseExcel\|https://metacpan.org/release/Spreadsheet-ParseExcel>,
1154							L<Spreadsheet::ParseXLSX\|https://metacpan.org/release/Spreadsheet-ParseXLSX>, or
1155							L<Spreadsheet::XLSX\|https://metacpan.org/release/Spreadsheet-XLSX> (stronly
1156							discouraged).
1157
1158							For CSV this module uses L<Text::CSV_XS\|https://metacpan.org/release/Text-CSV_XS>
1159							or L<Text::CSV_PP\|https://metacpan.org/release/Text-CSV_PP>.
1160
1161							For SquirrelCalc there is a very simplistic built-in parser
1162
1163							=head2 Data structure
1164
1165							The data is returned as an array reference:
1166
1167							$book = [
1168							# Entry 0 is the overall control hash
1169							{ sheets => 2,
1170							sheet => {
1171							"Sheet 1" => 1,
1172							"Sheet 2" => 2,
1173							},
1174							parsers => [ {
1175							type => "xls",
1176							parser => "Spreadsheet::ParseExcel",
1177							version => 0.59,
1178							}],
1179							error => undef,
1180							},
1181							# Entry 1 is the first sheet
1182							{ parser => 0,
1183							label => "Sheet 1",
1184							maxrow => 2,
1185							maxcol => 4,
1186							cell => [ undef,
1187							[ undef, 1 ],
1188							[ undef, undef, undef, undef, undef, "Nugget" ],
1189							],
1190							attr => [],
1191							merged => [],
1192							active => 1,
1193							A1 => 1,
1194							B5 => "Nugget",
1195							},
1196							# Entry 2 is the second sheet
1197							{ parser => 0,
1198							label => "Sheet 2",
1199							:
1200							:
1201
1202							To keep as close contact to spreadsheet users, row and column 1 have
1203							index 1 too in the C<cell> element of the sheet hash, so cell "A1" is
1204							the same as C<cell> [1, 1] (column first). To switch between the two,
1205							there are helper functions available: C<cell2cr ()>, C<cr2cell ()>,
1206							and C<col2label ()>.
1207
1208							The C<cell> hash entry contains unformatted data, while the hash entries
1209							with the traditional labels contain the formatted values (if applicable).
1210
1211							The control hash (the first entry in the returned array ref), contains
1212							some spreadsheet meta-data. The entry C<sheet> is there to be able to find
1213							the sheets when accessing them by name:
1214
1215							my %sheet2 = %{$book->[$book->[0]{sheet}{"Sheet 2"}]};
1216
1217							=head2 Formatted vs Unformatted
1218
1219							The difference between formatted and unformatted cells is that the (optional)
1220							format is applied to the cell or not. This part is B<completely> implemented
1221							on the parser side. Spreadsheet::Read just makes both available if these are
1222							supported. Options provide means to disable either. If the parser does not
1223							provide formatted cells - like CSV - both values are equal.
1224
1225							To show what this implies:
1226
1227							use Spreadsheet::Read;
1228
1229							my $file = "files/example.xlsx";
1230							my $workbook = Spreadsheet::Read->new ($file);
1231
1232							my $info = $workbook->[0];
1233							say "Parsed $file with $info->{parser}-$info->{version}";
1234
1235							my $sheet = $workbook->sheet (1);
1236
1237							say join "\t" => "Formatted:", $sheet->row (1);
1238							say join "\t" => "Unformatted:", $sheet->cellrow (1);
1239
1240							Might return very different results depending one the underlying parser (and
1241							its version):
1242
1243							Parsed files/example.xlsx with Spreadsheet::ParseXLSX-0.27
1244							Formatted: 8-Aug Foo & Barr < Quux
1245							Unformatted: 39668 Foo & Barr < Quux
1246
1247							Parsed files/example.xlsx with Spreadsheet::XLSX-0.15
1248							Formatted: 39668 Foo & Barr < Quux
1249							Unformatted: 39668 Foo & Barr < Quux
1250
1251							=head2 Functions and methods
1252
1253							=head3 new
1254
1255							my $book = Spreadsheet::Read->new (...) or die $@;
1256
1257							All options accepted by ReadData are accepted by new.
1258
1259							With no arguments at all, $book will be an object where sheets can be added
1260							using C<add>
1261
1262							my $book = Spreadsheet::Read->new ();
1263							$book->add ("file.csv");
1264							$book->add ("file.cslx");
1265
1266							=head3 ReadData
1267
1268							my $book = ReadData ($source [, option => value [, ... ]]);
1269
1270							my $book = ReadData ("file.csv", sep => ',', quote => '"');
1271
1272							my $book = ReadData ("file.xls", dtfmt => "yyyy-mm-dd");
1273
1274							my $book = ReadData ("file.ods");
1275
1276							my $book = ReadData ("file.sxc");
1277
1278							my $book = ReadData ("content.xml");
1279
1280							my $book = ReadData ($content);
1281
1282							my $book = ReadData ($content, parser => "xlsx");
1283
1284							my $book = ReadData ($fh, parser => "xlsx");
1285
1286							my $book = ReadData (\$content, parser => "xlsx");
1287
1288							Tries to convert the given file, string, or stream to the data structure
1289							described above.
1290
1291							Processing Excel data from a stream or content is supported through a
1292							L<File::Temp\|https://metacpan.org/release/File-Temp> temporary file or
1293							L<IO::Scalar\|https://metacpan.org/release/IO-Scalar> when available.
1294
1295							L<Spreadsheet::ReadSXC\|https://metacpan.org/release/Spreadsheet-ReadSXC>
1296							does preserve sheet order as of version 0.20.
1297
1298							Choosing between C<$content> and C<\\$content> (with or without passing
1299							the desired C<parser> option) may be depending on trial and terror.
1300							C<ReadData> does try to determine parser type on content if needed, but
1301							not all combinations are checked, and not all signatures are builtin.
1302
1303							Currently supported options are:
1304
1305							=over 2
1306
1307							=item parser
1308							X<parser>
1309
1310							Force the data to be parsed by a specific format. Possible values are
1311							C<csv>, C<prl> (or C<perl>), C<sc> (or C<squirelcalc>), C<sxc> (or C<oo>,
1312							C<ods>, C<openoffice>, C<libreoffice>) C<xls> (or C<excel>), and C<xlsx>
1313							(or C<excel2007>).
1314
1315							When parsing streams, instead of files, it is highly recommended to pass
1316							this option.
1317
1318							Spreadsheet::Read supports several underlying parsers per spreadsheet
1319							type. It will try those from most favored to least favored. When you
1320							have a good reason to prefer a different parser, you can set that in
1321							environment variables. The other options then will not be tested for:
1322
1323							env SPREADSHEET_READ_CSV=Text::CSV_PP ...
1324
1325							=item cells
1326							X<cells>
1327
1328							Control the generation of named cells ("C<A1>" etc). Default is true.
1329
1330							=item rc
1331
1332							Control the generation of the {cell}[c][r] entries. Default is true.
1333
1334							=item attr
1335
1336							Control the generation of the {attr}[c][r] entries. Default is false.
1337							See L</Cell Attributes> below.
1338
1339							=item clip
1340
1341							If set, L<C<ReadData>\|/ReadData> will remove all trailing rows and columns
1342							per sheet that have no data, where no data means only undefined or empty
1343							cells (after optional stripping). If a sheet has no data at all, the sheet
1344							will be skipped entirely when this attribute is true.
1345
1346							=item strip
1347
1348							If set, L<C<ReadData>\|/ReadData> will remove trailing- and/or
1349							leading-whitespace from every field.
1350
1351							strip leading strailing
1352							----- ------- ---------
1353							0 n/a n/a
1354							1 strip n/a
1355							2 n/a strip
1356							3 strip strip
1357
1358							=item pivot
1359
1360							Swap all rows and columns.
1361
1362							When a sheet contains data like
1363
1364							A1 B1 C1 E1
1365							A2 C2 D2
1366							A3 B3 C3 D3 E3
1367
1368							using C<pivot> will return the sheet data as
1369
1370							A1 A2 A3
1371							B1 B3
1372							C1 C2 C3
1373							D2 D3
1374							E1 E3
1375
1376							=item sep
1377
1378							Set separator for CSV. Default is comma C<,>.
1379
1380							=item quote
1381
1382							Set quote character for CSV. Default is C<">.
1383
1384							=item dtfmt
1385
1386							Set the format for MS-Excel date fields that are set to use the default
1387							date format. The default format in Excel is "C<m-d-yy>", which is both
1388							not year 2000 safe, nor very useful. The default is now "C<yyyy-mm-dd>",
1389							which is more ISO-like.
1390
1391							Note that date formatting in MS-Excel is not reliable at all, as it will
1392							store/replace/change the date field separator in already stored formats
1393							if you change your locale settings. So the above mentioned default can
1394							be either "C<m-d-yy>" OR "C<m/d/yy>" depending on what that specific
1395							character happened to be at the time the user saved the file.
1396
1397							=item debug
1398
1399							Enable some diagnostic messages to STDERR.
1400
1401							The value determines how much diagnostics are dumped (using
1402							L<Data::Peek\|https://metacpan.org/release/Data-Peek>). A value of C<9>
1403							and higher will dump the entire structure from the back-end parser.
1404
1405							=item passwd
1406
1407							Use this password to decrypt password protected spreadsheet.
1408
1409							Currently only supports Excel.
1410
1411							=back
1412
1413							All other attributes/options will be passed to the underlying parser if
1414							that parser supports attributes.
1415
1416							=head3 col2label
1417
1418							my $col_id = col2label (col);
1419
1420							my $col_id = $book->col2label (col); # OO
1421
1422							C<col2label ()> converts a C<(column)> (1 based) to the letters used in the
1423							traditional cell notation:
1424
1425							my $id = col2label ( 4); # $id now "D"
1426							my $id = col2label (28); # $id now "AB"
1427
1428							=head3 cr2cell
1429
1430							my $cell = cr2cell (col, row);
1431
1432							my $cell = $book->cr2cell (col, row); # OO
1433
1434							C<cr2cell ()> converts a C<(column, row)> pair (1 based) to the
1435							traditional cell notation:
1436
1437							my $cell = cr2cell ( 4, 14); # $cell now "D14"
1438							my $cell = cr2cell (28, 4); # $cell now "AB4"
1439
1440							=head3 cell2cr
1441
1442							my ($col, $row) = cell2cr ($cell);
1443
1444							my ($col, $row) = $book->cell2cr ($cell); # OO
1445
1446							C<cell2cr ()> converts traditional cell notation to a C<(column, row)>
1447							pair (1 based):
1448
1449							my ($col, $row) = cell2cr ("D14"); # returns ( 4, 14)
1450							my ($col, $row) = cell2cr ("AB4"); # returns (28, 4)
1451
1452							=head3 row
1453
1454							my @row = row ($sheet, $row)
1455
1456							my @row = Spreadsheet::Read::row ($book->[1], 3);
1457
1458							my @row = $book->row ($sheet, $row); # OO
1459
1460							Get full row of formatted values (like C<< $sheet->{A3} .. $sheet->{G3} >>)
1461
1462							Note that the indexes in the returned list are 0-based.
1463
1464							C<row ()> is not imported by default, so either specify it in the
1465							use argument list, or call it fully qualified.
1466
1467							See also the C<row ()> method on sheets.
1468
1469							=head3 cellrow
1470
1471							my @row = cellrow ($sheet, $row);
1472
1473							my @row = Spreadsheet::Read::cellrow ($book->[1], 3);
1474
1475							my @row = $book->cellrow ($sheet, $row); # OO
1476
1477							Get full row of unformatted values (like C<< $sheet->{cell}[1][3] .. $sheet->{cell}[7][3] >>)
1478
1479							Note that the indexes in the returned list are 0-based.
1480
1481							C<cellrow ()> is not imported by default, so either specify it in the
1482							use argument list, or call it fully qualified or as method call.
1483
1484							See also the C<cellrow ()> method on sheets.
1485
1486							=head3 rows
1487
1488							my @rows = rows ($sheet);
1489
1490							my @rows = Spreadsheet::Read::rows ($book->[1]);
1491
1492							my @rows = $book->rows (1); # OO
1493
1494							Convert C<{cell}>'s C<[column][row]> to a C<[row][column]> list.
1495
1496							Note that the indexes in the returned list are 0-based, where the
1497							index in the C<{cell}> entry is 1-based.
1498
1499							C<rows ()> is not imported by default, so either specify it in the
1500							use argument list, or call it fully qualified.
1501
1502							=head3 parses
1503
1504							parses ($format);
1505
1506							Spreadsheet::Read::parses ("CSV");
1507
1508							$book->parses ("CSV"); # OO
1509
1510							C<parses ()> returns Spreadsheet::Read's capability to parse the
1511							required format. L<C<ReadData>\|/ReadData> will pick its preferred parser
1512							for that format unless overruled. See L<C<parser>\|/parser>.
1513
1514							C<parses ()> is not imported by default, so either specify it in the
1515							use argument list, or call it fully qualified.
1516
1517							=head3 Version
1518
1519							my $v = Version ()
1520
1521							my $v = Spreadsheet::Read::Version ()
1522
1523							my $v = Spreadsheet::Read->VERSION;
1524
1525							my $v = $book->Version (); # OO
1526
1527							Returns the current version of Spreadsheet::Read.
1528
1529							C<Version ()> is not imported by default, so either specify it in the
1530							use argument list, or call it fully qualified.
1531
1532							This function returns exactly the same as C<< Spreadsheet::Read->VERSION >>
1533							returns and is only kept for backward compatibility reasons.
1534
1535							=head3 sheets
1536
1537							my $sheets = $book->sheets; # OO
1538							my @sheets = $book->sheets; # OO
1539
1540							In scalar context return the number of sheets in the book.
1541							In list context return the labels of the sheets in the book.
1542
1543							=head3 sheet
1544
1545							my $sheet = $book->sheet (1); # OO
1546							my $sheet = $book->sheet ("Foo"); # OO
1547
1548							Return the numbered or named sheet out of the book. Will return C<undef> if
1549							there is no match. Will not work for sheets I<named> with a number between 1
1550							and the number of sheets in the book.
1551
1552							With named sheets will first try to use the list of sheet-labels as stored in
1553							the control structure. If no match is found, it will scan the actual labels
1554							of the sheets. In that case, it will return the first matching sheet.
1555
1556							If defined, the returned sheet will be of class C<Spreadsheet::Read::Sheet>.
1557
1558							=head3 add
1559
1560							my $book = ReadData ("file.csv");
1561							Spreadsheet::Read::add ($book, "file.xlsx");
1562
1563							my $book = Spreadsheet::Read->new ("file.csv");
1564							$book->add ("file.xlsx"); # OO
1565
1566							=head2 Methods on sheets
1567
1568							=head3 maxcol
1569
1570							my $col = $sheet->maxcol;
1571
1572							Return the index of the last in-use column in the sheet. This index is 1-based.
1573
1574							=head3 maxrow
1575
1576							my $row = $sheet->maxrow;
1577
1578							Return the index of the last in-use row in the sheet. This index is 1-based.
1579
1580							=head3 cell
1581
1582							my $cell = $sheet->cell ("A3");
1583							my $cell = $sheet->cell (1, 3);
1584
1585							Return the value for a cell. Using tags will return the formatted value,
1586							using column and row will return unformatted value.
1587
1588							=head3 attr
1589
1590							my $cell = $sheet->attr ("A3");
1591							my $cell = $sheet->attr (1, 3);
1592
1593							Return the attributes of a cell. Only valid if attributes are enabled through
1594							option C<attr>.
1595
1596							=head3 col2label
1597
1598							my $col_id = $sheet->col2label (col);
1599
1600							C<col2label ()> converts a C<(column)> (1 based) to the letters used in the
1601							traditional cell notation:
1602
1603							my $id = $sheet->col2label ( 4); # $id now "D"
1604							my $id = $sheet->col2label (28); # $id now "AB"
1605
1606							=head3 cr2cell
1607
1608							my $cell = $sheet->cr2cell (col, row);
1609
1610							C<cr2cell ()> converts a C<(column, row)> pair (1 based) to the
1611							traditional cell notation:
1612
1613							my $cell = $sheet->cr2cell ( 4, 14); # $cell now "D14"
1614							my $cell = $sheet->cr2cell (28, 4); # $cell now "AB4"
1615
1616							=head3 cell2cr
1617
1618							my ($col, $row) = $sheet->cell2cr ($cell);
1619
1620							C<cell2cr ()> converts traditional cell notation to a C<(column, row)>
1621							pair (1 based):
1622
1623							my ($col, $row) = $sheet->cell2cr ("D14"); # returns ( 4, 14)
1624							my ($col, $row) = $sheet->cell2cr ("AB4"); # returns (28, 4)
1625
1626							=head3 col
1627
1628							my @col = $sheet->column ($col);
1629
1630							Get full column of formatted values (like C<< $sheet->{C1} .. $sheet->{C9} >>)
1631
1632							Note that the indexes in the returned list are 0-based.
1633
1634							=head3 cellcolumn
1635
1636							my @col = $sheet->cellcolumn ($col);
1637
1638							Get full column of unformatted values (like C<< $sheet->{cell}[3][1] .. $sheet->{cell}[3][9] >>)
1639
1640							Note that the indexes in the returned list are 0-based.
1641
1642							=head3 row
1643
1644							my @row = $sheet->row ($row);
1645
1646							Get full row of formatted values (like C<< $sheet->{A3} .. $sheet->{G3} >>)
1647
1648							Note that the indexes in the returned list are 0-based.
1649
1650							=head3 cellrow
1651
1652							my @row = $sheet->cellrow ($row);
1653
1654							Get full row of unformatted values (like C<< $sheet->{cell}[1][3] .. $sheet->{cell}[7][3] >>)
1655
1656							Note that the indexes in the returned list are 0-based.
1657
1658							=head3 rows
1659
1660							my @rows = $sheet->rows ();
1661
1662							Convert C<{cell}>'s C<[column][row]> to a C<[row][column]> list.
1663
1664							Note that the indexes in the returned list are 0-based, where the
1665							index in the C<{cell}> entry is 1-based.
1666
1667							=head3 label
1668
1669							my $label = $sheet->label;
1670							$sheet->label ("New sheet label");
1671
1672							Set a new label to a sheet. Note that the index in the control structure will
1673							I<NOT> be updated.
1674
1675							=head3 active
1676
1677							my $sheet_is_active = $sheet->active;
1678
1679							Returns 1 if the selected sheet is active, otherwise returns 0.
1680
1681							Currently only works on XLS (as of Spreadsheed::ParseExcel-0.61).
1682							CSV is always active.
1683
1684							=head2 Using CSV
1685
1686							In case of CSV parsing, L<C<ReadData>\|/ReadData> will use the first line of
1687							the file to auto-detect the separation character if the first argument is a
1688							file and both C<sep> and C<quote> are not passed as attributes.
1689							L<Text::CSV_XS\|https://metacpan.org/release/Text-CSV_XS> (or
1690							L<Text::CSV_PP\|https://metacpan.org/release/Text-CSV_PP>) is able to
1691							automatically detect and use C<\r> line endings.
1692
1693							CSV can parse streams too, but be sure to pass C<sep> and/or C<quote> if
1694							these do not match the default C<,> and C<">.
1695
1696							When an error is found in the CSV, it is automatically reported (to STDERR).
1697							The structure will store the error in C<< $ss->[0]{error} >> as anonymous
1698							list returned by
1699							L<C<< $csv->error_diag >>\|https://metacpan.org/pod/Text::CSV_XS#error_diag>.
1700							See L<Text::CSV_XS\|https://metacpan.org/pod/Text-CSV_XS> for documentation.
1701
1702							my $ss = ReadData ("bad.csv");
1703							$ss->[0]{error} and say $ss->[0]{error}[1];
1704
1705							As CSV has no sheet labels, the default label for a CSV sheet is its filename.
1706							For CSV, this can be overruled using the I<label> attribute:
1707
1708							my $ss = Spreadsheet::Read->new ("/some/place/test.csv", label => "Test");
1709
1710							=head2 Cell Attributes
1711							X<attr>
1712
1713							If the constructor was called with C<attr> having a true value,
1714
1715							my $book = ReadData ("book.xls", attr => 1);
1716							my $book = Spreadsheet::Read->new ("book.xlsx", attr => 1);
1717
1718							effort is made to analyze and store field attributes like this:
1719
1720							{ label => "Sheet 1",
1721							maxrow => 5,
1722							maxcol => 2,
1723							cell => [ undef,
1724							[ undef, 1 ],
1725							[ undef, undef, undef, undef, undef, "Nugget" ],
1726							],
1727							attr => [ undef,
1728							[ undef, {
1729							type => "numeric",
1730							fgcolor => "#ff0000",
1731							bgcolor => undef,
1732							font => "Arial",
1733							size => undef,
1734							format => "## ##0.00",
1735							halign => "right",
1736							valign => "top",
1737							uline => 0,
1738							bold => 0,
1739							italic => 0,
1740							wrap => 0,
1741							merged => 0,
1742							hidden => 0,
1743							locked => 0,
1744							enc => "utf-8",
1745							}, ]
1746							[ undef, undef, undef, undef, undef, {
1747							type => "text",
1748							fgcolor => "#e2e2e2",
1749							bgcolor => undef,
1750							font => "Letter Gothic",
1751							size => 15,
1752							format => undef,
1753							halign => "left",
1754							valign => "top",
1755							uline => 0,
1756							bold => 0,
1757							italic => 0,
1758							wrap => 0,
1759							merged => 0,
1760							hidden => 0,
1761							locked => 0,
1762							enc => "iso8859-1",
1763							}, ]
1764							merged => [],
1765							A1 => 1,
1766							B5 => "Nugget",
1767							},
1768
1769							The entries C<maxrow> and C<maxcol> are 1-based.
1770
1771							This has now been partially implemented, mainly for Excel, as the other
1772							parsers do not (yet) support all of that. YMMV.
1773
1774							If a cell itself is not hidden, but the parser holds the information that
1775							either the row or the column (or both) the field is in is hidden, the flag
1776							is inherited into the cell attributes.
1777
1778							You can get the attributes of a cell (as a hash-ref) like this:
1779
1780							my $attr = $book[1]{attr}[1][3]; # Direct structure
1781							my $attr = $book->sheet (1)->attr (1, 3); # Same using OO
1782							my $attr = $book->sheet (1)->attr ("A3"); # Same using OO
1783
1784							To get to the C<font> attribute, use any of these:
1785
1786							my $font = $book[1]{attr}[1][3]{font};
1787							my $font = $book->sheet (1)->attr (1, 3)->{font};
1788							my $font = $book->sheet (1)->attr ("A3")->font;
1789
1790							=head3 Merged cells
1791							X<merged>
1792
1793							Note that only
1794							L<Spreadsheet::ReadSXC\|https://metacpan.org/release/Spreadsheet-ReadSXC>
1795							documents the use of merged cells, and not in a way useful for the spreadsheet
1796							consumer.
1797
1798							CSV does not support merged cells (though future implementations of CSV
1799							for the web might).
1800
1801							The documentation of merged areas in
1802							L<Spreadsheet::ParseExcel\|https://metacpan.org/release/Spreadsheet-ParseExcel> and
1803							L<Spreadsheet::ParseXLSX\|https://metacpan.org/release/Spreadsheet-ParseXLSX> can
1804							be found in
1805							L<Spreadsheet::ParseExcel::Worksheet\|https://metacpan.org/release/Spreadsheet-ParseExcel-Worksheet>
1806							and L<Spreadsheet::ParseExcel::Cell\|https://metacpan.org/release/Spreadsheet-ParseExcel-Cell>.
1807
1808							None of basic L<Spreadsheet::XLSX\|https://metacpan.org/release/Spreadsheet-XLSX>,
1809							L<Spreadsheet::ParseExcel\|https://metacpan.org/release/Spreadsheet-ParseExcel>, and
1810							L<Spreadsheet::ParseXLSX\|https://metacpan.org/release/Spreadsheet-ParseXLSX> manual
1811							pages mention merged cells at all.
1812
1813							This module just tries to return the information in a generic way.
1814
1815							Given this spreadsheet as an example
1816
1817							merged.xlsx:
1818
1819							A B C
1820							+-----+-----------+
1821							1\| \| foo \|
1822							+-----+ +
1823							2\| bar \| \|
1824							\| +-----+-----+
1825							3\| \| urg \| orc \|
1826							+-----+-----+-----+
1827
1828							the information extracted from that undocumented information is
1829							returned in the C<merged> entry of the sheet's hash as a list of
1830							top-left, bottom-right coordinate pars (col, row, col, row). For
1831							given example, that would be:
1832
1833							$ss->{merged} = [
1834							[ 1, 2, 1, 3 ], # A2-A3
1835							[ 2, 1, 3, 2 ], # B1-C2
1836							];
1837
1838							When the attributes are also enabled, there is some merge information
1839							copied directly from the cell information, but again, that stems from
1840							code analysis and not from documentation:
1841
1842							my $ss = ReadData ("merged.xlsx", attr => 1)->[1];
1843							foreach my $row (1 .. $ss->{maxrow}) {
1844							foreach my $col (1 .. $ss->{maxcol}) {
1845							my $cell = cr2cell ($col, $row);
1846							printf "%s %-3s %d ", $cell, $ss->{$cell},
1847							$ss->{attr}[$col][$row]{merged};
1848							}
1849							print "\n";
1850							}
1851
1852							A1 0 B1 foo 1 C1 1
1853							A2 bar 1 B2 1 C2 1
1854							A3 1 B3 urg 0 C3 orc 0
1855
1856							In this example, there is no way to see if C<B2> is merged to C<A2> or
1857							to C<B1> without analyzing all surrounding cells. This could as well
1858							mean C<A2:A3>, C<B1:C1>, C<B2:C2>, as C<A2:A3>, C<B1:B2>, C<C1:C2>, as
1859							C<A2:A3>, C<B1:C2>.
1860							Use the L<C<merged>\|/merged> entry described above to find out what
1861							fields are merged to what other fields.
1862
1863							=head1 TOOLS
1864
1865							This modules comes with a few tools that perform tasks from the FAQ, like
1866							"How do I select only column D through F from sheet 2 into a CSV file?"
1867
1868							If the module was installed without the tools, you can find them here:
1869							https://github.com/Tux/Spreadsheet-Read/tree/master/examples
1870
1871							=head2 C<xlscat>
1872
1873							Show (parts of) a spreadsheet in plain text, CSV, or HTML
1874
1875							usage: xlscat [-s <sep>] [-L] [-n] [-A] [-u] [Selection] file.xls
1876							[-c \| -m] [-u] [Selection] file.xls
1877							-i [-S sheets] file.xls
1878							Generic options:
1879							-v[#] Set verbose level (xlscat/xlsgrep)
1880							-d[#] Set debug level (Spreadsheet::Read)
1881							-u Use unformatted values
1882							--noclip Do not strip empty sheets and
1883							trailing empty rows and columns
1884							-e <enc> Set encoding for input and output
1885							-b <enc> Set encoding for input
1886							-a <enc> Set encoding for output
1887							Input CSV:
1888							--in-sep=c Set input sep_char for CSV
1889							Input XLS:
1890							--dtfmt=fmt Specify the default date format to replace 'm-d-yy'
1891							the default replacement is 'yyyy-mm-dd'
1892							Output Text (default):
1893							-s <sep> Use separator <sep>. Default '\|', \n allowed
1894							-L Line up the columns
1895							-n [skip] Number lines (prefix with column number)
1896							optionally skip <skip> (header) lines
1897							-A Show field attributes in ANSI escapes
1898							-h[#] Show # header lines
1899							Output Index only:
1900							-i Show sheet names and size only
1901							Output CSV:
1902							-c Output CSV, separator = ','
1903							-m Output CSV, separator = ';'
1904							Output HTML:
1905							-H Output HTML
1906							Selection:
1907							-S <sheets> Only print sheets <sheets>. 'all' is a valid set
1908							Default only prints the first sheet
1909							-R <rows> Only print rows <rows>. Default is 'all'
1910							-C <cols> Only print columns <cols>. Default is 'all'
1911							-F <flds> Only fields <flds> e.g. -FA3,B16
1912							Ordering (column numbers in result set after selection):
1913							--sort=spec Sort output (e.g. --sort=3,2r,5n,1rn+2)
1914							+# - first # lines do not sort (header)
1915							# - order on column # lexical ascending
1916							#n - order on column # numeric ascending
1917							#r - order on column # lexical descending
1918							#rn - order on column # numeric descending
1919
1920							=head2 C<xlsgrep>
1921
1922							Show (parts of) a spreadsheet that match a pattern in plain text, CSV, or HTML
1923
1924							usage: xlsgrep [-s <sep>] [-L] [-n] [-A] [-u] [Selection] pattern file.xls
1925							[-c \| -m] [-u] [Selection] pattern file.xls
1926							-i [-S sheets] pattern file.xls
1927							Generic options:
1928							-v[#] Set verbose level (xlscat/xlsgrep)
1929							-d[#] Set debug level (Spreadsheet::Read)
1930							-u Use unformatted values
1931							--noclip Do not strip empty sheets and
1932							trailing empty rows and columns
1933							-e <enc> Set encoding for input and output
1934							-b <enc> Set encoding for input
1935							-a <enc> Set encoding for output
1936							Input CSV:
1937							--in-sep=c Set input sep_char for CSV
1938							Input XLS:
1939							--dtfmt=fmt Specify the default date format to replace 'm-d-yy'
1940							the default replacement is 'yyyy-mm-dd'
1941							Output Text (default):
1942							-s <sep> Use separator <sep>. Default '\|', \n allowed
1943							-L Line up the columns
1944							-n [skip] Number lines (prefix with column number)
1945							optionally skip <skip> (header) lines
1946							-A Show field attributes in ANSI escapes
1947							-h[#] Show # header lines
1948							Grep options:
1949							-i Ignore case
1950							-w Match whole words only
1951							Output CSV:
1952							-c Output CSV, separator = ','
1953							-m Output CSV, separator = ';'
1954							Output HTML:
1955							-H Output HTML
1956							Selection:
1957							-S <sheets> Only print sheets <sheets>. 'all' is a valid set
1958							Default only prints the first sheet
1959							-R <rows> Only print rows <rows>. Default is 'all'
1960							-C <cols> Only print columns <cols>. Default is 'all'
1961							-F <flds> Only fields <flds> e.g. -FA3,B16
1962							Ordering (column numbers in result set after selection):
1963							--sort=spec Sort output (e.g. --sort=3,2r,5n,1rn+2)
1964							+# - first # lines do not sort (header)
1965							# - order on column # lexical ascending
1966							#n - order on column # numeric ascending
1967							#r - order on column # lexical descending
1968							#rn - order on column # numeric descending
1969
1970							=head2 C<xls2csv>
1971
1972							Convert a spreadsheet to CSV. This is just a small wrapper over C<xlscat>.
1973
1974							usage: xls2csv [ -o file.csv ] file.xls
1975
1976							=head2 C<ss2tk>
1977
1978							Show a spreadsheet in a perl/Tk spreadsheet widget
1979
1980							usage: ss2tk [-w <width>] [X11 options] file.xls [<pattern>]
1981							-w <width> use <width> as default column width (4)
1982
1983							=head2 C<ssdiff>
1984
1985							Show the differences between two spreadsheets.
1986
1987							usage: examples/ssdiff [--verbose[=1]] file.xls file.xlsx
1988
1989							=head1 TODO
1990
1991							=over 4
1992
1993							=item Options
1994
1995							=over 2
1996
1997							=item Module Options
1998
1999							New Spreadsheet::Read options are bound to happen. I'm thinking of an
2000							option that disables the reading of the data entirely to speed up an
2001							index request (how many sheets/fields/columns). See C<xlscat -i>.
2002
2003							=item Parser options
2004
2005							Try to transparently support as many options as the encapsulated modules
2006							support regarding (un)formatted values, (date) formats, hidden columns
2007							rows or fields etc. These could be implemented like C<attr> above but
2008							names C<meta>, or just be new values in the C<attr> hashes.
2009
2010							=back
2011
2012							=item Other parsers
2013
2014							Add support for new(er) parsers for already supported formats, like
2015
2016							=over 2
2017
2018							=item Data::XLSX::Parser
2019
2020							Data::XLSX::Parser provides faster way to parse Microsoft Excel's .xlsx
2021							files. The implementation of this module is highly inspired from Python's
2022							FastXLSX library.
2023
2024							This is SAX based parser, so you can parse very large XLSX file with
2025							lower memory usage.
2026
2027							=back
2028
2029							=item Other spreadsheet formats
2030
2031							I consider adding any spreadsheet interface that offers a usable API.
2032
2033							Under investigation:
2034
2035							=over 2
2036
2037							=item Gnumeric (.gnumeric)
2038
2039							I have seen no existing CPAN module yet.
2040
2041							It is gzip'ed XML
2042
2043							=item Kspread (.ksp)
2044
2045							Now knows as Calligra Sheets.
2046
2047							I have seen no existing CPAN module yet.
2048
2049							It is XML in ZIP
2050
2051							=back
2052
2053							=item Alternative parsers for existing formats
2054
2055							As long as the alternative has a good reason for its existence, and the
2056							API of that parser reasonable fits in my approach, I will consider to
2057							implement the glue layer, or apply patches to do so as long as these
2058							match what F<CONTRIBUTING.md> describes.
2059
2060							=back
2061
2062							=head1 SEE ALSO
2063
2064							=over 2
2065
2066							=item Text::CSV_XS, Text::CSV_PP
2067
2068							See L<Text::CSV_XS\|https://metacpan.org/release/Text-CSV_XS> ,
2069							L<Text::CSV_PP\|https://metacpan.org/release/Text-CSV_PP> , and
2070							L<Text::CSV\|https://metacpan.org/release/Text-CSV> documentation.
2071
2072							L<Text::CSV\|https://metacpan.org/release/Text-CSV> is a wrapper over Text::CSV_XS (the fast XS version) and/or
2073							L<Text::CSV_PP\|https://metacpan.org/release/Text-CSV_PP> (the pure perl version).
2074
2075							=item Spreadsheet::ParseExcel
2076
2077							L<Spreadsheet::ParseExcel\|https://metacpan.org/release/Spreadsheet-ParseExcel> is
2078							the best parser for old-style Microsoft Excel (.xls) files.
2079
2080							=item Spreadsheet::ParseXLSX
2081
2082							L<Spreadsheet::ParseXLSX\|https://metacpan.org/release/Spreadsheet-ParseXLSX> is
2083							like L<Spreadsheet::ParseExcel\|https://metacpan.org/release/Spreadsheet-ParseExcel>,
2084							but for new Microsoft Excel 2007+ files (.xlsx). They have the same API.
2085
2086							This module uses L<XML::Twig\|https://metacpan.org/release/XML-Twig> to parse the
2087							internal XML.
2088
2089							=item Spreadsheet::XLSX
2090
2091							See L<Spreadsheet::XLSX\|https://metacpan.org/release/Spreadsheet-XLSX>
2092							documentation.
2093
2094							This module is dead and deprecated. It is B<buggy and unmaintained>. I<Please>
2095							use L<Spreadsheet::ParseXLSX\|https://metacpan.org/release/Spreadsheet-ParseXLSX>
2096							instead.
2097
2098							=item Spreadsheet::ReadSXC
2099
2100							L<Spreadsheet::ReadSXC\|https://metacpan.org/release/Spreadsheet-ReadSXC> is a
2101							parser for OpenOffice/LibreOffice (.sxc and .ods) spreadsheet files.
2102
2103							=item Spreadsheet::BasicRead
2104
2105							See L<Spreadsheet::BasicRead\|https://metacpan.org/release/Spreadsheet-BasicRead>
2106							for xlscat-like functionality (Excel only)
2107
2108							=item Spreadsheet::ConvertAA
2109
2110							See L<Spreadsheet::ConvertAA\|https://metacpan.org/release/Spreadsheet-ConvertAA>
2111							for an alternative set of L</cell2cr>/L</cr2cell> pair.
2112
2113							=item Spreadsheet::Perl
2114
2115							L<Spreadsheet::Perl\|https://metacpan.org/release/Spreadsheet-Perl> offers a Pure
2116							Perl implementation of a spreadsheet engine. Users that want this format to be
2117							supported in Spreadsheet::Read are hereby motivated to offer patches. It is
2118							not high on my TODO-list.
2119
2120							=item Spreadsheet::CSV
2121
2122							L<Spreadsheet::CSV\|https://metacpan.org/release/Spreadsheet-CSV> offers the
2123							interesting approach of seeing all supported spreadsheet formats as if it were
2124							CSV, mimicking the L<Text::CSV_XS\|https://metacpan.org/release/Text-CSV_XS>
2125							interface.
2126
2127							=item xls2csv
2128
2129							L<xls2csv\|https://metacpan.org/release/xls2csv> offers an alternative for my
2130							C<xlscat -c>, in the xls2csv tool, but this tool focuses on character encoding
2131							transparency, and requires some other modules.
2132
2133							=back
2134
2135							=head1 AUTHOR
2136
2137							H.Merijn Brand, <h.m.brand@xs4all.nl>
2138
2139							=head1 COPYRIGHT AND LICENSE
2140
2141							Copyright (C) 2005-2019 H.Merijn Brand
2142
2143							This library is free software; you can redistribute it and/or modify
2144							it under the same terms as Perl itself.
2145
2146							=cut