File Coverage

blib/lib/Spreadsheet/Read.pm

Criterion	Covered	Total	%
statement	447	523	85.4
branch	384	514	74.7
condition	153	220	69.5
subroutine	46	46	100.0
pod	13	13	100.0
total	1043	1316	79.2

line	stmt	bran	cond	sub	pod	time	code
1							#!/pro/bin/perl
2
3							package Spreadsheet::Read;
4
5							=head1 NAME
6
7							Spreadsheet::Read - Read the data from a spreadsheet
8
9							=head1 SYNOPSIS
10
11							use Spreadsheet::Read;
12							my $book = ReadData ("test.csv", sep => ";");
13							my $book = ReadData ("test.sxc");
14							my $book = ReadData ("test.ods");
15							my $book = ReadData ("test.xls");
16							my $book = ReadData ("test.xlsx");
17							my $book = ReadData ($fh, parser => "xls");
18
19							Spreadsheet::Read::add ($book, "sheet.csv");
20
21							my $sheet = $book->[1]; # first datasheet
22							my $cell = $book->[1]{A3}; # content of field A3 of sheet 1
23							my $cell = $book->[1]{cell}[1][3]; # same, unformatted
24
25							# OO API
26							my $book = Spreadsheet::Read->new ("file.csv");
27							my $sheet = $book->sheet (1);
28							my $cell = $sheet->cell ("A3");
29							my $cell = $sheet->cell (1, 3);
30
31							$book->add ("test.xls");
32
33							=cut
34
35	53			53		5103204	use 5.8.1;
	53					687
36	53			53		303	use strict;
	53					132
	53					1239
37	53			53		301	use warnings;
	53					132
	53					3686
38
39							our $VERSION = "0.80";
40	1			1	1	190	sub Version { $VERSION }
41
42	53			53		386	use Carp;
	53					146
	53					3232
43	53			53		360	use Exporter;
	53					118
	53					4581
44							our @ISA = qw( Exporter );
45							our @EXPORT = qw( ReadData cell2cr cr2cell );
46							our @EXPORT_OK = qw( parses rows cellrow row add );
47
48	53			53		28812	use Encode qw( decode );
	53					855019
	53					4019
49	53			53		45456	use File::Temp qw( );
	53					1101698
	53					1586
50	53			53		32181	use Data::Dumper;
	53					326483
	53					54353
51
52							my @parsers = (
53							[ csv => "Text::CSV_XS", "0.71" ],
54							[ csv => "Text::CSV_PP", "1.17" ],
55							[ csv => "Text::CSV", "1.17" ],
56							[ ods => "Spreadsheet::ReadSXC", "0.20" ],
57							[ sxc => "Spreadsheet::ReadSXC", "0.20" ],
58							[ xls => "Spreadsheet::ParseExcel", "0.34" ],
59							[ xlsx => "Spreadsheet::ParseXLSX", "0.24" ],
60							[ xlsx => "Spreadsheet::XLSX", "0.13" ],
61							[ prl => "Spreadsheet::Perl", "" ],
62
63							# Helper modules
64							[ ios => "IO::Scalar", "" ],
65							[ dmp => "Data::Peek", "" ],
66							);
67							my %can = ( supports => { map { $_->[1] => $_->[2] } @parsers });
68							foreach my $p (@parsers) {
69							my $format = $p->[0];
70							$can{$format} and next;
71							$can{$format} = "";
72							my $preset = $ENV{"SPREADSHEET_READ_\U$format"} or next;
73							my $min_version = $can{supports}{$preset};
74							unless ($min_version) {
75							# Catch weirdness like $SPREADSHEET_READ_XLSX = "DBD::Oracle"
76							$can{$format} = "!$preset is not supported for the $format format";
77							next;
78							}
79							if (eval "local \$_; require $preset" and not $@) {
80							# forcing a parser should still check the version
81							my $ok;
82							my $has = $preset->VERSION;
83							$has =~ s/_[0-9]+$//; # Remove beta-part
84							if ($min_version =~ m/^v([0-9.]+)/) { # clumsy versions
85							my @min = split m/\./ => $1;
86							$has =~ s/^v//;
87							my @has = split m/\./ => $has;
88							$ok = (($has[0] * 1000 + $has[1]) * 1000 + $has[2]) >=
89							(($min[0] * 1000 + $min[1]) * 1000 + $min[2]);
90							}
91							else { # normal versions
92							$ok = $has >= $min_version;
93							}
94							$ok or $preset = "!$preset";
95							}
96							else {
97							$preset = "!$preset";
98							}
99							$can{$format} = $preset;
100							}
101							delete $can{supports};
102							for (@parsers) {
103							my ($flag, $mod, $vsn) = @$_;
104							$can{$flag} and next;
105							eval "require $mod; \$vsn and ${mod}->VERSION (\$vsn); \$can{\$flag} = '$mod'" or
106							$_->[0] = "! Cannot use $mod version $vsn: $@";
107							}
108							$can{sc} = __PACKAGE__; # SquirelCalc is built-in
109
110							defined $Spreadsheet::ParseExcel::VERSION && $Spreadsheet::ParseExcel::VERSION < 0.61 and
111							*Spreadsheet::ParseExcel::Workbook::get_active_sheet = sub { undef; };
112
113							my $debug = 0;
114							my %def_opts = (
115							rc => 1,
116							cells => 1,
117							attr => 0,
118							clip => undef, # $opt{cells};
119							strip => 0,
120							pivot => 0,
121							dtfmt => "yyyy-mm-dd", # Format 14
122							debug => 0,
123							passwd => undef,
124							parser => undef,
125							sep => undef,
126							quote => undef,
127							label => undef,
128							);
129							my @def_attr = (
130							type => "text",
131							fgcolor => undef,
132							bgcolor => undef,
133							font => undef,
134							size => undef,
135							format => undef,
136							halign => "left",
137							valign => "top",
138							bold => 0,
139							italic => 0,
140							uline => 0,
141							wrap => 0,
142							merged => 0,
143							hidden => 0,
144							locked => 0,
145							enc => "utf-8", # $ENV{LC_ALL} // $ENV{LANG} // ...
146							formula => undef,
147							);
148
149							# Helper functions
150
151							sub _dump {
152	7			7		17	my ($label, $ref) = @_;
153	7	50				22	if ($can{dmp}) {
154	7					42	print STDERR Data::Peek::DDumper ({ $label => $ref });
155							}
156							else {
157	0					0	print STDERR Data::Dumper->Dump ([$ref], [$label]);
158							}
159							} # _dump
160
161							sub _parser {
162	342	100		342		1370	my $type = shift or return "";
163	100					287	$type = lc $type;
164							# Aliases and fullnames
165	100	100				327	$type eq "excel" and return "xls";
166	99	50				307	$type eq "excel2007" and return "xlsx";
167	99	100				285	$type eq "oo" and return "sxc";
168	98	100				297	$type eq "ods" and return "sxc";
169	96	100				277	$type eq "openoffice" and return "sxc";
170	95	50				278	$type eq "libreoffice" and return "sxc";
171	95	100				278	$type eq "perl" and return "prl";
172	94	50				279	$type eq "squirelcalc" and return "sc";
173	94	100				572	return exists $can{$type} ? $type : "";
174							} # _parser
175
176							sub new {
177	12			12	1	7235	my $class = shift;
178	12		100			42	my $r = ReadData (@_) \|\| [{
179							parsers => [],
180							error => undef,
181							sheets => 0,
182							sheet => { },
183							}];
184	12					167	bless $r => $class;
185							} # new
186
187							# Spreadsheet::Read::parses ("csv") or die "Cannot parse CSV"
188							sub parses {
189	63	100		63	1	142213	ref $_[0] eq __PACKAGE__ and shift;
190	63	100				252	my $type = _parser (shift) or return 0;
191	61	100				422	if ($can{$type} =~ m/^!\s(.)/) {
192	8					31	$@ = $1;
193	8					29	return 0;
194							}
195	53					181	return $can{$type};
196							} # parses
197
198							sub sheets {
199	4			4	1	11	my $ctrl = shift->[0];
200	4					8	my %s = %{$ctrl->{sheet}};
	4					17
201	4	100				32	wantarray ? sort { $s{$a} <=> $s{$b} } keys %s : $ctrl->{sheets};
	4					25
202							} # sheets
203
204							# col2label (4) => "D"
205							sub col2label {
206	15697	100		15697	1	33559	ref $_[0] eq __PACKAGE__ and shift;
207	15697					20665	my $c = shift;
208	15697	100	100			40185	defined $c && $c > 0 or return "";
209	15693					21147	my $cell = "";
210	15693					25964	while ($c) {
211	53			53		28205	use integer;
	53					806
	53					290
212
213	15710					29819	substr $cell, 0, 0, chr (--$c % 26 + ord "A");
214	15710					28222	$c /= 26;
215							}
216	15693					41640	$cell;
217							} # col2label
218
219							# cr2cell (4, 18) => "D18"
220							# No prototype to allow 'cr2cell (@rowcol)'
221							sub cr2cell {
222	15495	100		15495	1	186261	ref $_[0] eq __PACKAGE__ and shift;
223	15495					24713	my ($c, $r) = @_;
224	15495	100	100			71533	defined $c && defined $r && $c > 0 && $r > 0 or return "";
			100
			100
225	15489					23358	col2label ($c) . $r;
226							} # cr2cell
227
228							# cell2cr ("D18") => (4, 18)
229							sub cell2cr {
230	604	100		604	1	585639	ref $_[0] eq __PACKAGE__ and shift;
231	604	100	100			5510	my ($cc, $r) = (uc ($_[0]\|\|"") =~ m/^([A-Z]+)([0-9]+)$/) or return (0, 0);
232	600					1208	my $c = 0;
233	600					2466	while ($cc =~ s/^([A-Z])//) {
234	608					2421	$c = 26 * $c + 1 + ord ($1) - ord ("A");
235							}
236	600					2043	($c, $r);
237							} # cell2cr
238
239							# my @row = cellrow ($book->[1], 1);
240							# my @row = $book->cellrow (1, 1);
241							sub cellrow {
242	10	100		10	1	66	my $sheet = ref $_[0] eq __PACKAGE__ ? (shift)->[shift] : shift or return;
		100
243	9	100	66			63	ref $sheet eq "HASH" && exists $sheet->{cell} or return;
244	7	50	33			53	exists $sheet->{maxcol} && exists $sheet->{maxrow} or return;
245	7	100				27	my $row = shift or return;
246	6	100	100			62	$row > 0 && $row <= $sheet->{maxrow} or return;
247	4					13	my $s = $sheet->{cell};
248	4					15	map { $s->[$_][$row] } 1..$sheet->{maxcol};
	76					143
249							} # cellrow
250
251							# my @row = row ($book->[1], 1);
252							sub row {
253	10	100		10	1	3241	my $sheet = ref $_[0] eq __PACKAGE__ ? (shift)->[shift] : shift or return;
		100
254	9	100	66			67	ref $sheet eq "HASH" && exists $sheet->{cell} or return;
255	7	50	33			36	exists $sheet->{maxcol} && exists $sheet->{maxrow} or return;
256	7	100				24	my $row = shift or return;
257	6	100	100			38	$row > 0 && $row <= $sheet->{maxrow} or return;
258	4					20	map { $sheet->{cr2cell ($_, $row)} } 1..$sheet->{maxcol};
	76					136
259							} # row
260
261							# Convert {cell}'s [column][row] to a [row][column] list
262							# my @rows = rows ($book->[1]);
263							sub rows {
264	33	50		33	1	19187	my $sheet = ref $_[0] eq __PACKAGE__ ? (shift)->[shift] : shift or return;
		100
265	30	100	100			170	ref $sheet eq "HASH" && exists $sheet->{cell} or return;
266	26	100	100			118	exists $sheet->{maxcol} && exists $sheet->{maxrow} or return;
267	5					16	my $s = $sheet->{cell};
268
269							map {
270	20					35	my $r = $_;
271	20					38	[ map { $s->[$_][$r] } 1..$sheet->{maxcol} ];
	80					204
272	5					20	} 1..$sheet->{maxrow};
273							} # rows
274
275							sub sheet {
276	23			23	1	611	my ($book, $sheet) = @_;
277	23	100	100			118	$book && $sheet or return;
278	21					38	my $class = "Spreadsheet::Read::Sheet";
279							$sheet =~ m/^[0-9]+$/ && $sheet >= 1 && $sheet <= $book->[0]{sheets} and
280	21	100	66			276	return bless $book->[$sheet] => $class;
			100
281							exists $book->[0]{sheet}{$sheet} and
282	6	100				27	return bless $book->[$book->[0]{sheet}{$sheet}] => $class;
283	5					13	foreach my $idx (1 .. $book->[0]{sheets}) {
284	6	100				23	$book->[$idx]{label} eq $sheet and
285							return bless $book->[$idx] => $class;
286							}
287	4					19	return;
288							} # sheet
289
290							# If option "clip" is set, remove the trailing rows and
291							# columns in each sheet that contain no visible data
292							sub _clipsheets {
293	227			227		537	my ($opt, $ref) = @_;
294
295	227	50				758	$ref->[0]{sheets} or return $ref;
296
297	227					615	my ($rc, $cl) = ($opt->{rc}, $opt->{cells});
298	227					601	my ($oc, $os, $oa) = ($opt->{clip}, $opt->{strip}, $opt->{attr});
299
300							# Strip leading/trailing spaces
301	227	100	100			949	if ($os \|\| $oc) {
302	212					646	foreach my $sheet (1 .. $ref->[0]{sheets}) {
303	254					574	$ref->[$sheet]{indx} = $sheet;
304	254					438	my $ss = $ref->[$sheet];
305	254	100	66			1064	$ss->{maxrow} && $ss->{maxcol} or next;
306	247					540	my ($mc, $mr) = (0, 0);
307	247					646	foreach my $row (1 .. $ss->{maxrow}) {
308	1954					3257	foreach my $col (1 .. $ss->{maxcol}) {
309	16636	100				25590	if ($rc) {
310	14738	100				26748	defined $ss->{cell}[$col][$row] or next;
311	5881	100				10614	$os & 2 and $ss->{cell}[$col][$row] =~ s/\s+$//;
312	5881	100				9558	$os & 1 and $ss->{cell}[$col][$row] =~ s/^\s+//;
313	5881	100				10479	if (length $ss->{cell}[$col][$row]) {
314	4417	100				7282	$col > $mc and $mc = $col;
315	4417	100				7639	$row > $mr and $mr = $row;
316							}
317							}
318	7779	100				12105	if ($cl) {
319	7560					11401	my $cell = cr2cell ($col, $row);
320	7560	100				15671	defined $ss->{$cell} or next;
321	6233	100				11208	$os & 2 and $ss->{$cell} =~ s/\s+$//;
322	6233	100				9893	$os & 1 and $ss->{$cell} =~ s/^\s+//;
323	6233	100				11233	if (length $ss->{$cell}) {
324	4757	100				7852	$col > $mc and $mc = $col;
325	4757	100				8925	$row > $mr and $mr = $row;
326							}
327							}
328							}
329							}
330
331	247	100	66			1341	$oc && ($mc < $ss->{maxcol} \|\| $mr < $ss->{maxrow}) or next;
			66
332
333							# Remove trailing empty columns
334	106					252	foreach my $col (($mc + 1) .. $ss->{maxcol}) {
335	243	100				564	$rc and undef $ss->{cell}[$col];
336	243	100				474	$oa and undef $ss->{attr}[$col];
337	243	100				444	$cl or next;
338	193					308	my $c = col2label ($col);
339	193					1425	delete $ss->{"$c$_"} for 1 .. $ss->{maxrow};
340							}
341
342							# Remove trailing empty rows
343	106					247	foreach my $row (($mr + 1) .. $ss->{maxrow}) {
344	139					258	foreach my $col (1 .. $mc) {
345	64	50				203	$cl and delete $ss->{cr2cell ($col, $row)};
346	64	50				168	$rc and undef $ss->{cell} [$col][$row];
347	64	50				133	$oa and undef $ss->{attr} [$col][$row];
348							}
349							}
350
351	106					314	($ss->{maxrow}, $ss->{maxcol}) = ($mr, $mc);
352							}
353							}
354
355	227	100				664	if ($opt->{pivot}) {
356	1					5	foreach my $sheet (1 .. $ref->[0]{sheets}) {
357	1					2	my $ss = $ref->[$sheet];
358	1	0	33			5	$ss->{maxrow} \|\| $ss->{maxcol} or next;
359	1	50				4	my $mx = $ss->{maxrow} > $ss->{maxcol} ? $ss->{maxrow} : $ss->{maxcol};
360	1					4	foreach my $row (2 .. $mx) {
361	18					37	foreach my $col (1 .. ($row - 1)) {
362							$opt->{rc} and
363							($ss->{cell}[$col][$row], $ss->{cell}[$row][$col]) =
364	171	50				569	($ss->{cell}[$row][$col], $ss->{cell}[$col][$row]);
365							$opt->{cells} and
366							($ss->{cr2cell ($col, $row)}, $ss->{cr2cell ($row, $col)}) =
367	171	50				361	($ss->{cr2cell ($row, $col)}, $ss->{cr2cell ($col, $row)});
368							}
369							}
370	1					4	($ss->{maxcol}, $ss->{maxrow}) = ($ss->{maxrow}, $ss->{maxcol});
371							}
372							}
373
374	227					5153	$ref;
375							} # _clipsheets
376
377							# Convert a single color (index) to a color
378							sub _xls_color {
379	1742			1742		20358	my $clr = shift;
380	1742	100				3571	defined $clr or return undef;
381	850	50				1553	$clr eq "#000000" and return undef;
382	850	50				1620	$clr =~ m/^#[0-9a-fA-F]+$/ and return lc $clr;
383	850	100	66			2527	$clr == 0 \|\| $clr == 32767 and return undef; # Default fg color
384	468					1097	return "#" . lc Spreadsheet::ParseExcel->ColorIdxToRGB ($clr);
385							} # _xls_color
386
387							# Convert a fill [ $pattern, $front_color, $back_color ] to a single background
388							sub _xls_fill {
389	1526			1526		2714	my ($p, $fg, $bg) = @_;
390	1526	50				2711	defined $p or return undef;
391	1526	50				2593	$p == 32767 and return undef; # Default fg color
392	1526	100	100			16518	$p == 0 && !defined $bg and return undef; # No fill bg color
393	634	100				1117	$p == 1 and return _xls_color ($fg);
394	418	50	33			6189	$bg < 8 \|\| $bg > 63 and return undef; # see Workbook.pm#106
395	0					0	return _xls_color ($bg);
396							} # _xls_fill
397
398							sub ReadData {
399	279	100		279	1	285361	my $txt = shift or return;
400
401	276					551	my %opt;
402	276	100				750	if (@_) {
403	221	100				1080	if (ref $_[0] eq "HASH") { %opt = %{shift @_} }
	20	50				30
	20					77
404	201					842	elsif (@_ % 2 == 0) { %opt = @_ }
405							}
406
407	276	100				1032	exists $opt{rc} or $opt{rc} = $def_opts{rc};
408	276	100				812	exists $opt{cells} or $opt{cells} = $def_opts{cells};
409	276	100				775	exists $opt{attr} or $opt{attr} = $def_opts{attr};
410	276	100				749	exists $opt{clip} or $opt{clip} = $opt{cells};
411	276	100				741	exists $opt{strip} or $opt{strip} = $def_opts{strip};
412	276	100				814	exists $opt{dtfmt} or $opt{dtfmt} = $def_opts{dtfmt};
413
414							# $debug = $opt{debug} // 0;
415	276	100				737	$debug = defined $opt{debug} ? $opt{debug} : $def_opts{debug};
416	276	100				710	$debug > 4 and _dump (Options => \%opt);
417
418	31					95	my %parser_opts = map { $_ => $opt{$_} }
419	276					2541	grep { !exists $def_opts{$_} }
	1764					3503
420							keys %opt;
421
422	276					1036	my $_parser = _parser ($opt{parser});
423
424	276	100				1066	my $io_ref = ref ($txt) =~ m/GLOB\|IO/ ? $txt : undef;
425							my $io_fil = $io_ref ? 0 : $txt =~ m/\0/ ? 0
426	53	100		53		111422	: do { no warnings "newline"; -f $txt };
	53	100				174
	53					264656
	276					1023
	255					5495
427	276	100	100			1722	my $io_txt = $io_ref \|\| $io_fil ? 0 : 1;
428
429	276	100	100			3194	$io_fil && ! -s $txt and return;
430	265	100	100			1450	$io_ref && eof ($txt) and return;
431
432	260	100	100			1863	if ($opt{parser} ? $_parser eq "csv" : ($io_fil && $txt =~ m/\.(csv)$/i)) {
		100
433	106	50				395	$can{csv} or croak "CSV parser not installed";
434
435	106	100				439	my $label = defined $opt{label} ? $opt{label} : $io_fil ? $txt : "IO";
		100
436
437	106	50				366	$debug and print STDERR "Opening CSV $label using $can{csv}-", $can{csv}->VERSION, "\n";
438
439							my @data = (
440							{ type => "csv",
441							parser => $can{csv},
442							version => $can{csv}->VERSION,
443							parsers => [ {
444							type => "csv",
445							parser => $can{csv},
446							version => $can{csv}->VERSION,
447	106					3064	}],
448							error => undef,
449							quote => '"',
450							sepchar => ',',
451							sheets => 1,
452							sheet => { $label => 1 },
453							},
454							{ parser => 0,
455							label => $label,
456							maxrow => 0,
457							maxcol => 0,
458							cell => [],
459							attr => [],
460							merged => [],
461							active => 1,
462							},
463							);
464
465	106					540	my ($sep, $quo, $in) = (",", '"');
466	106	100				325	defined $opt{sep} and $sep = $opt{sep};
467	106	100				244	defined $opt{quote} and $quo = $opt{quote};
468	106	50				264	$debug > 8 and _dump (debug => {
469							data => \@data, txt => $txt, io_ref => $io_ref, io_fil => $io_fil });
470	106	100				254	if ($io_fil) {
		100
		100
		50
471	94	100	100			329	unless (defined $opt{quote} && defined $opt{sep}) {
472	87	50				3393	open $in, "<", $txt or return;
473	87					1646	my $l1 = <$in>;
474
475	87	100				419	$quo = defined $opt{quote} ? $opt{quote} : '"';
476							$sep = # If explicitly set, use it
477							defined $opt{sep} ? $opt{sep} :
478							# otherwise start auto-detect with quoted strings
479	87	100				786	$l1 =~ m/["0-9];["0-9;]/ ? ";" :
		100
		50
		50
		100
		100
		50
480							$l1 =~ m/["0-9],["0-9,]/ ? "," :
481							$l1 =~ m/["0-9]\t["0-9,]/ ? "\t" :
482							# If neither, then for unquoted strings
483							$l1 =~ m/\w;[\w;]/ ? ";" :
484							$l1 =~ m/\w,[\w,]/ ? "," :
485							$l1 =~ m/\w\t[\w,]/ ? "\t" :
486							"," ;
487	87					1243	close $in;
488							}
489	94	50				2630	open $in, "<", $txt or return;
490							}
491							elsif ($io_ref) {
492	10					22	$in = $txt;
493							}
494							elsif (ref $txt eq "SCALAR") {
495	1	50				18	open $in, "<", $txt or croak "Cannot open input: $!";
496							}
497							elsif ($txt =~ m/[\r\n,;]/) {
498	1	50		2		59	open $in, "<", \$txt or croak "Cannot open input: $!";
	2					21
	2					5
	2					18
499							}
500							else {
501	0					0	warn "Input type ", ref $txt,
502							" might not be supported. Please file a ticket\n";
503	0					0	$in = $txt; # Now pray ...
504							}
505	106	50				1481	$debug > 1 and print STDERR "CSV sep_char '$sep', quote_char '$quo'\n";
506							my $csv = $can{csv}->new ({
507							%parser_opts,
508
509							sep_char => ($data[0]{sepchar} = $sep),
510	106	50				1339	quote_char => ($data[0]{quote} = $quo),
511							keep_meta_info => 1,
512							binary => 1,
513							auto_diag => 1,
514							}) or croak "Cannot create a csv ('$sep', '$quo') parser!";
515
516	106					22143	while (my $row = $csv->getline ($in)) {
517	413	50				70327	my @row = @$row or last;
518
519	413					886	my $r = ++$data[1]{maxrow};
520	413	100				1020	@row > $data[1]{maxcol} and $data[1]{maxcol} = @row;
521	413					1097	foreach my $c (0 .. $#row) {
522	2427					3329	my $val = $row[$c];
523	2427					4225	my $cell = cr2cell ($c + 1, $r);
524	2427	100				6746	$opt{rc} and $data[1]{cell}[$c + 1][$r] = $val;
525	2427	100				6095	$opt{cells} and $data[1]{$cell} = $val;
526	2427	50				12137	$opt{attr} and $data[1]{attr}[$c + 1][$r] = { @def_attr };
527							}
528							}
529	106	50				14130	$csv->eof () or $data[0]{error} = [ $csv->error_diag ];
530	106					1593	close $in;
531
532	106					315	for (@{$data[1]{cell}}) {
	106					517
533	734	100				1602	defined or $_ = [];
534							}
535	106					482	return _clipsheets \%opt, [ @data ];
536							}
537
538	154	100				411	if ($io_txt) { # && $_parser !~ m/^xlsx?$/) {
539	29	100	66			296	if ( # /etc/magic: Microsoft Office Document
		50	66
540							$txt =~ m{\A(\376\067\0\043
541							\|\320\317\021\340\241\261\032\341
542							\|\333\245-\0\0\0)}x
543							# /usr/share/misc/magic
544							\|\| $txt =~ m{\A.{2080}Microsoft Excel 5.0 Worksheet}
545							\|\| $txt =~ m{\A\x09\x04\x06\x00\x00\x00\x10\x00}
546							) {
547	1	50				5	$can{xls} or croak "Spreadsheet::ParseExcel not installed";
548	1					2	my $tmpfile;
549	1	50				4	if ($can{ios}) { # Do not use a temp file if IO::Scalar is available
550	1					3	$tmpfile = \$txt;
551							}
552							else {
553	0					0	$tmpfile = File::Temp->new (SUFFIX => ".xls", UNLINK => 1);
554	0					0	binmode $tmpfile;
555	0					0	print $tmpfile $txt;
556	0					0	close $tmpfile;
557							}
558	1	50				53	open $io_ref, "<", $tmpfile or return;
559	1					981	$io_txt = 0;
560	1					6	$_parser = _parser ($opt{parser} = "xls");
561							}
562							elsif ( # /usr/share/misc/magic
563							$txt =~ m{\APK\003\004.{4,30}(?:\[Content_Types\]\.xml\|_rels/\.rels)}
564							) {
565	0	0				0	$can{xlsx} or croak "XLSX parser not installed";
566	0					0	my $tmpfile;
567	0	0				0	if ($can{ios}) { # Do not use a temp file if IO::Scalar is available
568	0					0	$tmpfile = \$txt;
569							}
570							else {
571	0					0	$tmpfile = File::Temp->new (SUFFIX => ".xlsx", UNLINK => 1);
572	0					0	binmode $tmpfile;
573	0					0	print $tmpfile $txt;
574	0					0	close $tmpfile;
575							}
576	0	0				0	open $io_ref, "<", $tmpfile or return;
577	0					0	$io_txt = 0;
578	0					0	$_parser = _parser ($opt{parser} = "xlsx");
579							}
580							}
581	154	100	66			1500	if ($opt{parser} ? $_parser =~ m/^xlsx?$/
		100
582							: ($io_fil && $txt =~ m/\.(xlsx?)$/i && ($_parser = $1))) {
583	75	100				339	my $parse_type = $_parser =~ m/x$/i ? "XLSX" : "XLS";
584	75	50				329	my $parser = $can{lc $parse_type} or
585							croak "Parser for $parse_type is not installed";
586							$debug and print STDERR "Opening $parse_type ", $io_ref ? "<REF>" : $txt,
587	75	50				350	" using $parser-", $can{lc $parse_type}->VERSION, "\n";
		100
588	75	50				241	$opt{passwd} and $parser_opts{Password} = $opt{passwd};
589	75					194	my $oBook = eval {
590							$io_ref
591							? $parse_type eq "XLSX"
592							? $can{xlsx} =~ m/::XLSX$/
593							? $parser->new ($io_ref)
594							: $parser->new (%parser_opts)->parse ($io_ref)
595							: $parser->new (%parser_opts)->Parse ($io_ref)
596							: $parse_type eq "XLSX"
597	75	0				854	? $can{xlsx} =~ m/::XLSX$/
		50
		50
		100
		100
598							? $parser->new ($txt)
599							: $parser->new (%parser_opts)->parse ($txt)
600							: $parser->new (%parser_opts)->Parse ($txt);
601							};
602	75	50				2252417	unless ($oBook) {
603							# cleanup will fail on folders with spaces.
604	0					0	(my $msg = $@) =~ s/ at \S+ line \d+.*//s;
605	0					0	croak "$parse_type parser cannot parse data: $msg";
606							}
607	75	50				318	$debug > 8 and _dump (oBook => $oBook);
608
609							# WorkBook keys:
610							# aColor _CurSheet Format SheetCount
611							# ActiveSheet _CurSheet_ FormatStr _skip_chart
612							# Author File NotSetCell _string_contin
613							# BIFFVersion Flg1904 Object Version
614							# _buffer FmtClass PkgStr Worksheet
615							# CellHandler Font _previous_info
616
617							my @data = ( {
618							type => lc $parse_type,
619							parser => $can{lc $parse_type},
620							version => $can{lc $parse_type}->VERSION,
621							parsers => [{
622							type => lc $parse_type,
623							parser => $can{lc $parse_type},
624							version => $can{lc $parse_type}->VERSION,
625							}],
626							error => undef,
627	75		50			2305	sheets => $oBook->{SheetCount} \|\| 0,
628							sheet => {},
629							} );
630							# $debug and $data[0]{_parser} = $oBook;
631							# Overrule the default date format strings
632							my %def_fmt = (
633							0x0E => lc $opt{dtfmt}, # m-d-yy
634	75					636	0x0F => "d-mmm-yyyy", # d-mmm-yy
635							0x11 => "mmm-yyyy", # mmm-yy
636							0x16 => "yyyy-mm-dd hh:mm", # m-d-yy h:mm
637							);
638	75					538	$oBook->{FormatStr}{$_} = $def_fmt{$_} for keys %def_fmt;
639							my $oFmt = $parse_type eq "XLSX"
640	75	50				742	? $can{xlsx} =~ m/::XLSX$/
		100
641							? Spreadsheet::XLSX::Fmt2007->new
642							: Spreadsheet::ParseExcel::FmtDefault->new
643							: Spreadsheet::ParseExcel::FmtDefault->new;
644
645	75	100				846	$debug and print STDERR "\t$data[0]{sheets} sheets\n";
646							my $active_sheet = $oBook->get_active_sheet
647							\|\| $oBook->{ActiveSheet}
648	75		33			376	\|\| $oBook->{SelectedSheet};
649	75					826	my $current_sheet = 0;
650	75					146	foreach my $oWkS (@{$oBook->{Worksheet}}) {
	75					232
651	121					192	$current_sheet++;
652	121	100	100			676	$opt{clip} and !defined $oWkS->{Cells} and next; # Skip empty sheets
653							my %sheet = (
654							parser => 0,
655							label => $oWkS->{Name},
656	115					762	maxrow => 0,
657							maxcol => 0,
658							cell => [],
659							attr => [],
660							merged => [],
661							active => 0,
662							);
663							# $debug and $sheet{_parser} = $oWkS;
664	115	50				401	defined $sheet{label} or $sheet{label} = "-- unlabeled --";
665	115	100				404	exists $oWkS->{MinRow} and $sheet{minrow} = $oWkS->{MinRow} + 1;
666	115	100				346	exists $oWkS->{MaxRow} and $sheet{maxrow} = $oWkS->{MaxRow} + 1;
667	115	100				420	exists $oWkS->{MinCol} and $sheet{mincol} = $oWkS->{MinCol} + 1;
668	115	100				310	exists $oWkS->{MaxCol} and $sheet{maxcol} = $oWkS->{MaxCol} + 1;
669							$sheet{merged} = [
670	12					35	map { $_->[0] }
671	4					30	sort { $a->[1] cmp $b->[1] }
672	12					106	map {[ $_, pack "NNNN", @$_ ]}
673	12					90	map {[ map { $_ + 1 } @{$_}[1,0,3,2] ]}
	48					133
	12					38
674	115	100				235	@{$oWkS->get_merged_areas \|\| []}];
	115					460
675	115					1135	my $sheet_idx = 1 + @data;
676	115	100				345	$debug and print STDERR "\tSheet $sheet_idx '$sheet{label}' $sheet{maxrow} x $sheet{maxcol}\n";
677	115	100				301	if (defined $active_sheet) {
678							# _SheetNo is 0-based
679	7	50				23	my $sheet_no = defined $oWkS->{_SheetNo} ? $oWkS->{_SheetNo} : $current_sheet - 1;
680	7	100				21	$sheet_no eq $active_sheet and $sheet{active} = 1;
681							}
682							# Sheet keys:
683							# _Book FooterMargin MinCol RightMargin
684							# BottomMargin FooterMergin MinRow RightMergin
685							# BottomMergin HCenter Name RowHeight
686							# Cells Header NoColor RowHidden
687							# ColFmtNo HeaderMargin NoOrient Scale
688							# ColHidden HeaderMergin NoPls SheetHidden
689							# ColWidth Kind Notes _SheetNo
690							# Copis Landscape PageFit SheetType
691							# DefColWidth LeftMargin PageStart SheetVersion
692							# DefRowHeight LeftMergin PaperSize TopMargin
693							# Draft LeftToRight _Pos TopMergin
694							# FitHeight MaxCol PrintGrid UsePage
695							# FitWidth MaxRow PrintHeaders VCenter
696							# Footer MergedArea Res VRes
697	115	100				363	if (exists $oWkS->{MinRow}) {
698	110		50			452	my $hiddenRows = $oWkS->{RowHidden} \|\| [];
699	110		50			434	my $hiddenCols = $oWkS->{ColHidden} \|\| [];
700	110	100				310	if ($opt{clip}) {
701	91					227	my ($mr, $mc) = (-1, -1);
702	91					286	foreach my $r ($oWkS->{MinRow} .. $sheet{maxrow}) {
703	516					923	foreach my $c ($oWkS->{MinCol} .. $sheet{maxcol}) {
704	2908	100				5507	my $oWkC = $oWkS->{Cells}[$r][$c] or next;
705	1701	50				3190	defined (my $val = $oWkC->{Val}) or next;
706	1701	100				3027	$val eq "" and next;
707	1679	100				2759	$r > $mr and $mr = $r;
708	1679	100				3003	$c > $mc and $mc = $c;
709							}
710							}
711	91					318	($sheet{maxrow}, $sheet{maxcol}) = ($mr + 1, $mc + 1);
712							}
713	110					284	foreach my $r ($oWkS->{MinRow} .. $sheet{maxrow}) {
714	597					1157	foreach my $c ($oWkS->{MinCol} .. $sheet{maxcol}) {
715	3132	100				12173	my $oWkC = $oWkS->{Cells}[$r][$c] or next;
716							#defined (my $val = $oWkC->{Val}) or next;
717	1802					2785	my $val = $oWkC->{Val};
718	1802	50	33			6026	if (defined $val and my $enc = $oWkC->{Code}) {
719	0	0				0	$enc eq "ucs2" and $val = decode ("utf-16be", $val);
720							}
721	1802					3920	my $cell = cr2cell ($c + 1, $r + 1);
722	1802	100				5439	$opt{rc} and $sheet{cell}[$c + 1][$r + 1] = $val; # Original
723
724	1802					2461	my $fmt;
725	1802					2713	my $FmT = $oWkC->{Format};
726	1802	100				3091	if ($FmT) {
727	952	100				1911	unless (ref $FmT) {
728	130					176	$fmt = $FmT;
729	130					211	$FmT = {};
730							}
731							}
732							else {
733	850					1697	$FmT = {};
734							}
735	1802					3104	foreach my $attr (qw( AlignH AlignV FmtIdx Hidden Lock
736							Wrap )) {
737	10812	100				20807	exists $FmT->{$attr} or $FmT->{$attr} = 0;
738							}
739	1802	100				3602	exists $FmT->{Fill} or $FmT->{Fill} = [ 0 ];
740	1802	100				3941	exists $FmT->{Font} or $FmT->{Font} = undef;
741
742	1802	100				3285	unless (defined $fmt) {
743							$fmt = $FmT->{FmtIdx}
744							? $oBook->{FormatStr}{$FmT->{FmtIdx}}
745	1672	100				3099	: undef;
746							}
747	1802	100				3476	if ($oWkC->{Type} eq "Numeric") {
748							# Fixed in 0.33 and up
749							# see Spreadsheet/ParseExcel/FmtDefault.pm
750							$FmT->{FmtIdx} == 0x0e \|\|
751							$FmT->{FmtIdx} == 0x0f \|\|
752							$FmT->{FmtIdx} == 0x10 \|\|
753							$FmT->{FmtIdx} == 0x11 \|\|
754							$FmT->{FmtIdx} == 0x16 \|\|
755							(defined $fmt && $fmt =~ m{^[dmy][-\\/dmy]*$}) and
756	370	100	33			3261	$oWkC->{Type} = "Date";
			33
			33
			33
			100
			66
757							$FmT->{FmtIdx} == 0x09 \|\|
758							$FmT->{FmtIdx} == 0x0a \|\|
759							(defined $fmt && $fmt =~ m{^0+\.0+%$}) and
760	370	100	100			2000	$oWkC->{Type} = "Percentage";
			100
			100
761							}
762	1802	100				3463	defined $fmt and $fmt =~ s/\\//g;
763							$opt{cells} and # Formatted value
764							$sheet{$cell} = defined $val ? $FmT && exists $def_fmt{$FmT->{FmtIdx}}
765	1802	100	66			9742	? $oFmt->ValFmt ($oWkC, $oBook)
		50
		100
766							: $oWkC->Value : undef;
767	1802	100				15126	if ($opt{attr}) {
768	1526					2210	my $FnT = $FmT->{Font};
769							my $fmi = $FmT->{FmtIdx}
770							? $oBook->{FormatStr}{$FmT->{FmtIdx}}
771	1526	100				2594	: undef;
772	1526	100				2717	$fmi and $fmi =~ s/\\//g;
773							$sheet{attr}[$c + 1][$r + 1] = {
774							@def_attr,
775
776							type => lc $oWkC->{Type},
777							enc => $oWkC->{Code},
778							merged => (defined $oWkC->{Merged} ? $oWkC->{Merged} : $oWkC->is_merged) \|\| 0,
779							hidden => ($hiddenRows->[$r] \|\| $hiddenCols->[$c] ? 1 :
780							defined $oWkC->{Hidden} ? $oWkC->{Hidden} : $FmT->{Hidden}) \|\| 0,
781							locked => $FmT->{Lock} \|\| 0,
782							format => $fmi,
783							halign => [ undef, qw( left center right
784							fill justify ), undef,
785							"equal_space" ]->[$FmT->{AlignH}],
786							valign => [ qw( top center bottom justify
787							equal_space )]->[$FmT->{AlignV}],
788							wrap => $FmT->{Wrap},
789							font => $FnT->{Name},
790							size => $FnT->{Height},
791							bold => $FnT->{Bold},
792							italic => $FnT->{Italic},
793							uline => $FnT->{Underline},
794							fgcolor => _xls_color ($FnT->{Color}),
795	1526					6032	bgcolor => _xls_fill (@{$FmT->{Fill}}),
796							formula => $oWkC->{Formula},
797	1526		100			4827	};
			100
			100
798							#_dump "cell", $sheet{attr}[$c + 1][$r + 1];
799							}
800							}
801							}
802							}
803	115					206	for (@{$sheet{cell}}) {
	115					353
804	409	100				1072	defined or $_ = [];
805							}
806	115					1919	push @data, { %sheet };
807							# $data[0]{sheets}++;
808	115	50				534	if ($sheet{label} eq "-- unlabeled --") {
809	0					0	$sheet{label} = "";
810							}
811							else {
812	115					739	$data[0]{sheet}{$sheet{label}} = $#data;
813							}
814							}
815	75					361	return _clipsheets \%opt, [ @data ];
816							}
817
818	79	100				438	if ($opt{parser} ? _parser ($opt{parser}) eq "sc"
		100
		100
819							: $io_fil
820							? $txt =~ m/\.sc$/
821							: $txt =~ m/^# .*SquirrelCalc/) {
822	46	100				113	if ($io_ref) {
		100
823	2					8	local $/;
824	2					39	my $x = <$txt>;
825	2					11	$txt = $x;
826							}
827							elsif ($io_fil) {
828	42					168	local $/;
829	42	50				1449	open my $sc, "<", $txt or return;
830	42					1623	$txt = <$sc>;
831	42					623	close $sc;
832							}
833	46	50				282	$txt =~ m/\S/ or return;
834	46	50				148	my $label = defined $opt{label} ? $opt{label} : "sheet";
835	46					571	my @data = (
836							{ type => "sc",
837							parser => "Spreadsheet::Read",
838							version => $VERSION,
839							parsers => [{
840							type => "sc",
841							parser => "Spreadsheet::Read",
842							version => $VERSION,
843							}],
844							error => undef,
845							sheets => 1,
846							sheet => { $label => 1 },
847							},
848							{ parser => 0,
849							label => $label,
850							maxrow => 0,
851							maxcol => 0,
852							cell => [],
853							attr => [],
854							merged => [],
855							active => 1,
856							},
857							);
858
859	46					9680	for (split m/\s[\r\n]\s/, $txt) {
860	13478	100				21993	if (m/^dimension.of ([0-9]+) rows.of ([0-9]+) columns/i) {
861	46					93	@{$data[1]}{qw(maxrow maxcol)} = ($1, $2);
	46					249
862	46					108	next;
863							}
864	13432	100				32226	s/^r([0-9]+)c([0-9]+)\s=\s// or next;
865	3542					6329	my ($c, $r) = map { $_ + 1 } $2, $1;
	7084					15809
866	3542	100	66			15887	if (m/.* \{(.)}$/ or m/"(.)"/) {
867	2714					5160	my $cell = cr2cell ($c, $r);
868	2714	100				8875	$opt{rc} and $data[1]{cell}[$c][$r] = $1;
869	2714	50				7472	$opt{cells} and $data[1]{$cell} = $1;
870	2714	100				6448	$opt{attr} and $data[1]{attr}[$c + 1][$r] = { @def_attr };
871	2714					4592	next;
872							}
873							# Now only formula's remain. Ignore for now
874							# r67c7 = [P2L] 2(1000r67c5-60)
875							}
876	46					761	for (@{$data[1]{cell}}) {
	46					154
877	360	100				723	defined or $_ = [];
878							}
879	46					223	return _clipsheets \%opt, [ @data ];
880							}
881
882	33	50	66			907	if ($opt{parser} ? _parser ($opt{parser}) eq "sxc"
		100
883							: ($txt =~ m/^<\?xml/ or -f $txt)) {
884	7	50				984	$can{sxc} or croak "Spreadsheet::ReadSXC not installed";
885	0	0				0	ref $txt and
886							croak ("Sorry, references as input are not (yet) supported by Spreadsheet::ReadSXC");
887
888	0					0	my $using = "using $can{sxc}-" . $can{sxc}->VERSION;
889	0					0	my $sxc_options = { %parser_opts, OrderBySheet => 1 }; # New interface 0.20 and up
890	0					0	my $sxc;
891	0	0	0			0	if ($txt =~ m/\.(sxc\|ods)$/i) {
		0
		0
892	0	0				0	$debug and print STDERR "Opening \U$1\E $txt $using\n";
893	0	0				0	$sxc = Spreadsheet::ReadSXC::read_sxc ($txt, $sxc_options) or return;
894							}
895							elsif ($txt =~ m/\.xml$/i) {
896	0	0				0	$debug and print STDERR "Opening XML $txt $using\n";
897	0	0				0	$sxc = Spreadsheet::ReadSXC::read_xml_file ($txt, $sxc_options) or return;
898							}
899							# need to test on pattern to prevent stat warning
900							# on filename with newline
901							elsif ($txt !~ m/^<\?xml/i and -f $txt) {
902	0	0				0	$debug and print STDERR "Opening XML $txt $using\n";
903	0	0				0	open my $f, "<", $txt or return;
904	0					0	local $/;
905	0					0	$txt = <$f>;
906	0					0	close $f;
907							}
908	0	0	0			0	!$sxc && $txt =~ m/^<\?xml/i and
909							$sxc = Spreadsheet::ReadSXC::read_xml_string ($txt, $sxc_options);
910	0	0				0	$debug > 8 and _dump (sxc => $sxc);
911	0	0				0	if ($sxc) {
912	0					0	my @data = ( {
913							type => "sxc",
914							parser => "Spreadsheet::ReadSXC",
915							version => $Spreadsheet::ReadSXC::VERSION,
916							parsers => [{
917							type => "sxc",
918							parser => "Spreadsheet::ReadSXC",
919							version => $Spreadsheet::ReadSXC::VERSION,
920							}],
921							error => undef,
922							sheets => 0,
923							sheet => {},
924							} );
925							my @sheets = ref $sxc eq "HASH" # < 0.20
926							? map {
927							{ label => $_,
928	0					0	data => $sxc->{$_},
929							}
930							} keys %$sxc
931	0	0				0	: @{$sxc};
	0					0
932	0					0	foreach my $sheet (@sheets) {
933	0					0	my @sheet = @{$sheet->{data}};
	0					0
934							my %sheet = (
935							parser => 0,
936							label => $sheet->{label},
937	0					0	maxrow => scalar @sheet,
938							maxcol => 0,
939							cell => [],
940							attr => [],
941							merged => [],
942							active => 0,
943							);
944	0					0	my $sheet_idx = 1 + @data;
945	0	0				0	$debug and print STDERR "\tSheet $sheet_idx '$sheet{label}' $sheet{maxrow} rows\n";
946	0					0	foreach my $r (0 .. $#sheet) {
947	0	0				0	my @row = @{$sheet[$r]} or next;
	0					0
948	0					0	foreach my $c (0 .. $#row) {
949	0	0				0	defined (my $val = $row[$c]) or next;
950	0					0	my $C = $c + 1;
951	0	0				0	$C > $sheet{maxcol} and $sheet{maxcol} = $C;
952	0					0	my $cell = cr2cell ($C, $r + 1);
953	0	0				0	$opt{rc} and $sheet{cell}[$C][$r + 1] = $val;
954	0	0				0	$opt{cells} and $sheet{$cell} = $val;
955	0	0				0	$opt{attr} and $sheet{attr}[$C][$r + 1] = { @def_attr };
956							}
957							}
958	0					0	for (@{$sheet{cell}}) {
	0					0
959	0	0				0	defined or $_ = [];
960							}
961	0	0				0	$debug and print STDERR "\tSheet $sheet_idx '$sheet{label}' $sheet{maxrow} x $sheet{maxcol}\n";
962	0					0	push @data, { %sheet };
963	0					0	$data[0]{sheets}++;
964	0					0	$data[0]{sheet}{$sheet->{label}} = $#data;
965							}
966	0					0	return _clipsheets \%opt, [ @data ];
967							}
968							}
969
970	26					168	return;
971							} # ReadData
972
973							sub add {
974	2			2	1	5	my $book = shift;
975	2	50				7	my $r = ReadData (@_) or return;
976							$book && (ref $book eq "ARRAY" \|\|
977	2	50	33			25	ref $book eq __PACKAGE__) && $book->[0]{sheets} or return $r;
			33
			33
978
979	2					6	my $c1 = $book->[0];
980	2					4	my $c2 = $r->[0];
981
982	2	50				7	unless ($c1->{parsers}) {
983	0					0	$c1->{parsers}[0]{$_} = $c1->{$_} for qw( type parser version );
984	0					0	$book->[$_]{parser} = 0 for 1 .. $c1->{sheets};
985							}
986	2					3	my ($pidx) = (grep { my $p = $c1->{parsers}[$_];
987							$p->{type} eq $c2->{type} &&
988							$p->{parser} eq $c2->{parser} &&
989	2	50	33			5	$p->{version} eq $c2->{version} } 0 .. $#{$c1->{parsers}});
	2					19
	2					8
990	2	50				6	unless (defined $pidx) {
991	0					0	$pidx = scalar @{$c1->{parsers}};
	0					0
992	0					0	$c1->{parsers}[$pidx]{$_} = $c2->{$_} for qw( type parser version );
993							}
994
995	2					4	foreach my $sn (sort { $c2->{sheet}{$a} <=> $c2->{sheet}{$b} } keys %{$c2->{sheet}}) {
	0					0
	2					11
996	2					4	my $s = $sn;
997	2					3	my $v = 2;
998	2					6	while (exists $c1->{sheet}{$s}) {
999	1					8	$s = $sn."[".$v++."]";
1000							}
1001	2					8	$c1->{sheet}{$s} = $c1->{sheets} + $c2->{sheet}{$sn};
1002	2					5	$r->[$c2->{sheet}{$sn}]{parser} = $pidx;
1003	2					7	push @$book, $r->[$c2->{sheet}{$sn}];
1004							}
1005	2					5	$c1->{sheets} += $c2->{sheets};
1006
1007	2					17	return $book;
1008							} # add
1009
1010							package Spreadsheet::Read::Attribute;
1011
1012	53			53		584	use Carp;
	53					156
	53					4595
1013	53			53		402	use vars qw( $AUTOLOAD );
	53					138
	53					62049
1014
1015							sub AUTOLOAD {
1016	6			6		607	my $self = shift;
1017	6					31	(my $attr = $AUTOLOAD) =~ s/.*:://;
1018	6					194	$self->{$attr};
1019							} # AUTOLOAD
1020
1021							package Spreadsheet::Read::Sheet;
1022
1023							sub cell {
1024	55			55		21025	my ($sheet, @id) = @_;
1025							@id == 2 && $id[0] =~ m/^[0-9]+$/ && $id[1] =~ m/^[0-9]+$/ and
1026	55	50	66			501	return $sheet->{cell}[$id[0]][$id[1]];
			66
1027							@id && $id[0] && exists $sheet->{$id[0]} and
1028	27	50	33			419	return $sheet->{$id[0]};
			33
1029							} # cell
1030
1031							sub attr {
1032	8			8		24	my ($sheet, @id) = @_;
1033	8					15	my $class = "Spreadsheet::Read::Attribute";
1034							@id == 2 && $id[0] =~ m/^[0-9]+$/ && $id[1] =~ m/^[0-9]+$/ and
1035	8	50	66			66	return bless $sheet->{attr}[$id[0]][$id[1]] => $class;
			66
1036	5	50	33			31	if (@id && $id[0] && exists $sheet->{$id[0]}) {
			33
1037	5					13	my ($c, $r) = $sheet->cell2cr ($id[0]);
1038	5					47	return bless $sheet->{attr}[$c][$r] => $class;
1039							}
1040	0					0	undef;
1041							} # attr
1042
1043							sub maxrow {
1044	2			2		4	my $sheet = shift;
1045	2					9	return $sheet->{maxrow};
1046							} # maxrow
1047
1048							sub maxcol {
1049	2			2		5	my $sheet = shift;
1050	2					8	return $sheet->{maxcol};
1051							} # maxrow
1052
1053							sub col2label {
1054	1	50		1		8	$_[0] =~ m/::/ and shift; # class unused
1055	1					5	return Spreadsheet::Read::col2label (@_);
1056							} # col2label
1057
1058							sub cr2cell {
1059	24	50		24		66	$_[0] =~ m/::/ and shift; # class unused
1060	24					50	return Spreadsheet::Read::cr2cell (@_);
1061							} # cr2cell
1062
1063							sub cell2cr {
1064	21	50		21		12888	$_[0] =~ m/::/ and shift; # class unused
1065	21					56	return Spreadsheet::Read::cell2cr (@_);
1066							} # cell2cr
1067
1068							sub label {
1069	2			2		6	my ($sheet, $label) = @_;
1070	2	100				9	defined $label and $sheet->{label} = $label;
1071	2					73	return $sheet->{label};
1072							} # label
1073
1074							sub active {
1075	1			1		3	my $sheet = shift;
1076	1					4	return $sheet->{active};
1077							} # label
1078
1079							# my @row = $sheet->cellrow (1);
1080							sub cellrow {
1081	4			4		10	my ($sheet, $row) = @_;
1082	4	100	66			36	defined $row && $row > 0 && $row <= $sheet->{maxrow} or return;
			100
1083	1					2	my $s = $sheet->{cell};
1084	1					4	map { $s->[$_][$row] } 1..$sheet->{maxcol};
	19					38
1085							} # cellrow
1086
1087							# my @row = $sheet->row (1);
1088							sub row {
1089	4			4		10	my ($sheet, $row) = @_;
1090	4	100	66			36	defined $row && $row > 0 && $row <= $sheet->{maxrow} or return;
			100
1091	1					4	map { $sheet->{$sheet->cr2cell ($_, $row)} } 1..$sheet->{maxcol};
	19					37
1092							} # row
1093
1094							# my @col = $sheet->cellcolumn (1);
1095							sub cellcolumn {
1096	4			4		13	my ($sheet, $col) = @_;
1097	4	100	66			36	defined $col && $col > 0 && $col <= $sheet->{maxcol} or return;
			100
1098	1					3	my $s = $sheet->{cell};
1099	1					4	map { $s->[$col][$_] } 1..$sheet->{maxrow};
	5					17
1100							} # cellcolumn
1101
1102							# my @col = $sheet->column (1);
1103							sub column {
1104	4			4		10	my ($sheet, $col) = @_;
1105	4	100	66			37	defined $col && $col > 0 && $col <= $sheet->{maxcol} or return;
			100
1106	1					5	map { $sheet->{$sheet->cr2cell ($col, $_)} } 1..$sheet->{maxrow};
	5					14
1107							} # column
1108
1109							# Convert {cell}'s [column][row] to a [row][column] list
1110							# my @rows = $sheet->rows ();
1111							sub rows {
1112	1			1		3	my $sheet = shift;
1113	1					2	my $s = $sheet->{cell};
1114
1115							map {
1116	5					10	my $r = $_;
1117	5					9	[ map { $s->[$_][$r] } 1..$sheet->{maxcol} ];
	95					164
1118	1					4	} 1..$sheet->{maxrow};
1119							} # rows
1120
1121							1;
1122
1123							__END__
1124							=head1 DESCRIPTION
1125
1126							Spreadsheet::Read tries to transparently read any spreadsheet and
1127							return its content in a universal manner independent of the parsing
1128							module that does the actual spreadsheet scanning.
1129
1130							For OpenOffice and/or LibreOffice this module uses
1131							L<Spreadsheet::ReadSXC\|https://metacpan.org/release/Spreadsheet-ReadSXC>
1132
1133							For Microsoft Excel this module uses
1134							L<Spreadsheet::ParseExcel\|https://metacpan.org/release/Spreadsheet-ParseExcel>,
1135							L<Spreadsheet::ParseXLSX\|https://metacpan.org/release/Spreadsheet-ParseXLSX>, or
1136							L<Spreadsheet::XLSX\|https://metacpan.org/release/Spreadsheet-XLSX> (stronly
1137							discouraged).
1138
1139							For CSV this module uses L<Text::CSV_XS\|https://metacpan.org/release/Text-CSV_XS>
1140							or L<Text::CSV_PP\|https://metacpan.org/release/Text-CSV_PP>.
1141
1142							For SquirrelCalc there is a very simplistic built-in parser
1143
1144							=head2 Data structure
1145
1146							The data is returned as an array reference:
1147
1148							$book = [
1149							# Entry 0 is the overall control hash
1150							{ sheets => 2,
1151							sheet => {
1152							"Sheet 1" => 1,
1153							"Sheet 2" => 2,
1154							},
1155							parsers => [ {
1156							type => "xls",
1157							parser => "Spreadsheet::ParseExcel",
1158							version => 0.59,
1159							}],
1160							error => undef,
1161							},
1162							# Entry 1 is the first sheet
1163							{ parser => 0,
1164							label => "Sheet 1",
1165							maxrow => 2,
1166							maxcol => 4,
1167							cell => [ undef,
1168							[ undef, 1 ],
1169							[ undef, undef, undef, undef, undef, "Nugget" ],
1170							],
1171							attr => [],
1172							merged => [],
1173							active => 1,
1174							A1 => 1,
1175							B5 => "Nugget",
1176							},
1177							# Entry 2 is the second sheet
1178							{ parser => 0,
1179							label => "Sheet 2",
1180							:
1181							:
1182
1183							To keep as close contact to spreadsheet users, row and column 1 have
1184							index 1 too in the C<cell> element of the sheet hash, so cell "A1" is
1185							the same as C<cell> [1, 1] (column first). To switch between the two,
1186							there are helper functions available: C<cell2cr ()>, C<cr2cell ()>,
1187							and C<col2label ()>.
1188
1189							The C<cell> hash entry contains unformatted data, while the hash entries
1190							with the traditional labels contain the formatted values (if applicable).
1191
1192							The control hash (the first entry in the returned array ref), contains
1193							some spreadsheet meta-data. The entry C<sheet> is there to be able to find
1194							the sheets when accessing them by name:
1195
1196							my %sheet2 = %{$book->[$book->[0]{sheet}{"Sheet 2"}]};
1197
1198							=head2 Functions and methods
1199
1200							=head3 new
1201
1202							my $book = Spreadsheet::Read->new (...);
1203
1204							All options accepted by ReadData are accepted by new.
1205
1206							=head3 ReadData
1207
1208							my $book = ReadData ($source [, option => value [, ... ]]);
1209
1210							my $book = ReadData ("file.csv", sep => ',', quote => '"');
1211
1212							my $book = ReadData ("file.xls", dtfmt => "yyyy-mm-dd");
1213
1214							my $book = ReadData ("file.ods");
1215
1216							my $book = ReadData ("file.sxc");
1217
1218							my $book = ReadData ("content.xml");
1219
1220							my $book = ReadData ($content);
1221
1222							my $book = ReadData ($content, parser => "xlsx");
1223
1224							my $book = ReadData ($fh, parser => "xlsx");
1225
1226							my $book = ReadData (\$content, parser => "xlsx");
1227
1228							Tries to convert the given file, string, or stream to the data structure
1229							described above.
1230
1231							Processing Excel data from a stream or content is supported through a
1232							L<File::Temp\|https://metacpan.org/release/File-Temp> temporary file or
1233							L<IO::Scalar\|https://metacpan.org/release/IO-Scalar> when available.
1234
1235							L<Spreadsheet::ReadSXC\|https://metacpan.org/release/Spreadsheet-ReadSXC>
1236							does preserve sheet order as of version 0.20.
1237
1238							Choosing between C<$content> and C<\\$content> (with or without passing
1239							the desired C<parser> option) may be depending on trial and terror.
1240							C<ReadData> does try to determine parser type on content if needed, but
1241							not all combinations are checked, and not all signatures are builtin.
1242
1243							Currently supported options are:
1244
1245							=over 2
1246
1247							=item parser
1248							X<parser>
1249
1250							Force the data to be parsed by a specific format. Possible values are
1251							C<csv>, C<prl> (or C<perl>), C<sc> (or C<squirelcalc>), C<sxc> (or C<oo>,
1252							C<ods>, C<openoffice>, C<libreoffice>) C<xls> (or C<excel>), and C<xlsx>
1253							(or C<excel2007>).
1254
1255							When parsing streams, instead of files, it is highly recommended to pass
1256							this option.
1257
1258							Spreadsheet::Read supports several underlying parsers per spreadsheet
1259							type. It will try those from most favored to least favored. When you
1260							have a good reason to prefer a different parser, you can set that in
1261							environment variables. The other options then will not be tested for:
1262
1263							env SPREADSHEET_READ_CSV=Text::CSV_PP ...
1264
1265							=item cells
1266							X<cells>
1267
1268							Control the generation of named cells ("C<A1>" etc). Default is true.
1269
1270							=item rc
1271
1272							Control the generation of the {cell}[c][r] entries. Default is true.
1273
1274							=item attr
1275
1276							Control the generation of the {attr}[c][r] entries. Default is false.
1277							See L</Cell Attributes> below.
1278
1279							=item clip
1280
1281							If set, L<C<ReadData>\|/ReadData> will remove all trailing rows and columns
1282							per sheet that have no data, where no data means only undefined or empty
1283							cells (after optional stripping). If a sheet has no data at all, the sheet
1284							will be skipped entirely when this attribute is true.
1285
1286							=item strip
1287
1288							If set, L<C<ReadData>\|/ReadData> will remove trailing- and/or
1289							leading-whitespace from every field.
1290
1291							strip leading strailing
1292							----- ------- ---------
1293							0 n/a n/a
1294							1 strip n/a
1295							2 n/a strip
1296							3 strip strip
1297
1298							=item pivot
1299
1300							Swap all rows and columns.
1301
1302							When a sheet contains data like
1303
1304							A1 B1 C1 E1
1305							A2 C2 D2
1306							A3 B3 C3 D3 E3
1307
1308							using C<pivot> will return the sheet data as
1309
1310							A1 A2 A3
1311							B1 B3
1312							C1 C2 C3
1313							D2 D3
1314							E1 E3
1315
1316							=item sep
1317
1318							Set separator for CSV. Default is comma C<,>.
1319
1320							=item quote
1321
1322							Set quote character for CSV. Default is C<">.
1323
1324							=item dtfmt
1325
1326							Set the format for MS-Excel date fields that are set to use the default
1327							date format. The default format in Excel is "C<m-d-yy>", which is both
1328							not year 2000 safe, nor very useful. The default is now "C<yyyy-mm-dd>",
1329							which is more ISO-like.
1330
1331							Note that date formatting in MS-Excel is not reliable at all, as it will
1332							store/replace/change the date field separator in already stored formats
1333							if you change your locale settings. So the above mentioned default can
1334							be either "C<m-d-yy>" OR "C<m/d/yy>" depending on what that specific
1335							character happened to be at the time the user saved the file.
1336
1337							=item debug
1338
1339							Enable some diagnostic messages to STDERR.
1340
1341							The value determines how much diagnostics are dumped (using
1342							L<Data::Peek\|https://metacpan.org/release/Data-Peek>). A value of C<9>
1343							and higher will dump the entire structure from the back-end parser.
1344
1345							=item passwd
1346
1347							Use this password to decrypt password protected spreadsheet.
1348
1349							Currently only supports Excel.
1350
1351							=back
1352
1353							All other attributes/options will be passed to the underlying parser if
1354							that parser supports attributes.
1355
1356							=head3 col2label
1357
1358							my $col_id = col2label (col);
1359
1360							my $col_id = $book->col2label (col); # OO
1361
1362							C<col2label ()> converts a C<(column)> (1 based) to the letters used in the
1363							traditional cell notation:
1364
1365							my $id = col2label ( 4); # $id now "D"
1366							my $id = col2label (28); # $id now "AB"
1367
1368							=head3 cr2cell
1369
1370							my $cell = cr2cell (col, row);
1371
1372							my $cell = $book->cr2cell (col, row); # OO
1373
1374							C<cr2cell ()> converts a C<(column, row)> pair (1 based) to the
1375							traditional cell notation:
1376
1377							my $cell = cr2cell ( 4, 14); # $cell now "D14"
1378							my $cell = cr2cell (28, 4); # $cell now "AB4"
1379
1380							=head3 cell2cr
1381
1382							my ($col, $row) = cell2cr ($cell);
1383
1384							my ($col, $row) = $book->cell2cr ($cell); # OO
1385
1386							C<cell2cr ()> converts traditional cell notation to a C<(column, row)>
1387							pair (1 based):
1388
1389							my ($col, $row) = cell2cr ("D14"); # returns ( 4, 14)
1390							my ($col, $row) = cell2cr ("AB4"); # returns (28, 4)
1391
1392							=head3 row
1393
1394							my @row = row ($sheet, $row)
1395
1396							my @row = Spreadsheet::Read::row ($book->[1], 3);
1397
1398							my @row = $book->row ($sheet, $row); # OO
1399
1400							Get full row of formatted values (like C<< $sheet->{A3} .. $sheet->{G3} >>)
1401
1402							Note that the indexes in the returned list are 0-based.
1403
1404							C<row ()> is not imported by default, so either specify it in the
1405							use argument list, or call it fully qualified.
1406
1407							=head3 cellrow
1408
1409							my @row = cellrow ($sheet, $row);
1410
1411							my @row = Spreadsheet::Read::cellrow ($book->[1], 3);
1412
1413							my @row = $book->cellrow ($sheet, $row); # OO
1414
1415							Get full row of unformatted values (like C<< $sheet->{cell}[1][3] .. $sheet->{cell}[7][3] >>)
1416
1417							Note that the indexes in the returned list are 0-based.
1418
1419							C<cellrow ()> is not imported by default, so either specify it in the
1420							use argument list, or call it fully qualified or as method call.
1421
1422							=head3 rows
1423
1424							my @rows = rows ($sheet);
1425
1426							my @rows = Spreadsheet::Read::rows ($book->[1]);
1427
1428							my @rows = $book->rows (1); # OO
1429
1430							Convert C<{cell}>'s C<[column][row]> to a C<[row][column]> list.
1431
1432							Note that the indexes in the returned list are 0-based, where the
1433							index in the C<{cell}> entry is 1-based.
1434
1435							C<rows ()> is not imported by default, so either specify it in the
1436							use argument list, or call it fully qualified.
1437
1438							=head3 parses
1439
1440							parses ($format);
1441
1442							Spreadsheet::Read::parses ("CSV");
1443
1444							$book->parses ("CSV"); # OO
1445
1446							C<parses ()> returns Spreadsheet::Read's capability to parse the
1447							required format. L<C<ReadData>\|/ReadData> will pick its preferred parser
1448							for that format unless overruled. See L<C<parser>\|/parser>.
1449
1450							C<parses ()> is not imported by default, so either specify it in the
1451							use argument list, or call it fully qualified.
1452
1453							=head3 Version
1454
1455							my $v = Version ()
1456
1457							my $v = Spreadsheet::Read::Version ()
1458
1459							my $v = Spreadsheet::Read->VERSION;
1460
1461							my $v = $book->Version (); # OO
1462
1463							Returns the current version of Spreadsheet::Read.
1464
1465							C<Version ()> is not imported by default, so either specify it in the
1466							use argument list, or call it fully qualified.
1467
1468							This function returns exactly the same as C<< Spreadsheet::Read->VERSION >>
1469							returns and is only kept for backward compatibility reasons.
1470
1471							=head3 sheets
1472
1473							my $sheets = $book->sheets; # OO
1474							my @sheets = $book->sheets; # OO
1475
1476							In scalar context return the number of sheets in the book.
1477							In list context return the labels of the sheets in the book.
1478
1479							=head3 sheet
1480
1481							my $sheet = $book->sheet (1); # OO
1482							my $sheet = $book->sheet ("Foo"); # OO
1483
1484							Return the numbered or named sheet out of the book. Will return C<undef> if
1485							there is no match. Will not work for sheets I<named> with a number between 1
1486							and the number of sheets in the book.
1487
1488							With named sheets will first try to use the list of sheet-labels as stored in
1489							the control structure. If no match is found, it will scan the actual labels
1490							of the sheets. In that case, it will return the first matching sheet.
1491
1492							If defined, the returned sheet will be of class C<Spreadsheet::Read::Sheet>.
1493
1494							=head3 add
1495
1496							my $book = ReadData ("file.csv");
1497							Spreadsheet::Read::add ($book, "file.xlsx");
1498
1499							my $book = Spreadsheet::Read->new ("file.csv");
1500							$book->add ("file.xlsx"); # OO
1501
1502							=head2 Methods on sheets
1503
1504							=head3 maxcol
1505
1506							my $col = $sheet->maxcol;
1507
1508							Return the index of the last in-use column in the sheet. This index is 1-based.
1509
1510							=head3 maxrow
1511
1512							my $row = $sheet->maxrow;
1513
1514							Return the index of the last in-use row in the sheet. This index is 1-based.
1515
1516							=head3 cell
1517
1518							my $cell = $sheet->cell ("A3");
1519							my $cell = $sheet->cell (1, 3);
1520
1521							Return the value for a cell. Using tags will return the formatted value,
1522							using column and row will return unformatted value.
1523
1524							=head3 attr
1525
1526							my $cell = $sheet->attr ("A3");
1527							my $cell = $sheet->attr (1, 3);
1528
1529							Return the attributes of a cell. Only valid if attributes are enabled through
1530							option C<attr>.
1531
1532							=head3 col2label
1533
1534							my $col_id = $sheet->col2label (col);
1535
1536							C<col2label ()> converts a C<(column)> (1 based) to the letters used in the
1537							traditional cell notation:
1538
1539							my $id = $sheet->col2label ( 4); # $id now "D"
1540							my $id = $sheet->col2label (28); # $id now "AB"
1541
1542							=head3 cr2cell
1543
1544							my $cell = $sheet->cr2cell (col, row);
1545
1546							C<cr2cell ()> converts a C<(column, row)> pair (1 based) to the
1547							traditional cell notation:
1548
1549							my $cell = $sheet->cr2cell ( 4, 14); # $cell now "D14"
1550							my $cell = $sheet->cr2cell (28, 4); # $cell now "AB4"
1551
1552							=head3 cell2cr
1553
1554							my ($col, $row) = $sheet->cell2cr ($cell);
1555
1556							C<cell2cr ()> converts traditional cell notation to a C<(column, row)>
1557							pair (1 based):
1558
1559							my ($col, $row) = $sheet->cell2cr ("D14"); # returns ( 4, 14)
1560							my ($col, $row) = $sheet->cell2cr ("AB4"); # returns (28, 4)
1561
1562							=head3 col
1563
1564							my @col = $sheet->column ($col);
1565
1566							Get full column of formatted values (like C<< $sheet->{C1} .. $sheet->{C9} >>)
1567
1568							Note that the indexes in the returned list are 0-based.
1569
1570							=head3 cellcolumn
1571
1572							my @col = $sheet->cellcolumn ($col);
1573
1574							Get full column of unformatted values (like C<< $sheet->{cell}[3][1] .. $sheet->{cell}[3][9] >>)
1575
1576							Note that the indexes in the returned list are 0-based.
1577
1578							=head3 row
1579
1580							my @row = $sheet->row ($row);
1581
1582							Get full row of formatted values (like C<< $sheet->{A3} .. $sheet->{G3} >>)
1583
1584							Note that the indexes in the returned list are 0-based.
1585
1586							=head3 cellrow
1587
1588							my @row = $sheet->cellrow ($row);
1589
1590							Get full row of unformatted values (like C<< $sheet->{cell}[1][3] .. $sheet->{cell}[7][3] >>)
1591
1592							Note that the indexes in the returned list are 0-based.
1593
1594							=head3 rows
1595
1596							my @rows = $sheet->rows ();
1597
1598							Convert C<{cell}>'s C<[column][row]> to a C<[row][column]> list.
1599
1600							Note that the indexes in the returned list are 0-based, where the
1601							index in the C<{cell}> entry is 1-based.
1602
1603							=head3 label
1604
1605							my $label = $sheet->label;
1606							$sheet->label ("New sheet label");
1607
1608							Set a new label to a sheet. Note that the index in the control structure will
1609							I<NOT> be updated.
1610
1611							=head3 active
1612
1613							my $sheet_is_active = $sheet->active;
1614
1615							Returns 1 if the selected sheet is active, otherwise returns 0.
1616
1617							Currently only works on XLS (as of Spreadsheed::ParseExcel-0.61).
1618							CSV is always active.
1619
1620							=head2 Using CSV
1621
1622							In case of CSV parsing, L<C<ReadData>\|/ReadData> will use the first line of
1623							the file to auto-detect the separation character if the first argument is a
1624							file and both C<sep> and C<quote> are not passed as attributes.
1625							L<Text::CSV_XS\|https://metacpan.org/release/Text-CSV_XS> (or
1626							L<Text::CSV_PP\|https://metacpan.org/release/Text-CSV_PP>) is able to
1627							automatically detect and use C<\r> line endings.
1628
1629							CSV can parse streams too, but be sure to pass C<sep> and/or C<quote> if
1630							these do not match the default C<,> and C<">.
1631
1632							When an error is found in the CSV, it is automatically reported (to STDERR).
1633							The structure will store the error in C<< $ss->[0]{error} >> as anonymous
1634							list returned by
1635							L<C<< $csv->error_diag >>\|https://metacpan.org/pod/Text::CSV_XS#error_diag>.
1636							See L<Text::CSV_XS\|https://metacpan.org/pod/Text-CSV_XS> for documentation.
1637
1638							my $ss = ReadData ("bad.csv");
1639							$ss->[0]{error} and say $ss->[0]{error}[1];
1640
1641							As CSV has no sheet labels, the default label for a CSV sheet is its filename.
1642							For CSV, this can be overruled using the I<label> attribute:
1643
1644							my $ss = Spreadsheet::Read->new ("/some/place/test.csv", label => "Test");
1645
1646							=head2 Cell Attributes
1647							X<attr>
1648
1649							If the constructor was called with C<attr> having a true value,
1650
1651							my $book = ReadData ("book.xls", attr => 1);
1652							my $book = Spreadsheet::Read->new ("book.xlsx", attr => 1);
1653
1654							effort is made to analyze and store field attributes like this:
1655
1656							{ label => "Sheet 1",
1657							maxrow => 5,
1658							maxcol => 2,
1659							cell => [ undef,
1660							[ undef, 1 ],
1661							[ undef, undef, undef, undef, undef, "Nugget" ],
1662							],
1663							attr => [ undef,
1664							[ undef, {
1665							type => "numeric",
1666							fgcolor => "#ff0000",
1667							bgcolor => undef,
1668							font => "Arial",
1669							size => undef,
1670							format => "## ##0.00",
1671							halign => "right",
1672							valign => "top",
1673							uline => 0,
1674							bold => 0,
1675							italic => 0,
1676							wrap => 0,
1677							merged => 0,
1678							hidden => 0,
1679							locked => 0,
1680							enc => "utf-8",
1681							}, ]
1682							[ undef, undef, undef, undef, undef, {
1683							type => "text",
1684							fgcolor => "#e2e2e2",
1685							bgcolor => undef,
1686							font => "Letter Gothic",
1687							size => 15,
1688							format => undef,
1689							halign => "left",
1690							valign => "top",
1691							uline => 0,
1692							bold => 0,
1693							italic => 0,
1694							wrap => 0,
1695							merged => 0,
1696							hidden => 0,
1697							locked => 0,
1698							enc => "iso8859-1",
1699							}, ]
1700							merged => [],
1701							A1 => 1,
1702							B5 => "Nugget",
1703							},
1704
1705							The entries C<maxrow> and C<maxcol> are 1-based.
1706
1707							This has now been partially implemented, mainly for Excel, as the other
1708							parsers do not (yet) support all of that. YMMV.
1709
1710							If a cell itself is not hidden, but the parser holds the information that
1711							either the row or the column (or both) the field is in is hidden, the flag
1712							is inherited into the cell attributes.
1713
1714							You can get the attributes of a cell (as a hash-ref) like this:
1715
1716							my $attr = $book[1]{attr}[1][3]; # Direct structure
1717							my $attr = $book->sheet (1)->attr (1, 3); # Same using OO
1718							my $attr = $book->sheet (1)->attr ("A3"); # Same using OO
1719
1720							To get to the C<font> attribute, use any of these:
1721
1722							my $font = $book[1]{attr}[1][3]{font};
1723							my $font = $book->sheet (1)->attr (1, 3)->{font};
1724							my $font = $book->sheet (1)->attr ("A3")->font;
1725
1726							=head3 Merged cells
1727							X<merged>
1728
1729							Note that only
1730							L<Spreadsheet::ReadSXC\|https://metacpan.org/release/Spreadsheet-ReadSXC>
1731							documents the use of merged cells, and not in a way useful for the spreadsheet
1732							consumer.
1733
1734							CSV does not support merged cells (though future implementations of CSV
1735							for the web might).
1736
1737							The documentation of merged areas in
1738							L<Spreadsheet::ParseExcel\|https://metacpan.org/release/Spreadsheet-ParseExcel> and
1739							L<Spreadsheet::ParseXLSX\|https://metacpan.org/release/Spreadsheet-ParseXLSX> can
1740							be found in
1741							L<Spreadsheet::ParseExcel::Worksheet\|https://metacpan.org/release/Spreadsheet-ParseExcel-Worksheet>
1742							and L<Spreadsheet::ParseExcel::Cell\|https://metacpan.org/release/Spreadsheet-ParseExcel-Cell>.
1743
1744							None of basic L<Spreadsheet::XLSX\|https://metacpan.org/release/Spreadsheet-XLSX>,
1745							L<Spreadsheet::ParseExcel\|https://metacpan.org/release/Spreadsheet-ParseExcel>, and
1746							L<Spreadsheet::ParseXLSX\|https://metacpan.org/release/Spreadsheet-ParseXLSX> manual
1747							pages mention merged cells at all.
1748
1749							This module just tries to return the information in a generic way.
1750
1751							Given this spreadsheet as an example
1752
1753							merged.xlsx:
1754
1755							A B C
1756							+-----+-----------+
1757							1\| \| foo \|
1758							+-----+ +
1759							2\| bar \| \|
1760							\| +-----+-----+
1761							3\| \| urg \| orc \|
1762							+-----+-----+-----+
1763
1764							the information extracted from that undocumented information is
1765							returned in the C<merged> entry of the sheet's hash as a list of
1766							top-left, bottom-right coordinate pars (col, row, col, row). For
1767							given example, that would be:
1768
1769							$ss->{merged} = [
1770							[ 1, 2, 1, 3 ], # A2-A3
1771							[ 2, 1, 3, 2 ], # B1-C2
1772							];
1773
1774							When the attributes are also enabled, there is some merge information
1775							copied directly from the cell information, but again, that stems from
1776							code analysis and not from documentation:
1777
1778							my $ss = ReadData ("merged.xlsx", attr => 1)->[1];
1779							foreach my $row (1 .. $ss->{maxrow}) {
1780							foreach my $col (1 .. $ss->{maxcol}) {
1781							my $cell = cr2cell ($col, $row);
1782							printf "%s %-3s %d ", $cell, $ss->{$cell},
1783							$ss->{attr}[$col][$row]{merged};
1784							}
1785							print "\n";
1786							}
1787
1788							A1 0 B1 foo 1 C1 1
1789							A2 bar 1 B2 1 C2 1
1790							A3 1 B3 urg 0 C3 orc 0
1791
1792							In this example, there is no way to see if C<B2> is merged to C<A2> or
1793							to C<B1> without analyzing all surrounding cells. This could as well
1794							mean C<A2:A3>, C<B1:C1>, C<B2:C2>, as C<A2:A3>, C<B1:B2>, C<C1:C2>, as
1795							C<A2:A3>, C<B1:C2>.
1796							Use the L<C<merged>\|/merged> entry described above to find out what
1797							fields are merged to what other fields.
1798
1799							=head1 TOOLS
1800
1801							This modules comes with a few tools that perform tasks from the FAQ, like
1802							"How do I select only column D through F from sheet 2 into a CSV file?"
1803
1804							If the module was installed without the tools, you can find them here:
1805							https://github.com/Tux/Spreadsheet-Read/tree/master/examples
1806
1807							=head2 C<xlscat>
1808
1809							Show (parts of) a spreadsheet in plain text, CSV, or HTML
1810
1811							usage: xlscat [-s <sep>] [-L] [-n] [-A] [-u] [Selection] file.xls
1812							[-c \| -m] [-u] [Selection] file.xls
1813							-i [-S sheets] file.xls
1814							Generic options:
1815							-v[#] Set verbose level (xlscat/xlsgrep)
1816							-d[#] Set debug level (Spreadsheet::Read)
1817							-u Use unformatted values
1818							--noclip Do not strip empty sheets and
1819							trailing empty rows and columns
1820							-e <enc> Set encoding for input and output
1821							-b <enc> Set encoding for input
1822							-a <enc> Set encoding for output
1823							Input CSV:
1824							--in-sep=c Set input sep_char for CSV
1825							Input XLS:
1826							--dtfmt=fmt Specify the default date format to replace 'm-d-yy'
1827							the default replacement is 'yyyy-mm-dd'
1828							Output Text (default):
1829							-s <sep> Use separator <sep>. Default '\|', \n allowed
1830							-L Line up the columns
1831							-n [skip] Number lines (prefix with column number)
1832							optionally skip <skip> (header) lines
1833							-A Show field attributes in ANSI escapes
1834							-h[#] Show # header lines
1835							Output Index only:
1836							-i Show sheet names and size only
1837							Output CSV:
1838							-c Output CSV, separator = ','
1839							-m Output CSV, separator = ';'
1840							Output HTML:
1841							-H Output HTML
1842							Selection:
1843							-S <sheets> Only print sheets <sheets>. 'all' is a valid set
1844							Default only prints the first sheet
1845							-R <rows> Only print rows <rows>. Default is 'all'
1846							-C <cols> Only print columns <cols>. Default is 'all'
1847							-F <flds> Only fields <flds> e.g. -FA3,B16
1848							Ordering (column numbers in result set after selection):
1849							--sort=spec Sort output (e.g. --sort=3,2r,5n,1rn+2)
1850							+# - first # lines do not sort (header)
1851							# - order on column # lexical ascending
1852							#n - order on column # numeric ascending
1853							#r - order on column # lexical descending
1854							#rn - order on column # numeric descending
1855
1856							=head2 C<xlsgrep>
1857
1858							Show (parts of) a spreadsheet that match a pattern in plain text, CSV, or HTML
1859
1860							usage: xlsgrep [-s <sep>] [-L] [-n] [-A] [-u] [Selection] pattern file.xls
1861							[-c \| -m] [-u] [Selection] pattern file.xls
1862							-i [-S sheets] pattern file.xls
1863							Generic options:
1864							-v[#] Set verbose level (xlscat/xlsgrep)
1865							-d[#] Set debug level (Spreadsheet::Read)
1866							-u Use unformatted values
1867							--noclip Do not strip empty sheets and
1868							trailing empty rows and columns
1869							-e <enc> Set encoding for input and output
1870							-b <enc> Set encoding for input
1871							-a <enc> Set encoding for output
1872							Input CSV:
1873							--in-sep=c Set input sep_char for CSV
1874							Input XLS:
1875							--dtfmt=fmt Specify the default date format to replace 'm-d-yy'
1876							the default replacement is 'yyyy-mm-dd'
1877							Output Text (default):
1878							-s <sep> Use separator <sep>. Default '\|', \n allowed
1879							-L Line up the columns
1880							-n [skip] Number lines (prefix with column number)
1881							optionally skip <skip> (header) lines
1882							-A Show field attributes in ANSI escapes
1883							-h[#] Show # header lines
1884							Grep options:
1885							-i Ignore case
1886							-w Match whole words only
1887							Output CSV:
1888							-c Output CSV, separator = ','
1889							-m Output CSV, separator = ';'
1890							Output HTML:
1891							-H Output HTML
1892							Selection:
1893							-S <sheets> Only print sheets <sheets>. 'all' is a valid set
1894							Default only prints the first sheet
1895							-R <rows> Only print rows <rows>. Default is 'all'
1896							-C <cols> Only print columns <cols>. Default is 'all'
1897							-F <flds> Only fields <flds> e.g. -FA3,B16
1898							Ordering (column numbers in result set after selection):
1899							--sort=spec Sort output (e.g. --sort=3,2r,5n,1rn+2)
1900							+# - first # lines do not sort (header)
1901							# - order on column # lexical ascending
1902							#n - order on column # numeric ascending
1903							#r - order on column # lexical descending
1904							#rn - order on column # numeric descending
1905
1906							=head2 C<xls2csv>
1907
1908							Convert a spreadsheet to CSV. This is just a small wrapper over C<xlscat>.
1909
1910							usage: xls2csv [ -o file.csv ] file.xls
1911
1912							=head2 C<ss2tk>
1913
1914							Show a spreadsheet in a perl/Tk spreadsheet widget
1915
1916							usage: ss2tk [-w <width>] [X11 options] file.xls [<pattern>]
1917							-w <width> use <width> as default column width (4)
1918
1919							=head2 C<ssdiff>
1920
1921							Show the differences between two spreadsheets.
1922
1923							usage: examples/ssdiff [--verbose[=1]] file.xls file.xlsx
1924
1925							=head1 TODO
1926
1927							=over 4
1928
1929							=item Options
1930
1931							=over 2
1932
1933							=item Module Options
1934
1935							New Spreadsheet::Read options are bound to happen. I'm thinking of an
1936							option that disables the reading of the data entirely to speed up an
1937							index request (how many sheets/fields/columns). See C<xlscat -i>.
1938
1939							=item Parser options
1940
1941							Try to transparently support as many options as the encapsulated modules
1942							support regarding (un)formatted values, (date) formats, hidden columns
1943							rows or fields etc. These could be implemented like C<attr> above but
1944							names C<meta>, or just be new values in the C<attr> hashes.
1945
1946							=back
1947
1948							=item Other parsers
1949
1950							Add support for new(er) parsers for already supported formats, like
1951
1952							=over 2
1953
1954							=item Data::XLSX::Parser
1955
1956							Data::XLSX::Parser provides faster way to parse Microsoft Excel's .xlsx
1957							files. The implementation of this module is highly inspired from Python's
1958							FastXLSX library.
1959
1960							This is SAX based parser, so you can parse very large XLSX file with
1961							lower memory usage.
1962
1963							=back
1964
1965							=item Other spreadsheet formats
1966
1967							I consider adding any spreadsheet interface that offers a usable API.
1968
1969							Under investigation:
1970
1971							=over 2
1972
1973							=item Gnumeric (.gnumeric)
1974
1975							I have seen no existing CPAN module yet.
1976
1977							It is gzip'ed XML
1978
1979							=item Kspread (.ksp)
1980
1981							Now knows as Calligra Sheets.
1982
1983							I have seen no existing CPAN module yet.
1984
1985							It is XML in ZIP
1986
1987							=back
1988
1989							=item Alternative parsers for existing formats
1990
1991							As long as the alternative has a good reason for its existence, and the
1992							API of that parser reasonable fits in my approach, I will consider to
1993							implement the glue layer, or apply patches to do so as long as these
1994							match what F<CONTRIBUTING.md> describes.
1995
1996							=back
1997
1998							=head1 SEE ALSO
1999
2000							=over 2
2001
2002							=item Text::CSV_XS, Text::CSV_PP
2003
2004							See L<Text::CSV_XS\|https://metacpan.org/release/Text-CSV_XS> ,
2005							L<Text::CSV_PP\|https://metacpan.org/release/Text-CSV_PP> , and
2006							L<Text::CSV\|https://metacpan.org/release/Text-CSV> documentation.
2007
2008							L<Text::CSV\|https://metacpan.org/release/Text-CSV> is a wrapper over Text::CSV_XS (the fast XS version) and/or
2009							L<Text::CSV_PP\|https://metacpan.org/release/Text-CSV_PP> (the pure perl version).
2010
2011							=item Spreadsheet::ParseExcel
2012
2013							L<Spreadsheet::ParseExcel\|https://metacpan.org/release/Spreadsheet-ParseExcel> is
2014							the best parser for old-style Microsoft Excel (.xls) files.
2015
2016							=item Spreadsheet::ParseXLSX
2017
2018							L<Spreadsheet::ParseXLSX\|https://metacpan.org/release/Spreadsheet-ParseXLSX> is
2019							like L<Spreadsheet::ParseExcel\|https://metacpan.org/release/Spreadsheet-ParseExcel>,
2020							but for new Microsoft Excel 2007+ files (.xlsx). They have the same API.
2021
2022							This module uses L<XML::Twig\|https://metacpan.org/release/XML-Twig> to parse the
2023							internal XML.
2024
2025							=item Spreadsheet::XLSX
2026
2027							See L<Spreadsheet::XLSX\|https://metacpan.org/release/Spreadsheet-XLSX>
2028							documentation.
2029
2030							This module is dead and deprecated. It is B<buggy and unmaintained>. I<Please>
2031							use L<Spreadsheet::ParseXLSX\|https://metacpan.org/release/Spreadsheet-ParseXLSX>
2032							instead.
2033
2034							=item Spreadsheet::ReadSXC
2035
2036							L<Spreadsheet::ReadSXC\|https://metacpan.org/release/Spreadsheet-ReadSXC> is a
2037							parser for OpenOffice/LibreOffice (.sxc and .ods) spreadsheet files.
2038
2039							=item Spreadsheet::BasicRead
2040
2041							See L<Spreadsheet::BasicRead\|https://metacpan.org/release/Spreadsheet-BasicRead>
2042							for xlscat-like functionality (Excel only)
2043
2044							=item Spreadsheet::ConvertAA
2045
2046							See L<Spreadsheet::ConvertAA\|https://metacpan.org/release/Spreadsheet-ConvertAA>
2047							for an alternative set of L</cell2cr>/L</cr2cell> pair.
2048
2049							=item Spreadsheet::Perl
2050
2051							L<Spreadsheet::Perl\|https://metacpan.org/release/Spreadsheet-Perl> offers a Pure
2052							Perl implementation of a spreadsheet engine. Users that want this format to be
2053							supported in Spreadsheet::Read are hereby motivated to offer patches. It is
2054							not high on my TODO-list.
2055
2056							=item Spreadsheet::CSV
2057
2058							L<Spreadsheet::CSV\|https://metacpan.org/release/Spreadsheet-CSV> offers the
2059							interesting approach of seeing all supported spreadsheet formats as if it were
2060							CSV, mimicking the L<Text::CSV_XS\|https://metacpan.org/release/Text-CSV_XS>
2061							interface.
2062
2063							=item xls2csv
2064
2065							L<xls2csv\|https://metacpan.org/release/xls2csv> offers an alternative for my
2066							C<xlscat -c>, in the xls2csv tool, but this tool focuses on character encoding
2067							transparency, and requires some other modules.
2068
2069							=back
2070
2071							=head1 AUTHOR
2072
2073							H.Merijn Brand, <h.m.brand@xs4all.nl>
2074
2075							=head1 COPYRIGHT AND LICENSE
2076
2077							Copyright (C) 2005-2018 H.Merijn Brand
2078
2079							This library is free software; you can redistribute it and/or modify
2080							it under the same terms as Perl itself.
2081
2082							=cut