File Coverage

blib/lib/Spreadsheet/Read.pm
Criterion Covered Total %
statement 458 539 84.9
branch 394 528 74.6
condition 156 224 69.6
subroutine 46 46 100.0
pod 13 13 100.0
total 1067 1350 79.0


line stmt bran cond sub pod time code
1             #!/pro/bin/perl
2              
3             package Spreadsheet::Read;
4              
5             =head1 NAME
6              
7             Spreadsheet::Read - Read the data from a spreadsheet
8              
9             =head1 SYNOPSIS
10              
11             use Spreadsheet::Read;
12             my $book = ReadData ("test.csv", sep => ";");
13             my $book = ReadData ("test.sxc");
14             my $book = ReadData ("test.ods");
15             my $book = ReadData ("test.xls");
16             my $book = ReadData ("test.xlsx");
17             my $book = ReadData ($fh, parser => "xls");
18              
19             Spreadsheet::Read::add ($book, "sheet.csv");
20              
21             my $sheet = $book->[1]; # first datasheet
22             my $cell = $book->[1]{A3}; # content of field A3 of sheet 1
23             my $cell = $book->[1]{cell}[1][3]; # same, unformatted
24              
25             # OO API
26             my $book = Spreadsheet::Read->new ("file.csv");
27             my $sheet = $book->sheet (1);
28             my $cell = $sheet->cell ("A3");
29             my $cell = $sheet->cell (1, 3);
30              
31             $book->add ("test.xls");
32              
33             =cut
34              
35 53     53   5239645 use 5.8.1;
  53         714  
36 53     53   297 use strict;
  53         109  
  53         1255  
37 53     53   281 use warnings;
  53         120  
  53         3448  
38              
39             our $VERSION = "0.81";
40 1     1 1 131 sub Version { $VERSION }
41              
42 53     53   400 use Carp;
  53         152  
  53         3471  
43 53     53   379 use Exporter;
  53         113  
  53         4949  
44             our @ISA = qw( Exporter );
45             our @EXPORT = qw( ReadData cell2cr cr2cell );
46             our @EXPORT_OK = qw( parses rows cellrow row add );
47              
48 53     53   29800 use Encode qw( decode );
  53         826256  
  53         4470  
49 53     53   47041 use File::Temp qw( );
  53         1145819  
  53         1811  
50 53     53   33881 use Data::Dumper;
  53         332531  
  53         56844  
51              
52             my @parsers = (
53             [ csv => "Text::CSV_XS", "0.71" ],
54             [ csv => "Text::CSV_PP", "1.17" ],
55             [ csv => "Text::CSV", "1.17" ],
56             [ ods => "Spreadsheet::ReadSXC", "0.20" ],
57             [ sxc => "Spreadsheet::ReadSXC", "0.20" ],
58             [ xls => "Spreadsheet::ParseExcel", "0.34" ],
59             [ xlsx => "Spreadsheet::ParseXLSX", "0.24" ],
60             [ xlsx => "Spreadsheet::XLSX", "0.13" ],
61             [ prl => "Spreadsheet::Perl", "" ],
62              
63             # Helper modules
64             [ ios => "IO::Scalar", "" ],
65             [ dmp => "Data::Peek", "" ],
66             );
67             my %can = ( supports => { map { $_->[1] => $_->[2] } @parsers });
68             foreach my $p (@parsers) {
69             my $format = $p->[0];
70             $can{$format} and next;
71             $can{$format} = "";
72             my $preset = $ENV{"SPREADSHEET_READ_\U$format"} or next;
73             my $min_version = $can{supports}{$preset};
74             unless ($min_version) {
75             # Catch weirdness like $SPREADSHEET_READ_XLSX = "DBD::Oracle"
76             $can{$format} = "!$preset is not supported for the $format format";
77             next;
78             }
79             if (eval "local \$_; require $preset" and not $@) {
80             # forcing a parser should still check the version
81             my $ok;
82             my $has = $preset->VERSION;
83             $has =~ s/_[0-9]+$//; # Remove beta-part
84             if ($min_version =~ m/^v([0-9.]+)/) { # clumsy versions
85             my @min = split m/\./ => $1;
86             $has =~ s/^v//;
87             my @has = split m/\./ => $has;
88             $ok = (($has[0] * 1000 + $has[1]) * 1000 + $has[2]) >=
89             (($min[0] * 1000 + $min[1]) * 1000 + $min[2]);
90             }
91             else { # normal versions
92             $ok = $has >= $min_version;
93             }
94             $ok or $preset = "!$preset";
95             }
96             else {
97             $preset = "!$preset";
98             }
99             $can{$format} = $preset;
100             }
101             delete $can{supports};
102             for (@parsers) {
103             my ($flag, $mod, $vsn) = @$_;
104             $can{$flag} and next;
105             eval "require $mod; \$vsn and ${mod}->VERSION (\$vsn); \$can{\$flag} = '$mod'" or
106             $_->[0] = "! Cannot use $mod version $vsn: $@";
107             }
108             $can{sc} = __PACKAGE__; # SquirelCalc is built-in
109              
110             defined $Spreadsheet::ParseExcel::VERSION && $Spreadsheet::ParseExcel::VERSION < 0.61 and
111             *Spreadsheet::ParseExcel::Workbook::get_active_sheet = sub { undef; };
112              
113             my $debug = 0;
114             my %def_opts = (
115             rc => 1,
116             cells => 1,
117             attr => 0,
118             clip => undef, # $opt{cells};
119             strip => 0,
120             pivot => 0,
121             dtfmt => "yyyy-mm-dd", # Format 14
122             debug => 0,
123             passwd => undef,
124             parser => undef,
125             sep => undef,
126             quote => undef,
127             label => undef,
128             );
129             my @def_attr = (
130             type => "text",
131             fgcolor => undef,
132             bgcolor => undef,
133             font => undef,
134             size => undef,
135             format => undef,
136             halign => "left",
137             valign => "top",
138             bold => 0,
139             italic => 0,
140             uline => 0,
141             wrap => 0,
142             merged => 0,
143             hidden => 0,
144             locked => 0,
145             enc => "utf-8", # $ENV{LC_ALL} // $ENV{LANG} // ...
146             formula => undef,
147             );
148              
149             # Helper functions
150              
151             sub _dump {
152 7     7   19 my ($label, $ref) = @_;
153 7 50       23 if ($can{dmp}) {
154 7         48 print STDERR Data::Peek::DDumper ({ $label => $ref });
155             }
156             else {
157 0         0 print STDERR Data::Dumper->Dump ([$ref], [$label]);
158             }
159             } # _dump
160              
161             sub _parser {
162 342 100   342   1460 my $type = shift or return "";
163 100         308 $type = lc $type;
164             # Aliases and fullnames
165 100 100       343 $type eq "excel" and return "xls";
166 99 50       302 $type eq "excel2007" and return "xlsx";
167 99 100       307 $type eq "oo" and return "sxc";
168 98 100       296 $type eq "ods" and return "sxc";
169 96 100       292 $type eq "openoffice" and return "sxc";
170 95 50       291 $type eq "libreoffice" and return "sxc";
171 95 100       265 $type eq "perl" and return "prl";
172 94 50       261 $type eq "squirelcalc" and return "sc";
173 94 100       558 return exists $can{$type} ? $type : "";
174             } # _parser
175              
176             sub new {
177 12     12 1 6892 my $class = shift;
178 12         49 my $r = ReadData (@_);
179 12 100       76 unless ($r) {
180 2 50       17 @_ and return; # new with arguments failed to open resource
181 0         0 $r = [{
182             parsers => [],
183             error => undef,
184             sheets => 0,
185             sheet => { },
186             }];
187             }
188 10         212 bless $r => $class;
189             } # new
190              
191             # Spreadsheet::Read::parses ("csv") or die "Cannot parse CSV"
192             sub parses {
193 63 100   63 1 145584 ref $_[0] eq __PACKAGE__ and shift;
194 63 100       277 my $type = _parser (shift) or return 0;
195 61 100       407 if ($can{$type} =~ m/^!\s*(.*)/) {
196 8         32 $@ = $1;
197 8         28 return 0;
198             }
199 53         183 return $can{$type};
200             } # parses
201              
202             sub sheets {
203 4     4 1 14 my $ctrl = shift->[0];
204 4         6 my %s = %{$ctrl->{sheet}};
  4         19  
205 4 100       35 wantarray ? sort { $s{$a} <=> $s{$b} } keys %s : $ctrl->{sheets};
  4         26  
206             } # sheets
207              
208             # col2label (4) => "D"
209             sub col2label {
210 15697 100   15697 1 32919 ref $_[0] eq __PACKAGE__ and shift;
211 15697         20483 my $c = shift;
212 15697 100 100     41042 defined $c && $c > 0 or return "";
213 15693         21423 my $cell = "";
214 15693         26123 while ($c) {
215 53     53   30000 use integer;
  53         836  
  53         301  
216              
217 15710         29729 substr $cell, 0, 0, chr (--$c % 26 + ord "A");
218 15710         27647 $c /= 26;
219             }
220 15693         42026 $cell;
221             } # col2label
222              
223             # cr2cell (4, 18) => "D18"
224             # No prototype to allow 'cr2cell (@rowcol)'
225             sub cr2cell {
226 15495 100   15495 1 189546 ref $_[0] eq __PACKAGE__ and shift;
227 15495         25206 my ($c, $r) = @_;
228 15495 100 100     72297 defined $c && defined $r && $c > 0 && $r > 0 or return "";
      100        
      100        
229 15489         24027 col2label ($c) . $r;
230             } # cr2cell
231              
232             # cell2cr ("D18") => (4, 18)
233             sub cell2cr {
234 604 100   604 1 610707 ref $_[0] eq __PACKAGE__ and shift;
235 604 100 100     5633 my ($cc, $r) = (uc ($_[0]||"") =~ m/^([A-Z]+)([0-9]+)$/) or return (0, 0);
236 600         1241 my $c = 0;
237 600         2499 while ($cc =~ s/^([A-Z])//) {
238 608         2497 $c = 26 * $c + 1 + ord ($1) - ord ("A");
239             }
240 600         2112 ($c, $r);
241             } # cell2cr
242              
243             # my @row = cellrow ($book->[1], 1);
244             # my @row = $book->cellrow (1, 1);
245             sub cellrow {
246 10 100   10 1 69 my $sheet = ref $_[0] eq __PACKAGE__ ? (shift)->[shift] : shift or return;
    100          
247 9 100 66     65 ref $sheet eq "HASH" && exists $sheet->{cell} or return;
248 7 50 33     39 exists $sheet->{maxcol} && exists $sheet->{maxrow} or return;
249 7 100       26 my $row = shift or return;
250 6 100 100     39 $row > 0 && $row <= $sheet->{maxrow} or return;
251 4         12 my $s = $sheet->{cell};
252 4         17 map { $s->[$_][$row] } 1..$sheet->{maxcol};
  76         147  
253             } # cellrow
254              
255             # my @row = row ($book->[1], 1);
256             # my @row = $book->row (1, 1);
257             sub row {
258 10 100   10 1 3508 my $sheet = ref $_[0] eq __PACKAGE__ ? (shift)->[shift] : shift or return;
    100          
259 9 100 66     65 ref $sheet eq "HASH" && exists $sheet->{cell} or return;
260 7 50 33     39 exists $sheet->{maxcol} && exists $sheet->{maxrow} or return;
261 7 100       48 my $row = shift or return;
262 6 100 100     40 $row > 0 && $row <= $sheet->{maxrow} or return;
263 4         23 map { $sheet->{cr2cell ($_, $row)} } 1..$sheet->{maxcol};
  76         134  
264             } # row
265              
266             # Convert {cell}'s [column][row] to a [row][column] list
267             # my @rows = rows ($book->[1]);
268             sub rows {
269 33 50   33 1 19369 my $sheet = ref $_[0] eq __PACKAGE__ ? (shift)->[shift] : shift or return;
    100          
270 30 100 100     169 ref $sheet eq "HASH" && exists $sheet->{cell} or return;
271 26 100 100     120 exists $sheet->{maxcol} && exists $sheet->{maxrow} or return;
272 5         15 my $s = $sheet->{cell};
273              
274             map {
275 20         33 my $r = $_;
276 20         36 [ map { $s->[$_][$r] } 1..$sheet->{maxcol} ];
  80         205  
277 5         28 } 1..$sheet->{maxrow};
278             } # rows
279              
280             sub sheet {
281 23     23 1 650 my ($book, $sheet) = @_;
282 23 100 100     116 $book && $sheet or return;
283 21         43 my $class = "Spreadsheet::Read::Sheet";
284             $sheet =~ m/^[0-9]+$/ && $sheet >= 1 && $sheet <= $book->[0]{sheets} and
285 21 100 66     301 return bless $book->[$sheet] => $class;
      100        
286             exists $book->[0]{sheet}{$sheet} and
287 6 100       26 return bless $book->[$book->[0]{sheet}{$sheet}] => $class;
288 5         17 foreach my $idx (1 .. $book->[0]{sheets}) {
289 6 100       24 $book->[$idx]{label} eq $sheet and
290             return bless $book->[$idx] => $class;
291             }
292 4         19 return;
293             } # sheet
294              
295             # If option "clip" is set, remove the trailing rows and
296             # columns in each sheet that contain no visible data
297             sub _clipsheets {
298 227     227   616 my ($opt, $ref) = @_;
299              
300 227 50       766 $ref->[0]{sheets} or return $ref;
301              
302 227         610 my ($rc, $cl) = ($opt->{rc}, $opt->{cells});
303 227         646 my ($oc, $os, $oa) = ($opt->{clip}, $opt->{strip}, $opt->{attr});
304              
305             # Strip leading/trailing spaces
306 227 100 100     1020 if ($os || $oc) {
307 212         703 foreach my $sheet (1 .. $ref->[0]{sheets}) {
308 254         659 $ref->[$sheet]{indx} = $sheet;
309 254         448 my $ss = $ref->[$sheet];
310 254 100 66     1331 $ss->{maxrow} && $ss->{maxcol} or next;
311 247         587 my ($mc, $mr) = (0, 0);
312 247         656 foreach my $row (1 .. $ss->{maxrow}) {
313 1954         3372 foreach my $col (1 .. $ss->{maxcol}) {
314 16636 100       26429 if ($rc) {
315 14738 100       26967 defined $ss->{cell}[$col][$row] or next;
316 5881 100       10285 $os & 2 and $ss->{cell}[$col][$row] =~ s/\s+$//;
317 5881 100       9764 $os & 1 and $ss->{cell}[$col][$row] =~ s/^\s+//;
318 5881 100       10671 if (length $ss->{cell}[$col][$row]) {
319 4417 100       7531 $col > $mc and $mc = $col;
320 4417 100       7463 $row > $mr and $mr = $row;
321             }
322             }
323 7779 100       12465 if ($cl) {
324 7560         11687 my $cell = cr2cell ($col, $row);
325 7560 100       15679 defined $ss->{$cell} or next;
326 6233 100       11280 $os & 2 and $ss->{$cell} =~ s/\s+$//;
327 6233 100       10084 $os & 1 and $ss->{$cell} =~ s/^\s+//;
328 6233 100       11221 if (length $ss->{$cell}) {
329 4757 100       7763 $col > $mc and $mc = $col;
330 4757 100       9090 $row > $mr and $mr = $row;
331             }
332             }
333             }
334             }
335              
336 247 100 66     1430 $oc && ($mc < $ss->{maxcol} || $mr < $ss->{maxrow}) or next;
      66        
337              
338             # Remove trailing empty columns
339 106         277 foreach my $col (($mc + 1) .. $ss->{maxcol}) {
340 243 100       627 $rc and undef $ss->{cell}[$col];
341 243 100       518 $oa and undef $ss->{attr}[$col];
342 243 100       462 $cl or next;
343 193         333 my $c = col2label ($col);
344 193         1424 delete $ss->{"$c$_"} for 1 .. $ss->{maxrow};
345             }
346              
347             # Remove trailing empty rows
348 106         285 foreach my $row (($mr + 1) .. $ss->{maxrow}) {
349 139         271 foreach my $col (1 .. $mc) {
350 64 50       143 $cl and delete $ss->{cr2cell ($col, $row)};
351 64 50       183 $rc and undef $ss->{cell} [$col][$row];
352 64 50       152 $oa and undef $ss->{attr} [$col][$row];
353             }
354             }
355              
356 106         330 ($ss->{maxrow}, $ss->{maxcol}) = ($mr, $mc);
357             }
358             }
359              
360 227 100       657 if ($opt->{pivot}) {
361 1         5 foreach my $sheet (1 .. $ref->[0]{sheets}) {
362 1         4 my $ss = $ref->[$sheet];
363 1 0 33     4 $ss->{maxrow} || $ss->{maxcol} or next;
364 1 50       6 my $mx = $ss->{maxrow} > $ss->{maxcol} ? $ss->{maxrow} : $ss->{maxcol};
365 1         5 foreach my $row (2 .. $mx) {
366 18         40 foreach my $col (1 .. ($row - 1)) {
367             $opt->{rc} and
368             ($ss->{cell}[$col][$row], $ss->{cell}[$row][$col]) =
369 171 50       527 ($ss->{cell}[$row][$col], $ss->{cell}[$col][$row]);
370             $opt->{cells} and
371             ($ss->{cr2cell ($col, $row)}, $ss->{cr2cell ($row, $col)}) =
372 171 50       352 ($ss->{cr2cell ($row, $col)}, $ss->{cr2cell ($col, $row)});
373             }
374             }
375 1         9 ($ss->{maxcol}, $ss->{maxrow}) = ($ss->{maxrow}, $ss->{maxcol});
376             }
377             }
378              
379 227         5657 $ref;
380             } # _clipsheets
381              
382             # Convert a single color (index) to a color
383             sub _xls_color {
384 1742     1742   21196 my $clr = shift;
385 1742 100       3558 defined $clr or return undef;
386 850 50       1532 $clr eq "#000000" and return undef;
387 850 50       1689 $clr =~ m/^#[0-9a-fA-F]+$/ and return lc $clr;
388 850 100 66     2901 $clr == 0 || $clr == 32767 and return undef; # Default fg color
389 468         1142 return "#" . lc Spreadsheet::ParseExcel->ColorIdxToRGB ($clr);
390             } # _xls_color
391              
392             # Convert a fill [ $pattern, $front_color, $back_color ] to a single background
393             sub _xls_fill {
394 1526     1526   2743 my ($p, $fg, $bg) = @_;
395 1526 50       2702 defined $p or return undef;
396 1526 50       2759 $p == 32767 and return undef; # Default fg color
397 1526 100 100     17004 $p == 0 && !defined $bg and return undef; # No fill bg color
398 634 100       1152 $p == 1 and return _xls_color ($fg);
399 418 50 33     6280 $bg < 8 || $bg > 63 and return undef; # see Workbook.pm#106
400 0         0 return _xls_color ($bg);
401             } # _xls_fill
402              
403             sub ReadData {
404 279 100   279 1 301347 my $txt = shift or return;
405              
406 276         637 my %opt;
407 276 100       856 if (@_) {
408 221 100       1175 if (ref $_[0] eq "HASH") { %opt = %{shift @_} }
  20 50       35  
  20         86  
409 201         836 elsif (@_ % 2 == 0) { %opt = @_ }
410             }
411              
412 276 100       1205 exists $opt{rc} or $opt{rc} = $def_opts{rc};
413 276 100       968 exists $opt{cells} or $opt{cells} = $def_opts{cells};
414 276 100       899 exists $opt{attr} or $opt{attr} = $def_opts{attr};
415 276 100       796 exists $opt{clip} or $opt{clip} = $opt{cells};
416 276 100       839 exists $opt{strip} or $opt{strip} = $def_opts{strip};
417 276 100       954 exists $opt{dtfmt} or $opt{dtfmt} = $def_opts{dtfmt};
418              
419             # $debug = $opt{debug} // 0;
420 276 100       808 $debug = defined $opt{debug} ? $opt{debug} : $def_opts{debug};
421 276 100       745 $debug > 4 and _dump (Options => \%opt);
422              
423 31         105 my %parser_opts = map { $_ => $opt{$_} }
424 276         2651 grep { !exists $def_opts{$_} }
  1764         3624  
425             keys %opt;
426              
427 276         1162 my $_parser = _parser ($opt{parser});
428              
429 276 100       1146 my $io_ref = ref ($txt) =~ m/GLOB|IO/ ? $txt : undef;
430             my $io_fil = $io_ref ? 0 : $txt =~ m/\0/ ? 0
431 53 100   53   115311 : do { no warnings "newline"; -f $txt };
  53 100       175  
  53         282478  
  276         1062  
  255         6142  
432 276 100 100     1781 my $io_txt = $io_ref || $io_fil ? 0 : 1;
433              
434 276 100 100     3276 $io_fil && ! -s $txt and do { $@ = "$txt is empty"; return };
  11         60  
  11         65  
435 265 100 100     1540 $io_ref && eof $txt and do { $@ = "Empty stream"; return };
  5         20  
  5         86  
436              
437 260 100 100     2194 if ($opt{parser} ? $_parser eq "csv" : ($io_fil && $txt =~ m/\.(csv)$/i)) {
    100          
438 106 50       378 $can{csv} or croak "CSV parser not installed";
439              
440 106 100       410 my $label = defined $opt{label} ? $opt{label} : $io_fil ? $txt : "IO";
    100          
441              
442 106 50       411 $debug and print STDERR "Opening CSV $label using $can{csv}-", $can{csv}->VERSION, "\n";
443              
444             my @data = (
445             { type => "csv",
446             parser => $can{csv},
447             version => $can{csv}->VERSION,
448             parsers => [ {
449             type => "csv",
450             parser => $can{csv},
451             version => $can{csv}->VERSION,
452 106         3423 }],
453             error => undef,
454             quote => '"',
455             sepchar => ',',
456             sheets => 1,
457             sheet => { $label => 1 },
458             },
459             { parser => 0,
460             label => $label,
461             maxrow => 0,
462             maxcol => 0,
463             cell => [],
464             attr => [],
465             merged => [],
466             active => 1,
467             },
468             );
469              
470 106         560 my ($sep, $quo, $in) = (",", '"');
471 106 100       332 defined $opt{sep} and $sep = $opt{sep};
472 106 100       281 defined $opt{quote} and $quo = $opt{quote};
473 106 50       311 $debug > 8 and _dump (debug => {
474             data => \@data, txt => $txt, io_ref => $io_ref, io_fil => $io_fil });
475 106 100       322 if ($io_fil) {
    100          
    100          
    50          
476 94 100 100     306 unless (defined $opt{quote} && defined $opt{sep}) {
477 87 50       3736 open $in, "<", $txt or return;
478 87         1831 my $l1 = <$in>;
479              
480 87 100       439 $quo = defined $opt{quote} ? $opt{quote} : '"';
481             $sep = # If explicitly set, use it
482             defined $opt{sep} ? $opt{sep} :
483             # otherwise start auto-detect with quoted strings
484 87 50       913 $l1 =~ m/["0-9];["0-9;]/ ? ";" :
    100          
    100          
    50          
    50          
    50          
    100          
    100          
    50          
485             $l1 =~ m/["0-9],["0-9,]/ ? "," :
486             $l1 =~ m/["0-9]\t["0-9,]/ ? "\t" :
487             $l1 =~ m/["0-9]\|["0-9,]/ ? "|" :
488             # If neither, then for unquoted strings
489             $l1 =~ m/\w;[\w;]/ ? ";" :
490             $l1 =~ m/\w,[\w,]/ ? "," :
491             $l1 =~ m/\w\t[\w,]/ ? "\t" :
492             $l1 =~ m/\w\|[\w,]/ ? "|" :
493             "," ;
494 87         1348 close $in;
495             }
496 94 50       2882 open $in, "<", $txt or return;
497             }
498             elsif ($io_ref) {
499 10         23 $in = $txt;
500             }
501             elsif (ref $txt eq "SCALAR") {
502 1 50       13 open $in, "<", $txt or croak "Cannot open input: $!";
503             }
504             elsif ($txt =~ m/[\r\n,;]/) {
505 1 50   2   54 open $in, "<", \$txt or croak "Cannot open input: $!";
  2         23  
  2         5  
  2         18  
506             }
507             else {
508 0         0 warn "Input type ", ref $txt,
509             " might not be supported. Please file a ticket\n";
510 0         0 $in = $txt; # Now pray ...
511             }
512 106 50       1409 $debug > 1 and print STDERR "CSV sep_char '$sep', quote_char '$quo'\n";
513             my $csv = $can{csv}->new ({
514             %parser_opts,
515              
516             sep_char => ($data[0]{sepchar} = $sep),
517 106 50       1555 quote_char => ($data[0]{quote} = $quo),
518             keep_meta_info => 1,
519             binary => 1,
520             auto_diag => 1,
521             }) or croak "Cannot create a csv ('$sep', '$quo') parser!";
522              
523 106         23943 while (my $row = $csv->getline ($in)) {
524 413 50       70451 my @row = @$row or last;
525              
526 413         948 my $r = ++$data[1]{maxrow};
527 413 100       1070 @row > $data[1]{maxcol} and $data[1]{maxcol} = @row;
528 413         1121 foreach my $c (0 .. $#row) {
529 2427         3465 my $val = $row[$c];
530 2427         4442 my $cell = cr2cell ($c + 1, $r);
531 2427 100       6865 $opt{rc} and $data[1]{cell}[$c + 1][$r] = $val;
532 2427 100       6275 $opt{cells} and $data[1]{$cell} = $val;
533 2427 50       12353 $opt{attr} and $data[1]{attr}[$c + 1][$r] = { @def_attr };
534             }
535             }
536 106 50       14838 $csv->eof () or $data[0]{error} = [ $csv->error_diag ];
537 106         1659 close $in;
538              
539 106         367 for (@{$data[1]{cell}}) {
  106         547  
540 734 100       1679 defined or $_ = [];
541             }
542 106         571 return _clipsheets \%opt, [ @data ];
543             }
544              
545 154 100       466 if ($io_txt) { # && $_parser !~ m/^xlsx?$/) {
546 29 100 66     435 if ( # /etc/magic: Microsoft Office Document
    50 66        
    100 66        
547             $txt =~ m{\A(\376\067\0\043
548             |\320\317\021\340\241\261\032\341
549             |\333\245-\0\0\0)}x
550             # /usr/share/misc/magic
551             || $txt =~ m{\A.{2080}Microsoft Excel 5.0 Worksheet}
552             || $txt =~ m{\A\x09\x04\x06\x00\x00\x00\x10\x00}
553             ) {
554 1 50       7 $can{xls} or croak "Spreadsheet::ParseExcel not installed";
555 1         2 my $tmpfile;
556 1 50       4 if ($can{ios}) { # Do not use a temp file if IO::Scalar is available
557 1         3 $tmpfile = \$txt;
558             }
559             else {
560 0         0 $tmpfile = File::Temp->new (SUFFIX => ".xls", UNLINK => 1);
561 0         0 binmode $tmpfile;
562 0         0 print $tmpfile $txt;
563 0         0 close $tmpfile;
564             }
565 1 50       59 open $io_ref, "<", $tmpfile or do { $@ = $!; return };
  0         0  
  0         0  
566 1         1005 $io_txt = 0;
567 1         6 $_parser = _parser ($opt{parser} = "xls");
568             }
569             elsif ( # /usr/share/misc/magic
570             $txt =~ m{\APK\003\004.{4,30}(?:\[Content_Types\]\.xml|_rels/\.rels)}
571             ) {
572 0 0       0 $can{xlsx} or croak "XLSX parser not installed";
573 0         0 my $tmpfile;
574 0 0       0 if ($can{ios}) { # Do not use a temp file if IO::Scalar is available
575 0         0 $tmpfile = \$txt;
576             }
577             else {
578 0         0 $tmpfile = File::Temp->new (SUFFIX => ".xlsx", UNLINK => 1);
579 0         0 binmode $tmpfile;
580 0         0 print $tmpfile $txt;
581 0         0 close $tmpfile;
582             }
583 0 0       0 open $io_ref, "<", $tmpfile or do { $@ = $!; return };
  0         0  
  0         0  
584 0         0 $io_txt = 0;
585 0         0 $_parser = _parser ($opt{parser} = "xlsx");
586             }
587             elsif (!$io_ref && $txt =~ m/\.xlsx?$/i) {
588 3         17 $@ = "Cannot open $txt as file";
589 3         50 return;
590             }
591             }
592 151 100 66     1561 if ($opt{parser} ? $_parser =~ m/^xlsx?$/
    100          
593             : ($io_fil && $txt =~ m/\.(xlsx?)$/i && ($_parser = $1))) {
594 75 100       388 my $parse_type = $_parser =~ m/x$/i ? "XLSX" : "XLS";
595 75 50       350 my $parser = $can{lc $parse_type} or
596             croak "Parser for $parse_type is not installed";
597             $debug and print STDERR "Opening $parse_type ", $io_ref ? "<REF>" : $txt,
598 75 50       390 " using $parser-", $can{lc $parse_type}->VERSION, "\n";
    100          
599 75 50       260 $opt{passwd} and $parser_opts{Password} = $opt{passwd};
600 75         265 my $oBook = eval {
601             $io_ref
602             ? $parse_type eq "XLSX"
603             ? $can{xlsx} =~ m/::XLSX$/
604             ? $parser->new ($io_ref)
605             : $parser->new (%parser_opts)->parse ($io_ref)
606             : $parser->new (%parser_opts)->Parse ($io_ref)
607             : $parse_type eq "XLSX"
608 75 0       991 ? $can{xlsx} =~ m/::XLSX$/
    50          
    50          
    100          
    100          
609             ? $parser->new ($txt)
610             : $parser->new (%parser_opts)->parse ($txt)
611             : $parser->new (%parser_opts)->Parse ($txt);
612             };
613 75 50       2298301 unless ($oBook) {
614             # cleanup will fail on folders with spaces.
615 0         0 (my $msg = $@) =~ s/ at \S+ line \d+.*//s;
616 0         0 croak "$parse_type parser cannot parse data: $msg";
617             }
618 75 50       319 $debug > 8 and _dump (oBook => $oBook);
619              
620             # WorkBook keys:
621             # aColor _CurSheet Format SheetCount
622             # ActiveSheet _CurSheet_ FormatStr _skip_chart
623             # Author File NotSetCell _string_contin
624             # BIFFVersion Flg1904 Object Version
625             # _buffer FmtClass PkgStr Worksheet
626             # CellHandler Font _previous_info
627              
628             my @data = ( {
629             type => lc $parse_type,
630             parser => $can{lc $parse_type},
631             version => $can{lc $parse_type}->VERSION,
632             parsers => [{
633             type => lc $parse_type,
634             parser => $can{lc $parse_type},
635             version => $can{lc $parse_type}->VERSION,
636             }],
637             error => undef,
638 75   50     3035 sheets => $oBook->{SheetCount} || 0,
639             sheet => {},
640             } );
641             # $debug and $data[0]{_parser} = $oBook;
642             # Overrule the default date format strings
643             my %def_fmt = (
644             0x0E => lc $opt{dtfmt}, # m-d-yy
645 75         718 0x0F => "d-mmm-yyyy", # d-mmm-yy
646             0x11 => "mmm-yyyy", # mmm-yy
647             0x16 => "yyyy-mm-dd hh:mm", # m-d-yy h:mm
648             );
649 75         627 $oBook->{FormatStr}{$_} = $def_fmt{$_} for keys %def_fmt;
650             my $oFmt = $parse_type eq "XLSX"
651 75 50       823 ? $can{xlsx} =~ m/::XLSX$/
    100          
652             ? Spreadsheet::XLSX::Fmt2007->new
653             : Spreadsheet::ParseExcel::FmtDefault->new
654             : Spreadsheet::ParseExcel::FmtDefault->new;
655              
656 75 100       904 $debug and print STDERR "\t$data[0]{sheets} sheets\n";
657             my $active_sheet = $oBook->get_active_sheet
658             || $oBook->{ActiveSheet}
659 75   33     485 || $oBook->{SelectedSheet};
660 75         999 my $current_sheet = 0;
661 75         169 foreach my $oWkS (@{$oBook->{Worksheet}}) {
  75         264  
662 121         291 $current_sheet++;
663 121 100 100     682 $opt{clip} and !defined $oWkS->{Cells} and next; # Skip empty sheets
664             my %sheet = (
665             parser => 0,
666             label => $oWkS->{Name},
667 115         918 maxrow => 0,
668             maxcol => 0,
669             cell => [],
670             attr => [],
671             merged => [],
672             active => 0,
673             );
674             # $debug and $sheet{_parser} = $oWkS;
675 115 50       441 defined $sheet{label} or $sheet{label} = "-- unlabeled --";
676 115 100       435 exists $oWkS->{MinRow} and $sheet{minrow} = $oWkS->{MinRow} + 1;
677 115 100       388 exists $oWkS->{MaxRow} and $sheet{maxrow} = $oWkS->{MaxRow} + 1;
678 115 100       370 exists $oWkS->{MinCol} and $sheet{mincol} = $oWkS->{MinCol} + 1;
679 115 100       348 exists $oWkS->{MaxCol} and $sheet{maxcol} = $oWkS->{MaxCol} + 1;
680             $sheet{merged} = [
681 12         34 map { $_->[0] }
682 4         21 sort { $a->[1] cmp $b->[1] }
683 12         94 map {[ $_, pack "NNNN", @$_ ]}
684 12         84 map {[ map { $_ + 1 } @{$_}[1,0,3,2] ]}
  48         109  
  12         41  
685 115 100       261 @{$oWkS->get_merged_areas || []}];
  115         516  
686 115         1195 my $sheet_idx = 1 + @data;
687 115 100       375 $debug and print STDERR "\tSheet $sheet_idx '$sheet{label}' $sheet{maxrow} x $sheet{maxcol}\n";
688 115 100       361 if (defined $active_sheet) {
689             # _SheetNo is 0-based
690 7 50       21 my $sheet_no = defined $oWkS->{_SheetNo} ? $oWkS->{_SheetNo} : $current_sheet - 1;
691 7 100       31 $sheet_no eq $active_sheet and $sheet{active} = 1;
692             }
693             # Sheet keys:
694             # _Book FooterMargin MinCol RightMargin
695             # BottomMargin FooterMergin MinRow RightMergin
696             # BottomMergin HCenter Name RowHeight
697             # Cells Header NoColor RowHidden
698             # ColFmtNo HeaderMargin NoOrient Scale
699             # ColHidden HeaderMergin NoPls SheetHidden
700             # ColWidth Kind Notes _SheetNo
701             # Copis Landscape PageFit SheetType
702             # DefColWidth LeftMargin PageStart SheetVersion
703             # DefRowHeight LeftMergin PaperSize TopMargin
704             # Draft LeftToRight _Pos TopMergin
705             # FitHeight MaxCol PrintGrid UsePage
706             # FitWidth MaxRow PrintHeaders VCenter
707             # Footer MergedArea Res VRes
708 115 100       341 if (exists $oWkS->{MinRow}) {
709 110   50     477 my $hiddenRows = $oWkS->{RowHidden} || [];
710 110   50     463 my $hiddenCols = $oWkS->{ColHidden} || [];
711 110 100       314 if ($opt{clip}) {
712 91         242 my ($mr, $mc) = (-1, -1);
713 91         355 foreach my $r ($oWkS->{MinRow} .. $sheet{maxrow}) {
714 516         949 foreach my $c ($oWkS->{MinCol} .. $sheet{maxcol}) {
715 2908 100       5626 my $oWkC = $oWkS->{Cells}[$r][$c] or next;
716 1701 50       3285 defined (my $val = $oWkC->{Val}) or next;
717 1701 100       3180 $val eq "" and next;
718 1679 100       2734 $r > $mr and $mr = $r;
719 1679 100       2999 $c > $mc and $mc = $c;
720             }
721             }
722 91         350 ($sheet{maxrow}, $sheet{maxcol}) = ($mr + 1, $mc + 1);
723             }
724 110         352 foreach my $r ($oWkS->{MinRow} .. $sheet{maxrow}) {
725 597         1280 foreach my $c ($oWkS->{MinCol} .. $sheet{maxcol}) {
726 3132 100       12249 my $oWkC = $oWkS->{Cells}[$r][$c] or next;
727             #defined (my $val = $oWkC->{Val}) or next;
728 1802         2951 my $val = $oWkC->{Val};
729 1802 50 33     6154 if (defined $val and my $enc = $oWkC->{Code}) {
730 0 0       0 $enc eq "ucs2" and $val = decode ("utf-16be", $val);
731             }
732 1802         4147 my $cell = cr2cell ($c + 1, $r + 1);
733 1802 100       5604 $opt{rc} and $sheet{cell}[$c + 1][$r + 1] = $val; # Original
734              
735 1802         2466 my $fmt;
736 1802         2652 my $FmT = $oWkC->{Format};
737 1802 100       3375 if ($FmT) {
738 952 100       1928 unless (ref $FmT) {
739 130         171 $fmt = $FmT;
740 130         222 $FmT = {};
741             }
742             }
743             else {
744 850         1395 $FmT = {};
745             }
746 1802         3572 foreach my $attr (qw( AlignH AlignV FmtIdx Hidden Lock
747             Wrap )) {
748 10812 100       21191 exists $FmT->{$attr} or $FmT->{$attr} = 0;
749             }
750 1802 100       3758 exists $FmT->{Fill} or $FmT->{Fill} = [ 0 ];
751 1802 100       4029 exists $FmT->{Font} or $FmT->{Font} = undef;
752              
753 1802 100       3403 unless (defined $fmt) {
754             $fmt = $FmT->{FmtIdx}
755             ? $oBook->{FormatStr}{$FmT->{FmtIdx}}
756 1672 100       3215 : undef;
757             }
758 1802 100       3465 if ($oWkC->{Type} eq "Numeric") {
759             # Fixed in 0.33 and up
760             # see Spreadsheet/ParseExcel/FmtDefault.pm
761             $FmT->{FmtIdx} == 0x0e ||
762             $FmT->{FmtIdx} == 0x0f ||
763             $FmT->{FmtIdx} == 0x10 ||
764             $FmT->{FmtIdx} == 0x11 ||
765             $FmT->{FmtIdx} == 0x16 ||
766             (defined $fmt && $fmt =~ m{^[dmy][-\\/dmy]*$}) and
767 370 100 33     3315 $oWkC->{Type} = "Date";
      33        
      33        
      33        
      100        
      66        
768             $FmT->{FmtIdx} == 0x09 ||
769             $FmT->{FmtIdx} == 0x0a ||
770             (defined $fmt && $fmt =~ m{^0+\.0+%$}) and
771 370 100 100     2129 $oWkC->{Type} = "Percentage";
      100        
      100        
772             }
773 1802 100       3451 defined $fmt and $fmt =~ s/\\//g;
774             $opt{cells} and # Formatted value
775             $sheet{$cell} = defined $val ? $FmT && exists $def_fmt{$FmT->{FmtIdx}}
776 1802 100 66     9920 ? $oFmt->ValFmt ($oWkC, $oBook)
    50          
    100          
777             : $oWkC->Value : undef;
778 1802 100       15284 if ($opt{attr}) {
779 1526         2303 my $FnT = $FmT->{Font};
780             my $fmi = $FmT->{FmtIdx}
781             ? $oBook->{FormatStr}{$FmT->{FmtIdx}}
782 1526 100       2672 : undef;
783 1526 100       2611 $fmi and $fmi =~ s/\\//g;
784             $sheet{attr}[$c + 1][$r + 1] = {
785             @def_attr,
786              
787             type => lc $oWkC->{Type},
788             enc => $oWkC->{Code},
789             merged => (defined $oWkC->{Merged} ? $oWkC->{Merged} : $oWkC->is_merged) || 0,
790             hidden => ($hiddenRows->[$r] || $hiddenCols->[$c] ? 1 :
791             defined $oWkC->{Hidden} ? $oWkC->{Hidden} : $FmT->{Hidden}) || 0,
792             locked => $FmT->{Lock} || 0,
793             format => $fmi,
794             halign => [ undef, qw( left center right
795             fill justify ), undef,
796             "equal_space" ]->[$FmT->{AlignH}],
797             valign => [ qw( top center bottom justify
798             equal_space )]->[$FmT->{AlignV}],
799             wrap => $FmT->{Wrap},
800             font => $FnT->{Name},
801             size => $FnT->{Height},
802             bold => $FnT->{Bold},
803             italic => $FnT->{Italic},
804             uline => $FnT->{Underline},
805             fgcolor => _xls_color ($FnT->{Color}),
806 1526         6194 bgcolor => _xls_fill (@{$FmT->{Fill}}),
807             formula => $oWkC->{Formula},
808 1526   100     4888 };
      100        
      100        
809             #_dump "cell", $sheet{attr}[$c + 1][$r + 1];
810             }
811             }
812             }
813             }
814 115         206 for (@{$sheet{cell}}) {
  115         380  
815 409 100       1068 defined or $_ = [];
816             }
817 115         2012 push @data, { %sheet };
818             # $data[0]{sheets}++;
819 115 50       614 if ($sheet{label} eq "-- unlabeled --") {
820 0         0 $sheet{label} = "";
821             }
822             else {
823 115         783 $data[0]{sheet}{$sheet{label}} = $#data;
824             }
825             }
826 75         497 return _clipsheets \%opt, [ @data ];
827             }
828              
829 76 100       430 if ($opt{parser} ? _parser ($opt{parser}) eq "sc"
    100          
    100          
830             : $io_fil
831             ? $txt =~ m/\.sc$/
832             : $txt =~ m/^# .*SquirrelCalc/) {
833 46 100       129 if ($io_ref) {
    100          
834 2         9 local $/;
835 2         56 my $x = <$txt>;
836 2         12 $txt = $x;
837             }
838             elsif ($io_fil) {
839 42         171 local $/;
840 42 50       1606 open my $sc, "<", $txt or return;
841 42         1836 $txt = <$sc>;
842 42         702 close $sc;
843             }
844 46 50       302 $txt =~ m/\S/ or return;
845 46 50       170 my $label = defined $opt{label} ? $opt{label} : "sheet";
846 46         646 my @data = (
847             { type => "sc",
848             parser => "Spreadsheet::Read",
849             version => $VERSION,
850             parsers => [{
851             type => "sc",
852             parser => "Spreadsheet::Read",
853             version => $VERSION,
854             }],
855             error => undef,
856             sheets => 1,
857             sheet => { $label => 1 },
858             },
859             { parser => 0,
860             label => $label,
861             maxrow => 0,
862             maxcol => 0,
863             cell => [],
864             attr => [],
865             merged => [],
866             active => 1,
867             },
868             );
869              
870 46         9715 for (split m/\s*[\r\n]\s*/, $txt) {
871 13478 100       22369 if (m/^dimension.*of ([0-9]+) rows.*of ([0-9]+) columns/i) {
872 46         89 @{$data[1]}{qw(maxrow maxcol)} = ($1, $2);
  46         274  
873 46         99 next;
874             }
875 13432 100       33232 s/^r([0-9]+)c([0-9]+)\s*=\s*// or next;
876 3542         6147 my ($c, $r) = map { $_ + 1 } $2, $1;
  7084         15254  
877 3542 100 66     15837 if (m/.* \{(.*)}$/ or m/"(.*)"/) {
878 2714         5250 my $cell = cr2cell ($c, $r);
879 2714 100       8551 $opt{rc} and $data[1]{cell}[$c][$r] = $1;
880 2714 50       7580 $opt{cells} and $data[1]{$cell} = $1;
881 2714 100       6438 $opt{attr} and $data[1]{attr}[$c + 1][$r] = { @def_attr };
882 2714         4661 next;
883             }
884             # Now only formula's remain. Ignore for now
885             # r67c7 = [P2L] 2*(1000*r67c5-60)
886             }
887 46         795 for (@{$data[1]{cell}}) {
  46         167  
888 360 100       756 defined or $_ = [];
889             }
890 46         172 return _clipsheets \%opt, [ @data ];
891             }
892              
893 30 50 66     364 if ($opt{parser} ? _parser ($opt{parser}) eq "sxc"
    100          
894             : ($txt =~ m/^<\?xml/ or -f $txt)) {
895 7 50       996 $can{sxc} or croak "Spreadsheet::ReadSXC not installed";
896 0 0       0 ref $txt and
897             croak ("Sorry, references as input are not (yet) supported by Spreadsheet::ReadSXC");
898              
899 0         0 my $using = "using $can{sxc}-" . $can{sxc}->VERSION;
900 0         0 my $sxc_options = { %parser_opts, OrderBySheet => 1 }; # New interface 0.20 and up
901 0         0 my $sxc;
902 0 0 0     0 if ($txt =~ m/\.(sxc|ods)$/i) {
    0          
    0          
903 0 0       0 $debug and print STDERR "Opening \U$1\E $txt $using\n";
904 0 0       0 $sxc = Spreadsheet::ReadSXC::read_sxc ($txt, $sxc_options) or return;
905             }
906             elsif ($txt =~ m/\.xml$/i) {
907 0 0       0 $debug and print STDERR "Opening XML $txt $using\n";
908 0 0       0 $sxc = Spreadsheet::ReadSXC::read_xml_file ($txt, $sxc_options) or return;
909             }
910             # need to test on pattern to prevent stat warning
911             # on filename with newline
912             elsif ($txt !~ m/^<\?xml/i and -f $txt) {
913 0 0       0 $debug and print STDERR "Opening XML $txt $using\n";
914 0 0       0 open my $f, "<", $txt or return;
915 0         0 local $/;
916 0         0 $txt = <$f>;
917 0         0 close $f;
918             }
919 0 0 0     0 !$sxc && $txt =~ m/^<\?xml/i and
920             $sxc = Spreadsheet::ReadSXC::read_xml_string ($txt, $sxc_options);
921 0 0       0 $debug > 8 and _dump (sxc => $sxc);
922 0 0       0 if ($sxc) {
923 0         0 my @data = ( {
924             type => "sxc",
925             parser => "Spreadsheet::ReadSXC",
926             version => $Spreadsheet::ReadSXC::VERSION,
927             parsers => [{
928             type => "sxc",
929             parser => "Spreadsheet::ReadSXC",
930             version => $Spreadsheet::ReadSXC::VERSION,
931             }],
932             error => undef,
933             sheets => 0,
934             sheet => {},
935             } );
936             my @sheets = ref $sxc eq "HASH" # < 0.20
937             ? map {
938             { label => $_,
939 0         0 data => $sxc->{$_},
940             }
941             } keys %$sxc
942 0 0       0 : @{$sxc};
  0         0  
943 0         0 foreach my $sheet (@sheets) {
944 0         0 my @sheet = @{$sheet->{data}};
  0         0  
945             my %sheet = (
946             parser => 0,
947             label => $sheet->{label},
948 0         0 maxrow => scalar @sheet,
949             maxcol => 0,
950             cell => [],
951             attr => [],
952             merged => [],
953             active => 0,
954             );
955 0         0 my $sheet_idx = 1 + @data;
956 0 0       0 $debug and print STDERR "\tSheet $sheet_idx '$sheet{label}' $sheet{maxrow} rows\n";
957 0         0 foreach my $r (0 .. $#sheet) {
958 0 0       0 my @row = @{$sheet[$r]} or next;
  0         0  
959 0         0 foreach my $c (0 .. $#row) {
960 0 0       0 defined (my $val = $row[$c]) or next;
961 0         0 my $C = $c + 1;
962 0 0       0 $C > $sheet{maxcol} and $sheet{maxcol} = $C;
963 0         0 my $cell = cr2cell ($C, $r + 1);
964 0 0       0 $opt{rc} and $sheet{cell}[$C][$r + 1] = $val;
965 0 0       0 $opt{cells} and $sheet{$cell} = $val;
966 0 0       0 $opt{attr} and $sheet{attr}[$C][$r + 1] = { @def_attr };
967             }
968             }
969 0         0 for (@{$sheet{cell}}) {
  0         0  
970 0 0       0 defined or $_ = [];
971             }
972 0 0       0 $debug and print STDERR "\tSheet $sheet_idx '$sheet{label}' $sheet{maxrow} x $sheet{maxcol}\n";
973 0         0 push @data, { %sheet };
974 0         0 $data[0]{sheets}++;
975 0         0 $data[0]{sheet}{$sheet->{label}} = $#data;
976             }
977 0         0 return _clipsheets \%opt, [ @data ];
978             }
979             }
980              
981 23 100 100     138 if (!ref $txt and $txt =~ m/\.\w+$/) {
982             # Return (localized) system message
983 5 50       120 open my $fh, "<", $txt and
984             croak "I can open file $txt, but I do not know how to parse it\n";
985              
986 5         91 $@ = $!;
987             }
988              
989 23         127 return;
990             } # ReadData
991              
992             sub add {
993 2     2 1 7 my $book = shift;
994 2 50       9 my $r = ReadData (@_) or return;
995             $book && (ref $book eq "ARRAY" ||
996 2 50 33     37 ref $book eq __PACKAGE__) && $book->[0]{sheets} or return $r;
      33        
      33        
997              
998 2         5 my $c1 = $book->[0];
999 2         5 my $c2 = $r->[0];
1000              
1001 2 50       9 unless ($c1->{parsers}) {
1002 0         0 $c1->{parsers}[0]{$_} = $c1->{$_} for qw( type parser version );
1003 0         0 $book->[$_]{parser} = 0 for 1 .. $c1->{sheets};
1004             }
1005 2         8 my ($pidx) = (grep { my $p = $c1->{parsers}[$_];
1006             $p->{type} eq $c2->{type} &&
1007             $p->{parser} eq $c2->{parser} &&
1008 2 50 33     5 $p->{version} eq $c2->{version} } 0 .. $#{$c1->{parsers}});
  2         77  
  2         7  
1009 2 50       9 unless (defined $pidx) {
1010 0         0 $pidx = scalar @{$c1->{parsers}};
  0         0  
1011 0         0 $c1->{parsers}[$pidx]{$_} = $c2->{$_} for qw( type parser version );
1012             }
1013              
1014 2         7 foreach my $sn (sort { $c2->{sheet}{$a} <=> $c2->{sheet}{$b} } keys %{$c2->{sheet}}) {
  0         0  
  2         14  
1015 2         5 my $s = $sn;
1016 2         4 my $v = 2;
1017 2         9 while (exists $c1->{sheet}{$s}) {
1018 1         7 $s = $sn."[".$v++."]";
1019             }
1020 2         18 $c1->{sheet}{$s} = $c1->{sheets} + $c2->{sheet}{$sn};
1021 2         11 $r->[$c2->{sheet}{$sn}]{parser} = $pidx;
1022 2         9 push @$book, $r->[$c2->{sheet}{$sn}];
1023             }
1024 2         5 $c1->{sheets} += $c2->{sheets};
1025              
1026 2         19 return $book;
1027             } # add
1028              
1029             package Spreadsheet::Read::Attribute;
1030              
1031 53     53   614 use Carp;
  53         194  
  53         4624  
1032 53     53   472 use vars qw( $AUTOLOAD );
  53         128  
  53         64261  
1033              
1034             sub AUTOLOAD {
1035 6     6   788 my $self = shift;
1036 6         42 (my $attr = $AUTOLOAD) =~ s/.*:://;
1037 6         249 $self->{$attr};
1038             } # AUTOLOAD
1039              
1040             package Spreadsheet::Read::Sheet;
1041              
1042             sub cell {
1043 55     55   17964 my ($sheet, @id) = @_;
1044             @id == 2 && $id[0] =~ m/^[0-9]+$/ && $id[1] =~ m/^[0-9]+$/ and
1045 55 50 66     542 return $sheet->{cell}[$id[0]][$id[1]];
      66        
1046             @id && $id[0] && exists $sheet->{$id[0]} and
1047 27 50 33     296 return $sheet->{$id[0]};
      33        
1048             } # cell
1049              
1050             sub attr {
1051 8     8   23 my ($sheet, @id) = @_;
1052 8         15 my $class = "Spreadsheet::Read::Attribute";
1053             @id == 2 && $id[0] =~ m/^[0-9]+$/ && $id[1] =~ m/^[0-9]+$/ and
1054 8 50 66     70 return bless $sheet->{attr}[$id[0]][$id[1]] => $class;
      66        
1055 5 50 33     34 if (@id && $id[0] && exists $sheet->{$id[0]}) {
      33        
1056 5         17 my ($c, $r) = $sheet->cell2cr ($id[0]);
1057 5         54 return bless $sheet->{attr}[$c][$r] => $class;
1058             }
1059 0         0 undef;
1060             } # attr
1061              
1062             sub maxrow {
1063 2     2   5 my $sheet = shift;
1064 2         9 return $sheet->{maxrow};
1065             } # maxrow
1066              
1067             sub maxcol {
1068 2     2   5 my $sheet = shift;
1069 2         8 return $sheet->{maxcol};
1070             } # maxrow
1071              
1072             sub col2label {
1073 1 50   1   9 $_[0] =~ m/::/ and shift; # class unused
1074 1         22 return Spreadsheet::Read::col2label (@_);
1075             } # col2label
1076              
1077             sub cr2cell {
1078 24 50   24   90 $_[0] =~ m/::/ and shift; # class unused
1079 24         89 return Spreadsheet::Read::cr2cell (@_);
1080             } # cr2cell
1081              
1082             sub cell2cr {
1083 21 50   21   11124 $_[0] =~ m/::/ and shift; # class unused
1084 21         64 return Spreadsheet::Read::cell2cr (@_);
1085             } # cell2cr
1086              
1087             sub label {
1088 2     2   6 my ($sheet, $label) = @_;
1089 2 100       10 defined $label and $sheet->{label} = $label;
1090 2         8 return $sheet->{label};
1091             } # label
1092              
1093             sub active {
1094 1     1   3 my $sheet = shift;
1095 1         4 return $sheet->{active};
1096             } # label
1097              
1098             # my @row = $sheet->cellrow (1);
1099             sub cellrow {
1100 4     4   11 my ($sheet, $row) = @_;
1101 4 100 66     33 defined $row && $row > 0 && $row <= $sheet->{maxrow} or return;
      100        
1102 1         3 my $s = $sheet->{cell};
1103 1         6 map { $s->[$_][$row] } 1..$sheet->{maxcol};
  19         42  
1104             } # cellrow
1105              
1106             # my @row = $sheet->row (1);
1107             sub row {
1108 4     4   9 my ($sheet, $row) = @_;
1109 4 100 66     37 defined $row && $row > 0 && $row <= $sheet->{maxrow} or return;
      100        
1110 1         5 map { $sheet->{$sheet->cr2cell ($_, $row)} } 1..$sheet->{maxcol};
  19         48  
1111             } # row
1112              
1113             # my @col = $sheet->cellcolumn (1);
1114             sub cellcolumn {
1115 4     4   10 my ($sheet, $col) = @_;
1116 4 100 66     37 defined $col && $col > 0 && $col <= $sheet->{maxcol} or return;
      100        
1117 1         3 my $s = $sheet->{cell};
1118 1         4 map { $s->[$col][$_] } 1..$sheet->{maxrow};
  5         20  
1119             } # cellcolumn
1120              
1121             # my @col = $sheet->column (1);
1122             sub column {
1123 4     4   13 my ($sheet, $col) = @_;
1124 4 100 66     32 defined $col && $col > 0 && $col <= $sheet->{maxcol} or return;
      100        
1125 1         4 map { $sheet->{$sheet->cr2cell ($col, $_)} } 1..$sheet->{maxrow};
  5         13  
1126             } # column
1127              
1128             # Convert {cell}'s [column][row] to a [row][column] list
1129             # my @rows = $sheet->rows ();
1130             sub rows {
1131 1     1   4 my $sheet = shift;
1132 1         2 my $s = $sheet->{cell};
1133              
1134             map {
1135 5         8 my $r = $_;
1136 5         10 [ map { $s->[$_][$r] } 1..$sheet->{maxcol} ];
  95         199  
1137 1         5 } 1..$sheet->{maxrow};
1138             } # rows
1139              
1140             1;
1141              
1142             __END__
1143             =head1 DESCRIPTION
1144              
1145             Spreadsheet::Read tries to transparently read *any* spreadsheet and
1146             return its content in a universal manner independent of the parsing
1147             module that does the actual spreadsheet scanning.
1148              
1149             For OpenOffice and/or LibreOffice this module uses
1150             L<Spreadsheet::ReadSXC|https://metacpan.org/release/Spreadsheet-ReadSXC>
1151              
1152             For Microsoft Excel this module uses
1153             L<Spreadsheet::ParseExcel|https://metacpan.org/release/Spreadsheet-ParseExcel>,
1154             L<Spreadsheet::ParseXLSX|https://metacpan.org/release/Spreadsheet-ParseXLSX>, or
1155             L<Spreadsheet::XLSX|https://metacpan.org/release/Spreadsheet-XLSX> (stronly
1156             discouraged).
1157              
1158             For CSV this module uses L<Text::CSV_XS|https://metacpan.org/release/Text-CSV_XS>
1159             or L<Text::CSV_PP|https://metacpan.org/release/Text-CSV_PP>.
1160              
1161             For SquirrelCalc there is a very simplistic built-in parser
1162              
1163             =head2 Data structure
1164              
1165             The data is returned as an array reference:
1166              
1167             $book = [
1168             # Entry 0 is the overall control hash
1169             { sheets => 2,
1170             sheet => {
1171             "Sheet 1" => 1,
1172             "Sheet 2" => 2,
1173             },
1174             parsers => [ {
1175             type => "xls",
1176             parser => "Spreadsheet::ParseExcel",
1177             version => 0.59,
1178             }],
1179             error => undef,
1180             },
1181             # Entry 1 is the first sheet
1182             { parser => 0,
1183             label => "Sheet 1",
1184             maxrow => 2,
1185             maxcol => 4,
1186             cell => [ undef,
1187             [ undef, 1 ],
1188             [ undef, undef, undef, undef, undef, "Nugget" ],
1189             ],
1190             attr => [],
1191             merged => [],
1192             active => 1,
1193             A1 => 1,
1194             B5 => "Nugget",
1195             },
1196             # Entry 2 is the second sheet
1197             { parser => 0,
1198             label => "Sheet 2",
1199             :
1200             :
1201              
1202             To keep as close contact to spreadsheet users, row and column 1 have
1203             index 1 too in the C<cell> element of the sheet hash, so cell "A1" is
1204             the same as C<cell> [1, 1] (column first). To switch between the two,
1205             there are helper functions available: C<cell2cr ()>, C<cr2cell ()>,
1206             and C<col2label ()>.
1207              
1208             The C<cell> hash entry contains unformatted data, while the hash entries
1209             with the traditional labels contain the formatted values (if applicable).
1210              
1211             The control hash (the first entry in the returned array ref), contains
1212             some spreadsheet meta-data. The entry C<sheet> is there to be able to find
1213             the sheets when accessing them by name:
1214              
1215             my %sheet2 = %{$book->[$book->[0]{sheet}{"Sheet 2"}]};
1216              
1217             =head2 Formatted vs Unformatted
1218              
1219             The difference between formatted and unformatted cells is that the (optional)
1220             format is applied to the cell or not. This part is B<completely> implemented
1221             on the parser side. Spreadsheet::Read just makes both available if these are
1222             supported. Options provide means to disable either. If the parser does not
1223             provide formatted cells - like CSV - both values are equal.
1224              
1225             To show what this implies:
1226              
1227             use Spreadsheet::Read;
1228              
1229             my $file = "files/example.xlsx";
1230             my $workbook = Spreadsheet::Read->new ($file);
1231              
1232             my $info = $workbook->[0];
1233             say "Parsed $file with $info->{parser}-$info->{version}";
1234              
1235             my $sheet = $workbook->sheet (1);
1236              
1237             say join "\t" => "Formatted:", $sheet->row (1);
1238             say join "\t" => "Unformatted:", $sheet->cellrow (1);
1239              
1240             Might return very different results depending one the underlying parser (and
1241             its version):
1242              
1243             Parsed files/example.xlsx with Spreadsheet::ParseXLSX-0.27
1244             Formatted: 8-Aug Foo & Barr < Quux
1245             Unformatted: 39668 Foo & Barr < Quux
1246              
1247             Parsed files/example.xlsx with Spreadsheet::XLSX-0.15
1248             Formatted: 39668 Foo &amp; Barr &lt; Quux
1249             Unformatted: 39668 Foo &amp; Barr &lt; Quux
1250              
1251             =head2 Functions and methods
1252              
1253             =head3 new
1254              
1255             my $book = Spreadsheet::Read->new (...) or die $@;
1256              
1257             All options accepted by ReadData are accepted by new.
1258              
1259             With no arguments at all, $book will be an object where sheets can be added
1260             using C<add>
1261              
1262             my $book = Spreadsheet::Read->new ();
1263             $book->add ("file.csv");
1264             $book->add ("file.cslx");
1265              
1266             =head3 ReadData
1267              
1268             my $book = ReadData ($source [, option => value [, ... ]]);
1269              
1270             my $book = ReadData ("file.csv", sep => ',', quote => '"');
1271              
1272             my $book = ReadData ("file.xls", dtfmt => "yyyy-mm-dd");
1273              
1274             my $book = ReadData ("file.ods");
1275              
1276             my $book = ReadData ("file.sxc");
1277              
1278             my $book = ReadData ("content.xml");
1279              
1280             my $book = ReadData ($content);
1281              
1282             my $book = ReadData ($content, parser => "xlsx");
1283              
1284             my $book = ReadData ($fh, parser => "xlsx");
1285              
1286             my $book = ReadData (\$content, parser => "xlsx");
1287              
1288             Tries to convert the given file, string, or stream to the data structure
1289             described above.
1290              
1291             Processing Excel data from a stream or content is supported through a
1292             L<File::Temp|https://metacpan.org/release/File-Temp> temporary file or
1293             L<IO::Scalar|https://metacpan.org/release/IO-Scalar> when available.
1294              
1295             L<Spreadsheet::ReadSXC|https://metacpan.org/release/Spreadsheet-ReadSXC>
1296             does preserve sheet order as of version 0.20.
1297              
1298             Choosing between C<$content> and C<\\$content> (with or without passing
1299             the desired C<parser> option) may be depending on trial and terror.
1300             C<ReadData> does try to determine parser type on content if needed, but
1301             not all combinations are checked, and not all signatures are builtin.
1302              
1303             Currently supported options are:
1304              
1305             =over 2
1306              
1307             =item parser
1308             X<parser>
1309              
1310             Force the data to be parsed by a specific format. Possible values are
1311             C<csv>, C<prl> (or C<perl>), C<sc> (or C<squirelcalc>), C<sxc> (or C<oo>,
1312             C<ods>, C<openoffice>, C<libreoffice>) C<xls> (or C<excel>), and C<xlsx>
1313             (or C<excel2007>).
1314              
1315             When parsing streams, instead of files, it is highly recommended to pass
1316             this option.
1317              
1318             Spreadsheet::Read supports several underlying parsers per spreadsheet
1319             type. It will try those from most favored to least favored. When you
1320             have a good reason to prefer a different parser, you can set that in
1321             environment variables. The other options then will not be tested for:
1322              
1323             env SPREADSHEET_READ_CSV=Text::CSV_PP ...
1324              
1325             =item cells
1326             X<cells>
1327              
1328             Control the generation of named cells ("C<A1>" etc). Default is true.
1329              
1330             =item rc
1331              
1332             Control the generation of the {cell}[c][r] entries. Default is true.
1333              
1334             =item attr
1335              
1336             Control the generation of the {attr}[c][r] entries. Default is false.
1337             See L</Cell Attributes> below.
1338              
1339             =item clip
1340              
1341             If set, L<C<ReadData>|/ReadData> will remove all trailing rows and columns
1342             per sheet that have no data, where no data means only undefined or empty
1343             cells (after optional stripping). If a sheet has no data at all, the sheet
1344             will be skipped entirely when this attribute is true.
1345              
1346             =item strip
1347              
1348             If set, L<C<ReadData>|/ReadData> will remove trailing- and/or
1349             leading-whitespace from every field.
1350              
1351             strip leading strailing
1352             ----- ------- ---------
1353             0 n/a n/a
1354             1 strip n/a
1355             2 n/a strip
1356             3 strip strip
1357              
1358             =item pivot
1359              
1360             Swap all rows and columns.
1361              
1362             When a sheet contains data like
1363              
1364             A1 B1 C1 E1
1365             A2 C2 D2
1366             A3 B3 C3 D3 E3
1367              
1368             using C<pivot> will return the sheet data as
1369              
1370             A1 A2 A3
1371             B1 B3
1372             C1 C2 C3
1373             D2 D3
1374             E1 E3
1375              
1376             =item sep
1377              
1378             Set separator for CSV. Default is comma C<,>.
1379              
1380             =item quote
1381              
1382             Set quote character for CSV. Default is C<">.
1383              
1384             =item dtfmt
1385              
1386             Set the format for MS-Excel date fields that are set to use the default
1387             date format. The default format in Excel is "C<m-d-yy>", which is both
1388             not year 2000 safe, nor very useful. The default is now "C<yyyy-mm-dd>",
1389             which is more ISO-like.
1390              
1391             Note that date formatting in MS-Excel is not reliable at all, as it will
1392             store/replace/change the date field separator in already stored formats
1393             if you change your locale settings. So the above mentioned default can
1394             be either "C<m-d-yy>" OR "C<m/d/yy>" depending on what that specific
1395             character happened to be at the time the user saved the file.
1396              
1397             =item debug
1398              
1399             Enable some diagnostic messages to STDERR.
1400              
1401             The value determines how much diagnostics are dumped (using
1402             L<Data::Peek|https://metacpan.org/release/Data-Peek>). A value of C<9>
1403             and higher will dump the entire structure from the back-end parser.
1404              
1405             =item passwd
1406              
1407             Use this password to decrypt password protected spreadsheet.
1408              
1409             Currently only supports Excel.
1410              
1411             =back
1412              
1413             All other attributes/options will be passed to the underlying parser if
1414             that parser supports attributes.
1415              
1416             =head3 col2label
1417              
1418             my $col_id = col2label (col);
1419              
1420             my $col_id = $book->col2label (col); # OO
1421              
1422             C<col2label ()> converts a C<(column)> (1 based) to the letters used in the
1423             traditional cell notation:
1424              
1425             my $id = col2label ( 4); # $id now "D"
1426             my $id = col2label (28); # $id now "AB"
1427              
1428             =head3 cr2cell
1429              
1430             my $cell = cr2cell (col, row);
1431              
1432             my $cell = $book->cr2cell (col, row); # OO
1433              
1434             C<cr2cell ()> converts a C<(column, row)> pair (1 based) to the
1435             traditional cell notation:
1436              
1437             my $cell = cr2cell ( 4, 14); # $cell now "D14"
1438             my $cell = cr2cell (28, 4); # $cell now "AB4"
1439              
1440             =head3 cell2cr
1441              
1442             my ($col, $row) = cell2cr ($cell);
1443              
1444             my ($col, $row) = $book->cell2cr ($cell); # OO
1445              
1446             C<cell2cr ()> converts traditional cell notation to a C<(column, row)>
1447             pair (1 based):
1448              
1449             my ($col, $row) = cell2cr ("D14"); # returns ( 4, 14)
1450             my ($col, $row) = cell2cr ("AB4"); # returns (28, 4)
1451              
1452             =head3 row
1453              
1454             my @row = row ($sheet, $row)
1455              
1456             my @row = Spreadsheet::Read::row ($book->[1], 3);
1457              
1458             my @row = $book->row ($sheet, $row); # OO
1459              
1460             Get full row of formatted values (like C<< $sheet->{A3} .. $sheet->{G3} >>)
1461              
1462             Note that the indexes in the returned list are 0-based.
1463              
1464             C<row ()> is not imported by default, so either specify it in the
1465             use argument list, or call it fully qualified.
1466              
1467             See also the C<row ()> method on sheets.
1468              
1469             =head3 cellrow
1470              
1471             my @row = cellrow ($sheet, $row);
1472              
1473             my @row = Spreadsheet::Read::cellrow ($book->[1], 3);
1474              
1475             my @row = $book->cellrow ($sheet, $row); # OO
1476              
1477             Get full row of unformatted values (like C<< $sheet->{cell}[1][3] .. $sheet->{cell}[7][3] >>)
1478              
1479             Note that the indexes in the returned list are 0-based.
1480              
1481             C<cellrow ()> is not imported by default, so either specify it in the
1482             use argument list, or call it fully qualified or as method call.
1483              
1484             See also the C<cellrow ()> method on sheets.
1485              
1486             =head3 rows
1487              
1488             my @rows = rows ($sheet);
1489              
1490             my @rows = Spreadsheet::Read::rows ($book->[1]);
1491              
1492             my @rows = $book->rows (1); # OO
1493              
1494             Convert C<{cell}>'s C<[column][row]> to a C<[row][column]> list.
1495              
1496             Note that the indexes in the returned list are 0-based, where the
1497             index in the C<{cell}> entry is 1-based.
1498              
1499             C<rows ()> is not imported by default, so either specify it in the
1500             use argument list, or call it fully qualified.
1501              
1502             =head3 parses
1503              
1504             parses ($format);
1505              
1506             Spreadsheet::Read::parses ("CSV");
1507              
1508             $book->parses ("CSV"); # OO
1509              
1510             C<parses ()> returns Spreadsheet::Read's capability to parse the
1511             required format. L<C<ReadData>|/ReadData> will pick its preferred parser
1512             for that format unless overruled. See L<C<parser>|/parser>.
1513              
1514             C<parses ()> is not imported by default, so either specify it in the
1515             use argument list, or call it fully qualified.
1516              
1517             =head3 Version
1518              
1519             my $v = Version ()
1520              
1521             my $v = Spreadsheet::Read::Version ()
1522              
1523             my $v = Spreadsheet::Read->VERSION;
1524              
1525             my $v = $book->Version (); # OO
1526              
1527             Returns the current version of Spreadsheet::Read.
1528              
1529             C<Version ()> is not imported by default, so either specify it in the
1530             use argument list, or call it fully qualified.
1531              
1532             This function returns exactly the same as C<< Spreadsheet::Read->VERSION >>
1533             returns and is only kept for backward compatibility reasons.
1534              
1535             =head3 sheets
1536              
1537             my $sheets = $book->sheets; # OO
1538             my @sheets = $book->sheets; # OO
1539              
1540             In scalar context return the number of sheets in the book.
1541             In list context return the labels of the sheets in the book.
1542              
1543             =head3 sheet
1544              
1545             my $sheet = $book->sheet (1); # OO
1546             my $sheet = $book->sheet ("Foo"); # OO
1547              
1548             Return the numbered or named sheet out of the book. Will return C<undef> if
1549             there is no match. Will not work for sheets I<named> with a number between 1
1550             and the number of sheets in the book.
1551              
1552             With named sheets will first try to use the list of sheet-labels as stored in
1553             the control structure. If no match is found, it will scan the actual labels
1554             of the sheets. In that case, it will return the first matching sheet.
1555              
1556             If defined, the returned sheet will be of class C<Spreadsheet::Read::Sheet>.
1557              
1558             =head3 add
1559              
1560             my $book = ReadData ("file.csv");
1561             Spreadsheet::Read::add ($book, "file.xlsx");
1562              
1563             my $book = Spreadsheet::Read->new ("file.csv");
1564             $book->add ("file.xlsx"); # OO
1565              
1566             =head2 Methods on sheets
1567              
1568             =head3 maxcol
1569              
1570             my $col = $sheet->maxcol;
1571              
1572             Return the index of the last in-use column in the sheet. This index is 1-based.
1573              
1574             =head3 maxrow
1575              
1576             my $row = $sheet->maxrow;
1577              
1578             Return the index of the last in-use row in the sheet. This index is 1-based.
1579              
1580             =head3 cell
1581              
1582             my $cell = $sheet->cell ("A3");
1583             my $cell = $sheet->cell (1, 3);
1584              
1585             Return the value for a cell. Using tags will return the formatted value,
1586             using column and row will return unformatted value.
1587              
1588             =head3 attr
1589              
1590             my $cell = $sheet->attr ("A3");
1591             my $cell = $sheet->attr (1, 3);
1592              
1593             Return the attributes of a cell. Only valid if attributes are enabled through
1594             option C<attr>.
1595              
1596             =head3 col2label
1597              
1598             my $col_id = $sheet->col2label (col);
1599              
1600             C<col2label ()> converts a C<(column)> (1 based) to the letters used in the
1601             traditional cell notation:
1602              
1603             my $id = $sheet->col2label ( 4); # $id now "D"
1604             my $id = $sheet->col2label (28); # $id now "AB"
1605              
1606             =head3 cr2cell
1607              
1608             my $cell = $sheet->cr2cell (col, row);
1609              
1610             C<cr2cell ()> converts a C<(column, row)> pair (1 based) to the
1611             traditional cell notation:
1612              
1613             my $cell = $sheet->cr2cell ( 4, 14); # $cell now "D14"
1614             my $cell = $sheet->cr2cell (28, 4); # $cell now "AB4"
1615              
1616             =head3 cell2cr
1617              
1618             my ($col, $row) = $sheet->cell2cr ($cell);
1619              
1620             C<cell2cr ()> converts traditional cell notation to a C<(column, row)>
1621             pair (1 based):
1622              
1623             my ($col, $row) = $sheet->cell2cr ("D14"); # returns ( 4, 14)
1624             my ($col, $row) = $sheet->cell2cr ("AB4"); # returns (28, 4)
1625              
1626             =head3 col
1627              
1628             my @col = $sheet->column ($col);
1629              
1630             Get full column of formatted values (like C<< $sheet->{C1} .. $sheet->{C9} >>)
1631              
1632             Note that the indexes in the returned list are 0-based.
1633              
1634             =head3 cellcolumn
1635              
1636             my @col = $sheet->cellcolumn ($col);
1637              
1638             Get full column of unformatted values (like C<< $sheet->{cell}[3][1] .. $sheet->{cell}[3][9] >>)
1639              
1640             Note that the indexes in the returned list are 0-based.
1641              
1642             =head3 row
1643              
1644             my @row = $sheet->row ($row);
1645              
1646             Get full row of formatted values (like C<< $sheet->{A3} .. $sheet->{G3} >>)
1647              
1648             Note that the indexes in the returned list are 0-based.
1649              
1650             =head3 cellrow
1651              
1652             my @row = $sheet->cellrow ($row);
1653              
1654             Get full row of unformatted values (like C<< $sheet->{cell}[1][3] .. $sheet->{cell}[7][3] >>)
1655              
1656             Note that the indexes in the returned list are 0-based.
1657              
1658             =head3 rows
1659              
1660             my @rows = $sheet->rows ();
1661              
1662             Convert C<{cell}>'s C<[column][row]> to a C<[row][column]> list.
1663              
1664             Note that the indexes in the returned list are 0-based, where the
1665             index in the C<{cell}> entry is 1-based.
1666              
1667             =head3 label
1668              
1669             my $label = $sheet->label;
1670             $sheet->label ("New sheet label");
1671              
1672             Set a new label to a sheet. Note that the index in the control structure will
1673             I<NOT> be updated.
1674              
1675             =head3 active
1676              
1677             my $sheet_is_active = $sheet->active;
1678              
1679             Returns 1 if the selected sheet is active, otherwise returns 0.
1680              
1681             Currently only works on XLS (as of Spreadsheed::ParseExcel-0.61).
1682             CSV is always active.
1683              
1684             =head2 Using CSV
1685              
1686             In case of CSV parsing, L<C<ReadData>|/ReadData> will use the first line of
1687             the file to auto-detect the separation character if the first argument is a
1688             file and both C<sep> and C<quote> are not passed as attributes.
1689             L<Text::CSV_XS|https://metacpan.org/release/Text-CSV_XS> (or
1690             L<Text::CSV_PP|https://metacpan.org/release/Text-CSV_PP>) is able to
1691             automatically detect and use C<\r> line endings.
1692              
1693             CSV can parse streams too, but be sure to pass C<sep> and/or C<quote> if
1694             these do not match the default C<,> and C<">.
1695              
1696             When an error is found in the CSV, it is automatically reported (to STDERR).
1697             The structure will store the error in C<< $ss->[0]{error} >> as anonymous
1698             list returned by
1699             L<C<< $csv->error_diag >>|https://metacpan.org/pod/Text::CSV_XS#error_diag>.
1700             See L<Text::CSV_XS|https://metacpan.org/pod/Text-CSV_XS> for documentation.
1701              
1702             my $ss = ReadData ("bad.csv");
1703             $ss->[0]{error} and say $ss->[0]{error}[1];
1704              
1705             As CSV has no sheet labels, the default label for a CSV sheet is its filename.
1706             For CSV, this can be overruled using the I<label> attribute:
1707              
1708             my $ss = Spreadsheet::Read->new ("/some/place/test.csv", label => "Test");
1709              
1710             =head2 Cell Attributes
1711             X<attr>
1712              
1713             If the constructor was called with C<attr> having a true value,
1714              
1715             my $book = ReadData ("book.xls", attr => 1);
1716             my $book = Spreadsheet::Read->new ("book.xlsx", attr => 1);
1717              
1718             effort is made to analyze and store field attributes like this:
1719              
1720             { label => "Sheet 1",
1721             maxrow => 5,
1722             maxcol => 2,
1723             cell => [ undef,
1724             [ undef, 1 ],
1725             [ undef, undef, undef, undef, undef, "Nugget" ],
1726             ],
1727             attr => [ undef,
1728             [ undef, {
1729             type => "numeric",
1730             fgcolor => "#ff0000",
1731             bgcolor => undef,
1732             font => "Arial",
1733             size => undef,
1734             format => "## ##0.00",
1735             halign => "right",
1736             valign => "top",
1737             uline => 0,
1738             bold => 0,
1739             italic => 0,
1740             wrap => 0,
1741             merged => 0,
1742             hidden => 0,
1743             locked => 0,
1744             enc => "utf-8",
1745             }, ]
1746             [ undef, undef, undef, undef, undef, {
1747             type => "text",
1748             fgcolor => "#e2e2e2",
1749             bgcolor => undef,
1750             font => "Letter Gothic",
1751             size => 15,
1752             format => undef,
1753             halign => "left",
1754             valign => "top",
1755             uline => 0,
1756             bold => 0,
1757             italic => 0,
1758             wrap => 0,
1759             merged => 0,
1760             hidden => 0,
1761             locked => 0,
1762             enc => "iso8859-1",
1763             }, ]
1764             merged => [],
1765             A1 => 1,
1766             B5 => "Nugget",
1767             },
1768              
1769             The entries C<maxrow> and C<maxcol> are 1-based.
1770              
1771             This has now been partially implemented, mainly for Excel, as the other
1772             parsers do not (yet) support all of that. YMMV.
1773              
1774             If a cell itself is not hidden, but the parser holds the information that
1775             either the row or the column (or both) the field is in is hidden, the flag
1776             is inherited into the cell attributes.
1777              
1778             You can get the attributes of a cell (as a hash-ref) like this:
1779              
1780             my $attr = $book[1]{attr}[1][3]; # Direct structure
1781             my $attr = $book->sheet (1)->attr (1, 3); # Same using OO
1782             my $attr = $book->sheet (1)->attr ("A3"); # Same using OO
1783              
1784             To get to the C<font> attribute, use any of these:
1785              
1786             my $font = $book[1]{attr}[1][3]{font};
1787             my $font = $book->sheet (1)->attr (1, 3)->{font};
1788             my $font = $book->sheet (1)->attr ("A3")->font;
1789              
1790             =head3 Merged cells
1791             X<merged>
1792              
1793             Note that only
1794             L<Spreadsheet::ReadSXC|https://metacpan.org/release/Spreadsheet-ReadSXC>
1795             documents the use of merged cells, and not in a way useful for the spreadsheet
1796             consumer.
1797              
1798             CSV does not support merged cells (though future implementations of CSV
1799             for the web might).
1800              
1801             The documentation of merged areas in
1802             L<Spreadsheet::ParseExcel|https://metacpan.org/release/Spreadsheet-ParseExcel> and
1803             L<Spreadsheet::ParseXLSX|https://metacpan.org/release/Spreadsheet-ParseXLSX> can
1804             be found in
1805             L<Spreadsheet::ParseExcel::Worksheet|https://metacpan.org/release/Spreadsheet-ParseExcel-Worksheet>
1806             and L<Spreadsheet::ParseExcel::Cell|https://metacpan.org/release/Spreadsheet-ParseExcel-Cell>.
1807              
1808             None of basic L<Spreadsheet::XLSX|https://metacpan.org/release/Spreadsheet-XLSX>,
1809             L<Spreadsheet::ParseExcel|https://metacpan.org/release/Spreadsheet-ParseExcel>, and
1810             L<Spreadsheet::ParseXLSX|https://metacpan.org/release/Spreadsheet-ParseXLSX> manual
1811             pages mention merged cells at all.
1812              
1813             This module just tries to return the information in a generic way.
1814              
1815             Given this spreadsheet as an example
1816              
1817             merged.xlsx:
1818              
1819             A B C
1820             +-----+-----------+
1821             1| | foo |
1822             +-----+ +
1823             2| bar | |
1824             | +-----+-----+
1825             3| | urg | orc |
1826             +-----+-----+-----+
1827              
1828             the information extracted from that undocumented information is
1829             returned in the C<merged> entry of the sheet's hash as a list of
1830             top-left, bottom-right coordinate pars (col, row, col, row). For
1831             given example, that would be:
1832              
1833             $ss->{merged} = [
1834             [ 1, 2, 1, 3 ], # A2-A3
1835             [ 2, 1, 3, 2 ], # B1-C2
1836             ];
1837              
1838             When the attributes are also enabled, there is some merge information
1839             copied directly from the cell information, but again, that stems from
1840             code analysis and not from documentation:
1841              
1842             my $ss = ReadData ("merged.xlsx", attr => 1)->[1];
1843             foreach my $row (1 .. $ss->{maxrow}) {
1844             foreach my $col (1 .. $ss->{maxcol}) {
1845             my $cell = cr2cell ($col, $row);
1846             printf "%s %-3s %d ", $cell, $ss->{$cell},
1847             $ss->{attr}[$col][$row]{merged};
1848             }
1849             print "\n";
1850             }
1851              
1852             A1 0 B1 foo 1 C1 1
1853             A2 bar 1 B2 1 C2 1
1854             A3 1 B3 urg 0 C3 orc 0
1855              
1856             In this example, there is no way to see if C<B2> is merged to C<A2> or
1857             to C<B1> without analyzing all surrounding cells. This could as well
1858             mean C<A2:A3>, C<B1:C1>, C<B2:C2>, as C<A2:A3>, C<B1:B2>, C<C1:C2>, as
1859             C<A2:A3>, C<B1:C2>.
1860             Use the L<C<merged>|/merged> entry described above to find out what
1861             fields are merged to what other fields.
1862              
1863             =head1 TOOLS
1864              
1865             This modules comes with a few tools that perform tasks from the FAQ, like
1866             "How do I select only column D through F from sheet 2 into a CSV file?"
1867              
1868             If the module was installed without the tools, you can find them here:
1869             https://github.com/Tux/Spreadsheet-Read/tree/master/examples
1870              
1871             =head2 C<xlscat>
1872              
1873             Show (parts of) a spreadsheet in plain text, CSV, or HTML
1874              
1875             usage: xlscat [-s <sep>] [-L] [-n] [-A] [-u] [Selection] file.xls
1876             [-c | -m] [-u] [Selection] file.xls
1877             -i [-S sheets] file.xls
1878             Generic options:
1879             -v[#] Set verbose level (xlscat/xlsgrep)
1880             -d[#] Set debug level (Spreadsheet::Read)
1881             -u Use unformatted values
1882             --noclip Do not strip empty sheets and
1883             trailing empty rows and columns
1884             -e <enc> Set encoding for input and output
1885             -b <enc> Set encoding for input
1886             -a <enc> Set encoding for output
1887             Input CSV:
1888             --in-sep=c Set input sep_char for CSV
1889             Input XLS:
1890             --dtfmt=fmt Specify the default date format to replace 'm-d-yy'
1891             the default replacement is 'yyyy-mm-dd'
1892             Output Text (default):
1893             -s <sep> Use separator <sep>. Default '|', \n allowed
1894             -L Line up the columns
1895             -n [skip] Number lines (prefix with column number)
1896             optionally skip <skip> (header) lines
1897             -A Show field attributes in ANSI escapes
1898             -h[#] Show # header lines
1899             Output Index only:
1900             -i Show sheet names and size only
1901             Output CSV:
1902             -c Output CSV, separator = ','
1903             -m Output CSV, separator = ';'
1904             Output HTML:
1905             -H Output HTML
1906             Selection:
1907             -S <sheets> Only print sheets <sheets>. 'all' is a valid set
1908             Default only prints the first sheet
1909             -R <rows> Only print rows <rows>. Default is 'all'
1910             -C <cols> Only print columns <cols>. Default is 'all'
1911             -F <flds> Only fields <flds> e.g. -FA3,B16
1912             Ordering (column numbers in result set *after* selection):
1913             --sort=spec Sort output (e.g. --sort=3,2r,5n,1rn+2)
1914             +# - first # lines do not sort (header)
1915             # - order on column # lexical ascending
1916             #n - order on column # numeric ascending
1917             #r - order on column # lexical descending
1918             #rn - order on column # numeric descending
1919              
1920             =head2 C<xlsgrep>
1921              
1922             Show (parts of) a spreadsheet that match a pattern in plain text, CSV, or HTML
1923              
1924             usage: xlsgrep [-s <sep>] [-L] [-n] [-A] [-u] [Selection] pattern file.xls
1925             [-c | -m] [-u] [Selection] pattern file.xls
1926             -i [-S sheets] pattern file.xls
1927             Generic options:
1928             -v[#] Set verbose level (xlscat/xlsgrep)
1929             -d[#] Set debug level (Spreadsheet::Read)
1930             -u Use unformatted values
1931             --noclip Do not strip empty sheets and
1932             trailing empty rows and columns
1933             -e <enc> Set encoding for input and output
1934             -b <enc> Set encoding for input
1935             -a <enc> Set encoding for output
1936             Input CSV:
1937             --in-sep=c Set input sep_char for CSV
1938             Input XLS:
1939             --dtfmt=fmt Specify the default date format to replace 'm-d-yy'
1940             the default replacement is 'yyyy-mm-dd'
1941             Output Text (default):
1942             -s <sep> Use separator <sep>. Default '|', \n allowed
1943             -L Line up the columns
1944             -n [skip] Number lines (prefix with column number)
1945             optionally skip <skip> (header) lines
1946             -A Show field attributes in ANSI escapes
1947             -h[#] Show # header lines
1948             Grep options:
1949             -i Ignore case
1950             -w Match whole words only
1951             Output CSV:
1952             -c Output CSV, separator = ','
1953             -m Output CSV, separator = ';'
1954             Output HTML:
1955             -H Output HTML
1956             Selection:
1957             -S <sheets> Only print sheets <sheets>. 'all' is a valid set
1958             Default only prints the first sheet
1959             -R <rows> Only print rows <rows>. Default is 'all'
1960             -C <cols> Only print columns <cols>. Default is 'all'
1961             -F <flds> Only fields <flds> e.g. -FA3,B16
1962             Ordering (column numbers in result set *after* selection):
1963             --sort=spec Sort output (e.g. --sort=3,2r,5n,1rn+2)
1964             +# - first # lines do not sort (header)
1965             # - order on column # lexical ascending
1966             #n - order on column # numeric ascending
1967             #r - order on column # lexical descending
1968             #rn - order on column # numeric descending
1969              
1970             =head2 C<xls2csv>
1971              
1972             Convert a spreadsheet to CSV. This is just a small wrapper over C<xlscat>.
1973              
1974             usage: xls2csv [ -o file.csv ] file.xls
1975              
1976             =head2 C<ss2tk>
1977              
1978             Show a spreadsheet in a perl/Tk spreadsheet widget
1979              
1980             usage: ss2tk [-w <width>] [X11 options] file.xls [<pattern>]
1981             -w <width> use <width> as default column width (4)
1982              
1983             =head2 C<ssdiff>
1984              
1985             Show the differences between two spreadsheets.
1986              
1987             usage: examples/ssdiff [--verbose[=1]] file.xls file.xlsx
1988              
1989             =head1 TODO
1990              
1991             =over 4
1992              
1993             =item Options
1994              
1995             =over 2
1996              
1997             =item Module Options
1998              
1999             New Spreadsheet::Read options are bound to happen. I'm thinking of an
2000             option that disables the reading of the data entirely to speed up an
2001             index request (how many sheets/fields/columns). See C<xlscat -i>.
2002              
2003             =item Parser options
2004              
2005             Try to transparently support as many options as the encapsulated modules
2006             support regarding (un)formatted values, (date) formats, hidden columns
2007             rows or fields etc. These could be implemented like C<attr> above but
2008             names C<meta>, or just be new values in the C<attr> hashes.
2009              
2010             =back
2011              
2012             =item Other parsers
2013              
2014             Add support for new(er) parsers for already supported formats, like
2015              
2016             =over 2
2017              
2018             =item Data::XLSX::Parser
2019              
2020             Data::XLSX::Parser provides faster way to parse Microsoft Excel's .xlsx
2021             files. The implementation of this module is highly inspired from Python's
2022             FastXLSX library.
2023              
2024             This is SAX based parser, so you can parse very large XLSX file with
2025             lower memory usage.
2026              
2027             =back
2028              
2029             =item Other spreadsheet formats
2030              
2031             I consider adding any spreadsheet interface that offers a usable API.
2032              
2033             Under investigation:
2034              
2035             =over 2
2036              
2037             =item Gnumeric (.gnumeric)
2038              
2039             I have seen no existing CPAN module yet.
2040              
2041             It is gzip'ed XML
2042              
2043             =item Kspread (.ksp)
2044              
2045             Now knows as Calligra Sheets.
2046              
2047             I have seen no existing CPAN module yet.
2048              
2049             It is XML in ZIP
2050              
2051             =back
2052              
2053             =item Alternative parsers for existing formats
2054              
2055             As long as the alternative has a good reason for its existence, and the
2056             API of that parser reasonable fits in my approach, I will consider to
2057             implement the glue layer, or apply patches to do so as long as these
2058             match what F<CONTRIBUTING.md> describes.
2059              
2060             =back
2061              
2062             =head1 SEE ALSO
2063              
2064             =over 2
2065              
2066             =item Text::CSV_XS, Text::CSV_PP
2067              
2068             See L<Text::CSV_XS|https://metacpan.org/release/Text-CSV_XS> ,
2069             L<Text::CSV_PP|https://metacpan.org/release/Text-CSV_PP> , and
2070             L<Text::CSV|https://metacpan.org/release/Text-CSV> documentation.
2071              
2072             L<Text::CSV|https://metacpan.org/release/Text-CSV> is a wrapper over Text::CSV_XS (the fast XS version) and/or
2073             L<Text::CSV_PP|https://metacpan.org/release/Text-CSV_PP> (the pure perl version).
2074              
2075             =item Spreadsheet::ParseExcel
2076              
2077             L<Spreadsheet::ParseExcel|https://metacpan.org/release/Spreadsheet-ParseExcel> is
2078             the best parser for old-style Microsoft Excel (.xls) files.
2079              
2080             =item Spreadsheet::ParseXLSX
2081              
2082             L<Spreadsheet::ParseXLSX|https://metacpan.org/release/Spreadsheet-ParseXLSX> is
2083             like L<Spreadsheet::ParseExcel|https://metacpan.org/release/Spreadsheet-ParseExcel>,
2084             but for new Microsoft Excel 2007+ files (.xlsx). They have the same API.
2085              
2086             This module uses L<XML::Twig|https://metacpan.org/release/XML-Twig> to parse the
2087             internal XML.
2088              
2089             =item Spreadsheet::XLSX
2090              
2091             See L<Spreadsheet::XLSX|https://metacpan.org/release/Spreadsheet-XLSX>
2092             documentation.
2093              
2094             This module is dead and deprecated. It is B<buggy and unmaintained>. I<Please>
2095             use L<Spreadsheet::ParseXLSX|https://metacpan.org/release/Spreadsheet-ParseXLSX>
2096             instead.
2097              
2098             =item Spreadsheet::ReadSXC
2099              
2100             L<Spreadsheet::ReadSXC|https://metacpan.org/release/Spreadsheet-ReadSXC> is a
2101             parser for OpenOffice/LibreOffice (.sxc and .ods) spreadsheet files.
2102              
2103             =item Spreadsheet::BasicRead
2104              
2105             See L<Spreadsheet::BasicRead|https://metacpan.org/release/Spreadsheet-BasicRead>
2106             for xlscat-like functionality (Excel only)
2107              
2108             =item Spreadsheet::ConvertAA
2109              
2110             See L<Spreadsheet::ConvertAA|https://metacpan.org/release/Spreadsheet-ConvertAA>
2111             for an alternative set of L</cell2cr>/L</cr2cell> pair.
2112              
2113             =item Spreadsheet::Perl
2114              
2115             L<Spreadsheet::Perl|https://metacpan.org/release/Spreadsheet-Perl> offers a Pure
2116             Perl implementation of a spreadsheet engine. Users that want this format to be
2117             supported in Spreadsheet::Read are hereby motivated to offer patches. It is
2118             not high on my TODO-list.
2119              
2120             =item Spreadsheet::CSV
2121              
2122             L<Spreadsheet::CSV|https://metacpan.org/release/Spreadsheet-CSV> offers the
2123             interesting approach of seeing all supported spreadsheet formats as if it were
2124             CSV, mimicking the L<Text::CSV_XS|https://metacpan.org/release/Text-CSV_XS>
2125             interface.
2126              
2127             =item xls2csv
2128              
2129             L<xls2csv|https://metacpan.org/release/xls2csv> offers an alternative for my
2130             C<xlscat -c>, in the xls2csv tool, but this tool focuses on character encoding
2131             transparency, and requires some other modules.
2132              
2133             =back
2134              
2135             =head1 AUTHOR
2136              
2137             H.Merijn Brand, <h.m.brand@xs4all.nl>
2138              
2139             =head1 COPYRIGHT AND LICENSE
2140              
2141             Copyright (C) 2005-2019 H.Merijn Brand
2142              
2143             This library is free software; you can redistribute it and/or modify
2144             it under the same terms as Perl itself.
2145              
2146             =cut