File Coverage

blib/lib/Spreadsheet/Read.pm
Criterion Covered Total %
statement 447 523 85.4
branch 384 514 74.7
condition 153 220 69.5
subroutine 46 46 100.0
pod 13 13 100.0
total 1043 1316 79.2


line stmt bran cond sub pod time code
1             #!/pro/bin/perl
2              
3             package Spreadsheet::Read;
4              
5             =head1 NAME
6              
7             Spreadsheet::Read - Read the data from a spreadsheet
8              
9             =head1 SYNOPSIS
10              
11             use Spreadsheet::Read;
12             my $book = ReadData ("test.csv", sep => ";");
13             my $book = ReadData ("test.sxc");
14             my $book = ReadData ("test.ods");
15             my $book = ReadData ("test.xls");
16             my $book = ReadData ("test.xlsx");
17             my $book = ReadData ($fh, parser => "xls");
18              
19             Spreadsheet::Read::add ($book, "sheet.csv");
20              
21             my $sheet = $book->[1]; # first datasheet
22             my $cell = $book->[1]{A3}; # content of field A3 of sheet 1
23             my $cell = $book->[1]{cell}[1][3]; # same, unformatted
24              
25             # OO API
26             my $book = Spreadsheet::Read->new ("file.csv");
27             my $sheet = $book->sheet (1);
28             my $cell = $sheet->cell ("A3");
29             my $cell = $sheet->cell (1, 3);
30              
31             $book->add ("test.xls");
32              
33             =cut
34              
35 53     53   5103204 use 5.8.1;
  53         687  
36 53     53   303 use strict;
  53         132  
  53         1239  
37 53     53   301 use warnings;
  53         132  
  53         3686  
38              
39             our $VERSION = "0.80";
40 1     1 1 190 sub Version { $VERSION }
41              
42 53     53   386 use Carp;
  53         146  
  53         3232  
43 53     53   360 use Exporter;
  53         118  
  53         4581  
44             our @ISA = qw( Exporter );
45             our @EXPORT = qw( ReadData cell2cr cr2cell );
46             our @EXPORT_OK = qw( parses rows cellrow row add );
47              
48 53     53   28812 use Encode qw( decode );
  53         855019  
  53         4019  
49 53     53   45456 use File::Temp qw( );
  53         1101698  
  53         1586  
50 53     53   32181 use Data::Dumper;
  53         326483  
  53         54353  
51              
52             my @parsers = (
53             [ csv => "Text::CSV_XS", "0.71" ],
54             [ csv => "Text::CSV_PP", "1.17" ],
55             [ csv => "Text::CSV", "1.17" ],
56             [ ods => "Spreadsheet::ReadSXC", "0.20" ],
57             [ sxc => "Spreadsheet::ReadSXC", "0.20" ],
58             [ xls => "Spreadsheet::ParseExcel", "0.34" ],
59             [ xlsx => "Spreadsheet::ParseXLSX", "0.24" ],
60             [ xlsx => "Spreadsheet::XLSX", "0.13" ],
61             [ prl => "Spreadsheet::Perl", "" ],
62              
63             # Helper modules
64             [ ios => "IO::Scalar", "" ],
65             [ dmp => "Data::Peek", "" ],
66             );
67             my %can = ( supports => { map { $_->[1] => $_->[2] } @parsers });
68             foreach my $p (@parsers) {
69             my $format = $p->[0];
70             $can{$format} and next;
71             $can{$format} = "";
72             my $preset = $ENV{"SPREADSHEET_READ_\U$format"} or next;
73             my $min_version = $can{supports}{$preset};
74             unless ($min_version) {
75             # Catch weirdness like $SPREADSHEET_READ_XLSX = "DBD::Oracle"
76             $can{$format} = "!$preset is not supported for the $format format";
77             next;
78             }
79             if (eval "local \$_; require $preset" and not $@) {
80             # forcing a parser should still check the version
81             my $ok;
82             my $has = $preset->VERSION;
83             $has =~ s/_[0-9]+$//; # Remove beta-part
84             if ($min_version =~ m/^v([0-9.]+)/) { # clumsy versions
85             my @min = split m/\./ => $1;
86             $has =~ s/^v//;
87             my @has = split m/\./ => $has;
88             $ok = (($has[0] * 1000 + $has[1]) * 1000 + $has[2]) >=
89             (($min[0] * 1000 + $min[1]) * 1000 + $min[2]);
90             }
91             else { # normal versions
92             $ok = $has >= $min_version;
93             }
94             $ok or $preset = "!$preset";
95             }
96             else {
97             $preset = "!$preset";
98             }
99             $can{$format} = $preset;
100             }
101             delete $can{supports};
102             for (@parsers) {
103             my ($flag, $mod, $vsn) = @$_;
104             $can{$flag} and next;
105             eval "require $mod; \$vsn and ${mod}->VERSION (\$vsn); \$can{\$flag} = '$mod'" or
106             $_->[0] = "! Cannot use $mod version $vsn: $@";
107             }
108             $can{sc} = __PACKAGE__; # SquirelCalc is built-in
109              
110             defined $Spreadsheet::ParseExcel::VERSION && $Spreadsheet::ParseExcel::VERSION < 0.61 and
111             *Spreadsheet::ParseExcel::Workbook::get_active_sheet = sub { undef; };
112              
113             my $debug = 0;
114             my %def_opts = (
115             rc => 1,
116             cells => 1,
117             attr => 0,
118             clip => undef, # $opt{cells};
119             strip => 0,
120             pivot => 0,
121             dtfmt => "yyyy-mm-dd", # Format 14
122             debug => 0,
123             passwd => undef,
124             parser => undef,
125             sep => undef,
126             quote => undef,
127             label => undef,
128             );
129             my @def_attr = (
130             type => "text",
131             fgcolor => undef,
132             bgcolor => undef,
133             font => undef,
134             size => undef,
135             format => undef,
136             halign => "left",
137             valign => "top",
138             bold => 0,
139             italic => 0,
140             uline => 0,
141             wrap => 0,
142             merged => 0,
143             hidden => 0,
144             locked => 0,
145             enc => "utf-8", # $ENV{LC_ALL} // $ENV{LANG} // ...
146             formula => undef,
147             );
148              
149             # Helper functions
150              
151             sub _dump {
152 7     7   17 my ($label, $ref) = @_;
153 7 50       22 if ($can{dmp}) {
154 7         42 print STDERR Data::Peek::DDumper ({ $label => $ref });
155             }
156             else {
157 0         0 print STDERR Data::Dumper->Dump ([$ref], [$label]);
158             }
159             } # _dump
160              
161             sub _parser {
162 342 100   342   1370 my $type = shift or return "";
163 100         287 $type = lc $type;
164             # Aliases and fullnames
165 100 100       327 $type eq "excel" and return "xls";
166 99 50       307 $type eq "excel2007" and return "xlsx";
167 99 100       285 $type eq "oo" and return "sxc";
168 98 100       297 $type eq "ods" and return "sxc";
169 96 100       277 $type eq "openoffice" and return "sxc";
170 95 50       278 $type eq "libreoffice" and return "sxc";
171 95 100       278 $type eq "perl" and return "prl";
172 94 50       279 $type eq "squirelcalc" and return "sc";
173 94 100       572 return exists $can{$type} ? $type : "";
174             } # _parser
175              
176             sub new {
177 12     12 1 7235 my $class = shift;
178 12   100     42 my $r = ReadData (@_) || [{
179             parsers => [],
180             error => undef,
181             sheets => 0,
182             sheet => { },
183             }];
184 12         167 bless $r => $class;
185             } # new
186              
187             # Spreadsheet::Read::parses ("csv") or die "Cannot parse CSV"
188             sub parses {
189 63 100   63 1 142213 ref $_[0] eq __PACKAGE__ and shift;
190 63 100       252 my $type = _parser (shift) or return 0;
191 61 100       422 if ($can{$type} =~ m/^!\s*(.*)/) {
192 8         31 $@ = $1;
193 8         29 return 0;
194             }
195 53         181 return $can{$type};
196             } # parses
197              
198             sub sheets {
199 4     4 1 11 my $ctrl = shift->[0];
200 4         8 my %s = %{$ctrl->{sheet}};
  4         17  
201 4 100       32 wantarray ? sort { $s{$a} <=> $s{$b} } keys %s : $ctrl->{sheets};
  4         25  
202             } # sheets
203              
204             # col2label (4) => "D"
205             sub col2label {
206 15697 100   15697 1 33559 ref $_[0] eq __PACKAGE__ and shift;
207 15697         20665 my $c = shift;
208 15697 100 100     40185 defined $c && $c > 0 or return "";
209 15693         21147 my $cell = "";
210 15693         25964 while ($c) {
211 53     53   28205 use integer;
  53         806  
  53         290  
212              
213 15710         29819 substr $cell, 0, 0, chr (--$c % 26 + ord "A");
214 15710         28222 $c /= 26;
215             }
216 15693         41640 $cell;
217             } # col2label
218              
219             # cr2cell (4, 18) => "D18"
220             # No prototype to allow 'cr2cell (@rowcol)'
221             sub cr2cell {
222 15495 100   15495 1 186261 ref $_[0] eq __PACKAGE__ and shift;
223 15495         24713 my ($c, $r) = @_;
224 15495 100 100     71533 defined $c && defined $r && $c > 0 && $r > 0 or return "";
      100        
      100        
225 15489         23358 col2label ($c) . $r;
226             } # cr2cell
227              
228             # cell2cr ("D18") => (4, 18)
229             sub cell2cr {
230 604 100   604 1 585639 ref $_[0] eq __PACKAGE__ and shift;
231 604 100 100     5510 my ($cc, $r) = (uc ($_[0]||"") =~ m/^([A-Z]+)([0-9]+)$/) or return (0, 0);
232 600         1208 my $c = 0;
233 600         2466 while ($cc =~ s/^([A-Z])//) {
234 608         2421 $c = 26 * $c + 1 + ord ($1) - ord ("A");
235             }
236 600         2043 ($c, $r);
237             } # cell2cr
238              
239             # my @row = cellrow ($book->[1], 1);
240             # my @row = $book->cellrow (1, 1);
241             sub cellrow {
242 10 100   10 1 66 my $sheet = ref $_[0] eq __PACKAGE__ ? (shift)->[shift] : shift or return;
    100          
243 9 100 66     63 ref $sheet eq "HASH" && exists $sheet->{cell} or return;
244 7 50 33     53 exists $sheet->{maxcol} && exists $sheet->{maxrow} or return;
245 7 100       27 my $row = shift or return;
246 6 100 100     62 $row > 0 && $row <= $sheet->{maxrow} or return;
247 4         13 my $s = $sheet->{cell};
248 4         15 map { $s->[$_][$row] } 1..$sheet->{maxcol};
  76         143  
249             } # cellrow
250              
251             # my @row = row ($book->[1], 1);
252             sub row {
253 10 100   10 1 3241 my $sheet = ref $_[0] eq __PACKAGE__ ? (shift)->[shift] : shift or return;
    100          
254 9 100 66     67 ref $sheet eq "HASH" && exists $sheet->{cell} or return;
255 7 50 33     36 exists $sheet->{maxcol} && exists $sheet->{maxrow} or return;
256 7 100       24 my $row = shift or return;
257 6 100 100     38 $row > 0 && $row <= $sheet->{maxrow} or return;
258 4         20 map { $sheet->{cr2cell ($_, $row)} } 1..$sheet->{maxcol};
  76         136  
259             } # row
260              
261             # Convert {cell}'s [column][row] to a [row][column] list
262             # my @rows = rows ($book->[1]);
263             sub rows {
264 33 50   33 1 19187 my $sheet = ref $_[0] eq __PACKAGE__ ? (shift)->[shift] : shift or return;
    100          
265 30 100 100     170 ref $sheet eq "HASH" && exists $sheet->{cell} or return;
266 26 100 100     118 exists $sheet->{maxcol} && exists $sheet->{maxrow} or return;
267 5         16 my $s = $sheet->{cell};
268              
269             map {
270 20         35 my $r = $_;
271 20         38 [ map { $s->[$_][$r] } 1..$sheet->{maxcol} ];
  80         204  
272 5         20 } 1..$sheet->{maxrow};
273             } # rows
274              
275             sub sheet {
276 23     23 1 611 my ($book, $sheet) = @_;
277 23 100 100     118 $book && $sheet or return;
278 21         38 my $class = "Spreadsheet::Read::Sheet";
279             $sheet =~ m/^[0-9]+$/ && $sheet >= 1 && $sheet <= $book->[0]{sheets} and
280 21 100 66     276 return bless $book->[$sheet] => $class;
      100        
281             exists $book->[0]{sheet}{$sheet} and
282 6 100       27 return bless $book->[$book->[0]{sheet}{$sheet}] => $class;
283 5         13 foreach my $idx (1 .. $book->[0]{sheets}) {
284 6 100       23 $book->[$idx]{label} eq $sheet and
285             return bless $book->[$idx] => $class;
286             }
287 4         19 return;
288             } # sheet
289              
290             # If option "clip" is set, remove the trailing rows and
291             # columns in each sheet that contain no visible data
292             sub _clipsheets {
293 227     227   537 my ($opt, $ref) = @_;
294              
295 227 50       758 $ref->[0]{sheets} or return $ref;
296              
297 227         615 my ($rc, $cl) = ($opt->{rc}, $opt->{cells});
298 227         601 my ($oc, $os, $oa) = ($opt->{clip}, $opt->{strip}, $opt->{attr});
299              
300             # Strip leading/trailing spaces
301 227 100 100     949 if ($os || $oc) {
302 212         646 foreach my $sheet (1 .. $ref->[0]{sheets}) {
303 254         574 $ref->[$sheet]{indx} = $sheet;
304 254         438 my $ss = $ref->[$sheet];
305 254 100 66     1064 $ss->{maxrow} && $ss->{maxcol} or next;
306 247         540 my ($mc, $mr) = (0, 0);
307 247         646 foreach my $row (1 .. $ss->{maxrow}) {
308 1954         3257 foreach my $col (1 .. $ss->{maxcol}) {
309 16636 100       25590 if ($rc) {
310 14738 100       26748 defined $ss->{cell}[$col][$row] or next;
311 5881 100       10614 $os & 2 and $ss->{cell}[$col][$row] =~ s/\s+$//;
312 5881 100       9558 $os & 1 and $ss->{cell}[$col][$row] =~ s/^\s+//;
313 5881 100       10479 if (length $ss->{cell}[$col][$row]) {
314 4417 100       7282 $col > $mc and $mc = $col;
315 4417 100       7639 $row > $mr and $mr = $row;
316             }
317             }
318 7779 100       12105 if ($cl) {
319 7560         11401 my $cell = cr2cell ($col, $row);
320 7560 100       15671 defined $ss->{$cell} or next;
321 6233 100       11208 $os & 2 and $ss->{$cell} =~ s/\s+$//;
322 6233 100       9893 $os & 1 and $ss->{$cell} =~ s/^\s+//;
323 6233 100       11233 if (length $ss->{$cell}) {
324 4757 100       7852 $col > $mc and $mc = $col;
325 4757 100       8925 $row > $mr and $mr = $row;
326             }
327             }
328             }
329             }
330              
331 247 100 66     1341 $oc && ($mc < $ss->{maxcol} || $mr < $ss->{maxrow}) or next;
      66        
332              
333             # Remove trailing empty columns
334 106         252 foreach my $col (($mc + 1) .. $ss->{maxcol}) {
335 243 100       564 $rc and undef $ss->{cell}[$col];
336 243 100       474 $oa and undef $ss->{attr}[$col];
337 243 100       444 $cl or next;
338 193         308 my $c = col2label ($col);
339 193         1425 delete $ss->{"$c$_"} for 1 .. $ss->{maxrow};
340             }
341              
342             # Remove trailing empty rows
343 106         247 foreach my $row (($mr + 1) .. $ss->{maxrow}) {
344 139         258 foreach my $col (1 .. $mc) {
345 64 50       203 $cl and delete $ss->{cr2cell ($col, $row)};
346 64 50       168 $rc and undef $ss->{cell} [$col][$row];
347 64 50       133 $oa and undef $ss->{attr} [$col][$row];
348             }
349             }
350              
351 106         314 ($ss->{maxrow}, $ss->{maxcol}) = ($mr, $mc);
352             }
353             }
354              
355 227 100       664 if ($opt->{pivot}) {
356 1         5 foreach my $sheet (1 .. $ref->[0]{sheets}) {
357 1         2 my $ss = $ref->[$sheet];
358 1 0 33     5 $ss->{maxrow} || $ss->{maxcol} or next;
359 1 50       4 my $mx = $ss->{maxrow} > $ss->{maxcol} ? $ss->{maxrow} : $ss->{maxcol};
360 1         4 foreach my $row (2 .. $mx) {
361 18         37 foreach my $col (1 .. ($row - 1)) {
362             $opt->{rc} and
363             ($ss->{cell}[$col][$row], $ss->{cell}[$row][$col]) =
364 171 50       569 ($ss->{cell}[$row][$col], $ss->{cell}[$col][$row]);
365             $opt->{cells} and
366             ($ss->{cr2cell ($col, $row)}, $ss->{cr2cell ($row, $col)}) =
367 171 50       361 ($ss->{cr2cell ($row, $col)}, $ss->{cr2cell ($col, $row)});
368             }
369             }
370 1         4 ($ss->{maxcol}, $ss->{maxrow}) = ($ss->{maxrow}, $ss->{maxcol});
371             }
372             }
373              
374 227         5153 $ref;
375             } # _clipsheets
376              
377             # Convert a single color (index) to a color
378             sub _xls_color {
379 1742     1742   20358 my $clr = shift;
380 1742 100       3571 defined $clr or return undef;
381 850 50       1553 $clr eq "#000000" and return undef;
382 850 50       1620 $clr =~ m/^#[0-9a-fA-F]+$/ and return lc $clr;
383 850 100 66     2527 $clr == 0 || $clr == 32767 and return undef; # Default fg color
384 468         1097 return "#" . lc Spreadsheet::ParseExcel->ColorIdxToRGB ($clr);
385             } # _xls_color
386              
387             # Convert a fill [ $pattern, $front_color, $back_color ] to a single background
388             sub _xls_fill {
389 1526     1526   2714 my ($p, $fg, $bg) = @_;
390 1526 50       2711 defined $p or return undef;
391 1526 50       2593 $p == 32767 and return undef; # Default fg color
392 1526 100 100     16518 $p == 0 && !defined $bg and return undef; # No fill bg color
393 634 100       1117 $p == 1 and return _xls_color ($fg);
394 418 50 33     6189 $bg < 8 || $bg > 63 and return undef; # see Workbook.pm#106
395 0         0 return _xls_color ($bg);
396             } # _xls_fill
397              
398             sub ReadData {
399 279 100   279 1 285361 my $txt = shift or return;
400              
401 276         551 my %opt;
402 276 100       750 if (@_) {
403 221 100       1080 if (ref $_[0] eq "HASH") { %opt = %{shift @_} }
  20 50       30  
  20         77  
404 201         842 elsif (@_ % 2 == 0) { %opt = @_ }
405             }
406              
407 276 100       1032 exists $opt{rc} or $opt{rc} = $def_opts{rc};
408 276 100       812 exists $opt{cells} or $opt{cells} = $def_opts{cells};
409 276 100       775 exists $opt{attr} or $opt{attr} = $def_opts{attr};
410 276 100       749 exists $opt{clip} or $opt{clip} = $opt{cells};
411 276 100       741 exists $opt{strip} or $opt{strip} = $def_opts{strip};
412 276 100       814 exists $opt{dtfmt} or $opt{dtfmt} = $def_opts{dtfmt};
413              
414             # $debug = $opt{debug} // 0;
415 276 100       737 $debug = defined $opt{debug} ? $opt{debug} : $def_opts{debug};
416 276 100       710 $debug > 4 and _dump (Options => \%opt);
417              
418 31         95 my %parser_opts = map { $_ => $opt{$_} }
419 276         2541 grep { !exists $def_opts{$_} }
  1764         3503  
420             keys %opt;
421              
422 276         1036 my $_parser = _parser ($opt{parser});
423              
424 276 100       1066 my $io_ref = ref ($txt) =~ m/GLOB|IO/ ? $txt : undef;
425             my $io_fil = $io_ref ? 0 : $txt =~ m/\0/ ? 0
426 53 100   53   111422 : do { no warnings "newline"; -f $txt };
  53 100       174  
  53         264656  
  276         1023  
  255         5495  
427 276 100 100     1722 my $io_txt = $io_ref || $io_fil ? 0 : 1;
428              
429 276 100 100     3194 $io_fil && ! -s $txt and return;
430 265 100 100     1450 $io_ref && eof ($txt) and return;
431              
432 260 100 100     1863 if ($opt{parser} ? $_parser eq "csv" : ($io_fil && $txt =~ m/\.(csv)$/i)) {
    100          
433 106 50       395 $can{csv} or croak "CSV parser not installed";
434              
435 106 100       439 my $label = defined $opt{label} ? $opt{label} : $io_fil ? $txt : "IO";
    100          
436              
437 106 50       366 $debug and print STDERR "Opening CSV $label using $can{csv}-", $can{csv}->VERSION, "\n";
438              
439             my @data = (
440             { type => "csv",
441             parser => $can{csv},
442             version => $can{csv}->VERSION,
443             parsers => [ {
444             type => "csv",
445             parser => $can{csv},
446             version => $can{csv}->VERSION,
447 106         3064 }],
448             error => undef,
449             quote => '"',
450             sepchar => ',',
451             sheets => 1,
452             sheet => { $label => 1 },
453             },
454             { parser => 0,
455             label => $label,
456             maxrow => 0,
457             maxcol => 0,
458             cell => [],
459             attr => [],
460             merged => [],
461             active => 1,
462             },
463             );
464              
465 106         540 my ($sep, $quo, $in) = (",", '"');
466 106 100       325 defined $opt{sep} and $sep = $opt{sep};
467 106 100       244 defined $opt{quote} and $quo = $opt{quote};
468 106 50       264 $debug > 8 and _dump (debug => {
469             data => \@data, txt => $txt, io_ref => $io_ref, io_fil => $io_fil });
470 106 100       254 if ($io_fil) {
    100          
    100          
    50          
471 94 100 100     329 unless (defined $opt{quote} && defined $opt{sep}) {
472 87 50       3393 open $in, "<", $txt or return;
473 87         1646 my $l1 = <$in>;
474              
475 87 100       419 $quo = defined $opt{quote} ? $opt{quote} : '"';
476             $sep = # If explicitly set, use it
477             defined $opt{sep} ? $opt{sep} :
478             # otherwise start auto-detect with quoted strings
479 87 100       786 $l1 =~ m/["0-9];["0-9;]/ ? ";" :
    100          
    50          
    50          
    100          
    100          
    50          
480             $l1 =~ m/["0-9],["0-9,]/ ? "," :
481             $l1 =~ m/["0-9]\t["0-9,]/ ? "\t" :
482             # If neither, then for unquoted strings
483             $l1 =~ m/\w;[\w;]/ ? ";" :
484             $l1 =~ m/\w,[\w,]/ ? "," :
485             $l1 =~ m/\w\t[\w,]/ ? "\t" :
486             "," ;
487 87         1243 close $in;
488             }
489 94 50       2630 open $in, "<", $txt or return;
490             }
491             elsif ($io_ref) {
492 10         22 $in = $txt;
493             }
494             elsif (ref $txt eq "SCALAR") {
495 1 50       18 open $in, "<", $txt or croak "Cannot open input: $!";
496             }
497             elsif ($txt =~ m/[\r\n,;]/) {
498 1 50   2   59 open $in, "<", \$txt or croak "Cannot open input: $!";
  2         21  
  2         5  
  2         18  
499             }
500             else {
501 0         0 warn "Input type ", ref $txt,
502             " might not be supported. Please file a ticket\n";
503 0         0 $in = $txt; # Now pray ...
504             }
505 106 50       1481 $debug > 1 and print STDERR "CSV sep_char '$sep', quote_char '$quo'\n";
506             my $csv = $can{csv}->new ({
507             %parser_opts,
508              
509             sep_char => ($data[0]{sepchar} = $sep),
510 106 50       1339 quote_char => ($data[0]{quote} = $quo),
511             keep_meta_info => 1,
512             binary => 1,
513             auto_diag => 1,
514             }) or croak "Cannot create a csv ('$sep', '$quo') parser!";
515              
516 106         22143 while (my $row = $csv->getline ($in)) {
517 413 50       70327 my @row = @$row or last;
518              
519 413         886 my $r = ++$data[1]{maxrow};
520 413 100       1020 @row > $data[1]{maxcol} and $data[1]{maxcol} = @row;
521 413         1097 foreach my $c (0 .. $#row) {
522 2427         3329 my $val = $row[$c];
523 2427         4225 my $cell = cr2cell ($c + 1, $r);
524 2427 100       6746 $opt{rc} and $data[1]{cell}[$c + 1][$r] = $val;
525 2427 100       6095 $opt{cells} and $data[1]{$cell} = $val;
526 2427 50       12137 $opt{attr} and $data[1]{attr}[$c + 1][$r] = { @def_attr };
527             }
528             }
529 106 50       14130 $csv->eof () or $data[0]{error} = [ $csv->error_diag ];
530 106         1593 close $in;
531              
532 106         315 for (@{$data[1]{cell}}) {
  106         517  
533 734 100       1602 defined or $_ = [];
534             }
535 106         482 return _clipsheets \%opt, [ @data ];
536             }
537              
538 154 100       411 if ($io_txt) { # && $_parser !~ m/^xlsx?$/) {
539 29 100 66     296 if ( # /etc/magic: Microsoft Office Document
    50 66        
540             $txt =~ m{\A(\376\067\0\043
541             |\320\317\021\340\241\261\032\341
542             |\333\245-\0\0\0)}x
543             # /usr/share/misc/magic
544             || $txt =~ m{\A.{2080}Microsoft Excel 5.0 Worksheet}
545             || $txt =~ m{\A\x09\x04\x06\x00\x00\x00\x10\x00}
546             ) {
547 1 50       5 $can{xls} or croak "Spreadsheet::ParseExcel not installed";
548 1         2 my $tmpfile;
549 1 50       4 if ($can{ios}) { # Do not use a temp file if IO::Scalar is available
550 1         3 $tmpfile = \$txt;
551             }
552             else {
553 0         0 $tmpfile = File::Temp->new (SUFFIX => ".xls", UNLINK => 1);
554 0         0 binmode $tmpfile;
555 0         0 print $tmpfile $txt;
556 0         0 close $tmpfile;
557             }
558 1 50       53 open $io_ref, "<", $tmpfile or return;
559 1         981 $io_txt = 0;
560 1         6 $_parser = _parser ($opt{parser} = "xls");
561             }
562             elsif ( # /usr/share/misc/magic
563             $txt =~ m{\APK\003\004.{4,30}(?:\[Content_Types\]\.xml|_rels/\.rels)}
564             ) {
565 0 0       0 $can{xlsx} or croak "XLSX parser not installed";
566 0         0 my $tmpfile;
567 0 0       0 if ($can{ios}) { # Do not use a temp file if IO::Scalar is available
568 0         0 $tmpfile = \$txt;
569             }
570             else {
571 0         0 $tmpfile = File::Temp->new (SUFFIX => ".xlsx", UNLINK => 1);
572 0         0 binmode $tmpfile;
573 0         0 print $tmpfile $txt;
574 0         0 close $tmpfile;
575             }
576 0 0       0 open $io_ref, "<", $tmpfile or return;
577 0         0 $io_txt = 0;
578 0         0 $_parser = _parser ($opt{parser} = "xlsx");
579             }
580             }
581 154 100 66     1500 if ($opt{parser} ? $_parser =~ m/^xlsx?$/
    100          
582             : ($io_fil && $txt =~ m/\.(xlsx?)$/i && ($_parser = $1))) {
583 75 100       339 my $parse_type = $_parser =~ m/x$/i ? "XLSX" : "XLS";
584 75 50       329 my $parser = $can{lc $parse_type} or
585             croak "Parser for $parse_type is not installed";
586             $debug and print STDERR "Opening $parse_type ", $io_ref ? "<REF>" : $txt,
587 75 50       350 " using $parser-", $can{lc $parse_type}->VERSION, "\n";
    100          
588 75 50       241 $opt{passwd} and $parser_opts{Password} = $opt{passwd};
589 75         194 my $oBook = eval {
590             $io_ref
591             ? $parse_type eq "XLSX"
592             ? $can{xlsx} =~ m/::XLSX$/
593             ? $parser->new ($io_ref)
594             : $parser->new (%parser_opts)->parse ($io_ref)
595             : $parser->new (%parser_opts)->Parse ($io_ref)
596             : $parse_type eq "XLSX"
597 75 0       854 ? $can{xlsx} =~ m/::XLSX$/
    50          
    50          
    100          
    100          
598             ? $parser->new ($txt)
599             : $parser->new (%parser_opts)->parse ($txt)
600             : $parser->new (%parser_opts)->Parse ($txt);
601             };
602 75 50       2252417 unless ($oBook) {
603             # cleanup will fail on folders with spaces.
604 0         0 (my $msg = $@) =~ s/ at \S+ line \d+.*//s;
605 0         0 croak "$parse_type parser cannot parse data: $msg";
606             }
607 75 50       318 $debug > 8 and _dump (oBook => $oBook);
608              
609             # WorkBook keys:
610             # aColor _CurSheet Format SheetCount
611             # ActiveSheet _CurSheet_ FormatStr _skip_chart
612             # Author File NotSetCell _string_contin
613             # BIFFVersion Flg1904 Object Version
614             # _buffer FmtClass PkgStr Worksheet
615             # CellHandler Font _previous_info
616              
617             my @data = ( {
618             type => lc $parse_type,
619             parser => $can{lc $parse_type},
620             version => $can{lc $parse_type}->VERSION,
621             parsers => [{
622             type => lc $parse_type,
623             parser => $can{lc $parse_type},
624             version => $can{lc $parse_type}->VERSION,
625             }],
626             error => undef,
627 75   50     2305 sheets => $oBook->{SheetCount} || 0,
628             sheet => {},
629             } );
630             # $debug and $data[0]{_parser} = $oBook;
631             # Overrule the default date format strings
632             my %def_fmt = (
633             0x0E => lc $opt{dtfmt}, # m-d-yy
634 75         636 0x0F => "d-mmm-yyyy", # d-mmm-yy
635             0x11 => "mmm-yyyy", # mmm-yy
636             0x16 => "yyyy-mm-dd hh:mm", # m-d-yy h:mm
637             );
638 75         538 $oBook->{FormatStr}{$_} = $def_fmt{$_} for keys %def_fmt;
639             my $oFmt = $parse_type eq "XLSX"
640 75 50       742 ? $can{xlsx} =~ m/::XLSX$/
    100          
641             ? Spreadsheet::XLSX::Fmt2007->new
642             : Spreadsheet::ParseExcel::FmtDefault->new
643             : Spreadsheet::ParseExcel::FmtDefault->new;
644              
645 75 100       846 $debug and print STDERR "\t$data[0]{sheets} sheets\n";
646             my $active_sheet = $oBook->get_active_sheet
647             || $oBook->{ActiveSheet}
648 75   33     376 || $oBook->{SelectedSheet};
649 75         826 my $current_sheet = 0;
650 75         146 foreach my $oWkS (@{$oBook->{Worksheet}}) {
  75         232  
651 121         192 $current_sheet++;
652 121 100 100     676 $opt{clip} and !defined $oWkS->{Cells} and next; # Skip empty sheets
653             my %sheet = (
654             parser => 0,
655             label => $oWkS->{Name},
656 115         762 maxrow => 0,
657             maxcol => 0,
658             cell => [],
659             attr => [],
660             merged => [],
661             active => 0,
662             );
663             # $debug and $sheet{_parser} = $oWkS;
664 115 50       401 defined $sheet{label} or $sheet{label} = "-- unlabeled --";
665 115 100       404 exists $oWkS->{MinRow} and $sheet{minrow} = $oWkS->{MinRow} + 1;
666 115 100       346 exists $oWkS->{MaxRow} and $sheet{maxrow} = $oWkS->{MaxRow} + 1;
667 115 100       420 exists $oWkS->{MinCol} and $sheet{mincol} = $oWkS->{MinCol} + 1;
668 115 100       310 exists $oWkS->{MaxCol} and $sheet{maxcol} = $oWkS->{MaxCol} + 1;
669             $sheet{merged} = [
670 12         35 map { $_->[0] }
671 4         30 sort { $a->[1] cmp $b->[1] }
672 12         106 map {[ $_, pack "NNNN", @$_ ]}
673 12         90 map {[ map { $_ + 1 } @{$_}[1,0,3,2] ]}
  48         133  
  12         38  
674 115 100       235 @{$oWkS->get_merged_areas || []}];
  115         460  
675 115         1135 my $sheet_idx = 1 + @data;
676 115 100       345 $debug and print STDERR "\tSheet $sheet_idx '$sheet{label}' $sheet{maxrow} x $sheet{maxcol}\n";
677 115 100       301 if (defined $active_sheet) {
678             # _SheetNo is 0-based
679 7 50       23 my $sheet_no = defined $oWkS->{_SheetNo} ? $oWkS->{_SheetNo} : $current_sheet - 1;
680 7 100       21 $sheet_no eq $active_sheet and $sheet{active} = 1;
681             }
682             # Sheet keys:
683             # _Book FooterMargin MinCol RightMargin
684             # BottomMargin FooterMergin MinRow RightMergin
685             # BottomMergin HCenter Name RowHeight
686             # Cells Header NoColor RowHidden
687             # ColFmtNo HeaderMargin NoOrient Scale
688             # ColHidden HeaderMergin NoPls SheetHidden
689             # ColWidth Kind Notes _SheetNo
690             # Copis Landscape PageFit SheetType
691             # DefColWidth LeftMargin PageStart SheetVersion
692             # DefRowHeight LeftMergin PaperSize TopMargin
693             # Draft LeftToRight _Pos TopMergin
694             # FitHeight MaxCol PrintGrid UsePage
695             # FitWidth MaxRow PrintHeaders VCenter
696             # Footer MergedArea Res VRes
697 115 100       363 if (exists $oWkS->{MinRow}) {
698 110   50     452 my $hiddenRows = $oWkS->{RowHidden} || [];
699 110   50     434 my $hiddenCols = $oWkS->{ColHidden} || [];
700 110 100       310 if ($opt{clip}) {
701 91         227 my ($mr, $mc) = (-1, -1);
702 91         286 foreach my $r ($oWkS->{MinRow} .. $sheet{maxrow}) {
703 516         923 foreach my $c ($oWkS->{MinCol} .. $sheet{maxcol}) {
704 2908 100       5507 my $oWkC = $oWkS->{Cells}[$r][$c] or next;
705 1701 50       3190 defined (my $val = $oWkC->{Val}) or next;
706 1701 100       3027 $val eq "" and next;
707 1679 100       2759 $r > $mr and $mr = $r;
708 1679 100       3003 $c > $mc and $mc = $c;
709             }
710             }
711 91         318 ($sheet{maxrow}, $sheet{maxcol}) = ($mr + 1, $mc + 1);
712             }
713 110         284 foreach my $r ($oWkS->{MinRow} .. $sheet{maxrow}) {
714 597         1157 foreach my $c ($oWkS->{MinCol} .. $sheet{maxcol}) {
715 3132 100       12173 my $oWkC = $oWkS->{Cells}[$r][$c] or next;
716             #defined (my $val = $oWkC->{Val}) or next;
717 1802         2785 my $val = $oWkC->{Val};
718 1802 50 33     6026 if (defined $val and my $enc = $oWkC->{Code}) {
719 0 0       0 $enc eq "ucs2" and $val = decode ("utf-16be", $val);
720             }
721 1802         3920 my $cell = cr2cell ($c + 1, $r + 1);
722 1802 100       5439 $opt{rc} and $sheet{cell}[$c + 1][$r + 1] = $val; # Original
723              
724 1802         2461 my $fmt;
725 1802         2713 my $FmT = $oWkC->{Format};
726 1802 100       3091 if ($FmT) {
727 952 100       1911 unless (ref $FmT) {
728 130         176 $fmt = $FmT;
729 130         211 $FmT = {};
730             }
731             }
732             else {
733 850         1697 $FmT = {};
734             }
735 1802         3104 foreach my $attr (qw( AlignH AlignV FmtIdx Hidden Lock
736             Wrap )) {
737 10812 100       20807 exists $FmT->{$attr} or $FmT->{$attr} = 0;
738             }
739 1802 100       3602 exists $FmT->{Fill} or $FmT->{Fill} = [ 0 ];
740 1802 100       3941 exists $FmT->{Font} or $FmT->{Font} = undef;
741              
742 1802 100       3285 unless (defined $fmt) {
743             $fmt = $FmT->{FmtIdx}
744             ? $oBook->{FormatStr}{$FmT->{FmtIdx}}
745 1672 100       3099 : undef;
746             }
747 1802 100       3476 if ($oWkC->{Type} eq "Numeric") {
748             # Fixed in 0.33 and up
749             # see Spreadsheet/ParseExcel/FmtDefault.pm
750             $FmT->{FmtIdx} == 0x0e ||
751             $FmT->{FmtIdx} == 0x0f ||
752             $FmT->{FmtIdx} == 0x10 ||
753             $FmT->{FmtIdx} == 0x11 ||
754             $FmT->{FmtIdx} == 0x16 ||
755             (defined $fmt && $fmt =~ m{^[dmy][-\\/dmy]*$}) and
756 370 100 33     3261 $oWkC->{Type} = "Date";
      33        
      33        
      33        
      100        
      66        
757             $FmT->{FmtIdx} == 0x09 ||
758             $FmT->{FmtIdx} == 0x0a ||
759             (defined $fmt && $fmt =~ m{^0+\.0+%$}) and
760 370 100 100     2000 $oWkC->{Type} = "Percentage";
      100        
      100        
761             }
762 1802 100       3463 defined $fmt and $fmt =~ s/\\//g;
763             $opt{cells} and # Formatted value
764             $sheet{$cell} = defined $val ? $FmT && exists $def_fmt{$FmT->{FmtIdx}}
765 1802 100 66     9742 ? $oFmt->ValFmt ($oWkC, $oBook)
    50          
    100          
766             : $oWkC->Value : undef;
767 1802 100       15126 if ($opt{attr}) {
768 1526         2210 my $FnT = $FmT->{Font};
769             my $fmi = $FmT->{FmtIdx}
770             ? $oBook->{FormatStr}{$FmT->{FmtIdx}}
771 1526 100       2594 : undef;
772 1526 100       2717 $fmi and $fmi =~ s/\\//g;
773             $sheet{attr}[$c + 1][$r + 1] = {
774             @def_attr,
775              
776             type => lc $oWkC->{Type},
777             enc => $oWkC->{Code},
778             merged => (defined $oWkC->{Merged} ? $oWkC->{Merged} : $oWkC->is_merged) || 0,
779             hidden => ($hiddenRows->[$r] || $hiddenCols->[$c] ? 1 :
780             defined $oWkC->{Hidden} ? $oWkC->{Hidden} : $FmT->{Hidden}) || 0,
781             locked => $FmT->{Lock} || 0,
782             format => $fmi,
783             halign => [ undef, qw( left center right
784             fill justify ), undef,
785             "equal_space" ]->[$FmT->{AlignH}],
786             valign => [ qw( top center bottom justify
787             equal_space )]->[$FmT->{AlignV}],
788             wrap => $FmT->{Wrap},
789             font => $FnT->{Name},
790             size => $FnT->{Height},
791             bold => $FnT->{Bold},
792             italic => $FnT->{Italic},
793             uline => $FnT->{Underline},
794             fgcolor => _xls_color ($FnT->{Color}),
795 1526         6032 bgcolor => _xls_fill (@{$FmT->{Fill}}),
796             formula => $oWkC->{Formula},
797 1526   100     4827 };
      100        
      100        
798             #_dump "cell", $sheet{attr}[$c + 1][$r + 1];
799             }
800             }
801             }
802             }
803 115         206 for (@{$sheet{cell}}) {
  115         353  
804 409 100       1072 defined or $_ = [];
805             }
806 115         1919 push @data, { %sheet };
807             # $data[0]{sheets}++;
808 115 50       534 if ($sheet{label} eq "-- unlabeled --") {
809 0         0 $sheet{label} = "";
810             }
811             else {
812 115         739 $data[0]{sheet}{$sheet{label}} = $#data;
813             }
814             }
815 75         361 return _clipsheets \%opt, [ @data ];
816             }
817              
818 79 100       438 if ($opt{parser} ? _parser ($opt{parser}) eq "sc"
    100          
    100          
819             : $io_fil
820             ? $txt =~ m/\.sc$/
821             : $txt =~ m/^# .*SquirrelCalc/) {
822 46 100       113 if ($io_ref) {
    100          
823 2         8 local $/;
824 2         39 my $x = <$txt>;
825 2         11 $txt = $x;
826             }
827             elsif ($io_fil) {
828 42         168 local $/;
829 42 50       1449 open my $sc, "<", $txt or return;
830 42         1623 $txt = <$sc>;
831 42         623 close $sc;
832             }
833 46 50       282 $txt =~ m/\S/ or return;
834 46 50       148 my $label = defined $opt{label} ? $opt{label} : "sheet";
835 46         571 my @data = (
836             { type => "sc",
837             parser => "Spreadsheet::Read",
838             version => $VERSION,
839             parsers => [{
840             type => "sc",
841             parser => "Spreadsheet::Read",
842             version => $VERSION,
843             }],
844             error => undef,
845             sheets => 1,
846             sheet => { $label => 1 },
847             },
848             { parser => 0,
849             label => $label,
850             maxrow => 0,
851             maxcol => 0,
852             cell => [],
853             attr => [],
854             merged => [],
855             active => 1,
856             },
857             );
858              
859 46         9680 for (split m/\s*[\r\n]\s*/, $txt) {
860 13478 100       21993 if (m/^dimension.*of ([0-9]+) rows.*of ([0-9]+) columns/i) {
861 46         93 @{$data[1]}{qw(maxrow maxcol)} = ($1, $2);
  46         249  
862 46         108 next;
863             }
864 13432 100       32226 s/^r([0-9]+)c([0-9]+)\s*=\s*// or next;
865 3542         6329 my ($c, $r) = map { $_ + 1 } $2, $1;
  7084         15809  
866 3542 100 66     15887 if (m/.* \{(.*)}$/ or m/"(.*)"/) {
867 2714         5160 my $cell = cr2cell ($c, $r);
868 2714 100       8875 $opt{rc} and $data[1]{cell}[$c][$r] = $1;
869 2714 50       7472 $opt{cells} and $data[1]{$cell} = $1;
870 2714 100       6448 $opt{attr} and $data[1]{attr}[$c + 1][$r] = { @def_attr };
871 2714         4592 next;
872             }
873             # Now only formula's remain. Ignore for now
874             # r67c7 = [P2L] 2*(1000*r67c5-60)
875             }
876 46         761 for (@{$data[1]{cell}}) {
  46         154  
877 360 100       723 defined or $_ = [];
878             }
879 46         223 return _clipsheets \%opt, [ @data ];
880             }
881              
882 33 50 66     907 if ($opt{parser} ? _parser ($opt{parser}) eq "sxc"
    100          
883             : ($txt =~ m/^<\?xml/ or -f $txt)) {
884 7 50       984 $can{sxc} or croak "Spreadsheet::ReadSXC not installed";
885 0 0       0 ref $txt and
886             croak ("Sorry, references as input are not (yet) supported by Spreadsheet::ReadSXC");
887              
888 0         0 my $using = "using $can{sxc}-" . $can{sxc}->VERSION;
889 0         0 my $sxc_options = { %parser_opts, OrderBySheet => 1 }; # New interface 0.20 and up
890 0         0 my $sxc;
891 0 0 0     0 if ($txt =~ m/\.(sxc|ods)$/i) {
    0          
    0          
892 0 0       0 $debug and print STDERR "Opening \U$1\E $txt $using\n";
893 0 0       0 $sxc = Spreadsheet::ReadSXC::read_sxc ($txt, $sxc_options) or return;
894             }
895             elsif ($txt =~ m/\.xml$/i) {
896 0 0       0 $debug and print STDERR "Opening XML $txt $using\n";
897 0 0       0 $sxc = Spreadsheet::ReadSXC::read_xml_file ($txt, $sxc_options) or return;
898             }
899             # need to test on pattern to prevent stat warning
900             # on filename with newline
901             elsif ($txt !~ m/^<\?xml/i and -f $txt) {
902 0 0       0 $debug and print STDERR "Opening XML $txt $using\n";
903 0 0       0 open my $f, "<", $txt or return;
904 0         0 local $/;
905 0         0 $txt = <$f>;
906 0         0 close $f;
907             }
908 0 0 0     0 !$sxc && $txt =~ m/^<\?xml/i and
909             $sxc = Spreadsheet::ReadSXC::read_xml_string ($txt, $sxc_options);
910 0 0       0 $debug > 8 and _dump (sxc => $sxc);
911 0 0       0 if ($sxc) {
912 0         0 my @data = ( {
913             type => "sxc",
914             parser => "Spreadsheet::ReadSXC",
915             version => $Spreadsheet::ReadSXC::VERSION,
916             parsers => [{
917             type => "sxc",
918             parser => "Spreadsheet::ReadSXC",
919             version => $Spreadsheet::ReadSXC::VERSION,
920             }],
921             error => undef,
922             sheets => 0,
923             sheet => {},
924             } );
925             my @sheets = ref $sxc eq "HASH" # < 0.20
926             ? map {
927             { label => $_,
928 0         0 data => $sxc->{$_},
929             }
930             } keys %$sxc
931 0 0       0 : @{$sxc};
  0         0  
932 0         0 foreach my $sheet (@sheets) {
933 0         0 my @sheet = @{$sheet->{data}};
  0         0  
934             my %sheet = (
935             parser => 0,
936             label => $sheet->{label},
937 0         0 maxrow => scalar @sheet,
938             maxcol => 0,
939             cell => [],
940             attr => [],
941             merged => [],
942             active => 0,
943             );
944 0         0 my $sheet_idx = 1 + @data;
945 0 0       0 $debug and print STDERR "\tSheet $sheet_idx '$sheet{label}' $sheet{maxrow} rows\n";
946 0         0 foreach my $r (0 .. $#sheet) {
947 0 0       0 my @row = @{$sheet[$r]} or next;
  0         0  
948 0         0 foreach my $c (0 .. $#row) {
949 0 0       0 defined (my $val = $row[$c]) or next;
950 0         0 my $C = $c + 1;
951 0 0       0 $C > $sheet{maxcol} and $sheet{maxcol} = $C;
952 0         0 my $cell = cr2cell ($C, $r + 1);
953 0 0       0 $opt{rc} and $sheet{cell}[$C][$r + 1] = $val;
954 0 0       0 $opt{cells} and $sheet{$cell} = $val;
955 0 0       0 $opt{attr} and $sheet{attr}[$C][$r + 1] = { @def_attr };
956             }
957             }
958 0         0 for (@{$sheet{cell}}) {
  0         0  
959 0 0       0 defined or $_ = [];
960             }
961 0 0       0 $debug and print STDERR "\tSheet $sheet_idx '$sheet{label}' $sheet{maxrow} x $sheet{maxcol}\n";
962 0         0 push @data, { %sheet };
963 0         0 $data[0]{sheets}++;
964 0         0 $data[0]{sheet}{$sheet->{label}} = $#data;
965             }
966 0         0 return _clipsheets \%opt, [ @data ];
967             }
968             }
969              
970 26         168 return;
971             } # ReadData
972              
973             sub add {
974 2     2 1 5 my $book = shift;
975 2 50       7 my $r = ReadData (@_) or return;
976             $book && (ref $book eq "ARRAY" ||
977 2 50 33     25 ref $book eq __PACKAGE__) && $book->[0]{sheets} or return $r;
      33        
      33        
978              
979 2         6 my $c1 = $book->[0];
980 2         4 my $c2 = $r->[0];
981              
982 2 50       7 unless ($c1->{parsers}) {
983 0         0 $c1->{parsers}[0]{$_} = $c1->{$_} for qw( type parser version );
984 0         0 $book->[$_]{parser} = 0 for 1 .. $c1->{sheets};
985             }
986 2         3 my ($pidx) = (grep { my $p = $c1->{parsers}[$_];
987             $p->{type} eq $c2->{type} &&
988             $p->{parser} eq $c2->{parser} &&
989 2 50 33     5 $p->{version} eq $c2->{version} } 0 .. $#{$c1->{parsers}});
  2         19  
  2         8  
990 2 50       6 unless (defined $pidx) {
991 0         0 $pidx = scalar @{$c1->{parsers}};
  0         0  
992 0         0 $c1->{parsers}[$pidx]{$_} = $c2->{$_} for qw( type parser version );
993             }
994              
995 2         4 foreach my $sn (sort { $c2->{sheet}{$a} <=> $c2->{sheet}{$b} } keys %{$c2->{sheet}}) {
  0         0  
  2         11  
996 2         4 my $s = $sn;
997 2         3 my $v = 2;
998 2         6 while (exists $c1->{sheet}{$s}) {
999 1         8 $s = $sn."[".$v++."]";
1000             }
1001 2         8 $c1->{sheet}{$s} = $c1->{sheets} + $c2->{sheet}{$sn};
1002 2         5 $r->[$c2->{sheet}{$sn}]{parser} = $pidx;
1003 2         7 push @$book, $r->[$c2->{sheet}{$sn}];
1004             }
1005 2         5 $c1->{sheets} += $c2->{sheets};
1006              
1007 2         17 return $book;
1008             } # add
1009              
1010             package Spreadsheet::Read::Attribute;
1011              
1012 53     53   584 use Carp;
  53         156  
  53         4595  
1013 53     53   402 use vars qw( $AUTOLOAD );
  53         138  
  53         62049  
1014              
1015             sub AUTOLOAD {
1016 6     6   607 my $self = shift;
1017 6         31 (my $attr = $AUTOLOAD) =~ s/.*:://;
1018 6         194 $self->{$attr};
1019             } # AUTOLOAD
1020              
1021             package Spreadsheet::Read::Sheet;
1022              
1023             sub cell {
1024 55     55   21025 my ($sheet, @id) = @_;
1025             @id == 2 && $id[0] =~ m/^[0-9]+$/ && $id[1] =~ m/^[0-9]+$/ and
1026 55 50 66     501 return $sheet->{cell}[$id[0]][$id[1]];
      66        
1027             @id && $id[0] && exists $sheet->{$id[0]} and
1028 27 50 33     419 return $sheet->{$id[0]};
      33        
1029             } # cell
1030              
1031             sub attr {
1032 8     8   24 my ($sheet, @id) = @_;
1033 8         15 my $class = "Spreadsheet::Read::Attribute";
1034             @id == 2 && $id[0] =~ m/^[0-9]+$/ && $id[1] =~ m/^[0-9]+$/ and
1035 8 50 66     66 return bless $sheet->{attr}[$id[0]][$id[1]] => $class;
      66        
1036 5 50 33     31 if (@id && $id[0] && exists $sheet->{$id[0]}) {
      33        
1037 5         13 my ($c, $r) = $sheet->cell2cr ($id[0]);
1038 5         47 return bless $sheet->{attr}[$c][$r] => $class;
1039             }
1040 0         0 undef;
1041             } # attr
1042              
1043             sub maxrow {
1044 2     2   4 my $sheet = shift;
1045 2         9 return $sheet->{maxrow};
1046             } # maxrow
1047              
1048             sub maxcol {
1049 2     2   5 my $sheet = shift;
1050 2         8 return $sheet->{maxcol};
1051             } # maxrow
1052              
1053             sub col2label {
1054 1 50   1   8 $_[0] =~ m/::/ and shift; # class unused
1055 1         5 return Spreadsheet::Read::col2label (@_);
1056             } # col2label
1057              
1058             sub cr2cell {
1059 24 50   24   66 $_[0] =~ m/::/ and shift; # class unused
1060 24         50 return Spreadsheet::Read::cr2cell (@_);
1061             } # cr2cell
1062              
1063             sub cell2cr {
1064 21 50   21   12888 $_[0] =~ m/::/ and shift; # class unused
1065 21         56 return Spreadsheet::Read::cell2cr (@_);
1066             } # cell2cr
1067              
1068             sub label {
1069 2     2   6 my ($sheet, $label) = @_;
1070 2 100       9 defined $label and $sheet->{label} = $label;
1071 2         73 return $sheet->{label};
1072             } # label
1073              
1074             sub active {
1075 1     1   3 my $sheet = shift;
1076 1         4 return $sheet->{active};
1077             } # label
1078              
1079             # my @row = $sheet->cellrow (1);
1080             sub cellrow {
1081 4     4   10 my ($sheet, $row) = @_;
1082 4 100 66     36 defined $row && $row > 0 && $row <= $sheet->{maxrow} or return;
      100        
1083 1         2 my $s = $sheet->{cell};
1084 1         4 map { $s->[$_][$row] } 1..$sheet->{maxcol};
  19         38  
1085             } # cellrow
1086              
1087             # my @row = $sheet->row (1);
1088             sub row {
1089 4     4   10 my ($sheet, $row) = @_;
1090 4 100 66     36 defined $row && $row > 0 && $row <= $sheet->{maxrow} or return;
      100        
1091 1         4 map { $sheet->{$sheet->cr2cell ($_, $row)} } 1..$sheet->{maxcol};
  19         37  
1092             } # row
1093              
1094             # my @col = $sheet->cellcolumn (1);
1095             sub cellcolumn {
1096 4     4   13 my ($sheet, $col) = @_;
1097 4 100 66     36 defined $col && $col > 0 && $col <= $sheet->{maxcol} or return;
      100        
1098 1         3 my $s = $sheet->{cell};
1099 1         4 map { $s->[$col][$_] } 1..$sheet->{maxrow};
  5         17  
1100             } # cellcolumn
1101              
1102             # my @col = $sheet->column (1);
1103             sub column {
1104 4     4   10 my ($sheet, $col) = @_;
1105 4 100 66     37 defined $col && $col > 0 && $col <= $sheet->{maxcol} or return;
      100        
1106 1         5 map { $sheet->{$sheet->cr2cell ($col, $_)} } 1..$sheet->{maxrow};
  5         14  
1107             } # column
1108              
1109             # Convert {cell}'s [column][row] to a [row][column] list
1110             # my @rows = $sheet->rows ();
1111             sub rows {
1112 1     1   3 my $sheet = shift;
1113 1         2 my $s = $sheet->{cell};
1114              
1115             map {
1116 5         10 my $r = $_;
1117 5         9 [ map { $s->[$_][$r] } 1..$sheet->{maxcol} ];
  95         164  
1118 1         4 } 1..$sheet->{maxrow};
1119             } # rows
1120              
1121             1;
1122              
1123             __END__
1124             =head1 DESCRIPTION
1125              
1126             Spreadsheet::Read tries to transparently read *any* spreadsheet and
1127             return its content in a universal manner independent of the parsing
1128             module that does the actual spreadsheet scanning.
1129              
1130             For OpenOffice and/or LibreOffice this module uses
1131             L<Spreadsheet::ReadSXC|https://metacpan.org/release/Spreadsheet-ReadSXC>
1132              
1133             For Microsoft Excel this module uses
1134             L<Spreadsheet::ParseExcel|https://metacpan.org/release/Spreadsheet-ParseExcel>,
1135             L<Spreadsheet::ParseXLSX|https://metacpan.org/release/Spreadsheet-ParseXLSX>, or
1136             L<Spreadsheet::XLSX|https://metacpan.org/release/Spreadsheet-XLSX> (stronly
1137             discouraged).
1138              
1139             For CSV this module uses L<Text::CSV_XS|https://metacpan.org/release/Text-CSV_XS>
1140             or L<Text::CSV_PP|https://metacpan.org/release/Text-CSV_PP>.
1141              
1142             For SquirrelCalc there is a very simplistic built-in parser
1143              
1144             =head2 Data structure
1145              
1146             The data is returned as an array reference:
1147              
1148             $book = [
1149             # Entry 0 is the overall control hash
1150             { sheets => 2,
1151             sheet => {
1152             "Sheet 1" => 1,
1153             "Sheet 2" => 2,
1154             },
1155             parsers => [ {
1156             type => "xls",
1157             parser => "Spreadsheet::ParseExcel",
1158             version => 0.59,
1159             }],
1160             error => undef,
1161             },
1162             # Entry 1 is the first sheet
1163             { parser => 0,
1164             label => "Sheet 1",
1165             maxrow => 2,
1166             maxcol => 4,
1167             cell => [ undef,
1168             [ undef, 1 ],
1169             [ undef, undef, undef, undef, undef, "Nugget" ],
1170             ],
1171             attr => [],
1172             merged => [],
1173             active => 1,
1174             A1 => 1,
1175             B5 => "Nugget",
1176             },
1177             # Entry 2 is the second sheet
1178             { parser => 0,
1179             label => "Sheet 2",
1180             :
1181             :
1182              
1183             To keep as close contact to spreadsheet users, row and column 1 have
1184             index 1 too in the C<cell> element of the sheet hash, so cell "A1" is
1185             the same as C<cell> [1, 1] (column first). To switch between the two,
1186             there are helper functions available: C<cell2cr ()>, C<cr2cell ()>,
1187             and C<col2label ()>.
1188              
1189             The C<cell> hash entry contains unformatted data, while the hash entries
1190             with the traditional labels contain the formatted values (if applicable).
1191              
1192             The control hash (the first entry in the returned array ref), contains
1193             some spreadsheet meta-data. The entry C<sheet> is there to be able to find
1194             the sheets when accessing them by name:
1195              
1196             my %sheet2 = %{$book->[$book->[0]{sheet}{"Sheet 2"}]};
1197              
1198             =head2 Functions and methods
1199              
1200             =head3 new
1201              
1202             my $book = Spreadsheet::Read->new (...);
1203              
1204             All options accepted by ReadData are accepted by new.
1205              
1206             =head3 ReadData
1207              
1208             my $book = ReadData ($source [, option => value [, ... ]]);
1209              
1210             my $book = ReadData ("file.csv", sep => ',', quote => '"');
1211              
1212             my $book = ReadData ("file.xls", dtfmt => "yyyy-mm-dd");
1213              
1214             my $book = ReadData ("file.ods");
1215              
1216             my $book = ReadData ("file.sxc");
1217              
1218             my $book = ReadData ("content.xml");
1219              
1220             my $book = ReadData ($content);
1221              
1222             my $book = ReadData ($content, parser => "xlsx");
1223              
1224             my $book = ReadData ($fh, parser => "xlsx");
1225              
1226             my $book = ReadData (\$content, parser => "xlsx");
1227              
1228             Tries to convert the given file, string, or stream to the data structure
1229             described above.
1230              
1231             Processing Excel data from a stream or content is supported through a
1232             L<File::Temp|https://metacpan.org/release/File-Temp> temporary file or
1233             L<IO::Scalar|https://metacpan.org/release/IO-Scalar> when available.
1234              
1235             L<Spreadsheet::ReadSXC|https://metacpan.org/release/Spreadsheet-ReadSXC>
1236             does preserve sheet order as of version 0.20.
1237              
1238             Choosing between C<$content> and C<\\$content> (with or without passing
1239             the desired C<parser> option) may be depending on trial and terror.
1240             C<ReadData> does try to determine parser type on content if needed, but
1241             not all combinations are checked, and not all signatures are builtin.
1242              
1243             Currently supported options are:
1244              
1245             =over 2
1246              
1247             =item parser
1248             X<parser>
1249              
1250             Force the data to be parsed by a specific format. Possible values are
1251             C<csv>, C<prl> (or C<perl>), C<sc> (or C<squirelcalc>), C<sxc> (or C<oo>,
1252             C<ods>, C<openoffice>, C<libreoffice>) C<xls> (or C<excel>), and C<xlsx>
1253             (or C<excel2007>).
1254              
1255             When parsing streams, instead of files, it is highly recommended to pass
1256             this option.
1257              
1258             Spreadsheet::Read supports several underlying parsers per spreadsheet
1259             type. It will try those from most favored to least favored. When you
1260             have a good reason to prefer a different parser, you can set that in
1261             environment variables. The other options then will not be tested for:
1262              
1263             env SPREADSHEET_READ_CSV=Text::CSV_PP ...
1264              
1265             =item cells
1266             X<cells>
1267              
1268             Control the generation of named cells ("C<A1>" etc). Default is true.
1269              
1270             =item rc
1271              
1272             Control the generation of the {cell}[c][r] entries. Default is true.
1273              
1274             =item attr
1275              
1276             Control the generation of the {attr}[c][r] entries. Default is false.
1277             See L</Cell Attributes> below.
1278              
1279             =item clip
1280              
1281             If set, L<C<ReadData>|/ReadData> will remove all trailing rows and columns
1282             per sheet that have no data, where no data means only undefined or empty
1283             cells (after optional stripping). If a sheet has no data at all, the sheet
1284             will be skipped entirely when this attribute is true.
1285              
1286             =item strip
1287              
1288             If set, L<C<ReadData>|/ReadData> will remove trailing- and/or
1289             leading-whitespace from every field.
1290              
1291             strip leading strailing
1292             ----- ------- ---------
1293             0 n/a n/a
1294             1 strip n/a
1295             2 n/a strip
1296             3 strip strip
1297              
1298             =item pivot
1299              
1300             Swap all rows and columns.
1301              
1302             When a sheet contains data like
1303              
1304             A1 B1 C1 E1
1305             A2 C2 D2
1306             A3 B3 C3 D3 E3
1307              
1308             using C<pivot> will return the sheet data as
1309              
1310             A1 A2 A3
1311             B1 B3
1312             C1 C2 C3
1313             D2 D3
1314             E1 E3
1315              
1316             =item sep
1317              
1318             Set separator for CSV. Default is comma C<,>.
1319              
1320             =item quote
1321              
1322             Set quote character for CSV. Default is C<">.
1323              
1324             =item dtfmt
1325              
1326             Set the format for MS-Excel date fields that are set to use the default
1327             date format. The default format in Excel is "C<m-d-yy>", which is both
1328             not year 2000 safe, nor very useful. The default is now "C<yyyy-mm-dd>",
1329             which is more ISO-like.
1330              
1331             Note that date formatting in MS-Excel is not reliable at all, as it will
1332             store/replace/change the date field separator in already stored formats
1333             if you change your locale settings. So the above mentioned default can
1334             be either "C<m-d-yy>" OR "C<m/d/yy>" depending on what that specific
1335             character happened to be at the time the user saved the file.
1336              
1337             =item debug
1338              
1339             Enable some diagnostic messages to STDERR.
1340              
1341             The value determines how much diagnostics are dumped (using
1342             L<Data::Peek|https://metacpan.org/release/Data-Peek>). A value of C<9>
1343             and higher will dump the entire structure from the back-end parser.
1344              
1345             =item passwd
1346              
1347             Use this password to decrypt password protected spreadsheet.
1348              
1349             Currently only supports Excel.
1350              
1351             =back
1352              
1353             All other attributes/options will be passed to the underlying parser if
1354             that parser supports attributes.
1355              
1356             =head3 col2label
1357              
1358             my $col_id = col2label (col);
1359              
1360             my $col_id = $book->col2label (col); # OO
1361              
1362             C<col2label ()> converts a C<(column)> (1 based) to the letters used in the
1363             traditional cell notation:
1364              
1365             my $id = col2label ( 4); # $id now "D"
1366             my $id = col2label (28); # $id now "AB"
1367              
1368             =head3 cr2cell
1369              
1370             my $cell = cr2cell (col, row);
1371              
1372             my $cell = $book->cr2cell (col, row); # OO
1373              
1374             C<cr2cell ()> converts a C<(column, row)> pair (1 based) to the
1375             traditional cell notation:
1376              
1377             my $cell = cr2cell ( 4, 14); # $cell now "D14"
1378             my $cell = cr2cell (28, 4); # $cell now "AB4"
1379              
1380             =head3 cell2cr
1381              
1382             my ($col, $row) = cell2cr ($cell);
1383              
1384             my ($col, $row) = $book->cell2cr ($cell); # OO
1385              
1386             C<cell2cr ()> converts traditional cell notation to a C<(column, row)>
1387             pair (1 based):
1388              
1389             my ($col, $row) = cell2cr ("D14"); # returns ( 4, 14)
1390             my ($col, $row) = cell2cr ("AB4"); # returns (28, 4)
1391              
1392             =head3 row
1393              
1394             my @row = row ($sheet, $row)
1395              
1396             my @row = Spreadsheet::Read::row ($book->[1], 3);
1397              
1398             my @row = $book->row ($sheet, $row); # OO
1399              
1400             Get full row of formatted values (like C<< $sheet->{A3} .. $sheet->{G3} >>)
1401              
1402             Note that the indexes in the returned list are 0-based.
1403              
1404             C<row ()> is not imported by default, so either specify it in the
1405             use argument list, or call it fully qualified.
1406              
1407             =head3 cellrow
1408              
1409             my @row = cellrow ($sheet, $row);
1410              
1411             my @row = Spreadsheet::Read::cellrow ($book->[1], 3);
1412              
1413             my @row = $book->cellrow ($sheet, $row); # OO
1414              
1415             Get full row of unformatted values (like C<< $sheet->{cell}[1][3] .. $sheet->{cell}[7][3] >>)
1416              
1417             Note that the indexes in the returned list are 0-based.
1418              
1419             C<cellrow ()> is not imported by default, so either specify it in the
1420             use argument list, or call it fully qualified or as method call.
1421              
1422             =head3 rows
1423              
1424             my @rows = rows ($sheet);
1425              
1426             my @rows = Spreadsheet::Read::rows ($book->[1]);
1427              
1428             my @rows = $book->rows (1); # OO
1429              
1430             Convert C<{cell}>'s C<[column][row]> to a C<[row][column]> list.
1431              
1432             Note that the indexes in the returned list are 0-based, where the
1433             index in the C<{cell}> entry is 1-based.
1434              
1435             C<rows ()> is not imported by default, so either specify it in the
1436             use argument list, or call it fully qualified.
1437              
1438             =head3 parses
1439              
1440             parses ($format);
1441              
1442             Spreadsheet::Read::parses ("CSV");
1443              
1444             $book->parses ("CSV"); # OO
1445              
1446             C<parses ()> returns Spreadsheet::Read's capability to parse the
1447             required format. L<C<ReadData>|/ReadData> will pick its preferred parser
1448             for that format unless overruled. See L<C<parser>|/parser>.
1449              
1450             C<parses ()> is not imported by default, so either specify it in the
1451             use argument list, or call it fully qualified.
1452              
1453             =head3 Version
1454              
1455             my $v = Version ()
1456              
1457             my $v = Spreadsheet::Read::Version ()
1458              
1459             my $v = Spreadsheet::Read->VERSION;
1460              
1461             my $v = $book->Version (); # OO
1462              
1463             Returns the current version of Spreadsheet::Read.
1464              
1465             C<Version ()> is not imported by default, so either specify it in the
1466             use argument list, or call it fully qualified.
1467              
1468             This function returns exactly the same as C<< Spreadsheet::Read->VERSION >>
1469             returns and is only kept for backward compatibility reasons.
1470              
1471             =head3 sheets
1472              
1473             my $sheets = $book->sheets; # OO
1474             my @sheets = $book->sheets; # OO
1475              
1476             In scalar context return the number of sheets in the book.
1477             In list context return the labels of the sheets in the book.
1478              
1479             =head3 sheet
1480              
1481             my $sheet = $book->sheet (1); # OO
1482             my $sheet = $book->sheet ("Foo"); # OO
1483              
1484             Return the numbered or named sheet out of the book. Will return C<undef> if
1485             there is no match. Will not work for sheets I<named> with a number between 1
1486             and the number of sheets in the book.
1487              
1488             With named sheets will first try to use the list of sheet-labels as stored in
1489             the control structure. If no match is found, it will scan the actual labels
1490             of the sheets. In that case, it will return the first matching sheet.
1491              
1492             If defined, the returned sheet will be of class C<Spreadsheet::Read::Sheet>.
1493              
1494             =head3 add
1495              
1496             my $book = ReadData ("file.csv");
1497             Spreadsheet::Read::add ($book, "file.xlsx");
1498              
1499             my $book = Spreadsheet::Read->new ("file.csv");
1500             $book->add ("file.xlsx"); # OO
1501              
1502             =head2 Methods on sheets
1503              
1504             =head3 maxcol
1505              
1506             my $col = $sheet->maxcol;
1507              
1508             Return the index of the last in-use column in the sheet. This index is 1-based.
1509              
1510             =head3 maxrow
1511              
1512             my $row = $sheet->maxrow;
1513              
1514             Return the index of the last in-use row in the sheet. This index is 1-based.
1515              
1516             =head3 cell
1517              
1518             my $cell = $sheet->cell ("A3");
1519             my $cell = $sheet->cell (1, 3);
1520              
1521             Return the value for a cell. Using tags will return the formatted value,
1522             using column and row will return unformatted value.
1523              
1524             =head3 attr
1525              
1526             my $cell = $sheet->attr ("A3");
1527             my $cell = $sheet->attr (1, 3);
1528              
1529             Return the attributes of a cell. Only valid if attributes are enabled through
1530             option C<attr>.
1531              
1532             =head3 col2label
1533              
1534             my $col_id = $sheet->col2label (col);
1535              
1536             C<col2label ()> converts a C<(column)> (1 based) to the letters used in the
1537             traditional cell notation:
1538              
1539             my $id = $sheet->col2label ( 4); # $id now "D"
1540             my $id = $sheet->col2label (28); # $id now "AB"
1541              
1542             =head3 cr2cell
1543              
1544             my $cell = $sheet->cr2cell (col, row);
1545              
1546             C<cr2cell ()> converts a C<(column, row)> pair (1 based) to the
1547             traditional cell notation:
1548              
1549             my $cell = $sheet->cr2cell ( 4, 14); # $cell now "D14"
1550             my $cell = $sheet->cr2cell (28, 4); # $cell now "AB4"
1551              
1552             =head3 cell2cr
1553              
1554             my ($col, $row) = $sheet->cell2cr ($cell);
1555              
1556             C<cell2cr ()> converts traditional cell notation to a C<(column, row)>
1557             pair (1 based):
1558              
1559             my ($col, $row) = $sheet->cell2cr ("D14"); # returns ( 4, 14)
1560             my ($col, $row) = $sheet->cell2cr ("AB4"); # returns (28, 4)
1561              
1562             =head3 col
1563              
1564             my @col = $sheet->column ($col);
1565              
1566             Get full column of formatted values (like C<< $sheet->{C1} .. $sheet->{C9} >>)
1567              
1568             Note that the indexes in the returned list are 0-based.
1569              
1570             =head3 cellcolumn
1571              
1572             my @col = $sheet->cellcolumn ($col);
1573              
1574             Get full column of unformatted values (like C<< $sheet->{cell}[3][1] .. $sheet->{cell}[3][9] >>)
1575              
1576             Note that the indexes in the returned list are 0-based.
1577              
1578             =head3 row
1579              
1580             my @row = $sheet->row ($row);
1581              
1582             Get full row of formatted values (like C<< $sheet->{A3} .. $sheet->{G3} >>)
1583              
1584             Note that the indexes in the returned list are 0-based.
1585              
1586             =head3 cellrow
1587              
1588             my @row = $sheet->cellrow ($row);
1589              
1590             Get full row of unformatted values (like C<< $sheet->{cell}[1][3] .. $sheet->{cell}[7][3] >>)
1591              
1592             Note that the indexes in the returned list are 0-based.
1593              
1594             =head3 rows
1595              
1596             my @rows = $sheet->rows ();
1597              
1598             Convert C<{cell}>'s C<[column][row]> to a C<[row][column]> list.
1599              
1600             Note that the indexes in the returned list are 0-based, where the
1601             index in the C<{cell}> entry is 1-based.
1602              
1603             =head3 label
1604              
1605             my $label = $sheet->label;
1606             $sheet->label ("New sheet label");
1607              
1608             Set a new label to a sheet. Note that the index in the control structure will
1609             I<NOT> be updated.
1610              
1611             =head3 active
1612              
1613             my $sheet_is_active = $sheet->active;
1614              
1615             Returns 1 if the selected sheet is active, otherwise returns 0.
1616              
1617             Currently only works on XLS (as of Spreadsheed::ParseExcel-0.61).
1618             CSV is always active.
1619              
1620             =head2 Using CSV
1621              
1622             In case of CSV parsing, L<C<ReadData>|/ReadData> will use the first line of
1623             the file to auto-detect the separation character if the first argument is a
1624             file and both C<sep> and C<quote> are not passed as attributes.
1625             L<Text::CSV_XS|https://metacpan.org/release/Text-CSV_XS> (or
1626             L<Text::CSV_PP|https://metacpan.org/release/Text-CSV_PP>) is able to
1627             automatically detect and use C<\r> line endings.
1628              
1629             CSV can parse streams too, but be sure to pass C<sep> and/or C<quote> if
1630             these do not match the default C<,> and C<">.
1631              
1632             When an error is found in the CSV, it is automatically reported (to STDERR).
1633             The structure will store the error in C<< $ss->[0]{error} >> as anonymous
1634             list returned by
1635             L<C<< $csv->error_diag >>|https://metacpan.org/pod/Text::CSV_XS#error_diag>.
1636             See L<Text::CSV_XS|https://metacpan.org/pod/Text-CSV_XS> for documentation.
1637              
1638             my $ss = ReadData ("bad.csv");
1639             $ss->[0]{error} and say $ss->[0]{error}[1];
1640              
1641             As CSV has no sheet labels, the default label for a CSV sheet is its filename.
1642             For CSV, this can be overruled using the I<label> attribute:
1643              
1644             my $ss = Spreadsheet::Read->new ("/some/place/test.csv", label => "Test");
1645              
1646             =head2 Cell Attributes
1647             X<attr>
1648              
1649             If the constructor was called with C<attr> having a true value,
1650              
1651             my $book = ReadData ("book.xls", attr => 1);
1652             my $book = Spreadsheet::Read->new ("book.xlsx", attr => 1);
1653              
1654             effort is made to analyze and store field attributes like this:
1655              
1656             { label => "Sheet 1",
1657             maxrow => 5,
1658             maxcol => 2,
1659             cell => [ undef,
1660             [ undef, 1 ],
1661             [ undef, undef, undef, undef, undef, "Nugget" ],
1662             ],
1663             attr => [ undef,
1664             [ undef, {
1665             type => "numeric",
1666             fgcolor => "#ff0000",
1667             bgcolor => undef,
1668             font => "Arial",
1669             size => undef,
1670             format => "## ##0.00",
1671             halign => "right",
1672             valign => "top",
1673             uline => 0,
1674             bold => 0,
1675             italic => 0,
1676             wrap => 0,
1677             merged => 0,
1678             hidden => 0,
1679             locked => 0,
1680             enc => "utf-8",
1681             }, ]
1682             [ undef, undef, undef, undef, undef, {
1683             type => "text",
1684             fgcolor => "#e2e2e2",
1685             bgcolor => undef,
1686             font => "Letter Gothic",
1687             size => 15,
1688             format => undef,
1689             halign => "left",
1690             valign => "top",
1691             uline => 0,
1692             bold => 0,
1693             italic => 0,
1694             wrap => 0,
1695             merged => 0,
1696             hidden => 0,
1697             locked => 0,
1698             enc => "iso8859-1",
1699             }, ]
1700             merged => [],
1701             A1 => 1,
1702             B5 => "Nugget",
1703             },
1704              
1705             The entries C<maxrow> and C<maxcol> are 1-based.
1706              
1707             This has now been partially implemented, mainly for Excel, as the other
1708             parsers do not (yet) support all of that. YMMV.
1709              
1710             If a cell itself is not hidden, but the parser holds the information that
1711             either the row or the column (or both) the field is in is hidden, the flag
1712             is inherited into the cell attributes.
1713              
1714             You can get the attributes of a cell (as a hash-ref) like this:
1715              
1716             my $attr = $book[1]{attr}[1][3]; # Direct structure
1717             my $attr = $book->sheet (1)->attr (1, 3); # Same using OO
1718             my $attr = $book->sheet (1)->attr ("A3"); # Same using OO
1719              
1720             To get to the C<font> attribute, use any of these:
1721              
1722             my $font = $book[1]{attr}[1][3]{font};
1723             my $font = $book->sheet (1)->attr (1, 3)->{font};
1724             my $font = $book->sheet (1)->attr ("A3")->font;
1725              
1726             =head3 Merged cells
1727             X<merged>
1728              
1729             Note that only
1730             L<Spreadsheet::ReadSXC|https://metacpan.org/release/Spreadsheet-ReadSXC>
1731             documents the use of merged cells, and not in a way useful for the spreadsheet
1732             consumer.
1733              
1734             CSV does not support merged cells (though future implementations of CSV
1735             for the web might).
1736              
1737             The documentation of merged areas in
1738             L<Spreadsheet::ParseExcel|https://metacpan.org/release/Spreadsheet-ParseExcel> and
1739             L<Spreadsheet::ParseXLSX|https://metacpan.org/release/Spreadsheet-ParseXLSX> can
1740             be found in
1741             L<Spreadsheet::ParseExcel::Worksheet|https://metacpan.org/release/Spreadsheet-ParseExcel-Worksheet>
1742             and L<Spreadsheet::ParseExcel::Cell|https://metacpan.org/release/Spreadsheet-ParseExcel-Cell>.
1743              
1744             None of basic L<Spreadsheet::XLSX|https://metacpan.org/release/Spreadsheet-XLSX>,
1745             L<Spreadsheet::ParseExcel|https://metacpan.org/release/Spreadsheet-ParseExcel>, and
1746             L<Spreadsheet::ParseXLSX|https://metacpan.org/release/Spreadsheet-ParseXLSX> manual
1747             pages mention merged cells at all.
1748              
1749             This module just tries to return the information in a generic way.
1750              
1751             Given this spreadsheet as an example
1752              
1753             merged.xlsx:
1754              
1755             A B C
1756             +-----+-----------+
1757             1| | foo |
1758             +-----+ +
1759             2| bar | |
1760             | +-----+-----+
1761             3| | urg | orc |
1762             +-----+-----+-----+
1763              
1764             the information extracted from that undocumented information is
1765             returned in the C<merged> entry of the sheet's hash as a list of
1766             top-left, bottom-right coordinate pars (col, row, col, row). For
1767             given example, that would be:
1768              
1769             $ss->{merged} = [
1770             [ 1, 2, 1, 3 ], # A2-A3
1771             [ 2, 1, 3, 2 ], # B1-C2
1772             ];
1773              
1774             When the attributes are also enabled, there is some merge information
1775             copied directly from the cell information, but again, that stems from
1776             code analysis and not from documentation:
1777              
1778             my $ss = ReadData ("merged.xlsx", attr => 1)->[1];
1779             foreach my $row (1 .. $ss->{maxrow}) {
1780             foreach my $col (1 .. $ss->{maxcol}) {
1781             my $cell = cr2cell ($col, $row);
1782             printf "%s %-3s %d ", $cell, $ss->{$cell},
1783             $ss->{attr}[$col][$row]{merged};
1784             }
1785             print "\n";
1786             }
1787              
1788             A1 0 B1 foo 1 C1 1
1789             A2 bar 1 B2 1 C2 1
1790             A3 1 B3 urg 0 C3 orc 0
1791              
1792             In this example, there is no way to see if C<B2> is merged to C<A2> or
1793             to C<B1> without analyzing all surrounding cells. This could as well
1794             mean C<A2:A3>, C<B1:C1>, C<B2:C2>, as C<A2:A3>, C<B1:B2>, C<C1:C2>, as
1795             C<A2:A3>, C<B1:C2>.
1796             Use the L<C<merged>|/merged> entry described above to find out what
1797             fields are merged to what other fields.
1798              
1799             =head1 TOOLS
1800              
1801             This modules comes with a few tools that perform tasks from the FAQ, like
1802             "How do I select only column D through F from sheet 2 into a CSV file?"
1803              
1804             If the module was installed without the tools, you can find them here:
1805             https://github.com/Tux/Spreadsheet-Read/tree/master/examples
1806              
1807             =head2 C<xlscat>
1808              
1809             Show (parts of) a spreadsheet in plain text, CSV, or HTML
1810              
1811             usage: xlscat [-s <sep>] [-L] [-n] [-A] [-u] [Selection] file.xls
1812             [-c | -m] [-u] [Selection] file.xls
1813             -i [-S sheets] file.xls
1814             Generic options:
1815             -v[#] Set verbose level (xlscat/xlsgrep)
1816             -d[#] Set debug level (Spreadsheet::Read)
1817             -u Use unformatted values
1818             --noclip Do not strip empty sheets and
1819             trailing empty rows and columns
1820             -e <enc> Set encoding for input and output
1821             -b <enc> Set encoding for input
1822             -a <enc> Set encoding for output
1823             Input CSV:
1824             --in-sep=c Set input sep_char for CSV
1825             Input XLS:
1826             --dtfmt=fmt Specify the default date format to replace 'm-d-yy'
1827             the default replacement is 'yyyy-mm-dd'
1828             Output Text (default):
1829             -s <sep> Use separator <sep>. Default '|', \n allowed
1830             -L Line up the columns
1831             -n [skip] Number lines (prefix with column number)
1832             optionally skip <skip> (header) lines
1833             -A Show field attributes in ANSI escapes
1834             -h[#] Show # header lines
1835             Output Index only:
1836             -i Show sheet names and size only
1837             Output CSV:
1838             -c Output CSV, separator = ','
1839             -m Output CSV, separator = ';'
1840             Output HTML:
1841             -H Output HTML
1842             Selection:
1843             -S <sheets> Only print sheets <sheets>. 'all' is a valid set
1844             Default only prints the first sheet
1845             -R <rows> Only print rows <rows>. Default is 'all'
1846             -C <cols> Only print columns <cols>. Default is 'all'
1847             -F <flds> Only fields <flds> e.g. -FA3,B16
1848             Ordering (column numbers in result set *after* selection):
1849             --sort=spec Sort output (e.g. --sort=3,2r,5n,1rn+2)
1850             +# - first # lines do not sort (header)
1851             # - order on column # lexical ascending
1852             #n - order on column # numeric ascending
1853             #r - order on column # lexical descending
1854             #rn - order on column # numeric descending
1855              
1856             =head2 C<xlsgrep>
1857              
1858             Show (parts of) a spreadsheet that match a pattern in plain text, CSV, or HTML
1859              
1860             usage: xlsgrep [-s <sep>] [-L] [-n] [-A] [-u] [Selection] pattern file.xls
1861             [-c | -m] [-u] [Selection] pattern file.xls
1862             -i [-S sheets] pattern file.xls
1863             Generic options:
1864             -v[#] Set verbose level (xlscat/xlsgrep)
1865             -d[#] Set debug level (Spreadsheet::Read)
1866             -u Use unformatted values
1867             --noclip Do not strip empty sheets and
1868             trailing empty rows and columns
1869             -e <enc> Set encoding for input and output
1870             -b <enc> Set encoding for input
1871             -a <enc> Set encoding for output
1872             Input CSV:
1873             --in-sep=c Set input sep_char for CSV
1874             Input XLS:
1875             --dtfmt=fmt Specify the default date format to replace 'm-d-yy'
1876             the default replacement is 'yyyy-mm-dd'
1877             Output Text (default):
1878             -s <sep> Use separator <sep>. Default '|', \n allowed
1879             -L Line up the columns
1880             -n [skip] Number lines (prefix with column number)
1881             optionally skip <skip> (header) lines
1882             -A Show field attributes in ANSI escapes
1883             -h[#] Show # header lines
1884             Grep options:
1885             -i Ignore case
1886             -w Match whole words only
1887             Output CSV:
1888             -c Output CSV, separator = ','
1889             -m Output CSV, separator = ';'
1890             Output HTML:
1891             -H Output HTML
1892             Selection:
1893             -S <sheets> Only print sheets <sheets>. 'all' is a valid set
1894             Default only prints the first sheet
1895             -R <rows> Only print rows <rows>. Default is 'all'
1896             -C <cols> Only print columns <cols>. Default is 'all'
1897             -F <flds> Only fields <flds> e.g. -FA3,B16
1898             Ordering (column numbers in result set *after* selection):
1899             --sort=spec Sort output (e.g. --sort=3,2r,5n,1rn+2)
1900             +# - first # lines do not sort (header)
1901             # - order on column # lexical ascending
1902             #n - order on column # numeric ascending
1903             #r - order on column # lexical descending
1904             #rn - order on column # numeric descending
1905              
1906             =head2 C<xls2csv>
1907              
1908             Convert a spreadsheet to CSV. This is just a small wrapper over C<xlscat>.
1909              
1910             usage: xls2csv [ -o file.csv ] file.xls
1911              
1912             =head2 C<ss2tk>
1913              
1914             Show a spreadsheet in a perl/Tk spreadsheet widget
1915              
1916             usage: ss2tk [-w <width>] [X11 options] file.xls [<pattern>]
1917             -w <width> use <width> as default column width (4)
1918              
1919             =head2 C<ssdiff>
1920              
1921             Show the differences between two spreadsheets.
1922              
1923             usage: examples/ssdiff [--verbose[=1]] file.xls file.xlsx
1924              
1925             =head1 TODO
1926              
1927             =over 4
1928              
1929             =item Options
1930              
1931             =over 2
1932              
1933             =item Module Options
1934              
1935             New Spreadsheet::Read options are bound to happen. I'm thinking of an
1936             option that disables the reading of the data entirely to speed up an
1937             index request (how many sheets/fields/columns). See C<xlscat -i>.
1938              
1939             =item Parser options
1940              
1941             Try to transparently support as many options as the encapsulated modules
1942             support regarding (un)formatted values, (date) formats, hidden columns
1943             rows or fields etc. These could be implemented like C<attr> above but
1944             names C<meta>, or just be new values in the C<attr> hashes.
1945              
1946             =back
1947              
1948             =item Other parsers
1949              
1950             Add support for new(er) parsers for already supported formats, like
1951              
1952             =over 2
1953              
1954             =item Data::XLSX::Parser
1955              
1956             Data::XLSX::Parser provides faster way to parse Microsoft Excel's .xlsx
1957             files. The implementation of this module is highly inspired from Python's
1958             FastXLSX library.
1959              
1960             This is SAX based parser, so you can parse very large XLSX file with
1961             lower memory usage.
1962              
1963             =back
1964              
1965             =item Other spreadsheet formats
1966              
1967             I consider adding any spreadsheet interface that offers a usable API.
1968              
1969             Under investigation:
1970              
1971             =over 2
1972              
1973             =item Gnumeric (.gnumeric)
1974              
1975             I have seen no existing CPAN module yet.
1976              
1977             It is gzip'ed XML
1978              
1979             =item Kspread (.ksp)
1980              
1981             Now knows as Calligra Sheets.
1982              
1983             I have seen no existing CPAN module yet.
1984              
1985             It is XML in ZIP
1986              
1987             =back
1988              
1989             =item Alternative parsers for existing formats
1990              
1991             As long as the alternative has a good reason for its existence, and the
1992             API of that parser reasonable fits in my approach, I will consider to
1993             implement the glue layer, or apply patches to do so as long as these
1994             match what F<CONTRIBUTING.md> describes.
1995              
1996             =back
1997              
1998             =head1 SEE ALSO
1999              
2000             =over 2
2001              
2002             =item Text::CSV_XS, Text::CSV_PP
2003              
2004             See L<Text::CSV_XS|https://metacpan.org/release/Text-CSV_XS> ,
2005             L<Text::CSV_PP|https://metacpan.org/release/Text-CSV_PP> , and
2006             L<Text::CSV|https://metacpan.org/release/Text-CSV> documentation.
2007              
2008             L<Text::CSV|https://metacpan.org/release/Text-CSV> is a wrapper over Text::CSV_XS (the fast XS version) and/or
2009             L<Text::CSV_PP|https://metacpan.org/release/Text-CSV_PP> (the pure perl version).
2010              
2011             =item Spreadsheet::ParseExcel
2012              
2013             L<Spreadsheet::ParseExcel|https://metacpan.org/release/Spreadsheet-ParseExcel> is
2014             the best parser for old-style Microsoft Excel (.xls) files.
2015              
2016             =item Spreadsheet::ParseXLSX
2017              
2018             L<Spreadsheet::ParseXLSX|https://metacpan.org/release/Spreadsheet-ParseXLSX> is
2019             like L<Spreadsheet::ParseExcel|https://metacpan.org/release/Spreadsheet-ParseExcel>,
2020             but for new Microsoft Excel 2007+ files (.xlsx). They have the same API.
2021              
2022             This module uses L<XML::Twig|https://metacpan.org/release/XML-Twig> to parse the
2023             internal XML.
2024              
2025             =item Spreadsheet::XLSX
2026              
2027             See L<Spreadsheet::XLSX|https://metacpan.org/release/Spreadsheet-XLSX>
2028             documentation.
2029              
2030             This module is dead and deprecated. It is B<buggy and unmaintained>. I<Please>
2031             use L<Spreadsheet::ParseXLSX|https://metacpan.org/release/Spreadsheet-ParseXLSX>
2032             instead.
2033              
2034             =item Spreadsheet::ReadSXC
2035              
2036             L<Spreadsheet::ReadSXC|https://metacpan.org/release/Spreadsheet-ReadSXC> is a
2037             parser for OpenOffice/LibreOffice (.sxc and .ods) spreadsheet files.
2038              
2039             =item Spreadsheet::BasicRead
2040              
2041             See L<Spreadsheet::BasicRead|https://metacpan.org/release/Spreadsheet-BasicRead>
2042             for xlscat-like functionality (Excel only)
2043              
2044             =item Spreadsheet::ConvertAA
2045              
2046             See L<Spreadsheet::ConvertAA|https://metacpan.org/release/Spreadsheet-ConvertAA>
2047             for an alternative set of L</cell2cr>/L</cr2cell> pair.
2048              
2049             =item Spreadsheet::Perl
2050              
2051             L<Spreadsheet::Perl|https://metacpan.org/release/Spreadsheet-Perl> offers a Pure
2052             Perl implementation of a spreadsheet engine. Users that want this format to be
2053             supported in Spreadsheet::Read are hereby motivated to offer patches. It is
2054             not high on my TODO-list.
2055              
2056             =item Spreadsheet::CSV
2057              
2058             L<Spreadsheet::CSV|https://metacpan.org/release/Spreadsheet-CSV> offers the
2059             interesting approach of seeing all supported spreadsheet formats as if it were
2060             CSV, mimicking the L<Text::CSV_XS|https://metacpan.org/release/Text-CSV_XS>
2061             interface.
2062              
2063             =item xls2csv
2064              
2065             L<xls2csv|https://metacpan.org/release/xls2csv> offers an alternative for my
2066             C<xlscat -c>, in the xls2csv tool, but this tool focuses on character encoding
2067             transparency, and requires some other modules.
2068              
2069             =back
2070              
2071             =head1 AUTHOR
2072              
2073             H.Merijn Brand, <h.m.brand@xs4all.nl>
2074              
2075             =head1 COPYRIGHT AND LICENSE
2076              
2077             Copyright (C) 2005-2018 H.Merijn Brand
2078              
2079             This library is free software; you can redistribute it and/or modify
2080             it under the same terms as Perl itself.
2081              
2082             =cut