line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
#!/pro/bin/perl |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
package Spreadsheet::Read; |
4
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
=head1 NAME |
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
Spreadsheet::Read - Read the data from a spreadsheet |
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
=head1 SYNOPSIS |
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
use Spreadsheet::Read; |
12
|
|
|
|
|
|
|
my $book = ReadData ("test.csv", sep => ";"); |
13
|
|
|
|
|
|
|
my $book = ReadData ("test.sxc"); |
14
|
|
|
|
|
|
|
my $book = ReadData ("test.ods"); |
15
|
|
|
|
|
|
|
my $book = ReadData ("test.xls"); |
16
|
|
|
|
|
|
|
my $book = ReadData ("test.xlsx"); |
17
|
|
|
|
|
|
|
my $book = ReadData ($fh, parser => "xls"); |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
Spreadsheet::Read::add ($book, "sheet.csv"); |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
my $sheet = $book->[1]; # first datasheet |
22
|
|
|
|
|
|
|
my $cell = $book->[1]{A3}; # content of field A3 of sheet 1 |
23
|
|
|
|
|
|
|
my $cell = $book->[1]{cell}[1][3]; # same, unformatted |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
# OO API |
26
|
|
|
|
|
|
|
my $book = Spreadsheet::Read->new ("file.csv"); |
27
|
|
|
|
|
|
|
my $sheet = $book->sheet (1); |
28
|
|
|
|
|
|
|
my $cell = $sheet->cell ("A3"); |
29
|
|
|
|
|
|
|
my $cell = $sheet->cell (1, 3); |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
$book->add ("test.xls"); |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
=cut |
34
|
|
|
|
|
|
|
|
35
|
53
|
|
|
53
|
|
5103204
|
use 5.8.1; |
|
53
|
|
|
|
|
687
|
|
36
|
53
|
|
|
53
|
|
303
|
use strict; |
|
53
|
|
|
|
|
132
|
|
|
53
|
|
|
|
|
1239
|
|
37
|
53
|
|
|
53
|
|
301
|
use warnings; |
|
53
|
|
|
|
|
132
|
|
|
53
|
|
|
|
|
3686
|
|
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
our $VERSION = "0.80"; |
40
|
1
|
|
|
1
|
1
|
190
|
sub Version { $VERSION } |
41
|
|
|
|
|
|
|
|
42
|
53
|
|
|
53
|
|
386
|
use Carp; |
|
53
|
|
|
|
|
146
|
|
|
53
|
|
|
|
|
3232
|
|
43
|
53
|
|
|
53
|
|
360
|
use Exporter; |
|
53
|
|
|
|
|
118
|
|
|
53
|
|
|
|
|
4581
|
|
44
|
|
|
|
|
|
|
our @ISA = qw( Exporter ); |
45
|
|
|
|
|
|
|
our @EXPORT = qw( ReadData cell2cr cr2cell ); |
46
|
|
|
|
|
|
|
our @EXPORT_OK = qw( parses rows cellrow row add ); |
47
|
|
|
|
|
|
|
|
48
|
53
|
|
|
53
|
|
28812
|
use Encode qw( decode ); |
|
53
|
|
|
|
|
855019
|
|
|
53
|
|
|
|
|
4019
|
|
49
|
53
|
|
|
53
|
|
45456
|
use File::Temp qw( ); |
|
53
|
|
|
|
|
1101698
|
|
|
53
|
|
|
|
|
1586
|
|
50
|
53
|
|
|
53
|
|
32181
|
use Data::Dumper; |
|
53
|
|
|
|
|
326483
|
|
|
53
|
|
|
|
|
54353
|
|
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
my @parsers = ( |
53
|
|
|
|
|
|
|
[ csv => "Text::CSV_XS", "0.71" ], |
54
|
|
|
|
|
|
|
[ csv => "Text::CSV_PP", "1.17" ], |
55
|
|
|
|
|
|
|
[ csv => "Text::CSV", "1.17" ], |
56
|
|
|
|
|
|
|
[ ods => "Spreadsheet::ReadSXC", "0.20" ], |
57
|
|
|
|
|
|
|
[ sxc => "Spreadsheet::ReadSXC", "0.20" ], |
58
|
|
|
|
|
|
|
[ xls => "Spreadsheet::ParseExcel", "0.34" ], |
59
|
|
|
|
|
|
|
[ xlsx => "Spreadsheet::ParseXLSX", "0.24" ], |
60
|
|
|
|
|
|
|
[ xlsx => "Spreadsheet::XLSX", "0.13" ], |
61
|
|
|
|
|
|
|
[ prl => "Spreadsheet::Perl", "" ], |
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
# Helper modules |
64
|
|
|
|
|
|
|
[ ios => "IO::Scalar", "" ], |
65
|
|
|
|
|
|
|
[ dmp => "Data::Peek", "" ], |
66
|
|
|
|
|
|
|
); |
67
|
|
|
|
|
|
|
my %can = ( supports => { map { $_->[1] => $_->[2] } @parsers }); |
68
|
|
|
|
|
|
|
foreach my $p (@parsers) { |
69
|
|
|
|
|
|
|
my $format = $p->[0]; |
70
|
|
|
|
|
|
|
$can{$format} and next; |
71
|
|
|
|
|
|
|
$can{$format} = ""; |
72
|
|
|
|
|
|
|
my $preset = $ENV{"SPREADSHEET_READ_\U$format"} or next; |
73
|
|
|
|
|
|
|
my $min_version = $can{supports}{$preset}; |
74
|
|
|
|
|
|
|
unless ($min_version) { |
75
|
|
|
|
|
|
|
# Catch weirdness like $SPREADSHEET_READ_XLSX = "DBD::Oracle" |
76
|
|
|
|
|
|
|
$can{$format} = "!$preset is not supported for the $format format"; |
77
|
|
|
|
|
|
|
next; |
78
|
|
|
|
|
|
|
} |
79
|
|
|
|
|
|
|
if (eval "local \$_; require $preset" and not $@) { |
80
|
|
|
|
|
|
|
# forcing a parser should still check the version |
81
|
|
|
|
|
|
|
my $ok; |
82
|
|
|
|
|
|
|
my $has = $preset->VERSION; |
83
|
|
|
|
|
|
|
$has =~ s/_[0-9]+$//; # Remove beta-part |
84
|
|
|
|
|
|
|
if ($min_version =~ m/^v([0-9.]+)/) { # clumsy versions |
85
|
|
|
|
|
|
|
my @min = split m/\./ => $1; |
86
|
|
|
|
|
|
|
$has =~ s/^v//; |
87
|
|
|
|
|
|
|
my @has = split m/\./ => $has; |
88
|
|
|
|
|
|
|
$ok = (($has[0] * 1000 + $has[1]) * 1000 + $has[2]) >= |
89
|
|
|
|
|
|
|
(($min[0] * 1000 + $min[1]) * 1000 + $min[2]); |
90
|
|
|
|
|
|
|
} |
91
|
|
|
|
|
|
|
else { # normal versions |
92
|
|
|
|
|
|
|
$ok = $has >= $min_version; |
93
|
|
|
|
|
|
|
} |
94
|
|
|
|
|
|
|
$ok or $preset = "!$preset"; |
95
|
|
|
|
|
|
|
} |
96
|
|
|
|
|
|
|
else { |
97
|
|
|
|
|
|
|
$preset = "!$preset"; |
98
|
|
|
|
|
|
|
} |
99
|
|
|
|
|
|
|
$can{$format} = $preset; |
100
|
|
|
|
|
|
|
} |
101
|
|
|
|
|
|
|
delete $can{supports}; |
102
|
|
|
|
|
|
|
for (@parsers) { |
103
|
|
|
|
|
|
|
my ($flag, $mod, $vsn) = @$_; |
104
|
|
|
|
|
|
|
$can{$flag} and next; |
105
|
|
|
|
|
|
|
eval "require $mod; \$vsn and ${mod}->VERSION (\$vsn); \$can{\$flag} = '$mod'" or |
106
|
|
|
|
|
|
|
$_->[0] = "! Cannot use $mod version $vsn: $@"; |
107
|
|
|
|
|
|
|
} |
108
|
|
|
|
|
|
|
$can{sc} = __PACKAGE__; # SquirelCalc is built-in |
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
defined $Spreadsheet::ParseExcel::VERSION && $Spreadsheet::ParseExcel::VERSION < 0.61 and |
111
|
|
|
|
|
|
|
*Spreadsheet::ParseExcel::Workbook::get_active_sheet = sub { undef; }; |
112
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
my $debug = 0; |
114
|
|
|
|
|
|
|
my %def_opts = ( |
115
|
|
|
|
|
|
|
rc => 1, |
116
|
|
|
|
|
|
|
cells => 1, |
117
|
|
|
|
|
|
|
attr => 0, |
118
|
|
|
|
|
|
|
clip => undef, # $opt{cells}; |
119
|
|
|
|
|
|
|
strip => 0, |
120
|
|
|
|
|
|
|
pivot => 0, |
121
|
|
|
|
|
|
|
dtfmt => "yyyy-mm-dd", # Format 14 |
122
|
|
|
|
|
|
|
debug => 0, |
123
|
|
|
|
|
|
|
passwd => undef, |
124
|
|
|
|
|
|
|
parser => undef, |
125
|
|
|
|
|
|
|
sep => undef, |
126
|
|
|
|
|
|
|
quote => undef, |
127
|
|
|
|
|
|
|
label => undef, |
128
|
|
|
|
|
|
|
); |
129
|
|
|
|
|
|
|
my @def_attr = ( |
130
|
|
|
|
|
|
|
type => "text", |
131
|
|
|
|
|
|
|
fgcolor => undef, |
132
|
|
|
|
|
|
|
bgcolor => undef, |
133
|
|
|
|
|
|
|
font => undef, |
134
|
|
|
|
|
|
|
size => undef, |
135
|
|
|
|
|
|
|
format => undef, |
136
|
|
|
|
|
|
|
halign => "left", |
137
|
|
|
|
|
|
|
valign => "top", |
138
|
|
|
|
|
|
|
bold => 0, |
139
|
|
|
|
|
|
|
italic => 0, |
140
|
|
|
|
|
|
|
uline => 0, |
141
|
|
|
|
|
|
|
wrap => 0, |
142
|
|
|
|
|
|
|
merged => 0, |
143
|
|
|
|
|
|
|
hidden => 0, |
144
|
|
|
|
|
|
|
locked => 0, |
145
|
|
|
|
|
|
|
enc => "utf-8", # $ENV{LC_ALL} // $ENV{LANG} // ... |
146
|
|
|
|
|
|
|
formula => undef, |
147
|
|
|
|
|
|
|
); |
148
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
# Helper functions |
150
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
sub _dump { |
152
|
7
|
|
|
7
|
|
17
|
my ($label, $ref) = @_; |
153
|
7
|
50
|
|
|
|
22
|
if ($can{dmp}) { |
154
|
7
|
|
|
|
|
42
|
print STDERR Data::Peek::DDumper ({ $label => $ref }); |
155
|
|
|
|
|
|
|
} |
156
|
|
|
|
|
|
|
else { |
157
|
0
|
|
|
|
|
0
|
print STDERR Data::Dumper->Dump ([$ref], [$label]); |
158
|
|
|
|
|
|
|
} |
159
|
|
|
|
|
|
|
} # _dump |
160
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
sub _parser { |
162
|
342
|
100
|
|
342
|
|
1370
|
my $type = shift or return ""; |
163
|
100
|
|
|
|
|
287
|
$type = lc $type; |
164
|
|
|
|
|
|
|
# Aliases and fullnames |
165
|
100
|
100
|
|
|
|
327
|
$type eq "excel" and return "xls"; |
166
|
99
|
50
|
|
|
|
307
|
$type eq "excel2007" and return "xlsx"; |
167
|
99
|
100
|
|
|
|
285
|
$type eq "oo" and return "sxc"; |
168
|
98
|
100
|
|
|
|
297
|
$type eq "ods" and return "sxc"; |
169
|
96
|
100
|
|
|
|
277
|
$type eq "openoffice" and return "sxc"; |
170
|
95
|
50
|
|
|
|
278
|
$type eq "libreoffice" and return "sxc"; |
171
|
95
|
100
|
|
|
|
278
|
$type eq "perl" and return "prl"; |
172
|
94
|
50
|
|
|
|
279
|
$type eq "squirelcalc" and return "sc"; |
173
|
94
|
100
|
|
|
|
572
|
return exists $can{$type} ? $type : ""; |
174
|
|
|
|
|
|
|
} # _parser |
175
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
sub new { |
177
|
12
|
|
|
12
|
1
|
7235
|
my $class = shift; |
178
|
12
|
|
100
|
|
|
42
|
my $r = ReadData (@_) || [{ |
179
|
|
|
|
|
|
|
parsers => [], |
180
|
|
|
|
|
|
|
error => undef, |
181
|
|
|
|
|
|
|
sheets => 0, |
182
|
|
|
|
|
|
|
sheet => { }, |
183
|
|
|
|
|
|
|
}]; |
184
|
12
|
|
|
|
|
167
|
bless $r => $class; |
185
|
|
|
|
|
|
|
} # new |
186
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
# Spreadsheet::Read::parses ("csv") or die "Cannot parse CSV" |
188
|
|
|
|
|
|
|
sub parses { |
189
|
63
|
100
|
|
63
|
1
|
142213
|
ref $_[0] eq __PACKAGE__ and shift; |
190
|
63
|
100
|
|
|
|
252
|
my $type = _parser (shift) or return 0; |
191
|
61
|
100
|
|
|
|
422
|
if ($can{$type} =~ m/^!\s*(.*)/) { |
192
|
8
|
|
|
|
|
31
|
$@ = $1; |
193
|
8
|
|
|
|
|
29
|
return 0; |
194
|
|
|
|
|
|
|
} |
195
|
53
|
|
|
|
|
181
|
return $can{$type}; |
196
|
|
|
|
|
|
|
} # parses |
197
|
|
|
|
|
|
|
|
198
|
|
|
|
|
|
|
sub sheets { |
199
|
4
|
|
|
4
|
1
|
11
|
my $ctrl = shift->[0]; |
200
|
4
|
|
|
|
|
8
|
my %s = %{$ctrl->{sheet}}; |
|
4
|
|
|
|
|
17
|
|
201
|
4
|
100
|
|
|
|
32
|
wantarray ? sort { $s{$a} <=> $s{$b} } keys %s : $ctrl->{sheets}; |
|
4
|
|
|
|
|
25
|
|
202
|
|
|
|
|
|
|
} # sheets |
203
|
|
|
|
|
|
|
|
204
|
|
|
|
|
|
|
# col2label (4) => "D" |
205
|
|
|
|
|
|
|
sub col2label { |
206
|
15697
|
100
|
|
15697
|
1
|
33559
|
ref $_[0] eq __PACKAGE__ and shift; |
207
|
15697
|
|
|
|
|
20665
|
my $c = shift; |
208
|
15697
|
100
|
100
|
|
|
40185
|
defined $c && $c > 0 or return ""; |
209
|
15693
|
|
|
|
|
21147
|
my $cell = ""; |
210
|
15693
|
|
|
|
|
25964
|
while ($c) { |
211
|
53
|
|
|
53
|
|
28205
|
use integer; |
|
53
|
|
|
|
|
806
|
|
|
53
|
|
|
|
|
290
|
|
212
|
|
|
|
|
|
|
|
213
|
15710
|
|
|
|
|
29819
|
substr $cell, 0, 0, chr (--$c % 26 + ord "A"); |
214
|
15710
|
|
|
|
|
28222
|
$c /= 26; |
215
|
|
|
|
|
|
|
} |
216
|
15693
|
|
|
|
|
41640
|
$cell; |
217
|
|
|
|
|
|
|
} # col2label |
218
|
|
|
|
|
|
|
|
219
|
|
|
|
|
|
|
# cr2cell (4, 18) => "D18" |
220
|
|
|
|
|
|
|
# No prototype to allow 'cr2cell (@rowcol)' |
221
|
|
|
|
|
|
|
sub cr2cell { |
222
|
15495
|
100
|
|
15495
|
1
|
186261
|
ref $_[0] eq __PACKAGE__ and shift; |
223
|
15495
|
|
|
|
|
24713
|
my ($c, $r) = @_; |
224
|
15495
|
100
|
100
|
|
|
71533
|
defined $c && defined $r && $c > 0 && $r > 0 or return ""; |
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
225
|
15489
|
|
|
|
|
23358
|
col2label ($c) . $r; |
226
|
|
|
|
|
|
|
} # cr2cell |
227
|
|
|
|
|
|
|
|
228
|
|
|
|
|
|
|
# cell2cr ("D18") => (4, 18) |
229
|
|
|
|
|
|
|
sub cell2cr { |
230
|
604
|
100
|
|
604
|
1
|
585639
|
ref $_[0] eq __PACKAGE__ and shift; |
231
|
604
|
100
|
100
|
|
|
5510
|
my ($cc, $r) = (uc ($_[0]||"") =~ m/^([A-Z]+)([0-9]+)$/) or return (0, 0); |
232
|
600
|
|
|
|
|
1208
|
my $c = 0; |
233
|
600
|
|
|
|
|
2466
|
while ($cc =~ s/^([A-Z])//) { |
234
|
608
|
|
|
|
|
2421
|
$c = 26 * $c + 1 + ord ($1) - ord ("A"); |
235
|
|
|
|
|
|
|
} |
236
|
600
|
|
|
|
|
2043
|
($c, $r); |
237
|
|
|
|
|
|
|
} # cell2cr |
238
|
|
|
|
|
|
|
|
239
|
|
|
|
|
|
|
# my @row = cellrow ($book->[1], 1); |
240
|
|
|
|
|
|
|
# my @row = $book->cellrow (1, 1); |
241
|
|
|
|
|
|
|
sub cellrow { |
242
|
10
|
100
|
|
10
|
1
|
66
|
my $sheet = ref $_[0] eq __PACKAGE__ ? (shift)->[shift] : shift or return; |
|
|
100
|
|
|
|
|
|
243
|
9
|
100
|
66
|
|
|
63
|
ref $sheet eq "HASH" && exists $sheet->{cell} or return; |
244
|
7
|
50
|
33
|
|
|
53
|
exists $sheet->{maxcol} && exists $sheet->{maxrow} or return; |
245
|
7
|
100
|
|
|
|
27
|
my $row = shift or return; |
246
|
6
|
100
|
100
|
|
|
62
|
$row > 0 && $row <= $sheet->{maxrow} or return; |
247
|
4
|
|
|
|
|
13
|
my $s = $sheet->{cell}; |
248
|
4
|
|
|
|
|
15
|
map { $s->[$_][$row] } 1..$sheet->{maxcol}; |
|
76
|
|
|
|
|
143
|
|
249
|
|
|
|
|
|
|
} # cellrow |
250
|
|
|
|
|
|
|
|
251
|
|
|
|
|
|
|
# my @row = row ($book->[1], 1); |
252
|
|
|
|
|
|
|
sub row { |
253
|
10
|
100
|
|
10
|
1
|
3241
|
my $sheet = ref $_[0] eq __PACKAGE__ ? (shift)->[shift] : shift or return; |
|
|
100
|
|
|
|
|
|
254
|
9
|
100
|
66
|
|
|
67
|
ref $sheet eq "HASH" && exists $sheet->{cell} or return; |
255
|
7
|
50
|
33
|
|
|
36
|
exists $sheet->{maxcol} && exists $sheet->{maxrow} or return; |
256
|
7
|
100
|
|
|
|
24
|
my $row = shift or return; |
257
|
6
|
100
|
100
|
|
|
38
|
$row > 0 && $row <= $sheet->{maxrow} or return; |
258
|
4
|
|
|
|
|
20
|
map { $sheet->{cr2cell ($_, $row)} } 1..$sheet->{maxcol}; |
|
76
|
|
|
|
|
136
|
|
259
|
|
|
|
|
|
|
} # row |
260
|
|
|
|
|
|
|
|
261
|
|
|
|
|
|
|
# Convert {cell}'s [column][row] to a [row][column] list |
262
|
|
|
|
|
|
|
# my @rows = rows ($book->[1]); |
263
|
|
|
|
|
|
|
sub rows { |
264
|
33
|
50
|
|
33
|
1
|
19187
|
my $sheet = ref $_[0] eq __PACKAGE__ ? (shift)->[shift] : shift or return; |
|
|
100
|
|
|
|
|
|
265
|
30
|
100
|
100
|
|
|
170
|
ref $sheet eq "HASH" && exists $sheet->{cell} or return; |
266
|
26
|
100
|
100
|
|
|
118
|
exists $sheet->{maxcol} && exists $sheet->{maxrow} or return; |
267
|
5
|
|
|
|
|
16
|
my $s = $sheet->{cell}; |
268
|
|
|
|
|
|
|
|
269
|
|
|
|
|
|
|
map { |
270
|
20
|
|
|
|
|
35
|
my $r = $_; |
271
|
20
|
|
|
|
|
38
|
[ map { $s->[$_][$r] } 1..$sheet->{maxcol} ]; |
|
80
|
|
|
|
|
204
|
|
272
|
5
|
|
|
|
|
20
|
} 1..$sheet->{maxrow}; |
273
|
|
|
|
|
|
|
} # rows |
274
|
|
|
|
|
|
|
|
275
|
|
|
|
|
|
|
sub sheet { |
276
|
23
|
|
|
23
|
1
|
611
|
my ($book, $sheet) = @_; |
277
|
23
|
100
|
100
|
|
|
118
|
$book && $sheet or return; |
278
|
21
|
|
|
|
|
38
|
my $class = "Spreadsheet::Read::Sheet"; |
279
|
|
|
|
|
|
|
$sheet =~ m/^[0-9]+$/ && $sheet >= 1 && $sheet <= $book->[0]{sheets} and |
280
|
21
|
100
|
66
|
|
|
276
|
return bless $book->[$sheet] => $class; |
|
|
|
100
|
|
|
|
|
281
|
|
|
|
|
|
|
exists $book->[0]{sheet}{$sheet} and |
282
|
6
|
100
|
|
|
|
27
|
return bless $book->[$book->[0]{sheet}{$sheet}] => $class; |
283
|
5
|
|
|
|
|
13
|
foreach my $idx (1 .. $book->[0]{sheets}) { |
284
|
6
|
100
|
|
|
|
23
|
$book->[$idx]{label} eq $sheet and |
285
|
|
|
|
|
|
|
return bless $book->[$idx] => $class; |
286
|
|
|
|
|
|
|
} |
287
|
4
|
|
|
|
|
19
|
return; |
288
|
|
|
|
|
|
|
} # sheet |
289
|
|
|
|
|
|
|
|
290
|
|
|
|
|
|
|
# If option "clip" is set, remove the trailing rows and |
291
|
|
|
|
|
|
|
# columns in each sheet that contain no visible data |
292
|
|
|
|
|
|
|
sub _clipsheets { |
293
|
227
|
|
|
227
|
|
537
|
my ($opt, $ref) = @_; |
294
|
|
|
|
|
|
|
|
295
|
227
|
50
|
|
|
|
758
|
$ref->[0]{sheets} or return $ref; |
296
|
|
|
|
|
|
|
|
297
|
227
|
|
|
|
|
615
|
my ($rc, $cl) = ($opt->{rc}, $opt->{cells}); |
298
|
227
|
|
|
|
|
601
|
my ($oc, $os, $oa) = ($opt->{clip}, $opt->{strip}, $opt->{attr}); |
299
|
|
|
|
|
|
|
|
300
|
|
|
|
|
|
|
# Strip leading/trailing spaces |
301
|
227
|
100
|
100
|
|
|
949
|
if ($os || $oc) { |
302
|
212
|
|
|
|
|
646
|
foreach my $sheet (1 .. $ref->[0]{sheets}) { |
303
|
254
|
|
|
|
|
574
|
$ref->[$sheet]{indx} = $sheet; |
304
|
254
|
|
|
|
|
438
|
my $ss = $ref->[$sheet]; |
305
|
254
|
100
|
66
|
|
|
1064
|
$ss->{maxrow} && $ss->{maxcol} or next; |
306
|
247
|
|
|
|
|
540
|
my ($mc, $mr) = (0, 0); |
307
|
247
|
|
|
|
|
646
|
foreach my $row (1 .. $ss->{maxrow}) { |
308
|
1954
|
|
|
|
|
3257
|
foreach my $col (1 .. $ss->{maxcol}) { |
309
|
16636
|
100
|
|
|
|
25590
|
if ($rc) { |
310
|
14738
|
100
|
|
|
|
26748
|
defined $ss->{cell}[$col][$row] or next; |
311
|
5881
|
100
|
|
|
|
10614
|
$os & 2 and $ss->{cell}[$col][$row] =~ s/\s+$//; |
312
|
5881
|
100
|
|
|
|
9558
|
$os & 1 and $ss->{cell}[$col][$row] =~ s/^\s+//; |
313
|
5881
|
100
|
|
|
|
10479
|
if (length $ss->{cell}[$col][$row]) { |
314
|
4417
|
100
|
|
|
|
7282
|
$col > $mc and $mc = $col; |
315
|
4417
|
100
|
|
|
|
7639
|
$row > $mr and $mr = $row; |
316
|
|
|
|
|
|
|
} |
317
|
|
|
|
|
|
|
} |
318
|
7779
|
100
|
|
|
|
12105
|
if ($cl) { |
319
|
7560
|
|
|
|
|
11401
|
my $cell = cr2cell ($col, $row); |
320
|
7560
|
100
|
|
|
|
15671
|
defined $ss->{$cell} or next; |
321
|
6233
|
100
|
|
|
|
11208
|
$os & 2 and $ss->{$cell} =~ s/\s+$//; |
322
|
6233
|
100
|
|
|
|
9893
|
$os & 1 and $ss->{$cell} =~ s/^\s+//; |
323
|
6233
|
100
|
|
|
|
11233
|
if (length $ss->{$cell}) { |
324
|
4757
|
100
|
|
|
|
7852
|
$col > $mc and $mc = $col; |
325
|
4757
|
100
|
|
|
|
8925
|
$row > $mr and $mr = $row; |
326
|
|
|
|
|
|
|
} |
327
|
|
|
|
|
|
|
} |
328
|
|
|
|
|
|
|
} |
329
|
|
|
|
|
|
|
} |
330
|
|
|
|
|
|
|
|
331
|
247
|
100
|
66
|
|
|
1341
|
$oc && ($mc < $ss->{maxcol} || $mr < $ss->{maxrow}) or next; |
|
|
|
66
|
|
|
|
|
332
|
|
|
|
|
|
|
|
333
|
|
|
|
|
|
|
# Remove trailing empty columns |
334
|
106
|
|
|
|
|
252
|
foreach my $col (($mc + 1) .. $ss->{maxcol}) { |
335
|
243
|
100
|
|
|
|
564
|
$rc and undef $ss->{cell}[$col]; |
336
|
243
|
100
|
|
|
|
474
|
$oa and undef $ss->{attr}[$col]; |
337
|
243
|
100
|
|
|
|
444
|
$cl or next; |
338
|
193
|
|
|
|
|
308
|
my $c = col2label ($col); |
339
|
193
|
|
|
|
|
1425
|
delete $ss->{"$c$_"} for 1 .. $ss->{maxrow}; |
340
|
|
|
|
|
|
|
} |
341
|
|
|
|
|
|
|
|
342
|
|
|
|
|
|
|
# Remove trailing empty rows |
343
|
106
|
|
|
|
|
247
|
foreach my $row (($mr + 1) .. $ss->{maxrow}) { |
344
|
139
|
|
|
|
|
258
|
foreach my $col (1 .. $mc) { |
345
|
64
|
50
|
|
|
|
203
|
$cl and delete $ss->{cr2cell ($col, $row)}; |
346
|
64
|
50
|
|
|
|
168
|
$rc and undef $ss->{cell} [$col][$row]; |
347
|
64
|
50
|
|
|
|
133
|
$oa and undef $ss->{attr} [$col][$row]; |
348
|
|
|
|
|
|
|
} |
349
|
|
|
|
|
|
|
} |
350
|
|
|
|
|
|
|
|
351
|
106
|
|
|
|
|
314
|
($ss->{maxrow}, $ss->{maxcol}) = ($mr, $mc); |
352
|
|
|
|
|
|
|
} |
353
|
|
|
|
|
|
|
} |
354
|
|
|
|
|
|
|
|
355
|
227
|
100
|
|
|
|
664
|
if ($opt->{pivot}) { |
356
|
1
|
|
|
|
|
5
|
foreach my $sheet (1 .. $ref->[0]{sheets}) { |
357
|
1
|
|
|
|
|
2
|
my $ss = $ref->[$sheet]; |
358
|
1
|
0
|
33
|
|
|
5
|
$ss->{maxrow} || $ss->{maxcol} or next; |
359
|
1
|
50
|
|
|
|
4
|
my $mx = $ss->{maxrow} > $ss->{maxcol} ? $ss->{maxrow} : $ss->{maxcol}; |
360
|
1
|
|
|
|
|
4
|
foreach my $row (2 .. $mx) { |
361
|
18
|
|
|
|
|
37
|
foreach my $col (1 .. ($row - 1)) { |
362
|
|
|
|
|
|
|
$opt->{rc} and |
363
|
|
|
|
|
|
|
($ss->{cell}[$col][$row], $ss->{cell}[$row][$col]) = |
364
|
171
|
50
|
|
|
|
569
|
($ss->{cell}[$row][$col], $ss->{cell}[$col][$row]); |
365
|
|
|
|
|
|
|
$opt->{cells} and |
366
|
|
|
|
|
|
|
($ss->{cr2cell ($col, $row)}, $ss->{cr2cell ($row, $col)}) = |
367
|
171
|
50
|
|
|
|
361
|
($ss->{cr2cell ($row, $col)}, $ss->{cr2cell ($col, $row)}); |
368
|
|
|
|
|
|
|
} |
369
|
|
|
|
|
|
|
} |
370
|
1
|
|
|
|
|
4
|
($ss->{maxcol}, $ss->{maxrow}) = ($ss->{maxrow}, $ss->{maxcol}); |
371
|
|
|
|
|
|
|
} |
372
|
|
|
|
|
|
|
} |
373
|
|
|
|
|
|
|
|
374
|
227
|
|
|
|
|
5153
|
$ref; |
375
|
|
|
|
|
|
|
} # _clipsheets |
376
|
|
|
|
|
|
|
|
377
|
|
|
|
|
|
|
# Convert a single color (index) to a color |
378
|
|
|
|
|
|
|
sub _xls_color { |
379
|
1742
|
|
|
1742
|
|
20358
|
my $clr = shift; |
380
|
1742
|
100
|
|
|
|
3571
|
defined $clr or return undef; |
381
|
850
|
50
|
|
|
|
1553
|
$clr eq "#000000" and return undef; |
382
|
850
|
50
|
|
|
|
1620
|
$clr =~ m/^#[0-9a-fA-F]+$/ and return lc $clr; |
383
|
850
|
100
|
66
|
|
|
2527
|
$clr == 0 || $clr == 32767 and return undef; # Default fg color |
384
|
468
|
|
|
|
|
1097
|
return "#" . lc Spreadsheet::ParseExcel->ColorIdxToRGB ($clr); |
385
|
|
|
|
|
|
|
} # _xls_color |
386
|
|
|
|
|
|
|
|
387
|
|
|
|
|
|
|
# Convert a fill [ $pattern, $front_color, $back_color ] to a single background |
388
|
|
|
|
|
|
|
sub _xls_fill { |
389
|
1526
|
|
|
1526
|
|
2714
|
my ($p, $fg, $bg) = @_; |
390
|
1526
|
50
|
|
|
|
2711
|
defined $p or return undef; |
391
|
1526
|
50
|
|
|
|
2593
|
$p == 32767 and return undef; # Default fg color |
392
|
1526
|
100
|
100
|
|
|
16518
|
$p == 0 && !defined $bg and return undef; # No fill bg color |
393
|
634
|
100
|
|
|
|
1117
|
$p == 1 and return _xls_color ($fg); |
394
|
418
|
50
|
33
|
|
|
6189
|
$bg < 8 || $bg > 63 and return undef; # see Workbook.pm#106 |
395
|
0
|
|
|
|
|
0
|
return _xls_color ($bg); |
396
|
|
|
|
|
|
|
} # _xls_fill |
397
|
|
|
|
|
|
|
|
398
|
|
|
|
|
|
|
sub ReadData { |
399
|
279
|
100
|
|
279
|
1
|
285361
|
my $txt = shift or return; |
400
|
|
|
|
|
|
|
|
401
|
276
|
|
|
|
|
551
|
my %opt; |
402
|
276
|
100
|
|
|
|
750
|
if (@_) { |
403
|
221
|
100
|
|
|
|
1080
|
if (ref $_[0] eq "HASH") { %opt = %{shift @_} } |
|
20
|
50
|
|
|
|
30
|
|
|
20
|
|
|
|
|
77
|
|
404
|
201
|
|
|
|
|
842
|
elsif (@_ % 2 == 0) { %opt = @_ } |
405
|
|
|
|
|
|
|
} |
406
|
|
|
|
|
|
|
|
407
|
276
|
100
|
|
|
|
1032
|
exists $opt{rc} or $opt{rc} = $def_opts{rc}; |
408
|
276
|
100
|
|
|
|
812
|
exists $opt{cells} or $opt{cells} = $def_opts{cells}; |
409
|
276
|
100
|
|
|
|
775
|
exists $opt{attr} or $opt{attr} = $def_opts{attr}; |
410
|
276
|
100
|
|
|
|
749
|
exists $opt{clip} or $opt{clip} = $opt{cells}; |
411
|
276
|
100
|
|
|
|
741
|
exists $opt{strip} or $opt{strip} = $def_opts{strip}; |
412
|
276
|
100
|
|
|
|
814
|
exists $opt{dtfmt} or $opt{dtfmt} = $def_opts{dtfmt}; |
413
|
|
|
|
|
|
|
|
414
|
|
|
|
|
|
|
# $debug = $opt{debug} // 0; |
415
|
276
|
100
|
|
|
|
737
|
$debug = defined $opt{debug} ? $opt{debug} : $def_opts{debug}; |
416
|
276
|
100
|
|
|
|
710
|
$debug > 4 and _dump (Options => \%opt); |
417
|
|
|
|
|
|
|
|
418
|
31
|
|
|
|
|
95
|
my %parser_opts = map { $_ => $opt{$_} } |
419
|
276
|
|
|
|
|
2541
|
grep { !exists $def_opts{$_} } |
|
1764
|
|
|
|
|
3503
|
|
420
|
|
|
|
|
|
|
keys %opt; |
421
|
|
|
|
|
|
|
|
422
|
276
|
|
|
|
|
1036
|
my $_parser = _parser ($opt{parser}); |
423
|
|
|
|
|
|
|
|
424
|
276
|
100
|
|
|
|
1066
|
my $io_ref = ref ($txt) =~ m/GLOB|IO/ ? $txt : undef; |
425
|
|
|
|
|
|
|
my $io_fil = $io_ref ? 0 : $txt =~ m/\0/ ? 0 |
426
|
53
|
100
|
|
53
|
|
111422
|
: do { no warnings "newline"; -f $txt }; |
|
53
|
100
|
|
|
|
174
|
|
|
53
|
|
|
|
|
264656
|
|
|
276
|
|
|
|
|
1023
|
|
|
255
|
|
|
|
|
5495
|
|
427
|
276
|
100
|
100
|
|
|
1722
|
my $io_txt = $io_ref || $io_fil ? 0 : 1; |
428
|
|
|
|
|
|
|
|
429
|
276
|
100
|
100
|
|
|
3194
|
$io_fil && ! -s $txt and return; |
430
|
265
|
100
|
100
|
|
|
1450
|
$io_ref && eof ($txt) and return; |
431
|
|
|
|
|
|
|
|
432
|
260
|
100
|
100
|
|
|
1863
|
if ($opt{parser} ? $_parser eq "csv" : ($io_fil && $txt =~ m/\.(csv)$/i)) { |
|
|
100
|
|
|
|
|
|
433
|
106
|
50
|
|
|
|
395
|
$can{csv} or croak "CSV parser not installed"; |
434
|
|
|
|
|
|
|
|
435
|
106
|
100
|
|
|
|
439
|
my $label = defined $opt{label} ? $opt{label} : $io_fil ? $txt : "IO"; |
|
|
100
|
|
|
|
|
|
436
|
|
|
|
|
|
|
|
437
|
106
|
50
|
|
|
|
366
|
$debug and print STDERR "Opening CSV $label using $can{csv}-", $can{csv}->VERSION, "\n"; |
438
|
|
|
|
|
|
|
|
439
|
|
|
|
|
|
|
my @data = ( |
440
|
|
|
|
|
|
|
{ type => "csv", |
441
|
|
|
|
|
|
|
parser => $can{csv}, |
442
|
|
|
|
|
|
|
version => $can{csv}->VERSION, |
443
|
|
|
|
|
|
|
parsers => [ { |
444
|
|
|
|
|
|
|
type => "csv", |
445
|
|
|
|
|
|
|
parser => $can{csv}, |
446
|
|
|
|
|
|
|
version => $can{csv}->VERSION, |
447
|
106
|
|
|
|
|
3064
|
}], |
448
|
|
|
|
|
|
|
error => undef, |
449
|
|
|
|
|
|
|
quote => '"', |
450
|
|
|
|
|
|
|
sepchar => ',', |
451
|
|
|
|
|
|
|
sheets => 1, |
452
|
|
|
|
|
|
|
sheet => { $label => 1 }, |
453
|
|
|
|
|
|
|
}, |
454
|
|
|
|
|
|
|
{ parser => 0, |
455
|
|
|
|
|
|
|
label => $label, |
456
|
|
|
|
|
|
|
maxrow => 0, |
457
|
|
|
|
|
|
|
maxcol => 0, |
458
|
|
|
|
|
|
|
cell => [], |
459
|
|
|
|
|
|
|
attr => [], |
460
|
|
|
|
|
|
|
merged => [], |
461
|
|
|
|
|
|
|
active => 1, |
462
|
|
|
|
|
|
|
}, |
463
|
|
|
|
|
|
|
); |
464
|
|
|
|
|
|
|
|
465
|
106
|
|
|
|
|
540
|
my ($sep, $quo, $in) = (",", '"'); |
466
|
106
|
100
|
|
|
|
325
|
defined $opt{sep} and $sep = $opt{sep}; |
467
|
106
|
100
|
|
|
|
244
|
defined $opt{quote} and $quo = $opt{quote}; |
468
|
106
|
50
|
|
|
|
264
|
$debug > 8 and _dump (debug => { |
469
|
|
|
|
|
|
|
data => \@data, txt => $txt, io_ref => $io_ref, io_fil => $io_fil }); |
470
|
106
|
100
|
|
|
|
254
|
if ($io_fil) { |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
471
|
94
|
100
|
100
|
|
|
329
|
unless (defined $opt{quote} && defined $opt{sep}) { |
472
|
87
|
50
|
|
|
|
3393
|
open $in, "<", $txt or return; |
473
|
87
|
|
|
|
|
1646
|
my $l1 = <$in>; |
474
|
|
|
|
|
|
|
|
475
|
87
|
100
|
|
|
|
419
|
$quo = defined $opt{quote} ? $opt{quote} : '"'; |
476
|
|
|
|
|
|
|
$sep = # If explicitly set, use it |
477
|
|
|
|
|
|
|
defined $opt{sep} ? $opt{sep} : |
478
|
|
|
|
|
|
|
# otherwise start auto-detect with quoted strings |
479
|
87
|
100
|
|
|
|
786
|
$l1 =~ m/["0-9];["0-9;]/ ? ";" : |
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
480
|
|
|
|
|
|
|
$l1 =~ m/["0-9],["0-9,]/ ? "," : |
481
|
|
|
|
|
|
|
$l1 =~ m/["0-9]\t["0-9,]/ ? "\t" : |
482
|
|
|
|
|
|
|
# If neither, then for unquoted strings |
483
|
|
|
|
|
|
|
$l1 =~ m/\w;[\w;]/ ? ";" : |
484
|
|
|
|
|
|
|
$l1 =~ m/\w,[\w,]/ ? "," : |
485
|
|
|
|
|
|
|
$l1 =~ m/\w\t[\w,]/ ? "\t" : |
486
|
|
|
|
|
|
|
"," ; |
487
|
87
|
|
|
|
|
1243
|
close $in; |
488
|
|
|
|
|
|
|
} |
489
|
94
|
50
|
|
|
|
2630
|
open $in, "<", $txt or return; |
490
|
|
|
|
|
|
|
} |
491
|
|
|
|
|
|
|
elsif ($io_ref) { |
492
|
10
|
|
|
|
|
22
|
$in = $txt; |
493
|
|
|
|
|
|
|
} |
494
|
|
|
|
|
|
|
elsif (ref $txt eq "SCALAR") { |
495
|
1
|
50
|
|
|
|
18
|
open $in, "<", $txt or croak "Cannot open input: $!"; |
496
|
|
|
|
|
|
|
} |
497
|
|
|
|
|
|
|
elsif ($txt =~ m/[\r\n,;]/) { |
498
|
1
|
50
|
|
2
|
|
59
|
open $in, "<", \$txt or croak "Cannot open input: $!"; |
|
2
|
|
|
|
|
21
|
|
|
2
|
|
|
|
|
5
|
|
|
2
|
|
|
|
|
18
|
|
499
|
|
|
|
|
|
|
} |
500
|
|
|
|
|
|
|
else { |
501
|
0
|
|
|
|
|
0
|
warn "Input type ", ref $txt, |
502
|
|
|
|
|
|
|
" might not be supported. Please file a ticket\n"; |
503
|
0
|
|
|
|
|
0
|
$in = $txt; # Now pray ... |
504
|
|
|
|
|
|
|
} |
505
|
106
|
50
|
|
|
|
1481
|
$debug > 1 and print STDERR "CSV sep_char '$sep', quote_char '$quo'\n"; |
506
|
|
|
|
|
|
|
my $csv = $can{csv}->new ({ |
507
|
|
|
|
|
|
|
%parser_opts, |
508
|
|
|
|
|
|
|
|
509
|
|
|
|
|
|
|
sep_char => ($data[0]{sepchar} = $sep), |
510
|
106
|
50
|
|
|
|
1339
|
quote_char => ($data[0]{quote} = $quo), |
511
|
|
|
|
|
|
|
keep_meta_info => 1, |
512
|
|
|
|
|
|
|
binary => 1, |
513
|
|
|
|
|
|
|
auto_diag => 1, |
514
|
|
|
|
|
|
|
}) or croak "Cannot create a csv ('$sep', '$quo') parser!"; |
515
|
|
|
|
|
|
|
|
516
|
106
|
|
|
|
|
22143
|
while (my $row = $csv->getline ($in)) { |
517
|
413
|
50
|
|
|
|
70327
|
my @row = @$row or last; |
518
|
|
|
|
|
|
|
|
519
|
413
|
|
|
|
|
886
|
my $r = ++$data[1]{maxrow}; |
520
|
413
|
100
|
|
|
|
1020
|
@row > $data[1]{maxcol} and $data[1]{maxcol} = @row; |
521
|
413
|
|
|
|
|
1097
|
foreach my $c (0 .. $#row) { |
522
|
2427
|
|
|
|
|
3329
|
my $val = $row[$c]; |
523
|
2427
|
|
|
|
|
4225
|
my $cell = cr2cell ($c + 1, $r); |
524
|
2427
|
100
|
|
|
|
6746
|
$opt{rc} and $data[1]{cell}[$c + 1][$r] = $val; |
525
|
2427
|
100
|
|
|
|
6095
|
$opt{cells} and $data[1]{$cell} = $val; |
526
|
2427
|
50
|
|
|
|
12137
|
$opt{attr} and $data[1]{attr}[$c + 1][$r] = { @def_attr }; |
527
|
|
|
|
|
|
|
} |
528
|
|
|
|
|
|
|
} |
529
|
106
|
50
|
|
|
|
14130
|
$csv->eof () or $data[0]{error} = [ $csv->error_diag ]; |
530
|
106
|
|
|
|
|
1593
|
close $in; |
531
|
|
|
|
|
|
|
|
532
|
106
|
|
|
|
|
315
|
for (@{$data[1]{cell}}) { |
|
106
|
|
|
|
|
517
|
|
533
|
734
|
100
|
|
|
|
1602
|
defined or $_ = []; |
534
|
|
|
|
|
|
|
} |
535
|
106
|
|
|
|
|
482
|
return _clipsheets \%opt, [ @data ]; |
536
|
|
|
|
|
|
|
} |
537
|
|
|
|
|
|
|
|
538
|
154
|
100
|
|
|
|
411
|
if ($io_txt) { # && $_parser !~ m/^xlsx?$/) { |
539
|
29
|
100
|
66
|
|
|
296
|
if ( # /etc/magic: Microsoft Office Document |
|
|
50
|
66
|
|
|
|
|
540
|
|
|
|
|
|
|
$txt =~ m{\A(\376\067\0\043 |
541
|
|
|
|
|
|
|
|\320\317\021\340\241\261\032\341 |
542
|
|
|
|
|
|
|
|\333\245-\0\0\0)}x |
543
|
|
|
|
|
|
|
# /usr/share/misc/magic |
544
|
|
|
|
|
|
|
|| $txt =~ m{\A.{2080}Microsoft Excel 5.0 Worksheet} |
545
|
|
|
|
|
|
|
|| $txt =~ m{\A\x09\x04\x06\x00\x00\x00\x10\x00} |
546
|
|
|
|
|
|
|
) { |
547
|
1
|
50
|
|
|
|
5
|
$can{xls} or croak "Spreadsheet::ParseExcel not installed"; |
548
|
1
|
|
|
|
|
2
|
my $tmpfile; |
549
|
1
|
50
|
|
|
|
4
|
if ($can{ios}) { # Do not use a temp file if IO::Scalar is available |
550
|
1
|
|
|
|
|
3
|
$tmpfile = \$txt; |
551
|
|
|
|
|
|
|
} |
552
|
|
|
|
|
|
|
else { |
553
|
0
|
|
|
|
|
0
|
$tmpfile = File::Temp->new (SUFFIX => ".xls", UNLINK => 1); |
554
|
0
|
|
|
|
|
0
|
binmode $tmpfile; |
555
|
0
|
|
|
|
|
0
|
print $tmpfile $txt; |
556
|
0
|
|
|
|
|
0
|
close $tmpfile; |
557
|
|
|
|
|
|
|
} |
558
|
1
|
50
|
|
|
|
53
|
open $io_ref, "<", $tmpfile or return; |
559
|
1
|
|
|
|
|
981
|
$io_txt = 0; |
560
|
1
|
|
|
|
|
6
|
$_parser = _parser ($opt{parser} = "xls"); |
561
|
|
|
|
|
|
|
} |
562
|
|
|
|
|
|
|
elsif ( # /usr/share/misc/magic |
563
|
|
|
|
|
|
|
$txt =~ m{\APK\003\004.{4,30}(?:\[Content_Types\]\.xml|_rels/\.rels)} |
564
|
|
|
|
|
|
|
) { |
565
|
0
|
0
|
|
|
|
0
|
$can{xlsx} or croak "XLSX parser not installed"; |
566
|
0
|
|
|
|
|
0
|
my $tmpfile; |
567
|
0
|
0
|
|
|
|
0
|
if ($can{ios}) { # Do not use a temp file if IO::Scalar is available |
568
|
0
|
|
|
|
|
0
|
$tmpfile = \$txt; |
569
|
|
|
|
|
|
|
} |
570
|
|
|
|
|
|
|
else { |
571
|
0
|
|
|
|
|
0
|
$tmpfile = File::Temp->new (SUFFIX => ".xlsx", UNLINK => 1); |
572
|
0
|
|
|
|
|
0
|
binmode $tmpfile; |
573
|
0
|
|
|
|
|
0
|
print $tmpfile $txt; |
574
|
0
|
|
|
|
|
0
|
close $tmpfile; |
575
|
|
|
|
|
|
|
} |
576
|
0
|
0
|
|
|
|
0
|
open $io_ref, "<", $tmpfile or return; |
577
|
0
|
|
|
|
|
0
|
$io_txt = 0; |
578
|
0
|
|
|
|
|
0
|
$_parser = _parser ($opt{parser} = "xlsx"); |
579
|
|
|
|
|
|
|
} |
580
|
|
|
|
|
|
|
} |
581
|
154
|
100
|
66
|
|
|
1500
|
if ($opt{parser} ? $_parser =~ m/^xlsx?$/ |
|
|
100
|
|
|
|
|
|
582
|
|
|
|
|
|
|
: ($io_fil && $txt =~ m/\.(xlsx?)$/i && ($_parser = $1))) { |
583
|
75
|
100
|
|
|
|
339
|
my $parse_type = $_parser =~ m/x$/i ? "XLSX" : "XLS"; |
584
|
75
|
50
|
|
|
|
329
|
my $parser = $can{lc $parse_type} or |
585
|
|
|
|
|
|
|
croak "Parser for $parse_type is not installed"; |
586
|
|
|
|
|
|
|
$debug and print STDERR "Opening $parse_type ", $io_ref ? "<REF>" : $txt, |
587
|
75
|
50
|
|
|
|
350
|
" using $parser-", $can{lc $parse_type}->VERSION, "\n"; |
|
|
100
|
|
|
|
|
|
588
|
75
|
50
|
|
|
|
241
|
$opt{passwd} and $parser_opts{Password} = $opt{passwd}; |
589
|
75
|
|
|
|
|
194
|
my $oBook = eval { |
590
|
|
|
|
|
|
|
$io_ref |
591
|
|
|
|
|
|
|
? $parse_type eq "XLSX" |
592
|
|
|
|
|
|
|
? $can{xlsx} =~ m/::XLSX$/ |
593
|
|
|
|
|
|
|
? $parser->new ($io_ref) |
594
|
|
|
|
|
|
|
: $parser->new (%parser_opts)->parse ($io_ref) |
595
|
|
|
|
|
|
|
: $parser->new (%parser_opts)->Parse ($io_ref) |
596
|
|
|
|
|
|
|
: $parse_type eq "XLSX" |
597
|
75
|
0
|
|
|
|
854
|
? $can{xlsx} =~ m/::XLSX$/ |
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
598
|
|
|
|
|
|
|
? $parser->new ($txt) |
599
|
|
|
|
|
|
|
: $parser->new (%parser_opts)->parse ($txt) |
600
|
|
|
|
|
|
|
: $parser->new (%parser_opts)->Parse ($txt); |
601
|
|
|
|
|
|
|
}; |
602
|
75
|
50
|
|
|
|
2252417
|
unless ($oBook) { |
603
|
|
|
|
|
|
|
# cleanup will fail on folders with spaces. |
604
|
0
|
|
|
|
|
0
|
(my $msg = $@) =~ s/ at \S+ line \d+.*//s; |
605
|
0
|
|
|
|
|
0
|
croak "$parse_type parser cannot parse data: $msg"; |
606
|
|
|
|
|
|
|
} |
607
|
75
|
50
|
|
|
|
318
|
$debug > 8 and _dump (oBook => $oBook); |
608
|
|
|
|
|
|
|
|
609
|
|
|
|
|
|
|
# WorkBook keys: |
610
|
|
|
|
|
|
|
# aColor _CurSheet Format SheetCount |
611
|
|
|
|
|
|
|
# ActiveSheet _CurSheet_ FormatStr _skip_chart |
612
|
|
|
|
|
|
|
# Author File NotSetCell _string_contin |
613
|
|
|
|
|
|
|
# BIFFVersion Flg1904 Object Version |
614
|
|
|
|
|
|
|
# _buffer FmtClass PkgStr Worksheet |
615
|
|
|
|
|
|
|
# CellHandler Font _previous_info |
616
|
|
|
|
|
|
|
|
617
|
|
|
|
|
|
|
my @data = ( { |
618
|
|
|
|
|
|
|
type => lc $parse_type, |
619
|
|
|
|
|
|
|
parser => $can{lc $parse_type}, |
620
|
|
|
|
|
|
|
version => $can{lc $parse_type}->VERSION, |
621
|
|
|
|
|
|
|
parsers => [{ |
622
|
|
|
|
|
|
|
type => lc $parse_type, |
623
|
|
|
|
|
|
|
parser => $can{lc $parse_type}, |
624
|
|
|
|
|
|
|
version => $can{lc $parse_type}->VERSION, |
625
|
|
|
|
|
|
|
}], |
626
|
|
|
|
|
|
|
error => undef, |
627
|
75
|
|
50
|
|
|
2305
|
sheets => $oBook->{SheetCount} || 0, |
628
|
|
|
|
|
|
|
sheet => {}, |
629
|
|
|
|
|
|
|
} ); |
630
|
|
|
|
|
|
|
# $debug and $data[0]{_parser} = $oBook; |
631
|
|
|
|
|
|
|
# Overrule the default date format strings |
632
|
|
|
|
|
|
|
my %def_fmt = ( |
633
|
|
|
|
|
|
|
0x0E => lc $opt{dtfmt}, # m-d-yy |
634
|
75
|
|
|
|
|
636
|
0x0F => "d-mmm-yyyy", # d-mmm-yy |
635
|
|
|
|
|
|
|
0x11 => "mmm-yyyy", # mmm-yy |
636
|
|
|
|
|
|
|
0x16 => "yyyy-mm-dd hh:mm", # m-d-yy h:mm |
637
|
|
|
|
|
|
|
); |
638
|
75
|
|
|
|
|
538
|
$oBook->{FormatStr}{$_} = $def_fmt{$_} for keys %def_fmt; |
639
|
|
|
|
|
|
|
my $oFmt = $parse_type eq "XLSX" |
640
|
75
|
50
|
|
|
|
742
|
? $can{xlsx} =~ m/::XLSX$/ |
|
|
100
|
|
|
|
|
|
641
|
|
|
|
|
|
|
? Spreadsheet::XLSX::Fmt2007->new |
642
|
|
|
|
|
|
|
: Spreadsheet::ParseExcel::FmtDefault->new |
643
|
|
|
|
|
|
|
: Spreadsheet::ParseExcel::FmtDefault->new; |
644
|
|
|
|
|
|
|
|
645
|
75
|
100
|
|
|
|
846
|
$debug and print STDERR "\t$data[0]{sheets} sheets\n"; |
646
|
|
|
|
|
|
|
my $active_sheet = $oBook->get_active_sheet |
647
|
|
|
|
|
|
|
|| $oBook->{ActiveSheet} |
648
|
75
|
|
33
|
|
|
376
|
|| $oBook->{SelectedSheet}; |
649
|
75
|
|
|
|
|
826
|
my $current_sheet = 0; |
650
|
75
|
|
|
|
|
146
|
foreach my $oWkS (@{$oBook->{Worksheet}}) { |
|
75
|
|
|
|
|
232
|
|
651
|
121
|
|
|
|
|
192
|
$current_sheet++; |
652
|
121
|
100
|
100
|
|
|
676
|
$opt{clip} and !defined $oWkS->{Cells} and next; # Skip empty sheets |
653
|
|
|
|
|
|
|
my %sheet = ( |
654
|
|
|
|
|
|
|
parser => 0, |
655
|
|
|
|
|
|
|
label => $oWkS->{Name}, |
656
|
115
|
|
|
|
|
762
|
maxrow => 0, |
657
|
|
|
|
|
|
|
maxcol => 0, |
658
|
|
|
|
|
|
|
cell => [], |
659
|
|
|
|
|
|
|
attr => [], |
660
|
|
|
|
|
|
|
merged => [], |
661
|
|
|
|
|
|
|
active => 0, |
662
|
|
|
|
|
|
|
); |
663
|
|
|
|
|
|
|
# $debug and $sheet{_parser} = $oWkS; |
664
|
115
|
50
|
|
|
|
401
|
defined $sheet{label} or $sheet{label} = "-- unlabeled --"; |
665
|
115
|
100
|
|
|
|
404
|
exists $oWkS->{MinRow} and $sheet{minrow} = $oWkS->{MinRow} + 1; |
666
|
115
|
100
|
|
|
|
346
|
exists $oWkS->{MaxRow} and $sheet{maxrow} = $oWkS->{MaxRow} + 1; |
667
|
115
|
100
|
|
|
|
420
|
exists $oWkS->{MinCol} and $sheet{mincol} = $oWkS->{MinCol} + 1; |
668
|
115
|
100
|
|
|
|
310
|
exists $oWkS->{MaxCol} and $sheet{maxcol} = $oWkS->{MaxCol} + 1; |
669
|
|
|
|
|
|
|
$sheet{merged} = [ |
670
|
12
|
|
|
|
|
35
|
map { $_->[0] } |
671
|
4
|
|
|
|
|
30
|
sort { $a->[1] cmp $b->[1] } |
672
|
12
|
|
|
|
|
106
|
map {[ $_, pack "NNNN", @$_ ]} |
673
|
12
|
|
|
|
|
90
|
map {[ map { $_ + 1 } @{$_}[1,0,3,2] ]} |
|
48
|
|
|
|
|
133
|
|
|
12
|
|
|
|
|
38
|
|
674
|
115
|
100
|
|
|
|
235
|
@{$oWkS->get_merged_areas || []}]; |
|
115
|
|
|
|
|
460
|
|
675
|
115
|
|
|
|
|
1135
|
my $sheet_idx = 1 + @data; |
676
|
115
|
100
|
|
|
|
345
|
$debug and print STDERR "\tSheet $sheet_idx '$sheet{label}' $sheet{maxrow} x $sheet{maxcol}\n"; |
677
|
115
|
100
|
|
|
|
301
|
if (defined $active_sheet) { |
678
|
|
|
|
|
|
|
# _SheetNo is 0-based |
679
|
7
|
50
|
|
|
|
23
|
my $sheet_no = defined $oWkS->{_SheetNo} ? $oWkS->{_SheetNo} : $current_sheet - 1; |
680
|
7
|
100
|
|
|
|
21
|
$sheet_no eq $active_sheet and $sheet{active} = 1; |
681
|
|
|
|
|
|
|
} |
682
|
|
|
|
|
|
|
# Sheet keys: |
683
|
|
|
|
|
|
|
# _Book FooterMargin MinCol RightMargin |
684
|
|
|
|
|
|
|
# BottomMargin FooterMergin MinRow RightMergin |
685
|
|
|
|
|
|
|
# BottomMergin HCenter Name RowHeight |
686
|
|
|
|
|
|
|
# Cells Header NoColor RowHidden |
687
|
|
|
|
|
|
|
# ColFmtNo HeaderMargin NoOrient Scale |
688
|
|
|
|
|
|
|
# ColHidden HeaderMergin NoPls SheetHidden |
689
|
|
|
|
|
|
|
# ColWidth Kind Notes _SheetNo |
690
|
|
|
|
|
|
|
# Copis Landscape PageFit SheetType |
691
|
|
|
|
|
|
|
# DefColWidth LeftMargin PageStart SheetVersion |
692
|
|
|
|
|
|
|
# DefRowHeight LeftMergin PaperSize TopMargin |
693
|
|
|
|
|
|
|
# Draft LeftToRight _Pos TopMergin |
694
|
|
|
|
|
|
|
# FitHeight MaxCol PrintGrid UsePage |
695
|
|
|
|
|
|
|
# FitWidth MaxRow PrintHeaders VCenter |
696
|
|
|
|
|
|
|
# Footer MergedArea Res VRes |
697
|
115
|
100
|
|
|
|
363
|
if (exists $oWkS->{MinRow}) { |
698
|
110
|
|
50
|
|
|
452
|
my $hiddenRows = $oWkS->{RowHidden} || []; |
699
|
110
|
|
50
|
|
|
434
|
my $hiddenCols = $oWkS->{ColHidden} || []; |
700
|
110
|
100
|
|
|
|
310
|
if ($opt{clip}) { |
701
|
91
|
|
|
|
|
227
|
my ($mr, $mc) = (-1, -1); |
702
|
91
|
|
|
|
|
286
|
foreach my $r ($oWkS->{MinRow} .. $sheet{maxrow}) { |
703
|
516
|
|
|
|
|
923
|
foreach my $c ($oWkS->{MinCol} .. $sheet{maxcol}) { |
704
|
2908
|
100
|
|
|
|
5507
|
my $oWkC = $oWkS->{Cells}[$r][$c] or next; |
705
|
1701
|
50
|
|
|
|
3190
|
defined (my $val = $oWkC->{Val}) or next; |
706
|
1701
|
100
|
|
|
|
3027
|
$val eq "" and next; |
707
|
1679
|
100
|
|
|
|
2759
|
$r > $mr and $mr = $r; |
708
|
1679
|
100
|
|
|
|
3003
|
$c > $mc and $mc = $c; |
709
|
|
|
|
|
|
|
} |
710
|
|
|
|
|
|
|
} |
711
|
91
|
|
|
|
|
318
|
($sheet{maxrow}, $sheet{maxcol}) = ($mr + 1, $mc + 1); |
712
|
|
|
|
|
|
|
} |
713
|
110
|
|
|
|
|
284
|
foreach my $r ($oWkS->{MinRow} .. $sheet{maxrow}) { |
714
|
597
|
|
|
|
|
1157
|
foreach my $c ($oWkS->{MinCol} .. $sheet{maxcol}) { |
715
|
3132
|
100
|
|
|
|
12173
|
my $oWkC = $oWkS->{Cells}[$r][$c] or next; |
716
|
|
|
|
|
|
|
#defined (my $val = $oWkC->{Val}) or next; |
717
|
1802
|
|
|
|
|
2785
|
my $val = $oWkC->{Val}; |
718
|
1802
|
50
|
33
|
|
|
6026
|
if (defined $val and my $enc = $oWkC->{Code}) { |
719
|
0
|
0
|
|
|
|
0
|
$enc eq "ucs2" and $val = decode ("utf-16be", $val); |
720
|
|
|
|
|
|
|
} |
721
|
1802
|
|
|
|
|
3920
|
my $cell = cr2cell ($c + 1, $r + 1); |
722
|
1802
|
100
|
|
|
|
5439
|
$opt{rc} and $sheet{cell}[$c + 1][$r + 1] = $val; # Original |
723
|
|
|
|
|
|
|
|
724
|
1802
|
|
|
|
|
2461
|
my $fmt; |
725
|
1802
|
|
|
|
|
2713
|
my $FmT = $oWkC->{Format}; |
726
|
1802
|
100
|
|
|
|
3091
|
if ($FmT) { |
727
|
952
|
100
|
|
|
|
1911
|
unless (ref $FmT) { |
728
|
130
|
|
|
|
|
176
|
$fmt = $FmT; |
729
|
130
|
|
|
|
|
211
|
$FmT = {}; |
730
|
|
|
|
|
|
|
} |
731
|
|
|
|
|
|
|
} |
732
|
|
|
|
|
|
|
else { |
733
|
850
|
|
|
|
|
1697
|
$FmT = {}; |
734
|
|
|
|
|
|
|
} |
735
|
1802
|
|
|
|
|
3104
|
foreach my $attr (qw( AlignH AlignV FmtIdx Hidden Lock |
736
|
|
|
|
|
|
|
Wrap )) { |
737
|
10812
|
100
|
|
|
|
20807
|
exists $FmT->{$attr} or $FmT->{$attr} = 0; |
738
|
|
|
|
|
|
|
} |
739
|
1802
|
100
|
|
|
|
3602
|
exists $FmT->{Fill} or $FmT->{Fill} = [ 0 ]; |
740
|
1802
|
100
|
|
|
|
3941
|
exists $FmT->{Font} or $FmT->{Font} = undef; |
741
|
|
|
|
|
|
|
|
742
|
1802
|
100
|
|
|
|
3285
|
unless (defined $fmt) { |
743
|
|
|
|
|
|
|
$fmt = $FmT->{FmtIdx} |
744
|
|
|
|
|
|
|
? $oBook->{FormatStr}{$FmT->{FmtIdx}} |
745
|
1672
|
100
|
|
|
|
3099
|
: undef; |
746
|
|
|
|
|
|
|
} |
747
|
1802
|
100
|
|
|
|
3476
|
if ($oWkC->{Type} eq "Numeric") { |
748
|
|
|
|
|
|
|
# Fixed in 0.33 and up |
749
|
|
|
|
|
|
|
# see Spreadsheet/ParseExcel/FmtDefault.pm |
750
|
|
|
|
|
|
|
$FmT->{FmtIdx} == 0x0e || |
751
|
|
|
|
|
|
|
$FmT->{FmtIdx} == 0x0f || |
752
|
|
|
|
|
|
|
$FmT->{FmtIdx} == 0x10 || |
753
|
|
|
|
|
|
|
$FmT->{FmtIdx} == 0x11 || |
754
|
|
|
|
|
|
|
$FmT->{FmtIdx} == 0x16 || |
755
|
|
|
|
|
|
|
(defined $fmt && $fmt =~ m{^[dmy][-\\/dmy]*$}) and |
756
|
370
|
100
|
33
|
|
|
3261
|
$oWkC->{Type} = "Date"; |
|
|
|
33
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
66
|
|
|
|
|
757
|
|
|
|
|
|
|
$FmT->{FmtIdx} == 0x09 || |
758
|
|
|
|
|
|
|
$FmT->{FmtIdx} == 0x0a || |
759
|
|
|
|
|
|
|
(defined $fmt && $fmt =~ m{^0+\.0+%$}) and |
760
|
370
|
100
|
100
|
|
|
2000
|
$oWkC->{Type} = "Percentage"; |
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
761
|
|
|
|
|
|
|
} |
762
|
1802
|
100
|
|
|
|
3463
|
defined $fmt and $fmt =~ s/\\//g; |
763
|
|
|
|
|
|
|
$opt{cells} and # Formatted value |
764
|
|
|
|
|
|
|
$sheet{$cell} = defined $val ? $FmT && exists $def_fmt{$FmT->{FmtIdx}} |
765
|
1802
|
100
|
66
|
|
|
9742
|
? $oFmt->ValFmt ($oWkC, $oBook) |
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
766
|
|
|
|
|
|
|
: $oWkC->Value : undef; |
767
|
1802
|
100
|
|
|
|
15126
|
if ($opt{attr}) { |
768
|
1526
|
|
|
|
|
2210
|
my $FnT = $FmT->{Font}; |
769
|
|
|
|
|
|
|
my $fmi = $FmT->{FmtIdx} |
770
|
|
|
|
|
|
|
? $oBook->{FormatStr}{$FmT->{FmtIdx}} |
771
|
1526
|
100
|
|
|
|
2594
|
: undef; |
772
|
1526
|
100
|
|
|
|
2717
|
$fmi and $fmi =~ s/\\//g; |
773
|
|
|
|
|
|
|
$sheet{attr}[$c + 1][$r + 1] = { |
774
|
|
|
|
|
|
|
@def_attr, |
775
|
|
|
|
|
|
|
|
776
|
|
|
|
|
|
|
type => lc $oWkC->{Type}, |
777
|
|
|
|
|
|
|
enc => $oWkC->{Code}, |
778
|
|
|
|
|
|
|
merged => (defined $oWkC->{Merged} ? $oWkC->{Merged} : $oWkC->is_merged) || 0, |
779
|
|
|
|
|
|
|
hidden => ($hiddenRows->[$r] || $hiddenCols->[$c] ? 1 : |
780
|
|
|
|
|
|
|
defined $oWkC->{Hidden} ? $oWkC->{Hidden} : $FmT->{Hidden}) || 0, |
781
|
|
|
|
|
|
|
locked => $FmT->{Lock} || 0, |
782
|
|
|
|
|
|
|
format => $fmi, |
783
|
|
|
|
|
|
|
halign => [ undef, qw( left center right |
784
|
|
|
|
|
|
|
fill justify ), undef, |
785
|
|
|
|
|
|
|
"equal_space" ]->[$FmT->{AlignH}], |
786
|
|
|
|
|
|
|
valign => [ qw( top center bottom justify |
787
|
|
|
|
|
|
|
equal_space )]->[$FmT->{AlignV}], |
788
|
|
|
|
|
|
|
wrap => $FmT->{Wrap}, |
789
|
|
|
|
|
|
|
font => $FnT->{Name}, |
790
|
|
|
|
|
|
|
size => $FnT->{Height}, |
791
|
|
|
|
|
|
|
bold => $FnT->{Bold}, |
792
|
|
|
|
|
|
|
italic => $FnT->{Italic}, |
793
|
|
|
|
|
|
|
uline => $FnT->{Underline}, |
794
|
|
|
|
|
|
|
fgcolor => _xls_color ($FnT->{Color}), |
795
|
1526
|
|
|
|
|
6032
|
bgcolor => _xls_fill (@{$FmT->{Fill}}), |
796
|
|
|
|
|
|
|
formula => $oWkC->{Formula}, |
797
|
1526
|
|
100
|
|
|
4827
|
}; |
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
798
|
|
|
|
|
|
|
#_dump "cell", $sheet{attr}[$c + 1][$r + 1]; |
799
|
|
|
|
|
|
|
} |
800
|
|
|
|
|
|
|
} |
801
|
|
|
|
|
|
|
} |
802
|
|
|
|
|
|
|
} |
803
|
115
|
|
|
|
|
206
|
for (@{$sheet{cell}}) { |
|
115
|
|
|
|
|
353
|
|
804
|
409
|
100
|
|
|
|
1072
|
defined or $_ = []; |
805
|
|
|
|
|
|
|
} |
806
|
115
|
|
|
|
|
1919
|
push @data, { %sheet }; |
807
|
|
|
|
|
|
|
# $data[0]{sheets}++; |
808
|
115
|
50
|
|
|
|
534
|
if ($sheet{label} eq "-- unlabeled --") { |
809
|
0
|
|
|
|
|
0
|
$sheet{label} = ""; |
810
|
|
|
|
|
|
|
} |
811
|
|
|
|
|
|
|
else { |
812
|
115
|
|
|
|
|
739
|
$data[0]{sheet}{$sheet{label}} = $#data; |
813
|
|
|
|
|
|
|
} |
814
|
|
|
|
|
|
|
} |
815
|
75
|
|
|
|
|
361
|
return _clipsheets \%opt, [ @data ]; |
816
|
|
|
|
|
|
|
} |
817
|
|
|
|
|
|
|
|
818
|
79
|
100
|
|
|
|
438
|
if ($opt{parser} ? _parser ($opt{parser}) eq "sc" |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
819
|
|
|
|
|
|
|
: $io_fil |
820
|
|
|
|
|
|
|
? $txt =~ m/\.sc$/ |
821
|
|
|
|
|
|
|
: $txt =~ m/^# .*SquirrelCalc/) { |
822
|
46
|
100
|
|
|
|
113
|
if ($io_ref) { |
|
|
100
|
|
|
|
|
|
823
|
2
|
|
|
|
|
8
|
local $/; |
824
|
2
|
|
|
|
|
39
|
my $x = <$txt>; |
825
|
2
|
|
|
|
|
11
|
$txt = $x; |
826
|
|
|
|
|
|
|
} |
827
|
|
|
|
|
|
|
elsif ($io_fil) { |
828
|
42
|
|
|
|
|
168
|
local $/; |
829
|
42
|
50
|
|
|
|
1449
|
open my $sc, "<", $txt or return; |
830
|
42
|
|
|
|
|
1623
|
$txt = <$sc>; |
831
|
42
|
|
|
|
|
623
|
close $sc; |
832
|
|
|
|
|
|
|
} |
833
|
46
|
50
|
|
|
|
282
|
$txt =~ m/\S/ or return; |
834
|
46
|
50
|
|
|
|
148
|
my $label = defined $opt{label} ? $opt{label} : "sheet"; |
835
|
46
|
|
|
|
|
571
|
my @data = ( |
836
|
|
|
|
|
|
|
{ type => "sc", |
837
|
|
|
|
|
|
|
parser => "Spreadsheet::Read", |
838
|
|
|
|
|
|
|
version => $VERSION, |
839
|
|
|
|
|
|
|
parsers => [{ |
840
|
|
|
|
|
|
|
type => "sc", |
841
|
|
|
|
|
|
|
parser => "Spreadsheet::Read", |
842
|
|
|
|
|
|
|
version => $VERSION, |
843
|
|
|
|
|
|
|
}], |
844
|
|
|
|
|
|
|
error => undef, |
845
|
|
|
|
|
|
|
sheets => 1, |
846
|
|
|
|
|
|
|
sheet => { $label => 1 }, |
847
|
|
|
|
|
|
|
}, |
848
|
|
|
|
|
|
|
{ parser => 0, |
849
|
|
|
|
|
|
|
label => $label, |
850
|
|
|
|
|
|
|
maxrow => 0, |
851
|
|
|
|
|
|
|
maxcol => 0, |
852
|
|
|
|
|
|
|
cell => [], |
853
|
|
|
|
|
|
|
attr => [], |
854
|
|
|
|
|
|
|
merged => [], |
855
|
|
|
|
|
|
|
active => 1, |
856
|
|
|
|
|
|
|
}, |
857
|
|
|
|
|
|
|
); |
858
|
|
|
|
|
|
|
|
859
|
46
|
|
|
|
|
9680
|
for (split m/\s*[\r\n]\s*/, $txt) { |
860
|
13478
|
100
|
|
|
|
21993
|
if (m/^dimension.*of ([0-9]+) rows.*of ([0-9]+) columns/i) { |
861
|
46
|
|
|
|
|
93
|
@{$data[1]}{qw(maxrow maxcol)} = ($1, $2); |
|
46
|
|
|
|
|
249
|
|
862
|
46
|
|
|
|
|
108
|
next; |
863
|
|
|
|
|
|
|
} |
864
|
13432
|
100
|
|
|
|
32226
|
s/^r([0-9]+)c([0-9]+)\s*=\s*// or next; |
865
|
3542
|
|
|
|
|
6329
|
my ($c, $r) = map { $_ + 1 } $2, $1; |
|
7084
|
|
|
|
|
15809
|
|
866
|
3542
|
100
|
66
|
|
|
15887
|
if (m/.* \{(.*)}$/ or m/"(.*)"/) { |
867
|
2714
|
|
|
|
|
5160
|
my $cell = cr2cell ($c, $r); |
868
|
2714
|
100
|
|
|
|
8875
|
$opt{rc} and $data[1]{cell}[$c][$r] = $1; |
869
|
2714
|
50
|
|
|
|
7472
|
$opt{cells} and $data[1]{$cell} = $1; |
870
|
2714
|
100
|
|
|
|
6448
|
$opt{attr} and $data[1]{attr}[$c + 1][$r] = { @def_attr }; |
871
|
2714
|
|
|
|
|
4592
|
next; |
872
|
|
|
|
|
|
|
} |
873
|
|
|
|
|
|
|
# Now only formula's remain. Ignore for now |
874
|
|
|
|
|
|
|
# r67c7 = [P2L] 2*(1000*r67c5-60) |
875
|
|
|
|
|
|
|
} |
876
|
46
|
|
|
|
|
761
|
for (@{$data[1]{cell}}) { |
|
46
|
|
|
|
|
154
|
|
877
|
360
|
100
|
|
|
|
723
|
defined or $_ = []; |
878
|
|
|
|
|
|
|
} |
879
|
46
|
|
|
|
|
223
|
return _clipsheets \%opt, [ @data ]; |
880
|
|
|
|
|
|
|
} |
881
|
|
|
|
|
|
|
|
882
|
33
|
50
|
66
|
|
|
907
|
if ($opt{parser} ? _parser ($opt{parser}) eq "sxc" |
|
|
100
|
|
|
|
|
|
883
|
|
|
|
|
|
|
: ($txt =~ m/^<\?xml/ or -f $txt)) { |
884
|
7
|
50
|
|
|
|
984
|
$can{sxc} or croak "Spreadsheet::ReadSXC not installed"; |
885
|
0
|
0
|
|
|
|
0
|
ref $txt and |
886
|
|
|
|
|
|
|
croak ("Sorry, references as input are not (yet) supported by Spreadsheet::ReadSXC"); |
887
|
|
|
|
|
|
|
|
888
|
0
|
|
|
|
|
0
|
my $using = "using $can{sxc}-" . $can{sxc}->VERSION; |
889
|
0
|
|
|
|
|
0
|
my $sxc_options = { %parser_opts, OrderBySheet => 1 }; # New interface 0.20 and up |
890
|
0
|
|
|
|
|
0
|
my $sxc; |
891
|
0
|
0
|
0
|
|
|
0
|
if ($txt =~ m/\.(sxc|ods)$/i) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
892
|
0
|
0
|
|
|
|
0
|
$debug and print STDERR "Opening \U$1\E $txt $using\n"; |
893
|
0
|
0
|
|
|
|
0
|
$sxc = Spreadsheet::ReadSXC::read_sxc ($txt, $sxc_options) or return; |
894
|
|
|
|
|
|
|
} |
895
|
|
|
|
|
|
|
elsif ($txt =~ m/\.xml$/i) { |
896
|
0
|
0
|
|
|
|
0
|
$debug and print STDERR "Opening XML $txt $using\n"; |
897
|
0
|
0
|
|
|
|
0
|
$sxc = Spreadsheet::ReadSXC::read_xml_file ($txt, $sxc_options) or return; |
898
|
|
|
|
|
|
|
} |
899
|
|
|
|
|
|
|
# need to test on pattern to prevent stat warning |
900
|
|
|
|
|
|
|
# on filename with newline |
901
|
|
|
|
|
|
|
elsif ($txt !~ m/^<\?xml/i and -f $txt) { |
902
|
0
|
0
|
|
|
|
0
|
$debug and print STDERR "Opening XML $txt $using\n"; |
903
|
0
|
0
|
|
|
|
0
|
open my $f, "<", $txt or return; |
904
|
0
|
|
|
|
|
0
|
local $/; |
905
|
0
|
|
|
|
|
0
|
$txt = <$f>; |
906
|
0
|
|
|
|
|
0
|
close $f; |
907
|
|
|
|
|
|
|
} |
908
|
0
|
0
|
0
|
|
|
0
|
!$sxc && $txt =~ m/^<\?xml/i and |
909
|
|
|
|
|
|
|
$sxc = Spreadsheet::ReadSXC::read_xml_string ($txt, $sxc_options); |
910
|
0
|
0
|
|
|
|
0
|
$debug > 8 and _dump (sxc => $sxc); |
911
|
0
|
0
|
|
|
|
0
|
if ($sxc) { |
912
|
0
|
|
|
|
|
0
|
my @data = ( { |
913
|
|
|
|
|
|
|
type => "sxc", |
914
|
|
|
|
|
|
|
parser => "Spreadsheet::ReadSXC", |
915
|
|
|
|
|
|
|
version => $Spreadsheet::ReadSXC::VERSION, |
916
|
|
|
|
|
|
|
parsers => [{ |
917
|
|
|
|
|
|
|
type => "sxc", |
918
|
|
|
|
|
|
|
parser => "Spreadsheet::ReadSXC", |
919
|
|
|
|
|
|
|
version => $Spreadsheet::ReadSXC::VERSION, |
920
|
|
|
|
|
|
|
}], |
921
|
|
|
|
|
|
|
error => undef, |
922
|
|
|
|
|
|
|
sheets => 0, |
923
|
|
|
|
|
|
|
sheet => {}, |
924
|
|
|
|
|
|
|
} ); |
925
|
|
|
|
|
|
|
my @sheets = ref $sxc eq "HASH" # < 0.20 |
926
|
|
|
|
|
|
|
? map { |
927
|
|
|
|
|
|
|
{ label => $_, |
928
|
0
|
|
|
|
|
0
|
data => $sxc->{$_}, |
929
|
|
|
|
|
|
|
} |
930
|
|
|
|
|
|
|
} keys %$sxc |
931
|
0
|
0
|
|
|
|
0
|
: @{$sxc}; |
|
0
|
|
|
|
|
0
|
|
932
|
0
|
|
|
|
|
0
|
foreach my $sheet (@sheets) { |
933
|
0
|
|
|
|
|
0
|
my @sheet = @{$sheet->{data}}; |
|
0
|
|
|
|
|
0
|
|
934
|
|
|
|
|
|
|
my %sheet = ( |
935
|
|
|
|
|
|
|
parser => 0, |
936
|
|
|
|
|
|
|
label => $sheet->{label}, |
937
|
0
|
|
|
|
|
0
|
maxrow => scalar @sheet, |
938
|
|
|
|
|
|
|
maxcol => 0, |
939
|
|
|
|
|
|
|
cell => [], |
940
|
|
|
|
|
|
|
attr => [], |
941
|
|
|
|
|
|
|
merged => [], |
942
|
|
|
|
|
|
|
active => 0, |
943
|
|
|
|
|
|
|
); |
944
|
0
|
|
|
|
|
0
|
my $sheet_idx = 1 + @data; |
945
|
0
|
0
|
|
|
|
0
|
$debug and print STDERR "\tSheet $sheet_idx '$sheet{label}' $sheet{maxrow} rows\n"; |
946
|
0
|
|
|
|
|
0
|
foreach my $r (0 .. $#sheet) { |
947
|
0
|
0
|
|
|
|
0
|
my @row = @{$sheet[$r]} or next; |
|
0
|
|
|
|
|
0
|
|
948
|
0
|
|
|
|
|
0
|
foreach my $c (0 .. $#row) { |
949
|
0
|
0
|
|
|
|
0
|
defined (my $val = $row[$c]) or next; |
950
|
0
|
|
|
|
|
0
|
my $C = $c + 1; |
951
|
0
|
0
|
|
|
|
0
|
$C > $sheet{maxcol} and $sheet{maxcol} = $C; |
952
|
0
|
|
|
|
|
0
|
my $cell = cr2cell ($C, $r + 1); |
953
|
0
|
0
|
|
|
|
0
|
$opt{rc} and $sheet{cell}[$C][$r + 1] = $val; |
954
|
0
|
0
|
|
|
|
0
|
$opt{cells} and $sheet{$cell} = $val; |
955
|
0
|
0
|
|
|
|
0
|
$opt{attr} and $sheet{attr}[$C][$r + 1] = { @def_attr }; |
956
|
|
|
|
|
|
|
} |
957
|
|
|
|
|
|
|
} |
958
|
0
|
|
|
|
|
0
|
for (@{$sheet{cell}}) { |
|
0
|
|
|
|
|
0
|
|
959
|
0
|
0
|
|
|
|
0
|
defined or $_ = []; |
960
|
|
|
|
|
|
|
} |
961
|
0
|
0
|
|
|
|
0
|
$debug and print STDERR "\tSheet $sheet_idx '$sheet{label}' $sheet{maxrow} x $sheet{maxcol}\n"; |
962
|
0
|
|
|
|
|
0
|
push @data, { %sheet }; |
963
|
0
|
|
|
|
|
0
|
$data[0]{sheets}++; |
964
|
0
|
|
|
|
|
0
|
$data[0]{sheet}{$sheet->{label}} = $#data; |
965
|
|
|
|
|
|
|
} |
966
|
0
|
|
|
|
|
0
|
return _clipsheets \%opt, [ @data ]; |
967
|
|
|
|
|
|
|
} |
968
|
|
|
|
|
|
|
} |
969
|
|
|
|
|
|
|
|
970
|
26
|
|
|
|
|
168
|
return; |
971
|
|
|
|
|
|
|
} # ReadData |
972
|
|
|
|
|
|
|
|
973
|
|
|
|
|
|
|
# Read another spreadsheet source and append its sheets to an existing book.
#
#   Spreadsheet::Read::add ($book, $source [, option => value, ...]);
#
# Every argument after $book is handed to ReadData () as is.
# Returns
#  - nothing when ReadData () yields a false value,
#  - the freshly read data when $book is not an ARRAY ref or an object of
#    this package, or when its control hash has a false sheet count,
#  - $book, extended in place, otherwise.
sub add {
    my $book = shift;
    my $new  = ReadData (@_) or return;

    my $usable = $book && (ref $book eq "ARRAY" || ref $book eq __PACKAGE__);
    $usable && $book->[0]{sheets} or return $new;

    my $ctrl     = $book->[0];	# control hash of the receiving book
    my $new_ctrl = $new->[0];	# control hash of the data just read

    # A book without a parsers list gets one built from its own top-level
    # type/parser/version, and all its sheets are pointed at that entry 0
    if (!$ctrl->{parsers}) {
        foreach my $key (qw( type parser version )) {
            $ctrl->{parsers}[0]{$key} = $ctrl->{$key};
            }
        foreach my $idx (1 .. $ctrl->{sheets}) {
            $book->[$idx]{parser} = 0;
            }
        }

    # Look for the first registered parser with the same type, parser and
    # version as the one that read the new data ...
    my $pidx;
    foreach my $i (0 .. $#{$ctrl->{parsers}}) {
        my $p = $ctrl->{parsers}[$i];
        $p->{type}    eq $new_ctrl->{type}    or next;
        $p->{parser}  eq $new_ctrl->{parser}  or next;
        $p->{version} eq $new_ctrl->{version} or next;
        $pidx = $i;
        last;
        }
    # ... and register it as a new entry when there is none
    if (!defined $pidx) {
        $pidx = @{$ctrl->{parsers}};
        foreach my $key (qw( type parser version )) {
            $ctrl->{parsers}[$pidx]{$key} = $new_ctrl->{$key};
            }
        }

    # Append the new sheets in the order of their index in the new data.
    # A label that is already present in the book's index is stored as
    # "label[2]", "label[3]", ... using the first suffix that is free
    my @labels = sort { $new_ctrl->{sheet}{$a} <=> $new_ctrl->{sheet}{$b} }
                 keys %{$new_ctrl->{sheet}};
    foreach my $label (@labels) {
        my $pos  = $new_ctrl->{sheet}{$label};
        my $name = $label;
        for (my $v = 2; exists $ctrl->{sheet}{$name}; $v++) {
            $name = "$label\[$v]";
            }
        $ctrl->{sheet}{$name} = $ctrl->{sheets} + $pos;
        $new->[$pos]{parser}  = $pidx;
        push @$book, $new->[$pos];
        }
    $ctrl->{sheets} += $new_ctrl->{sheets};

    return $book;
    } # add
1009
|
|
|
|
|
|
|
|
1010
|
|
|
|
|
|
|
package Spreadsheet::Read::Attribute; |
1011
|
|
|
|
|
|
|
|
1012
|
53
|
|
|
53
|
|
584
|
use Carp; |
|
53
|
|
|
|
|
156
|
|
|
53
|
|
|
|
|
4595
|
|
1013
|
53
|
|
|
53
|
|
402
|
use vars qw( $AUTOLOAD ); |
|
53
|
|
|
|
|
138
|
|
|
53
|
|
|
|
|
62049
|
|
1014
|
|
|
|
|
|
|
|
1015
|
|
|
|
|
|
|
# Generic accessor: a method called on the object returns the hash entry
# that has the same name as the method.
sub AUTOLOAD {
    my ($self) = @_;
    my $name = $AUTOLOAD;
    $name =~ s/.*:://;		# strip the package qualifier
    return $self->{$name};
    } # AUTOLOAD
1020
|
|
|
|
|
|
|
|
1021
|
|
|
|
|
|
|
package Spreadsheet::Read::Sheet; |
1022
|
|
|
|
|
|
|
|
1023
|
|
|
|
|
|
|
# Return the content of a single cell of the sheet.
#
#   my $val = $sheet->cell ($col, $row);   # entry {cell}[$col][$row]
#   my $val = $sheet->cell ("A3");         # entry {A3}
#
# Two all-digit arguments select $sheet->{cell}[$col][$row]. Otherwise a
# true first argument is used as a key into the sheet hash, provided that
# key exists. Anything else (no id, unknown label) returns undef.
sub cell {
    my ($sheet, @id) = @_;

    @id == 2 && $id[0] =~ m/^[0-9]+$/ && $id[1] =~ m/^[0-9]+$/ and
        return $sheet->{cell}[$id[0]][$id[1]];

    @id && $id[0] && exists $sheet->{$id[0]} and
        return $sheet->{$id[0]};

    # No match. Without an explicit return the sub would hand back the false
    # value of the test above ("" or 0), making a missing cell look defined.
    # A single undef (rather than an empty list) keeps the number of
    # returned elements the same as for a hit.
    return undef;
    } # cell
1030
|
|
|
|
|
|
|
|
1031
|
|
|
|
|
|
|
# Return the attributes of a single cell as a Spreadsheet::Read::Attribute
# object, or undef when there is nothing to return.
#
#   my $attr = $sheet->attr ($col, $row);
#   my $attr = $sheet->attr ("A3");
#
# Two all-digit arguments select $sheet->{attr}[$col][$row]. Otherwise a
# true first argument that exists as a key in the sheet hash is converted
# with cell2cr () and looked up the same way.
sub attr {
    my ($sheet, @id) = @_;
    my $class = "Spreadsheet::Read::Attribute";

    my $attr;
    if (@id == 2 && $id[0] =~ m/^[0-9]+$/ && $id[1] =~ m/^[0-9]+$/) {
        $attr = $sheet->{attr}[$id[0]][$id[1]];
        }
    elsif (@id && $id[0] && exists $sheet->{$id[0]}) {
        my ($c, $r) = $sheet->cell2cr ($id[0]);
        $attr = $sheet->{attr}[$c][$r];
        }

    # bless () dies on a non-reference, so a cell without a stored attribute
    # hash has to be caught here and reported as undef instead.
    ref $attr or return undef;
    return bless $attr => $class;
    } # attr
1042
|
|
|
|
|
|
|
|
1043
|
|
|
|
|
|
|
# Accessor: the value stored in the sheet's {maxrow} entry.
sub maxrow {
    my ($sheet) = @_;
    return $sheet->{maxrow};
    } # maxrow
1047
|
|
|
|
|
|
|
|
1048
|
|
|
|
|
|
|
# Accessor: the value stored in the sheet's {maxcol} entry.
sub maxcol {
    my ($sheet) = @_;
    return $sheet->{maxcol};
    } # maxcol
1052
|
|
|
|
|
|
|
|
1053
|
|
|
|
|
|
|
# Wrapper around Spreadsheet::Read::col2label () that can also be called
# as a method: a first argument containing "::" is taken to be the
# invocant and dropped, the remaining arguments are passed on unchanged.
sub col2label {
    shift if $_[0] =~ m/::/;	# invocant is not used
    return Spreadsheet::Read::col2label (@_);
    } # col2label
1057
|
|
|
|
|
|
|
|
1058
|
|
|
|
|
|
|
# Wrapper around Spreadsheet::Read::cr2cell () that can also be called
# as a method: a first argument containing "::" is taken to be the
# invocant and dropped, the remaining arguments are passed on unchanged.
sub cr2cell {
    shift if $_[0] =~ m/::/;	# invocant is not used
    return Spreadsheet::Read::cr2cell (@_);
    } # cr2cell
1062
|
|
|
|
|
|
|
|
1063
|
|
|
|
|
|
|
# Wrapper around Spreadsheet::Read::cell2cr () that can also be called
# as a method: a first argument containing "::" is taken to be the
# invocant and dropped, the remaining arguments are passed on unchanged.
sub cell2cr {
    shift if $_[0] =~ m/::/;	# invocant is not used
    return Spreadsheet::Read::cell2cr (@_);
    } # cell2cr
1067
|
|
|
|
|
|
|
|
1068
|
|
|
|
|
|
|
# Combined getter/setter for the sheet's {label} entry.
#
#   my $label = $sheet->label;
#   $sheet->label ("New name");
#
# A defined argument is stored first; the (possibly new) label is returned.
sub label {
    my ($sheet, $label) = @_;
    if (defined $label) {
        $sheet->{label} = $label;
        }
    return $sheet->{label};
    } # label
1073
|
|
|
|
|
|
|
|
1074
|
|
|
|
|
|
|
# Accessor: the value stored in the sheet's {active} entry.
sub active {
    my ($sheet) = @_;
    return $sheet->{active};
    } # active
1078
|
|
|
|
|
|
|
|
1079
|
|
|
|
|
|
|
# my @row = $sheet->cellrow (1); |
1080
|
|
|
|
|
|
|
# Return the entries of one row from the sheet's {cell} structure, as a
# list running from column 1 up to {maxcol}.
# Returns nothing unless $row is defined and within 1 .. {maxrow}.
sub cellrow {
    my ($sheet, $row) = @_;
    return unless defined $row && $row > 0 && $row <= $sheet->{maxrow};

    my $cells = $sheet->{cell};
    return map { $cells->[$_][$row] } 1 .. $sheet->{maxcol};
    } # cellrow
1086
|
|
|
|
|
|
|
|
1087
|
|
|
|
|
|
|
# my @row = $sheet->row (1); |
1088
|
|
|
|
|
|
|
# Return the entries of one row from the sheet's label keys ({A1}, {B1},
# ...), as a list running from column 1 up to {maxcol}. The key of each
# cell is obtained through the sheet's cr2cell () method.
# Returns nothing unless $row is defined and within 1 .. {maxrow}.
sub row {
    my ($sheet, $row) = @_;
    return unless defined $row && $row > 0 && $row <= $sheet->{maxrow};

    return map {
        my $label = $sheet->cr2cell ($_, $row);
        $sheet->{$label};
        } 1 .. $sheet->{maxcol};
    } # row
1093
|
|
|
|
|
|
|
|
1094
|
|
|
|
|
|
|
# my @col = $sheet->cellcolumn (1); |
1095
|
|
|
|
|
|
|
# Return the entries of one column from the sheet's {cell} structure, as
# a list running from row 1 up to {maxrow}.
# Returns nothing unless $col is defined and within 1 .. {maxcol}.
sub cellcolumn {
    my ($sheet, $col) = @_;
    return unless defined $col && $col > 0 && $col <= $sheet->{maxcol};

    my $cells = $sheet->{cell};
    return map { $cells->[$col][$_] } 1 .. $sheet->{maxrow};
    } # cellcolumn
1101
|
|
|
|
|
|
|
|
1102
|
|
|
|
|
|
|
# my @col = $sheet->column (1); |
1103
|
|
|
|
|
|
|
# Return the entries of one column from the sheet's label keys ({A1},
# {A2}, ...), as a list running from row 1 up to {maxrow}. The key of
# each cell is obtained through the sheet's cr2cell () method.
# Returns nothing unless $col is defined and within 1 .. {maxcol}.
sub column {
    my ($sheet, $col) = @_;
    return unless defined $col && $col > 0 && $col <= $sheet->{maxcol};

    return map {
        my $label = $sheet->cr2cell ($col, $_);
        $sheet->{$label};
        } 1 .. $sheet->{maxrow};
    } # column
1108
|
|
|
|
|
|
|
|
1109
|
|
|
|
|
|
|
# Convert {cell}'s [column][row] to a [row][column] list |
1110
|
|
|
|
|
|
|
# my @rows = $sheet->rows (); |
1111
|
|
|
|
|
|
|
# Turn the sheet's {cell} structure, which is indexed [column][row], into
# a list of rows: one array ref per row 1 .. {maxrow}, each holding the
# entries of columns 1 .. {maxcol} in order.
sub rows {
    my ($sheet) = @_;
    my $cells = $sheet->{cell};

    my @rows;
    foreach my $r (1 .. $sheet->{maxrow}) {
        push @rows, [ map { $cells->[$_][$r] } 1 .. $sheet->{maxcol} ];
        }
    return @rows;
    } # rows
1120
|
|
|
|
|
|
|
|
1121
|
|
|
|
|
|
|
1; |
1122
|
|
|
|
|
|
|
|
1123
|
|
|
|
|
|
|
__END__ |
1124
|
|
|
|
|
|
|
=head1 DESCRIPTION |
1125
|
|
|
|
|
|
|
|
1126
|
|
|
|
|
|
|
Spreadsheet::Read tries to transparently read *any* spreadsheet and |
1127
|
|
|
|
|
|
|
return its content in a universal manner independent of the parsing |
1128
|
|
|
|
|
|
|
module that does the actual spreadsheet scanning. |
1129
|
|
|
|
|
|
|
|
1130
|
|
|
|
|
|
|
For OpenOffice and/or LibreOffice this module uses |
1131
|
|
|
|
|
|
|
L<Spreadsheet::ReadSXC|https://metacpan.org/release/Spreadsheet-ReadSXC> |
1132
|
|
|
|
|
|
|
|
1133
|
|
|
|
|
|
|
For Microsoft Excel this module uses |
1134
|
|
|
|
|
|
|
L<Spreadsheet::ParseExcel|https://metacpan.org/release/Spreadsheet-ParseExcel>, |
1135
|
|
|
|
|
|
|
L<Spreadsheet::ParseXLSX|https://metacpan.org/release/Spreadsheet-ParseXLSX>, or |
1136
|
|
|
|
|
|
|
L<Spreadsheet::XLSX|https://metacpan.org/release/Spreadsheet-XLSX> (strongly |
1137
|
|
|
|
|
|
|
discouraged). |
1138
|
|
|
|
|
|
|
|
1139
|
|
|
|
|
|
|
For CSV this module uses L<Text::CSV_XS|https://metacpan.org/release/Text-CSV_XS> |
1140
|
|
|
|
|
|
|
or L<Text::CSV_PP|https://metacpan.org/release/Text-CSV_PP>. |
1141
|
|
|
|
|
|
|
|
1142
|
|
|
|
|
|
|
For SquirrelCalc there is a very simplistic built-in parser |
1143
|
|
|
|
|
|
|
|
1144
|
|
|
|
|
|
|
=head2 Data structure |
1145
|
|
|
|
|
|
|
|
1146
|
|
|
|
|
|
|
The data is returned as an array reference: |
1147
|
|
|
|
|
|
|
|
1148
|
|
|
|
|
|
|
$book = [ |
1149
|
|
|
|
|
|
|
# Entry 0 is the overall control hash |
1150
|
|
|
|
|
|
|
{ sheets => 2, |
1151
|
|
|
|
|
|
|
sheet => { |
1152
|
|
|
|
|
|
|
"Sheet 1" => 1, |
1153
|
|
|
|
|
|
|
"Sheet 2" => 2, |
1154
|
|
|
|
|
|
|
}, |
1155
|
|
|
|
|
|
|
parsers => [ { |
1156
|
|
|
|
|
|
|
type => "xls", |
1157
|
|
|
|
|
|
|
parser => "Spreadsheet::ParseExcel", |
1158
|
|
|
|
|
|
|
version => 0.59, |
1159
|
|
|
|
|
|
|
}], |
1160
|
|
|
|
|
|
|
error => undef, |
1161
|
|
|
|
|
|
|
}, |
1162
|
|
|
|
|
|
|
# Entry 1 is the first sheet |
1163
|
|
|
|
|
|
|
{ parser => 0, |
1164
|
|
|
|
|
|
|
label => "Sheet 1", |
1165
|
|
|
|
|
|
|
maxrow => 2, |
1166
|
|
|
|
|
|
|
maxcol => 4, |
1167
|
|
|
|
|
|
|
cell => [ undef, |
1168
|
|
|
|
|
|
|
[ undef, 1 ], |
1169
|
|
|
|
|
|
|
[ undef, undef, undef, undef, undef, "Nugget" ], |
1170
|
|
|
|
|
|
|
], |
1171
|
|
|
|
|
|
|
attr => [], |
1172
|
|
|
|
|
|
|
merged => [], |
1173
|
|
|
|
|
|
|
active => 1, |
1174
|
|
|
|
|
|
|
A1 => 1, |
1175
|
|
|
|
|
|
|
B5 => "Nugget", |
1176
|
|
|
|
|
|
|
}, |
1177
|
|
|
|
|
|
|
# Entry 2 is the second sheet |
1178
|
|
|
|
|
|
|
{ parser => 0, |
1179
|
|
|
|
|
|
|
label => "Sheet 2", |
1180
|
|
|
|
|
|
|
: |
1181
|
|
|
|
|
|
|
: |
1182
|
|
|
|
|
|
|
|
1183
|
|
|
|
|
|
|
To keep as close contact to spreadsheet users, row and column 1 have |
1184
|
|
|
|
|
|
|
index 1 too in the C<cell> element of the sheet hash, so cell "A1" is |
1185
|
|
|
|
|
|
|
the same as C<cell> [1, 1] (column first). To switch between the two, |
1186
|
|
|
|
|
|
|
there are helper functions available: C<cell2cr ()>, C<cr2cell ()>, |
1187
|
|
|
|
|
|
|
and C<col2label ()>. |
1188
|
|
|
|
|
|
|
|
1189
|
|
|
|
|
|
|
The C<cell> hash entry contains unformatted data, while the hash entries |
1190
|
|
|
|
|
|
|
with the traditional labels contain the formatted values (if applicable). |
1191
|
|
|
|
|
|
|
|
1192
|
|
|
|
|
|
|
The control hash (the first entry in the returned array ref), contains |
1193
|
|
|
|
|
|
|
some spreadsheet meta-data. The entry C<sheet> is there to be able to find |
1194
|
|
|
|
|
|
|
the sheets when accessing them by name: |
1195
|
|
|
|
|
|
|
|
1196
|
|
|
|
|
|
|
my %sheet2 = %{$book->[$book->[0]{sheet}{"Sheet 2"}]}; |
1197
|
|
|
|
|
|
|
|
1198
|
|
|
|
|
|
|
=head2 Functions and methods |
1199
|
|
|
|
|
|
|
|
1200
|
|
|
|
|
|
|
=head3 new |
1201
|
|
|
|
|
|
|
|
1202
|
|
|
|
|
|
|
my $book = Spreadsheet::Read->new (...); |
1203
|
|
|
|
|
|
|
|
1204
|
|
|
|
|
|
|
All options accepted by ReadData are accepted by new. |
1205
|
|
|
|
|
|
|
|
1206
|
|
|
|
|
|
|
=head3 ReadData |
1207
|
|
|
|
|
|
|
|
1208
|
|
|
|
|
|
|
my $book = ReadData ($source [, option => value [, ... ]]); |
1209
|
|
|
|
|
|
|
|
1210
|
|
|
|
|
|
|
my $book = ReadData ("file.csv", sep => ',', quote => '"'); |
1211
|
|
|
|
|
|
|
|
1212
|
|
|
|
|
|
|
my $book = ReadData ("file.xls", dtfmt => "yyyy-mm-dd"); |
1213
|
|
|
|
|
|
|
|
1214
|
|
|
|
|
|
|
my $book = ReadData ("file.ods"); |
1215
|
|
|
|
|
|
|
|
1216
|
|
|
|
|
|
|
my $book = ReadData ("file.sxc"); |
1217
|
|
|
|
|
|
|
|
1218
|
|
|
|
|
|
|
my $book = ReadData ("content.xml"); |
1219
|
|
|
|
|
|
|
|
1220
|
|
|
|
|
|
|
my $book = ReadData ($content); |
1221
|
|
|
|
|
|
|
|
1222
|
|
|
|
|
|
|
my $book = ReadData ($content, parser => "xlsx"); |
1223
|
|
|
|
|
|
|
|
1224
|
|
|
|
|
|
|
my $book = ReadData ($fh, parser => "xlsx"); |
1225
|
|
|
|
|
|
|
|
1226
|
|
|
|
|
|
|
my $book = ReadData (\$content, parser => "xlsx"); |
1227
|
|
|
|
|
|
|
|
1228
|
|
|
|
|
|
|
Tries to convert the given file, string, or stream to the data structure |
1229
|
|
|
|
|
|
|
described above. |
1230
|
|
|
|
|
|
|
|
1231
|
|
|
|
|
|
|
Processing Excel data from a stream or content is supported through a |
1232
|
|
|
|
|
|
|
L<File::Temp|https://metacpan.org/release/File-Temp> temporary file or |
1233
|
|
|
|
|
|
|
L<IO::Scalar|https://metacpan.org/release/IO-Scalar> when available. |
1234
|
|
|
|
|
|
|
|
1235
|
|
|
|
|
|
|
L<Spreadsheet::ReadSXC|https://metacpan.org/release/Spreadsheet-ReadSXC> |
1236
|
|
|
|
|
|
|
does preserve sheet order as of version 0.20. |
1237
|
|
|
|
|
|
|
|
1238
|
|
|
|
|
|
|
Choosing between C<$content> and C<\$content> (with or without passing |
1239
|
|
|
|
|
|
|
the desired C<parser> option) may be depending on trial and terror. |
1240
|
|
|
|
|
|
|
C<ReadData> does try to determine parser type on content if needed, but |
1241
|
|
|
|
|
|
|
not all combinations are checked, and not all signatures are builtin. |
1242
|
|
|
|
|
|
|
|
1243
|
|
|
|
|
|
|
Currently supported options are: |
1244
|
|
|
|
|
|
|
|
1245
|
|
|
|
|
|
|
=over 2 |
1246
|
|
|
|
|
|
|
|
1247
|
|
|
|
|
|
|
=item parser |
1248
|
|
|
|
|
|
|
X<parser> |
1249
|
|
|
|
|
|
|
|
1250
|
|
|
|
|
|
|
Force the data to be parsed by a specific format. Possible values are |
1251
|
|
|
|
|
|
|
C<csv>, C<prl> (or C<perl>), C<sc> (or C<squirelcalc>), C<sxc> (or C<oo>, |
1252
|
|
|
|
|
|
|
C<ods>, C<openoffice>, C<libreoffice>), C<xls> (or C<excel>), and C<xlsx> |
1253
|
|
|
|
|
|
|
(or C<excel2007>). |
1254
|
|
|
|
|
|
|
|
1255
|
|
|
|
|
|
|
When parsing streams, instead of files, it is highly recommended to pass |
1256
|
|
|
|
|
|
|
this option. |
1257
|
|
|
|
|
|
|
|
1258
|
|
|
|
|
|
|
Spreadsheet::Read supports several underlying parsers per spreadsheet |
1259
|
|
|
|
|
|
|
type. It will try those from most favored to least favored. When you |
1260
|
|
|
|
|
|
|
have a good reason to prefer a different parser, you can set that in |
1261
|
|
|
|
|
|
|
environment variables. The other options then will not be tested for: |
1262
|
|
|
|
|
|
|
|
1263
|
|
|
|
|
|
|
env SPREADSHEET_READ_CSV=Text::CSV_PP ... |
1264
|
|
|
|
|
|
|
|
1265
|
|
|
|
|
|
|
=item cells |
1266
|
|
|
|
|
|
|
X<cells> |
1267
|
|
|
|
|
|
|
|
1268
|
|
|
|
|
|
|
Control the generation of named cells ("C<A1>" etc). Default is true. |
1269
|
|
|
|
|
|
|
|
1270
|
|
|
|
|
|
|
=item rc |
1271
|
|
|
|
|
|
|
|
1272
|
|
|
|
|
|
|
Control the generation of the {cell}[c][r] entries. Default is true. |
1273
|
|
|
|
|
|
|
|
1274
|
|
|
|
|
|
|
=item attr |
1275
|
|
|
|
|
|
|
|
1276
|
|
|
|
|
|
|
Control the generation of the {attr}[c][r] entries. Default is false. |
1277
|
|
|
|
|
|
|
See L</Cell Attributes> below. |
1278
|
|
|
|
|
|
|
|
1279
|
|
|
|
|
|
|
=item clip |
1280
|
|
|
|
|
|
|
|
1281
|
|
|
|
|
|
|
If set, L<C<ReadData>|/ReadData> will remove all trailing rows and columns |
1282
|
|
|
|
|
|
|
per sheet that have no data, where no data means only undefined or empty |
1283
|
|
|
|
|
|
|
cells (after optional stripping). If a sheet has no data at all, the sheet |
1284
|
|
|
|
|
|
|
will be skipped entirely when this attribute is true. |
1285
|
|
|
|
|
|
|
|
1286
|
|
|
|
|
|
|
=item strip |
1287
|
|
|
|
|
|
|
|
1288
|
|
|
|
|
|
|
If set, L<C<ReadData>|/ReadData> will remove trailing- and/or |
1289
|
|
|
|
|
|
|
leading-whitespace from every field. |
1290
|
|
|
|
|
|
|
|
1291
|
|
|
|
|
|
|
strip leading trailing |
1292
|
|
|
|
|
|
|
----- ------- --------- |
1293
|
|
|
|
|
|
|
0 n/a n/a |
1294
|
|
|
|
|
|
|
1 strip n/a |
1295
|
|
|
|
|
|
|
2 n/a strip |
1296
|
|
|
|
|
|
|
3 strip strip |
1297
|
|
|
|
|
|
|
|
1298
|
|
|
|
|
|
|
=item pivot |
1299
|
|
|
|
|
|
|
|
1300
|
|
|
|
|
|
|
Swap all rows and columns. |
1301
|
|
|
|
|
|
|
|
1302
|
|
|
|
|
|
|
When a sheet contains data like |
1303
|
|
|
|
|
|
|
|
1304
|
|
|
|
|
|
|
A1 B1 C1 E1 |
1305
|
|
|
|
|
|
|
A2 C2 D2 |
1306
|
|
|
|
|
|
|
A3 B3 C3 D3 E3 |
1307
|
|
|
|
|
|
|
|
1308
|
|
|
|
|
|
|
using C<pivot> will return the sheet data as |
1309
|
|
|
|
|
|
|
|
1310
|
|
|
|
|
|
|
A1 A2 A3 |
1311
|
|
|
|
|
|
|
B1 B3 |
1312
|
|
|
|
|
|
|
C1 C2 C3 |
1313
|
|
|
|
|
|
|
D2 D3 |
1314
|
|
|
|
|
|
|
E1 E3 |
1315
|
|
|
|
|
|
|
|
1316
|
|
|
|
|
|
|
=item sep |
1317
|
|
|
|
|
|
|
|
1318
|
|
|
|
|
|
|
Set separator for CSV. Default is comma C<,>. |
1319
|
|
|
|
|
|
|
|
1320
|
|
|
|
|
|
|
=item quote |
1321
|
|
|
|
|
|
|
|
1322
|
|
|
|
|
|
|
Set quote character for CSV. Default is C<">. |
1323
|
|
|
|
|
|
|
|
1324
|
|
|
|
|
|
|
=item dtfmt |
1325
|
|
|
|
|
|
|
|
1326
|
|
|
|
|
|
|
Set the format for MS-Excel date fields that are set to use the default |
1327
|
|
|
|
|
|
|
date format. The default format in Excel is "C<m-d-yy>", which is |
1328
|
|
|
|
|
|
|
not year 2000 safe, nor very useful. The default is now "C<yyyy-mm-dd>", |
1329
|
|
|
|
|
|
|
which is more ISO-like. |
1330
|
|
|
|
|
|
|
|
1331
|
|
|
|
|
|
|
Note that date formatting in MS-Excel is not reliable at all, as it will |
1332
|
|
|
|
|
|
|
store/replace/change the date field separator in already stored formats |
1333
|
|
|
|
|
|
|
if you change your locale settings. So the above mentioned default can |
1334
|
|
|
|
|
|
|
be either "C<m-d-yy>" OR "C<m/d/yy>" depending on what that specific |
1335
|
|
|
|
|
|
|
character happened to be at the time the user saved the file. |
1336
|
|
|
|
|
|
|
|
1337
|
|
|
|
|
|
|
=item debug |
1338
|
|
|
|
|
|
|
|
1339
|
|
|
|
|
|
|
Enable some diagnostic messages to STDERR. |
1340
|
|
|
|
|
|
|
|
1341
|
|
|
|
|
|
|
The value determines how much diagnostic output is dumped (using |
1342
|
|
|
|
|
|
|
L<Data::Peek|https://metacpan.org/release/Data-Peek>). A value of C<9> |
1343
|
|
|
|
|
|
|
and higher will dump the entire structure from the back-end parser. |
1344
|
|
|
|
|
|
|
|
1345
|
|
|
|
|
|
|
=item passwd |
1346
|
|
|
|
|
|
|
|
1347
|
|
|
|
|
|
|
Use this password to decrypt a password-protected spreadsheet. |
1348
|
|
|
|
|
|
|
|
1349
|
|
|
|
|
|
|
Currently only supports Excel. |
1350
|
|
|
|
|
|
|
|
1351
|
|
|
|
|
|
|
=back |
1352
|
|
|
|
|
|
|
|
1353
|
|
|
|
|
|
|
All other attributes/options will be passed to the underlying parser if |
1354
|
|
|
|
|
|
|
that parser supports attributes. |
1355
|
|
|
|
|
|
|
|
1356
|
|
|
|
|
|
|
=head3 col2label |
1357
|
|
|
|
|
|
|
|
1358
|
|
|
|
|
|
|
my $col_id = col2label (col); |
1359
|
|
|
|
|
|
|
|
1360
|
|
|
|
|
|
|
my $col_id = $book->col2label (col); # OO |
1361
|
|
|
|
|
|
|
|
1362
|
|
|
|
|
|
|
C<col2label ()> converts a C<(column)> (1 based) to the letters used in the |
1363
|
|
|
|
|
|
|
traditional cell notation: |
1364
|
|
|
|
|
|
|
|
1365
|
|
|
|
|
|
|
my $id = col2label ( 4); # $id now "D" |
1366
|
|
|
|
|
|
|
my $id = col2label (28); # $id now "AB" |
1367
|
|
|
|
|
|
|
|
1368
|
|
|
|
|
|
|
=head3 cr2cell |
1369
|
|
|
|
|
|
|
|
1370
|
|
|
|
|
|
|
my $cell = cr2cell (col, row); |
1371
|
|
|
|
|
|
|
|
1372
|
|
|
|
|
|
|
my $cell = $book->cr2cell (col, row); # OO |
1373
|
|
|
|
|
|
|
|
1374
|
|
|
|
|
|
|
C<cr2cell ()> converts a C<(column, row)> pair (1 based) to the |
1375
|
|
|
|
|
|
|
traditional cell notation: |
1376
|
|
|
|
|
|
|
|
1377
|
|
|
|
|
|
|
my $cell = cr2cell ( 4, 14); # $cell now "D14" |
1378
|
|
|
|
|
|
|
my $cell = cr2cell (28, 4); # $cell now "AB4" |
1379
|
|
|
|
|
|
|
|
1380
|
|
|
|
|
|
|
=head3 cell2cr |
1381
|
|
|
|
|
|
|
|
1382
|
|
|
|
|
|
|
my ($col, $row) = cell2cr ($cell); |
1383
|
|
|
|
|
|
|
|
1384
|
|
|
|
|
|
|
my ($col, $row) = $book->cell2cr ($cell); # OO |
1385
|
|
|
|
|
|
|
|
1386
|
|
|
|
|
|
|
C<cell2cr ()> converts traditional cell notation to a C<(column, row)> |
1387
|
|
|
|
|
|
|
pair (1 based): |
1388
|
|
|
|
|
|
|
|
1389
|
|
|
|
|
|
|
my ($col, $row) = cell2cr ("D14"); # returns ( 4, 14) |
1390
|
|
|
|
|
|
|
my ($col, $row) = cell2cr ("AB4"); # returns (28, 4) |
1391
|
|
|
|
|
|
|
|
1392
|
|
|
|
|
|
|
=head3 row |
1393
|
|
|
|
|
|
|
|
1394
|
|
|
|
|
|
|
my @row = row ($sheet, $row); |
1395
|
|
|
|
|
|
|
|
1396
|
|
|
|
|
|
|
my @row = Spreadsheet::Read::row ($book->[1], 3); |
1397
|
|
|
|
|
|
|
|
1398
|
|
|
|
|
|
|
my @row = $book->row ($sheet, $row); # OO |
1399
|
|
|
|
|
|
|
|
1400
|
|
|
|
|
|
|
Get full row of formatted values (like C<< $sheet->{A3} .. $sheet->{G3} >>) |
1401
|
|
|
|
|
|
|
|
1402
|
|
|
|
|
|
|
Note that the indexes in the returned list are 0-based. |
1403
|
|
|
|
|
|
|
|
1404
|
|
|
|
|
|
|
C<row ()> is not imported by default, so either specify it in the |
1405
|
|
|
|
|
|
|
use argument list, or call it fully qualified. |
1406
|
|
|
|
|
|
|
|
1407
|
|
|
|
|
|
|
=head3 cellrow |
1408
|
|
|
|
|
|
|
|
1409
|
|
|
|
|
|
|
my @row = cellrow ($sheet, $row); |
1410
|
|
|
|
|
|
|
|
1411
|
|
|
|
|
|
|
my @row = Spreadsheet::Read::cellrow ($book->[1], 3); |
1412
|
|
|
|
|
|
|
|
1413
|
|
|
|
|
|
|
my @row = $book->cellrow ($sheet, $row); # OO |
1414
|
|
|
|
|
|
|
|
1415
|
|
|
|
|
|
|
Get full row of unformatted values (like C<< $sheet->{cell}[1][3] .. $sheet->{cell}[7][3] >>) |
1416
|
|
|
|
|
|
|
|
1417
|
|
|
|
|
|
|
Note that the indexes in the returned list are 0-based. |
1418
|
|
|
|
|
|
|
|
1419
|
|
|
|
|
|
|
C<cellrow ()> is not imported by default, so either specify it in the |
1420
|
|
|
|
|
|
|
use argument list, or call it fully qualified or as method call. |
1421
|
|
|
|
|
|
|
|
1422
|
|
|
|
|
|
|
=head3 rows |
1423
|
|
|
|
|
|
|
|
1424
|
|
|
|
|
|
|
my @rows = rows ($sheet); |
1425
|
|
|
|
|
|
|
|
1426
|
|
|
|
|
|
|
my @rows = Spreadsheet::Read::rows ($book->[1]); |
1427
|
|
|
|
|
|
|
|
1428
|
|
|
|
|
|
|
my @rows = $book->rows (1); # OO |
1429
|
|
|
|
|
|
|
|
1430
|
|
|
|
|
|
|
Convert C<{cell}>'s C<[column][row]> to a C<[row][column]> list. |
1431
|
|
|
|
|
|
|
|
1432
|
|
|
|
|
|
|
Note that the indexes in the returned list are 0-based, where the |
1433
|
|
|
|
|
|
|
index in the C<{cell}> entry is 1-based. |
1434
|
|
|
|
|
|
|
|
1435
|
|
|
|
|
|
|
C<rows ()> is not imported by default, so either specify it in the |
1436
|
|
|
|
|
|
|
use argument list, or call it fully qualified. |
1437
|
|
|
|
|
|
|
|
1438
|
|
|
|
|
|
|
=head3 parses |
1439
|
|
|
|
|
|
|
|
1440
|
|
|
|
|
|
|
parses ($format); |
1441
|
|
|
|
|
|
|
|
1442
|
|
|
|
|
|
|
Spreadsheet::Read::parses ("CSV"); |
1443
|
|
|
|
|
|
|
|
1444
|
|
|
|
|
|
|
$book->parses ("CSV"); # OO |
1445
|
|
|
|
|
|
|
|
1446
|
|
|
|
|
|
|
C<parses ()> returns Spreadsheet::Read's capability to parse the |
1447
|
|
|
|
|
|
|
required format. L<C<ReadData>|/ReadData> will pick its preferred parser |
1448
|
|
|
|
|
|
|
for that format unless overruled. See L<C<parser>|/parser>. |
1449
|
|
|
|
|
|
|
|
1450
|
|
|
|
|
|
|
C<parses ()> is not imported by default, so either specify it in the |
1451
|
|
|
|
|
|
|
use argument list, or call it fully qualified. |
1452
|
|
|
|
|
|
|
|
1453
|
|
|
|
|
|
|
=head3 Version |
1454
|
|
|
|
|
|
|
|
1455
|
|
|
|
|
|
|
my $v = Version (); |
1456
|
|
|
|
|
|
|
|
1457
|
|
|
|
|
|
|
my $v = Spreadsheet::Read::Version (); |
1458
|
|
|
|
|
|
|
|
1459
|
|
|
|
|
|
|
my $v = Spreadsheet::Read->VERSION; |
1460
|
|
|
|
|
|
|
|
1461
|
|
|
|
|
|
|
my $v = $book->Version (); # OO |
1462
|
|
|
|
|
|
|
|
1463
|
|
|
|
|
|
|
Returns the current version of Spreadsheet::Read. |
1464
|
|
|
|
|
|
|
|
1465
|
|
|
|
|
|
|
C<Version ()> is not imported by default, so either specify it in the |
1466
|
|
|
|
|
|
|
use argument list, or call it fully qualified. |
1467
|
|
|
|
|
|
|
|
1468
|
|
|
|
|
|
|
This function returns exactly the same as C<< Spreadsheet::Read->VERSION >> |
1469
|
|
|
|
|
|
|
returns and is only kept for backward compatibility reasons. |
1470
|
|
|
|
|
|
|
|
1471
|
|
|
|
|
|
|
=head3 sheets |
1472
|
|
|
|
|
|
|
|
1473
|
|
|
|
|
|
|
my $sheets = $book->sheets; # OO |
1474
|
|
|
|
|
|
|
my @sheets = $book->sheets; # OO |
1475
|
|
|
|
|
|
|
|
1476
|
|
|
|
|
|
|
In scalar context return the number of sheets in the book. |
1477
|
|
|
|
|
|
|
In list context return the labels of the sheets in the book. |
1478
|
|
|
|
|
|
|
|
1479
|
|
|
|
|
|
|
=head3 sheet |
1480
|
|
|
|
|
|
|
|
1481
|
|
|
|
|
|
|
my $sheet = $book->sheet (1); # OO |
1482
|
|
|
|
|
|
|
my $sheet = $book->sheet ("Foo"); # OO |
1483
|
|
|
|
|
|
|
|
1484
|
|
|
|
|
|
|
Return the numbered or named sheet out of the book. Will return C<undef> if |
1485
|
|
|
|
|
|
|
there is no match. Will not work for sheets I<named> with a number between 1 |
1486
|
|
|
|
|
|
|
and the number of sheets in the book. |
1487
|
|
|
|
|
|
|
|
1488
|
|
|
|
|
|
|
With named sheets will first try to use the list of sheet-labels as stored in |
1489
|
|
|
|
|
|
|
the control structure. If no match is found, it will scan the actual labels |
1490
|
|
|
|
|
|
|
of the sheets. In that case, it will return the first matching sheet. |
1491
|
|
|
|
|
|
|
|
1492
|
|
|
|
|
|
|
If defined, the returned sheet will be of class C<Spreadsheet::Read::Sheet>. |
1493
|
|
|
|
|
|
|
|
1494
|
|
|
|
|
|
|
=head3 add |
1495
|
|
|
|
|
|
|
|
1496
|
|
|
|
|
|
|
my $book = ReadData ("file.csv"); |
1497
|
|
|
|
|
|
|
Spreadsheet::Read::add ($book, "file.xlsx"); |
1498
|
|
|
|
|
|
|
|
1499
|
|
|
|
|
|
|
my $book = Spreadsheet::Read->new ("file.csv"); |
1500
|
|
|
|
|
|
|
$book->add ("file.xlsx"); # OO |
1501
|
|
|
|
|
|
|
|
1502
|
|
|
|
|
|
|
=head2 Methods on sheets |
1503
|
|
|
|
|
|
|
|
1504
|
|
|
|
|
|
|
=head3 maxcol |
1505
|
|
|
|
|
|
|
|
1506
|
|
|
|
|
|
|
my $col = $sheet->maxcol; |
1507
|
|
|
|
|
|
|
|
1508
|
|
|
|
|
|
|
Return the index of the last in-use column in the sheet. This index is 1-based. |
1509
|
|
|
|
|
|
|
|
1510
|
|
|
|
|
|
|
=head3 maxrow |
1511
|
|
|
|
|
|
|
|
1512
|
|
|
|
|
|
|
my $row = $sheet->maxrow; |
1513
|
|
|
|
|
|
|
|
1514
|
|
|
|
|
|
|
Return the index of the last in-use row in the sheet. This index is 1-based. |
1515
|
|
|
|
|
|
|
|
1516
|
|
|
|
|
|
|
=head3 cell |
1517
|
|
|
|
|
|
|
|
1518
|
|
|
|
|
|
|
my $cell = $sheet->cell ("A3"); |
1519
|
|
|
|
|
|
|
my $cell = $sheet->cell (1, 3); |
1520
|
|
|
|
|
|
|
|
1521
|
|
|
|
|
|
|
Return the value for a cell. Using tags will return the formatted value, |
1522
|
|
|
|
|
|
|
using column and row will return unformatted value. |
1523
|
|
|
|
|
|
|
|
1524
|
|
|
|
|
|
|
=head3 attr |
1525
|
|
|
|
|
|
|
|
1526
|
|
|
|
|
|
|
my $cell = $sheet->attr ("A3"); |
1527
|
|
|
|
|
|
|
my $cell = $sheet->attr (1, 3); |
1528
|
|
|
|
|
|
|
|
1529
|
|
|
|
|
|
|
Return the attributes of a cell. Only valid if attributes are enabled through |
1530
|
|
|
|
|
|
|
option C<attr>. |
1531
|
|
|
|
|
|
|
|
1532
|
|
|
|
|
|
|
=head3 col2label |
1533
|
|
|
|
|
|
|
|
1534
|
|
|
|
|
|
|
my $col_id = $sheet->col2label (col); |
1535
|
|
|
|
|
|
|
|
1536
|
|
|
|
|
|
|
C<col2label ()> converts a C<(column)> (1 based) to the letters used in the |
1537
|
|
|
|
|
|
|
traditional cell notation: |
1538
|
|
|
|
|
|
|
|
1539
|
|
|
|
|
|
|
my $id = $sheet->col2label ( 4); # $id now "D" |
1540
|
|
|
|
|
|
|
my $id = $sheet->col2label (28); # $id now "AB" |
1541
|
|
|
|
|
|
|
|
1542
|
|
|
|
|
|
|
=head3 cr2cell |
1543
|
|
|
|
|
|
|
|
1544
|
|
|
|
|
|
|
my $cell = $sheet->cr2cell (col, row); |
1545
|
|
|
|
|
|
|
|
1546
|
|
|
|
|
|
|
C<cr2cell ()> converts a C<(column, row)> pair (1 based) to the |
1547
|
|
|
|
|
|
|
traditional cell notation: |
1548
|
|
|
|
|
|
|
|
1549
|
|
|
|
|
|
|
my $cell = $sheet->cr2cell ( 4, 14); # $cell now "D14" |
1550
|
|
|
|
|
|
|
my $cell = $sheet->cr2cell (28, 4); # $cell now "AB4" |
1551
|
|
|
|
|
|
|
|
1552
|
|
|
|
|
|
|
=head3 cell2cr |
1553
|
|
|
|
|
|
|
|
1554
|
|
|
|
|
|
|
my ($col, $row) = $sheet->cell2cr ($cell); |
1555
|
|
|
|
|
|
|
|
1556
|
|
|
|
|
|
|
C<cell2cr ()> converts traditional cell notation to a C<(column, row)> |
1557
|
|
|
|
|
|
|
pair (1 based): |
1558
|
|
|
|
|
|
|
|
1559
|
|
|
|
|
|
|
my ($col, $row) = $sheet->cell2cr ("D14"); # returns ( 4, 14) |
1560
|
|
|
|
|
|
|
my ($col, $row) = $sheet->cell2cr ("AB4"); # returns (28, 4) |
1561
|
|
|
|
|
|
|
|
1562
|
|
|
|
|
|
|
=head3 column |
1563
|
|
|
|
|
|
|
|
1564
|
|
|
|
|
|
|
my @col = $sheet->column ($col); |
1565
|
|
|
|
|
|
|
|
1566
|
|
|
|
|
|
|
Get full column of formatted values (like C<< $sheet->{C1} .. $sheet->{C9} >>) |
1567
|
|
|
|
|
|
|
|
1568
|
|
|
|
|
|
|
Note that the indexes in the returned list are 0-based. |
1569
|
|
|
|
|
|
|
|
1570
|
|
|
|
|
|
|
=head3 cellcolumn |
1571
|
|
|
|
|
|
|
|
1572
|
|
|
|
|
|
|
my @col = $sheet->cellcolumn ($col); |
1573
|
|
|
|
|
|
|
|
1574
|
|
|
|
|
|
|
Get full column of unformatted values (like C<< $sheet->{cell}[3][1] .. $sheet->{cell}[3][9] >>) |
1575
|
|
|
|
|
|
|
|
1576
|
|
|
|
|
|
|
Note that the indexes in the returned list are 0-based. |
1577
|
|
|
|
|
|
|
|
1578
|
|
|
|
|
|
|
=head3 row |
1579
|
|
|
|
|
|
|
|
1580
|
|
|
|
|
|
|
my @row = $sheet->row ($row); |
1581
|
|
|
|
|
|
|
|
1582
|
|
|
|
|
|
|
Get full row of formatted values (like C<< $sheet->{A3} .. $sheet->{G3} >>) |
1583
|
|
|
|
|
|
|
|
1584
|
|
|
|
|
|
|
Note that the indexes in the returned list are 0-based. |
1585
|
|
|
|
|
|
|
|
1586
|
|
|
|
|
|
|
=head3 cellrow |
1587
|
|
|
|
|
|
|
|
1588
|
|
|
|
|
|
|
my @row = $sheet->cellrow ($row); |
1589
|
|
|
|
|
|
|
|
1590
|
|
|
|
|
|
|
Get full row of unformatted values (like C<< $sheet->{cell}[1][3] .. $sheet->{cell}[7][3] >>) |
1591
|
|
|
|
|
|
|
|
1592
|
|
|
|
|
|
|
Note that the indexes in the returned list are 0-based. |
1593
|
|
|
|
|
|
|
|
1594
|
|
|
|
|
|
|
=head3 rows |
1595
|
|
|
|
|
|
|
|
1596
|
|
|
|
|
|
|
my @rows = $sheet->rows (); |
1597
|
|
|
|
|
|
|
|
1598
|
|
|
|
|
|
|
Convert C<{cell}>'s C<[column][row]> to a C<[row][column]> list. |
1599
|
|
|
|
|
|
|
|
1600
|
|
|
|
|
|
|
Note that the indexes in the returned list are 0-based, where the |
1601
|
|
|
|
|
|
|
index in the C<{cell}> entry is 1-based. |
1602
|
|
|
|
|
|
|
|
1603
|
|
|
|
|
|
|
=head3 label |
1604
|
|
|
|
|
|
|
|
1605
|
|
|
|
|
|
|
my $label = $sheet->label; |
1606
|
|
|
|
|
|
|
$sheet->label ("New sheet label"); |
1607
|
|
|
|
|
|
|
|
1608
|
|
|
|
|
|
|
Set a new label to a sheet. Note that the index in the control structure will |
1609
|
|
|
|
|
|
|
I<NOT> be updated. |
1610
|
|
|
|
|
|
|
|
1611
|
|
|
|
|
|
|
=head3 active |
1612
|
|
|
|
|
|
|
|
1613
|
|
|
|
|
|
|
my $sheet_is_active = $sheet->active; |
1614
|
|
|
|
|
|
|
|
1615
|
|
|
|
|
|
|
Returns 1 if the selected sheet is active, otherwise returns 0. |
1616
|
|
|
|
|
|
|
|
1617
|
|
|
|
|
|
|
Currently only works on XLS (as of Spreadsheet::ParseExcel-0.61). |
1618
|
|
|
|
|
|
|
CSV is always active. |
1619
|
|
|
|
|
|
|
|
1620
|
|
|
|
|
|
|
=head2 Using CSV |
1621
|
|
|
|
|
|
|
|
1622
|
|
|
|
|
|
|
In case of CSV parsing, L<C<ReadData>|/ReadData> will use the first line of |
1623
|
|
|
|
|
|
|
the file to auto-detect the separation character if the first argument is a |
1624
|
|
|
|
|
|
|
file and both C<sep> and C<quote> are not passed as attributes. |
1625
|
|
|
|
|
|
|
L<Text::CSV_XS|https://metacpan.org/release/Text-CSV_XS> (or |
1626
|
|
|
|
|
|
|
L<Text::CSV_PP|https://metacpan.org/release/Text-CSV_PP>) is able to |
1627
|
|
|
|
|
|
|
automatically detect and use C<\r> line endings. |
1628
|
|
|
|
|
|
|
|
1629
|
|
|
|
|
|
|
CSV can parse streams too, but be sure to pass C<sep> and/or C<quote> if |
1630
|
|
|
|
|
|
|
these do not match the default C<,> and C<">. |
1631
|
|
|
|
|
|
|
|
1632
|
|
|
|
|
|
|
When an error is found in the CSV, it is automatically reported (to STDERR). |
1633
|
|
|
|
|
|
|
The structure will store the error in C<< $ss->[0]{error} >> as anonymous |
1634
|
|
|
|
|
|
|
list returned by |
1635
|
|
|
|
|
|
|
L<C<< $csv->error_diag >>|https://metacpan.org/pod/Text::CSV_XS#error_diag>. |
1636
|
|
|
|
|
|
|
See L<Text::CSV_XS|https://metacpan.org/pod/Text::CSV_XS> for documentation. |
1637
|
|
|
|
|
|
|
|
1638
|
|
|
|
|
|
|
my $ss = ReadData ("bad.csv"); |
1639
|
|
|
|
|
|
|
$ss->[0]{error} and say $ss->[0]{error}[1]; |
1640
|
|
|
|
|
|
|
|
1641
|
|
|
|
|
|
|
As CSV has no sheet labels, the default label for a CSV sheet is its filename. |
1642
|
|
|
|
|
|
|
For CSV, this can be overruled using the I<label> attribute: |
1643
|
|
|
|
|
|
|
|
1644
|
|
|
|
|
|
|
my $ss = Spreadsheet::Read->new ("/some/place/test.csv", label => "Test"); |
1645
|
|
|
|
|
|
|
|
1646
|
|
|
|
|
|
|
=head2 Cell Attributes |
1647
|
|
|
|
|
|
|
X<attr> |
1648
|
|
|
|
|
|
|
|
1649
|
|
|
|
|
|
|
If the constructor was called with C<attr> having a true value, |
1650
|
|
|
|
|
|
|
|
1651
|
|
|
|
|
|
|
my $book = ReadData ("book.xls", attr => 1); |
1652
|
|
|
|
|
|
|
my $book = Spreadsheet::Read->new ("book.xlsx", attr => 1); |
1653
|
|
|
|
|
|
|
|
1654
|
|
|
|
|
|
|
effort is made to analyze and store field attributes like this: |
1655
|
|
|
|
|
|
|
|
1656
|
|
|
|
|
|
|
{ label => "Sheet 1", |
1657
|
|
|
|
|
|
|
maxrow => 5, |
1658
|
|
|
|
|
|
|
maxcol => 2, |
1659
|
|
|
|
|
|
|
cell => [ undef, |
1660
|
|
|
|
|
|
|
[ undef, 1 ], |
1661
|
|
|
|
|
|
|
[ undef, undef, undef, undef, undef, "Nugget" ], |
1662
|
|
|
|
|
|
|
], |
1663
|
|
|
|
|
|
|
attr => [ undef, |
1664
|
|
|
|
|
|
|
[ undef, { |
1665
|
|
|
|
|
|
|
type => "numeric", |
1666
|
|
|
|
|
|
|
fgcolor => "#ff0000", |
1667
|
|
|
|
|
|
|
bgcolor => undef, |
1668
|
|
|
|
|
|
|
font => "Arial", |
1669
|
|
|
|
|
|
|
size => undef, |
1670
|
|
|
|
|
|
|
format => "## ##0.00", |
1671
|
|
|
|
|
|
|
halign => "right", |
1672
|
|
|
|
|
|
|
valign => "top", |
1673
|
|
|
|
|
|
|
uline => 0, |
1674
|
|
|
|
|
|
|
bold => 0, |
1675
|
|
|
|
|
|
|
italic => 0, |
1676
|
|
|
|
|
|
|
wrap => 0, |
1677
|
|
|
|
|
|
|
merged => 0, |
1678
|
|
|
|
|
|
|
hidden => 0, |
1679
|
|
|
|
|
|
|
locked => 0, |
1680
|
|
|
|
|
|
|
enc => "utf-8", |
1681
|
|
|
|
|
|
|
}, ], |
1682
|
|
|
|
|
|
|
[ undef, undef, undef, undef, undef, { |
1683
|
|
|
|
|
|
|
type => "text", |
1684
|
|
|
|
|
|
|
fgcolor => "#e2e2e2", |
1685
|
|
|
|
|
|
|
bgcolor => undef, |
1686
|
|
|
|
|
|
|
font => "Letter Gothic", |
1687
|
|
|
|
|
|
|
size => 15, |
1688
|
|
|
|
|
|
|
format => undef, |
1689
|
|
|
|
|
|
|
halign => "left", |
1690
|
|
|
|
|
|
|
valign => "top", |
1691
|
|
|
|
|
|
|
uline => 0, |
1692
|
|
|
|
|
|
|
bold => 0, |
1693
|
|
|
|
|
|
|
italic => 0, |
1694
|
|
|
|
|
|
|
wrap => 0, |
1695
|
|
|
|
|
|
|
merged => 0, |
1696
|
|
|
|
|
|
|
hidden => 0, |
1697
|
|
|
|
|
|
|
locked => 0, |
1698
|
|
|
|
|
|
|
enc => "iso8859-1", |
1699
|
|
|
|
|
|
|
}, ], ], |
1700
|
|
|
|
|
|
|
merged => [], |
1701
|
|
|
|
|
|
|
A1 => 1, |
1702
|
|
|
|
|
|
|
B5 => "Nugget", |
1703
|
|
|
|
|
|
|
}, |
1704
|
|
|
|
|
|
|
|
1705
|
|
|
|
|
|
|
The entries C<maxrow> and C<maxcol> are 1-based. |
1706
|
|
|
|
|
|
|
|
1707
|
|
|
|
|
|
|
This has now been partially implemented, mainly for Excel, as the other |
1708
|
|
|
|
|
|
|
parsers do not (yet) support all of that. YMMV. |
1709
|
|
|
|
|
|
|
|
1710
|
|
|
|
|
|
|
If a cell itself is not hidden, but the parser holds the information that |
1711
|
|
|
|
|
|
|
either the row or the column (or both) the field is in is hidden, the flag |
1712
|
|
|
|
|
|
|
is inherited into the cell attributes. |
1713
|
|
|
|
|
|
|
|
1714
|
|
|
|
|
|
|
You can get the attributes of a cell (as a hash-ref) like this: |
1715
|
|
|
|
|
|
|
|
1716
|
|
|
|
|
|
|
my $attr = $book->[1]{attr}[1][3]; # Direct structure |
1717
|
|
|
|
|
|
|
my $attr = $book->sheet (1)->attr (1, 3); # Same using OO |
1718
|
|
|
|
|
|
|
my $attr = $book->sheet (1)->attr ("A3"); # Same using OO |
1719
|
|
|
|
|
|
|
|
1720
|
|
|
|
|
|
|
To get to the C<font> attribute, use any of these: |
1721
|
|
|
|
|
|
|
|
1722
|
|
|
|
|
|
|
my $font = $book->[1]{attr}[1][3]{font}; |
1723
|
|
|
|
|
|
|
my $font = $book->sheet (1)->attr (1, 3)->{font}; |
1724
|
|
|
|
|
|
|
my $font = $book->sheet (1)->attr ("A3")->font; |
1725
|
|
|
|
|
|
|
|
1726
|
|
|
|
|
|
|
=head3 Merged cells |
1727
|
|
|
|
|
|
|
X<merged> |
1728
|
|
|
|
|
|
|
|
1729
|
|
|
|
|
|
|
Note that only |
1730
|
|
|
|
|
|
|
L<Spreadsheet::ReadSXC|https://metacpan.org/release/Spreadsheet-ReadSXC> |
1731
|
|
|
|
|
|
|
documents the use of merged cells, and not in a way useful for the spreadsheet |
1732
|
|
|
|
|
|
|
consumer. |
1733
|
|
|
|
|
|
|
|
1734
|
|
|
|
|
|
|
CSV does not support merged cells (though future implementations of CSV |
1735
|
|
|
|
|
|
|
for the web might). |
1736
|
|
|
|
|
|
|
|
1737
|
|
|
|
|
|
|
The documentation of merged areas in |
1738
|
|
|
|
|
|
|
L<Spreadsheet::ParseExcel|https://metacpan.org/release/Spreadsheet-ParseExcel> and |
1739
|
|
|
|
|
|
|
L<Spreadsheet::ParseXLSX|https://metacpan.org/release/Spreadsheet-ParseXLSX> can |
1740
|
|
|
|
|
|
|
be found in |
1741
|
|
|
|
|
|
|
L<Spreadsheet::ParseExcel::Worksheet|https://metacpan.org/pod/Spreadsheet::ParseExcel::Worksheet> |
1742
|
|
|
|
|
|
|
and L<Spreadsheet::ParseExcel::Cell|https://metacpan.org/pod/Spreadsheet::ParseExcel::Cell>. |
1743
|
|
|
|
|
|
|
|
1744
|
|
|
|
|
|
|
None of the basic L<Spreadsheet::XLSX|https://metacpan.org/release/Spreadsheet-XLSX>, |
1745
|
|
|
|
|
|
|
L<Spreadsheet::ParseExcel|https://metacpan.org/release/Spreadsheet-ParseExcel>, and |
1746
|
|
|
|
|
|
|
L<Spreadsheet::ParseXLSX|https://metacpan.org/release/Spreadsheet-ParseXLSX> manual |
1747
|
|
|
|
|
|
|
pages mention merged cells at all. |
1748
|
|
|
|
|
|
|
|
1749
|
|
|
|
|
|
|
This module just tries to return the information in a generic way. |
1750
|
|
|
|
|
|
|
|
1751
|
|
|
|
|
|
|
Given this spreadsheet as an example |
1752
|
|
|
|
|
|
|
|
1753
|
|
|
|
|
|
|
merged.xlsx: |
1754
|
|
|
|
|
|
|
|
1755
|
|
|
|
|
|
|
A B C |
1756
|
|
|
|
|
|
|
+-----+-----------+ |
1757
|
|
|
|
|
|
|
1| | foo | |
1758
|
|
|
|
|
|
|
+-----+ + |
1759
|
|
|
|
|
|
|
2| bar | | |
1760
|
|
|
|
|
|
|
| +-----+-----+ |
1761
|
|
|
|
|
|
|
3| | urg | orc | |
1762
|
|
|
|
|
|
|
+-----+-----+-----+ |
1763
|
|
|
|
|
|
|
|
1764
|
|
|
|
|
|
|
the information extracted from that undocumented information is |
1765
|
|
|
|
|
|
|
returned in the C<merged> entry of the sheet's hash as a list of |
1766
|
|
|
|
|
|
|
top-left, bottom-right coordinate pairs (col, row, col, row). For |
1767
|
|
|
|
|
|
|
given example, that would be: |
1768
|
|
|
|
|
|
|
|
1769
|
|
|
|
|
|
|
$ss->{merged} = [ |
1770
|
|
|
|
|
|
|
[ 1, 2, 1, 3 ], # A2-A3 |
1771
|
|
|
|
|
|
|
[ 2, 1, 3, 2 ], # B1-C2 |
1772
|
|
|
|
|
|
|
]; |
1773
|
|
|
|
|
|
|
|
1774
|
|
|
|
|
|
|
When the attributes are also enabled, there is some merge information |
1775
|
|
|
|
|
|
|
copied directly from the cell information, but again, that stems from |
1776
|
|
|
|
|
|
|
code analysis and not from documentation: |
1777
|
|
|
|
|
|
|
|
1778
|
|
|
|
|
|
|
my $ss = ReadData ("merged.xlsx", attr => 1)->[1]; |
1779
|
|
|
|
|
|
|
foreach my $row (1 .. $ss->{maxrow}) { |
1780
|
|
|
|
|
|
|
foreach my $col (1 .. $ss->{maxcol}) { |
1781
|
|
|
|
|
|
|
my $cell = cr2cell ($col, $row); |
1782
|
|
|
|
|
|
|
printf "%s %-3s %d ", $cell, $ss->{$cell}, |
1783
|
|
|
|
|
|
|
$ss->{attr}[$col][$row]{merged}; |
1784
|
|
|
|
|
|
|
} |
1785
|
|
|
|
|
|
|
print "\n"; |
1786
|
|
|
|
|
|
|
} |
1787
|
|
|
|
|
|
|
|
1788
|
|
|
|
|
|
|
A1 0 B1 foo 1 C1 1 |
1789
|
|
|
|
|
|
|
A2 bar 1 B2 1 C2 1 |
1790
|
|
|
|
|
|
|
A3 1 B3 urg 0 C3 orc 0 |
1791
|
|
|
|
|
|
|
|
1792
|
|
|
|
|
|
|
In this example, there is no way to see if C<B2> is merged to C<A2> or |
1793
|
|
|
|
|
|
|
to C<B1> without analyzing all surrounding cells. This could as well |
1794
|
|
|
|
|
|
|
mean C<A2:A3>, C<B1:C1>, C<B2:C2>, as C<A2:A3>, C<B1:B2>, C<C1:C2>, as |
1795
|
|
|
|
|
|
|
C<A2:A3>, C<B1:C2>. |
1796
|
|
|
|
|
|
|
Use the L<C<merged>|/merged> entry described above to find out what |
1797
|
|
|
|
|
|
|
fields are merged to what other fields. |
1798
|
|
|
|
|
|
|
|
1799
|
|
|
|
|
|
|
=head1 TOOLS |
1800
|
|
|
|
|
|
|
|
1801
|
|
|
|
|
|
|
This module comes with a few tools that perform tasks from the FAQ, like |
1802
|
|
|
|
|
|
|
"How do I select only column D through F from sheet 2 into a CSV file?" |
1803
|
|
|
|
|
|
|
|
1804
|
|
|
|
|
|
|
If the module was installed without the tools, you can find them here: |
1805
|
|
|
|
|
|
|
https://github.com/Tux/Spreadsheet-Read/tree/master/examples |
1806
|
|
|
|
|
|
|
|
1807
|
|
|
|
|
|
|
=head2 C<xlscat> |
1808
|
|
|
|
|
|
|
|
1809
|
|
|
|
|
|
|
Show (parts of) a spreadsheet in plain text, CSV, or HTML |
1810
|
|
|
|
|
|
|
|
1811
|
|
|
|
|
|
|
usage: xlscat [-s <sep>] [-L] [-n] [-A] [-u] [Selection] file.xls |
1812
|
|
|
|
|
|
|
[-c | -m] [-u] [Selection] file.xls |
1813
|
|
|
|
|
|
|
-i [-S sheets] file.xls |
1814
|
|
|
|
|
|
|
Generic options: |
1815
|
|
|
|
|
|
|
-v[#] Set verbose level (xlscat/xlsgrep) |
1816
|
|
|
|
|
|
|
-d[#] Set debug level (Spreadsheet::Read) |
1817
|
|
|
|
|
|
|
-u Use unformatted values |
1818
|
|
|
|
|
|
|
--noclip Do not strip empty sheets and |
1819
|
|
|
|
|
|
|
trailing empty rows and columns |
1820
|
|
|
|
|
|
|
-e <enc> Set encoding for input and output |
1821
|
|
|
|
|
|
|
-b <enc> Set encoding for input |
1822
|
|
|
|
|
|
|
-a <enc> Set encoding for output |
1823
|
|
|
|
|
|
|
Input CSV: |
1824
|
|
|
|
|
|
|
--in-sep=c Set input sep_char for CSV |
1825
|
|
|
|
|
|
|
Input XLS: |
1826
|
|
|
|
|
|
|
--dtfmt=fmt Specify the default date format to replace 'm-d-yy' |
1827
|
|
|
|
|
|
|
the default replacement is 'yyyy-mm-dd' |
1828
|
|
|
|
|
|
|
Output Text (default): |
1829
|
|
|
|
|
|
|
-s <sep> Use separator <sep>. Default '|', \n allowed |
1830
|
|
|
|
|
|
|
-L Line up the columns |
1831
|
|
|
|
|
|
|
-n [skip] Number lines (prefix with column number) |
1832
|
|
|
|
|
|
|
optionally skip <skip> (header) lines |
1833
|
|
|
|
|
|
|
-A Show field attributes in ANSI escapes |
1834
|
|
|
|
|
|
|
-h[#] Show # header lines |
1835
|
|
|
|
|
|
|
Output Index only: |
1836
|
|
|
|
|
|
|
-i Show sheet names and size only |
1837
|
|
|
|
|
|
|
Output CSV: |
1838
|
|
|
|
|
|
|
-c Output CSV, separator = ',' |
1839
|
|
|
|
|
|
|
-m Output CSV, separator = ';' |
1840
|
|
|
|
|
|
|
Output HTML: |
1841
|
|
|
|
|
|
|
-H Output HTML |
1842
|
|
|
|
|
|
|
Selection: |
1843
|
|
|
|
|
|
|
-S <sheets> Only print sheets <sheets>. 'all' is a valid set |
1844
|
|
|
|
|
|
|
Default only prints the first sheet |
1845
|
|
|
|
|
|
|
-R <rows> Only print rows <rows>. Default is 'all' |
1846
|
|
|
|
|
|
|
-C <cols> Only print columns <cols>. Default is 'all' |
1847
|
|
|
|
|
|
|
-F <flds> Only fields <flds> e.g. -FA3,B16 |
1848
|
|
|
|
|
|
|
Ordering (column numbers in result set *after* selection): |
1849
|
|
|
|
|
|
|
--sort=spec Sort output (e.g. --sort=3,2r,5n,1rn+2) |
1850
|
|
|
|
|
|
|
+# - first # lines do not sort (header) |
1851
|
|
|
|
|
|
|
# - order on column # lexical ascending |
1852
|
|
|
|
|
|
|
#n - order on column # numeric ascending |
1853
|
|
|
|
|
|
|
#r - order on column # lexical descending |
1854
|
|
|
|
|
|
|
#rn - order on column # numeric descending |
1855
|
|
|
|
|
|
|
|
1856
|
|
|
|
|
|
|
=head2 C<xlsgrep> |
1857
|
|
|
|
|
|
|
|
1858
|
|
|
|
|
|
|
Show (parts of) a spreadsheet that match a pattern in plain text, CSV, or HTML |
1859
|
|
|
|
|
|
|
|
1860
|
|
|
|
|
|
|
usage: xlsgrep [-s <sep>] [-L] [-n] [-A] [-u] [Selection] pattern file.xls |
1861
|
|
|
|
|
|
|
[-c | -m] [-u] [Selection] pattern file.xls |
1862
|
|
|
|
|
|
|
-i [-S sheets] pattern file.xls |
1863
|
|
|
|
|
|
|
Generic options: |
1864
|
|
|
|
|
|
|
-v[#] Set verbose level (xlscat/xlsgrep) |
1865
|
|
|
|
|
|
|
-d[#] Set debug level (Spreadsheet::Read) |
1866
|
|
|
|
|
|
|
-u Use unformatted values |
1867
|
|
|
|
|
|
|
--noclip Do not strip empty sheets and |
1868
|
|
|
|
|
|
|
trailing empty rows and columns |
1869
|
|
|
|
|
|
|
-e <enc> Set encoding for input and output |
1870
|
|
|
|
|
|
|
-b <enc> Set encoding for input |
1871
|
|
|
|
|
|
|
-a <enc> Set encoding for output |
1872
|
|
|
|
|
|
|
Input CSV: |
1873
|
|
|
|
|
|
|
--in-sep=c Set input sep_char for CSV |
1874
|
|
|
|
|
|
|
Input XLS: |
1875
|
|
|
|
|
|
|
--dtfmt=fmt Specify the default date format to replace 'm-d-yy' |
1876
|
|
|
|
|
|
|
the default replacement is 'yyyy-mm-dd' |
1877
|
|
|
|
|
|
|
Output Text (default): |
1878
|
|
|
|
|
|
|
-s <sep> Use separator <sep>. Default '|', \n allowed |
1879
|
|
|
|
|
|
|
-L Line up the columns |
1880
|
|
|
|
|
|
|
-n [skip] Number lines (prefix with column number) |
1881
|
|
|
|
|
|
|
optionally skip <skip> (header) lines |
1882
|
|
|
|
|
|
|
-A Show field attributes in ANSI escapes |
1883
|
|
|
|
|
|
|
-h[#] Show # header lines |
1884
|
|
|
|
|
|
|
Grep options: |
1885
|
|
|
|
|
|
|
-i Ignore case |
1886
|
|
|
|
|
|
|
-w Match whole words only |
1887
|
|
|
|
|
|
|
Output CSV: |
1888
|
|
|
|
|
|
|
-c Output CSV, separator = ',' |
1889
|
|
|
|
|
|
|
-m Output CSV, separator = ';' |
1890
|
|
|
|
|
|
|
Output HTML: |
1891
|
|
|
|
|
|
|
-H Output HTML |
1892
|
|
|
|
|
|
|
Selection: |
1893
|
|
|
|
|
|
|
-S <sheets> Only print sheets <sheets>. 'all' is a valid set |
1894
|
|
|
|
|
|
|
Default only prints the first sheet |
1895
|
|
|
|
|
|
|
-R <rows> Only print rows <rows>. Default is 'all' |
1896
|
|
|
|
|
|
|
-C <cols> Only print columns <cols>. Default is 'all' |
1897
|
|
|
|
|
|
|
-F <flds> Only fields <flds> e.g. -FA3,B16 |
1898
|
|
|
|
|
|
|
Ordering (column numbers in result set *after* selection): |
1899
|
|
|
|
|
|
|
--sort=spec Sort output (e.g. --sort=3,2r,5n,1rn+2) |
1900
|
|
|
|
|
|
|
+# - first # lines do not sort (header) |
1901
|
|
|
|
|
|
|
# - order on column # lexical ascending |
1902
|
|
|
|
|
|
|
#n - order on column # numeric ascending |
1903
|
|
|
|
|
|
|
#r - order on column # lexical descending |
1904
|
|
|
|
|
|
|
#rn - order on column # numeric descending |
1905
|
|
|
|
|
|
|
|
1906
|
|
|
|
|
|
|
=head2 C<xls2csv> |
1907
|
|
|
|
|
|
|
|
1908
|
|
|
|
|
|
|
Convert a spreadsheet to CSV. This is just a small wrapper over C<xlscat>. |
1909
|
|
|
|
|
|
|
|
1910
|
|
|
|
|
|
|
usage: xls2csv [ -o file.csv ] file.xls |
1911
|
|
|
|
|
|
|
|
1912
|
|
|
|
|
|
|
=head2 C<ss2tk> |
1913
|
|
|
|
|
|
|
|
1914
|
|
|
|
|
|
|
Show a spreadsheet in a perl/Tk spreadsheet widget |
1915
|
|
|
|
|
|
|
|
1916
|
|
|
|
|
|
|
usage: ss2tk [-w <width>] [X11 options] file.xls [<pattern>] |
1917
|
|
|
|
|
|
|
-w <width> use <width> as default column width (4) |
1918
|
|
|
|
|
|
|
|
1919
|
|
|
|
|
|
|
=head2 C<ssdiff> |
1920
|
|
|
|
|
|
|
|
1921
|
|
|
|
|
|
|
Show the differences between two spreadsheets. |
1922
|
|
|
|
|
|
|
|
1923
|
|
|
|
|
|
|
usage: examples/ssdiff [--verbose[=1]] file.xls file.xlsx |
1924
|
|
|
|
|
|
|
|
1925
|
|
|
|
|
|
|
=head1 TODO |
1926
|
|
|
|
|
|
|
|
1927
|
|
|
|
|
|
|
=over 4 |
1928
|
|
|
|
|
|
|
|
1929
|
|
|
|
|
|
|
=item Options |
1930
|
|
|
|
|
|
|
|
1931
|
|
|
|
|
|
|
=over 2 |
1932
|
|
|
|
|
|
|
|
1933
|
|
|
|
|
|
|
=item Module Options |
1934
|
|
|
|
|
|
|
|
1935
|
|
|
|
|
|
|
New Spreadsheet::Read options are bound to happen. I'm thinking of an |
1936
|
|
|
|
|
|
|
option that disables the reading of the data entirely to speed up an |
1937
|
|
|
|
|
|
|
index request (how many sheets/fields/columns). See C<xlscat -i>. |
1938
|
|
|
|
|
|
|
|
1939
|
|
|
|
|
|
|
=item Parser options |
1940
|
|
|
|
|
|
|
|
1941
|
|
|
|
|
|
|
Try to transparently support as many options as the encapsulated modules |
1942
|
|
|
|
|
|
|
support regarding (un)formatted values, (date) formats, hidden columns, |
1943
|
|
|
|
|
|
|
rows or fields etc. These could be implemented like C<attr> above but |
1944
|
|
|
|
|
|
|
named C<meta>, or just be new values in the C<attr> hashes. |
1945
|
|
|
|
|
|
|
|
1946
|
|
|
|
|
|
|
=back |
1947
|
|
|
|
|
|
|
|
1948
|
|
|
|
|
|
|
=item Other parsers |
1949
|
|
|
|
|
|
|
|
1950
|
|
|
|
|
|
|
Add support for new(er) parsers for already supported formats, like |
1951
|
|
|
|
|
|
|
|
1952
|
|
|
|
|
|
|
=over 2 |
1953
|
|
|
|
|
|
|
|
1954
|
|
|
|
|
|
|
=item Data::XLSX::Parser |
1955
|
|
|
|
|
|
|
|
1956
|
|
|
|
|
|
|
Data::XLSX::Parser provides a faster way to parse Microsoft Excel's .xlsx |
1957
|
|
|
|
|
|
|
files. The implementation of this module is heavily inspired by Python's |
1958
|
|
|
|
|
|
|
FastXLSX library. |
1959
|
|
|
|
|
|
|
|
1960
|
|
|
|
|
|
|
This is a SAX-based parser, so you can parse very large XLSX files with |
1961
|
|
|
|
|
|
|
lower memory usage. |
1962
|
|
|
|
|
|
|
|
1963
|
|
|
|
|
|
|
=back |
1964
|
|
|
|
|
|
|
|
1965
|
|
|
|
|
|
|
=item Other spreadsheet formats |
1966
|
|
|
|
|
|
|
|
1967
|
|
|
|
|
|
|
I consider adding any spreadsheet interface that offers a usable API. |
1968
|
|
|
|
|
|
|
|
1969
|
|
|
|
|
|
|
Under investigation: |
1970
|
|
|
|
|
|
|
|
1971
|
|
|
|
|
|
|
=over 2 |
1972
|
|
|
|
|
|
|
|
1973
|
|
|
|
|
|
|
=item Gnumeric (.gnumeric) |
1974
|
|
|
|
|
|
|
|
1975
|
|
|
|
|
|
|
I have seen no existing CPAN module yet. |
1976
|
|
|
|
|
|
|
|
1977
|
|
|
|
|
|
|
It is gzip'ed XML |
1978
|
|
|
|
|
|
|
|
1979
|
|
|
|
|
|
|
=item Kspread (.ksp) |
1980
|
|
|
|
|
|
|
|
1981
|
|
|
|
|
|
|
Now known as Calligra Sheets. |
1982
|
|
|
|
|
|
|
|
1983
|
|
|
|
|
|
|
I have seen no existing CPAN module yet. |
1984
|
|
|
|
|
|
|
|
1985
|
|
|
|
|
|
|
It is XML in ZIP |
1986
|
|
|
|
|
|
|
|
1987
|
|
|
|
|
|
|
=back |
1988
|
|
|
|
|
|
|
|
1989
|
|
|
|
|
|
|
=item Alternative parsers for existing formats |
1990
|
|
|
|
|
|
|
|
1991
|
|
|
|
|
|
|
As long as the alternative has a good reason for its existence, and the |
1992
|
|
|
|
|
|
|
API of that parser reasonably fits in my approach, I will consider to |
1993
|
|
|
|
|
|
|
implement the glue layer, or apply patches to do so as long as these |
1994
|
|
|
|
|
|
|
match what F<CONTRIBUTING.md> describes. |
1995
|
|
|
|
|
|
|
|
1996
|
|
|
|
|
|
|
=back |
1997
|
|
|
|
|
|
|
|
1998
|
|
|
|
|
|
|
=head1 SEE ALSO |
1999
|
|
|
|
|
|
|
|
2000
|
|
|
|
|
|
|
=over 2 |
2001
|
|
|
|
|
|
|
|
2002
|
|
|
|
|
|
|
=item Text::CSV_XS, Text::CSV_PP |
2003
|
|
|
|
|
|
|
|
2004
|
|
|
|
|
|
|
See L<Text::CSV_XS|https://metacpan.org/release/Text-CSV_XS> , |
2005
|
|
|
|
|
|
|
L<Text::CSV_PP|https://metacpan.org/release/Text-CSV_PP> , and |
2006
|
|
|
|
|
|
|
L<Text::CSV|https://metacpan.org/release/Text-CSV> documentation. |
2007
|
|
|
|
|
|
|
|
2008
|
|
|
|
|
|
|
L<Text::CSV|https://metacpan.org/release/Text-CSV> is a wrapper over Text::CSV_XS (the fast XS version) and/or |
2009
|
|
|
|
|
|
|
L<Text::CSV_PP|https://metacpan.org/release/Text-CSV_PP> (the pure perl version). |
2010
|
|
|
|
|
|
|
|
2011
|
|
|
|
|
|
|
=item Spreadsheet::ParseExcel |
2012
|
|
|
|
|
|
|
|
2013
|
|
|
|
|
|
|
L<Spreadsheet::ParseExcel|https://metacpan.org/release/Spreadsheet-ParseExcel> is |
2014
|
|
|
|
|
|
|
the best parser for old-style Microsoft Excel (.xls) files. |
2015
|
|
|
|
|
|
|
|
2016
|
|
|
|
|
|
|
=item Spreadsheet::ParseXLSX |
2017
|
|
|
|
|
|
|
|
2018
|
|
|
|
|
|
|
L<Spreadsheet::ParseXLSX|https://metacpan.org/release/Spreadsheet-ParseXLSX> is |
2019
|
|
|
|
|
|
|
like L<Spreadsheet::ParseExcel|https://metacpan.org/release/Spreadsheet-ParseExcel>, |
2020
|
|
|
|
|
|
|
but for new Microsoft Excel 2007+ files (.xlsx). They have the same API. |
2021
|
|
|
|
|
|
|
|
2022
|
|
|
|
|
|
|
This module uses L<XML::Twig|https://metacpan.org/release/XML-Twig> to parse the |
2023
|
|
|
|
|
|
|
internal XML. |
2024
|
|
|
|
|
|
|
|
2025
|
|
|
|
|
|
|
=item Spreadsheet::XLSX |
2026
|
|
|
|
|
|
|
|
2027
|
|
|
|
|
|
|
See L<Spreadsheet::XLSX|https://metacpan.org/release/Spreadsheet-XLSX> |
2028
|
|
|
|
|
|
|
documentation. |
2029
|
|
|
|
|
|
|
|
2030
|
|
|
|
|
|
|
This module is dead and deprecated. It is B<buggy and unmaintained>. I<Please> |
2031
|
|
|
|
|
|
|
use L<Spreadsheet::ParseXLSX|https://metacpan.org/release/Spreadsheet-ParseXLSX> |
2032
|
|
|
|
|
|
|
instead. |
2033
|
|
|
|
|
|
|
|
2034
|
|
|
|
|
|
|
=item Spreadsheet::ReadSXC |
2035
|
|
|
|
|
|
|
|
2036
|
|
|
|
|
|
|
L<Spreadsheet::ReadSXC|https://metacpan.org/release/Spreadsheet-ReadSXC> is a |
2037
|
|
|
|
|
|
|
parser for OpenOffice/LibreOffice (.sxc and .ods) spreadsheet files. |
2038
|
|
|
|
|
|
|
|
2039
|
|
|
|
|
|
|
=item Spreadsheet::BasicRead |
2040
|
|
|
|
|
|
|
|
2041
|
|
|
|
|
|
|
See L<Spreadsheet::BasicRead|https://metacpan.org/release/Spreadsheet-BasicRead> |
2042
|
|
|
|
|
|
|
for xlscat-like functionality (Excel only) |
2043
|
|
|
|
|
|
|
|
2044
|
|
|
|
|
|
|
=item Spreadsheet::ConvertAA |
2045
|
|
|
|
|
|
|
|
2046
|
|
|
|
|
|
|
See L<Spreadsheet::ConvertAA|https://metacpan.org/release/Spreadsheet-ConvertAA> |
2047
|
|
|
|
|
|
|
for an alternative L</cell2cr>/L</cr2cell> pair. |
2048
|
|
|
|
|
|
|
|
2049
|
|
|
|
|
|
|
=item Spreadsheet::Perl |
2050
|
|
|
|
|
|
|
|
2051
|
|
|
|
|
|
|
L<Spreadsheet::Perl|https://metacpan.org/release/Spreadsheet-Perl> offers a Pure |
2052
|
|
|
|
|
|
|
Perl implementation of a spreadsheet engine. Users that want this format to be |
2053
|
|
|
|
|
|
|
supported in Spreadsheet::Read are hereby motivated to offer patches. It is |
2054
|
|
|
|
|
|
|
not high on my TODO-list. |
2055
|
|
|
|
|
|
|
|
2056
|
|
|
|
|
|
|
=item Spreadsheet::CSV |
2057
|
|
|
|
|
|
|
|
2058
|
|
|
|
|
|
|
L<Spreadsheet::CSV|https://metacpan.org/release/Spreadsheet-CSV> offers the |
2059
|
|
|
|
|
|
|
interesting approach of seeing all supported spreadsheet formats as if they were |
2060
|
|
|
|
|
|
|
CSV, mimicking the L<Text::CSV_XS|https://metacpan.org/release/Text-CSV_XS> |
2061
|
|
|
|
|
|
|
interface. |
2062
|
|
|
|
|
|
|
|
2063
|
|
|
|
|
|
|
=item xls2csv |
2064
|
|
|
|
|
|
|
|
2065
|
|
|
|
|
|
|
L<xls2csv|https://metacpan.org/release/xls2csv> offers an alternative for my |
2066
|
|
|
|
|
|
|
C<xlscat -c>, in the xls2csv tool, but this tool focuses on character encoding |
2067
|
|
|
|
|
|
|
transparency, and requires some other modules. |
2068
|
|
|
|
|
|
|
|
2069
|
|
|
|
|
|
|
=back |
2070
|
|
|
|
|
|
|
|
2071
|
|
|
|
|
|
|
=head1 AUTHOR |
2072
|
|
|
|
|
|
|
|
2073
|
|
|
|
|
|
|
H.Merijn Brand, <h.m.brand@xs4all.nl> |
2074
|
|
|
|
|
|
|
|
2075
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
2076
|
|
|
|
|
|
|
|
2077
|
|
|
|
|
|
|
Copyright (C) 2005-2018 H.Merijn Brand |
2078
|
|
|
|
|
|
|
|
2079
|
|
|
|
|
|
|
This library is free software; you can redistribute it and/or modify |
2080
|
|
|
|
|
|
|
it under the same terms as Perl itself. |
2081
|
|
|
|
|
|
|
|
2082
|
|
|
|
|
|
|
=cut |