line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Data::Grid; |
2
|
|
|
|
|
|
|
|
3
|
1
|
|
|
1
|
|
52257
|
use warnings FATAL => 'all'; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
66
|
|
4
|
1
|
|
|
1
|
|
6
|
use strict; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
33
|
|
5
|
|
|
|
|
|
|
|
6
|
1
|
|
|
1
|
|
1219
|
use File::MMagic (); |
|
1
|
|
|
|
|
62379
|
|
|
1
|
|
|
|
|
38
|
|
7
|
1
|
|
|
1
|
|
16
|
use IO::File (); |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
19
|
|
8
|
1
|
|
|
1
|
|
1181
|
use IO::Scalar (); |
|
1
|
|
|
|
|
7240
|
|
|
1
|
|
|
|
|
23
|
|
9
|
1
|
|
|
1
|
|
10
|
use Scalar::Util (); |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
457
|
|
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
#use overload '@{}' => 'tables'; |
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
# Driver lookup table used by parse(): the key is the MIME type detected
# for the data source, the value is the name of the driver class that is
# loaded and instantiated for it.
my %MAP = (
    # types handled by the Excel driver
    (
        map { $_ => 'Data::Grid::Excel' }
            qw(application/octet-stream application/msword application/excel)
    ),

    # zip containers go to the XLSX driver
    'application/x-zip' => 'Data::Grid::Excel::XLSX',

    # textual types handled by the CSV driver
    (map { $_ => 'Data::Grid::CSV' } qw(text/plain text/csv)),
);
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
=head1 NAME |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
Data::Grid - Incremental read-only (for now) access to grid-based data |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
=head1 VERSION |
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
Version 0.01_01 |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
=cut |
31
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
our $VERSION = '0.01_01'; |
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
=head1 SYNOPSIS |
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
use Data::Grid; |
37
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
# Have the parser guess the kind of file, using defaults. |
39
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
my $grid = Data::Grid->parse('arbitrary.xls'); |
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
# or |
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
my $grid = Data::Grid->parse( |
45
|
|
|
|
|
|
|
source => 'arbitrary.csv', # or xls, or xlsx, or filehandle... |
46
|
|
|
|
|
|
|
header => 1, # first line is a header everywhere |
47
|
|
|
|
|
|
|
fields => [qw(a b c)], # override field header |
48
|
|
|
|
|
|
|
options => \%options, # driver-specific options |
49
|
|
|
|
|
|
|
); |
50
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
# Each object contains one or more tables. |
52
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
for my $table ($grid->tables) { |
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
# Each table has one or more rows. |
56
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
while (my $row = $table->next) { |
58
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
# The columns can be dereferenced as an array, |
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
my @cols = @$row; # or just $row->columns |
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
# or, if header is present or fields were named in the |
64
|
|
|
|
|
|
|
# constructor, as a hash. |
65
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
my %cols = %$row; |
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
# Now we can do stuff. |
69
|
|
|
|
|
|
|
} |
70
|
|
|
|
|
|
|
} |
71
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
=head1 DESCRIPTION |
73
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
=over 4 |
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
=item Problem 1 |
77
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
You have a mountain of data files from two decades of using MS Office |
79
|
|
|
|
|
|
|
(and other) products, and you want to collate their contents into |
80
|
|
|
|
|
|
|
someplace sane. |
81
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
=item Problem 2 |
83
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
The files are in numerous different formats, and a consistent |
85
|
|
|
|
|
|
|
interface would really cut down on the effort of extracting them. |
86
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
=item Problem 3 |
88
|
|
|
|
|
|
|
|
89
|
|
|
|
|
|
|
You've looked at L and L, but deemed |
90
|
|
|
|
|
|
|
their table-at-a-time strategy to be inappropriate for your purposes. |
91
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
=back |
93
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
The goal of L<Data::Grid> is to provide an extensible, uniform, |
95
|
|
|
|
|
|
|
object-oriented interface to all kinds of grid-shaped data. A key |
96
|
|
|
|
|
|
|
behaviour I'm after is to perform an incremental read over a |
97
|
|
|
|
|
|
|
potentially large data source, so as not to unnecessarily gobble up |
98
|
|
|
|
|
|
|
system resources. |
99
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
=head1 DEVELOPER RELEASE |
101
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
Odds are I will probably decide to change the interface at some point |
103
|
|
|
|
|
|
|
before locking in, and I don't want to guarantee consistency yet. If I |
104
|
|
|
|
|
|
|
do, and you use this, your code will probably break. |
105
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
Suffice to say this module is B<alpha quality> at best. |
107
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
=head1 METHODS |
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
=head2 parse $file | %params |
111
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
The principal way to instantiate a L<Data::Grid> object is through the |
113
|
|
|
|
|
|
|
C<parse> factory method. This method detects the MIME type of the data source and loads the matching driver. |
114
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
=cut |
116
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
# Factory method: open the data source, detect its MIME type and hand the
# parameters to the driver class registered for that type in %MAP.
#
# Call as Data::Grid->parse($source) or Data::Grid->parse(%params).  The
# only parameter read here is "source"; every parameter, plus the "fh"
# key set below, is passed on to the driver's new().
#
# "source" may be:
#   * a plain string, taken to be a filename and opened read-only;
#   * a SCALAR reference, whose referent is wrapped in an IO::Scalar;
#   * a GLOB reference or a blessed IO::Seekable object, used as the
#     filehandle directly.
#
# Croaks if no source is given, if the source is a reference of any other
# kind, if the file cannot be opened, if no driver is mapped to the
# detected type, or if the driver module fails to load.
#
# Returns whatever the driver's new() returns.
sub parse {
    my ($class, @args) = @_;

    # None of this file's visible "use" lines loads Carp, so load it here
    # rather than rely on another module having pulled it in.
    require Carp;

    # A single argument is the source itself; otherwise key/value pairs.
    my %p = @args == 1 ? (source => $args[0]) : @args;

    Carp::croak("I can't do any work unless you specify a data source.")
        unless defined $p{source};

    my $source = $p{source};
    my $kind   = ref $source;

    if (!$kind) {
        # A string is assumed to be a filename.  The explicit read mode
        # keeps characters such as '>' or '|' in the name from being
        # interpreted as an open mode.
        $p{fh} = IO::File->new($source, 'r')
            or Carp::croak("Can't open $source: $!");
    }
    elsif ($kind eq 'SCALAR') {
        # scalar ref as a literal
        $p{fh} = IO::Scalar->new($source);
    }
    elsif ($kind eq 'GLOB'
        or Scalar::Util::blessed($source) && $source->isa('IO::Seekable')) {
        # ioref as just a straight fh
        $p{fh} = $source;
    }
    elsif ($kind eq 'ARRAY') {
        # An array ref as a list of lines was never implemented; say so
        # instead of failing later on an undefined filehandle.
        Carp::croak('Array reference sources are not supported yet.');
    }
    else {
        Carp::croak("Don't know how to read from a $kind reference.");
    }

    binmode $p{fh};

    # Detect the MIME type, then rewind so the driver starts at the top.
    my $type = File::MMagic->new->checktype_filehandle($p{fh});
    seek $p{fh}, 0, 0;

    my $driver = $MAP{$type}
        or Carp::croak("There is no driver mapped to $type");

    # Load the driver by file name with a block eval; no string eval.
    (my $file = "$driver.pm") =~ s{::}{/}g;
    eval { require $file; 1 }
        or Carp::croak("Type $type points to driver $driver "
            . "which is broken or nonexistent: $@");

    return $driver->new(%p);
}
163
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
=head2 tables |
165
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
Retrieve the tables |
167
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
=cut |
169
|
|
|
|
|
|
|
|
170
|
0
|
|
|
0
|
1
|
|
# Placeholder for retrieving the tables: there is no implementation here,
# so the call yields nothing (empty list, or undef in scalar context).
sub tables {
    return;
}
172
|
|
|
|
|
|
|
|
173
|
|
|
|
|
|
|
=head1 EXTENSION INTERFACE |
174
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
=head2 new |
176
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
This method is only part of the extension interface. It is a basic |
178
|
|
|
|
|
|
|
utility constructor intended to take an already-parsed object and |
179
|
|
|
|
|
|
|
parameters and proxy them. |
180
|
|
|
|
|
|
|
|
181
|
|
|
|
|
|
|
=cut |
182
|
|
|
|
|
|
|
|
183
|
0
|
|
|
0
|
1
|
|
# Placeholder constructor for the extension interface: there is no
# implementation here, so the call yields nothing (empty list, or undef
# in scalar context).
sub new {
    return;
}
185
|
|
|
|
|
|
|
|
186
|
|
|
|
|
|
|
=head2 table_class |
187
|
|
|
|
|
|
|
|
188
|
|
|
|
|
|
|
Returns the class to use for instantiating tables. Defaults to |
189
|
|
|
|
|
|
|
L<Data::Grid::Table>, which is an abstract class. Override this method |
190
|
|
|
|
|
|
|
with your own value for extensions. |
191
|
|
|
|
|
|
|
|
192
|
|
|
|
|
|
|
=cut |
193
|
|
|
|
|
|
|
|
194
|
|
|
|
|
|
|
# Name of the class used for instantiating tables; override in a
# subclass to substitute another one.
sub table_class {
    my $default = 'Data::Grid::Table';
    return $default;
}
197
|
|
|
|
|
|
|
|
198
|
|
|
|
|
|
|
=head2 row_class |
199
|
|
|
|
|
|
|
|
200
|
|
|
|
|
|
|
Returns the class to use for instantiating rows. Defaults to |
201
|
|
|
|
|
|
|
L<Data::Grid::Row>. |
202
|
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
=cut |
204
|
|
|
|
|
|
|
|
205
|
|
|
|
|
|
|
# Name of the class used for instantiating rows; override in a subclass
# to substitute another one.
sub row_class {
    my $default = 'Data::Grid::Row';
    return $default;
}
208
|
|
|
|
|
|
|
|
209
|
|
|
|
|
|
|
=head2 cell_class |
210
|
|
|
|
|
|
|
|
211
|
|
|
|
|
|
|
Returns the class to use for instantiating cells. Defaults to |
212
|
|
|
|
|
|
|
L<Data::Grid::Cell>, again an abstract class. |
213
|
|
|
|
|
|
|
|
214
|
|
|
|
|
|
|
=cut |
215
|
|
|
|
|
|
|
|
216
|
|
|
|
|
|
|
# Name of the class used for instantiating cells; override in a subclass
# to substitute another one.
sub cell_class {
    my $default = 'Data::Grid::Cell';
    return $default;
}
219
|
|
|
|
|
|
|
|
220
|
|
|
|
|
|
|
=head1 AUTHOR |
221
|
|
|
|
|
|
|
|
222
|
|
|
|
|
|
|
Dorian Taylor, C<< >> |
223
|
|
|
|
|
|
|
|
224
|
|
|
|
|
|
|
=head1 BUGS |
225
|
|
|
|
|
|
|
|
226
|
|
|
|
|
|
|
Please report any bugs or feature requests to C<bug-data-grid at
|
227
|
|
|
|
|
|
|
rt.cpan.org>, or through the web interface at |
228
|
|
|
|
|
|
|
L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=Data-Grid>. I will |
229
|
|
|
|
|
|
|
be notified, and then you'll automatically be notified of progress on |
230
|
|
|
|
|
|
|
your bug as I make changes. |
231
|
|
|
|
|
|
|
|
232
|
|
|
|
|
|
|
=head1 SUPPORT |
233
|
|
|
|
|
|
|
|
234
|
|
|
|
|
|
|
You can find documentation for this module with the perldoc command. |
235
|
|
|
|
|
|
|
|
236
|
|
|
|
|
|
|
perldoc Data::Grid |
237
|
|
|
|
|
|
|
|
238
|
|
|
|
|
|
|
You can also look for information at: |
239
|
|
|
|
|
|
|
|
240
|
|
|
|
|
|
|
=over 4 |
241
|
|
|
|
|
|
|
|
242
|
|
|
|
|
|
|
=item * RT: CPAN's request tracker |
243
|
|
|
|
|
|
|
|
244
|
|
|
|
|
|
|
L<http://rt.cpan.org/NoAuth/Bugs.html?Dist=Data-Grid> |
245
|
|
|
|
|
|
|
|
246
|
|
|
|
|
|
|
=item * AnnoCPAN: Annotated CPAN documentation |
247
|
|
|
|
|
|
|
|
248
|
|
|
|
|
|
|
L<http://annocpan.org/dist/Data-Grid> |
249
|
|
|
|
|
|
|
|
250
|
|
|
|
|
|
|
=item * CPAN Ratings |
251
|
|
|
|
|
|
|
|
252
|
|
|
|
|
|
|
L<http://cpanratings.perl.org/d/Data-Grid> |
253
|
|
|
|
|
|
|
|
254
|
|
|
|
|
|
|
=item * Search CPAN |
255
|
|
|
|
|
|
|
|
256
|
|
|
|
|
|
|
L<http://search.cpan.org/dist/Data-Grid/> |
257
|
|
|
|
|
|
|
|
258
|
|
|
|
|
|
|
=back |
259
|
|
|
|
|
|
|
|
260
|
|
|
|
|
|
|
|
261
|
|
|
|
|
|
|
=head1 SEE ALSO |
262
|
|
|
|
|
|
|
|
263
|
|
|
|
|
|
|
L, L, L, |
264
|
|
|
|
|
|
|
L |
265
|
|
|
|
|
|
|
|
266
|
|
|
|
|
|
|
=head1 LICENSE AND COPYRIGHT |
267
|
|
|
|
|
|
|
|
268
|
|
|
|
|
|
|
Copyright 2010 Dorian Taylor. |
269
|
|
|
|
|
|
|
|
270
|
|
|
|
|
|
|
Licensed under the Apache License, Version 2.0 (the "License"); you |
271
|
|
|
|
|
|
|
may not use this file except in compliance with the License. You may |
272
|
|
|
|
|
|
|
obtain a copy of the License at |
273
|
|
|
|
|
|
|
L<http://www.apache.org/licenses/LICENSE-2.0>. |
274
|
|
|
|
|
|
|
|
275
|
|
|
|
|
|
|
Unless required by applicable law or agreed to in writing, software |
276
|
|
|
|
|
|
|
distributed under the License is distributed on an "AS IS" BASIS, |
277
|
|
|
|
|
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or |
278
|
|
|
|
|
|
|
implied. See the License for the specific language governing |
279
|
|
|
|
|
|
|
permissions and limitations under the License. |
280
|
|
|
|
|
|
|
|
281
|
|
|
|
|
|
|
=cut |
282
|
|
|
|
|
|
|
|
283
|
|
|
|
|
|
|
1; # End of Data::Grid |