| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package OpenOffice::Parse::SXC; |
|
2
|
|
|
|
|
|
|
|
|
3
|
2
|
|
|
2
|
|
30901
|
use 5.006; |
|
|
2
|
|
|
|
|
8
|
|
|
|
2
|
|
|
|
|
93
|
|
|
4
|
2
|
|
|
2
|
|
10
|
use strict; |
|
|
2
|
|
|
|
|
5
|
|
|
|
2
|
|
|
|
|
75
|
|
|
5
|
2
|
|
|
2
|
|
11
|
use warnings; |
|
|
2
|
|
|
|
|
9
|
|
|
|
2
|
|
|
|
|
71
|
|
|
6
|
2
|
|
|
2
|
|
4155
|
use XML::Parser; |
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
use IO::File; |
|
8
|
|
|
|
|
|
|
require Exporter; |
|
9
|
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
our @ISA = qw(Exporter); |
|
11
|
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
# Items to export into callers namespace by default. Note: do not export |
|
13
|
|
|
|
|
|
|
# names by default without a very good reason. Use EXPORT_OK instead. |
|
14
|
|
|
|
|
|
|
# Do not simply export all your public functions/methods/constants. |
|
15
|
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
# This allows declaration use OpenOffice::Parse::SXC ':all'; |
|
17
|
|
|
|
|
|
|
# If you do not need this, moving things directly into @EXPORT or @EXPORT_OK |
|
18
|
|
|
|
|
|
|
# will save memory. |
|
19
|
|
|
|
|
|
|
our %EXPORT_TAGS = ( 'all' => [ qw( |
|
20
|
|
|
|
|
|
|
parse_sxc csv_quote dump_sxc_file |
|
21
|
|
|
|
|
|
|
) ] ); |
|
22
|
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } ); |
|
24
|
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
our @EXPORT = qw( |
|
26
|
|
|
|
|
|
|
); |
|
27
|
|
|
|
|
|
|
our $VERSION = '0.03'; |
|
28
|
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
# Option names accepted by set_options(); any other key is rejected there.
my %valid_options = map { $_ => 1 } qw( worksheets no_trim );
|
32
|
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
################################################################## |
|
34
|
|
|
|
|
|
|
# EXPORT_OK methods: |
|
35
|
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
# csv_quote( $text )
#
# Turn one value into a CSV field:
#   - undef becomes the empty string,
#   - every newline is deleted,
#   - embedded double quotes are doubled,
#   - the result is wrapped in double quotes when it contains a comma,
#     a double quote or a single quote.
# Returns the field as a string.
sub csv_quote {
    my ( $field ) = @_;
    return "" unless defined $field;

    $field =~ tr/\n//d;                       # Remove all newlines!

    # Doubling the quotes cannot add or remove a trigger character, so the
    # decision can be taken before the substitution.
    my $wrap = ( $field =~ /[,"']/ );
    $field =~ s/\"/\"\"/g;

    return $wrap ? "\"$field\"" : $field;
}
|
46
|
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
# parse_sxc( $sxc_filename, %options )
#
# Convenience wrapper: builds an OpenOffice::Parse::SXC object with the
# given options, installs that object as its own data handler (its
# row()/worksheet()/workbook() methods collect rows in memory), parses the
# file and returns whatever parse_sxc_rows() returns for the collected rows.
sub parse_sxc {
    my ( $sxc_filename, %options ) = @_;

    my $reader = OpenOffice::Parse::SXC->new( %options );
    $reader->set_data_handler( $reader );
    $reader->parse_file( $sxc_filename );

    return $reader->parse_sxc_rows;
}
|
57
|
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
# Used for debugging, dump_sxc_file parses a file and dumps the resultant objects |
|
59
|
|
|
|
|
|
|
# onto STDOUT. This is a good way to view just what's going on behind the scenes. |
|
60
|
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
# dump_sxc_file( $filename )
#
# Debugging aid: parses $filename with XML::Parser in "Objects" style and
# prints a Data::Dumper rendering of the resulting tree to STDOUT.
#
# NOTE(review): $filename is handed straight to XML::Parser->parsefile,
# without the unzip step that parse_file() uses -- presumably this expects
# an already-extracted content.xml; confirm against callers.
sub dump_sxc_file {
    my $filename = shift;

    # Data::Dumper is not loaded at file level, so load it here and call
    # Dumper by its fully qualified name.
    require Data::Dumper;

    my $Parser  = XML::Parser->new( Style => "Objects" );
    my $results = $Parser->parsefile( $filename );

    print Data::Dumper::Dumper( $results );
}
|
68
|
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
################################################################## |
|
70
|
|
|
|
|
|
|
# The data_handler routines: |
|
71
|
|
|
|
|
|
|
# |
|
72
|
|
|
|
|
|
|
# These exist to support the simple parse_sxc() interface.
|
73
|
|
|
|
|
|
|
# |
|
74
|
|
|
|
|
|
|
# See parse_sxc() for more details |
|
75
|
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
# data_handler callback: row( $self, $sxc, $row )
#
# Appends $row (passed by end_row() as an array reference of cell values)
# to the list kept in $self->{parse_sxc_rows}.  The second argument is
# ignored.
sub row {
    my ( $self, undef, $row ) = @_;
    return push @{ $self->{parse_sxc_rows} }, $row;
}
|
83
|
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
# data_handler callback: worksheet( $self, $sxc, $worksheet_name )
#
# Called when a worksheet starts.  Makes sure the row store exists; rows
# already collected from earlier worksheets are left in place.  The
# worksheet name itself is not used.
sub worksheet {
    my ( $self, undef, $worksheet ) = @_;

    unless ( $self->{parse_sxc_rows} ) {
        $self->{parse_sxc_rows} = [];
    }
}
|
93
|
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
# data_handler callback: workbook( $self, $sxc, $workbook )
#
# Called by parse_file() with the name of the file about to be parsed.
# The default handler does nothing with it and simply hands the name back.
sub workbook {
    my ( $self, undef, $workbook ) = @_;
    return $workbook;
}
|
100
|
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
# parse_sxc_rows()
#
# Returns the list of rows collected by the default data handler (each row
# is whatever was passed to row(), normally an array reference of cells).
# Returns an empty list when nothing has been collected yet -- for example
# when the document contained no accepted worksheet -- instead of dying on
# an undefined array reference.
sub parse_sxc_rows {
    my $self = shift;
    return @{ $self->{parse_sxc_rows} || [] };
}
|
105
|
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
# clear_parse_sxc_rows()
#
# Discards every collected row by replacing the store with a new empty
# array reference.
sub clear_parse_sxc_rows {
    my ( $self ) = @_;
    return $self->{parse_sxc_rows} = [];
}
|
110
|
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
# End data_handler routines |
|
113
|
|
|
|
|
|
|
################################################################## |
|
114
|
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
################################################################## |
|
116
|
|
|
|
|
|
|
# Main OpenOffice::Parse::SXC methods: |
|
117
|
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
# new( %options )
#
# Constructor.  Any options given are validated and stored by
# set_options().  The new object starts with both repeat counters at 1, an
# empty cell list, itself installed as data handler (unless one is already
# present) and all three accept_* switches turned off.
sub new {
    my ( $type, %options ) = @_;

    my $self = bless { options => {} }, $type;
    $self->set_options( %options ) if %options;

    $self->repeat_following_cell( 1 );      # Times the cell is to be repeated
    $self->repeat_following_row( 1 );       # Times the row is to be repeated
    $self->reset_cell_list;                 # Clear out the cell list

    # Without a data handler, fall back to this object: its own row(),
    # worksheet() and workbook() methods collect the rows in memory.
    $self->set_data_handler( $self ) unless $self->get_data_handler;

    # By default, start off accepting NOTHING
    $self->accept_rows( 0 );
    $self->accept_cells( 0 );
    $self->accept_text( 0 );

    return $self;
}
|
138
|
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
# PUBLIC parse() accepts a filehandle |
|
140
|
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
# PUBLIC parse( $SXC_FH )
#
# Runs XML::Parser over the data source $SXC_FH, routing start-tag,
# character and end-tag events to this object's start_handler(),
# char_handler() and end_handler().  Returns the value of
# XML::Parser->parse.
sub parse {
    my ( $self, $SXC_FH ) = @_;

    # The callbacks are closures over $self so that the parser events reach
    # this particular object.
    my %handlers = (
        Start => sub { $self->start_handler( @_ ); },
        Char  => sub { $self->char_handler( @_ ); },
        End   => sub { $self->end_handler( @_ ); },
    );

    my $Parser = XML::Parser->new( Handlers => \%handlers );
    my $parsed = $Parser->parse( $SXC_FH );
    return $parsed;
}
|
157
|
|
|
|
|
|
|
|
|
158
|
|
|
|
|
|
|
# PUBLIC calls parse() after opening a filehandle |
|
159
|
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
# PUBLIC parse_file( $filename )
#
# Opens a pipe from "unzip -p $filename content.xml", tells the data
# handler which workbook is being read, and feeds the pipe to parse().
# Returns whatever parse() returns.
#
# Dies when no filename is given, when $filename is not a plain file, or
# when the pipe cannot be opened.
sub parse_file {
    my $self     = shift;
    my $filename = shift || die "No file to parse";

    if( ! -f $filename ) {
        die "Could not find file '$filename' to parse";
    }

    # List-form pipe open: the filename is passed to unzip as a single
    # argument and never goes through a shell, so spaces and shell
    # metacharacters in the name are harmless.  (List-form pipe opens need
    # perl 5.8 or later.)
    my $SXC_FH = IO::File->new;
    open( $SXC_FH, '-|', 'unzip', '-p', $filename, 'content.xml' )
        or die "Could not open pipe: 'unzip -p $filename content.xml': $!";

    $self->get_data_handler->workbook( $self, $filename );
    return $self->parse( $SXC_FH );
}
|
172
|
|
|
|
|
|
|
|
|
173
|
|
|
|
|
|
|
# The XML::Parser handler for ending of tags. |
|
174
|
|
|
|
|
|
|
# It's used to trigger the end of cell and end of row actions. |
|
175
|
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
# The XML::Parser handler for closing tags: end_handler( $Expat, $type ).
# It triggers the end-of-cell and end-of-row actions.
sub end_handler {
    my ( $self, $Expat, $type ) = @_;

    if ( $type eq "table:table-row" ) {
        return unless $self->accept_rows;
        $self->accept_cells( 0 );
        $self->end_row;                     # The row is done
    }
    elsif ( $type eq "table:table-cell" ) {
        return unless $self->accept_cells;
        $self->end_cell if $self->accept_text;  # The cell is done
        $self->accept_text( 0 );
    }
    elsif ( $type eq "text:p" ) {
        # Cell text arrives inside text:p elements, none of which ends in a
        # newline of its own.  A newline is appended after every closed
        # text:p, and end_cell() strips the final one when the cell is
        # closed.
        $self->append_cell_data( "\n" ) if $self->accept_text;
    }
}
|
209
|
|
|
|
|
|
|
|
|
210
|
|
|
|
|
|
|
# E() implements an "Object O Exists in list L" boolean function |
|
211
|
|
|
|
|
|
|
|
|
212
|
|
|
|
|
|
|
# E( $item, @set )
#
# "Element of" test: returns 1 when $item is string-equal (eq) to at least
# one member of @set, 0 otherwise.
sub E {
    my ( $item, @set ) = @_;
    my $hits = grep { $item eq $_ } @set;
    return $hits ? 1 : 0;
}
|
220
|
|
|
|
|
|
|
|
|
221
|
|
|
|
|
|
|
# The start_handler for XML::Parser. |
|
222
|
|
|
|
|
|
|
# It's responsible for things such as the following: |
|
223
|
|
|
|
|
|
|
# |
|
224
|
|
|
|
|
|
|
# - Locking and allowing the parsing of worksheets, rows, and cells. |
|
225
|
|
|
|
|
|
|
# - |
|
226
|
|
|
|
|
|
|
|
|
227
|
|
|
|
|
|
|
# The XML::Parser handler for opening tags:
# start_handler( $Expat, $type, %attributes ).
#
# It opens and closes the worksheet/row/cell/text gates and records the
# repeat counts announced by rows and cells.
sub start_handler {
    my ( $self, $Expat, $type, %args ) = @_;

    if ( $type eq "table:table" ) {                 # WORKSHEET
        my $sheet_name = $args{"table:name"};
        my $wanted     = $self->get_option( "worksheets" );

        # When the user listed worksheets, only those are processed.
        if ( $wanted and not E( $sheet_name, @{$wanted} ) ) {
            $self->accept_rows( 0 );                # Do not accept row data
        }
        else {
            $self->accept_rows( 1 );                # Accept rows
            $self->set_current_worksheet_name( $sheet_name );
            $self->get_data_handler->worksheet( $self, $sheet_name );
        }
    }
    elsif ( $type eq "table:table-row" ) {          # ROW
        if ( $self->accept_rows ) {
            my $row_repeat = $args{"table:number-rows-repeated"};
            # Cause the row being opened to be repeated...
            $self->repeat_following_row( $row_repeat ) if $row_repeat;
            $self->accept_cells( 1 );
        }
    }
    elsif ( $type eq "table:table-cell" ) {         # CELL
        if ( $self->accept_cells ) {
            my $cell_repeat = $args{"table:number-columns-repeated"};
            $self->repeat_following_cell( $cell_repeat ) if $cell_repeat;
            $self->accept_text( 1 );
        }
    }
    elsif ( $type eq "text:s" ) {                   # TEXT
        # text:s is treated as a run of spaces; its text:c attribute gives
        # the number of characters and defaults to 1.  Other special text
        # elements, if any exist, would be handled here as well.
        if ( $self->accept_text ) {
            my $multiplier = $args{"text:c"} || 1;
            $self->append_cell_data( " " x $multiplier );
        }
    }
    # An opening text:p needs no action here: its content is gathered by
    # char_handler() and its end is handled in end_handler().
}
|
281
|
|
|
|
|
|
|
|
|
282
|
|
|
|
|
|
|
# The XML::Parser character handler. It builds up cells piece by piece |
|
283
|
|
|
|
|
|
|
|
|
284
|
|
|
|
|
|
|
# The XML::Parser character handler: char_handler( $Expat, $text ).
# While text is being accepted, each piece of character data is appended
# to the cell that is currently being built; otherwise it is dropped.
sub char_handler {
    my ( $self, $Expat, $text ) = @_;
    return unless $self->accept_text;
    $self->append_cell_data( $text );   # Build up cell data from multiple bits of text
}
|
293
|
|
|
|
|
|
|
|
|
294
|
|
|
|
|
|
|
################################################################## |
|
295
|
|
|
|
|
|
|
# These routines restrict what gets processed. They each |
|
296
|
|
|
|
|
|
|
# take a boolean value, turning the switch on or off. There |
|
297
|
|
|
|
|
|
|
# are 3 levels of restriction: rows, cells, and text. |
|
298
|
|
|
|
|
|
|
|
|
299
|
|
|
|
|
|
|
# accept_cells( [$value] )
#
# Combined getter/setter for the cell-level switch.  Without an argument
# (or with undef) the current setting is returned; with a defined value
# the switch is set to it and that value is returned.
sub accept_cells {
    my ( $self, $value ) = @_;
    return $self->{accept_cells} unless defined $value;
    return $self->{accept_cells} = $value;
}
|
309
|
|
|
|
|
|
|
# accept_rows( [$value] )
#
# Combined getter/setter for the row-level switch.  Without an argument
# (or with undef) the current setting is returned; with a defined value
# the switch is set to it and that value is returned.
sub accept_rows {
    my ( $self, $value ) = @_;
    return $self->{accept_rows} unless defined $value;
    return $self->{accept_rows} = $value;
}
|
319
|
|
|
|
|
|
|
# accept_text( [$value] )
#
# Combined getter/setter for the text-level switch.  Without an argument
# (or with undef) the current setting is returned; with a defined value
# the switch is set to it and that value is returned.
sub accept_text {
    my ( $self, $value ) = @_;
    return $self->{accept_text} unless defined $value;
    return $self->{accept_text} = $value;
}
|
329
|
|
|
|
|
|
|
|
|
330
|
|
|
|
|
|
|
################################################################## |
|
331
|
|
|
|
|
|
|
|
|
332
|
|
|
|
|
|
|
# set_current_worksheet_name( $name )
#
# Records $name as the worksheet currently being processed and returns it.
sub set_current_worksheet_name {
    my ( $self, $name ) = @_;
    return $self->{current_worksheet_name} = $name;
}
|
336
|
|
|
|
|
|
|
|
|
337
|
|
|
|
|
|
|
# PUBLIC, returns the name of the current worksheet. |
|
338
|
|
|
|
|
|
|
|
|
339
|
|
|
|
|
|
|
# PUBLIC get_current_worksheet_name()
#
# Returns the worksheet name last stored by set_current_worksheet_name(),
# or undef when none has been stored.
sub get_current_worksheet_name {
    my ( $self ) = @_;
    return $self->{current_worksheet_name};
}
|
343
|
|
|
|
|
|
|
|
|
344
|
|
|
|
|
|
|
|
|
345
|
|
|
|
|
|
|
# Reset the list of cells to the empty list |
|
346
|
|
|
|
|
|
|
|
|
347
|
|
|
|
|
|
|
# reset_cell_list()
#
# Starts a new, empty list of cells.  The previous list is replaced rather
# than emptied, so an array reference handed out earlier keeps its content.
sub reset_cell_list {
    my ( $self ) = @_;
    return $self->{cells} = [];
}
|
351
|
|
|
|
|
|
|
|
|
352
|
|
|
|
|
|
|
# PUBLIC Set some options via a hash |
|
353
|
|
|
|
|
|
|
|
|
354
|
|
|
|
|
|
|
# PUBLIC set_options( %options )
#
# Merges %options into the options already stored on the object; values
# given here override earlier ones.  Every key must be listed in
# %valid_options, otherwise the call dies with a message naming the
# offending option, its value and the class of the object.
sub set_options {
    my $self    = shift;
    my %options = @_;

    # Check to ensure the options are valid
    for my $name ( keys %options ) {
        next if $valid_options{$name};

        my $value = defined $options{$name} ? $options{$name} : 'undef';

        # ref() needs its parentheses here: a bare `ref $self . "..."` is
        # parsed as ref( $self . "..." ), which yields an empty string and
        # drops the class name from the message.
        die "Invalid option: '$name' ($value) passed as an option to "
            . ref( $self ) . "->set_options()";
    }

    $self->{options} = { %{$self->{options}}, %options };
}
|
367
|
|
|
|
|
|
|
|
|
368
|
|
|
|
|
|
|
# PUBLIC Get an option |
|
369
|
|
|
|
|
|
|
|
|
370
|
|
|
|
|
|
|
# PUBLIC get_option( $opt_name )
#
# Returns the stored value of the option $opt_name, or undef when that
# option has not been set.
sub get_option {
    my ( $self, $opt_name ) = @_;
    return $self->{options}{$opt_name};
}
|
375
|
|
|
|
|
|
|
|
|
376
|
|
|
|
|
|
|
# append_cell_data( $text )
#
# Appends $text to the data of the cell currently being built and returns
# the accumulated string.
sub append_cell_data {
    my ( $self, $text ) = @_;
    return $self->{current_cell_data} .= $text;
}
|
380
|
|
|
|
|
|
|
|
|
381
|
|
|
|
|
|
|
# clear_cell()
#
# Resets the data of the cell currently being built to the empty string.
sub clear_cell {
    my ( $self ) = @_;
    return $self->{current_cell_data} = "";
}
|
385
|
|
|
|
|
|
|
|
|
386
|
|
|
|
|
|
|
# Specify that a cell is to be repeated N times. N is usually 1. |
|
387
|
|
|
|
|
|
|
|
|
388
|
|
|
|
|
|
|
# repeat_following_cell( $count )
#
# Sets how many copies of the current cell end_cell() adds to the cell
# list.  $count is usually 1.
sub repeat_following_cell {
    my ( $self, $count ) = @_;
    return $self->{cell_repeat} = $count;
}
|
392
|
|
|
|
|
|
|
|
|
393
|
|
|
|
|
|
|
# See repeat_following_cell() |
|
394
|
|
|
|
|
|
|
|
|
395
|
|
|
|
|
|
|
# repeat_following_row( $count )
#
# Sets how many times end_row() hands the current row to the data handler.
# $count is usually 1.
sub repeat_following_row {
    my ( $self, $count ) = @_;
    return $self->{row_repeat} = $count;
}
|
399
|
|
|
|
|
|
|
|
|
400
|
|
|
|
|
|
|
# The data_handler is how we use this module. |
|
401
|
|
|
|
|
|
|
|
|
402
|
|
|
|
|
|
|
# set_data_handler( $data_handler )
#
# Installs the object whose workbook(), worksheet() and row() methods are
# called while parsing.  Dies with "No row handler provided" when the
# argument is missing or false.
sub set_data_handler {
    my ( $self, $data_handler ) = @_;
    die "No row handler provided" unless $data_handler;
    return $self->{data_handler} = $data_handler;
}
|
408
|
|
|
|
|
|
|
|
|
409
|
|
|
|
|
|
|
# get_data_handler()
#
# Returns the data handler installed by set_data_handler(), or undef when
# none has been installed.
sub get_data_handler {
    my ( $self ) = @_;
    return $self->{data_handler};
}
|
413
|
|
|
|
|
|
|
|
|
414
|
|
|
|
|
|
|
# The end of the row has been reached, we call the data_handler: |
|
415
|
|
|
|
|
|
|
|
|
416
|
|
|
|
|
|
|
# end_row()
#
# The end of a row has been reached: pass the collected cells to the data
# handler's row() method, once per requested repetition, then reset the
# row repeat count to 1 and start a new cell list.
#
# Rows announced with a repeat count of 500 or more are skipped entirely
# unless the "no_trim" option is set; the original author's reasoning is
# that OpenOffice pads the sheet with one huge repeated row which is
# unlikely to carry data.
#
# Every repetition hands over the same array reference.
sub end_row {
    my ( $self ) = @_;

    my $cells  = $self->{cells};
    my $repeat = $self->{row_repeat};

    if ( $repeat < 500 or $self->get_option( "no_trim" ) ) {
        # Assume the row handler is an object
        $self->get_data_handler->row( $self, $cells ) for 1 .. $repeat;
    }

    $self->repeat_following_row( 1 );       # Default 1
    $self->reset_cell_list;                 # Clear out cells
}
|
432
|
|
|
|
|
|
|
|
|
433
|
|
|
|
|
|
|
# Ends the current cell. It will be added to the cell list. |
|
434
|
|
|
|
|
|
|
|
|
435
|
|
|
|
|
|
|
# end_cell()
#
# Ends the current cell: strips the trailing newline that end_handler()
# appended after the last text:p, adds the cell text to the cell list once
# per requested repetition, then resets the cell repeat count to 1 and
# clears the cell buffer.
sub end_cell {
    my $self = shift;

    # The buffer stays undefined until the first append_cell_data() or
    # clear_cell() call.  Treat that as an empty cell so that chomp does
    # not warn and every empty cell is stored as "" rather than undef.
    $self->{current_cell_data} = "" if( ! defined $self->{current_cell_data} );

    chomp $self->{current_cell_data};       # remove the last newline
    for( 1 .. $self->{cell_repeat} ) {
        push @{$self->{cells}}, $self->{current_cell_data};
    }
    $self->repeat_following_cell( 1 );      # Default to 1
    $self->clear_cell;
}
|
444
|
|
|
|
|
|
|
|
|
445
|
|
|
|
|
|
|
|
|
446
|
|
|
|
|
|
|
1; |
|
447
|
|
|
|
|
|
|
__END__ |