File Coverage

lib/Spreadsheet/Reader/ExcelXML/WorksheetToRow.pm
Criterion Covered Total %
statement 88 90 97.7
branch 37 40 92.5
condition 17 21 80.9
subroutine 11 11 100.0
pod 1 1 100.0
total 154 163 94.4


line stmt bran cond sub pod time code
1             package Spreadsheet::Reader::ExcelXML::WorksheetToRow;
2             our $AUTHORITY = 'cpan:JANDREW';
3 19     19   36689 use version; our $VERSION = version->declare('v0.16.8');
  19         36  
  19         141  
4             ###LogSD warn "You uncovered internal logging statements for Spreadsheet::Reader::ExcelXML::WorksheetToRow-$VERSION";
5              
6 19     19   2135 use 5.010;
  19         54  
7 19     19   76 use Moose::Role;
  19         30  
  19         171  
8             requires qw(
9             not_end_of_file advance_row_position close_the_file
10             build_row_data start_the_file_over
11             );# current_row_node_parsed
12 19     19   73116 use Clone 'clone';
  19         29  
  19         984  
13 19     19   75 use Carp qw( confess );
  19         29  
  19         762  
14 19         161 use Types::Standard qw(
15             InstanceOf ArrayRef Maybe HashRef
16             Bool Int
17 19     19   77 );
  19         27  
18 19     19   15838 use MooseX::ShortCut::BuildInstance qw ( build_instance should_re_use_classes );
  19         33  
  19         175  
19             should_re_use_classes( 1 );
20 19     19   7429 use lib '../../../../lib';
  19         25  
  19         317  
21             ###LogSD use Log::Shiras::Telephone;
22              
23 19     19   10049 use Spreadsheet::Reader::ExcelXML::Row;
  19         92  
  19         10788  
24             #~ use Data::Dumper;
25             #########1 Dispatch Tables & Package Variables 5#########6#########7#########8#########9
26              
27              
28              
29             #########1 Public Attributes 3#########4#########5#########6#########7#########8#########9
30              
31             has cache_positions =>(# Boolean switch read by go_to_or_past_row (via should_cache_positions) to decide whether built rows are kept in the row cache
32             isa => Bool,
33             reader => 'should_cache_positions',
34             default => 1,# Caching is on unless the consumer passes a false value
35             );
36              
37             #########1 Public Methods 3#########4#########5#########6#########7#########8#########9
38              
39             sub go_to_or_past_row{# Counting from 1!
               # Purpose: make the requested row (or the first later row that
               #          build_row_data returns data for) the currently loaded row.
               # Accepts: $target_row - optional row number counting from 1; when it
               #          is undefined the current row number + 1 is used instead
               # Returns: the row number actually arrived at, or 'EOF' when the end
               #          of the file is reached first. With caching off, a
               #          non-reference result from build_row_data is returned as-is.
40 554     554 1 5183 my( $self, $target_row ) = @_;
41 554 100       12769 my $current_row = $self->has_new_row_inst ? $self->get_new_row_number : 0;
42 554         15060 my $max_known_row = $self->_max_row_position_recorded - 1;# The array has position 0
43 554   66     4843 $target_row //= $current_row + 1;
44             ###LogSD my $phone = Log::Shiras::Telephone->new( name_space =>
45             ###LogSD $self->get_all_space . '::WorksheetToRow::go_to_or_past_row', );
46             ###LogSD $phone->talk( level => 'info', message => [
47             ###LogSD "Indexing the row forward to find row: $target_row",
48             ###LogSD "From current row: $current_row",
49             ###LogSD "..with max known row: $max_known_row",
50             ###LogSD "..with position caching set to: " . $self->should_cache_positions ] );
51              
52             # Handle fully cached files with requested EOF here
               # NOTE(review): has_file is provided outside this role - presumably it
               # turns false after close_the_file is called in the EOF handling below
53 554 100 100     12972 if( !$self->has_file and $target_row > $max_known_row ){
54             ###LogSD $phone->talk( level => 'info', message => [
55             ###LogSD "Having already processed the whole file requested row -$target_row- doesn't exist" ] );
56 11         270 $self->_clear_new_row_inst;
57 11         31 return 'EOF';
58             }
59              
60             # Handle the known range of rows (by number only)
61 543         545 my $fast_forward;
62 543 100       1000 my $next_known_target = $max_known_row < $target_row ? $max_known_row : $target_row;
63             ###LogSD $phone->talk( level => 'info', message => [
64             ###LogSD "Calculated next known target: $next_known_target", $self->_get_all_positions ] );
               # Row numbers with no recorded position are skipped until a recorded
               # row (or the last known row) is reached
65 543   100     3700 while( ($next_known_target < $max_known_row) and !defined $self->_get_row_position( $next_known_target ) ){
66 896         25466 $next_known_target++;
67             ###LogSD $phone->talk( level => 'info', message => [
68             ###LogSD "Bumping next known target to: $next_known_target",
69             ###LogSD "To find a known row" ] );
70             }
71             # Find the right fast forward amount
72 543 100       1043 if( $next_known_target >= $target_row ){
73 335 100 100     701 if( $current_row == $next_known_target ){
    100          
    50          
74             ###LogSD $phone->talk( level => 'info', message => [
75             ###LogSD "Asked for a row that has already been built and loaded: $next_known_target" ] );
76 290         631 return $next_known_target;
77             }elsif( $current_row > 0 and $current_row < $next_known_target ){
               # Forward move inside the known range: the distance is the difference
               # between the two recorded positions
78 31         800 $fast_forward = $self->_get_row_position( $next_known_target ) - $self->_get_row_position( $current_row );
79             ###LogSD $phone->talk( level => 'info', message => [
80             ###LogSD "Target is forward so fast forward set to -$fast_forward- times to row: $next_known_target" ] );
81 31         747 $self->_clear_new_row_inst;# Clear old tracking for cached case
82             }elsif( $max_known_row > 0 ){
               # Target is behind the current row, or no row is currently loaded.
               # Without caching the file is restarted and read forward again;
               # with caching $fast_forward stays undefined
83 14 100       386 if( !$self->should_cache_positions ){
84 3         14 $self->start_the_file_over ;
85 3         82 $fast_forward = $self->_get_row_position( $next_known_target ) + 1;
86             ###LogSD $phone->talk( level => 'info', message => [
87             ###LogSD "Target was backward so reset the file and fast forwarding " .
88             ###LogSD "-$fast_forward- times to row: $next_known_target" ] );
89             }
90 14         367 $self->_clear_new_row_inst;# Clear old tracking especially for cached case
91             }else{ # Handles the brand new file case
92 0         0 $fast_forward = 0;
93             ###LogSD $phone->talk( level => 'info', message => [
94             ###LogSD "New file no fast forwarding to be done" ] );
95 0         0 $next_known_target = $current_row;
96             }
97             }
98 253         321 my $result = 1;
               # The file pointer is only moved when caching is off or the target
               # lies beyond the known range
99 253 100 66     1355 if( $fast_forward and ( !$self->should_cache_positions or $max_known_row < $target_row ) ){
      66        
100             ###LogSD $phone->talk( level => 'info', message => [
101             ###LogSD "Fast forwarding -$fast_forward- times to get to where we need to be in the file" ] );
102 4         16 $result = $self->advance_row_position( $fast_forward );
103             }
104 253         337 $current_row = $next_known_target;
105             ###LogSD $phone->talk( level => 'info', message => [
106             ###LogSD "Current row is now set to: $current_row" ] );
107              
108             # Update/build the new row node/inst if you need something in the known range
109 253 100       585 if( $current_row >= $target_row ){
110 45 100       1067 if( $self->should_cache_positions ){# Retrieve a known row
111             ###LogSD $phone->talk( level => 'info', message => [
112             ###LogSD "Using cached position for row: $current_row",
113             ###LogSD "..stored in position: " . $self->_get_row_position( $current_row ), ] );
114             ###LogSD $phone->talk( level => 'trace', message => [
115             ###LogSD "Cached row stack:", $self->_get_row_inst_all ] );
               # A fresh Row object is built from the cached hash on every request
116             my $row_node_ref = Spreadsheet::Reader::ExcelXML::Row->new(
117 41         60 %{$self->_get_row_inst( $self->_get_row_position( $current_row ) )},
  41         1068  
118             ###LogSD log_space => $self->get_log_space,
119             );
120 41         1202 $self->_set_new_row_inst( $row_node_ref );
121 41         149 return $current_row;
122             }else{# Build the row since caching is off
123 4         14 my $full_row_ref = $self->build_row_data;
124             ###LogSD $phone->talk( level => 'trace', message =>[
125             ###LogSD "row build returned:", $full_row_ref ] );
               # A non-reference build result is handed straight back to the caller
126 4 50       16 return $full_row_ref if !ref $full_row_ref;
127 4         102 my $row_node_ref = Spreadsheet::Reader::ExcelXML::Row->new(
128             %$full_row_ref,
129             ###LogSD log_space => $self->get_log_space,
130             );
131 4         107 $self->_set_new_row_inst( $row_node_ref );
132             ###LogSD $phone->talk( level => 'trace', message =>[
133             ###LogSD "Finished building: $current_row", ] );
134 4         39 return $current_row;
135             }
136             }
137              
138             # Handle processing unknown rows
139 208         214 my $base_row_ref;
               # Starting position for the read loop: the last known row's position
               # after a fast forward, the current row's position otherwise, or -1
               # when no row has been loaded yet
140 208 100       4943 my $current_row_position =
    50          
141             $fast_forward ? $self->_get_row_position( $max_known_row ) :
142             ($current_row > 0) ? $self->_get_row_position( $current_row ) : -1;
143             ###LogSD $phone->talk( level => 'info', message => [
144             ###LogSD "Current row position for row -$current_row- is now: $current_row_position" ] );
145 208         4310 INITIALROWREAD: while( $result ){
146             ###LogSD $phone->talk( level => 'info', message => [
147             ###LogSD "Need to read an additional unknown row since target row: $target_row",
148             ###LogSD "..is still greater than current row: $current_row" ] );
149 274         945 my $row_ref = $self->advance_row_position;
150             ###LogSD $phone->talk( level => 'info', message => [
151             ###LogSD "Current row top node is:", $row_ref ] );
152 274         438 $current_row_position++;
153              
154             # Handle EOF
155 274 100 66     7467 if( !$row_ref or $self->not_end_of_file == 0 ){
156             ###LogSD $phone->talk( level => 'debug', message =>[
157             ###LogSD "Already at the 'EOF' - returning failure", ] );
158              
159             # Adjust max row
               # (_max_row is lowered to the last recorded row number when it is larger)
160 21 100       535 if( $self->_max_row > $self->_max_row_position_recorded - 1 ){
161 1         30 $self->_set_max_row( $self->_max_row_position_recorded - 1 );
162             }
163              
164             #close file if caching is on
165 21 100       565 if( $self->should_cache_positions ){
166 16         107 $self->close_the_file;
167             }
168             # Don't kill sharedStrings here since it might be used for other worksheets
169 21         533 $self->_clear_new_row_inst;
170 21         119 return 'EOF';
171             }
172 253         321 $result = 1;
173              
174             #build the row and manage it
175 253         386 $current_row = $row_ref->{r};
176             ###LogSD $phone->talk( level => 'debug', message =>[
177             ###LogSD "Attempting the full row build for row number: $current_row", ] );
178 253         773 my $full_row_ref = $self->build_row_data;# Must-build is on since this is a used data set
179             ###LogSD $phone->talk( level => 'trace', message =>[
180             ###LogSD "row build returned:", $full_row_ref ] );
181 253 100       532 if( $full_row_ref ){
               # Record row number => position for every row that produced data
182 239         6674 $self->_set_row_position( $current_row => $current_row_position );
183             ###LogSD $phone->talk( level => 'debug', message =>[
184             ###LogSD "Updated row position stack:", $self->_get_all_positions, ] );
185 239 100       6149 if( $self->should_cache_positions ){
186             ###LogSD $phone->talk( level => 'debug', message =>[
187             ###LogSD "Caching row position: $current_row_position", $row_ref->{r}, $full_row_ref->{row_number} ] );
188 200         5157 $self->_set_row_inst( $current_row_position => $full_row_ref );
189             ###LogSD $phone->talk( level => 'trace', message =>[
190             ###LogSD "row node ref stack:", $self->_get_row_inst_all, ] );
191             }
192 239 100       700 if( $current_row >= $target_row ){
193             ###LogSD $phone->talk( level => 'debug', message =>[
194             ###LogSD "The row -$current_row- is greater than or equal to the target row: $target_row" ] );
195 187         4744 my $row_node_ref = Spreadsheet::Reader::ExcelXML::Row->new(
196             %$full_row_ref,
197             ###LogSD log_space => $self->get_log_space,
198             );
199 187         5048 $self->_set_new_row_inst( $row_node_ref );
200             # No need to increment $current_row_position here!!!
201 187         3236 last INITIALROWREAD;
202             }else{
203             ###LogSD $phone->talk( level => 'info', message =>[
204             ###LogSD "Cached an intermediate row - moving on" ] );
205             }
206             }else{
207             ###LogSD $phone->talk( level => 'debug', message =>[
208             ###LogSD "Found an empty row - moving on" ] );
209             }
210             }
211              
212             ###LogSD $phone->talk( level => 'info', message =>[
213             ###LogSD "Arrived at ( and built ) row: $current_row", ] );
214 187         751 return $current_row;
215             }
216              
217             #########1 Private Attributes 3#########4#########5#########6#########7#########8#########9
218              
219             has _row_position_lookup =>(# Indexed by row number; each defined slot holds the position go_to_or_past_row recorded for that row
220             isa => ArrayRef[ Maybe[Int] ],# Slots may be undef - go_to_or_past_row skips over undefined entries
221             traits =>['Array'],
222             default => sub{ [] },
223             reader => '_get_all_positions',
224             writer => '_set_all_positions',
225             handles =>{
226             _max_row_position_recorded => 'count',# Number of slots, so the highest recorded row number is this value - 1
227             _get_row_position => 'get',
228             _set_row_position => 'set',
229             },
230             );
231              
232             has _new_row_inst =>(# The currently loaded row object; go_to_or_past_row sets it with caching on or off and clears it at 'EOF'
233             isa => InstanceOf[ 'Spreadsheet::Reader::ExcelXML::Row' ],
234             reader => '_get_new_row_inst',
235             writer => '_set_new_row_inst',
236             clearer => '_clear_new_row_inst',
237             predicate => 'has_new_row_inst',
238             handles =>{# Delegated straight to the stored Row instance
239             get_new_row_number => 'get_row_number',
240             get_new_column => 'get_the_column', # pass an Excel based column number (no next default) returns (cell|undef|EOR)
241             get_new_next_value => 'get_the_next_value_position', # pass nothing returns next (cell|EOR)
242             get_new_row_all => 'get_row_all',
243             #~ _is_new_row_hidden => 'is_row_hidden',
244             #~ _get_new_row_formats => 'get_row_format', # pass the desired format key
245             #~ _get_new_last_value_col => 'get_last_value_column',
246             #~ _get_new_row_end => 'get_row_end'
247             },
248             );
249              
250             has _cached_row_insts =>(# For cached sheets
               # Holds the hash returned by build_row_data for each row, stored by
               # row position (not row number) when should_cache_positions is true
251             isa => ArrayRef[HashRef],
252             traits => ['Array'],
253             reader => '_get_row_inst_all',
254             clearer => '_clear_row_inst_all',
255             default => sub{ [] },
256             handles =>{
257             _get_row_inst => 'get',
258             _set_row_inst => 'set',
259             },
260             );
261              
262             #########1 Private Methods 3#########4#########5#########6#########7#########8#########9
263              
264              
265              
266             #########1 Phinish 3#########4#########5#########6#########7#########8#########9
267              
268 19     19   142 no Moose::Role;
  19         29  
  19         165  
269              
270             1;
271              
272             #########1 Documentation 3#########4#########5#########6#########7#########8#########9
273             __END__
274              
275             =head1 NAME
276              
277             Spreadsheet::Reader::ExcelXML::WorksheetToRow - Builds row objects from
278             worksheet files
279              
280             =head1 SYNOPSIS
281              
282             See t\Spreadsheet\Reader\ExcelXML\09-worksheet_to_row.t
283              
284             =head1 DESCRIPTION
285              
286             This documentation is written to explain ways to use this module when writing your own
287             excel parser. To use the general package for excel parsing out of the box please review
288             the documentation for L<Workbooks|Spreadsheet::Reader::ExcelXML>,
289             L<Worksheets|Spreadsheet::Reader::ExcelXML::Worksheet>, and
290             L<Cells|Spreadsheet::Reader::ExcelXML::Cell>
291              
292             This module provides the generic connection to individual worksheet files (not chartsheets)
293             for parsing xlsx(and xml) workbooks. It does not provide a way to connect to L<chartsheets
294             |Spreadsheet::Reader::ExcelXML::Chartsheet>. It does not provide the final view of a given
295             cell. The final view of the cell is collated with the role (Interface)
296             L<Spreadsheet::Reader::ExcelXML::Worksheet>. This reader extends the base reader class
297             L<Spreadsheet::Reader::ExcelXML::XMLReader>. This module also uses a file type interpreter.
298             The functionality provided by those modules is not explained here.
299              
300             For now this module reads each full row (with values) into a L<Spreadsheet::Reader::ExcelXML::Row>
301             instance. It stores either the currently read row or all rows based on the
302             L<Spreadsheet::Reader::ExcelXML/cache_positions> setting for Worksheet_instance.
303             When a position past the end of the sheet is called the current row is cleared and an 'EOF'
304             or undef value is returned. See L<Spreadsheet::Reader::ExcelXML/file_boundary_flags> for
305             more details.
306              
307             I<All positions (row and column places and integers) at this level are stored and returned in count
308             from one mode!>
309              
310             To replace this part in the package look in the raw code of
311             L<Spreadsheet::Reader::ExcelXML::Workbook> and adjust the 'worksheet_interface' key of the
312             $parser_modules variable.
313              
314             =head2 requires
315              
316             This module is a L<role|Moose::Manual::Roles> and as such only adds incremental methods and
317             attributes to some base class. In order to use this role some base object methods are
318             required. The requirements are listed below with links to the default provider.
319              
320             =over
321              
322             L<Spreadsheet::Reader::ExcelXML::FileWorksheet/advance_row_position( $element, [$iterations] )>
323              
324             L<Spreadsheet::Reader::ExcelXML::FileWorksheet/build_row_data>
325              
326             L<Spreadsheet::Reader::ExcelXML::XMLReader/not_end_of_file>
327              
328             L<Spreadsheet::Reader::ExcelXML::XMLReader/start_the_file_over>
329              
330             L<Spreadsheet::Reader::ExcelXML::XMLReader/close_the_file>
331              
332             =back
333              
334             =head2 Attributes
335              
336             Data passed to new when creating an instance. For access to the values in these
337             attributes see the listed 'attribute methods'. For general information on attributes see
338             L<Moose::Manual::Attributes>. For ways to manage the instance when opened see the
339             L<Methods|/Methods>.
340              
341             =head3 cache_positions
342              
343             =over
344              
345             B<Definition:> This is a boolean value which controls whether the parser caches rows that
346             have been parsed or just stores the top level hash. In general this should respond to the
347             top level attribute L<Spreadsheet::Reader::ExcelXML/cache_positions>
348              
349             B<Default:> 1 = caching on
350              
351             B<Range:> (1|0)
352              
353             B<attribute methods> Methods provided to read this attribute
354              
355             =over
356              
357             B<should_cache_positions>
358              
359             =over
360              
361             B<Definition:> return the attribute value
362              
363             =back
364              
365             =back
366              
367             =back
368              
369             =head2 Methods
370              
371             These are the methods provided by this class for use within the package but are not intended
372             to be used by the end user. Other private methods not listed here are used in the module but
373             not used by the package. If the private method is listed here then replacement of this module
374             either requires replacing them or rewriting all the associated connecting roles and classes.
375              
376             =head3 has_new_row_inst
377              
378             =over
379              
380             B<Definition:> Generally in the processing of a worksheet file there will be a currently
381             active row. This row is stored as an object so elements of the row can be retrieved via
382             L<delegation|Moose::Manual::Delegation/NATIVE DELEGATION>
383              
384             B<Accepts:> nothing
385              
386             B<Returns:> (1|0) depending on the presence of a currently stored row
387              
388             =back
389              
390             =head3 get_new_row_number
391              
392             =over
393              
394             B<Definition:> This returns the row number (in count from 1 mode) for the currently stored
395             row.
396              
397             B<Accepts:> nothing
398              
399             B<Returns:> an integer $row
400              
401             =back
402              
403             =head3 get_new_column( $column )
404              
405             =over
406              
407             B<Definition:> This returns the column data for the selected $column. If the request is
408             for a column with no data then it returns undef. If the column is requested past the
409             last column with data but before the end of the span it returns 'EOD'. If the request is
410             for a column past the end of the span it returns 'EOF'. The request and return are all
411             handled in count from 1 context.
412              
413             B<Accepts:> an integer $column number
414              
415             B<Returns:> The cell contents for that column (or undef, 'EOD', or 'EOF')
416              
417             =back
418              
419             =head3 get_new_next_value
420              
421             =over
422              
423             B<Definition:> like get_new_column this will return one cell. However, this method
424             will only return cells with content or 'EOR'. The role keeps track of which one
425             was called last even if it was through get_new_column.
426              
427             B<Accepts:> nothing
428              
429             B<Returns:> the cell contents or 'EOR'
430              
431             =back
432              
433             =head3 get_new_row_all
434              
435             =over
436              
437             B<Definition:> This returns an array ref of each of the values in the current row placed
438             in their 'count from 0' position.
439              
440             B<Accepts:> nothing
441              
442             B<Returns:> an array ref
443              
444             =back
445              
446             =head3 go_to_or_past_row( $row )
447              
448             =over
449              
450             B<Definition:> This will attempt to advance to the row provided by $row. It will continue to
451             advance past that row until it arrives at a row with values or the end of the file.
452              
453             B<Accepts:> $row (integer in count from 1 context)
454              
455             B<Returns:> The actual row number that was arrived at (in count from 1 context), or 'EOF' when the end of the file is reached before such a row is found
456              
457             =back
458              
459             =head1 SUPPORT
460              
461             =over
462              
463             L<github Spreadsheet::Reader::ExcelXML/issues
464             |https://github.com/jandrew/p5-spreadsheet-reader-excelxml/issues>
465              
466             =back
467              
468             =head1 TODO
469              
470             =over
471              
472             B<1.> If the primary cell of a merge range is hidden show that value
473             in the top left unhidden cell even when the attribute
474             L<Spreadsheet::Reader::ExcelXML::Workbook/spread_merged_values> is not
475             set. (This is the way excel does it(ish))
476              
477             =back
478              
479             =head1 AUTHOR
480              
481             =over
482              
483             =item Jed Lund
484              
485             =item jandrew@cpan.org
486              
487             =back
488              
489             =head1 COPYRIGHT
490              
491             This program is free software; you can redistribute
492             it and/or modify it under the same terms as Perl itself.
493              
494             The full text of the license can be found in the
495             LICENSE file included with this module.
496              
497             This software is copyrighted (c) 2016 by Jed Lund
498              
499             =head1 DEPENDENCIES
500              
501             =over
502              
503             L<Spreadsheet::Reader::ExcelXML> - the package
504              
505             =back
506              
507             =head1 SEE ALSO
508              
509             =over
510              
511             L<Spreadsheet::Read> - generic Spreadsheet reader
512              
513             L<Spreadsheet::ParseExcel> - Excel binary version 2003 and earlier (.xls files)
514              
515             L<Spreadsheet::XLSX> - Excel version 2007 and later
516              
517             L<Spreadsheet::ParseXLSX> - Excel version 2007 and later
518              
519             L<Log::Shiras|https://github.com/jandrew/Log-Shiras>
520              
521             =over
522              
523             All lines in this package that use Log::Shiras are commented out
524              
525             =back
526              
527             =back
528              
529             =cut
530              
531             #########1 Documentation End 3#########4#########5#########6#########7#########8#########9