File Coverage

lib/Spreadsheet/XLSX/Reader/LibXML.pm
Criterion Covered Total %
statement 24 26 92.3
branch n/a
condition n/a
subroutine 9 9 100.0
pod n/a
total 33 35 94.2


line stmt bran cond sub pod time code
1             package Spreadsheet::XLSX::Reader::LibXML;
2             our $AUTHORITY = 'cpan:JANDREW';
3 11     11   2170876 use version 0.77; our $VERSION = qv('v0.38.18');
  11         20723  
  11         74  
4             ###LogSD warn "You uncovered internal logging statements for Spreadsheet::XLSX::Reader::LibXML-$VERSION";
5              
6 11     11   1075 use 5.010;
  11         49  
7 11     11   54 use List::Util 1.33;
  11         284  
  11         610  
8 11     11   8576 use Moose;
  11         3481603  
  11         82  
9 11     11   83801 use MooseX::StrictConstructor;
  11         323878  
  11         61  
10 11     11   101636 use MooseX::HasDefaults::RO;
  11         78252  
  11         51  
11 11     11   100372 use Carp qw( confess );
  11         28  
  11         706  
12 11     11   12109 use Archive::Zip qw( AZ_OK );
  11         1457089  
  11         647  
13 11     11   19515 use XML::LibXML;
  0            
  0            
14             use IO::File;
15             use Clone 'clone';
16             use Types::Standard qw(
17             InstanceOf Str StrMatch
18             Enum HashRef ArrayRef
19             CodeRef Int HasMethods
20             Bool is_Object is_HashRef
21             );
22             use lib '../../../../lib',;
23             use Data::Dumper;
24             ###LogSD use Log::Shiras::Telephone;
25             ###LogSD use Log::Shiras::UnhideDebug;
26             use Spreadsheet::XLSX::Reader::LibXML::XMLReader::Styles;
27             use Spreadsheet::XLSX::Reader::LibXML::FmtDefault;
28             use Spreadsheet::XLSX::Reader::LibXML::XMLReader::SharedStrings;
29             use Spreadsheet::XLSX::Reader::LibXML::XMLReader::WorksheetToRow;
30             use Spreadsheet::XLSX::Reader::LibXML::Worksheet;
31             use Spreadsheet::XLSX::Reader::LibXML::XMLReader::Chartsheet;
32             use Spreadsheet::XLSX::Reader::LibXML::Error;
33             use Spreadsheet::XLSX::Reader::LibXML::Types qw( XLSXFile ParserType IOFileType );
34             ###LogSD use Log::Shiras::UnhideDebug;
35             use MooseX::ShortCut::BuildInstance 1.032 qw( build_instance should_re_use_classes );
36             should_re_use_classes( 1 );
37             ###LogSD with 'Log::Shiras::LogSpace';
38             ###LogSD sub get_class_space{ 'Workbook' }
39              
40             #########1 Dispatch Tables 3#########4#########5#########6#########7#########8#########9
41              
42             my $parser_modules ={
43             reader =>{
44             build_method => '_build_reader',
45             sharedStrings =>{
46             superclasses => ['Spreadsheet::XLSX::Reader::LibXML::XMLReader::SharedStrings'],
47             attributes => [qw( error_inst cache_positions empty_return_type group_return_type )],
48             store => '_set_shared_strings_instance',
49             package => 'SharedStrings',
50             },
51             styles =>{
52             superclasses => ['Spreadsheet::XLSX::Reader::LibXML::XMLReader::Styles'],
53             attributes => [qw( error_inst cache_positions format_inst empty_return_type )],
54             store => '_set_styles_instance',
55             package => 'Styles',
56             },
57             worksheet =>{
58             superclasses => ['Spreadsheet::XLSX::Reader::LibXML::XMLReader::WorksheetToRow'],
59             roles => ['Spreadsheet::XLSX::Reader::LibXML::Worksheet'],
60             package => 'Worksheet',
61             },
62             chartsheet =>{
63             superclasses => ['Spreadsheet::XLSX::Reader::LibXML::XMLReader::Chartsheet'],
64             package => 'Chartsheet',
65             },
66             },
67             };
68             my $xml_parser = XML::LibXML->new();
69             my $build_ref = {
70             top_level_workbook =>{
71             zip => 'xl/workbook.xml',
72             },
73             workbook_rels =>{
74             zip => 'xl/_rels/workbook.xml.rels',
75             },
76             doc_props =>{
77             zip => 'docProps/core.xml',
78             },
79             sharedStrings =>{
80             zip => 'xl/sharedStrings.xml',
81             },
82             styles =>{
83             zip => 'xl/styles.xml',
84             },
85             calcChain =>{
86             zip => 'xl/calcChain.xml',
87             },
88             };
89             my $attribute_defaults ={
90             error_inst =>{
91             superclasses => ['Spreadsheet::XLSX::Reader::LibXML::Error'],
92             package => 'ErrorInstance',
93             should_warn => 0,
94             },
95             format_inst =>{
96             superclasses => ['Spreadsheet::XLSX::Reader::LibXML::FmtDefault'],
97             package => 'FormatInstance',
98             },
99             sheet_parser => 'reader',
100             count_from_zero => 1,
101             file_boundary_flags => 1,
102             empty_is_end => 0,
103             values_only => 0,
104             from_the_edge => 1,
105             group_return_type => 'instance',
106             empty_return_type => 'empty_string',
107             };
108             my $flag_settings ={
109             alt_default =>{
110             values_only => 1,
111             count_from_zero => 0,
112             empty_is_end => 1,
113             },
114             just_the_data =>{
115             count_from_zero => 0,
116             values_only => 1,
117             empty_is_end => 1,
118             group_return_type => 'value',
119             cache_positions => 1,
120             from_the_edge => 0,
121             empty_return_type => 'undef_string',
122             },
123             just_raw_data =>{
124             count_from_zero => 0,
125             values_only => 1,
126             empty_is_end => 1,
127             group_return_type => 'unformatted',
128             cache_positions => 1,
129             from_the_edge => 0,
130             empty_return_type => 'undef_string',
131             },
132             like_ParseExcel =>{
133             count_from_zero => 1,
134             cache_positions => 1,
135             group_return_type => 'instance',
136             },
137             debug =>{
138             error_inst =>{
139             superclasses => ['Spreadsheet::XLSX::Reader::LibXML::Error'],
140             package => 'ErrorInstance',
141             should_warn => 1,
142             },
143             },
144             };
145              
146             #########1 Public Attributes 3#########4#########5#########6#########7#########8#########9
147              
148             has error_inst =>(
149             isa => HasMethods[qw(
150             error set_error clear_error set_warnings if_warn
151             ) ],
152             clearer => '_clear_error_inst',
153             reader => 'get_error_inst',
154             predicate => 'has_error_inst',
155             required => 1,
156             handles =>[ qw(
157             error set_error clear_error set_warnings if_warn
158             ) ],
159             trigger => sub{
160             if( $_[0]->_has_format_inst and !$_[0]->get_format_inst->block_inherit ){
161             $_[0]->get_format_inst->set_error_inst( $_[1] );
162             }
163             },
164             );
165            
166             has format_inst =>(
167             isa => HasMethods[qw(
168             set_error_inst set_excel_region
169             set_target_encoding get_defined_excel_format
170             set_defined_excel_formats change_output_encoding
171             set_epoch_year set_cache_behavior
172             set_date_behavior get_defined_conversion
173             parse_excel_format_string )],
174             writer => 'set_format_inst',
175             reader => 'get_format_inst',
176             predicate => '_has_format_inst',
177             handles =>[qw(
178             get_defined_excel_format parse_excel_format_string
179             change_output_encoding set_date_behavior
180             get_date_behavior set_defined_excel_formats)],
181             trigger => \&_import_format_settings,
182             );
183            
184             has file_name =>(
185             isa => XLSXFile,
186             writer => 'set_file_name',
187             clearer => '_clear_file_name',
188             predicate => 'has_file_name',
189             trigger => \&_build_workbook,
190             );
191              
192             has file_handle =>(
193             isa => IOFileType,
194             writer => 'set_file_handle',
195             clearer => '_clear_file_handle',
196             predicate => 'has_file_handle',
197             coerce => 1,
198             trigger => \&_build_workbook,
199             );
200              
201             has sheet_parser =>(
202             isa => ParserType,
203             writer => 'set_parser_type',
204             reader => 'get_parser_type',
205             coerce => 1,
206             );
207              
208             has count_from_zero =>(
209             isa => Bool,
210             reader => 'counting_from_zero',
211             writer => 'set_count_from_zero',
212             );
213            
214             has file_boundary_flags =>(
215             isa => Bool,
216             reader => 'boundary_flag_setting',
217             writer => 'change_boundary_flag',
218             required => 1,
219             );
220              
221             has empty_is_end =>(
222             isa => Bool,
223             writer => 'set_empty_is_end',
224             reader => 'is_empty_the_end',
225             );
226              
227             has values_only =>(
228             isa => Bool,
229             writer => 'set_values_only',
230             reader => 'get_values_only',
231             );
232              
233             has from_the_edge =>(
234             isa => Bool,
235             reader => '_starts_at_the_edge',
236             writer => 'set_from_the_edge',
237             );
238              
239             has default_format_list =>(
240             isa => Str,
241             writer => 'set_default_format_list',
242             reader => 'get_default_format_list',
243             );
244              
245             has format_string_parser =>(
246             isa => Str,
247             writer => 'set_format_string_parser',
248             reader => 'get_format_string_parser',
249             );
250              
251             has group_return_type =>(
252             isa => Enum[qw( unformatted value instance xml_value )],
253             reader => 'get_group_return_type',
254             writer => 'set_group_return_type',
255             );
256              
257             has empty_return_type =>(
258             isa => Enum[qw( empty_string undef_string )],
259             reader => 'get_empty_return_type',
260             writer => 'set_empty_return_type',
261             );
262            
263             has cache_positions =>(
264             isa => Bool,
265             reader => 'get_cache_positions',
266             default => 1,
267             );
268              
269             #########1 Public Methods 3#########4#########5#########6#########7#########8#########9
270             #~ use Data::Dumper;
271             sub import{# Flags handled here!
272             my ( $self, @flag_list ) = @_;
273            
274             if( scalar( @flag_list ) ){
275             for my $flag ( @flag_list ){
276             #~ print "Arrived at import with flag: $flag\n";
277             if( $flag =~ /^:(\w*)$/ ){# Handle text based flags
278             my $default_choice = $1;
279             #~ print "Attempting to change the default group type to: $default_choice\n";
280             if( exists $flag_settings->{$default_choice} ){
281             for my $attribute ( keys %{$flag_settings->{$default_choice}} ){
282             #~ print "Changing flag -$attribute- to:" . Dumper( $flag_settings->{$default_choice}->{$attribute} );
283             $attribute_defaults->{$attribute} = $flag_settings->{$default_choice}->{$attribute};
284             }
285             }else{
286             confess "No settings available for the flag: $flag";
287             }
288             }elsif( $flag =~ /^v?\d+\.?\d*/ ){# Version check may wind up here
289             #~ print "Running version check on version: $flag\n";
290             my $result = $VERSION <=> version->parse( $flag );
291             #~ print "Tested against version -$VERSION- gives result: $result\n";
292             if( $result < 0 ){
293             confess "Version -$flag- required - the installed version is: $VERSION";
294             }
295             }else{
296             confess "Passed attribute default flag -$flag- does not comply with the correct format";
297             }
298             }
299             }
300             }
301              
302             sub parse{
303              
304             my ( $self, $file, $formatter ) = @_;
305             ###LogSD my $phone = Log::Shiras::Telephone->new( name_space =>
306             ###LogSD $self->get_all_space . '::parse', );
307             ###LogSD $phone->talk( level => 'info', message =>[
308             ###LogSD "Arrived at parse for:", $file,
309             ###LogSD (($formatter) ? "with formatter: $formatter" : '') ] );
310             $self->set_format_string_parser( $formatter ) if $formatter;
311             if( IOFileType->check( $file ) ){
312             ###LogSD $phone->talk( level => 'info', message =>[ 'passed a file handle:', $file, ] );
313             eval '$self->set_file_handle( $file )';
314             }else{
315             ###LogSD $phone->talk( level => 'info', message =>[ 'passed a file name: ' . $file, ] );
316             eval '$self->set_file_name( $file )';
317             }
318             if( $@ ){
319             my $error_message = $@;
320             ###LogSD $phone->talk( level => 'info', message =>[ 'saving error:', $error_message, ] );
321             $self->set_error( $error_message );
322             return undef;
323             }else{
324             return $self;
325             }
326             }
327              
328             sub worksheets{
329              
330             my ( $self, ) = @_;
331             ###LogSD my $phone = Log::Shiras::Telephone->new( name_space =>
332             ###LogSD $self->get_all_space . '::worksheets', );
333             ###LogSD $phone->talk( level => 'info', message =>[
334             ###LogSD 'Attempting to build all worksheets: ', $self->get_worksheet_names ] );
335             my @worksheet_array;
336             while( my $worksheet_object = $self->worksheet ){
337             ###LogSD $phone->talk( level => 'info', message =>[
338             ###LogSD 'Built worksheet: ' . $worksheet_object->get_name ] );
339             push @worksheet_array, $worksheet_object;#$self->worksheet( $worksheet_name );
340             }
341             ###LogSD $phone->talk( level => 'trace', message =>[
342             ###LogSD 'sending worksheet array: ',@worksheet_array ] );
343             return @worksheet_array;
344             }
345              
346             sub worksheet{
347              
348             my ( $self, $worksheet_name ) = @_;
349             my ( $next_position );
350             ###LogSD my $phone = Log::Shiras::Telephone->new( name_space =>
351             ###LogSD $self->get_all_space . '::worksheet', );
352             ###LogSD $phone->talk( level => 'info', message =>[
353             ###LogSD "Arrived at (build a) worksheet with: ", $worksheet_name ] );
354            
355             # Handle an implied 'next sheet'
356             if( !$worksheet_name ){
357             my $worksheet_position = $self->_get_current_worksheet_position;
358             ###LogSD $phone->talk( level => 'info', message =>[
359             ###LogSD "No worksheet name passed",
360             ###LogSD ((defined $worksheet_position) ? "Starting after position: $worksheet_position" : '')] );
361             $next_position = ( !$self->in_the_list ) ? 0 : ($self->_get_current_worksheet_position + 1);
362             ###LogSD $phone->talk( level => 'info', message =>[
363             ###LogSD "No worksheet name passed", "Attempting position: $next_position" ] );
364             if( $next_position >= $self->worksheet_count ){
365             ###LogSD $phone->talk( level => 'info', message =>[
366             ###LogSD "Reached the end of the worksheet list" ] );
367             return undef;
368             }
369             $worksheet_name = $self->worksheet_name( $next_position );
370             }else{
371             my $sheet_data = $self->_get_sheet_info( $worksheet_name );
372             ###LogSD $phone->talk( level => 'info', message =>[
373             ###LogSD "Info for the worksheet -$worksheet_name- is:", $sheet_data ] );
374             $next_position = $sheet_data->{sheet_position};
375             }
376            
377             # Deal with chartsheet requests
378             my $worksheet_info = $self->_get_sheet_info( $worksheet_name );
379             # Check for sheet existence
380             if( !$worksheet_info or !$worksheet_info->{sheet_type} ){
381             $self->set_error( "The worksheet -$worksheet_name- could not be located!" );
382             return undef;
383             }elsif( $worksheet_info->{sheet_type} and $worksheet_info->{sheet_type} eq 'chartsheet' ){
384             $self->set_error( "You have requested -$worksheet_name- which is a 'chartsheet' using a worksheet focused method" );
385             return undef;
386             }
387             # NOTE: THE CHARTSHEET / WORKSHEET COMMON SUB-METHOD COULD PROBABLY START HERE
388             ###LogSD $phone->talk( level => 'info', message =>[
389             ###LogSD "Building: $worksheet_name", "..with data:", $worksheet_info ] );
390            
391             # Check for a file and an available parser type
392             confess "No file loaded yet" if !$self->has_file_name and !$self->has_file_handle;
393             my ( $translation_method, $parser_type );
394             if( exists $parser_modules->{ $self->get_parser_type } ){
395             $translation_method = $parser_modules->{ $self->get_parser_type }->{build_method};
396             $parser_type = $parser_modules->{ $self->get_parser_type }->{$worksheet_info->{sheet_type}};
397             }else{
398             confess 'This package still under development - parser type |' . $self->get_parser_type . '| not yet supported - try the "reader" parser';
399             return undef;
400             }
401             ###LogSD $phone->talk( level => 'info', message =>[
402             ###LogSD "Using translation: $translation_method", "With parser: ", $parser_type, ] );
403            
404             # build the worksheet
405             my %args = $self->$translation_method( $worksheet_info, $self->_get_zip_file_handle );
406             ###LogSD $phone->talk( level => 'debug', message => [
407             ###LogSD 'Intermediate worksheet args are:', %args ], );
408             if( !$args{file} and !$args{dom} ){
409             $self->set_error( "Unable to load XML::LibXML with the element: $worksheet_name" );
410             return undef;
411             }
412             ###LogSD $phone->talk( level => 'debug', message => [
413             ###LogSD 'made it past the has-file check with:', $parser_type, $worksheet_name, ], );
414            
415             confess "No worksheet info for: $worksheet_name" if !exists $worksheet_info->{sheet_position};
416             $args{superclasses} = $parser_type->{superclasses};
417             $args{roles} = $parser_type->{roles};
418             $args{sheet_name} = $worksheet_name;
419             $args{workbook_instance} = $self;
420             $args{error_inst} = $self->get_error_inst;
421             $args{package} = $parser_type->{package};
422             ###LogSD $args{log_space} = $self->get_log_space;
423             ###LogSD $phone->talk( level => 'trace', message =>[
424             ###LogSD 'Finalized build info:', %args, ] );
425             my $worksheet = build_instance( %args );
426             if( $worksheet ){
427             ###LogSD $phone->talk( level => 'info', message =>[
428             ###LogSD "Successfully loaded: $worksheet_name",
429             ###LogSD "Setting the current worksheet position to: $next_position" ] );
430             $self->_set_current_worksheet_position( $next_position );
431             return $worksheet;
432             }else{
433             $self->set_error( "Failed to build the object for worksheet: $worksheet_name" );
434             return undef;
435             }
436             }
437              
438             #########1 Private Attributes 3#########4#########5#########6#########7#########8#########9
439            
440             has _file_creator =>(
441             isa => Str,
442             reader => 'creator',
443             writer => '_set_creator',
444             clearer => '_clear_creator',
445             );
446            
447             has _file_modified_by =>(
448             isa => Str,
449             reader => 'modified_by',
450             writer => '_set_modified_by',
451             clearer => '_clear_modified_by',
452             );
453            
454             has _file_date_created =>(
455             isa => StrMatch[qr/^\d{4}\-\d{2}\-\d{2}/],
456             reader => 'date_created',
457             writer => '_set_date_created',
458             clearer => '_clear_date_created',
459             );
460            
461             has _file_date_modified =>(
462             isa => StrMatch[qr/^\d{4}\-\d{2}\-\d{2}/],
463             reader => 'date_modified',
464             writer => '_set_date_modified',
465             clearer => '_clear_date_modified',
466             );
467              
468             has _epoch_year =>(
469             isa => Enum[qw( 1900 1904 )],
470             writer => '_set_epoch_year',
471             reader => 'get_epoch_year',
472             predicate => 'has_epoch_year',
473             default => 1900,
474             trigger => sub{
475             if( $_[0]->_has_format_inst and !$_[0]->get_format_inst->block_inherit ){
476             $_[0]->get_format_inst->set_epoch_year( $_[1] );
477             }
478             },
479             );
480            
481             has _shared_strings_instance =>(
482             isa => HasMethods[ 'get_shared_string_position' ],
483             predicate => '_has_shared_strings_file',
484             writer => '_set_shared_strings_instance',
485             reader => '_get_shared_strings_instance',
486             clearer => '_clear_shared_strings',
487             handles =>{
488             'get_shared_string_position' => 'get_shared_string_position',
489             _demolish_shared_strings => 'DEMOLISH',
490             },
491             );
492            
493             has _styles_instance =>(
494             isa => HasMethods[qw( get_format_position )],
495             writer => '_set_styles_instance',
496             reader => '_get_styles_instance',
497             clearer => '_clear_styles',
498             predicate => '_has_styles_file',
499             handles =>{
500             get_format_position => 'get_format_position',
501             _demolish_styles => 'DEMOLISH',
502             },
503             );
504              
505             has _calc_chain_instance =>(
506             isa => HasMethods[qw( get_calc_chain_position )],
507             writer =>'_set_calc_chain_instance',
508             reader =>'_get_calc_chain_instance',
509             clearer => '_clear_calc_chain',
510             predicate => '_has_calc_chain_file',
511             handles =>{
512             _demolish_calc_chain => 'DEMOLISH',
513             },
514             );
515              
516             has _worksheet_list =>(
517             isa => ArrayRef,
518             traits => ['Array'],
519             clearer => '_clear_worksheet_list',
520             reader => 'get_worksheet_names',
521             handles =>{
522             worksheet_name => 'get',
523             worksheet_count => 'count',
524             _add_worksheet => 'push',
525             },
526             default => sub{ [] },
527             );
528              
529             has _chartsheet_list =>(
530             isa => ArrayRef,
531             traits => ['Array'],
532             clearer => '_clear_chartsheet_list',
533             reader => 'get_chartsheet_names',
534             handles =>{
535             chartsheet_name => 'get',
536             chartsheet_count => 'count',
537             _add_chartsheet => 'push',
538             },
539             default => sub{ [] },
540             );
541              
542             has _sheet_list =>(
543             isa => ArrayRef,
544             traits => ['Array'],
545             writer => '_set_sheet_list',
546             clearer => '_clear_sheet_list',
547             reader => 'get_sheet_names',
548             handles =>{
549             get_sheet_name => 'get',
550             sheet_count => 'count',
551             },
552             default => sub{ [] },
553             );
554              
555             has _sheet_lookup =>(
556             isa => HashRef,
557             traits => ['Hash'],
558             writer => '_set_sheet_lookup',
559             clearer => '_clear_sheet_lookup',
560             reader => '_get_sheet_lookup',
561             handles =>{
562             _get_sheet_info => 'get',
563             },
564             default => sub{ {} },
565             );
566              
567             has _current_worksheet_position =>(
568             isa => Int,
569             writer => '_set_current_worksheet_position',
570             reader => '_get_current_worksheet_position',
571             clearer => 'start_at_the_beginning',
572             predicate => 'in_the_list',
573             );
574            
575             has _file_type =>(
576             isa => Enum[qw( zip xml )],
577             clearer => '_clear_file_type',
578             writer => '_set_file_type',
579             reader => '_get_file_type',
580             );
581            
582             has _zip_file_handle =>(
583             isa => InstanceOf[ 'Archive::Zip' ],
584             clearer => '_clear_zip_file_handle',
585             writer => '_set_zip_file_handle',
586             reader => '_get_zip_file_handle',
587             predicate => '_has_zip_file_handle',
588             );
589              
590             #########1 Private Methods 3#########4#########5#########6#########7#########8#########9
591              
592             around BUILDARGS => sub {
593             my ( $orig, $class, %args ) = @_;
594             ###LogSD my $log_space = $args{log_space};
595             ###LogSD $log_space .= '::' if $log_space;
596             ###LogSD $log_space .= 'Workbook::BUILDARGS';
597             ###LogSD my $phone = Log::Shiras::Telephone->new(
598             ###LogSD name_space => $log_space, );
599             ###LogSD $phone->talk( level => 'trace', message =>[
600             ###LogSD 'Arrived at BUILDARGS with: ', %args ] );
601            
602             # Check if this was called by Spreadsheet::Read;
603             my $like_ParseExcel = 0;
604             for my $x ( 2 .. 5 ){
605             ###LogSD $phone->talk( level => 'trace', message =>[
606             ###LogSD "Caller is:", caller( $x ) ] );
607             if( (caller( $x ))[0] and (caller( $x ))[0] =~ /Spreadsheet::Read/ ){
608             $like_ParseExcel = 1;
609             ###LogSD $phone->talk( level => 'trace', message =>[
610             ###LogSD "Spreadsheet::XLSX::Reader::LibXML is being called by Spreadsheet::Read - enforcing the :like_ParseExcel flag" ] );
611             last;
612             }
613             }
614            
615             # Add any defaults
616             for my $key ( keys %$attribute_defaults ){
617             if( exists $args{$key} ){
618             ###LogSD $phone->talk( level => 'trace', message =>[
619             ###LogSD "Found user defined -$key- with value: $args{$key}" ] );
620             }else{
621             ###LogSD $phone->talk( level => 'trace', message =>[
622             ###LogSD "Setting default -$key- with value: $attribute_defaults->{$key}" ] );
623             $args{$key} = clone( $attribute_defaults->{$key} );
624             }
625             }
626            
627             # Enforce Spreadsheet::Read requirements
628             if( $like_ParseExcel ){
629             ###LogSD $phone->talk( level => 'trace', message =>[
630             ###LogSD "Enforcing like_ParseExcel defaults" ] );
631             for my $key ( %{$flag_settings->{like_ParseExcel}} ){
632             $args{$key} = clone( $flag_settings->{like_ParseExcel}->{$key} );
633             }
634             }
635            
636             # Build object instances as needed
637             for my $key ( keys %args ){
638             ###LogSD $phone->talk( level => 'trace', message =>[
639             ###LogSD "Checking if an instance needs built for key: $key" ] );
640             if( $key =~ /_inst$/ and !is_Object( $args{$key} ) and is_HashRef( $args{$key} ) ){
641             # Import log_space as needed
642             ###LogSD if( exists $args{log_space} and $args{log_space} ){
643             ###LogSD $args{$key}->{log_space} = $args{log_space};
644             ###LogSD }
645             ###LogSD $phone->talk( level => 'trace', message =>[
646             ###LogSD "Key -$key- requires and instance built from:", $args{$key} ] );
647             $args{$key} = build_instance( $args{$key} );
648             }
649             }
650            
651             ###LogSD $phone->talk( level => 'trace', message =>[
652             ###LogSD "Final BUILDARGS:", %args ] );
653             return $class->$orig(%args);
654             };
655              
656             sub _build_workbook{
657              
658             my ( $self, $file ) = @_;
659             ###LogSD my $phone = Log::Shiras::Telephone->new( name_space =>
660             ###LogSD $self->get_all_space . '::hidden::_build_file', );
661             ###LogSD $phone->talk( level => 'info', message =>[
662             ###LogSD 'Arrived at _build_file for: ', $file ] );
663             $self->_clear_shared_strings;
664             $self->_clear_calc_chain;
665             $self->_clear_styles;
666             $self->_clear_worksheet_list;
667             $self->_clear_sheet_lookup;
668             $self->_clear_creator;
669             $self->_clear_modified_by;
670             $self->_clear_date_created;
671             $self->_clear_date_modified;
672             $self->clear_error;
673             $self->start_at_the_beginning;
674             $self->_clear_file_type;
675            
676             # Ensure we have a file handle
677             my $file_handle;
678             eval '$file_handle = IOFileType->assert_coerce( $file )';
679             ###LogSD $phone->talk( level => 'trace', message =>[
680             ###LogSD "Passed the file type coercion" ] );
681             if( $@ ){
682             ###LogSD $phone->talk( level => 'warn', message =>[
683             ###LogSD "Unable to create a valid file instance with: $file" ] );
684             $self->_clear_file_handle;
685             $self->_clear_file_name;
686             $self->set_error( "Unable to create a valid file instance with: $file" );
687             return undef;
688             }
689             ###LogSD $phone->talk( level => 'trace', message =>[
690             ###LogSD "Current file handle: $file_handle" ] );
691            
692             # Read the XLSX zip file and catch any errors (other zip file sanity tests go here)
693             my $workbook_file = Archive::Zip->new();
694             if( $workbook_file->readFromFileHandle($file_handle) != AZ_OK ){
695             ###LogSD $phone->talk( level => 'warn', message =>[
696             ###LogSD "Failed to open a zip file" ] );
697             $self->_clear_file_handle;
698             $self->_clear_file_name;
699             $self->_set_file_type( 'xml' );# Build from this when adding all-in-one single file Excel Workbooks!!!!
700             confess "|$file| won't open as a zip file";
701             }else{
702             ###LogSD $phone->talk( level => 'debug', message =>[
703             ###LogSD "Certified this as a zip file" ] );
704             $self->_set_file_type( 'zip' );
705             }
706             ###LogSD $phone->talk( level => 'debug', message =>[
707             ###LogSD 'Zip file test passed with: ' . $self->_get_file_type ] );
708            
709             # Extract the workbook top level info
710             my %answer = $self->_build_dom( $build_ref->{top_level_workbook}, $workbook_file );
711             ###LogSD $phone->talk( level => 'debug', message =>[ "DOM built for method: _load_top_level_workbook" ] );
712             my ( $rel_lookup, $id_lookup ) = $self->_load_top_level_workbook( $answer{dom} );
713             return undef if !$rel_lookup;
714             ###LogSD $phone->talk( level => 'debug', message =>[ 'Rel lookup:', $rel_lookup, 'ID lookukp:', $id_lookup ] );
715            
716             # Load the workbook rels info
717             %answer = $self->_build_dom( $build_ref->{workbook_rels}, $workbook_file );
718             ###LogSD $phone->talk( level => 'debug', message =>[ "DOM built for method: _load_workbook_rels" ] );
719             my ( $load_success, $pivot_lookup ) = $self->_load_workbook_rels( $rel_lookup, $answer{dom} );
720             return undef if !$load_success;
721             ###LogSD $phone->talk( level => 'debug', message =>[ 'pivot lookup:', $pivot_lookup, ] );
722            
723             # Load the docProps info
724             %answer = $self->_build_dom( $build_ref->{doc_props}, $workbook_file );
725             ###LogSD $phone->talk( level => 'debug', message =>[ "DOM built for method: _load_doc_props" ] );
726             $self->_load_doc_props( $answer{dom} );
727             ###LogSD $phone->talk( level => 'debug', message =>[ 'docProps loaded', ] );
728            
729             # Build the instances for all the shared files (data for sheets shared across worksheets)
730             if( exists $parser_modules->{ $self->get_parser_type } ){
731             ###LogSD $phone->talk( level => 'debug', message =>[ 'loading shared worksheets for: ' . $self->get_parser_type, ] );
732             my $result = $self->_set_shared_worksheet_files(
733             $parser_modules->{ $self->get_parser_type },
734             $workbook_file,
735             );
736             return undef if !$result;
737             $self->_set_zip_file_handle( $workbook_file );
738             }else{
739             confess 'This package still under development - parser type |' . $self->get_parser_type . '| not yet supported - try the "reader" parser';
740             return undef;
741             }
742             return $self;
743             }
744              
745             sub _build_dom{
746             my( $self, $target, $workbook_file ) = @_;
747             my $build_target = clone( $target );
748             ###LogSD my $phone = Log::Shiras::Telephone->new( name_space =>
749             ###LogSD $self->get_all_space . '::hidden::_build_dom', );
750             ###LogSD $phone->talk( level => 'debug', message => [
751             ###LogSD 'Building DOM object for the target:', $build_target,
752             ###LogSD "With file:", $workbook_file ] );
753             my ( $dom, $encoding );
754             if( $self->_get_file_type eq 'zip' ){
755             ###LogSD $phone->talk( level => 'debug', message => [
756             ###LogSD "Working on a zip file targeting: $build_target->{zip}",] );
757             my $zip_workbook = $workbook_file->memberNamed( $build_target->{zip} );
758             delete $build_target->{zip};
759             ###LogSD $phone->talk( level => 'debug', message =>[ 'zip member: ' . $zip_workbook ] );
760             my $workbook_fh = IO::File->new_tmpfile;
761             $workbook_fh->binmode();
762             $zip_workbook->extractToFileHandle( $workbook_fh );
763             $workbook_fh->seek( 0, 0 );
764             $dom = $xml_parser->load_xml( { IO => $workbook_fh } );
765             }else{
766             confess "I don't know how to handle file type: " . $self->_get_file_type;
767             }
768             my %return_args = ( dom => $dom, %$build_target );
769             ###LogSD $phone->talk( level => 'debug', message =>[ "Returning: ", %return_args ] );
770             return %return_args;
771             }
772              
773             sub _build_reader{
774             my( $self, $target, $workbook_file ) = @_;
775             my $build_target = clone( $target );
776             ###LogSD my $phone = Log::Shiras::Telephone->new( name_space =>
777             ###LogSD $self->get_all_space . '::hidden::_build_reader', );
778             ###LogSD $phone->talk( level => 'debug', message => [
779             ###LogSD 'Building Reader object for the target:', $build_target,
780             ###LogSD "With file:", $workbook_file ] );
781             my ( $workbook_fh, $xml_reader, $encoding );
782             if( $self->_get_file_type eq 'zip' ){
783             ###LogSD $phone->talk( level => 'debug', message => [
784             ###LogSD "Working on a zip file targeting: $build_target->{zip}",] );
785             my $zip_member = $workbook_file->memberNamed( $build_target->{zip} );
786             ###LogSD $phone->talk( level => 'debug', message =>[ 'zip member:', $zip_member ] );
787             if( $zip_member ){
788             $workbook_fh = IO::File->new_tmpfile;
789             $workbook_fh->binmode();
790             $zip_member->extractToFileHandle( $workbook_fh );
791             $workbook_fh->seek( 0, 0 );
792             }else{
793             ###LogSD $phone->talk( level => 'debug', message =>[ "no zip file for: $build_target->{zip}" ] );
794             return undef;
795             }
796             delete $build_target->{zip};
797             }else{
798             confess "I don't know how to handle file type: " . $self->_get_file_type;
799             }
800             my %return_args = ( file => $workbook_fh, %$build_target );# xml_reader => $xml_reader,
801             ###LogSD $phone->talk( level => 'debug', message =>[ "Returning: ", %return_args ] );
802             return %return_args;
803             }
804              
805             sub _load_top_level_workbook{
806             my( $self, $dom ) = @_;
807             ###LogSD my $phone = Log::Shiras::Telephone->new( name_space =>
808             ###LogSD $self->get_all_space . '::hidden::_load_workbook_file', );
809             ###LogSD $phone->talk( level => 'info', message => [
810             ###LogSD "Building the top level data for the workbook", ] );
811             my ( $list, $sheet_ref, $rel_lookup, $id_lookup );
812             my $position = 0;
813             my ( $setting_node ) = $dom->getElementsByTagName( 'workbookPr' );
814             if( $setting_node and $setting_node->getAttribute( 'date1904' ) ){
815             $self->_set_epoch_year( 1904 );
816             }
817             for my $sheet ( $dom->getElementsByTagName( 'sheet' ) ){
818             my $sheet_name = $sheet->getAttribute( 'name' );
819             push @$list, $sheet_name;
820             @{$sheet_ref->{$sheet_name}}{ 'sheet_id', 'sheet_rel_id', 'sheet_position', 'is_hidden' } = (
821             $sheet->getAttribute( 'sheetId' ),
822             $sheet->getAttribute( 'r:id' ),
823             $position++,
824             ($sheet->getAttribute( 'state' ) ? 1 : 0),
825             );
826             $rel_lookup->{$sheet->getAttribute( 'r:id' )} = $sheet_name;
827             $id_lookup->{$sheet->getAttribute( 'sheetId' )} = $sheet_name;
828             }
829             for my $sheet ( $dom->getElementsByTagName( 'pivotCache' ) ){
830             my $sheet_id = $sheet->getAttribute( 'cacheId' );
831             my $rel_id = $sheet->getAttribute( 'r:id' );
832             ###LogSD $phone->talk( level => 'debug', message =>[
833             ###LogSD "Sheet ID: $sheet_id", "Rel ID: $rel_id", ] );
834             $rel_lookup->{$rel_id} = $sheet_id;
835             $id_lookup->{$sheet_id} = $rel_id;
836             }
837             ###LogSD $phone->talk( level => 'debug', message => [
838             ###LogSD "Sheet list: ", $list,
839             ###LogSD "Worksheet lookup:", $sheet_ref,
840             ###LogSD "rel lookup:", $rel_lookup,
841             ###LogSD "id lookup:", $id_lookup, ] );
842             $dom = undef;
843             if( !$list ){
844             $self->set_error( "No worksheets identified in this workbook" );
845             return undef;
846             }
847             $self->_set_sheet_list( $list );
848             $self->_set_sheet_lookup( $sheet_ref );
849             return( $rel_lookup, $id_lookup );
850             }
851              
852             sub _load_workbook_rels{
853             my( $self, $rel_lookup, $dom ) = @_;
854             my ( $pivot_lookup, );
855             ###LogSD my $phone = Log::Shiras::Telephone->new( name_space =>
856             ###LogSD $self->get_all_space . '::hidden::_load_workbook_rels', );
857             ###LogSD $phone->talk( level => 'debug', message => [
858             ###LogSD "Adding the rels file data for the workbook with:", $rel_lookup ] );
859             my $sheet_ref = $self->_get_sheet_lookup;
860             ###LogSD $phone->talk( level => 'debug', message => [
861             ###LogSD "Working on sheet ref:", $sheet_ref, '..and rel lookup:', $rel_lookup ] );
862             my $found_member_names = 0;
863             my ( $worksheet_list, $chartsheet_list );
864             for my $sheet ( $dom->getElementsByTagName( 'Relationship' ) ){
865             my $rel_ID = $sheet->getAttribute( 'Id' );
866             ###LogSD $phone->talk( level => 'debug', message => [
867             ###LogSD "Processing relID:", $rel_ID, ] );
868             if( exists $rel_lookup->{$rel_ID} ){
869             my $target = 'xl/';
870             $target .= $sheet->getAttribute( 'Target' );
871             ###LogSD $phone->talk( level => 'debug', message => [
872             ###LogSD "Building relationship for: $rel_ID", "With target: $target" ] );
873             $target =~ s/\\/\//g;
874             if( $target =~ /worksheets(\\|\/)/ ){
875             $sheet_ref->{$rel_lookup->{$rel_ID}}->{zip} = $target;
876             $sheet_ref->{$rel_lookup->{$rel_ID}}->{sheet_type} = 'worksheet';
877             $worksheet_list->[$sheet_ref->{$rel_lookup->{$rel_ID}}->{sheet_position}] = $rel_lookup->{$rel_ID};
878             $found_member_names = 1;
879             }elsif( $target =~ /chartsheets(\\|\/)/ ){
880             $sheet_ref->{$rel_lookup->{$rel_ID}}->{zip} = $target;
881             $sheet_ref->{$rel_lookup->{$rel_ID}}->{sheet_type} = 'chartsheet';
882             $chartsheet_list->[$sheet_ref->{$rel_lookup->{$rel_ID}}->{sheet_position}] = $rel_lookup->{$rel_ID};
883             $found_member_names = 1;
884             }else{
885             $pivot_lookup->{$rel_ID} = $target;
886             }
887             }
888             }
889             ###LogSD $phone->talk( level => 'debug', message => [
890             ###LogSD "Worksheet lookup:", $sheet_ref,
891             ###LogSD "Pivot lookup:", $pivot_lookup ] );
892             if( !$found_member_names ){
893             $self->set_error( "Couldn't find any zip member (file) names for the sheets" );
894             return ( 0, undef );
895             }
896             map{ $self->_add_worksheet( $_ ) if $_ } @$worksheet_list if $worksheet_list;
897             map{ $self->_add_chartsheet( $_ ) if $_ } @$chartsheet_list if $chartsheet_list;
898             $self->_set_sheet_lookup( $sheet_ref );
899             return ( 1, $pivot_lookup );
900             }
901              
902             sub _load_doc_props{
903             my( $self, $dom ) = @_;
904             ###LogSD my $phone = Log::Shiras::Telephone->new( name_space =>
905             ###LogSD $self->get_all_space . '::hidden::_load_doc_props', );
906             ###LogSD $phone->talk( level => 'debug', message => [
907             ###LogSD "Collecting data from the doc props file", ] );
908             $self->_set_creator( ($dom->getElementsByTagName( 'dc:creator' ))[0]->textContent() );
909             $self->_set_modified_by( ($dom->getElementsByTagName( 'cp:lastModifiedBy' ))[0]->textContent() );
910             $self->_set_date_created(
911             ($dom->getElementsByTagName( 'dcterms:created' ))[0]->textContent()
912             );
913             $self->_set_date_modified(
914             ($dom->getElementsByTagName( 'dcterms:modified' ))[0]->textContent()
915             );
916             ###LogSD $phone->talk( level => 'trace', message => [ "Current object:", $self ] );
917             }
918              
919             sub _set_shared_worksheet_files{
920             my( $self, $object_ref, $zip_workbook ) = @_;
921             ###LogSD my $phone = Log::Shiras::Telephone->new( name_space =>
922             ###LogSD $self->get_all_space . '::hidden::_set_shared_worksheet_files', );
923             ###LogSD $phone->talk( level => 'debug', message => [
924             ###LogSD "Building the shared worksheet files with the lookup ref:", $object_ref, ] );
925             my $translation_method = $object_ref->{build_method};
926             for my $file ( keys %$object_ref ){
927            
928             # Build out the shared files (shared by all worksheets) only!
929             next if $file eq 'build_method' or $file eq 'worksheet' or $file eq 'chartsheet';
930            
931             ###LogSD $phone->talk( level => 'debug', message => [
932             ###LogSD "Attempting to load the file: ${file}\.xml",
933             ###LogSD "With translation method: $translation_method" ], );
934             my %args;
935             my @list = $self->$translation_method( $build_ref->{$file}, $zip_workbook );
936             ###LogSD $phone->talk( level => 'debug', message => [ $translation_method . " returned args:", @list ], );
937             if( scalar( @list ) < 2 ){
938             if( !$args{file} and !$args{dom} ){
939             $self->set_error( "Unable to load XML::LibXML with the element: $file" );
940             }
941             }else{
942             %args = @list;
943             ###LogSD $phone->talk( level => 'debug', message =>[ "Built an xml_object", ], );
944             $args{package} = $object_ref->{$file}->{package} if exists $object_ref->{$file}->{package};
945             ###LogSD $args{log_space} = $self->get_log_space;
946             $args{superclasses} = $object_ref->{$file}->{superclasses} if exists $object_ref->{$file}->{superclasses};
947             for my $attribute ( @{$object_ref->{$file}->{attributes}} ){
948             ###LogSD $phone->talk( level => 'debug', message => [
949             ###LogSD "Loading attribute: $attribute", ], );
950             my $method = 'get_' . $attribute;
951             $args{$attribute} = $self->$method;
952             }
953             my $role_ref;
954             for my $role ( @{$object_ref->{$file}->{add_roles_in_sequence}} ){
955             ###LogSD $phone->talk( level => 'debug', message => [
956             ###LogSD "collecting the role for: $role", ], );
957             my $method = 'get_' . $role;
958             push @$role_ref, $self->$method;
959             }
960             $args{add_roles_in_sequence} = $role_ref if $role_ref;
961             ###LogSD $args{log_space} = $self->get_log_space;
962             my $method = $object_ref->{$file}->{store};
963             ###LogSD $phone->talk( level => 'debug', message =>[
964             ###LogSD "Final args for building the instance:", %args,
965             ###LogSD "Loading -$method- with build_instance( 'args' )" ], );
966             my $object = build_instance( %args );################################ Add a $object build-fail flag
967             ###LogSD $phone->talk( level => 'debug', message =>[
968             ###LogSD "Finished building instance for: $file",
969             ###LogSD "Loading to the worbook with method: $method", # $object
970             ###LogSD ], );
971             $self->$method( $object );
972             ###LogSD $phone->talk( level => 'debug', message =>[
973             ###LogSD "Finished building and installing: $file", ], );
974             }
975             }
976             ###LogSD $phone->talk( level => 'debug', message => [
977             ###LogSD "All shared files that can be built are built!" ], );
978             return 1;
979             }
980              
981             sub _import_format_settings{
982              
983             my ( $self, $formatter ) = @_;
984             ###LogSD my $phone = Log::Shiras::Telephone->new( name_space =>
985             ###LogSD $self->get_all_space . '::hidden::_import_format_settings', );
986             if( !$formatter->block_inherit ){
987             ###LogSD $phone->talk( level => 'info', message =>[
988             ###LogSD 'Arrived at _import_format_settings for: ', $formatter ] );
989             if( $self->has_error_inst ){
990             ###LogSD $phone->talk( level => 'info', message =>[
991             ###LogSD 'Setting the global error instance to the formatter' ] );
992             $formatter->set_error_inst( $self->get_error_inst );
993             }
994             my $year = $self->get_epoch_year;
995             ###LogSD $phone->talk( level => 'info', message =>[
996             ###LogSD "Setting the epoch year for the formatter: $year" ] );
997             $formatter->set_epoch_year( $year );
998             my $cache = $self->get_cache_positions;
999             ###LogSD $phone->talk( level => 'info', message =>[
1000             ###LogSD "Setting the cache to: $cache" ] );
1001             $formatter->set_cache_behavior( $cache );
1002             }
1003             }
1004              
1005             sub DEMOLISH{
1006             my ( $self ) = @_;
1007             ###LogSD my $phone = Log::Shiras::Telephone->new( name_space =>
1008             ###LogSD $self->get_all_space . '::hidden::DEMOLISH', );
1009             if( $self->_has_calc_chain_file ){
1010             #~ print "closing calcChain.xml\n";
1011             ###LogSD $phone->talk( level => 'debug', message => [
1012             ###LogSD "Clearing the calcChain.xml file" ] );
1013             $self->_demolish_calc_chain;
1014             }
1015             if( $self->_has_shared_strings_file ){
1016             my $instance = $self->_get_shared_strings_instance;
1017             #~ print "closing sharedStrings.xml\n";# . Dumper( $instance )
1018             ###LogSD $phone->talk( level => 'debug', message => [
1019             ###LogSD "Clearing the sharedStrings.xml file" ] );
1020             if( $instance ){
1021             $self->_demolish_shared_strings;
1022             }else{
1023             $self->_clear_shared_strings;
1024             $instance = undef;
1025             }
1026             }
1027            
1028             if( $self->_has_styles_file ){
1029             my $instance = $self->_get_styles_instance;
1030             #~ print "closing styles.xml\n";# . Dumper( $instance )
1031             ###LogSD $phone->talk( level => 'debug', message => [
1032             ###LogSD "Clearing the styles.xml file" ] );
1033             if( $instance ){
1034             $self->_demolish_styles;
1035             }else{
1036             $self->_clear_shared_strings;
1037             $instance = undef;
1038             }
1039             }
1040            
1041             if( $self->_has_zip_file_handle ){
1042             #~ print "closing zip file handle\n";
1043             ###LogSD $phone->talk( level => 'debug', message => [
1044             ###LogSD "Clearing the Zip file handle" ] );
1045             $self->_clear_zip_file_handle;
1046             }
1047            
1048             if( $self->has_file_handle ){
1049             #~ print "closing general file handle\n";
1050             ###LogSD $phone->talk( level => 'debug', message => [
1051             ###LogSD "Clearing the top level file handle" ] );
1052             $self->_clear_file_handle;
1053             }
1054             }
1055              
1056             #########1 Phinish 3#########4#########5#########6#########7#########8#########9
1057              
1058             no Moose;
1059             __PACKAGE__->meta->make_immutable;
1060            
1061             1;
1062              
1063             #########1 Documentation 3#########4#########5#########6#########7#########8#########9
1064             __END__
1065              
1066             =head1 NAME
1067              
1068             Spreadsheet::XLSX::Reader::LibXML - Read xlsx spreadsheet files with LibXML
1069              
1070             =begin html
1071              
1072             <a href="https://www.perl.org">
1073             <img src="https://img.shields.io/badge/perl-5.10+-brightgreen.svg" alt="perl version">
1074             </a>
1075              
1076             <a href="https://travis-ci.org/jandrew/Spreadsheet-XLSX-Reader-LibXML">
1077             <img alt="Build Status" src="https://travis-ci.org/jandrew/Spreadsheet-XLSX-Reader-LibXML.png?branch=master" alt='Travis Build'/>
1078             </a>
1079              
1080             <a href='https://coveralls.io/r/jandrew/Spreadsheet-XLSX-Reader-LibXML?branch=master'>
1081             <img src='https://coveralls.io/repos/jandrew/Spreadsheet-XLSX-Reader-LibXML/badge.svg?branch=master' alt='Coverage Status' />
1082             </a>
1083              
1084             <a>
1085             <img src="https://img.shields.io/badge/this version-0.38.18-brightgreen.svg" alt="this version">
1086             </a>
1087              
1088             <a href="https://metacpan.org/pod/Spreadsheet::XLSX::Reader::LibXML">
1089             <img src="https://badge.fury.io/pl/Spreadsheet-XLSX-Reader-LibXML.svg?label=cpan version" alt="CPAN version" height="20">
1090             </a>
1091              
1092             <a href='http://cpants.cpanauthors.org/dist/Spreadsheet-XLSX-Reader-LibXML'>
1093             <img src='http://cpants.cpanauthors.org/dist/Spreadsheet-XLSX-Reader-LibXML.png' alt='kwalitee' height="20"/>
1094             </a>
1095              
1096             =end html
1097              
1098             =head1 SYNOPSIS
1099              
1100             The following uses the 'TestBook.xlsx' file found in the t/test_files/ folder
1101              
1102             #!/usr/bin/env perl
1103             use strict;
1104             use warnings;
1105             use Spreadsheet::XLSX::Reader::LibXML;
1106              
1107             my $parser = Spreadsheet::XLSX::Reader::LibXML->new();
1108             my $workbook = $parser->parse( 'TestBook.xlsx' );
1109              
1110             if ( !defined $workbook ) {
1111             die $parser->error(), "\n";
1112             }
1113              
1114             for my $worksheet ( $workbook->worksheets() ) {
1115              
1116             my ( $row_min, $row_max ) = $worksheet->row_range();
1117             my ( $col_min, $col_max ) = $worksheet->col_range();
1118              
1119             for my $row ( $row_min .. $row_max ) {
1120             for my $col ( $col_min .. $col_max ) {
1121              
1122             my $cell = $worksheet->get_cell( $row, $col );
1123             next unless $cell;
1124              
1125             print "Row, Col = ($row, $col)\n";
1126             print "Value = ", $cell->value(), "\n";
1127             print "Unformatted = ", $cell->unformatted(), "\n";
1128             print "\n";
1129             }
1130             }
1131             last;# In order not to read all sheets
1132             }
1133              
1134             ###########################
1135             # SYNOPSIS Screen Output
1136             # 01: Row, Col = (0, 0)
1137             # 02: Value = Category
1138             # 03: Unformatted = Category
1139             # 04:
1140             # 05: Row, Col = (0, 1)
1141             # 06: Value = Total
1142             # 07: Unformatted = Total
1143             # 08:
1144             # 09: Row, Col = (0, 2)
1145             # 10: Value = Date
1146             # 11: Unformatted = Date
1147             # 12:
1148             # 13: Row, Col = (1, 0)
1149             # 14: Value = Red
1150             # 16: Unformatted = Red
1151             # 17:
1152             # 18: Row, Col = (1, 1)
1153             # 19: Value = 5
1154             # 20: Unformatted = 5
1155             # 21:
1156             # 22: Row, Col = (1, 2)
1157             # 23: Value = 2017-2-14 #(shows as 2/14/2017 in the sheet)
1158             # 24: Unformatted = 41318
1159             # 25:
1160             # More intermediate rows ...
1161             # 82:
1162             # 83: Row, Col = (6, 2)
1163             # 84: Value = 2016-2-6 #(shows as 2/6/2016 in the sheet)
1164             # 85: Unformatted = 40944
1165             ###########################
1166              
1167             =head1 DESCRIPTION
1168              
1169             This is another package for parsing Excel 2007+ workbooks. The goals of this package are
1170             three fold. First, as close as possible produce the same output as is visible in an
1171             excel spreadsheet with exposure to underlying settings from Excel. Second, adhere as
1172             close as is reasonable to the L<Spreadsheet::ParseExcel> API (where it doesn't conflict
1173             with the first objective) so that less work would be needed to integrate ParseExcel and
1174             this package. An addendum to the second goal is this package will not expose elements of
1175             the object hash for use by the consuming program. This package will either return an
1176             unblessed hash with the equivalent elements to the Spreadsheet::ParseExcel output instead
1177             of a class instance or it will provide methods to provide these sets of data. The third
1178             goal is to provide an XLSX sheet parser that is built on L<XML::LibXML>. The other two
1179             primary options for XLSX parsing on CPAN use either a one-off XML parser (L<Spreadsheet::XLSX>)
1180             or L<XML::Twig> (L<Spreadsheet::ParseXLSX>). In general if either of them already work for
1181             you without issue then there is no reason to change to this package. I personally found
1182             some bugs and functionality boundaries in both that I wanted to improve, and by the time
1183             I had educated myself enough to make improvement suggestions including root causing the
1184             bugs to either the XML parser or the reader logic I had written this.
1185              
1186             In the process of learning and building I also wrote some additional features for
1187             this parser that are not found in the L<Spreadsheet::ParseExcel> package. For instance
1188             in the L<SYNOPSIS|/SYNOPSIS> the '$parser' and the '$workbook' are actually the same
1189             class. You could combine both steps by calling ->new with the 'file_name' (or
1190             'file_handle') attribute called out. Afterward it is still possible to call ->error on
1191             the instance. The test in that case for load success would be
1192             $instance->has_file_name(handle) Another improvement (from my perspective) is date
1193             handling. This package allows for a simple pluggable custom output format that is very
1194             flexible as well as handling dates older than 1-January-1900. I leveraged coercions from
1195             L<Type::Tiny|Type::Tiny::Manual> to do this but anything that follows that general format
1196             will work here. Additionally, this is a L<Moose> based package. As such it is designed
1197             to be (fairly) extensible by writing roles
1198             and adding them to this package rather than requiring that you extend the package to some
1199             new branch. Read the full documentation for all opportunities!
1200              
1201             In the realm of extensibility, L<XML::LibXML> has multiple ways to read an XML file but
1202             this release only has an L<XML::LibXML::Reader> parser option. Future iterations could
1203             include a DOM parser option. Additionally this package does not (yet) provide the same
1204             access to the formatting elements provided in L<Spreadsheet::ParseExcel>. That is on the
1205             longish and incomplete TODO list.
1206              
1207             The package operates on the workbook with three primary tiers of classes. All other
1208             classes in this package are for architectual extensibility.
1209              
1210             =over
1211              
1212             ---> L<Workbook level|Spreadsheet::XLSX::Reader::LibXML>
1213              
1214             =over
1215              
1216             ---> L<Worksheet level|Spreadsheet::XLSX::Reader::LibXML::Worksheet>*
1217              
1218             =over
1219              
1220             ---> L<Cell level|Spreadsheet::XLSX::Reader::LibXML::Cell>* -
1221             L<optional|/group_return_type>
1222              
1223             =back
1224              
1225             =back
1226              
1227             =back
1228              
1229             =head2 Warnings
1230              
1231             B<1.> Archive-Zip versions greater than 1.30 appear to be broken. This package requires
1232             Archive::Zip so I reccomend Archive-Zip-1.30.
1233              
1234             B<2.> This package requires that you can load L<XML::LibXML> which requires the L<libxml2
1235             |http://xmlsoft.org/> and 'libxml2-devel' libraries. I have included L<Alien::LibXML> in
1236             the build profile in an attempt to resolve any library issues but being new to usage of
1237             Alien libraries in general I'm not certain I got it quite right. Many OS's have these
1238             libraries installed as part of their core but if this package fails to load please log an
1239             issue in my repo on L<github|/SUPPORT>. On the other hand the correct libraries are
1240             loading on travis-ci during the builds so if no issue is logged before then I will B<remove
1241             this warning on 2/1/2016.>
1242              
1243             B<3.> Not all workbook sheets (tabs) are created equal! Some Excel sheet tabs are only a
1244             chart. These tabs are 'chartsheets'. The methods with 'worksheet' in the name only act on
1245             the sub set of tabs that are worksheets. Future methods with 'chartsheet' in the name will
1246             focus on the subset of sheets that are chartsheets. Methods with just 'sheet' in the name
1247             have the potential to act on both. The documentation for the chartsheet level class is found
1248             in L<Spreadsheet::XLSX::Reader::LibXML::Chartsheet> (still under construction). All chartsheet
1249             classes do not provide access to cells.
1250              
1251             B<4.> L<HMBRAND|https://metacpan.org/author/HMBRAND> pointed out that the formatter portion of
1252             this package for versions older than v0.38 do not follow the L<Spreadsheet::ParseExcel API
1253             |Spreadsheet::ParseExcel/Formatter-Class> for the formatter class. (I always welcome feeback)
1254             I suppose the original implementation was, in part, laziness. In an effort to comply with goal
1255             #2 of this package I have updated the API so that in versions starting with v0.38 the formatter
1256             is a stand-alone class. For details of the implemenation see
1257             L<Spreadsheet::XLSX::Reader::LibXML::FmtDefault/CLASS DESCRIPTION> This more closely follows
1258             Spreadsheet::ParseExcel, and incidentally probably makes building alternate formatting modules
1259             easier. I<The formatters will still not exchange back and forth between
1260             L<Spreadsheet::ParseExcel::FmtDefault> and back since they are both built to interface with
1261             fundamentally different architecture.> This change also affects how the role
1262             L<Spreadsheet::XLSX::Reader::LibXML::ParseExcelFormatStrings> is consumed. If
1263             you wrote your own formatter for this package for the old way I would be willing
1264             to provide troubleshooting support for the transition to the the new API. However if you are
1265             setting specific formats today using set_defined_excel_format_list you should be able to switch to
1266             L<Spreadsheet::XLSX::Reader::LibXML::FmtDefault/set_defined_excel_formats( %args )> or use the
1267             attribute L<Spreadsheet::XLSX::Reader::LibXML::FmtDefault/defined_excel_translations>. B<This warning
1268             will be removed on 2/1/2016.>
1269              
1270             B<5.> This package now supports reading xlsm files (Macro enabled Excel 2007+ workbooks).
1271             xlsm files allow for binaries to be embedded that may contain malicious code. However, other
1272             than unzipping the excel file no work is done by this package with the sub-file 'vbaProject.bin'
1273             containing the binaries. This update does not provide an API to that sub-file and I have no
1274             intention of doing so. Therefore my research indicates there should be no risk of virus activation
1275             while parsing even an infected xlsm file with this package but I encourage you to use your own
1276             judgement in this area.
1277              
1278             =head2 Attributes
1279              
1280             Data passed to new when creating an instance. For modification of these attributes see the
1281             listed 'attribute methods'. For general information on attributes see
1282             L<Moose::Manual::Attributes>. For ways to manage the workbook when opened see the
1283             L<Primary Methods|/Primary Methods>. For additional lesser used workbook options
1284             see L<Secondary Methods|/Secondary Methods>.
1285              
1286             B<Example>
1287              
1288             $workbook_instance = Spreadsheet::XLSX::Reader::LibXML->new( %attributes )
1289              
1290             I<note: if a file name or file handle for an .xlsx file are not included in the initial
1291             %attributes then one of them must be set by one of the attribute setter methods below
1292             before the rest of the package can be used.>
1293              
1294             =head3 file_name
1295              
1296             =over
1297              
1298             B<Definition:> This attribute holds the full file name and path for the xlsx|xlsm file to be
1299             parsed.
1300              
1301             B<Default> no default - either this or a L<file handle|/file_handle> must be provided to
1302             read a file
1303              
1304             B<Range> any unencrypted xlsx|xlsm file that can be opened in Microsoft Excel.
1305              
1306             B<attribute methods> Methods provided to adjust this attribute
1307            
1308             =over
1309              
1310             B<set_file_name>
1311              
1312             =over
1313              
1314             B<Definition:> change the file name value in the attribute (this will reboot
1315             the workbook instance)
1316              
1317             =back
1318              
1319             B<has_file_name>
1320              
1321             =over
1322              
1323             B<Definition:> this is used to see if the workbook loaded correctly using the
1324             file_name option to open an Excel .xlsx file.
1325              
1326             =back
1327              
1328             =back
1329              
1330             =back
1331              
1332             =head3 file_handle
1333              
1334             =over
1335              
1336             B<Definition:> This attribute holds a copy of the passed file handle reference.
1337              
1338             B<Default> no default - either this or a L<file name|/file_name> must be provided to read
1339             a file
1340              
1341             B<Range> any unencrypted xlsx file handle that can be opened in Microsoft Excel
1342              
1343             B<attribute methods> Methods provided to adjust this attribute
1344            
1345             =over
1346              
1347             B<set_file_handle>
1348              
1349             =over
1350              
1351             B<Definition:> change the set file handle (this will reboot the workbook instance)
1352              
1353             =back
1354              
1355             B<has_file_handle>
1356              
1357             =over
1358              
1359             B<Definition:> this is used to see if the workbook loaded correctly when using the
1360             file_handle option to open an Excel .xlsx file.
1361              
1362             =back
1363              
1364             =back
1365              
1366             =back
1367              
1368             =head3 error_inst
1369              
1370             =over
1371              
1372             B<Definition:> This attribute holds an 'error' object instance. It should have several
1373             methods for managing errors. Currently no error codes or error language translation
1374             options are available but this should make implementation of that easier.
1375              
1376             B<Default:> a L<Spreadsheet::XLSX::Reader::LibXML::Error> instance with the attributes set
1377             as;
1378            
1379             ( should_warn => 0 )
1380              
1381             B<Range:> The minimum list of methods to implement for your own instance is;
1382              
1383             error set_error clear_error set_warnings if_warn
1384            
1385             The error instance must be able to extract the error string from a passed error
1386             object as well. For now the current implementation will attempt ->as_string first
1387             and then ->message if an object is passed.
1388              
1389             B<attribute methods> Methods provided to adjust this attribute
1390              
1391             =over
1392              
1393             B<get_error_inst>
1394              
1395             =over
1396              
1397             B<Definition:> returns this instance
1398              
1399             =back
1400              
1401             B<error>
1402              
1403             =over
1404              
1405             B<Definition:> delegated method from the class used to get the most recently
1406             logged error string
1407              
1408             =back
1409              
1410             B<set_error>
1411              
1412             =over
1413              
1414             B<Definition:> delegated method from the class used to set a new error string
1415             (or pass an error object for extraction of the error string)
1416              
1417             =back
1418              
1419             B<clear_error>
1420              
1421             =over
1422              
1423             B<Definition:> delegated method from the class used to clear the current error
1424             string
1425              
1426             =back
1427              
1428             B<set_warnings>
1429              
1430             =over
1431              
1432             B<Definition:> delegated method from the class used to turn on or off real time
1433             warnings when errors are set
1434              
1435             =back
1436              
1437             B<if_warn>
1438              
1439             =over
1440              
1441             B<Definition:> delegated method from the class used to extend this package and
1442             see if warnings should be emitted.
1443              
1444             =back
1445            
1446             =back
1447              
1448             =back
1449              
1450             =head3 sheet_parser
1451              
1452             =over
1453              
1454             B<Definition:> This sets the way the .xlsx file is parsed. For now the only
1455             choice is 'reader'.
1456              
1457             B<Default> 'reader'
1458              
1459             B<Range> 'reader'
1460              
1461             B<attribute methods> Methods provided to adjust this attribute
1462            
1463             =over
1464              
1465             B<set_parser_type>
1466              
1467             =over
1468              
1469             B<Definition:> the way to change the parser type
1470              
1471             =back
1472              
1473             B<get_parser_type>
1474              
1475             =over
1476              
1477             B<Definition:> returns the currently set parser type
1478              
1479             =back
1480              
1481             =back
1482              
1483             =back
1484              
1485             =head3 count_from_zero
1486              
1487             =over
1488              
1489             B<Definition:> Excel spreadsheets count from 1. L<Spreadsheet::ParseExcel>
1490             counts from zero. This allows you to choose either way.
1491              
1492             B<Default> 1
1493              
1494             B<Range> 1 = counting from zero like Spreadsheet::ParseExcel,
1495             0 = Counting from 1 like Excel
1496              
1497             B<attribute methods> Methods provided to adjust this attribute
1498            
1499             =over
1500              
1501             B<counting_from_zero>
1502              
1503             =over
1504              
1505             B<Definition:> a way to check the current attribute setting
1506              
1507             =back
1508              
1509             B<set_count_from_zero>
1510              
1511             =over
1512              
1513             B<Definition:> a way to change the current attribute setting
1514              
1515             =back
1516              
1517             =back
1518              
1519             =back
1520              
1521             =head3 file_boundary_flags
1522              
1523             =over
1524              
1525             B<Definition:> When you request data to the right of the last column or below
1526             the last row of the data this package can return 'EOR' or 'EOF' to indicate that
1527             state. This is especially helpful in 'while' loops. The other option is to
1528             return 'undef'. This is problematic if some cells in your table are empty which
1529             also returns undef. What is determined to be the last column and row is determined
1530             by the attribute L<empty_is_end|/empty_is_end>.
1531              
1532             B<Default> 1
1533              
1534             B<Range> 1 = return 'EOR' or 'EOF' flags as appropriate, 0 = return undef when
1535             requesting a position that is out of bounds
1536              
1537             B<attribute methods> Methods provided to adjust this attribute
1538            
1539             =over
1540              
1541             B<boundary_flag_setting>
1542              
1543             =over
1544              
1545             B<Definition:> a way to check the current attribute setting
1546              
1547             =back
1548              
1549             B<change_boundary_flag>
1550              
1551             =over
1552              
1553             B<Definition:> a way to change the current attribute setting
1554              
1555             =back
1556              
1557             =back
1558              
1559             =back
1560              
1561             =head3 empty_is_end
1562              
1563             =over
1564              
1565             B<Definition:> The excel convention is to read the table left to right and top
1566             to bottom. Some tables have an uneven number of columns with real data from row
1567             to row. This allows the several methods that excersize a 'next' function to wrap
1568             after the last element with data rather than going to the max column. This also
1569             triggers 'EOR' flags after the last data element and before the sheet max column
1570             when not implementing 'next' functionality.
1571              
1572             B<Default> 0
1573              
1574             B<Range> 1 = treat all columns short of the max column for the sheet as being in
1575             the table, 0 = end each row after the last cell with data rather than going to the
1576             max sheet column
1577              
1578             B<attribute methods> Methods provided to adjust this attribute
1579            
1580             =over
1581              
1582             B<is_empty_the_end>
1583              
1584             =over
1585              
1586             B<Definition:> a way to check the current attribute setting
1587              
1588             =back
1589              
1590             B<set_empty_is_end>
1591              
1592             =over
1593              
1594             B<Definition:> a way to set the current attribute setting
1595              
1596             =back
1597              
1598             =back
1599              
1600             =back
1601              
1602             =head3 values_only
1603              
1604             =over
1605              
1606             B<Definition:> Excel will store information about a cell even if it only contains
1607             formatting data. In many cases you only want to see cells that actually have
1608             values. This attribute will change the package behaviour regarding cells that have
1609             formatting stored against that cell but no actual value.
1610              
1611             B<Default> 0
1612              
1613             B<Range> 1 = skip cells with formatting only and treat them as completely empty,
1614             0 = return informat about cells that only contain formatting
1615              
1616             B<attribute methods> Methods provided to adjust this attribute
1617            
1618             =over
1619              
1620             B<get_values_only>
1621              
1622             =over
1623              
1624             B<Definition:> a way to check the current attribute setting
1625              
1626             =back
1627              
1628             B<set_values_only>
1629              
1630             =over
1631              
1632             B<Definition:> a way to set the current attribute setting
1633              
1634             =back
1635              
1636             =back
1637              
1638             =back
1639              
1640             =head3 from_the_edge
1641              
1642             =over
1643              
1644             B<Definition:> Some data tables start in the top left corner. Others do not. I
1645             don't reccomend that practice but when aquiring data in the wild it is often good
1646             to adapt. This attribute sets whether the file reads from the top left edge or from
1647             the top row with data and starting from the leftmost column with data.
1648              
1649             B<Default> 1
1650              
1651             B<Range> 1 = treat the top left corner of the sheet as the beginning of rows and
1652             columns even if there is no data in the top row or leftmost column, 0 = Set the
1653             minimum row and minimum columns to be the first row and first column with data
1654              
1655             B<attribute methods> Methods provided to adjust this attribute
1656            
1657             =over
1658              
1659             B<set_from_the_edge>
1660              
1661             =over
1662              
1663             B<Definition:> a way to set the current attribute setting
1664              
1665             =back
1666              
1667             =back
1668              
1669             =back
1670              
1671             =head3 cache_positions
1672              
1673             =over
1674              
1675             B<Definition:> This parse can be slow. It does this by trading processing and
1676             file storage for RAM usage but that is probably not the average users choice. Not all
1677             things that can be cached are cached yet. However, when this attribute is set where
1678             the parser knows how to cache it will.
1679              
1680             B<Default> 1 = caching is turned on
1681              
1682             B<attribute methods> Methods provided to adjust this attribute
1683            
1684             =over
1685              
1686             B<get_cache_positions>
1687              
1688             =over
1689              
1690             B<Definition:> read the attribute
1691              
1692             =back
1693              
1694             =back
1695              
1696             =back
1697              
1698             =head3 format_inst
1699              
1700             =over
1701              
1702             B<Definition:> This is the attribute containing the format class. In general the
1703             default value is sufficient. However, If you want to tweak this a bit then review the
1704             L<class documentation|Spreadsheet::XLSX::Reader::LibXML::FmtDefault>. It does include
1705             a role that interprets the excel L<format string
1706             |https://support.office.com/en-us/article/Create-or-delete-a-custom-number-format-2d450d95-2630-43b8-bf06-ccee7cbe6864?ui=en-US&rs=en-US&ad=US>
1707             into a L<Type::Tiny> coercion.
1708              
1709             B<Default> L<Spreadsheet::XLSX::Reader::LibXML::FmtDefault>->new
1710              
1711             B<attribute methods> Methods provided to adjust this attribute
1712            
1713             =over
1714              
1715             B<set_format_inst>
1716              
1717             =over
1718              
1719             B<Definition:> a way to set the current attribute instance
1720              
1721             =back
1722              
1723             B<get_format_inst>
1724              
1725             =over
1726              
1727             B<Definition:> a way to get the current attribute setting
1728              
1729             =back
1730              
1731             =back
1732              
1733             B<delegated methods:>
1734              
1735             =over
1736              
1737             L<Spreadsheet::XLSX::Reader::LibXML::FmtDefault/get_defined_excel_format>
1738              
1739             L<Spreadsheet::XLSX::Reader::LibXML::FmtDefault/change_output_encoding>
1740              
1741             L<Spreadsheet::XLSX::Reader::LibXML::ParseExcelFormatStrings/parse_excel_format_string>
1742              
1743             L<Spreadsheet::XLSX::Reader::LibXML::ParseExcelFormatStrings//set_date_behavior>
1744              
1745             L<Spreadsheet::XLSX::Reader::LibXML::ParseExcelFormatStrings//get_date_behavior>
1746              
1747             =back
1748              
1749             =back
1750              
1751             =head3 group_return_type
1752              
1753             =over
1754              
1755             B<Definition:> Traditionally ParseExcel returns a cell object with lots of methods
1756             to reveal information about the cell. In reality the extra information is not used very
1757             much (witness the popularity of L<Spreadsheet::XLSX>). Because many users don't need or
1758             want the extra cell formatting information it is possible to get either the raw xml value,
1759             the raw visible cell value (seen in the Excel format bar), or the formatted cell value
1760             returned either the way the Excel file specified or the way you specify instead of a Cell
1761             instance with all the data. . See
1762             L<Spreadsheet::XLSX::Reader::LibXML::Worksheet/custom_formats> to insert custom targeted
1763             formats for use with the parser. All empty cells return undef no matter what.
1764              
1765             B<Default> instance
1766              
1767             B<Range> instance = returns a populated L<Spreadsheet::XLSX::Reader::LibXML::Cell> instance,
1768             unformatted = returns just the raw visible value of the cell shown in the Excel formula bar,
1769             value = returns just the formatted value stored in the excel cell, xml_value = the raw value
1770             for the cell as stored in the sub-xml files
1771              
1772             B<attribute methods> Methods provided to adjust this attribute
1773            
1774             =over
1775              
1776             B<get_group_return_type>
1777              
1778             =over
1779              
1780             B<Definition:> a way to check the current attribute setting
1781              
1782             =back
1783              
1784             B<set_group_return_type>
1785              
1786             =over
1787              
1788             B<Definition:> a way to set the current attribute setting
1789              
1790             =back
1791              
1792             =back
1793              
1794             =back
1795              
1796             =head3 empty_return_type
1797              
1798             =over
1799              
1800             B<Definition:> Traditionally L<Spreadsheet::ParseExcel> returns an empty string for cells
1801             with unique formatting but no stored value. It may be that the more accurate way of returning
1802             undef works better for you. This will turn that behaviour on. I<If Excel stores an empty
1803             string having this attribute set to 'undef_string' will still return the empty string!>
1804              
1805             B<Default> empty_string
1806              
1807             B<Range>
1808             empty_string = populates the unformatted value with '' even if it is set to undef
1809             undef_string = if excel stores undef for an unformatted value it will return undef
1810              
1811             B<attribute methods> Methods provided to adjust this attribute
1812            
1813             =over
1814              
1815             B<get_empty_return_type>
1816              
1817             =over
1818              
1819             B<Definition:> a way to check the current attribute setting
1820              
1821             =back
1822              
1823             B<set_empty_return_type>
1824              
1825             =over
1826              
1827             B<Definition:> a way to set the current attribute setting
1828              
1829             =back
1830              
1831             =back
1832              
1833             =back
1834              
1835             =head2 Primary Methods
1836              
1837             These are the primary ways to use this class. They can be used to open an .xlsx workbook.
1838             They are also ways to investigate information at the workbook level. For information on
1839             how to retrieve data from the worksheets see the
1840             L<Worksheet|Spreadsheet::XLSX::Reader::LibXML::Worksheet> and
1841             L<Cell|Spreadsheet::XLSX::Reader::LibXML::Cell> documentation. For additional workbook
1842             options see the L<Secondary Methods|/Secondary Methods>
1843             and the L<Attributes|/Attributes> sections. The attributes section specifically contains
1844             all the methods used to adjust the attributes of this class.
1845              
1846             All methods are object methods and should be implemented on the object instance.
1847              
1848             B<Example:>
1849              
1850             my @worksheet_array = $workbook_instance->worksheets;
1851              
1852             =head3 parse( $file_name|$file_handle, $formatter )
1853              
1854             =over
1855              
1856             B<Definition:> This is a convenience method to match L<Spreadsheet::ParseExcel/parse($filename, $formatter)>.
1857             It only works if the L<file_name|/file_name> or L<file_handle|/file_handle> attribute was not
1858             set with ->new. It is one way to set the 'file_name' or 'file_handle' attribute [and the
1859             L<default_format_list|/default_format_list> attribute]. I<You cannot pass both a file name
1860             and a file handle simultaneously to this method.>
1861              
1862             B<Accepts:>
1863              
1864             $file = a valid xlsx file [or a valid xlsx file handle] (required)
1865             [$formatter] = see the default_format_list attribute for valid options (optional)
1866              
1867             B<Returns:> itself when passing with the xlsx file loaded to the workbook level or
1868             undef for failure.
1869              
1870             =back
1871              
1872             =head3 worksheets
1873              
1874             =over
1875              
1876             B<Definition:> This method will return an array (I<not an array reference>)
1877             containing a list of references to all worksheets in the workbook. This is not
1878             a reccomended method. It is provided for compatibility to Spreadsheet::ParseExcel.
1879             For alternatives see the L<get_worksheet_names|/get_worksheet_names> method and the
1880             L<worksheet|/worksheet( $name )> methods. B<For now it also only returns the tabular
1881             worksheets in the workbook. All chart worksheets are ignored! (future inclusion will
1882             included a backwards compatibility policy)>
1883              
1884             B<Accepts:> nothing
1885              
1886             B<Returns:> an array ref of L<Worksheet|Spreadsheet::XLSX::Reader::LibXML::Worksheet>
1887             objects for all worksheets in the workbook.
1888              
1889             =back
1890              
1891             =head3 worksheet( $name )
1892              
1893             =over
1894              
1895             B<Definition:> This method will return an object to read values in the worksheet.
1896             If no value is passed to $name then the 'next' worksheet in physical order is
1897             returned. I<'next' will NOT wrap> It also only iterates through the 'worksheets'
1898             in the workbook (but not the 'chartsheets').
1899              
1900             B<Accepts:> the $name string representing the name of the worksheet object you
1901             want to open. This name is the word visible on the tab when opening the spreadsheet
1902             in Excel. (not the underlying zip member file name - which can be different. It will
1903             not accept chart tab names.)
1904              
1905             B<Returns:> a L<Worksheet|Spreadsheet::XLSX::Reader::LibXML::Worksheet> object with the
1906             ability to read the worksheet of that name. It returns undef and sets the error attribute
1907             if a 'chartsheet' is requested. Or in 'next' mode it returns undef if past the last sheet.
1908              
1909             B<Example:> using the implied 'next' worksheet;
1910              
1911             while( my $worksheet = $workbook->worksheet ){
1912             print "Reading: " . $worksheet->name . "\n";
1913             # get the data needed from this worksheet
1914             }
1915              
1916             =back
1917              
1918             =head3 in_the_list
1919              
1920             =over
1921              
1922             B<Definition:> This is a predicate method that indicates if the 'next'
1923             L<worksheet|/worksheet( $name )> function has been implemented at least once.
1924              
1925             B<Accepts:>nothing
1926              
1927             B<Returns:> true = 1, false = 0
1928             once
1929              
1930             =back
1931              
1932             =head3 start_at_the_beginning
1933              
1934             =over
1935              
1936             B<Definition:> This restarts the 'next' worksheet at the first worksheet. This
1937             method is only useful in the context of the L<worksheet|/worksheet( $name )>
1938             function.
1939              
1940             B<Accepts:> nothing
1941              
1942             B<Returns:> nothing
1943              
1944             =back
1945              
1946             =head3 worksheet_count
1947              
1948             =over
1949              
1950             B<Definition:> This method returns the count of worksheets (excluding charts) in
1951             the workbook.
1952              
1953             B<Accepts:>nothing
1954              
1955             B<Returns:> an integer
1956              
1957             =back
1958              
1959             =head3 get_worksheet_names
1960              
1961             =over
1962              
1963             B<Definition:> This method returns an array ref of all the worksheet names in the
1964             workbook. (It excludes chartsheets.)
1965              
1966             B<Accepts:> nothing
1967              
1968             B<Returns:> an array ref
1969              
1970             B<Example:> Another way to parse a workbook without building all the sheets at
1971             once is;
1972              
1973             for $sheet_name ( @{$workbook->worksheet_names} ){
1974             my $worksheet = $workbook->worksheet( $sheet_name );
1975             # Read the worksheet here
1976             }
1977              
1978             =back
1979              
1980             =head3 get_sheet_names
1981              
1982             =over
1983              
1984             B<Definition:> This method returns an array ref of all the sheet names (tabs) in the
1985             workbook. (It includes chartsheets.)
1986              
1987             B<Accepts:> nothing
1988              
1989             B<Returns:> an array ref
1990              
1991             =back
1992              
1993             =head3 get_chartheet_names
1994              
1995             =over
1996              
1997             B<Definition:> This method returns an array ref of all the chartsheet names in the
1998             workbook. (It excludes worksheets.)
1999              
2000             B<Accepts:> nothing
2001              
2002             B<Returns:> an array ref
2003              
2004             =back
2005              
2006             =head3 sheet_name( $Int )
2007              
2008             =over
2009              
2010             B<Definition:> This method returns the sheet name for a given physical position
2011             in the workbook from left to right. It counts from zero even if the workbook is in
2012             'count_from_one' mode. B(It will return chart names but chart tab names cannot currently
2013             be converted to worksheets). You may actually want L<worksheet_name|worksheet_name( $Int )>
2014             instead of this function.
2015              
2016             B<Accepts:> integers
2017              
2018             B<Returns:> the sheet name (both workbook and worksheet)
2019              
2020             B<Example:> To return only worksheet positions 2 through 4
2021              
2022             for $x (2..4){
2023             my $worksheet = $workbook->worksheet( $workbook->worksheet_name( $x ) );
2024             # Read the worksheet here
2025             }
2026              
2027             =back
2028              
2029             =head3 sheet_count
2030              
2031             =over
2032              
2033             B<Definition:> This method returns the count of all sheets in the workbook (worksheets
2034             and chartsheets).
2035              
2036             B<Accepts:> nothing
2037              
2038             B<Returns:> a count of all sheets
2039              
2040             =back
2041              
2042             =head3 worksheet_name( $Int )
2043              
2044             =over
2045              
2046             B<Definition:> This method returns the worksheet name for a given order in the workbook
2047             from left to right. It does not count any 'chartsheet' positions as valid. It counts
2048             from zero even if the workbook is in 'count_from_one' mode.
2049              
2050             B<Accepts:> integers
2051              
2052             B<Returns:> the worksheet name
2053              
2054             B<Example:> To return only worksheet positions 2 through 4 and then parse them
2055              
2056             for $x (2..4){
2057             my $worksheet = $workbook->worksheet( $workbook->worksheet_name( $x ) );
2058             # Read the worksheet here
2059             }
2060              
2061             =back
2062              
2063             =head3 worksheet_count
2064              
2065             =over
2066              
2067             B<Definition:> This method returns the count of all worksheets in the workbook (not
2068             including chartsheets).
2069              
2070             B<Accepts:> nothing
2071              
2072             B<Returns:> a count of all worksheets
2073              
2074             =back
2075              
2076             =head3 chartsheet_name( $Int )
2077              
2078             =over
2079              
2080             B<Definition:> This method returns the chartsheet name for a given order in the workbook
2081             from left to right. It does not count any 'worksheet' positions as valid. It counts
2082             from zero even if the workbook is in 'count_from_one' mode.
2083              
2084             B<Accepts:> integers
2085              
2086             B<Returns:> the chartsheet name
2087              
2088             =back
2089              
2090             =head3 chartsheet_count
2091              
2092             =over
2093              
2094             B<Definition:> This method returns the count of all chartsheets in the workbook (not
2095             including worksheets).
2096              
2097             B<Accepts:> nothing
2098              
2099             B<Returns:> a count of all chartsheets
2100              
2101             =back
2102              
2103             =head3 error
2104              
2105             =over
2106              
2107             B<Definition:> This returns the most recent error message logged by the package. This
2108             method is mostly relevant when an unexpected result is returned by some other method.
2109              
2110             B<Accepts:>nothing
2111              
2112             B<Returns:> an error string.
2113              
2114             =back
2115              
2116             =head2 Secondary Methods
2117              
2118             These are the additional methods that include ways to extract additional information about
2119             the .xlsx file and ways to modify workbook and worksheet parsing that are less common.
2120             Note that all methods specifically used to adjust workbook level attributes are listed in
2121             the L<Attribute|/Attribute> section. This section primarily contains methods for or
2122             L<delegated|Moose::Manual::Delegation> from private attributes set up during the workbook
2123             load process.
2124              
2125             =head3 parse_excel_format_string( $format_string )
2126              
2127             =over
2128              
2129             Roundabout delegation from
2130             L<Spreadsheet::XLSX::Reader::LibXML::ParseExcelFormatStrings/parse_excel_format_string( $string )>
2131              
2132             =back
2133              
2134             =head3 creator
2135              
2136             =over
2137              
2138             B<Definition:> Retrieve the stored creator string from the Excel file.
2139              
2140             B<Accepts> nothing
2141              
2142             B<Returns> A string
2143              
2144             =back
2145              
2146             =head3 date_created
2147              
2148             =over
2149              
2150             B<Definition:> returns the date the file was created
2151              
2152             B<Accepts> nothing
2153              
2154             B<Returns> A string
2155              
2156             =back
2157              
2158             =head3 modified_by
2159              
2160             =over
2161              
2162             B<Definition:> returns the user name of the person who last modified the file
2163              
2164             B<Accepts> nothing
2165              
2166             B<Returns> A string
2167              
2168             =back
2169              
2170             =head3 date_modified
2171              
2172             =over
2173              
2174             B<Definition:> returns the date when the file was last modified
2175              
2176             B<Accepts> nothing
2177              
2178             B<Returns> A string
2179              
2180             =back
2181              
2182             =head3 get_epoch_year
2183              
2184             =over
2185              
2186             B<Definition:> This returns the epoch year defined by the Excel workbook.
2187              
2188             B<Accepts:> nothing
2189              
2190             B<Returns:> 1900 = Windows Excel or 1904 = Apple Excel
2191              
2192             =back
2193              
2194             =head3 get_shared_string_position
2195              
2196             =over
2197              
2198             Roundabout delegation from
2199             L<Spreadsheet::XLSX::Reader::LibXML::SharedStrings/get_shared_string_position( $position )>
2200              
2201             =back
2202              
2203             =head3 get_format_position
2204              
2205             =over
2206              
2207             Roundabout delegation from
2208             L<Spreadsheet::XLSX::Reader::LibXML::Styles/get_format_position( $position, [$header] )>
2209              
2210             =back
2211              
2212             =head3 set_defined_excel_format_list
2213              
2214             =over
2215              
2216             Roundabout delegation from
2217             L<Spreadsheet::XLSX::Reader::LibXML::FmtDefault/set_defined_excel_format_list>
2218              
2219             =back
2220              
2221             =head3 change_output_encoding
2222              
2223             =over
2224              
2225             Roundabout delegation from
2226             L<Spreadsheet::XLSX::Reader::LibXML::FmtDefault/change_output_encoding( $string )>
2227              
2228             =back
2229              
2230             =head3 set_cache_behavior
2231              
2232             =over
2233              
2234             Roundabout delegation from
2235             L<Spreadsheet::XLSX::Reader::LibXML::ParseExcelFormatStrings/cache_formats>
2236              
2237             =back
2238              
2239             =head3 get_date_behavior
2240              
2241             =over
2242              
2243             Roundabout delegation from
2244             L<Spreadsheet::XLSX::Reader::LibXML::ParseExcelFormatStrings/datetime_dates>
2245              
2246             =back
2247              
2248             =head3 set_date_behavior
2249              
2250             =over
2251              
2252             Roundabout delegation from
2253             L<Spreadsheet::XLSX::Reader::LibXML::ParseExcelFormatStrings/datetime_dates>
2254              
2255             =back
2256              
2257             =head1 FLAGS
2258              
2259             The parameter list (attributes) that are possible to pass to ->new is somewhat long.
2260             Therefore you may want a shortcut that aggregates some set of attribute settings that
2261             are not the defaults but wind up being boilerplate. I have provided possible
2262             alternate sets like this and am open to providing others that are suggested. The
2263             flags will have a : in front of the identifier and will be passed to the class in the
2264             'use' statement for consumption by the import method. The flags can be stacked and
2265             where there is conflict between the flag settings the rightmost passed flag setting is
2266             used.
2267              
2268             Example;
2269              
2270             use Spreadsheet::XLSX::Reader::LibXML v0.34.4 qw( :alt_default :debug );
2271              
2272             =head2 :alt_default
2273              
2274             This is intended for a deep look at data and skip formatting cells.
2275              
2276             =over
2277              
2278             B<Default attribute differences>
2279              
2280             =over
2281              
2282             L<values_only|/values_only> => 1
2283              
2284             L<count_from_zero|/count_from_zero> => 0
2285              
2286             L<empty_is_end|/empty_is_end> => 1
2287              
2288             =back
2289              
2290             =back
2291              
2292             =head2 :just_the_data
2293              
2294             This is intended for a shallow look at data and skip formatting.
2295              
2296             =over
2297              
2298             B<Default attribute differences>
2299              
2300             =over
2301              
2302             L<values_only|/values_only> => 1
2303              
2304             L<count_from_zero|/count_from_zero> => 0
2305              
2306             L<empty_is_end|/empty_is_end> => 1
2307              
2308             L<group_return_type|/group_return_type> => 'value'
2309              
2310             L<cache_positions|/cache_positions> => 1
2311              
2312             L<from_the_edge|/from_the_edge> => 0,
2313              
2314             =back
2315              
2316             =back
2317              
2318             =head2 :just_raw_data
2319              
2320             This is intended for a shallow look at raw text and skips all formatting including number formats.
2321              
2322             =over
2323              
2324             B<Default attribute differences>
2325              
2326             =over
2327              
2328             L<values_only|/values_only> => 1
2329              
2330             L<count_from_zero|/count_from_zero> => 0
2331              
2332             L<empty_is_end|/empty_is_end> => 1
2333              
2334             L<group_return_type|/group_return_type> => 'unformatted'
2335              
2336             L<cache_positions|/cache_positions> => 1
2337              
2338             L<from_the_edge|/from_the_edge> => 0,
2339              
2340             =back
2341              
2342             =back
2343              
2344             =head2 :debug
2345              
2346             Turn on L<Spreadsheet::XLSX::Reader::LibXML::Error/should_warn> in the Error attribute (instance)
2347              
2348             =over
2349              
2350             B<Default attribute differences>
2351              
2352             =over
2353              
2354             L<Spreadsheet::XLSX::Reader::LibXML::Error/should_warn> => 1
2355              
2356             =back
2357              
2358             =back
2359              
2360             =head1 BUILD / INSTALL from Source
2361              
2362             B<0.> Please note that using L<cpanm|https://metacpan.org/pod/App::cpanminus> is much easier
2363             than a source build! (but it will not always give the latest github version)
2364              
2365             cpanm Spreadsheet::XLSX::Reader::LibXML
2366            
2367             And then if you feel kindly
2368              
2369             cpanm-reporter
2370              
2371             B<1.> This package uses L<Alien::LibXML> to try and ensure that the mandatory prerequisite
2372             L<XML::LibXML> will load. The biggest gotcha here is that older (<5.20.0.2) versions of
2373             Strawberry Perl and some other Win32 perls may not support the script 'pkg-config' which is
2374             required. You can resolve this by installation L<PkgConfig> as 'pkg-config'. I have
2375             included the short version of that process below but download the full L<PkgConfig> distribution
2376             and read README.win32 file for other options and much more explanation.
2377              
2378             =over
2379              
2380             B<this will conflict with any existing pkg-config installed>
2381              
2382             C:\> cpanm PkgConfig --configure-args=--script=pkg-config
2383            
2384             =back
2385              
2386             It may be that you still need to use a system package manager to L<load|http://xmlsoft.org/> the
2387             'libxml2-devel' library. If this is the case or you experience any other installation issues please
2388             L<submit them to github|https://github.com/jandrew/Spreadsheet-XLSX-Reader-LibXML/issues> especially
2389             if they occur prior to starting the test suit as these failures will not auto push from CPAN Testers
2390             so I won't know to fix them!
2391            
2392             B<2.> Download a compressed file with this package code from your favorite source
2393              
2394             =over
2395              
2396             L<github|https://github.com/jandrew/Spreadsheet-XLSX-Reader-LibXML>
2397              
2398             L<Meta::CPAN|https://metacpan.org/pod/Spreadsheet::XLSX::Reader::LibXML>
2399              
2400             L<CPAN|http://search.cpan.org/~jandrew/Spreadsheet-XLSX-Reader-LibXML/>
2401              
2402             =back
2403            
2404             B<3.> Extract the code from the compressed file.
2405              
2406             =over
2407              
2408             If you are using tar on a .tar.gz file this should work:
2409              
2410             tar -zxvf Spreadsheet-XLSX-Reader-LibXML-v0.xx.tar.gz
2411            
2412             =back
2413              
2414             B<4.> Change (cd) into the extracted directory
2415              
2416             B<5.> Run the following
2417              
2418             =over
2419              
2420             (for Windows find what version of make was used to compile your perl)
2421              
2422             perl -V:make
2423            
2424             (then for Windows substitute the correct make function (s/make/dmake/g)? below)
2425            
2426             =back
2427              
2428             perl Makefile.PL
2429              
2430             make
2431              
2432             make test
2433              
2434             make install # As sudo/root
2435              
2436             make clean
2437              
2438             =head1 SUPPORT
2439              
2440             =over
2441              
2442             L<github Spreadsheet::XLSX::Reader::LibXML/issues|https://github.com/jandrew/Spreadsheet-XLSX-Reader-LibXML/issues>
2443              
2444             =back
2445              
2446             =head1 TODO
2447              
2448             =over
2449              
2450             B<1.> Add POD for all the new chart methods!
2451              
2452             B<1.> Build an 'Alien::LibXML::Devel' package to load the libxml2-devel libraries from source and
2453             require that and L<Alien::LibXML> in the build file. So all needed requirements for L<XML::LibXML>
2454             are met
2455              
2456             =over
2457              
2458             Both libxml2 and libxml2-devel libraries are required for XML::LibXML
2459              
2460             =back
2461              
2462             B<2.> Add a pivot table reader (Not just read the values from the sheet)
2463              
2464             B<3.> Add calc chain methods
2465              
2466             B<4.> Add more exposure to workbook formatting methods
2467              
2468             B<5.> Build a DOM parser alternative for the sheets
2469              
2470             =over
2471              
2472             (Theoretically faster than the reader but uses more memory)
2473              
2474             =back
2475              
2476             =back
2477              
2478             =head1 AUTHOR
2479              
2480             =over
2481              
2482             Jed Lund
2483              
2484             jandrew@cpan.org
2485              
2486             =back
2487              
2488             =head1 CONTRIBUTORS
2489              
2490             This is the (likely incomplete) list of people who have helped
2491             make this distribution what it is, either via code contributions,
2492             patches, bug reports, help with troubleshooting, etc. A huge
2493             'thank you' to all of them.
2494              
2495             =over
2496              
2497             L<Frank Maas|https://github.com/Frank071>
2498              
2499             L<Stuart Watt|https://github.com/morungos>
2500              
2501             L<Toby Inkster|https://github.com/morungos>
2502              
2503             L<Breno G. de Oliveira|https://github.com/garu>
2504              
2505             L<Bill Baker|https://github.com/wdbaker54>
2506              
2507             L<H.Merijin Brand|https://github.com/Tux>
2508              
2509             L<Todd Eigenschink|mailto:todd@xymmetrix.com>
2510              
2511             =back
2512              
2513             =head1 COPYRIGHT
2514              
2515             This program is free software; you can redistribute
2516             it and/or modify it under the same terms as Perl itself.
2517              
2518             The full text of the license can be found in the
2519             LICENSE file included with this module.
2520              
2521             This software is copyrighted (c) 2014, 2015 by Jed Lund
2522              
2523             =head1 DEPENDENCIES
2524              
2525             =over
2526              
2527             L<perl 5.010|perl/5.10.0>
2528              
2529             L<Archive::Zip>
2530              
2531             L<Carp>
2532              
2533             L<Clone>
2534              
2535             L<DateTime::Format::Flexible>
2536              
2537             L<DateTimeX::Format::Excel>
2538              
2539             L<IO::File>
2540              
2541             L<List::Util> - 1.33
2542              
2543             L<Moose> - 2.1213
2544              
2545             L<MooseX::HasDefaults::RO>
2546              
2547             L<MooseX::ShortCut::BuildInstance> - 1.032
2548              
2549             L<MooseX::StrictConstructor>
2550              
2551             L<Type::Tiny> - 1.000
2552              
2553             L<XML::LibXML>
2554              
2555             L<version> - 0.077
2556              
2557             =back
2558              
2559             =head1 SEE ALSO
2560              
2561             =over
2562              
2563             L<Spreadsheet::Read> - generic Spreadsheet reader that (hopefully) supports this package
2564              
2565             L<Spreadsheet::ParseExcel> - Excel version 2003 and earlier
2566              
2567             L<Spreadsheet::XLSX> - Excel version 2007 and later
2568              
2569             L<Spreadsheet::ParseXLSX> - Excel version 2007 and later
2570              
2571             L<Log::Shiras|https://github.com/jandrew/Log-Shiras>
2572              
2573             =over
2574              
2575             All lines in this package that use Log::Shiras are commented out
2576              
2577             =back
2578              
2579             =back
2580              
2581             =cut
2582              
2583             #########1#########2 main pod documentation end 5#########6#########7#########8#########9