File Coverage

lib/Spreadsheet/XLSX/Reader/LibXML.pm
Criterion Covered Total %
statement 24 26 92.3
branch n/a
condition n/a
subroutine 9 9 100.0
pod n/a
total 33 35 94.2


line stmt bran cond sub pod time code
1             package Spreadsheet::XLSX::Reader::LibXML;
2             our $AUTHORITY = 'cpan:JANDREW';
3 11     11   2316178 use version 0.77; our $VERSION = version->declare('v0.38.22');
  11         22977  
  11         80  
4             ###LogSD warn "You uncovered internal logging statements for Spreadsheet::XLSX::Reader::LibXML-$VERSION";
5              
6 11     11   1142 use 5.010;
  11         54  
7 11     11   57 use List::Util 1.33;
  11         300  
  11         616  
8 11     11   9553 use Moose;
  11         3813800  
  11         75  
9 11     11   91281 use MooseX::StrictConstructor;
  11         361513  
  11         57  
10 11     11   108907 use MooseX::HasDefaults::RO;
  11         85464  
  11         55  
11 11     11   107733 use Carp qw( confess );
  11         23  
  11         661  
12 11     11   12920 use Archive::Zip qw( AZ_OK );
  11         828311  
  11         654  
13 11     11   20384 use XML::LibXML;
  0            
  0            
14             use IO::File;
15             use Clone 'clone';
16             use Types::Standard qw(
17             InstanceOf Str StrMatch
18             Enum HashRef ArrayRef
19             CodeRef Int HasMethods
20             Bool is_Object is_HashRef
21             );
22             use lib '../../../../lib',;
23             use Data::Dumper;
24             ###LogSD use Log::Shiras::Telephone;
25             ###LogSD use Log::Shiras::UnhideDebug;
26             use Spreadsheet::XLSX::Reader::LibXML::XMLReader::Styles;
27             use Spreadsheet::XLSX::Reader::LibXML::FmtDefault;
28             use Spreadsheet::XLSX::Reader::LibXML::XMLReader::SharedStrings;
29             use Spreadsheet::XLSX::Reader::LibXML::XMLReader::WorksheetToRow;
30             use Spreadsheet::XLSX::Reader::LibXML::Worksheet;
31             use Spreadsheet::XLSX::Reader::LibXML::XMLReader::Chartsheet;
32             use Spreadsheet::XLSX::Reader::LibXML::Error;
33             use Spreadsheet::XLSX::Reader::LibXML::Types qw( XLSXFile ParserType IOFileType );
34             ###LogSD use Log::Shiras::UnhideDebug;
35             use MooseX::ShortCut::BuildInstance 1.032 qw( build_instance should_re_use_classes );
36             should_re_use_classes( 1 );
37             ###LogSD with 'Log::Shiras::LogSpace';
38             ###LogSD sub get_class_space{ 'Workbook' }
39              
40             #########1 Dispatch Tables 3#########4#########5#########6#########7#########8#########9
41              
42             my $parser_modules ={
43             reader =>{
44             build_method => '_build_reader',
45             sharedStrings =>{
46             superclasses => ['Spreadsheet::XLSX::Reader::LibXML::XMLReader::SharedStrings'],
47             attributes => [qw( error_inst cache_positions empty_return_type group_return_type )],
48             store => '_set_shared_strings_instance',
49             package => 'SharedStrings',
50             },
51             styles =>{
52             superclasses => ['Spreadsheet::XLSX::Reader::LibXML::XMLReader::Styles'],
53             attributes => [qw( error_inst cache_positions format_inst empty_return_type )],
54             store => '_set_styles_instance',
55             package => 'Styles',
56             },
57             worksheet =>{
58             superclasses => ['Spreadsheet::XLSX::Reader::LibXML::XMLReader::WorksheetToRow'],
59             roles => ['Spreadsheet::XLSX::Reader::LibXML::Worksheet'],
60             package => 'Worksheet',
61             },
62             chartsheet =>{
63             superclasses => ['Spreadsheet::XLSX::Reader::LibXML::XMLReader::Chartsheet'],
64             package => 'Chartsheet',
65             },
66             },
67             };
68             my $xml_parser = XML::LibXML->new();
69             my $build_ref = {
70             top_level_workbook =>{
71             zip => 'xl/workbook.xml',
72             },
73             workbook_rels =>{
74             zip => 'xl/_rels/workbook.xml.rels',
75             },
76             doc_props =>{
77             zip => 'docProps/core.xml',
78             },
79             sharedStrings =>{
80             zip => 'xl/sharedStrings.xml',
81             },
82             styles =>{
83             zip => 'xl/styles.xml',
84             },
85             calcChain =>{
86             zip => 'xl/calcChain.xml',
87             },
88             };
89             my $attribute_defaults ={
90             error_inst =>{
91             superclasses => ['Spreadsheet::XLSX::Reader::LibXML::Error'],
92             package => 'ErrorInstance',
93             should_warn => 0,
94             },
95             format_inst =>{
96             superclasses => ['Spreadsheet::XLSX::Reader::LibXML::FmtDefault'],
97             package => 'FormatInstance',
98             },
99             sheet_parser => 'reader',
100             count_from_zero => 1,
101             file_boundary_flags => 1,
102             empty_is_end => 0,
103             values_only => 0,
104             from_the_edge => 1,
105             group_return_type => 'instance',
106             empty_return_type => 'empty_string',
107             };
108             my $flag_settings ={
109             alt_default =>{
110             values_only => 1,
111             count_from_zero => 0,
112             empty_is_end => 1,
113             },
114             just_the_data =>{
115             count_from_zero => 0,
116             values_only => 1,
117             empty_is_end => 1,
118             group_return_type => 'value',
119             cache_positions => 1,
120             from_the_edge => 0,
121             empty_return_type => 'undef_string',
122             },
123             just_raw_data =>{
124             count_from_zero => 0,
125             values_only => 1,
126             empty_is_end => 1,
127             group_return_type => 'unformatted',
128             cache_positions => 1,
129             from_the_edge => 0,
130             empty_return_type => 'undef_string',
131             },
132             like_ParseExcel =>{
133             count_from_zero => 1,
134             cache_positions => 1,
135             group_return_type => 'instance',
136             },
137             debug =>{
138             error_inst =>{
139             superclasses => ['Spreadsheet::XLSX::Reader::LibXML::Error'],
140             package => 'ErrorInstance',
141             should_warn => 1,
142             },
143             },
144             };
145              
146             #########1 Public Attributes 3#########4#########5#########6#########7#########8#########9
147              
148             has error_inst =>(
149             isa => HasMethods[qw(
150             error set_error clear_error set_warnings if_warn should_spew_longmess spewing_longmess
151             ),
152             ###LogSD 'set_log_space',
153             ],
154             clearer => '_clear_error_inst',
155             reader => 'get_error_inst',
156             predicate => 'has_error_inst',
157             required => 1,
158             handles =>{ qw(
159             error error
160             set_error set_error
161             clear_error clear_error
162             set_warnings set_warnings
163             if_warn if_warn
164             should_spew_longmess should_spew_longmess
165             spewing_longmess spewing_longmess
166             ),
167             ###LogSD set_error_log_space => 'set_log_space',
168             },
169             trigger => sub{
170             if( $_[0]->_has_format_inst and !$_[0]->get_format_inst->block_inherit ){
171             $_[0]->get_format_inst->set_error_inst( $_[1] );
172             }
173             },
174             );
175            
176             has format_inst =>(
177             isa => HasMethods[qw(
178             set_error_inst set_excel_region
179             set_target_encoding get_defined_excel_format
180             set_defined_excel_formats change_output_encoding
181             set_epoch_year set_cache_behavior
182             set_date_behavior get_defined_conversion
183             parse_excel_format_string set_european_first
184             ),
185             ###LogSD 'set_log_space',
186             ],
187             writer => 'set_format_inst',
188             reader => 'get_format_inst',
189             predicate => '_has_format_inst',
190             handles => { qw(
191             get_defined_excel_format get_defined_excel_format
192             parse_excel_format_string parse_excel_format_string
193             change_output_encoding change_output_encoding
194             set_date_behavior set_date_behavior
195             get_date_behavior get_date_behavior
196             set_defined_excel_formats set_defined_excel_formats
197             set_european_first set_european_first
198             ),
199             ###LogSD set_formatter_log_space => 'set_log_space',
200             },
201             trigger => \&_import_format_settings,
202             );
203            
204             has file_name =>(
205             isa => XLSXFile,
206             writer => 'set_file_name',
207             clearer => '_clear_file_name',
208             predicate => 'has_file_name',
209             trigger => \&_build_workbook,
210             );
211              
212             has file_handle =>(
213             isa => IOFileType,
214             writer => 'set_file_handle',
215             clearer => '_clear_file_handle',
216             predicate => 'has_file_handle',
217             coerce => 1,
218             trigger => \&_build_workbook,
219             );
220              
221             has sheet_parser =>(
222             isa => ParserType,
223             writer => 'set_parser_type',
224             reader => 'get_parser_type',
225             coerce => 1,
226             );
227              
228             has count_from_zero =>(
229             isa => Bool,
230             reader => 'counting_from_zero',
231             writer => 'set_count_from_zero',
232             );
233            
234             has file_boundary_flags =>(
235             isa => Bool,
236             reader => 'boundary_flag_setting',
237             writer => 'change_boundary_flag',
238             required => 1,
239             );
240              
241             has empty_is_end =>(
242             isa => Bool,
243             writer => 'set_empty_is_end',
244             reader => 'is_empty_the_end',
245             );
246              
247             has values_only =>(
248             isa => Bool,
249             writer => 'set_values_only',
250             reader => 'get_values_only',
251             );
252              
253             has from_the_edge =>(
254             isa => Bool,
255             reader => '_starts_at_the_edge',
256             writer => 'set_from_the_edge',
257             );
258              
259             has default_format_list =>(
260             isa => Str,
261             writer => 'set_default_format_list',
262             reader => 'get_default_format_list',
263             );
264              
265             has format_string_parser =>(
266             isa => Str,
267             writer => 'set_format_string_parser',
268             reader => 'get_format_string_parser',
269             );
270              
271             has group_return_type =>(
272             isa => Enum[qw( unformatted value instance xml_value )],
273             reader => 'get_group_return_type',
274             writer => 'set_group_return_type',
275             );
276              
277             has empty_return_type =>(
278             isa => Enum[qw( empty_string undef_string )],
279             reader => 'get_empty_return_type',
280             writer => 'set_empty_return_type',
281             );
282            
283             has cache_positions =>(
284             isa => Bool,
285             reader => 'get_cache_positions',
286             default => 1,
287             );
288              
289             #########1 Public Methods 3#########4#########5#########6#########7#########8#########9
290              
291             sub import{# Flags handled here!
292             my ( $self, @flag_list ) = @_;
293            
294             if( scalar( @flag_list ) ){
295             for my $flag ( @flag_list ){
296             #~ print "Arrived at import with flag: $flag\n";
297             if( $flag =~ /^:(\w*)$/ ){# Handle text based flags
298             my $default_choice = $1;
299             #~ print "Attempting to change the default group type to: $default_choice\n";
300             if( exists $flag_settings->{$default_choice} ){
301             for my $attribute ( keys %{$flag_settings->{$default_choice}} ){
302             #~ print "Changing flag -$attribute- to:" . Dumper( $flag_settings->{$default_choice}->{$attribute} );
303             $attribute_defaults->{$attribute} = $flag_settings->{$default_choice}->{$attribute};
304             }
305             }else{
306             confess "No settings available for the flag: $flag";
307             }
308             }elsif( $flag =~ /^v?\d+\.?\d*/ ){# Version check may wind up here
309             #~ print "Running version check on version: $flag\n";
310             my $result = $VERSION <=> version->parse( $flag );
311             #~ print "Tested against version -$VERSION- gives result: $result\n";
312             if( $result < 0 ){
313             confess "Version -$flag- required - the installed version is: $VERSION";
314             }
315             }else{
316             confess "Passed attribute default flag -$flag- does not comply with the correct format";
317             }
318             }
319             }
320             }
321              
322             sub parse{
323              
324             my ( $self, $file, $formatter ) = @_;
325             ###LogSD my $phone = Log::Shiras::Telephone->new( name_space =>
326             ###LogSD $self->get_all_space . '::parse', );
327             ###LogSD $phone->talk( level => 'info', message =>[
328             ###LogSD "Arrived at parse for:", $file,
329             ###LogSD (($formatter) ? "with formatter: $formatter" : '') ] );
330             $self->set_format_string_parser( $formatter ) if $formatter;
331             if( IOFileType->check( $file ) ){
332             ###LogSD $phone->talk( level => 'info', message =>[ 'passed a file handle:', $file, ] );
333             eval '$self->set_file_handle( $file )';
334             }else{
335             ###LogSD $phone->talk( level => 'info', message =>[ 'passed a file name: ' . $file, ] );
336             eval '$self->set_file_name( $file )';
337             }
338             if( $@ ){
339             my $error_message = $@;
340             ###LogSD $phone->talk( level => 'info', message =>[ 'saving error:', $error_message, ] );
341             $self->set_error( $error_message );
342             return undef;
343             }else{
344             return $self;
345             }
346             }
347              
348             sub worksheets{
349              
350             my ( $self, ) = @_;
351             ###LogSD my $phone = Log::Shiras::Telephone->new( name_space =>
352             ###LogSD $self->get_all_space . '::worksheets', );
353             ###LogSD $phone->talk( level => 'info', message =>[
354             ###LogSD 'Attempting to build all worksheets: ', $self->get_worksheet_names ] );
355             my @worksheet_array;
356             while( my $worksheet_object = $self->worksheet ){
357             ###LogSD $phone->talk( level => 'info', message =>[
358             ###LogSD 'Built worksheet: ' . $worksheet_object->get_name ] );
359             push @worksheet_array, $worksheet_object;#$self->worksheet( $worksheet_name );
360             }
361             ###LogSD $phone->talk( level => 'trace', message =>[
362             ###LogSD 'sending worksheet array: ',@worksheet_array ] );
363             return @worksheet_array;
364             }
365              
366             sub worksheet{
367              
368             my ( $self, $worksheet_name ) = @_;
369             my ( $next_position );
370             ###LogSD my $phone = Log::Shiras::Telephone->new( name_space =>
371             ###LogSD $self->get_all_space . '::worksheet', );
372             ###LogSD $phone->talk( level => 'info', message =>[
373             ###LogSD "Arrived at (build a) worksheet with: ", $worksheet_name ] );
374            
375             # Handle an implied 'next sheet'
376             if( !$worksheet_name ){
377             my $worksheet_position = $self->_get_current_worksheet_position;
378             ###LogSD $phone->talk( level => 'info', message =>[
379             ###LogSD "No worksheet name passed",
380             ###LogSD ((defined $worksheet_position) ? "Starting after position: $worksheet_position" : '')] );
381             $next_position = ( !$self->in_the_list ) ? 0 : ($self->_get_current_worksheet_position + 1);
382             ###LogSD $phone->talk( level => 'info', message =>[
383             ###LogSD "No worksheet name passed", "Attempting position: $next_position" ] );
384             if( $next_position >= $self->worksheet_count ){
385             ###LogSD $phone->talk( level => 'info', message =>[
386             ###LogSD "Reached the end of the worksheet list" ] );
387             return undef;
388             }
389             $worksheet_name = $self->worksheet_name( $next_position );
390             }else{
391             my $sheet_data = $self->_get_sheet_info( $worksheet_name );
392             ###LogSD $phone->talk( level => 'info', message =>[
393             ###LogSD "Info for the worksheet -$worksheet_name- is:", $sheet_data ] );
394             $next_position = $sheet_data->{sheet_position};
395             }
396            
397             # Deal with chartsheet requests
398             my $worksheet_info = $self->_get_sheet_info( $worksheet_name );
399             # Check for sheet existence
400             if( !$worksheet_info or !$worksheet_info->{sheet_type} ){
401             $self->set_error( "The worksheet -$worksheet_name- could not be located!" );
402             return undef;
403             }elsif( $worksheet_info->{sheet_type} and $worksheet_info->{sheet_type} eq 'chartsheet' ){
404             $self->set_error( "You have requested -$worksheet_name- which is a 'chartsheet' using a worksheet focused method" );
405             return undef;
406             }
407             # NOTE: THE CHARTSHEET / WORKSHEET COMMON SUB-METHOD COULD PROBABLY START HERE
408             ###LogSD $phone->talk( level => 'info', message =>[
409             ###LogSD "Building: $worksheet_name", "..with data:", $worksheet_info ] );
410            
411             # Check for a file and an available parser type
412             confess "No file loaded yet" if !$self->has_file_name and !$self->has_file_handle;
413             my ( $translation_method, $parser_type );
414             if( exists $parser_modules->{ $self->get_parser_type } ){
415             $translation_method = $parser_modules->{ $self->get_parser_type }->{build_method};
416             $parser_type = $parser_modules->{ $self->get_parser_type }->{$worksheet_info->{sheet_type}};
417             }else{
418             confess 'This package still under development - parser type |' . $self->get_parser_type . '| not yet supported - try the "reader" parser';
419             return undef;
420             }
421             ###LogSD $phone->talk( level => 'info', message =>[
422             ###LogSD "Using translation: $translation_method", "With parser: ", $parser_type, ] );
423            
424             # build the worksheet
425             my %args = $self->$translation_method( $worksheet_info, $self->_get_zip_file_handle );
426             ###LogSD $phone->talk( level => 'debug', message => [
427             ###LogSD 'Intermediate worksheet args are:', %args ], );
428             if( !$args{file} and !$args{dom} ){
429             $self->set_error( "Unable to load XML::LibXML with the element: $worksheet_name" );
430             return undef;
431             }
432             ###LogSD $phone->talk( level => 'debug', message => [
433             ###LogSD 'made it past the has-file check with:', $parser_type, $worksheet_name, ], );
434            
435             confess "No worksheet info for: $worksheet_name" if !exists $worksheet_info->{sheet_position};
436             $args{superclasses} = $parser_type->{superclasses};
437             $args{roles} = $parser_type->{roles};
438             $args{sheet_name} = $worksheet_name;
439             $args{workbook_instance} = $self;
440             $args{error_inst} = $self->get_error_inst;
441             $args{package} = $parser_type->{package};
442             ###LogSD $args{log_space} = $self->get_log_space;
443             ###LogSD $phone->talk( level => 'trace', message =>[
444             ###LogSD 'Finalized build info:', %args, ] );
445             my $worksheet = build_instance( %args );
446             if( $worksheet ){
447             ###LogSD $phone->talk( level => 'info', message =>[
448             ###LogSD "Successfully loaded: $worksheet_name",
449             ###LogSD "Setting the current worksheet position to: $next_position" ] );
450             $self->_set_current_worksheet_position( $next_position );
451             return $worksheet;
452             }else{
453             $self->set_error( "Failed to build the object for worksheet: $worksheet_name" );
454             return undef;
455             }
456             }
457              
458             #########1 Private Attributes 3#########4#########5#########6#########7#########8#########9
459            
460             has _file_creator =>(
461             isa => Str,
462             reader => 'creator',
463             writer => '_set_creator',
464             clearer => '_clear_creator',
465             );
466            
467             has _file_modified_by =>(
468             isa => Str,
469             reader => 'modified_by',
470             writer => '_set_modified_by',
471             clearer => '_clear_modified_by',
472             );
473            
474             has _file_date_created =>(
475             isa => StrMatch[qr/^\d{4}\-\d{2}\-\d{2}/],
476             reader => 'date_created',
477             writer => '_set_date_created',
478             clearer => '_clear_date_created',
479             );
480            
481             has _file_date_modified =>(
482             isa => StrMatch[qr/^\d{4}\-\d{2}\-\d{2}/],
483             reader => 'date_modified',
484             writer => '_set_date_modified',
485             clearer => '_clear_date_modified',
486             );
487              
488             has _epoch_year =>(
489             isa => Enum[qw( 1900 1904 )],
490             writer => '_set_epoch_year',
491             reader => 'get_epoch_year',
492             predicate => 'has_epoch_year',
493             default => 1900,
494             trigger => sub{
495             if( $_[0]->_has_format_inst and !$_[0]->get_format_inst->block_inherit ){
496             $_[0]->get_format_inst->set_epoch_year( $_[1] );
497             }
498             },
499             );
500            
501             has _shared_strings_instance =>(
502             isa => HasMethods[ 'get_shared_string_position' ],
503             predicate => '_has_shared_strings_file',
504             writer => '_set_shared_strings_instance',
505             reader => '_get_shared_strings_instance',
506             clearer => '_clear_shared_strings',
507             handles =>{
508             'get_shared_string_position' => 'get_shared_string_position',
509             _demolish_shared_strings => 'DEMOLISH',
510             },
511             );
512            
513             has _styles_instance =>(
514             isa => HasMethods[qw( get_format_position )],
515             writer => '_set_styles_instance',
516             reader => '_get_styles_instance',
517             clearer => '_clear_styles',
518             predicate => '_has_styles_file',
519             handles =>{
520             get_format_position => 'get_format_position',
521             _demolish_styles => 'DEMOLISH',
522             },
523             );
524              
525             has _calc_chain_instance =>(
526             isa => HasMethods[qw( get_calc_chain_position )],
527             writer =>'_set_calc_chain_instance',
528             reader =>'_get_calc_chain_instance',
529             clearer => '_clear_calc_chain',
530             predicate => '_has_calc_chain_file',
531             handles =>{
532             _demolish_calc_chain => 'DEMOLISH',
533             },
534             );
535              
536             has _worksheet_list =>(
537             isa => ArrayRef,
538             traits => ['Array'],
539             clearer => '_clear_worksheet_list',
540             reader => 'get_worksheet_names',
541             handles =>{
542             worksheet_name => 'get',
543             worksheet_count => 'count',
544             _add_worksheet => 'push',
545             },
546             default => sub{ [] },
547             );
548              
549             has _chartsheet_list =>(
550             isa => ArrayRef,
551             traits => ['Array'],
552             clearer => '_clear_chartsheet_list',
553             reader => 'get_chartsheet_names',
554             handles =>{
555             chartsheet_name => 'get',
556             chartsheet_count => 'count',
557             _add_chartsheet => 'push',
558             },
559             default => sub{ [] },
560             );
561              
562             has _sheet_list =>(
563             isa => ArrayRef,
564             traits => ['Array'],
565             writer => '_set_sheet_list',
566             clearer => '_clear_sheet_list',
567             reader => 'get_sheet_names',
568             handles =>{
569             get_sheet_name => 'get',
570             sheet_count => 'count',
571             },
572             default => sub{ [] },
573             );
574              
575             has _sheet_lookup =>(
576             isa => HashRef,
577             traits => ['Hash'],
578             writer => '_set_sheet_lookup',
579             clearer => '_clear_sheet_lookup',
580             reader => '_get_sheet_lookup',
581             handles =>{
582             _get_sheet_info => 'get',
583             },
584             default => sub{ {} },
585             );
586              
587             has _current_worksheet_position =>(
588             isa => Int,
589             writer => '_set_current_worksheet_position',
590             reader => '_get_current_worksheet_position',
591             clearer => 'start_at_the_beginning',
592             predicate => 'in_the_list',
593             );
594            
595             has _file_type =>(
596             isa => Enum[qw( zip xml )],
597             clearer => '_clear_file_type',
598             writer => '_set_file_type',
599             reader => '_get_file_type',
600             );
601            
602             has _zip_file_handle =>(
603             isa => InstanceOf[ 'Archive::Zip' ],
604             clearer => '_clear_zip_file_handle',
605             writer => '_set_zip_file_handle',
606             reader => '_get_zip_file_handle',
607             predicate => '_has_zip_file_handle',
608             );
609              
610             #########1 Private Methods 3#########4#########5#########6#########7#########8#########9
611              
612             around BUILDARGS => sub {
613             my ( $orig, $class, %args ) = @_;
614             ###LogSD my $log_space = $args{log_space};
615             ###LogSD $log_space .= '::' if $log_space;
616             ###LogSD $log_space .= 'Workbook::BUILDARGS';
617             ###LogSD my $phone = Log::Shiras::Telephone->new(
618             ###LogSD name_space => $log_space, );
619             ###LogSD $phone->talk( level => 'trace', message =>[
620             ###LogSD 'Arrived at BUILDARGS with: ', %args ] );
621            
622             # Check if this was called by Spreadsheet::Read; Warning discontinuing after 1-Jan-2016 unless someone asks
623             my $like_ParseExcel = 0;
624             for my $x ( 2 .. 5 ){
625             ###LogSD $phone->talk( level => 'trace', message =>[
626             ###LogSD "Caller is:", caller( $x ) ] );
627             if( (caller( $x ))[0] and (caller( $x ))[0] =~ /Spreadsheet::Read/ ){
628             $like_ParseExcel = 1;
629             ###LogSD $phone->talk( level => 'trace', message =>[
630             ###LogSD "Spreadsheet::XLSX::Reader::LibXML is being called by Spreadsheet::Read - enforcing the :like_ParseExcel flag" ] );
631             last;
632             }
633             }
634            
635             # Add any defaults
636             for my $key ( keys %$attribute_defaults ){
637             if( exists $args{$key} ){
638             ###LogSD $phone->talk( level => 'trace', message =>[
639             ###LogSD "Found user defined -$key- with value: $args{$key}" ] );
640             }else{
641             ###LogSD $phone->talk( level => 'trace', message =>[
642             ###LogSD "Setting default -$key- with value: $attribute_defaults->{$key}" ] );
643             $args{$key} = clone( $attribute_defaults->{$key} );
644             }
645             }
646            
647             # Enforce Spreadsheet::Read requirements - Warning discontinuing after 1-Jan-2016 unless someone asks
648             if( $like_ParseExcel ){
649             ###LogSD $phone->talk( level => 'trace', message =>[
650             ###LogSD "Enforcing like_ParseExcel defaults" ] );
651             for my $key ( %{$flag_settings->{like_ParseExcel}} ){
652             $args{$key} = clone( $flag_settings->{like_ParseExcel}->{$key} );
653             }
654             }
655            
656             # Build object instances as needed
657             for my $key ( keys %args ){
658             ###LogSD $phone->talk( level => 'trace', message =>[
659             ###LogSD "Checking if an instance needs built for key: $key" ] );
660             if( $key =~ /_inst$/ and !is_Object( $args{$key} ) and is_HashRef( $args{$key} ) ){
661             # Import log_space as needed
662             ###LogSD if( exists $args{log_space} and $args{log_space} ){
663             ###LogSD $args{$key}->{log_space} = $args{log_space};
664             ###LogSD }
665             ###LogSD $phone->talk( level => 'trace', message =>[
666             ###LogSD "Key -$key- requires and instance built from:", $args{$key} ] );
667             $args{$key} = build_instance( $args{$key} );
668             }
669             }
670            
671             ###LogSD $phone->talk( level => 'trace', message =>[
672             ###LogSD "Final BUILDARGS:", %args ] );
673             return $class->$orig(%args);
674             };
675              
676             sub _build_workbook{
677              
678             my ( $self, $file ) = @_;
679             ###LogSD my $phone = Log::Shiras::Telephone->new( name_space =>
680             ###LogSD $self->get_all_space . '::hidden::_build_file', );
681             ###LogSD $phone->talk( level => 'info', message =>[
682             ###LogSD 'Arrived at _build_file for: ', $file ] );
683             $self->_clear_shared_strings;
684             $self->_clear_calc_chain;
685             $self->_clear_styles;
686             $self->_clear_worksheet_list;
687             $self->_clear_sheet_lookup;
688             $self->_clear_creator;
689             $self->_clear_modified_by;
690             $self->_clear_date_created;
691             $self->_clear_date_modified;
692             $self->clear_error;
693             $self->start_at_the_beginning;
694             $self->_clear_file_type;
695            
696             # Ensure we have a file handle
697             my $file_handle;
698             eval '$file_handle = IOFileType->assert_coerce( $file )';
699             ###LogSD $phone->talk( level => 'trace', message =>[
700             ###LogSD "Passed the file type coercion" ] );
701             if( $@ ){
702             ###LogSD $phone->talk( level => 'warn', message =>[
703             ###LogSD "Unable to create a valid file instance with: $file" ] );
704             $self->_clear_file_handle;
705             $self->_clear_file_name;
706             $self->set_error( "Unable to create a valid file instance with: $file" );
707             return undef;
708             }
709             ###LogSD $phone->talk( level => 'trace', message =>[
710             ###LogSD "Current file handle: $file_handle" ] );
711            
712             # Read the XLSX zip file and catch any errors (other zip file sanity tests go here)
713             my $workbook_file = Archive::Zip->new();
714             if( $workbook_file->readFromFileHandle($file_handle) != AZ_OK ){
715             ###LogSD $phone->talk( level => 'warn', message =>[
716             ###LogSD "Failed to open a zip file" ] );
717             $self->_clear_file_handle;
718             $self->_clear_file_name;
719             $self->_set_file_type( 'xml' );# Build from this when adding all-in-one single file Excel Workbooks!!!!
720             confess "|$file| won't open as a zip file";
721             }else{
722             ###LogSD $phone->talk( level => 'debug', message =>[
723             ###LogSD "Certified this as a zip file" ] );
724             $self->_set_file_type( 'zip' );
725             }
726             ###LogSD $phone->talk( level => 'debug', message =>[
727             ###LogSD 'Zip file test passed with: ' . $self->_get_file_type ] );
728            
729             # Extract the workbook top level info
730             my %answer = $self->_build_dom( $build_ref->{top_level_workbook}, $workbook_file );
731             ###LogSD $phone->talk( level => 'debug', message =>[ "DOM built for method: _load_top_level_workbook" ] );
732             my ( $rel_lookup, $id_lookup ) = $self->_load_top_level_workbook( $answer{dom} );
733             return undef if !$rel_lookup;
734             ###LogSD $phone->talk( level => 'debug', message =>[ 'Rel lookup:', $rel_lookup, 'ID lookukp:', $id_lookup ] );
735            
736             # Load the workbook rels info
737             %answer = $self->_build_dom( $build_ref->{workbook_rels}, $workbook_file );
738             ###LogSD $phone->talk( level => 'debug', message =>[ "DOM built for method: _load_workbook_rels" ] );
739             my ( $load_success, $pivot_lookup ) = $self->_load_workbook_rels( $rel_lookup, $answer{dom} );
740             return undef if !$load_success;
741             ###LogSD $phone->talk( level => 'debug', message =>[ 'pivot lookup:', $pivot_lookup, ] );
742            
743             # Load the docProps info
744             %answer = $self->_build_dom( $build_ref->{doc_props}, $workbook_file );
745             ###LogSD $phone->talk( level => 'debug', message =>[ "DOM built for method: _load_doc_props" ] );
746             $self->_load_doc_props( $answer{dom} );
747             ###LogSD $phone->talk( level => 'debug', message =>[ 'docProps loaded', ] );
748            
749             # Build the instances for all the shared files (data for sheets shared across worksheets)
750             if( exists $parser_modules->{ $self->get_parser_type } ){
751             ###LogSD $phone->talk( level => 'debug', message =>[ 'loading shared worksheets for: ' . $self->get_parser_type, ] );
752             my $result = $self->_set_shared_worksheet_files(
753             $parser_modules->{ $self->get_parser_type },
754             $workbook_file,
755             );
756             return undef if !$result;
757             $self->_set_zip_file_handle( $workbook_file );
758             }else{
759             confess 'This package still under development - parser type |' . $self->get_parser_type . '| not yet supported - try the "reader" parser';
760             return undef;
761             }
762             return $self;
763             }
764              
765             sub _build_dom{
766             my( $self, $target, $workbook_file ) = @_;
767             my $build_target = clone( $target );
768             ###LogSD my $phone = Log::Shiras::Telephone->new( name_space =>
769             ###LogSD $self->get_all_space . '::hidden::_build_dom', );
770             ###LogSD $phone->talk( level => 'debug', message => [
771             ###LogSD 'Building DOM object for the target:', $build_target,
772             ###LogSD "With file:", $workbook_file ] );
773             my ( $dom, $encoding );
774             if( $self->_get_file_type eq 'zip' ){
775             ###LogSD $phone->talk( level => 'debug', message => [
776             ###LogSD "Working on a zip file targeting: $build_target->{zip}",] );
777             my $zip_workbook = $workbook_file->memberNamed( $build_target->{zip} );
778             delete $build_target->{zip};
779             ###LogSD $phone->talk( level => 'debug', message =>[ 'zip member: ' . $zip_workbook ] );
780             my $workbook_fh = IO::File->new_tmpfile;
781             $workbook_fh->binmode();
782             $zip_workbook->extractToFileHandle( $workbook_fh );
783             $workbook_fh->seek( 0, 0 );
784             $dom = $xml_parser->load_xml( { IO => $workbook_fh } );
785             }else{
786             confess "I don't know how to handle file type: " . $self->_get_file_type;
787             }
788             my %return_args = ( dom => $dom, %$build_target );
789             ###LogSD $phone->talk( level => 'debug', message =>[ "Returning: ", %return_args ] );
790             return %return_args;
791             }
792              
793             sub _build_reader{
794             my( $self, $target, $workbook_file ) = @_;
795             my $build_target = clone( $target );
796             ###LogSD my $phone = Log::Shiras::Telephone->new( name_space =>
797             ###LogSD $self->get_all_space . '::hidden::_build_reader', );
798             ###LogSD $phone->talk( level => 'debug', message => [
799             ###LogSD 'Building Reader object for the target:', $build_target,
800             ###LogSD "With file:", $workbook_file ] );
801             my ( $workbook_fh, $xml_reader, $encoding );
802             if( $self->_get_file_type eq 'zip' ){
803             ###LogSD $phone->talk( level => 'debug', message => [
804             ###LogSD "Working on a zip file targeting: $build_target->{zip}",] );
805             my $zip_member = $workbook_file->memberNamed( $build_target->{zip} );
806             ###LogSD $phone->talk( level => 'debug', message =>[ 'zip member:', $zip_member ] );
807             if( $zip_member ){
808             $workbook_fh = IO::File->new_tmpfile;
809             $workbook_fh->binmode();
810             $zip_member->extractToFileHandle( $workbook_fh );
811             $workbook_fh->seek( 0, 0 );
812             }else{
813             ###LogSD $phone->talk( level => 'debug', message =>[ "no zip file for: $build_target->{zip}" ] );
814             return undef;
815             }
816             delete $build_target->{zip};
817             }else{
818             confess "I don't know how to handle file type: " . $self->_get_file_type;
819             }
820             my %return_args = ( file => $workbook_fh, %$build_target );# xml_reader => $xml_reader,
821             ###LogSD $phone->talk( level => 'debug', message =>[ "Returning: ", %return_args ] );
822             return %return_args;
823             }
824              
825             sub _load_top_level_workbook{
826             my( $self, $dom ) = @_;
827             ###LogSD my $phone = Log::Shiras::Telephone->new( name_space =>
828             ###LogSD $self->get_all_space . '::hidden::_load_workbook_file', );
829             ###LogSD $phone->talk( level => 'info', message => [
830             ###LogSD "Building the top level data for the workbook", ] );
831             my ( $list, $sheet_ref, $rel_lookup, $id_lookup );
832             my $position = 0;
833             my ( $setting_node ) = $dom->getElementsByTagName( 'workbookPr' );
834             if( $setting_node and $setting_node->getAttribute( 'date1904' ) ){
835             $self->_set_epoch_year( 1904 );
836             }
837             for my $sheet ( $dom->getElementsByTagName( 'sheet' ) ){
838             my $sheet_name = $sheet->getAttribute( 'name' );
839             push @$list, $sheet_name;
840             @{$sheet_ref->{$sheet_name}}{ 'sheet_id', 'sheet_rel_id', 'sheet_position', 'is_hidden' } = (
841             $sheet->getAttribute( 'sheetId' ),
842             $sheet->getAttribute( 'r:id' ),
843             $position++,
844             ($sheet->getAttribute( 'state' ) ? 1 : 0),
845             );
846             $rel_lookup->{$sheet->getAttribute( 'r:id' )} = $sheet_name;
847             $id_lookup->{$sheet->getAttribute( 'sheetId' )} = $sheet_name;
848             }
849             for my $sheet ( $dom->getElementsByTagName( 'pivotCache' ) ){
850             my $sheet_id = $sheet->getAttribute( 'cacheId' );
851             my $rel_id = $sheet->getAttribute( 'r:id' );
852             ###LogSD $phone->talk( level => 'debug', message =>[
853             ###LogSD "Sheet ID: $sheet_id", "Rel ID: $rel_id", ] );
854             $rel_lookup->{$rel_id} = $sheet_id;
855             $id_lookup->{$sheet_id} = $rel_id;
856             }
857             ###LogSD $phone->talk( level => 'debug', message => [
858             ###LogSD "Sheet list: ", $list,
859             ###LogSD "Worksheet lookup:", $sheet_ref,
860             ###LogSD "rel lookup:", $rel_lookup,
861             ###LogSD "id lookup:", $id_lookup, ] );
862             $dom = undef;
863             if( !$list ){
864             $self->set_error( "No worksheets identified in this workbook" );
865             return undef;
866             }
867             $self->_set_sheet_list( $list );
868             $self->_set_sheet_lookup( $sheet_ref );
869             return( $rel_lookup, $id_lookup );
870             }
871              
872             sub _load_workbook_rels{
873             my( $self, $rel_lookup, $dom ) = @_;
874             my ( $pivot_lookup, );
875             ###LogSD my $phone = Log::Shiras::Telephone->new( name_space =>
876             ###LogSD $self->get_all_space . '::hidden::_load_workbook_rels', );
877             ###LogSD $phone->talk( level => 'debug', message => [
878             ###LogSD "Adding the rels file data for the workbook with:", $rel_lookup ] );
879             my $sheet_ref = $self->_get_sheet_lookup;
880             ###LogSD $phone->talk( level => 'debug', message => [
881             ###LogSD "Working on sheet ref:", $sheet_ref, '..and rel lookup:', $rel_lookup ] );
882             my $found_member_names = 0;
883             my ( $worksheet_list, $chartsheet_list );
884             for my $sheet ( $dom->getElementsByTagName( 'Relationship' ) ){
885             my $rel_ID = $sheet->getAttribute( 'Id' );
886             ###LogSD $phone->talk( level => 'debug', message => [
887             ###LogSD "Processing relID:", $rel_ID, ] );
888             if( exists $rel_lookup->{$rel_ID} ){
889             my $target = 'xl/';
890             $target .= $sheet->getAttribute( 'Target' );
891             ###LogSD $phone->talk( level => 'debug', message => [
892             ###LogSD "Building relationship for: $rel_ID", "With target: $target" ] );
893             $target =~ s/\\/\//g;
894             if( $target =~ /worksheets(\\|\/)/ ){
895             $sheet_ref->{$rel_lookup->{$rel_ID}}->{zip} = $target;
896             $sheet_ref->{$rel_lookup->{$rel_ID}}->{sheet_type} = 'worksheet';
897             $worksheet_list->[$sheet_ref->{$rel_lookup->{$rel_ID}}->{sheet_position}] = $rel_lookup->{$rel_ID};
898             $found_member_names = 1;
899             }elsif( $target =~ /chartsheets(\\|\/)/ ){
900             $sheet_ref->{$rel_lookup->{$rel_ID}}->{zip} = $target;
901             $sheet_ref->{$rel_lookup->{$rel_ID}}->{sheet_type} = 'chartsheet';
902             $chartsheet_list->[$sheet_ref->{$rel_lookup->{$rel_ID}}->{sheet_position}] = $rel_lookup->{$rel_ID};
903             $found_member_names = 1;
904             }else{
905             $pivot_lookup->{$rel_ID} = $target;
906             }
907             }
908             }
909             ###LogSD $phone->talk( level => 'debug', message => [
910             ###LogSD "Worksheet lookup:", $sheet_ref,
911             ###LogSD "Pivot lookup:", $pivot_lookup ] );
912             if( !$found_member_names ){
913             $self->set_error( "Couldn't find any zip member (file) names for the sheets" );
914             return ( 0, undef );
915             }
916             map{ $self->_add_worksheet( $_ ) if $_ } @$worksheet_list if $worksheet_list;
917             map{ $self->_add_chartsheet( $_ ) if $_ } @$chartsheet_list if $chartsheet_list;
918             $self->_set_sheet_lookup( $sheet_ref );
919             return ( 1, $pivot_lookup );
920             }
921              
922             sub _load_doc_props{
923             my( $self, $dom ) = @_;
924             ###LogSD my $phone = Log::Shiras::Telephone->new( name_space =>
925             ###LogSD $self->get_all_space . '::hidden::_load_doc_props', );
926             ###LogSD $phone->talk( level => 'debug', message => [
927             ###LogSD "Collecting data from the doc props file", ] );
928             $self->_set_creator( ($dom->getElementsByTagName( 'dc:creator' ))[0]->textContent() );
929             $self->_set_modified_by( ($dom->getElementsByTagName( 'cp:lastModifiedBy' ))[0]->textContent() );
930             $self->_set_date_created(
931             ($dom->getElementsByTagName( 'dcterms:created' ))[0]->textContent()
932             );
933             $self->_set_date_modified(
934             ($dom->getElementsByTagName( 'dcterms:modified' ))[0]->textContent()
935             );
936             ###LogSD $phone->talk( level => 'trace', message => [ "Current object:", $self ] );
937             }
938              
939             sub _set_shared_worksheet_files{
940             my( $self, $object_ref, $zip_workbook ) = @_;
941             ###LogSD my $phone = Log::Shiras::Telephone->new( name_space =>
942             ###LogSD $self->get_all_space . '::hidden::_set_shared_worksheet_files', );
943             ###LogSD $phone->talk( level => 'debug', message => [
944             ###LogSD "Building the shared worksheet files with the lookup ref:", $object_ref, ] );
945             my $translation_method = $object_ref->{build_method};
946             for my $file ( keys %$object_ref ){
947            
948             # Build out the shared files (shared by all worksheets) only!
949             next if $file eq 'build_method' or $file eq 'worksheet' or $file eq 'chartsheet';
950            
951             ###LogSD $phone->talk( level => 'debug', message => [
952             ###LogSD "Attempting to load the file: ${file}\.xml",
953             ###LogSD "With translation method: $translation_method" ], );
954             my %args;
955             my @list = $self->$translation_method( $build_ref->{$file}, $zip_workbook );
956             ###LogSD $phone->talk( level => 'debug', message => [ $translation_method . " returned args:", @list ], );
957             if( scalar( @list ) < 2 ){
958             if( !$args{file} and !$args{dom} ){
959             $self->set_error( "Unable to load XML::LibXML with the element: $file" );
960             }
961             }else{
962             %args = @list;
963             ###LogSD $phone->talk( level => 'debug', message =>[ "Built an xml_object", ], );
964             $args{package} = $object_ref->{$file}->{package} if exists $object_ref->{$file}->{package};
965             ###LogSD $args{log_space} = $self->get_log_space;
966             $args{superclasses} = $object_ref->{$file}->{superclasses} if exists $object_ref->{$file}->{superclasses};
967             for my $attribute ( @{$object_ref->{$file}->{attributes}} ){
968             ###LogSD $phone->talk( level => 'debug', message => [
969             ###LogSD "Loading attribute: $attribute", ], );
970             my $method = 'get_' . $attribute;
971             $args{$attribute} = $self->$method;
972             }
973             my $role_ref;
974             for my $role ( @{$object_ref->{$file}->{add_roles_in_sequence}} ){
975             ###LogSD $phone->talk( level => 'debug', message => [
976             ###LogSD "collecting the role for: $role", ], );
977             my $method = 'get_' . $role;
978             push @$role_ref, $self->$method;
979             }
980             $args{add_roles_in_sequence} = $role_ref if $role_ref;
981             ###LogSD $args{log_space} = $self->get_log_space;
982             my $method = $object_ref->{$file}->{store};
983             ###LogSD $phone->talk( level => 'debug', message =>[
984             ###LogSD "Final args for building the instance:", %args,
985             ###LogSD "Loading -$method- with build_instance( 'args' )" ], );
986             my $object = build_instance( %args );################################ Add a $object build-fail flag
987             ###LogSD $phone->talk( level => 'debug', message =>[
988             ###LogSD "Finished building instance for: $file",
989             ###LogSD "Loading to the worbook with method: $method", # $object
990             ###LogSD ], );
991             $self->$method( $object );
992             ###LogSD $phone->talk( level => 'debug', message =>[
993             ###LogSD "Finished building and installing: $file", ], );
994             }
995             }
996             ###LogSD $phone->talk( level => 'debug', message => [
997             ###LogSD "All shared files that can be built are built!" ], );
998             return 1;
999             }
1000              
1001             sub _import_format_settings{
1002              
1003             my ( $self, $formatter ) = @_;
1004             ###LogSD my $phone = Log::Shiras::Telephone->new( name_space =>
1005             ###LogSD $self->get_all_space . '::hidden::_import_format_settings', );
1006             if( !$formatter->block_inherit ){
1007             ###LogSD $phone->talk( level => 'info', message =>[
1008             ###LogSD 'Arrived at _import_format_settings for: ', $formatter ] );
1009             if( $self->has_error_inst ){
1010             ###LogSD $phone->talk( level => 'info', message =>[
1011             ###LogSD 'Setting the global error instance to the formatter' ] );
1012             $formatter->set_error_inst( $self->get_error_inst );
1013             }
1014             my $year = $self->get_epoch_year;
1015             ###LogSD $phone->talk( level => 'info', message =>[
1016             ###LogSD "Setting the epoch year for the formatter: $year" ] );
1017             $formatter->set_epoch_year( $year );
1018             my $cache = $self->get_cache_positions;
1019             ###LogSD $phone->talk( level => 'info', message =>[
1020             ###LogSD "Setting the cache to: $cache" ] );
1021             $formatter->set_cache_behavior( $cache );
1022             }
1023             }
1024              
1025             sub DEMOLISH{
1026             my ( $self ) = @_;
1027             ###LogSD my $phone = Log::Shiras::Telephone->new( name_space =>
1028             ###LogSD $self->get_all_space . '::hidden::DEMOLISH', );
1029             ###LogSD $phone->talk( level => 'debug', message => [
1030             ###LogSD "Last recorded error: " . ($self->error//'none') ] );
1031             if( $self->_has_calc_chain_file ){
1032             #~ print "closing calcChain.xml\n";
1033             ###LogSD $phone->talk( level => 'debug', message => [
1034             ###LogSD "Clearing the calcChain.xml file" ] );
1035             $self->_demolish_calc_chain;
1036             }
1037             if( $self->_has_shared_strings_file ){
1038             my $instance = $self->_get_shared_strings_instance;
1039             #~ print "closing sharedStrings.xml\n";# . Dumper( $instance )
1040             ###LogSD $phone->talk( level => 'debug', message => [
1041             ###LogSD "Clearing the sharedStrings.xml file" ] );
1042             if( $instance ){
1043             $self->_demolish_shared_strings;
1044             }else{
1045             $self->_clear_shared_strings;
1046             $instance = undef;
1047             }
1048             }
1049            
1050             if( $self->_has_styles_file ){
1051             my $instance = $self->_get_styles_instance;
1052             #~ print "closing styles.xml\n";# . Dumper( $instance )
1053             ###LogSD $phone->talk( level => 'debug', message => [
1054             ###LogSD "Clearing the styles.xml file" ] );
1055             if( $instance ){
1056             $self->_demolish_styles;
1057             }else{
1058             $self->_clear_shared_strings;
1059             $instance = undef;
1060             }
1061             }
1062            
1063             if( $self->_has_zip_file_handle ){
1064             #~ print "closing zip file handle\n";
1065             ###LogSD $phone->talk( level => 'debug', message => [
1066             ###LogSD "Clearing the Zip file handle" ] );
1067             $self->_clear_zip_file_handle;
1068             }
1069            
1070             if( $self->has_file_handle ){
1071             #~ print "closing general file handle\n";
1072             ###LogSD $phone->talk( level => 'debug', message => [
1073             ###LogSD "Clearing the top level file handle" ] );
1074             $self->_clear_file_handle;
1075             }
1076             }
1077              
1078             #########1 Phinish 3#########4#########5#########6#########7#########8#########9
1079              
1080             no Moose;
1081             __PACKAGE__->meta->make_immutable;
1082            
1083             1;
1084              
1085             #########1 Documentation 3#########4#########5#########6#########7#########8#########9
1086             __END__
1087              
1088             =head1 NAME
1089              
1090             Spreadsheet::XLSX::Reader::LibXML - Read xlsx spreadsheet files with LibXML
1091              
1092             =begin html
1093              
1094             <a href="https://www.perl.org">
1095             <img src="https://img.shields.io/badge/perl-5.10+-brightgreen.svg" alt="perl version">
1096             </a>
1097              
1098             <a href="https://travis-ci.org/jandrew/Spreadsheet-XLSX-Reader-LibXML">
1099             <img alt="Build Status" src="https://travis-ci.org/jandrew/Spreadsheet-XLSX-Reader-LibXML.png?branch=master" alt='Travis Build'/>
1100             </a>
1101              
1102             <a href='https://coveralls.io/r/jandrew/Spreadsheet-XLSX-Reader-LibXML?branch=master'>
1103             <img src='https://coveralls.io/repos/jandrew/Spreadsheet-XLSX-Reader-LibXML/badge.svg?branch=master' alt='Coverage Status' />
1104             </a>
1105              
1106             <a>
1107             <img src="https://img.shields.io/badge/this version-0.38.22-brightgreen.svg" alt="this version">
1108             </a>
1109              
1110             <a href="https://metacpan.org/pod/Spreadsheet::XLSX::Reader::LibXML">
1111             <img src="https://badge.fury.io/pl/Spreadsheet-XLSX-Reader-LibXML.svg?label=cpan version" alt="CPAN version" height="20">
1112             </a>
1113              
1114             <a href='http://cpants.cpanauthors.org/dist/Spreadsheet-XLSX-Reader-LibXML'>
1115             <img src='http://cpants.cpanauthors.org/dist/Spreadsheet-XLSX-Reader-LibXML.png' alt='kwalitee' height="20"/>
1116             </a>
1117              
1118             =end html
1119              
1120             =head1 SYNOPSIS
1121              
1122             The following uses the 'TestBook.xlsx' file found in the t/test_files/ folder
1123              
1124             #!/usr/bin/env perl
1125             use strict;
1126             use warnings;
1127             use Spreadsheet::XLSX::Reader::LibXML;
1128              
1129             my $parser = Spreadsheet::XLSX::Reader::LibXML->new();
1130             my $workbook = $parser->parse( 'TestBook.xlsx' );
1131              
1132             if ( !defined $workbook ) {
1133             die $parser->error(), "\n";
1134             }
1135              
1136             for my $worksheet ( $workbook->worksheets() ) {
1137              
1138             my ( $row_min, $row_max ) = $worksheet->row_range();
1139             my ( $col_min, $col_max ) = $worksheet->col_range();
1140              
1141             for my $row ( $row_min .. $row_max ) {
1142             for my $col ( $col_min .. $col_max ) {
1143              
1144             my $cell = $worksheet->get_cell( $row, $col );
1145             next unless $cell;
1146              
1147             print "Row, Col = ($row, $col)\n";
1148             print "Value = ", $cell->value(), "\n";
1149             print "Unformatted = ", $cell->unformatted(), "\n";
1150             print "\n";
1151             }
1152             }
1153             last;# In order not to read all sheets
1154             }
1155              
1156             ###########################
1157             # SYNOPSIS Screen Output
1158             # 01: Row, Col = (0, 0)
1159             # 02: Value = Category
1160             # 03: Unformatted = Category
1161             # 04:
1162             # 05: Row, Col = (0, 1)
1163             # 06: Value = Total
1164             # 07: Unformatted = Total
1165             # 08:
1166             # 09: Row, Col = (0, 2)
1167             # 10: Value = Date
1168             # 11: Unformatted = Date
1169             # 12:
1170             # 13: Row, Col = (1, 0)
1171             # 14: Value = Red
1172             # 16: Unformatted = Red
1173             # 17:
1174             # 18: Row, Col = (1, 1)
1175             # 19: Value = 5
1176             # 20: Unformatted = 5
1177             # 21:
1178             # 22: Row, Col = (1, 2)
1179             # 23: Value = 2017-2-14 #(shows as 2/14/2017 in the sheet)
1180             # 24: Unformatted = 41318
1181             # 25:
1182             # More intermediate rows ...
1183             # 82:
1184             # 83: Row, Col = (6, 2)
1185             # 84: Value = 2016-2-6 #(shows as 2/6/2016 in the sheet)
1186             # 85: Unformatted = 40944
1187             ###########################
1188              
1189             =head1 DESCRIPTION
1190              
1191             This is another package for parsing Excel 2007+ workbooks. The goals of this package are
1192             three fold. First, as close as possible produce the same output as is visible in an
1193             excel spreadsheet with exposure to underlying settings from Excel. Second, adhere as
1194             close as is reasonable to the L<Spreadsheet::ParseExcel> API (where it doesn't conflict
1195             with the first objective) so that less work would be needed to integrate ParseExcel and
1196             this package. An addendum to the second goal is this package will not expose elements of
1197             the object hash for use by the consuming program. This package will either return an
1198             unblessed hash with the equivalent elements to the Spreadsheet::ParseExcel output instead
1199             of a class instance or it will provide methods to provide these sets of data. The third
1200             goal is to provide an XLSX sheet parser that is built on L<XML::LibXML>. The other two
1201             primary options for XLSX parsing on CPAN use either a one-off XML parser (L<Spreadsheet::XLSX>)
1202             or L<XML::Twig> (L<Spreadsheet::ParseXLSX>). In general if either of them already work for
1203             you without issue then there is no reason to change to this package. I personally found
1204             some bugs and functionality boundaries in both that I wanted to improve, and by the time
1205             I had educated myself enough to make improvement suggestions including root causing the
1206             bugs to either the XML parser or the reader logic I had written this.
1207              
1208             In the process of learning and building I also wrote some additional features for
1209             this parser that are not found in the L<Spreadsheet::ParseExcel> package. For instance
1210             in the L<SYNOPSIS|/SYNOPSIS> the '$parser' and the '$workbook' are actually the same
1211             class. You could combine both steps by calling ->new with the 'file_name' (or
1212             'file_handle') attribute called out. Afterward it is still possible to call ->error on
1213             the instance. The test in that case for load success would be
1214             $instance->has_file_name(handle) Another improvement (from my perspective) is date
1215             handling. This package allows for a simple pluggable custom output format that is very
1216             flexible as well as handling dates older than 1-January-1900. I leveraged coercions from
1217             L<Type::Tiny|Type::Tiny::Manual> to do this but anything that follows that general format
1218             will work here. Additionally, this is a L<Moose> based package. As such it is designed
1219             to be (fairly) extensible by writing roles
1220             and adding them to this package rather than requiring that you extend the package to some
1221             new branch. Read the full documentation for all opportunities!
1222              
1223             In the realm of extensibility, L<XML::LibXML> has multiple ways to read an XML file but
1224             this release only has an L<XML::LibXML::Reader> parser option. Future iterations could
1225             include a DOM parser option. Additionally this package does not (yet) provide the same
1226             access to the formatting elements provided in L<Spreadsheet::ParseExcel>. That is on the
1227             longish and incomplete TODO list.
1228              
1229             The package operates on the workbook with three primary tiers of classes. All other
1230             classes in this package are for architectual extensibility.
1231              
1232             =over
1233              
1234             ---> L<Workbook level|Spreadsheet::XLSX::Reader::LibXML>
1235              
1236             =over
1237              
1238             ---> L<Worksheet level|Spreadsheet::XLSX::Reader::LibXML::Worksheet>*
1239              
1240             =over
1241              
1242             ---> L<Cell level|Spreadsheet::XLSX::Reader::LibXML::Cell>* -
1243             L<optional|/group_return_type>
1244              
1245             =back
1246              
1247             =back
1248              
1249             =back
1250              
1251             =head2 Warnings
1252              
1253             B<1.> Archive-Zip versions greater than 1.30 appear to be broken. This package requires
1254             Archive::Zip so I reccomend Archive-Zip-1.30.
1255              
1256             B<2.> This package requires that you can load L<XML::LibXML> which requires the L<libxml2
1257             |http://xmlsoft.org/> and 'libxml2-devel' libraries. I have included L<Alien::LibXML> in
1258             the build profile in an attempt to resolve any library issues but being new to usage of
1259             Alien libraries in general I'm not certain I got it quite right. Many OS's have these
1260             libraries installed as part of their core but if this package fails to load please log an
1261             issue in my repo on L<github|/SUPPORT>. On the other hand the correct libraries are
1262             loading on travis-ci during the builds so if no issue is logged before then I will B<remove
1263             this warning on 2/1/2016.>
1264              
1265             B<3.> Not all workbook sheets (tabs) are created equal! Some Excel sheet tabs are only a
1266             chart. These tabs are 'chartsheets'. The methods with 'worksheet' in the name only act on
1267             the sub set of tabs that are worksheets. Future methods with 'chartsheet' in the name will
1268             focus on the subset of sheets that are chartsheets. Methods with just 'sheet' in the name
1269             have the potential to act on both. The documentation for the chartsheet level class is found
1270             in L<Spreadsheet::XLSX::Reader::LibXML::Chartsheet> (still under construction). All chartsheet
1271             classes do not provide access to cells.
1272              
1273             B<4.> L<HMBRAND|https://metacpan.org/author/HMBRAND> pointed out that the formatter portion of
1274             this package for versions older than v0.38 do not follow the L<Spreadsheet::ParseExcel API
1275             |Spreadsheet::ParseExcel/Formatter-Class> for the formatter class. (I always welcome feeback)
1276             I suppose the original implementation was, in part, laziness. In an effort to comply with goal
1277             #2 of this package I have updated the API so that in versions starting with v0.38 the formatter
1278             is a stand-alone class. For details of the implemenation see
1279             L<Spreadsheet::XLSX::Reader::LibXML::FmtDefault/CLASS DESCRIPTION> This more closely follows
1280             Spreadsheet::ParseExcel, and incidentally probably makes building alternate formatting modules
1281             easier. I<The formatters will still not exchange back and forth between
1282             L<Spreadsheet::ParseExcel::FmtDefault> and back since they are both built to interface with
1283             fundamentally different architecture.> This change also affects how the role
1284             L<Spreadsheet::XLSX::Reader::LibXML::ParseExcelFormatStrings> is consumed. If
1285             you wrote your own formatter for this package for the old way I would be willing
1286             to provide troubleshooting support for the transition to the the new API. However if you are
1287             setting specific formats today using set_defined_excel_format_list you should be able to switch to
1288             L<Spreadsheet::XLSX::Reader::LibXML::FmtDefault/set_defined_excel_formats( %args )> or use the
1289             attribute L<Spreadsheet::XLSX::Reader::LibXML::FmtDefault/defined_excel_translations>. B<This warning
1290             will be removed on 2/1/2016.>
1291              
1292             B<5.> This package now supports reading xlsm files (Macro enabled Excel 2007+ workbooks).
1293             xlsm files allow for binaries to be embedded that may contain malicious code. However, other
1294             than unzipping the excel file no work is done by this package with the sub-file 'vbaProject.bin'
1295             containing the binaries. This update does not provide an API to that sub-file and I have no
1296             intention of doing so. Therefore my research indicates there should be no risk of virus activation
1297             while parsing even an infected xlsm file with this package but I encourage you to use your own
1298             judgement in this area.
1299              
1300             B<6.> This package will read some files with 'broken' xml. In general this should be
1301             transparent but in the case of the maximum row value and the maximum column value for a
1302             worksheet it can cause some surprising problems. For instance the answer to the methods
1303             L<Spreadsheet::XLSX::Reader::LibXML::Worksheet/row_range> and
1304             L<Spreadsheet::XLSX::Reader::LibXML::Worksheet/col_range> can change as more of the sheet is
1305             parsed. This includes the possibility that the maximum values are initially listed as 'undef'
1306             if the sheet does not provide them in the metadata. The sheet will improve these values as cells
1307             are read and more definitive information is available based on the dimensional scope of the users
1308             cell parsing. The primary cause of these broken XML elements are non-XML applications writing to
1309             the excel spreadsheet. You can use the attribute L<file_boundary_flags|/file_boundary_flags> or
1310             the methods L<Spreadsheet::XLSX::Reader::LibXML::Worksheet/get_next_value> or
1311             L<Spreadsheet::XLSX::Reader::LibXML::Worksheet/fetchrow_arrayref> to overcome the missing
1312             metadata.
1313              
1314             =head2 Attributes
1315              
1316             Data passed to new when creating an instance. For modification of these attributes see the
1317             listed 'attribute methods'. For general information on attributes see
1318             L<Moose::Manual::Attributes>. For ways to manage the workbook when opened see the
1319             L<Primary Methods|/Primary Methods>. For additional lesser used workbook options
1320             see L<Secondary Methods|/Secondary Methods>.
1321              
1322             B<Example>
1323              
1324             $workbook_instance = Spreadsheet::XLSX::Reader::LibXML->new( %attributes )
1325              
1326             I<note: if a file name or file handle for an .xlsx file are not included in the initial
1327             %attributes then one of them must be set by one of the attribute setter methods below
1328             before the rest of the package can be used.>
1329              
1330             =head3 file_name
1331              
1332             =over
1333              
1334             B<Definition:> This attribute holds the full file name and path for the xlsx|xlsm file to be
1335             parsed.
1336              
1337             B<Default> no default - either this or a L<file handle|/file_handle> must be provided to
1338             read a file
1339              
1340             B<Range> any unencrypted xlsx|xlsm file that can be opened in Microsoft Excel.
1341              
1342             B<attribute methods> Methods provided to adjust this attribute
1343            
1344             =over
1345              
1346             B<set_file_name>
1347              
1348             =over
1349              
1350             B<Definition:> change the file name value in the attribute (this will reboot
1351             the workbook instance)
1352              
1353             =back
1354              
1355             B<has_file_name>
1356              
1357             =over
1358              
1359             B<Definition:> this is used to see if the workbook loaded correctly using the
1360             file_name option to open an Excel .xlsx file.
1361              
1362             =back
1363              
1364             =back
1365              
1366             =back
1367              
1368             =head3 file_handle
1369              
1370             =over
1371              
1372             B<Definition:> This attribute holds a copy of the passed file handle reference.
1373              
1374             B<Default> no default - either this or a L<file name|/file_name> must be provided to read
1375             a file
1376              
1377             B<Range> any unencrypted xlsx file handle that can be opened in Microsoft Excel
1378              
1379             B<attribute methods> Methods provided to adjust this attribute
1380            
1381             =over
1382              
1383             B<set_file_handle>
1384              
1385             =over
1386              
1387             B<Definition:> change the set file handle (this will reboot the workbook instance)
1388              
1389             =back
1390              
1391             B<has_file_handle>
1392              
1393             =over
1394              
1395             B<Definition:> this is used to see if the workbook loaded correctly when using the
1396             file_handle option to open an Excel .xlsx file.
1397              
1398             =back
1399              
1400             =back
1401              
1402             =back
1403              
1404             =head3 error_inst
1405              
1406             =over
1407              
1408             B<Definition:> This attribute holds an 'error' object instance. It should have several
1409             methods for managing errors. Currently no error codes or error language translation
1410             options are available but this should make implementation of that easier.
1411              
1412             B<Default:> a L<Spreadsheet::XLSX::Reader::LibXML::Error> instance with the attributes set
1413             as;
1414            
1415             ( should_warn => 0 )
1416              
1417             B<Range:> The minimum list of methods to implement for your own instance is;
1418              
1419             error set_error clear_error set_warnings if_warn
1420            
1421             The error instance must be able to extract the error string from a passed error
1422             object as well. For now the current implementation will attempt ->as_string first
1423             and then ->message if an object is passed.
1424              
1425             B<attribute methods> Methods provided to adjust this attribute
1426              
1427             =over
1428              
1429             B<get_error_inst>
1430              
1431             =over
1432              
1433             B<Definition:> returns this instance
1434              
1435             =back
1436              
1437             B<error>
1438              
1439             =over
1440              
1441             B<Definition:> delegated method from the class used to get the most recently
1442             logged error string
1443              
1444             =back
1445              
1446             B<set_error>
1447              
1448             =over
1449              
1450             B<Definition:> delegated method from the class used to set a new error string
1451             (or pass an error object for extraction of the error string)
1452              
1453             =back
1454              
1455             B<clear_error>
1456              
1457             =over
1458              
1459             B<Definition:> delegated method from the class used to clear the current error
1460             string
1461              
1462             =back
1463              
1464             B<set_warnings>
1465              
1466             =over
1467              
1468             B<Definition:> delegated method from the class used to turn on or off real time
1469             warnings when errors are set
1470              
1471             =back
1472              
1473             B<if_warn>
1474              
1475             =over
1476              
1477             B<Definition:> delegated method from the class used to extend this package and
1478             see if warnings should be emitted.
1479              
1480             =back
1481              
1482             B<should_spew_longmess>
1483              
1484             =over
1485              
1486             B<Definition:> delegated method from the class used to turn on or off the L<Carp>
1487             'longmess'for error messages
1488              
1489             =back
1490              
1491             B<spewing_longmess>
1492              
1493             =over
1494              
1495             B<Definition:> delegated method from the class used to understand the current state
1496             the longmess concatenation for error messages
1497              
1498             =back
1499            
1500             =back
1501              
1502             =back
1503              
1504             =head3 sheet_parser
1505              
1506             =over
1507              
1508             B<Definition:> This sets the way the .xlsx file is parsed. For now the only
1509             choice is 'reader'.
1510              
1511             B<Default> 'reader'
1512              
1513             B<Range> 'reader'
1514              
1515             B<attribute methods> Methods provided to adjust this attribute
1516            
1517             =over
1518              
1519             B<set_parser_type>
1520              
1521             =over
1522              
1523             B<Definition:> the way to change the parser type
1524              
1525             =back
1526              
1527             B<get_parser_type>
1528              
1529             =over
1530              
1531             B<Definition:> returns the currently set parser type
1532              
1533             =back
1534              
1535             =back
1536              
1537             =back
1538              
1539             =head3 count_from_zero
1540              
1541             =over
1542              
1543             B<Definition:> Excel spreadsheets count from 1. L<Spreadsheet::ParseExcel>
1544             counts from zero. This allows you to choose either way.
1545              
1546             B<Default> 1
1547              
1548             B<Range> 1 = counting from zero like Spreadsheet::ParseExcel,
1549             0 = Counting from 1 like Excel
1550              
1551             B<attribute methods> Methods provided to adjust this attribute
1552            
1553             =over
1554              
1555             B<counting_from_zero>
1556              
1557             =over
1558              
1559             B<Definition:> a way to check the current attribute setting
1560              
1561             =back
1562              
1563             B<set_count_from_zero>
1564              
1565             =over
1566              
1567             B<Definition:> a way to change the current attribute setting
1568              
1569             =back
1570              
1571             =back
1572              
1573             =back
1574              
1575             =head3 file_boundary_flags
1576              
1577             =over
1578              
1579             B<Definition:> When you request data to the right of the last column or below
1580             the last row of the data this package can return 'EOR' or 'EOF' to indicate that
1581             state. This is especially helpful in 'while' loops. The other option is to
1582             return 'undef'. This is problematic if some cells in your table are empty which
1583             also returns undef. What is determined to be the last column and row is determined
1584             by the attribute L<empty_is_end|/empty_is_end>.
1585              
1586             B<Default> 1
1587              
1588             B<Range> 1 = return 'EOR' or 'EOF' flags as appropriate, 0 = return undef when
1589             requesting a position that is out of bounds
1590              
1591             B<attribute methods> Methods provided to adjust this attribute
1592            
1593             =over
1594              
1595             B<boundary_flag_setting>
1596              
1597             =over
1598              
1599             B<Definition:> a way to check the current attribute setting
1600              
1601             =back
1602              
1603             B<change_boundary_flag>
1604              
1605             =over
1606              
1607             B<Definition:> a way to change the current attribute setting
1608              
1609             =back
1610              
1611             =back
1612              
1613             =back
1614              
1615             =head3 empty_is_end
1616              
1617             =over
1618              
1619             B<Definition:> The excel convention is to read the table left to right and top
1620             to bottom. Some tables have an uneven number of columns with real data from row
1621             to row. This allows the several methods that excersize a 'next' function to wrap
1622             after the last element with data rather than going to the max column. This also
1623             triggers 'EOR' flags after the last data element and before the sheet max column
1624             when not implementing 'next' functionality.
1625              
1626             B<Default> 0
1627              
1628             B<Range> 1 = treat all columns short of the max column for the sheet as being in
1629             the table, 0 = end each row after the last cell with data rather than going to the
1630             max sheet column
1631              
1632             B<attribute methods> Methods provided to adjust this attribute
1633            
1634             =over
1635              
1636             B<is_empty_the_end>
1637              
1638             =over
1639              
1640             B<Definition:> a way to check the current attribute setting
1641              
1642             =back
1643              
1644             B<set_empty_is_end>
1645              
1646             =over
1647              
1648             B<Definition:> a way to set the current attribute setting
1649              
1650             =back
1651              
1652             =back
1653              
1654             =back
1655              
1656             =head3 values_only
1657              
1658             =over
1659              
1660             B<Definition:> Excel will store information about a cell even if it only contains
1661             formatting data. In many cases you only want to see cells that actually have
1662             values. This attribute will change the package behaviour regarding cells that have
1663             formatting stored against that cell but no actual value.
1664              
1665             B<Default> 0
1666              
1667             B<Range> 1 = skip cells with formatting only and treat them as completely empty,
1668             0 = return informat about cells that only contain formatting
1669              
1670             B<attribute methods> Methods provided to adjust this attribute
1671            
1672             =over
1673              
1674             B<get_values_only>
1675              
1676             =over
1677              
1678             B<Definition:> a way to check the current attribute setting
1679              
1680             =back
1681              
1682             B<set_values_only>
1683              
1684             =over
1685              
1686             B<Definition:> a way to set the current attribute setting
1687              
1688             =back
1689              
1690             =back
1691              
1692             =back
1693              
1694             =head3 from_the_edge
1695              
1696             =over
1697              
1698             B<Definition:> Some data tables start in the top left corner. Others do not. I
1699             don't reccomend that practice but when aquiring data in the wild it is often good
1700             to adapt. This attribute sets whether the file reads from the top left edge or from
1701             the top row with data and starting from the leftmost column with data.
1702              
1703             B<Default> 1
1704              
1705             B<Range> 1 = treat the top left corner of the sheet as the beginning of rows and
1706             columns even if there is no data in the top row or leftmost column, 0 = Set the
1707             minimum row and minimum columns to be the first row and first column with data
1708              
1709             B<attribute methods> Methods provided to adjust this attribute
1710            
1711             =over
1712              
1713             B<set_from_the_edge>
1714              
1715             =over
1716              
1717             B<Definition:> a way to set the current attribute setting
1718              
1719             =back
1720              
1721             =back
1722              
1723             =back
1724              
1725             =head3 cache_positions
1726              
1727             =over
1728              
1729             B<Definition:> This parse can be slow. It does this by trading processing and
1730             file storage for RAM usage but that is probably not the average users choice. Not all
1731             things that can be cached are cached yet. However, when this attribute is set where
1732             the parser knows how to cache it will.
1733              
1734             B<Default> 1 = caching is turned on
1735              
1736             B<attribute methods> Methods provided to adjust this attribute
1737            
1738             =over
1739              
1740             B<get_cache_positions>
1741              
1742             =over
1743              
1744             B<Definition:> read the attribute
1745              
1746             =back
1747              
1748             =back
1749              
1750             =back
1751              
1752             =head3 format_inst
1753              
1754             =over
1755              
1756             B<Definition:> This is the attribute containing the format class. In general the
1757             default value is sufficient. However, If you want to tweak this a bit then review the
1758             L<class documentation|Spreadsheet::XLSX::Reader::LibXML::FmtDefault>. It does include
1759             a role that interprets the excel L<format string
1760             |https://support.office.com/en-us/article/Create-or-delete-a-custom-number-format-2d450d95-2630-43b8-bf06-ccee7cbe6864?ui=en-US&rs=en-US&ad=US>
1761             into a L<Type::Tiny> coercion.
1762              
1763             B<Default> L<Spreadsheet::XLSX::Reader::LibXML::FmtDefault>->new
1764              
1765             B<attribute methods> Methods provided to adjust this attribute
1766            
1767             =over
1768              
1769             B<set_format_inst>
1770              
1771             =over
1772              
1773             B<Definition:> a way to set the current attribute instance
1774              
1775             =back
1776              
1777             B<get_format_inst>
1778              
1779             =over
1780              
1781             B<Definition:> a way to get the current attribute setting
1782              
1783             =back
1784              
1785             =back
1786              
1787             B<delegated methods:>
1788              
1789             =over
1790              
1791             L<Spreadsheet::XLSX::Reader::LibXML::FmtDefault/get_defined_excel_format>
1792              
1793             L<Spreadsheet::XLSX::Reader::LibXML::FmtDefault/change_output_encoding>
1794              
1795             L<Spreadsheet::XLSX::Reader::LibXML::ParseExcelFormatStrings/parse_excel_format_string>
1796              
1797             L<Spreadsheet::XLSX::Reader::LibXML::ParseExcelFormatStrings//set_date_behavior>
1798              
1799             L<Spreadsheet::XLSX::Reader::LibXML::ParseExcelFormatStrings//get_date_behavior>
1800              
1801             L<Spreadsheet::XLSX::Reader::LibXML::ParseExcelFormatStrings//set_european_first>
1802              
1803             =back
1804              
1805             =back
1806              
1807             =head3 group_return_type
1808              
1809             =over
1810              
1811             B<Definition:> Traditionally ParseExcel returns a cell object with lots of methods
1812             to reveal information about the cell. In reality the extra information is not used very
1813             much (witness the popularity of L<Spreadsheet::XLSX>). Because many users don't need or
1814             want the extra cell formatting information it is possible to get either the raw xml value,
1815             the raw visible cell value (seen in the Excel format bar), or the formatted cell value
1816             returned either the way the Excel file specified or the way you specify instead of a Cell
1817             instance with all the data. . See
1818             L<Spreadsheet::XLSX::Reader::LibXML::Worksheet/custom_formats> to insert custom targeted
1819             formats for use with the parser. All empty cells return undef no matter what.
1820              
1821             B<Default> instance
1822              
1823             B<Range> instance = returns a populated L<Spreadsheet::XLSX::Reader::LibXML::Cell> instance,
1824             unformatted = returns just the raw visible value of the cell shown in the Excel formula bar,
1825             value = returns just the formatted value stored in the excel cell, xml_value = the raw value
1826             for the cell as stored in the sub-xml files
1827              
1828             B<attribute methods> Methods provided to adjust this attribute
1829            
1830             =over
1831              
1832             B<get_group_return_type>
1833              
1834             =over
1835              
1836             B<Definition:> a way to check the current attribute setting
1837              
1838             =back
1839              
1840             B<set_group_return_type>
1841              
1842             =over
1843              
1844             B<Definition:> a way to set the current attribute setting
1845              
1846             =back
1847              
1848             =back
1849              
1850             =back
1851              
1852             =head3 empty_return_type
1853              
1854             =over
1855              
1856             B<Definition:> Traditionally L<Spreadsheet::ParseExcel> returns an empty string for cells
1857             with unique formatting but no stored value. It may be that the more accurate way of returning
1858             undef works better for you. This will turn that behaviour on. I<If Excel stores an empty
1859             string having this attribute set to 'undef_string' will still return the empty string!>
1860              
1861             B<Default> empty_string
1862              
1863             B<Range>
1864             empty_string = populates the unformatted value with '' even if it is set to undef
1865             undef_string = if excel stores undef for an unformatted value it will return undef
1866              
1867             B<attribute methods> Methods provided to adjust this attribute
1868            
1869             =over
1870              
1871             B<get_empty_return_type>
1872              
1873             =over
1874              
1875             B<Definition:> a way to check the current attribute setting
1876              
1877             =back
1878              
1879             B<set_empty_return_type>
1880              
1881             =over
1882              
1883             B<Definition:> a way to set the current attribute setting
1884              
1885             =back
1886              
1887             =back
1888              
1889             =back
1890              
1891             =head2 Primary Methods
1892              
1893             These are the primary ways to use this class. They can be used to open an .xlsx workbook.
1894             They are also ways to investigate information at the workbook level. For information on
1895             how to retrieve data from the worksheets see the
1896             L<Worksheet|Spreadsheet::XLSX::Reader::LibXML::Worksheet> and
1897             L<Cell|Spreadsheet::XLSX::Reader::LibXML::Cell> documentation. For additional workbook
1898             options see the L<Secondary Methods|/Secondary Methods>
1899             and the L<Attributes|/Attributes> sections. The attributes section specifically contains
1900             all the methods used to adjust the attributes of this class.
1901              
1902             All methods are object methods and should be implemented on the object instance.
1903              
1904             B<Example:>
1905              
1906             my @worksheet_array = $workbook_instance->worksheets;
1907              
1908             =head3 parse( $file_name|$file_handle, $formatter )
1909              
1910             =over
1911              
1912             B<Definition:> This is a convenience method to match L<Spreadsheet::ParseExcel/parse($filename, $formatter)>.
1913             It only works if the L<file_name|/file_name> or L<file_handle|/file_handle> attribute was not
1914             set with ->new. It is one way to set the 'file_name' or 'file_handle' attribute [and the
1915             L<default_format_list|/default_format_list> attribute]. I<You cannot pass both a file name
1916             and a file handle simultaneously to this method.>
1917              
1918             B<Accepts:>
1919              
1920             $file = a valid xlsx file [or a valid xlsx file handle] (required)
1921             [$formatter] = see the default_format_list attribute for valid options (optional)
1922              
1923             B<Returns:> itself when passing with the xlsx file loaded to the workbook level or
1924             undef for failure.
1925              
1926             =back
1927              
1928             =head3 worksheets
1929              
1930             =over
1931              
1932             B<Definition:> This method will return an array (I<not an array reference>)
1933             containing a list of references to all worksheets in the workbook. This is not
1934             a reccomended method. It is provided for compatibility to Spreadsheet::ParseExcel.
1935             For alternatives see the L<get_worksheet_names|/get_worksheet_names> method and the
1936             L<worksheet|/worksheet( $name )> methods. B<For now it also only returns the tabular
1937             worksheets in the workbook. All chart worksheets are ignored! (future inclusion will
1938             included a backwards compatibility policy)>
1939              
1940             B<Accepts:> nothing
1941              
1942             B<Returns:> an array ref of L<Worksheet|Spreadsheet::XLSX::Reader::LibXML::Worksheet>
1943             objects for all worksheets in the workbook.
1944              
1945             =back
1946              
1947             =head3 worksheet( $name )
1948              
1949             =over
1950              
1951             B<Definition:> This method will return an object to read values in the worksheet.
1952             If no value is passed to $name then the 'next' worksheet in physical order is
1953             returned. I<'next' will NOT wrap> It also only iterates through the 'worksheets'
1954             in the workbook (but not the 'chartsheets').
1955              
1956             B<Accepts:> the $name string representing the name of the worksheet object you
1957             want to open. This name is the word visible on the tab when opening the spreadsheet
1958             in Excel. (not the underlying zip member file name - which can be different. It will
1959             not accept chart tab names.)
1960              
1961             B<Returns:> a L<Worksheet|Spreadsheet::XLSX::Reader::LibXML::Worksheet> object with the
1962             ability to read the worksheet of that name. It returns undef and sets the error attribute
1963             if a 'chartsheet' is requested. Or in 'next' mode it returns undef if past the last sheet.
1964              
1965             B<Example:> using the implied 'next' worksheet;
1966              
1967             while( my $worksheet = $workbook->worksheet ){
1968             print "Reading: " . $worksheet->name . "\n";
1969             # get the data needed from this worksheet
1970             }
1971              
1972             =back
1973              
1974             =head3 in_the_list
1975              
1976             =over
1977              
1978             B<Definition:> This is a predicate method that indicates if the 'next'
1979             L<worksheet|/worksheet( $name )> function has been implemented at least once.
1980              
1981             B<Accepts:>nothing
1982              
1983             B<Returns:> true = 1, false = 0
1984             once
1985              
1986             =back
1987              
1988             =head3 start_at_the_beginning
1989              
1990             =over
1991              
1992             B<Definition:> This restarts the 'next' worksheet at the first worksheet. This
1993             method is only useful in the context of the L<worksheet|/worksheet( $name )>
1994             function.
1995              
1996             B<Accepts:> nothing
1997              
1998             B<Returns:> nothing
1999              
2000             =back
2001              
2002             =head3 worksheet_count
2003              
2004             =over
2005              
2006             B<Definition:> This method returns the count of worksheets (excluding charts) in
2007             the workbook.
2008              
2009             B<Accepts:>nothing
2010              
2011             B<Returns:> an integer
2012              
2013             =back
2014              
2015             =head3 get_worksheet_names
2016              
2017             =over
2018              
2019             B<Definition:> This method returns an array ref of all the worksheet names in the
2020             workbook. (It excludes chartsheets.)
2021              
2022             B<Accepts:> nothing
2023              
2024             B<Returns:> an array ref
2025              
2026             B<Example:> Another way to parse a workbook without building all the sheets at
2027             once is;
2028              
2029             for $sheet_name ( @{$workbook->worksheet_names} ){
2030             my $worksheet = $workbook->worksheet( $sheet_name );
2031             # Read the worksheet here
2032             }
2033              
2034             =back
2035              
2036             =head3 get_sheet_names
2037              
2038             =over
2039              
2040             B<Definition:> This method returns an array ref of all the sheet names (tabs) in the
2041             workbook. (It includes chartsheets.)
2042              
2043             B<Accepts:> nothing
2044              
2045             B<Returns:> an array ref
2046              
2047             =back
2048              
2049             =head3 get_chartheet_names
2050              
2051             =over
2052              
2053             B<Definition:> This method returns an array ref of all the chartsheet names in the
2054             workbook. (It excludes worksheets.)
2055              
2056             B<Accepts:> nothing
2057              
2058             B<Returns:> an array ref
2059              
2060             =back
2061              
2062             =head3 sheet_name( $Int )
2063              
2064             =over
2065              
2066             B<Definition:> This method returns the sheet name for a given physical position
2067             in the workbook from left to right. It counts from zero even if the workbook is in
2068             'count_from_one' mode. B(It will return chart names but chart tab names cannot currently
2069             be converted to worksheets). You may actually want L<worksheet_name|worksheet_name( $Int )>
2070             instead of this function.
2071              
2072             B<Accepts:> integers
2073              
2074             B<Returns:> the sheet name (both workbook and worksheet)
2075              
2076             B<Example:> To return only worksheet positions 2 through 4
2077              
2078             for $x (2..4){
2079             my $worksheet = $workbook->worksheet( $workbook->worksheet_name( $x ) );
2080             # Read the worksheet here
2081             }
2082              
2083             =back
2084              
2085             =head3 sheet_count
2086              
2087             =over
2088              
2089             B<Definition:> This method returns the count of all sheets in the workbook (worksheets
2090             and chartsheets).
2091              
2092             B<Accepts:> nothing
2093              
2094             B<Returns:> a count of all sheets
2095              
2096             =back
2097              
2098             =head3 worksheet_name( $Int )
2099              
2100             =over
2101              
2102             B<Definition:> This method returns the worksheet name for a given order in the workbook
2103             from left to right. It does not count any 'chartsheet' positions as valid. It counts
2104             from zero even if the workbook is in 'count_from_one' mode.
2105              
2106             B<Accepts:> integers
2107              
2108             B<Returns:> the worksheet name
2109              
2110             B<Example:> To return only worksheet positions 2 through 4 and then parse them
2111              
2112             for $x (2..4){
2113             my $worksheet = $workbook->worksheet( $workbook->worksheet_name( $x ) );
2114             # Read the worksheet here
2115             }
2116              
2117             =back
2118              
2119             =head3 worksheet_count
2120              
2121             =over
2122              
2123             B<Definition:> This method returns the count of all worksheets in the workbook (not
2124             including chartsheets).
2125              
2126             B<Accepts:> nothing
2127              
2128             B<Returns:> a count of all worksheets
2129              
2130             =back
2131              
2132             =head3 chartsheet_name( $Int )
2133              
2134             =over
2135              
2136             B<Definition:> This method returns the chartsheet name for a given order in the workbook
2137             from left to right. It does not count any 'worksheet' positions as valid. It counts
2138             from zero even if the workbook is in 'count_from_one' mode.
2139              
2140             B<Accepts:> integers
2141              
2142             B<Returns:> the chartsheet name
2143              
2144             =back
2145              
2146             =head3 chartsheet_count
2147              
2148             =over
2149              
2150             B<Definition:> This method returns the count of all chartsheets in the workbook (not
2151             including worksheets).
2152              
2153             B<Accepts:> nothing
2154              
2155             B<Returns:> a count of all chartsheets
2156              
2157             =back
2158              
2159             =head3 error
2160              
2161             =over
2162              
2163             B<Definition:> This returns the most recent error message logged by the package. This
2164             method is mostly relevant when an unexpected result is returned by some other method.
2165              
2166             B<Accepts:>nothing
2167              
2168             B<Returns:> an error string.
2169              
2170             =back
2171              
2172             =head2 Secondary Methods
2173              
2174             These are the additional methods that include ways to extract additional information about
2175             the .xlsx file and ways to modify workbook and worksheet parsing that are less common.
2176             Note that all methods specifically used to adjust workbook level attributes are listed in
2177             the L<Attribute|/Attribute> section. This section primarily contains methods for or
2178             L<delegated|Moose::Manual::Delegation> from private attributes set up during the workbook
2179             load process.
2180              
2181             =head3 parse_excel_format_string( $format_string )
2182              
2183             =over
2184              
2185             Roundabout delegation from
2186             L<Spreadsheet::XLSX::Reader::LibXML::ParseExcelFormatStrings/parse_excel_format_string( $string )>
2187              
2188             =back
2189              
2190             =head3 creator
2191              
2192             =over
2193              
2194             B<Definition:> Retrieve the stored creator string from the Excel file.
2195              
2196             B<Accepts> nothing
2197              
2198             B<Returns> A string
2199              
2200             =back
2201              
2202             =head3 date_created
2203              
2204             =over
2205              
2206             B<Definition:> returns the date the file was created
2207              
2208             B<Accepts> nothing
2209              
2210             B<Returns> A string
2211              
2212             =back
2213              
2214             =head3 modified_by
2215              
2216             =over
2217              
2218             B<Definition:> returns the user name of the person who last modified the file
2219              
2220             B<Accepts> nothing
2221              
2222             B<Returns> A string
2223              
2224             =back
2225              
2226             =head3 date_modified
2227              
2228             =over
2229              
2230             B<Definition:> returns the date when the file was last modified
2231              
2232             B<Accepts> nothing
2233              
2234             B<Returns> A string
2235              
2236             =back
2237              
2238             =head3 get_epoch_year
2239              
2240             =over
2241              
2242             B<Definition:> This returns the epoch year defined by the Excel workbook.
2243              
2244             B<Accepts:> nothing
2245              
2246             B<Returns:> 1900 = Windows Excel or 1904 = Apple Excel
2247              
2248             =back
2249              
2250             =head3 get_shared_string_position
2251              
2252             =over
2253              
2254             Roundabout delegation from
2255             L<Spreadsheet::XLSX::Reader::LibXML::SharedStrings/get_shared_string_position( $position )>
2256              
2257             =back
2258              
2259             =head3 get_format_position
2260              
2261             =over
2262              
2263             Roundabout delegation from
2264             L<Spreadsheet::XLSX::Reader::LibXML::Styles/get_format_position( $position, [$header] )>
2265              
2266             =back
2267              
2268             =head3 set_defined_excel_format_list
2269              
2270             =over
2271              
2272             Roundabout delegation from
2273             L<Spreadsheet::XLSX::Reader::LibXML::FmtDefault/set_defined_excel_format_list>
2274              
2275             =back
2276              
2277             =head3 change_output_encoding
2278              
2279             =over
2280              
2281             Roundabout delegation from
2282             L<Spreadsheet::XLSX::Reader::LibXML::FmtDefault/change_output_encoding( $string )>
2283              
2284             =back
2285              
2286             =head3 set_cache_behavior
2287              
2288             =over
2289              
2290             Roundabout delegation from
2291             L<Spreadsheet::XLSX::Reader::LibXML::ParseExcelFormatStrings/cache_formats>
2292              
2293             =back
2294              
2295             =head3 get_date_behavior
2296              
2297             =over
2298              
2299             Roundabout delegation from
2300             L<Spreadsheet::XLSX::Reader::LibXML::ParseExcelFormatStrings/datetime_dates>
2301              
2302             =back
2303              
2304             =head3 set_date_behavior
2305              
2306             =over
2307              
2308             Roundabout delegation from
2309             L<Spreadsheet::XLSX::Reader::LibXML::ParseExcelFormatStrings/datetime_dates>
2310              
2311             =back
2312              
2313             =head1 FLAGS
2314              
2315             The parameter list (attributes) that are possible to pass to ->new is somewhat long.
2316             Therefore you may want a shortcut that aggregates some set of attribute settings that
2317             are not the defaults but wind up being boilerplate. I have provided possible
2318             alternate sets like this and am open to providing others that are suggested. The
2319             flags will have a : in front of the identifier and will be passed to the class in the
2320             'use' statement for consumption by the import method. The flags can be stacked and
2321             where there is conflict between the flag settings the rightmost passed flag setting is
2322             used.
2323              
2324             Example;
2325              
2326             use Spreadsheet::XLSX::Reader::LibXML v0.34.4 qw( :alt_default :debug );
2327              
2328             =head2 :alt_default
2329              
2330             This is intended for a deep look at data and skip formatting cells.
2331              
2332             =over
2333              
2334             B<Default attribute differences>
2335              
2336             =over
2337              
2338             L<values_only|/values_only> => 1
2339              
2340             L<count_from_zero|/count_from_zero> => 0
2341              
2342             L<empty_is_end|/empty_is_end> => 1
2343              
2344             =back
2345              
2346             =back
2347              
2348             =head2 :just_the_data
2349              
2350             This is intended for a shallow look at data and skip formatting.
2351              
2352             =over
2353              
2354             B<Default attribute differences>
2355              
2356             =over
2357              
2358             L<values_only|/values_only> => 1
2359              
2360             L<count_from_zero|/count_from_zero> => 0
2361              
2362             L<empty_is_end|/empty_is_end> => 1
2363              
2364             L<group_return_type|/group_return_type> => 'value'
2365              
2366             L<cache_positions|/cache_positions> => 1
2367              
2368             L<from_the_edge|/from_the_edge> => 0,
2369              
2370             =back
2371              
2372             =back
2373              
2374             =head2 :just_raw_data
2375              
2376             This is intended for a shallow look at raw text and skips all formatting including number formats.
2377              
2378             =over
2379              
2380             B<Default attribute differences>
2381              
2382             =over
2383              
2384             L<values_only|/values_only> => 1
2385              
2386             L<count_from_zero|/count_from_zero> => 0
2387              
2388             L<empty_is_end|/empty_is_end> => 1
2389              
2390             L<group_return_type|/group_return_type> => 'unformatted'
2391              
2392             L<cache_positions|/cache_positions> => 1
2393              
2394             L<from_the_edge|/from_the_edge> => 0,
2395              
2396             =back
2397              
2398             =back
2399              
2400             =head2 :debug
2401              
2402             Turn on L<Spreadsheet::XLSX::Reader::LibXML::Error/should_warn> in the Error attribute (instance)
2403              
2404             =over
2405              
2406             B<Default attribute differences>
2407              
2408             =over
2409              
2410             L<Spreadsheet::XLSX::Reader::LibXML::Error/should_warn> => 1
2411              
2412             =back
2413              
2414             =back
2415              
2416             =head1 BUILD / INSTALL from Source
2417              
2418             B<0.> Please note that using L<cpanm|https://metacpan.org/pod/App::cpanminus> is much easier
2419             than a source build! (but it will not always give the latest github version)
2420              
2421             cpanm Spreadsheet::XLSX::Reader::LibXML
2422            
2423             And then if you feel kindly
2424              
2425             cpanm-reporter
2426              
2427             B<1.> This package uses L<Alien::LibXML> to try and ensure that the mandatory prerequisite
2428             L<XML::LibXML> will load. The biggest gotcha here is that older (<5.20.0.2) versions of
2429             Strawberry Perl and some other Win32 perls may not support the script 'pkg-config' which is
2430             required. You can resolve this by installation L<PkgConfig> as 'pkg-config'. I have
2431             included the short version of that process below but download the full L<PkgConfig> distribution
2432             and read README.win32 file for other options and much more explanation.
2433              
2434             =over
2435              
2436             B<this will conflict with any existing pkg-config installed>
2437              
2438             C:\> cpanm PkgConfig --configure-args=--script=pkg-config
2439            
2440             =back
2441              
2442             It may be that you still need to use a system package manager to L<load|http://xmlsoft.org/> the
2443             'libxml2-devel' library. If this is the case or you experience any other installation issues please
2444             L<submit them to github|https://github.com/jandrew/Spreadsheet-XLSX-Reader-LibXML/issues> especially
2445             if they occur prior to starting the test suit as these failures will not auto push from CPAN Testers
2446             so I won't know to fix them!
2447            
2448             B<2.> Download a compressed file with this package code from your favorite source
2449              
2450             =over
2451              
2452             L<github|https://github.com/jandrew/Spreadsheet-XLSX-Reader-LibXML>
2453              
2454             L<Meta::CPAN|https://metacpan.org/pod/Spreadsheet::XLSX::Reader::LibXML>
2455              
2456             L<CPAN|http://search.cpan.org/~jandrew/Spreadsheet-XLSX-Reader-LibXML/>
2457              
2458             =back
2459            
2460             B<3.> Extract the code from the compressed file.
2461              
2462             =over
2463              
2464             If you are using tar on a .tar.gz file this should work:
2465              
2466             tar -zxvf Spreadsheet-XLSX-Reader-LibXML-v0.xx.tar.gz
2467            
2468             =back
2469              
2470             B<4.> Change (cd) into the extracted directory
2471              
2472             B<5.> Run the following
2473              
2474             =over
2475              
2476             (for Windows find what version of make was used to compile your perl)
2477              
2478             perl -V:make
2479            
2480             (then for Windows substitute the correct make function (s/make/dmake/g)? below)
2481            
2482             =back
2483              
2484             perl Makefile.PL
2485              
2486             make
2487              
2488             make test
2489              
2490             make install # As sudo/root
2491              
2492             make clean
2493              
2494             =head1 SUPPORT
2495              
2496             =over
2497              
2498             L<github Spreadsheet::XLSX::Reader::LibXML/issues|https://github.com/jandrew/Spreadsheet-XLSX-Reader-LibXML/issues>
2499              
2500             =back
2501              
2502             =head1 TODO
2503              
2504             =over
2505              
2506             B<1.> Add POD for all the new chart methods!
2507              
2508             B<1.> Build an 'Alien::LibXML::Devel' package to load the libxml2-devel libraries from source and
2509             require that and L<Alien::LibXML> in the build file. So all needed requirements for L<XML::LibXML>
2510             are met
2511              
2512             =over
2513              
2514             Both libxml2 and libxml2-devel libraries are required for XML::LibXML
2515              
2516             =back
2517              
2518             B<2.> Add a pivot table reader (Not just read the values from the sheet)
2519              
2520             B<3.> Add calc chain methods
2521              
2522             B<4.> Add more exposure to workbook formatting methods
2523              
2524             B<5.> Build a DOM parser alternative for the sheets
2525              
2526             =over
2527              
2528             (Theoretically faster than the reader but uses more memory)
2529              
2530             =back
2531              
2532             =back
2533              
2534             =head1 AUTHOR
2535              
2536             =over
2537              
2538             Jed Lund
2539              
2540             jandrew@cpan.org
2541              
2542             =back
2543              
2544             =head1 CONTRIBUTORS
2545              
2546             This is the (likely incomplete) list of people who have helped
2547             make this distribution what it is, either via code contributions,
2548             patches, bug reports, help with troubleshooting, etc. A huge
2549             'thank you' to all of them.
2550              
2551             =over
2552              
2553             L<Frank Maas|https://github.com/Frank071>
2554              
2555             L<Stuart Watt|https://github.com/morungos>
2556              
2557             L<Toby Inkster|https://github.com/morungos>
2558              
2559             L<Breno G. de Oliveira|https://github.com/garu>
2560              
2561             L<Bill Baker|https://github.com/wdbaker54>
2562              
2563             L<H.Merijin Brand|https://github.com/Tux>
2564              
2565             L<Todd Eigenschink|mailto:todd@xymmetrix.com>
2566              
2567             =back
2568              
2569             =head1 COPYRIGHT
2570              
2571             This program is free software; you can redistribute
2572             it and/or modify it under the same terms as Perl itself.
2573              
2574             The full text of the license can be found in the
2575             LICENSE file included with this module.
2576              
2577             This software is copyrighted (c) 2014, 2015 by Jed Lund
2578              
2579             =head1 DEPENDENCIES
2580              
2581             =over
2582              
2583             L<perl 5.010|perl/5.10.0>
2584              
2585             L<Archive::Zip>
2586              
2587             L<Carp>
2588              
2589             L<Clone>
2590              
2591             L<DateTime::Format::Flexible>
2592              
2593             L<DateTimeX::Format::Excel>
2594              
2595             L<IO::File>
2596              
2597             L<List::Util> - 1.33
2598              
2599             L<Moose> - 2.1213
2600              
2601             L<MooseX::HasDefaults::RO>
2602              
2603             L<MooseX::ShortCut::BuildInstance> - 1.032
2604              
2605             L<MooseX::StrictConstructor>
2606              
2607             L<Type::Tiny> - 1.000
2608              
2609             L<XML::LibXML>
2610              
2611             L<version> - 0.077
2612              
2613             =back
2614              
2615             =head1 SEE ALSO
2616              
2617             =over
2618              
2619             L<Spreadsheet::Read> - generic Spreadsheet reader that (hopefully) supports this package
2620              
2621             L<Spreadsheet::ParseExcel> - Excel version 2003 and earlier
2622              
2623             L<Spreadsheet::XLSX> - Excel version 2007 and later
2624              
2625             L<Spreadsheet::ParseXLSX> - Excel version 2007 and later
2626              
2627             L<Log::Shiras|https://github.com/jandrew/Log-Shiras>
2628              
2629             =over
2630              
2631             All lines in this package that use Log::Shiras are commented out
2632              
2633             =back
2634              
2635             =back
2636              
2637             =cut
2638              
2639             #########1#########2 main pod documentation end 5#########6#########7#########8#########9