File Coverage

lib/Spreadsheet/Reader/ExcelXML.pm
Criterion Covered Total %
statement 67 72 93.0
branch 12 18 66.6
condition n/a
subroutine 19 19 100.0
pod 1 2 50.0
total 99 111 89.1


line stmt bran cond sub pod time code
1             package Spreadsheet::Reader::ExcelXML;
2             our $AUTHORITY = 'cpan:JANDREW';
3 15     15   4022159 use version 0.77; our $VERSION = version->declare('v0.16.8');
  15         22357  
  15         99  
4             ###LogSD warn "You uncovered internal logging statements for Spreadsheet::Reader::ExcelXML-$VERSION";
5              
6 15     15   1580 use 5.010;
  15         68  
7 15     15   6722 use Moose;
  15         3009318  
  15         89  
8 15     15   80359 use MooseX::StrictConstructor;
  15         301509  
  15         58  
9 15     15   96343 use MooseX::HasDefaults::RO;
  15         72620  
  15         47  
10 15     15   98488 use Types::Standard qw( is_HashRef is_Object );
  15         682985  
  15         142  
11 15     15   13774 use Clone 'clone';
  15         24485  
  15         958  
12             ###LogSD use B::Deparse;
13             ###LogSD my $deparser = B::Deparse->new;
14              
15 15         90 use MooseX::ShortCut::BuildInstance 1.040 qw(
16             build_instance should_re_use_classes set_args_cloning
17 15     15   5734 );
  15         5481521  
18             should_re_use_classes( 1 );
19             set_args_cloning ( 0 );
20             ###LogSD use Log::Shiras::Telephone v0.48.0;
21 15     15   7434 use lib '../../../../lib',;
  15         25  
  15         108  
22 15     15   8683 use Spreadsheet::Reader::ExcelXML::Error;
  15         68  
  15         812  
23 15     15   8952 use Spreadsheet::Reader::ExcelXML::Workbook;
  15         55  
  15         2551  
24 15     15   10450 use Spreadsheet::Reader::Format v0.6.4;
  15         90440  
  15         483  
25 15     15   7278 use Spreadsheet::Reader::Format::FmtDefault;
  15         780981  
  15         566  
26 15     15   12108 use Spreadsheet::Reader::Format::ParseExcelFormatStrings;
  15         6605652  
  15         707  
27 15     15   117 use Spreadsheet::Reader::ExcelXML::Types qw( XLSXFile IOFileType );
  15         24  
  15         169  
28             ###LogSD with 'Log::Shiras::LogSpace';
29              
30             #########1 Dispatch Tables and data 4#########5#########6#########7#########8#########9
31              
32             my $attribute_defaults ={
33             error_inst =>{
34             package => 'ErrorInstance',
35             superclasses => ['Spreadsheet::Reader::ExcelXML::Error'],
36             should_warn => 0,
37             },
38             formatter_inst =>{
39             package => 'FormatInstance',
40             superclasses => [ 'Spreadsheet::Reader::Format::FmtDefault' ],
41             add_roles_in_sequence =>[qw(
42             Spreadsheet::Reader::Format::ParseExcelFormatStrings
43             Spreadsheet::Reader::Format
44             )],
45             },
46             count_from_zero => 1,
47             file_boundary_flags => 1,
48             empty_is_end => 0,
49             values_only => 0,
50             from_the_edge => 1,
51             group_return_type => 'instance',
52             empty_return_type => 'empty_string',
53             cache_positions =>{# Test this !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
54             shared_strings_interface => 5242880,# 5 MB
55             styles_interface => 5242880,# 5 MB
56             worksheet_interface => 2097152,# 2 MB
57             },
58             show_sub_file_size => 0,
59             spread_merged_values => 0,
60             skip_hidden => 0,
61             spaces_are_empty => 0,
62             merge_data => 1,
63             column_formats => 1,
64              
65             };
66             my $flag_settings ={
67             alt_default =>{
68             values_only => 1,
69             count_from_zero => 0,
70             empty_is_end => 1,
71             },
72             just_the_data =>{
73             count_from_zero => 0,
74             values_only => 1,
75             empty_is_end => 1,
76             group_return_type => 'value',
77             from_the_edge => 0,
78             empty_return_type => 'undef_string',
79             spaces_are_empty => 1,
80             merge_data => 0,
81             column_formats => 0,
82             },
83             just_raw_data =>{
84             count_from_zero => 0,
85             values_only => 1,
86             empty_is_end => 1,
87             group_return_type => 'unformatted',
88             from_the_edge => 0,
89             empty_return_type => 'undef_string',
90             spaces_are_empty => 1,
91             merge_data => 0,
92             column_formats => 0,
93             },
94             like_ParseExcel =>{
95             count_from_zero => 1,
96             group_return_type => 'instance',
97             },
98             debug =>{
99             error_inst =>{
100             superclasses => ['Spreadsheet::Reader::ExcelXML::Error'],
101             package => 'ErrorInstance',
102             should_warn => 1,
103             },
104             show_sub_file_size => 1,
105             },
106             lots_of_ram =>{
107             cache_positions =>{
108             shared_strings_interface => 209715200,# 200 MB
109             styles_interface => 209715200,# 200 MB
110             worksheet_interface => 209715200,# 200 MB
111             },
112             },
113             less_ram =>{
114             cache_positions =>{
115             shared_strings_interface => 10240,# 10 KB
116             styles_interface => 10240,# 10 KB
117             worksheet_interface => 1024,# 1 KB
118             },
119             },
120             };
121             my $delay_till_build = [qw( formatter_inst )];
122              
123             #########1 Public Methods 3#########4#########5#########6#########7#########8#########9
124              
125             ###LogSD sub get_class_space{ 'Top' }
126              
127             sub import{# Flags handled here!
128 20     20   2274 my ( $self, @flag_list ) = @_;
129             #~ print "Made it to import\n";
130             #~ if( scalar( @flag_list ) ){
131 20         51680 for my $flag ( @flag_list ){
132             #~ print "Arrived at import with flag: $flag\n";
133 9 100       40 if( $flag =~ /^:(\w*)$/ ){# Handle text based flags
    50          
134 8         15 my $default_choice = $1;
135             #~ print "Attempting to change the default group type to: $default_choice\n";
136 8 50       31 if( exists $flag_settings->{$default_choice} ){
137 8         11 for my $attribute ( keys %{$flag_settings->{$default_choice}} ){
  8         31  
138             #~ print "Changing flag -$attribute- to:" . Dumper( $flag_settings->{$default_choice}->{$attribute} );
139 30         8884 $attribute_defaults->{$attribute} = $flag_settings->{$default_choice}->{$attribute};
140             }
141             }else{
142 0         0 confess "No settings available for the flag: $flag";
143             }
144             }elsif( $flag =~ /^v?\d+\.?\d*/ ){# Version check may wind up here
145             #~ print "Running version check on version: $flag\n";
146 0         0 my $result = $VERSION <=> version->parse( $flag );
147             #~ print "Tested against version -$VERSION- gives result: $result\n";
148 0 0       0 if( $result < 0 ){
149 0         0 confess "Version -$flag- required - the installed version is: $VERSION";
150             }
151             }else{
152 1         27 confess "Passed attribute default flag -$flag- does not comply with the correct format";
153             }
154             }
155             #~ }
156             #~ print "Finished import\n";
157             }
158              
159             sub parse{
160              
161 11     11 1 3392 my ( $self, $file, $formatter ) = @_;
162             ###LogSD my $phone = Log::Shiras::Telephone->new( name_space =>
163             ###LogSD $self->get_all_space . '::parse', );
164             ###LogSD $phone->talk( level => 'info', message =>[
165             ###LogSD "Arrived at parse for:", $file,
166             ###LogSD (($formatter) ? "with formatter: $formatter" : '') ] );
167              
168             # Test the file
169 11 100       59 if( XLSXFile->check( $file ) ){
    50          
170             ###LogSD $phone->talk( level => 'info', message =>[ "This is an xlsx file: $file" ] );
171             }elsif( IOFileType->check( $file ) ){
172             ###LogSD $phone->talk( level => 'info', message =>[ "This is a file handle:", $file ] );
173             }else{
174 2         50 $self->set_error( "Value \"$file\" did not pass type constraint \"IOFileType\"" );
175 2         9 return undef;
176             }
177              
178             # Load the formatter
179 9 50       86 if( $formatter ){
180 0         0 $self->set_formatter_inst( $formatter );
181             ###LogSD $phone->talk( level => 'info', message =>[ "Formatter added" ] );
182             }
183              
184 9         57 $self->set_file( $file );
185             ###LogSD $phone->talk( level => 'info', message =>[ "Build workbook attempt complete", $self->file_opened ] );
186 9 100       67 return $self->file_opened ? $self : undef;
187             }
188              
189             #########1 Private Attributes 3#########4#########5#########6#########7#########8#########9
190              
191             has _workbook =>(
192             isa => 'Spreadsheet::Reader::ExcelXML::Workbook',
193             predicate => '_has_workbook',
194             writer => '_set_workbook',
195             clearer => '_clear_the_workbook',
196             handles => [qw(
197             error set_error clear_error set_warnings
198             should_spew_longmess spewing_longmess if_warn has_error
199             get_error_inst has_error_inst set_formatter_inst get_excel_region
200              
201             get_formatter_region has_target_encoding get_target_encoding set_workbook_for_formatter
202             set_target_encoding set_european_first set_date_behavior get_defined_conversion
203             set_defined_excel_formats change_output_encoding
204             set_formatter_cache_behavior parse_excel_format_string
205              
206             set_file counting_from_zero boundary_flag_setting spreading_merged_values
207             is_empty_the_end get_values_only starts_at_the_edge get_group_return_type
208             get_empty_return_type cache_positions get_cache_size has_cache_size
209             should_skip_hidden are_spaces_empty
210              
211             worksheet worksheets build_workbook demolish_the_workbook
212              
213             file_name file_opened get_epoch_year has_epoch_year
214             get_sheet_names get_sheet_name sheet_count get_sheet_info
215             get_rel_info get_id_info get_worksheet_names worksheet_name
216             worksheet_count get_chartsheet_names chartsheet_name chartsheet_count
217             creator modified_by date_created date_modified
218             in_the_list start_at_the_beginning
219             )],
220             );
221             #~ get_shared_string start_the_ss_file_over has_shared_strings_interface
222             #~ has_styles_interface get_format
223              
224             #########1 Private Methods 3#########4#########5#########6#########7#########8#########9
225              
226             around BUILDARGS => sub {
227             my ( $orig, $class, @args ) = @_;
228             my %args = is_HashRef( $args[0] ) ? %{$args[0]} : @args;
229             ###LogSD $args{log_space} //= 'ExcelXML';
230             ###LogSD my $class_space = __PACKAGE__->get_class_space;
231             ###LogSD my $log_space = $args{log_space} . "::$class_space" . '::_hidden::BUILDARGS';
232             ###LogSD my $phone = Log::Shiras::Telephone->new( name_space => $log_space, );
233             ###LogSD $phone->talk( level => 'trace', message =>[
234             ###LogSD 'Arrived at BUILDARGS with: ', @args, ] );# caller(3), caller(4), caller(5)
235              
236             # Handle depricated cache_positions
237             #~ print longmess( Dumper( %args ) );
238             if( exists $args{cache_positions} ){
239             ###LogSD $phone->talk( level => 'trace', message =>[
240             ###LogSD "The user did pass a value to cache_positions as:", $args{cache_positions}] );
241             if( !is_HashRef( $args{cache_positions} ) ){
242             warn "Passing a boolean value to the attribute 'cache_positions' is depricated since v0.40.2 - the input will be converted per the documentation";
243             $args{cache_positions} = !$args{cache_positions} ?
244             $flag_settings->{big_file}->{cache_positions} :
245             $attribute_defaults->{cache_positions};
246             }
247              
248             #scrub cache_positions
249             for my $passed_key ( keys %{$args{cache_positions}} ){
250             if( !exists $attribute_defaults->{cache_positions}->{$passed_key} ){
251             warn "Passing a cache position for '$passed_key' but that is not allowed";
252             }
253             }
254             for my $stored_key ( keys %{$attribute_defaults->{cache_positions}} ){
255             if( !exists $args{cache_positions}->{$stored_key} ){
256             warn "Passed cache positions are missing key => values for key: $stored_key";
257             }
258             }
259             }
260              
261             # Add any defaults
262             ###LogSD $phone->talk( level => 'trace', message =>[
263             ###LogSD "Processing possible default values", $attribute_defaults ] );
264             for my $key ( keys %$attribute_defaults ){
265             ###LogSD $phone->talk( level => 'trace', message =>[
266             ###LogSD "Processing possible default for -$key- with value:", $attribute_defaults->{$key} ] );
267             if( exists $args{$key} ){
268             ###LogSD $phone->talk( level => 'trace', message =>[
269             ###LogSD "Found user defined -$key- with value(s): ", $args{$key} ] );
270             }else{
271             ###LogSD $phone->talk( level => 'trace', message =>[
272             ###LogSD "Setting default -$key- with value(s): ", $attribute_defaults->{$key} ] );
273             $args{$key} = clone( $attribute_defaults->{$key} );
274             }
275             }
276              
277             # Build object instances as needed
278             for my $key ( keys %args ){
279             ###LogSD $phone->talk( level => 'trace', message =>[
280             ###LogSD "Checking if an instance needs built for key: $key" ] );
281             if( $key =~ /_inst$/ and !is_Object( $args{$key} ) and is_HashRef( $args{$key} ) ){
282             # Import log_space as needed
283             ###LogSD if( exists $args{log_space} and $args{log_space} ){
284             ###LogSD $args{$key}->{log_space} = $args{log_space};
285             ###LogSD }
286             ###LogSD $phone->talk( level => 'trace', message =>[
287             ###LogSD "Key -$key- requires an instance built from:", $args{$key} ] );
288             $args{$key} = build_instance( $args{$key} );
289             }
290             }
291              
292             ###LogSD $phone->talk( level => 'trace', message =>[
293             ###LogSD "Final BUILDARGS:", %args ] );
294             my $workbook = Spreadsheet::Reader::ExcelXML::Workbook->new( %args );
295             ###LogSD $phone->talk( level => 'trace', message =>[
296             ###LogSD "Assigning the built workbook to the _workbook attribute with: ", $deparser->coderef2text( $orig ),
297             ###LogSD "..with workboook: ", $workbook->dump ] );
298             return $class->$orig(
299             _workbook => $workbook,
300             ###LogSD log_space => $args{log_space}
301             );
302             };
303              
304             sub DEMOLISH{
305 22     22 0 661 my ( $self ) = @_;
306             ###LogSD my $phone = Log::Shiras::Telephone->new( name_space =>
307             ###LogSD $self->get_all_space . '::_hidden::DEMOLISH', );
308             ###LogSD $phone->talk( level => 'debug', message =>[
309             ###LogSD "Forcing Non-recursive garbage collection on recursive stuff" ] );
310 22 100       521 if( $self->_has_workbook ){
311             ###LogSD $phone->talk( level => 'debug', message =>[
312             ###LogSD "Need to demolish the workbook" ] );
313 21         119 $self->demolish_the_workbook;
314             ###LogSD $phone->talk( level => 'debug', message =>[
315             ###LogSD "Clearing the attribute" ] );
316 21         1024 $self->_clear_the_workbook;
317             }
318             }
319              
320             #########1 Phinish 3#########4#########5#########6#########7#########8#########9
321              
322 15     15   22337 no Moose;
  15         23  
  15         98  
323             __PACKAGE__->meta->make_immutable;
324              
325             1;
326              
327             #########1 Documentation 3#########4#########5#########6#########7#########8#########9
328             __END__
329              
330             =head1 NAME
331              
332             Spreadsheet::Reader::ExcelXML - Read xlsx/xlsm/xml extention Excel files
333              
334             =begin html
335              
336             <a href="https://www.perl.org">
337             <img src="https://img.shields.io/badge/perl-5.10+-brightgreen.svg" alt="perl version">
338             </a>
339              
340             <a href="https://travis-ci.org/jandrew/p5-spreadsheet-reader-excelxml">
341             <img alt="Build Status" src="https://travis-ci.org/jandrew/p5-spreadsheet-reader-excelxml.png?branch=master" alt='Travis Build'/>
342             </a>
343              
344             <a href='https://coveralls.io/github/jandrew/p5-spreadsheet-reader-excelxml?branch=master'>
345             <img src='https://coveralls.io/repos/github/jandrew/p5-spreadsheet-reader-excelxml/badge.svg?branch=master' alt='Coverage Status' />
346             </a>
347              
348             <a href='https://github.com/jandrew/p5-spreadsheet-reader-excelxml'>
349             <img src="https://img.shields.io/github/tag/jandrew/p5-spreadsheet-reader-excelxml.svg?label=github version" alt="github version"/>
350             </a>
351              
352             <a href="https://metacpan.org/pod/Spreadsheet::Reader::ExcelXML">
353             <img src="https://badge.fury.io/pl/Spreadsheet-Reader-ExcelXML.svg?label=cpan version" alt="CPAN version" height="20">
354             </a>
355              
356             <a href='http://cpants.cpanauthors.org/dist/Spreadsheet-Reader-ExcelXML'>
357             <img src='http://cpants.cpanauthors.org/dist/Spreadsheet-Reader-ExcelXML.png' alt='kwalitee' height="20"/>
358             </a>
359              
360             =end html
361              
362             =encoding UTF-8
363              
364             =head1 SYNOPSIS
365              
366             The following uses the 'TestBook.xlsx' file found in the t/test_files/ folder of the package
367              
368             #!/usr/bin/env perl
369             use strict;
370             use warnings;
371             use Spreadsheet::Reader::ExcelXML;
372              
373             my $parser = Spreadsheet::Reader::ExcelXML->new();
374             my $workbook = $parser->parse( 'TestBook.xlsx' );
375              
376             if ( !defined $workbook ) {
377             die $parser->error(), "\n";
378             }
379              
380             for my $worksheet ( $workbook->worksheets() ) {
381              
382             my ( $row_min, $row_max ) = $worksheet->row_range();
383             my ( $col_min, $col_max ) = $worksheet->col_range();
384              
385             for my $row ( $row_min .. $row_max ) {
386             for my $col ( $col_min .. $col_max ) {
387              
388             my $cell = $worksheet->get_cell( $row, $col );
389             next unless $cell;
390              
391             print "Row, Col = ($row, $col)\n";
392             print "Value = ", $cell->value(), "\n";
393             print "Unformatted = ", $cell->unformatted(), "\n";
394             print "\n";
395             }
396             }
397             last;# In order not to read all sheets
398             }
399              
400             ###########################
401             # SYNOPSIS Screen Output
402             # 01: Row, Col = (0, 0)
403             # 02: Value = Category
404             # 03: Unformatted = Category
405             # 04:
406             # 05: Row, Col = (0, 1)
407             # 06: Value = Total
408             # 07: Unformatted = Total
409             # 08:
410             # 09: Row, Col = (0, 2)
411             # 10: Value = Date
412             # 11: Unformatted = Date
413             # 12:
414             # 13: Row, Col = (1, 0)
415             # 14: Value = Red
416             # 16: Unformatted = Red
417             # 17:
418             # 18: Row, Col = (1, 1)
419             # 19: Value = 5
420             # 20: Unformatted = 5
421             # 21:
422             # 22: Row, Col = (1, 2)
423             # 23: Value = 2017-2-14 #(shows as 2/14/2017 in the sheet)
424             # 24: Unformatted = 41318
425             # 25:
426             # More intermediate rows ...
427             # 82:
428             # 83: Row, Col = (6, 2)
429             # 84: Value = 2016-2-6 #(shows as 2/6/2016 in the sheet)
430             # 85: Unformatted = 40944
431             ###########################
432              
433             =head1 DESCRIPTION
434              
435             This is an Excel spreadsheet reading package that should parse all excel files with the
436             extentions .xlsx, .xlsm, .xml I<L<Excel 2003 xml
437             |https://en.wikipedia.org/wiki/Microsoft_Office_XML_formats> (L<SpreadsheetML
438             |https://en.wikipedia.org/wiki/SpreadsheetML>)> that can be opened in the Excel 2007+
439             applications. The quick-start example provided in the SYNOPSIS attempts to follow the
440             example from L<Spreadsheet::ParseExcel> (.xls binary file reader) as close as possible.
441             There are additional methods and other approaches that can be used by this package for
442             spreadsheet reading but the basic access to data from newer xml based Excel files can be
443             as simple as above.
444              
445             This is L<not the only perl package|/SEE ALSO> able to parse .xlsx files on METACPAN. For
446             now it does appear to be the only package that will parse .xlsm and Excel 2003 .xml
447             workbooks.
448              
449             There is some documentation throughout this package for users who intend to extend the
450             package but the primary documentation is intended for the person who uses the package as
451             is. Parsing through an Excel workbook is done with three levels of classes;
452              
453             =head2 Workbook level (This doc)
454              
455             =over
456              
457             =item * General L<attribute|/Attributes> settings that affect parsing of the file in general
458              
459             =item * The place to L<set workbook level output formatting|Spreadsheet::Reader::Format>
460              
461             =item * Object L<methods|/Methods> to retreive document level metadata and worksheets
462              
463             =back
464              
465             =head2 L<Worksheet level|Spreadsheet::Reader::ExcelXML::Worksheet>
466              
467             =over
468              
469             =item * Object methods to return specific cell instances/L<data|/group_return_type>
470              
471             =item * Access to some worksheet level format information (more access pending)
472              
473             =item * The place to L<customize|Spreadsheet::Reader::ExcelXML::Worksheet/custom_formats>
474             data output formats targeting specific cell ranges
475              
476             =back
477              
478             =head2 L<Cell level|Spreadsheet::Reader::ExcelXML::Cell>
479              
480             =over
481              
482             =item * Access to the cell contents
483              
484             =item * Access to the cell formats (more access pending)
485              
486             =back
487              
488             There are some differences from the L<Spreadsheet::ParseExcel> package. For instance
489             in the L<SYNOPSIS|/SYNOPSIS> the '$parser' and the '$workbook' are actually the same
490             class for this package. You could therefore combine both steps by calling ->new with
491             the 'file' attribute called out. The test for load success would then rely on the
492             method L<file_opened|/file_opened>. Afterward it is still possible to call ->error
493             on the instance. Another difference is the data formatter and specifically date
494             handling. This package leverages L<Spreadsheet::Reader::Format> to allows for a
495             simple pluggable custom output format that is very flexible as well as handling dates
496             in the Excel file older than 1-January-1900. I leveraged coercions from L<Type::Tiny
497             |Type::Tiny::Manual> to do this but anything that follows that general format will work
498             here.
499              
500             The why and nitty gritty of design choices I made are in the L<Architecture Choices
501             |/Architecture Choices> section. Some pitfalls are outlined in the L<Warnings|/Warnings>
502             section. Read the full documentation for all opportunities!
503              
504             =head2 Primary Methods
505              
506             These are the primary ways to use this class. They can be used to open a workbook,
507             investigate information at the workbook level, and provide ways to access sheets in
508             the workbook.
509              
510             All methods are object methods and should be implemented on the object instance.
511              
512             B<Example:>
513              
514             my @worksheet_array = $workbook_instance->worksheets;
515              
516             =head3 parse( $file_name|$file_handle, $formatter )
517              
518             =over
519              
520             B<Definition:> This is a convenience method to match L<Spreadsheet::ParseExcel/parse($filename, $formatter)>.
521             It is one way to set the L<file|/file> attribute [and the L<formatter_inst|/formatter_inst> attribute].
522              
523             B<Accepts:>
524              
525             $file = see the L<file|/file> attribute for valid options (required) (required)
526             [$formatter] = see the L<formatter_inst|/formatter_inst> attribute for valid options (optional)
527              
528             B<Returns:> an instance of the package (not cloned) when passing with the xlsx file successfully
529             opened or undef for failure.
530              
531             =back
532              
533             =head3 worksheets
534              
535             =over
536              
537             B<Definition:> This method will return an array (I<not an array reference>) containing a list of references
538             to all worksheets in the workbook as objects. This is not a reccomended method because it builds all
539             worksheet instance and returns an array of objects. It is provided for compatibility to
540             Spreadsheet::ParseExcel. For alternatives see the L<get_worksheet_names|/get_worksheet_names> method and
541             the L<worksheet|/worksheet( $name )> methods. B<It also only returns the tabular worksheets in the
542             workbook. All chart sheets are ignored!>
543              
544             B<Accepts:> nothing
545              
546             B<Returns:> an array ref of L<Worksheet|Spreadsheet::Reader::ExcelXML::Worksheet>
547             objects for all worksheets in the workbook.
548              
549             =back
550              
551             =head3 worksheet( $name )
552              
553             =over
554              
555             B<Definition:> This method will return an object to read values in the identified
556             worksheet. If no value is passed to $name then the 'next' worksheet in physical order
557             is returned. I<'next' will NOT wrap> It also only iterates through the 'worksheets'
558             in the workbook (not the 'chartsheets').
559              
560             B<Accepts:> the $name string representing the name of the worksheet object you
561             want to open. This name is the word visible on the tab when opening the spreadsheet
562             in Excel. (not the underlying zip member file name - which can be different. It will
563             not accept chart tab names.)
564              
565             B<Returns:> a L<Worksheet|Spreadsheet::Reader::ExcelXML::Worksheet> object with the
566             ability to read the worksheet of that name. It returns undef and sets the error attribute
567             if a 'chartsheet' is requested. Or in 'next' mode it returns undef if past the last sheet.
568              
569             B<Example:> using the implied 'next' worksheet;
570              
571             while( my $worksheet = $workbook->worksheet ){
572             print "Reading: " . $worksheet->name . "\n";
573             # get the data needed from this worksheet
574             }
575              
576             =back
577              
578             =head3 file_name
579              
580             =over
581              
582             B<Definition:> If you pass a file $location/$name string to the attribute L<file|/file> then before
583             the package converts it to a file handle it will store the string. You can retreive that string
584             with this method. This is true if you pass a string to the L<parse
585             |/parse( $file_name|$file_handle, $formatter )> method as well.
586              
587             B<Accepts:> nothing
588              
589             B<Returns:> the $location/$name file string if available.
590              
591             =back
592              
593             =head3 file_opened
594              
595             =over
596              
597             B<Definition:> This method is the test for success that should be used when opening a workbook
598             using the -E<gt>new method. This allows for the object to store the error without dying
599             entirely.
600              
601             B<Accepts:> nothing
602              
603             B<Returns:> 1 if the workbook file was successfully opened
604              
605             B<Example:>
606              
607             use Spreadsheet::Reader::ExcelXML qw( :just_the_data );
608              
609             my $workbook = Spreadsheet::Reader::ExcelXML->new( file => 'TestBook.xlsx' );
610              
611             if ( !$workbook->file_opened ) {
612             die $workbook->error(), "\n";
613             }
614              
615             for my $worksheet ( $workbook->worksheets ) {
616              
617             print "Reading worksheet named: " . $worksheet->get_name . "\n";
618              
619             while( 1 ){
620             my $cell = $worksheet->get_next_value;
621             print "Cell is: $cell\n";
622             last if $cell eq 'EOF';
623             }
624             }
625              
626             =back
627              
628             =head3 get_sheet_names
629              
630             =over
631              
632             B<Definition:> This method returns an array ref of all the sheet names (tabs) in the
633             workbook in order. (It includes chartsheets.)
634              
635             B<Accepts:> nothing
636              
637             B<Returns:> an array ref of strings
638              
639             =back
640              
641             =head3 worksheet_name( $position )
642              
643             =over
644              
645             B<Definition:> This returns the name of the worksheet in that $position. (counting from zero)
646             interspersed chartsheets in the file are not considered to hold a position by this accounting.
647              
648             B<Accepts:> $position (an integer)
649              
650             B<Returns:> the worksheet name
651              
652             B<Example:> To return only worksheet positions 2 through 4 without building them all at once
653              
654             for $x (2..4){
655             my $worksheet = $workbook->worksheet( $workbook->worksheet_name( $x ) );
656             # Read the worksheet here
657             }
658              
659             =back
660              
661             =head3 get_worksheet_names
662              
663             =over
664              
665             B<Definition:> This method returns an array ref of all the worksheet names (tabs) in the
666             workbook in order. (No chartsheets.)
667              
668             B<Accepts:> nothing
669              
670             B<Returns:> an array ref of strings
671              
672             B<Example:> Another way to parse a workbook without building all the sheets at
673             once is;
674              
675             for $sheet_name ( @{$workbook->worksheet_names} ){
676             my $worksheet = $workbook->worksheet( $sheet_name );
677             # Read the worksheet here
678             }
679              
680             =back
681              
682             =head3 worksheet_count
683              
684             =over
685              
686             B<Definition:> This returns the total number of recorded worksheets
687              
688             B<Accepts:> nothing
689              
690             B<Returns:> $total - a count of all worksheets (only)
691              
692             =back
693              
694             =head2 Attributes
695              
696             Data passed to new when creating an instance. For modification of these attributes
697             see the listed 'attribute methods'. For general information on attributes see
698             L<Moose::Manual::Attributes>. For additional lesser used workbook options see
699             L<Secondary Methods|/Secondary Methods>. There are several grouped default values
700             for these attributes documented in the L<Flags|/Flags> section.
701              
702             B<Example>
703              
704             $workbook_instance = Spreadsheet::Reader::ExcelXML->new( %attributes )
705              
706             I<note: if the file information is not included in the initial %attributes then it must be
707             set by one of the attribute setter methods below or the L<parse
708             |parse( $file_nameE<verbar>$file_handle, $formatter )> method before the rest of the package
709             can be used.>
710              
711             =head3 file
712              
713             =over
714              
715             B<Definition:> This attribute holds the file handle for the top level workbook. If a
716             file name is passed it is coerced into an L<IO::File> handle and stored that way. The
717             originaly file name can be retrieved with the method L<file_name|/file_name>.
718              
719             B<Default> no default
720              
721             B<Required:> yes
722              
723             B<Range> any unencrypted xlsx|xlsm|xml file that can be opened in Microsoft Excel 2007+.
724              
725             B<attribute methods> Methods provided to adjust this attribute
726              
727             =over
728              
729             B<set_file( $file|$file_handle )>
730              
731             =over
732              
733             B<Definition:> change the file value in the attribute (this will reboot the workbook instance)
734              
735             =back
736              
737             =back
738              
739             =back
740              
741             =head3 error_inst
742              
743             =over
744              
745             B<Definition:> This attribute holds an 'error' object instance. It should have several
746             methods for managing errors. Currently no error codes or error language translation
747             options are available but this should make implementation of that easier.
748              
749             B<Default:> a L<Spreadsheet::Reader::ExcelXML::Error> instance with the attributes set
750             as;
751              
752             ( should_warn => 0 )
753              
754             B<Range:> See the 'Exported methods' section below for methods required by the workbook.
755             The error instance must also be able to extract the error string from a passed error
756             object as well. For now the current implementation will attempt ->as_string first
757             and then ->message if an object is passed.
758              
759             B<attribute methods> Methods provided to manage this attribute
760              
761             =over
762              
763             B<get_error_inst>
764              
765             =over
766              
767             B<Definition:> returns this instance
768              
769             =back
770              
771             B<has_error_inst>
772              
773             =over
774              
775             B<Definition:> indicates in the error instance has been set
776              
777             =back
778              
779             B<Exported methods:>
780              
781             The following methods are exported (delegated) to the workbook level
782             from the stored instance of this class. Links are provided to the default implemenation;
783              
784             =over
785              
786             L<Spreadsheet::Reader::ExcelXML::Error/error>
787              
788             L<Spreadsheet::Reader::ExcelXML::Error/set_error>
789              
790             L<Spreadsheet::Reader::ExcelXML::Error/clear_error>
791              
792             L<Spreadsheet::Reader::ExcelXML::Error/set_warnings>
793              
794             L<Spreadsheet::Reader::ExcelXML::Error/if_warn>
795              
796             L<Spreadsheet::Reader::ExcelXML::Error/should_spew_longmess>
797              
798             L<Spreadsheet::Reader::ExcelXML::Error/spewing_longmess>
799              
800             L<Spreadsheet::Reader::ExcelXML::Error/has_error>
801              
802             =back
803              
804             =back
805              
806             =back
807              
808             =head3 formatter_inst
809              
810             =over
811              
812             B<Definition:> This attribute holds a 'formatter' object instance. This instance does all
813             the heavy lifting to transform raw text into desired output. It does include
814             a role that interprets the excel L<format string
815             |https://support.office.com/en-us/article/Create-or-delete-a-custom-number-format-2d450d95-2630-43b8-bf06-ccee7cbe6864?ui=en-US&rs=en-US&ad=US>
816             into a L<Type::Tiny> coercion. The default case is actually built from a number of
817             different elements using L<MooseX::ShortCut::BuildInstance> on the fly so you can
818             just call out the replacement base class or role rather than fully building
819             the formatter prior to calling new on the workbook. However the naming of the interface
820             |http://www.cs.utah.edu/~germain/PPS/Topics/interfaces.html> is locked and should not be
821             tampered with since it manages the methods to be imported into the workbook;
822              
823             B<Default> An instance built with L<MooseX::ShortCut::BuildInstance> from the following
824             arguments (note the instance itself is not built here)
825             {
826             superclasses => ['Spreadsheet::Reader::ExcelXML::FmtDefault'], # base class
827             add_roles_in_sequence =>[qw(
828             Spreadsheet::Reader::ExcelXML::ParseExcelFormatStrings # role containing the heavy lifting methods
829             Spreadsheet::Reader::ExcelXML::FormatInterface # the interface
830             )],
831             package => 'FormatInstance', # a formality more than anything
832             }
833              
834             B<Range:> A replacement formatter instance or a set of arguments that will lead to building an acceptable
835             formatter instance. See the 'Exported methods'section below for all methods required methods for the
836             workbook. The FormatInterface is required by name so a replacement of that role requires the same name.
837              
838             B<attribute methods> Methods provided to manage this attribute
839              
840             =over
841              
842             B<get_formatter_inst>
843              
844             =over
845              
846             B<Definition:> returns the stored formatter instance
847              
848             =back
849              
850             B<set_formatter_inst>
851              
852             =over
853              
854             B<Definition:> sets the formatter instance
855              
856             =back
857              
858             B<Exported methods:>
859              
860             Additionally the following methods are exported (delegated) to the workbook level
861             from the stored instance of this class. Links are provided to the default implemenation;
862              
863             =over
864              
865             B<Example:> name_the_workbook_uses_to_access_the_method => B<Link to the default source of the method>
866              
867             get_formatter_region => L<Spreadsheet::Reader::Format::FmtDefault/get_excel_region>
868              
869             has_target_encoding => L<Spreadsheet::Reader::Format::FmtDefault/has_target_encoding>
870              
871             get_target_encoding => L<Spreadsheet::Reader::Format::FmtDefault/get_target_encoding>
872              
873             set_target_encoding => L<Spreadsheet::Reader::Format::FmtDefault/set_target_encoding( $encoding )>
874              
875             change_output_encoding => L<Spreadsheet::Reader::Format::FmtDefault/change_output_encoding( $string )>
876              
877             set_defined_excel_formats => L<Spreadsheet::Reader::Format::FmtDefault/set_defined_excel_formats( %args )>
878              
879             get_defined_conversion => L<Spreadsheet::Reader::Format::ParseExcelFormatStrings/get_defined_conversion( $position )>
880              
881             parse_excel_format_string => L<Spreadsheet::Reader::Format::ParseExcelFormatStrings/parse_excel_format_string( $string, $name )>
882              
883             set_date_behavior => L<Spreadsheet::Reader::Format::ParseExcelFormatStrings/set_date_behavior( $bool )>
884              
885             set_european_first => L<Spreadsheet::Reader::Format::ParseExcelFormatStrings/set_european_first( $bool )>
886              
887             set_formatter_cache_behavior => L<Spreadsheet::Reader::Format::ParseExcelFormatStrings/set_cache_behavior( $bool )>
888              
889             set_workbook_for_formatter => L<Spreadsheet::Reader::Format::ParseExcelFormatStrings/set_workbook_inst( $instance )>
890              
891             =back
892              
893             =back
894              
895             =back
896              
897             =head3 count_from_zero
898              
899             =over
900              
901             B<Definition:> Excel spreadsheets count from 1. L<Spreadsheet::ParseExcel>
902             counts from zero. This allows you to choose either way.
903              
904             B<Default> 1
905              
906             B<Range> 1 = counting from zero like Spreadsheet::ParseExcel,
907             0 = Counting from 1 like Excel
908              
909             B<attribute methods> Methods provided to adjust this attribute
910              
911             =over
912              
913             B<counting_from_zero>
914              
915             =over
916              
917             B<Definition:> a way to check the current attribute setting
918              
919             =back
920              
921             =back
922              
923             =back
924              
925             =head3 file_boundary_flags
926              
927             =over
928              
929             B<Definition:> When you request data to the right of the last column or below
930             the last row of the data this package can return 'EOR' or 'EOF' to indicate that
931             state. This is especially helpful in 'while' loops. The other option is to
932             return 'undef'. This is problematic if some cells in your table are empty which
933             also returns undef. The determination for what constitues the last column and
934             row is selected with the attributes L<empty_is_end|/empty_is_end>, L<values_only
935             |/values_only>, and L<spaces_are_empty|/spaces_are_empty>.
936              
937             B<Default> 1
938              
939             B<Range> 1 = return 'EOR' or 'EOF' flags as appropriate, 0 = return undef when
940             requesting a position that is out of bounds
941              
942             B<attribute methods> Methods provided to adjust this attribute
943              
944             =over
945              
946             B<boundary_flag_setting>
947              
948             =over
949              
950             B<Definition:> a way to check the current attribute setting
951              
952             =back
953              
954             =back
955              
956             =back
957              
958             =head3 empty_is_end
959              
960             =over
961              
962             B<Definition:> The excel convention is to read the table left to right and top
963             to bottom. Some tables have an uneven number of columns with real data from row
964             to row. This allows the several methods that excersize a 'next' function to wrap
965             after the last element with data rather than going to the max column. This also
966             can combine with the attribute L<file_boundary_flags|/file_boundary_flags> to
967             trigger 'EOR' flags after the last data element and before the sheet max column
968             when not implementing 'next' functionality. It will also return 'EOF' if the
969             remaining rows are empty even if the max row is farther on.
970              
971             B<Default> 0
972              
973             B<Range> 0 = treat all columns short of the max column for the sheet as being in
974             the table, 1 = treat all cells after the last cell with data as past the end of
975             the row. This will be most visible when
976             L<boundary flags are turned on|/boundary_flag_setting> or next functionality is
977             used in the context of the attribute L<values_only|/values_only>.
978              
979             B<attribute methods> Methods provided to adjust this attribute
980              
981             =over
982              
983             B<is_empty_the_end>
984              
985             =over
986              
987             B<Definition:> a way to check the current attribute setting
988              
989             =back
990              
991             =back
992              
993             =back
994              
995             =head3 values_only
996              
997             =over
998              
999             B<Definition:> Excel will store information about a cell even if it only contains
1000             formatting data. In many cases you only want to see cells that actually have
1001             values. This attribute will change the package behaviour regarding cells that have
1002             formatting stored against that cell but no actual value. If values in the cells
1003             exist as zero length strings or spaces only you can also set those to empty with
1004             the attribute L<spaces_are_empty|/spaces_are_empty>.
1005              
1006             B<Default> 0
1007              
1008             B<Range> 1 = return 'undef' for cells with formatting only,
1009             0 = return the result of L<empty_return_type|/empty_return_type> (or cell objects)
1010             for cells that only contain formatting.
1011              
1012             B<attribute methods> Methods provided to adjust this attribute
1013              
1014             =over
1015              
1016             B<get_values_only>
1017              
1018             =over
1019              
1020             B<Definition:> a way to check the current attribute setting
1021              
1022             =back
1023              
1024             =back
1025              
1026             =back
1027              
1028             =head3 from_the_edge
1029              
1030             =over
1031              
1032             B<Definition:> Some data tables start in the top left corner. Others do not. I
1033             don't reccomend that practice but when aquiring data in the wild it is often good
1034             to adapt. This attribute sets whether the file percieves the L<min_col
1035             |/Spreadsheet::Reader::Worksheet/min_col> and L<min_row
1036             |/Spreadsheet::Reader::Worksheet/min_col> as the top left edge of the sheeto or
1037             from the top row with data and starting from the leftmost column with data.
1038              
1039             B<Default> 1
1040              
1041             B<Range> 1 = treat the top left corner of the sheet as the beginning of rows and
1042             columns even if there is no data in the top row or leftmost column, 0 = Set the
1043             minimum row and minimum columns to be the first row and first column with data
1044              
1045             B<attribute methods> Methods provided to adjust this attribute
1046              
1047             =over
1048              
1049             B<starts_at_the_edge>
1050              
1051             =over
1052              
1053             B<Definition:> returns the attribute state
1054              
1055             =back
1056              
1057             =back
1058              
1059             =back
1060              
1061             =head3 cache_positions
1062              
1063             =over
1064              
1065             B<Definition:> Using the standard architecture this parser would go back and
1066             read the sharedStrings and styles files sequentially from the beginning each
1067             time it had to access a sub elelement. This trade-off is generally not desired
1068             for these two files since the data is generally stored in a less than sequential
1069             fasion. The solution is to cache these files as they are read the first time so
1070             that a second pass through is not necessary to retreive an earlier element. The
1071             only time this doesn't make sence is if either of the files would overwhelm RAM if
1072             cached. The package has file size break points below which the files will cache.
1073             The thinking is that above these points the RAM is at risk of being overwhelmed
1074             and that not crashing and slow is better than a possible out-of-memory state.
1075             This attribute allows you to change those break points based on the target machine
1076             you are running on. The breaks are set on the byte size of the sub file not on the
1077             cached expansion of the sub file. In general the styles file is cached into a hash
1078             and the shared strings file is cached into an array ref. The attribute
1079             L<group_return_type|/group_return_type> also affects the size of the cache for the
1080             sharedStrings file since it will not cache the string formats unless the attribute
1081             is set to 'instance'. There is also a setting for caching worksheet data. Some
1082             worksheet row position settings will always be cached in order to speed up multiple
1083             reads over the same sheet or to query meta data about the rows. However, this
1084             cache level is set lower since the row caching creates much deeper data structures.
1085              
1086             B<Default>
1087              
1088             {
1089             shared_strings_interface => 5242880,# 5 MB
1090             styles_interface => 5242880,# 5 MB
1091             worksheet_interface => 2097152,# 2 MB
1092             }
1093              
1094             B<attribute methods> Methods provided to adjust this attribute
1095              
1096             =over
1097              
1098             B<cache_positions>
1099              
1100             =over
1101              
1102             B<Definition:> returns the full attribute settings as a hashref
1103              
1104             =back
1105              
1106             B<get_cache_size( (shared_strings_interface|styles_interface|worksheet_interface) )>
1107              
1108             =over
1109              
1110             B<Definition:> return the max file size allowed to cache for the indicated interface
1111              
1112             =back
1113              
1114             B<set_cache_size( $target_interface => $max_file_size )>
1115              
1116             =over
1117              
1118             B<Definition:> set the $max_file_size in bytes to be cached for the indicated $target_interface
1119              
1120             =back
1121              
1122             B<has_cache_size( $target_interface )>
1123              
1124             =over
1125              
1126             B<Definition:> returns true if the $target_interface has a cache size set
1127              
1128             =back
1129              
1130             =back
1131              
1132             =back
1133              
1134             =head3 show_sub_file_size
1135              
1136             =over
1137              
1138             B<Definition:> Especially for zip (xlsx and xlsm) files you may not know how big the
1139             file is and want to the package to tell you what size it thinks the file is. This
1140             attribute turns on a warning statment that prints to STDERR with information on the
1141             size of potientially cached files.
1142              
1143             B<Default> 0
1144              
1145             B<Range> 0 = don't warn the file size, 1 = send the potentially cached file sizes to
1146             STDERR for review
1147              
1148             =back
1149              
1150             =head3 group_return_type
1151              
1152             =over
1153              
1154             B<Definition:> Traditionally ParseExcel returns a cell object with lots of methods
1155             to reveal information about the cell. In reality the extra information is not used very
1156             much (witness the popularity of L<Spreadsheet::XLSX>). Because many users don't need or
1157             want the extra cell formatting information it is possible to get either the raw xml value,
1158             the raw visible cell value (seen in the Excel format bar), or the formatted cell value
1159             returned either the way the Excel file specified or the L<way you specify
1160             |Spreadsheet::Reader::ExcelXML::Worksheet/custom_formats> instead of a Cell instance with
1161             all the data. All empty cells return undef no matter what.
1162              
1163             B<Default> instance
1164              
1165             B<Range> instance = returns a populated L<Spreadsheet::Reader::ExcelXML::Cell> instance,
1166             xml_value = returns the string stored in the xml file - for xml based sheets this can sometimes
1167             be different thant the visible value in the cell or formula bar. unformatted = returns just the
1168             raw visible value of the cell shown in the Excel formula bar, value = returns just the formatted
1169             value stored in the excel cell
1170              
1171             B<attribute methods> Methods provided to adjust this attribute
1172              
1173             =over
1174              
1175             B<get_group_return_type>
1176              
1177             =over
1178              
1179             B<Definition:> a way to check the current attribute setting
1180              
1181             =back
1182              
1183             =back
1184              
1185             =back
1186              
1187             =head3 empty_return_type
1188              
1189             =over
1190              
1191             B<Definition:> Traditionally L<Spreadsheet::ParseExcel> returns an empty string for cells
1192             with unique formatting but no stored value. It may be that the more accurate way of returning
1193             undef works better for you. This will turn that behaviour on.
1194              
1195             B<Default> empty_string
1196              
1197             B<Range>
1198             empty_string = populates the unformatted value with '' even if it is set to undef
1199             undef_string = if excel stores undef for an unformatted value it will return undef
1200              
1201             B<attribute methods> Methods provided to adjust this attribute
1202              
1203             =over
1204              
1205             B<get_empty_return_type>
1206              
1207             =over
1208              
1209             B<Definition:> a way to check the current attribute setting
1210              
1211             =back
1212              
1213             =back
1214              
1215             =back
1216              
1217             =head3 spread_merged_values
1218              
1219             =over
1220              
1221             B<Definition:> In Excel you visibly see the value of the primary cell in a merged range displayed
1222             in all the cells. This attribute lets the code see the primary value show in each of the merged
1223             cells. There is some mandatory caching to pull this off so it will consume more memory.
1224              
1225             B<Default> 0 (To match the Excel formula bar, VBscript, and Spreadsheet::ParseExcel)
1226              
1227             B<Range> 0 = don't spread the primary value, 1 = spread the primary value
1228              
1229             B<attribute methods> Methods provided to adjust this attribute
1230              
1231             =over
1232              
1233             B<spreading_merged_values>
1234              
1235             =over
1236              
1237             B<Definition:> a way to check the current attribute setting
1238              
1239             =back
1240              
1241             =back
1242              
1243             =back
1244              
1245             =head3 skip_hidden
1246              
1247             =over
1248              
1249             B<Definition:> Like the previous attribute this attempts to match a visual effect in Excel.
1250             Even though hidden cells still contain values you can't see them visibly. This allows
1251             you to skip hidden rows and columns (not hidden sheets). The one gotcha is Excel will
1252             place the primary value in the new primary merged cell (formula bar) if a merge range is
1253             only partially obscured to include the original primary cell. This package can't do that.
1254             Either spread the primary to all cells or none.
1255              
1256             B<Default> 0 (To match VBscript and Spreadsheet::ParseExcel)
1257              
1258             B<Range> 0 = don't skip hidden rows and columns, 1 = skip hidden rows and columns
1259              
1260             B<attribute methods> Methods provided to adjust this attribute
1261              
1262             =over
1263              
1264             B<should_skip_hidden>
1265              
1266             =over
1267              
1268             B<Definition:> a way to check the current attribute setting
1269              
1270             =back
1271              
1272             =back
1273              
1274             =back
1275              
1276             =head3 spaces_are_empty
1277              
1278             =over
1279              
1280             B<Definition:> Some auto file generators tend to add empty strings or strings with spaces to
1281             fill empty cells. There may be some visual value in this but they can slow down parsing scripts.
1282             this attribute allows the sheet to treat spaces as empty or undef instead of cells with values.
1283              
1284             B<Default> 0 (To match Excel and Spreadsheet::ParseExcel)
1285              
1286             B<Range> 0 = cells with zero length strings and spaces are considered to have 'values", 1 = There must
1287             be something other than spaces or a zero length string for the cell to have value.
1288              
1289             B<attribute methods> Methods provided to adjust this attribute
1290              
1291             =over
1292              
1293             B<are_spaces_empty>
1294              
1295             =over
1296              
1297             B<Definition:> a way to check the current attribute setting
1298              
1299             =back
1300              
1301             =back
1302              
1303             =back
1304              
1305             =head3 merge_data
1306              
1307             =over
1308              
1309             B<Definition:> For zip based worksheets the merge data is stored at the end of the file. In order for
1310             the parser to arrive at that point it has to read through the whole sheet first. For big worksheet
1311             files this is very slow. If you are willing to not know or implement cell merge information then turn
1312             this off and the sheet should load much faster.
1313              
1314             B<Default> 1 (collect merge data)
1315              
1316             B<Range> 1 = The merge data is parsed from the worksheet file when it is opened, 0 = No merge data is
1317             parsed. The effect is equal to the cell merges dissapearing.
1318              
1319             B<attribute methods> Methods provided to adjust this attribute
1320              
1321             =over
1322              
1323             B<collecting_merge_data>
1324              
1325             =over
1326              
1327             B<Definition:> a way to check the current attribute setting
1328              
1329             =back
1330              
1331             =back
1332              
1333             =back
1334              
1335             =head2 FLAGS
1336              
1337             The parameter list (Attributes) that are possible to pass to ->new is somewhat long.
1338             Therefore you may want a shortcut that aggregates some set of attribute settings that
1339             are not the defaults but wind up being boilerplate. I have provided possible
1340             alternate sets like this and am open to providing others that are suggested. The
1341             flags will have a : in front of the identifier and will be passed to the class in the
1342             'use' statement for consumption by the import method. The flags can be stacked and
1343             where there is conflict between the flag settings the rightmost passed flag setting is
1344             used. If everything in the flag but one or two settings are desirable still use the flag and
1345             then overwrite those settings when calling new.
1346              
1347             Example;
1348              
1349             use Spreadsheet::Reader::ExcelXML v0.2 qw( :alt_default :debug );
1350              
1351             =head3 :alt_default
1352              
1353             This is intended for a deep look at data and skip formatting cells.
1354              
1355             =over
1356              
1357             B<Default attribute differences>
1358              
1359             =over
1360              
1361             L<values_only|/values_only> => 1
1362              
1363             L<count_from_zero|/count_from_zero> => 0
1364              
1365             L<empty_is_end|/empty_is_end> => 1
1366              
1367             =back
1368              
1369             =back
1370              
1371             =head3 :just_the_data
1372              
1373             This is intended for a shallow look at data with value formatting implemented
1374              
1375             =over
1376              
1377             B<Default attribute differences>
1378              
1379             =over
1380              
1381             L<count_from_zero|/count_from_zero> => 0
1382              
1383             L<values_only|/values_only> => 1
1384              
1385             L<empty_is_end|/empty_is_end> => 1
1386              
1387             L<group_return_type|/group_return_type> => 'value'
1388              
1389             L<from_the_edge|/from_the_edge> => 0
1390              
1391             L<empty_return_type|/empty_return_type> => 'undef_string'
1392              
1393             L<spaces_are_empty|/spaces_are_empty> => 1
1394              
1395             L<merge_data|/merge_data> => 0
1396              
1397             L<column_formats|/column_formats> => 0
1398              
1399             =back
1400              
1401             =back
1402              
1403             =head3 :just_raw_data
1404              
1405             This is intended for a shallow look at raw text and skips all formatting including number formats.
1406              
1407             =over
1408              
1409             B<Default attribute differences>
1410              
1411             =over
1412              
1413             L<count_from_zero|/count_from_zero> => 0
1414              
1415             L<values_only|/values_only> => 1
1416              
1417             L<empty_is_end|/empty_is_end> => 1
1418              
1419             L<group_return_type|/group_return_type> => 'xml_value'
1420              
1421             L<from_the_edge|/from_the_edge> => 0,
1422              
1423             L<empty_return_type|/empty_return_type> => 'undef_string'
1424              
1425             L<spaces_are_empty|/spaces_are_empty> => 1
1426              
1427             L<merge_data|/merge_data> => 0
1428              
1429             L<column_formats|/column_formats> => 0
1430              
1431             =back
1432              
1433             =back
1434              
1435             =head3 :like_ParseExcel
1436              
1437             This is a way to force some of the other groups back to instance and count from zero
1438              
1439             =over
1440              
1441             B<Default attribute differences>
1442              
1443             =over
1444              
1445             L<count_from_zero|/count_from_zero> => 1
1446              
1447             L<group_return_type|/group_return_type> => 'instance'
1448              
1449             =back
1450              
1451             =back
1452              
1453             =head3 :debug
1454              
1455             This is a way to turn on as much reporting as possible
1456              
1457             =over
1458              
1459             B<Default attribute differences>
1460              
1461             =over
1462              
1463             L<error_inst|/error_inst> ->
1464              
1465             error_inst =>{
1466             superclasses => ['Spreadsheet::Reader::ExcelXML::Error'],
1467             package => 'ErrorInstance',
1468             should_warn => 1,
1469             }
1470              
1471             L<show_sub_file_size|/show_sub_file_size> => 1
1472              
1473             =back
1474              
1475             =back
1476              
1477             =head3 :lots_of_ram
1478              
1479             This opens the caching size allowances way up
1480              
1481             =over
1482              
1483             B<Default attribute differences>
1484              
1485             =over
1486              
1487             L<cache_positions|/cache_positions> ->
1488              
1489             cache_positions =>{
1490             shared_strings_interface => 209715200,# 200 MB
1491             styles_interface => 209715200,# 200 MB
1492             worksheet_interface => 209715200,# 200 MB
1493             },
1494              
1495             =back
1496              
1497             =back
1498              
1499             =head3 :less_ram
1500              
1501             This tightens caching size allowances way down
1502              
1503             =over
1504              
1505             B<Default attribute differences>
1506              
1507             =over
1508              
1509             L<cache_positions|/cache_positions> ->
1510              
1511             cache_positions =>{
1512             shared_strings_interface => 10240,# 10 KB
1513             styles_interface => 10240,# 10 KB
1514             worksheet_interface => 1024,# 1 KB
1515             },
1516              
1517             =back
1518              
1519             =back
1520              
1521             =head2 Secondary Methods
1522              
1523             These are additional ways to use this class. They can be used to open an .xlsx workbook.
1524             They are also ways to investigate information at the workbook level. For information on
1525             how to retrieve data from the worksheets see the
1526             L<Worksheet|Spreadsheet::Reader::ExcelXML::Worksheet> and
1527             L<Cell|Spreadsheet::Reader::ExcelXML::Cell> documentation. For additional workbook
1528             options see the L<Secondary Methods|/Secondary Methods>
1529             and the L<Attributes|/Attributes> sections. The attributes section specifically contains
1530             all the methods used to adjust the attributes of this class.
1531              
1532             All methods are object methods and should be implemented on the object instance.
1533              
1534             B<Example:>
1535              
1536             my @worksheet_array = $workbook_instance->worksheets;
1537              
1538             =head3 get_epoch_year
1539              
1540             =over
1541              
1542             B<Definition:> This returns the epoch year defined by the Excel workbook. The epoch year
1543             affects the way dates are processed in the formatter L<Spreadsheet::Reader::Format>
1544              
1545             B<Accepts:> nothing
1546              
1547             B<Returns:> 1900 = Windows Excel or 1904 = Apple Excel
1548              
1549             =back
1550              
1551             =head3 has_epoch_year
1552              
1553             =over
1554              
1555             B<Definition:> This indicates if an epoch_year has been determined for the workbook (yet)
1556              
1557             B<Accepts:> nothing
1558              
1559             B<Returns:> 1 = yes there is one, 0 = nothing (yet)
1560              
1561             =back
1562              
1563             =head3 get_sheet_name( $Int )
1564              
1565             =over
1566              
1567             B<Definition:> This method returns the sheet name for a given physical position
1568             in the workbook from left to right. It counts from zero even if the workbook is in
1569             'count_from_one' mode. B(It will return chart names but chart tab names cannot currently
1570             be converted to worksheets). You may actually want L<worksheet_name|worksheet_name( $Int )>
1571             instead of this function.
1572              
1573             B<Accepts:> integers
1574              
1575             B<Returns:> the sheet name (both worksheet and chartsheet )
1576              
1577             =back
1578              
1579             =head3 sheet_count
1580              
1581             =over
1582              
1583             B<Definition:> This returns the total number of recorded sheets
1584              
1585             B<Accepts:> nothing
1586              
1587             B<Returns:> $total - a count of all sheets (including chartsheets and worksheets)
1588              
1589             =back
1590              
1591             =head3 get_sheet_info( $name )
1592              
1593             =over
1594              
1595             B<Definition:> This returns any stored metadata about the sheet in a hashref
1596              
1597             B<Accepts:> $name
1598              
1599             B<Returns:> a hashref of sheet metadata ( a pretty thin list still )
1600              
1601             =back
1602              
1603             =head3 get_rel_info( $relId )
1604              
1605             =over
1606              
1607             B<Definition:> This returns the sheet name for the $relId
1608              
1609             B<Accepts:> $relId ex; 'rId5'
1610              
1611             B<Returns:> The sheet $name associated with that relId
1612              
1613             =back
1614              
1615             =head3 get_id_info( $Id )
1616              
1617             =over
1618              
1619             B<Definition:> This returns the sheet name for the $Id
1620              
1621             B<Accepts:> $Id (an integer) ex; '2'
1622              
1623             B<Returns:> The sheet $name associated with that Id
1624              
1625             =back
1626              
1627             =head3 get_chartsheet_names
1628              
1629             =over
1630              
1631             B<Definition:> This method returns an array ref of all the chartsheet names (tabs) in the
1632             workbook in order. (No worksheets.)
1633              
1634             B<Accepts:> nothing
1635              
1636             B<Returns:> an array ref of strings
1637              
1638             =back
1639              
1640             =head3 chartsheet_name( $position )
1641              
1642             =over
1643              
1644             B<Definition:> This returns the name of the chartsheet in that $position. (counting from zero)
1645             interspersed worksheets in the file are not considered to hold a position by this accounting.
1646              
1647             B<Accepts:> $position (an integer)
1648              
1649             B<Returns:> the chartsheet name
1650              
1651             =back
1652              
1653             =head3 chartsheet_count
1654              
1655             =over
1656              
1657             B<Definition:> This returns the total number of recorded chartsheets
1658              
1659             B<Accepts:> nothing
1660              
1661             B<Returns:> $total - a count of all chartsheets (only)
1662              
1663             =back
1664              
1665             =head3 creator
1666              
1667             =over
1668              
1669             B<Definition:> Returns the recorded creator of the file from the parsed metadata
1670              
1671             B<Accepts:> nothing
1672              
1673             B<Returns:> a string
1674              
1675             =back
1676              
1677             =head3 modified_by
1678              
1679             =over
1680              
1681             B<Definition:> Returns the recorded last entity to modify the file from the parsed metadata
1682              
1683             B<Accepts:> nothing
1684              
1685             B<Returns:> a string
1686              
1687             =back
1688              
1689             =head3 date_created
1690              
1691             =over
1692              
1693             B<Definition:> Returns the date that Excel recorded for the file creation
1694              
1695             B<Accepts:> nothing
1696              
1697             B<Returns:> a string (YYYY-MM-DD)
1698              
1699             =back
1700              
1701             =head3 date_modified
1702              
1703             =over
1704              
1705             B<Definition:> Returns the date that Excel recorded for the last file modification
1706              
1707             B<Accepts:> nothing
1708              
1709             B<Returns:> a string (YYYY-MM-DD)
1710              
1711             =back
1712              
1713             =head3 in_the_list
1714              
1715             =over
1716              
1717             B<Definition:> This is a predicate method that indicates if the 'next'
1718             L<worksheet|/worksheet( $name )> function has been implemented at least once.
1719              
1720             B<Accepts:>nothing
1721              
1722             B<Returns:> true = 1, false = 0
1723              
1724             =back
1725              
1726             =head3 start_at_the_beginning
1727              
1728             =over
1729              
1730             B<Definition:> This restarts the 'next' worksheet at the first worksheet. This
1731             method is only useful in the context of the L<worksheet|/worksheet( $name )>
1732             function.
1733              
1734             B<Accepts:> nothing
1735              
1736             B<Returns:> nothing
1737              
1738             =back
1739              
1740             =head2 Architecture Choices
1741              
1742             This is yet another package for parsing Excel xml or 2007+ (and 2003+ xml) workbooks.
1743             There are two other options for 2007+ XLSX parsing (but not 2003 xml parsing) on CPAN.
1744             (L<Spreadsheet::ParseXLSX> and L<Spreadsheet::XLSX>) In general if either of them
1745             already work for you without issue then there is probably no compelling reason to
1746             switch to this package. However, the goals of this package which may provide
1747             differentiation are five fold. First, as close as possible produce the same output as
1748             is visible in an excel spreadsheet with exposure to underlying settings from Excel.
1749             Second, adhere as close as is reasonable to the L<Spreadsheet::ParseExcel> API (where
1750             it doesn't conflict with the first objective) so that less work would be needed to
1751             integrate ParseExcel and this package. An addendum to the second goal is this package
1752             will not expose elements of the object hash for use by the consuming program. This
1753             package will either return an unblessed hash with the equivalent elements to the
1754             Spreadsheet::ParseExcel output (instead of a class instance) or it will provide methods
1755             to provide these sets of data. The third goal is to read the excel files in a 'just in
1756             time' manner without storing all the data in memory. The intent is to minimize the
1757             footprint of large file reads. Initially I did this using L<XML::LibXML> but it
1758             eventually L<proved to not play well|http://www.perlmonks.org/?node_id=1151609> with
1759             Moose ( or perl? ) garbage collection so this package uses a pure perl xml parser.
1760             In general this means that design decisions will generally sacrifice speed to keep RAM
1761             consumption low. Since the data in the sheet is parsed just in time the information that
1762             is not contained in the primary meta-data headers will not be available for review L<until
1763             the sheet parses to that point|Spreadsheet::Reader::ExcelXML::Worksheet/max_row>. In
1764             cases where the parser has made choices that prioritize speed over RAM savings there will
1765             generally be an L<attribute available to turn that decision off|/set_cache_behavior>.
1766             Fourth, Excel files get abused in the wild. In general the Microsoft (TM) Excel
1767             application handles these mangled files gracefully. The goal is to be able to read any
1768             xml based spreadsheet Excel can read from the supported extention list. Finally, this
1769             parser supports the Excel 2003 xml format. All in all this package solves many of the
1770             issues I found parsing Excel in the wild. I hope it solves some of yours as well.
1771              
1772             =head2 Warnings
1773              
1774             B<1.>This package uses L<Archive::Zip>. Not all versions of Archive::Zip work for everyone.
1775             I have tested this with Archive::Zip 1.30. Please let me know if this does not work with a
1776             sucessfully installed (read passed the full test suit) version of Archive::Zip newer than that.
1777              
1778             B<2.> Not all workbook sheets (tabs) are created equal! Some Excel sheet tabs are only a
1779             chart. These tabs are 'chartsheets'. The methods with 'worksheet' in the name only act on
1780             the sub set of tabs that are worksheets. Future methods with 'chartsheet' in the name will
1781             focus on the subset of sheets that are chartsheets. Methods with just 'sheet' in the name
1782             have the potential to act on both. The documentation for the chartsheet level class is
1783             found in L<Spreadsheet::Reader::ExcelXML::Chartsheet> (still under construction).
1784             All chartsheet classes do not provide access to cells.
1785              
1786             B<3.> This package supports reading xlsm files (Macro enabled Excel 2007+ workbooks).
1787             xlsm files allow for binaries to be embedded that may contain malicious code. However,
1788             other than unzipping the excel file no work is done by this package with the sub-file
1789             'vbaProject.bin' containing the binaries. This package does not provide an API to that
1790             sub-file and I have no intention of doing so. Therefore my research indicates there should
1791             be no risk of virus activation while parsing even an infected xlsm file with this package
1792             but I encourage you to use your own judgement in this area. B<L<caveat utilitor!
1793             |https://en.wiktionary.org/wiki/Appendix:List_of_Latin_phrases>>
1794              
1795             B<4.> This package will read some files with 'broken' xml. In general this should be
1796             transparent but in the case of the maximum row value and the maximum column value for a
1797             worksheet it can cause some surprising problems. This includes the possibility that the
1798             maximum values are initially stored as 'undef' if the sheet does not provide them in the
1799             metadata as expected. I<These values are generally never available in Excel 2003 xml
1800             files.> The answer to the methods L<Spreadsheet::Reader::ExcelXML::Worksheet/row_range>
1801             and L<Spreadsheet::Reader::ExcelXML::Worksheet/col_range> will then change as more
1802             of the sheet is parsed. You can use the attribute L<file_boundary_flags
1803             |/file_boundary_flags> or the methods L<Spreadsheet::Reader::ExcelXML::Worksheet/get_next_value>
1804             or L<Spreadsheet::Reader::ExcelXML::Worksheet/fetchrow_arrayref> as alternates to
1805             pre-testing for boundaries when iterating. The primary cause of these broken XML
1806             elements in Excel 2007+ files are non-XML applications writing to or editing the
1807             underlying xml. If you have an example of other broken xml files readable by the
1808             Excel application that are not parsable by this package please L<submit them
1809             |https://github.com/jandrew/Spreadsheet-XLSX-Reader-LibXML/issues> to my github repo
1810             so I can work to improve this package. If you don't want your test case included
1811             with the distribution I will use it to improve the package without publishing it.
1812              
1813             B<5.> I reserve the right to tweak the sub file L<caching breakpoints|/cache_positions>
1814             over the next few releases. The goal is to have a default that appears to be the
1815             best compromise by 2017-1-1.
1816              
1817             B<6.> This package provides support for L<SpreadsheetML
1818             |https://odieweblog.wordpress.com/2012/02/12/how-to-read-and-write-office-2003-excel-xml-files/>
1819             (Excel 2003) .xml extention documents. These files should include the header;
1820              
1821             <?mso-application progid="Excel.Sheet"?>
1822              
1823             to indicate their intended format. Please L<submit|/SUPPORT> any cases that
1824             appear to behave differently than expected for .xml extention files that are
1825             readable by the Excel application. I am also interested in cases where an out of
1826             memory error occurs with an .xml extension file. This warning will stay till
1827             2017-1-1.
1828              
1829             B<7.> This package uses two classes at the top to L<handle cleanup for some self
1830             referential|http://perldoc.perl.org/5.8.9/perlobj.html#Two-Phased-Garbage-Collection>
1831             object organization that I use. As a result the action taken on this package is
1832             (mostly) implemented in L<Spreadsheet::Reader::ExcelXML::Workbook> code. I documented
1833             most of that code API here. If you want to look at the raw code go there.
1834              
1835             =head1 BUILD / INSTALL from Source
1836              
1837             B<0.> Using L<cpanm|https://metacpan.org/pod/App::cpanminus> is much easier
1838             than a source build!
1839              
1840             cpanm Spreadsheet::Reader::ExcelXML
1841              
1842             And then if you feel kindly L<App::cpanminus::reporter>
1843              
1844             cpanm-reporter
1845              
1846             B<1.> Download a compressed file with this package code from your favorite source
1847              
1848             =over
1849              
1850             L<github|https://github.com/jandrew/p5-spreadsheet-reader-excelxml>
1851              
1852             L<Meta::CPAN|https://metacpan.org/pod/Spreadsheet::Reader::ExcelXML>
1853              
1854             L<CPAN|http://search.cpan.org/~jandrew/Spreadsheet-Reader-ExcelXML/>
1855              
1856             =back
1857              
1858             B<2.> Extract the code from the compressed file.
1859              
1860             =over
1861              
1862             If you are using tar on a .tar.gz file this should work:
1863              
1864             tar -zxvf Spreadsheet-Reader-ExcelXML-v0.xx.tar.gz
1865              
1866             =back
1867              
1868             B<3.> Change (cd) into the extracted directory
1869              
1870             B<4.> Run the following
1871              
1872             =over
1873              
1874             (for Windows find what version of make was used to compile your perl)
1875              
1876             perl -V:make
1877              
1878             (then for Windows substitute the correct make function (s/make/dmake/g)? below)
1879              
1880             =back
1881              
1882             perl Makefile.PL
1883              
1884             make
1885              
1886             make test
1887              
1888             make install # As sudo/root
1889              
1890             make clean
1891              
1892             =head1 SUPPORT
1893              
1894             =over
1895              
1896             L<github Spreadsheet::Reader::Format/issues
1897             |https://github.com/jandrew/p5-spreadsheet-reader-excelxml/issues>
1898              
1899             =back
1900              
1901             =head1 TODO
1902              
1903             =over
1904              
1905             B<1.> Write a chartsheet parser and functions
1906              
1907             B<2.> Add a pivot table reader (Not just read the values from the sheet)
1908              
1909             B<3.> Add calc chain methods
1910              
1911             B<4.> Add more exposure to workbook/worksheet formatting values
1912              
1913             =back
1914              
1915             =head1 AUTHOR
1916              
1917             =over
1918              
1919             Jed Lund
1920              
1921             jandrew@cpan.org
1922              
1923             =back
1924              
1925             =head1 CONTRIBUTORS
1926              
1927             This is the (likely incomplete) list of people who have helped
1928             make this distribution what it is, either via code contributions,
1929             patches, bug reports, help with troubleshooting, etc. A huge
1930             'thank you' to all of them. Most were contributors to
1931             L<Spreadsheet::XLSX::Reader::LibXML> but the contributions have
1932             (hopefully) not been lost.
1933              
1934             =over
1935              
1936             L<Frank Maas|https://github.com/Frank071>
1937              
1938             L<Stuart Watt|https://github.com/morungos>
1939              
1940             L<Toby Inkster|https://github.com/tobyink>
1941              
1942             L<Breno G. de Oliveira|https://github.com/garu>
1943              
1944             L<Bill Baker|https://github.com/wdbaker54>
1945              
1946             L<H.Merijin Brand|https://github.com/Tux>
1947              
1948             L<Todd Eigenschink|mailto:todd@xymmetrix.com>
1949              
1950             L<Slaven Rezić|https://metacpan.org/author/SREZIC>
1951              
1952             =back
1953              
1954             =head1 COPYRIGHT
1955              
1956             This program is free software; you can redistribute
1957             it and/or modify it under the same terms as Perl itself.
1958              
1959             The full text of the license can be found in the
1960             LICENSE file included with this module.
1961              
1962             This software is copyrighted (c) 2016, 2017 by Jed Lund
1963              
1964             =head1 DEPENDENCIES
1965              
1966             =over
1967              
1968             L<perl 5.010|perl/5.10.0>
1969              
1970             L<Archive::Zip>
1971              
1972             L<Carp>
1973              
1974             L<Clone>
1975              
1976             L<DateTime::Format::Flexible>
1977              
1978             L<DateTimeX::Format::Excel>
1979              
1980             L<IO::File>
1981              
1982             L<Moose> - 2.1213
1983              
1984             L<MooseX::HasDefaults::RO>
1985              
1986             L<MooseX::ShortCut::BuildInstance> - 1.032
1987              
1988             L<MooseX::StrictConstructor>
1989              
1990             L<Type::Tiny> - 1.000
1991              
1992             L<version> - 0.077
1993              
1994             =back
1995              
1996             =head1 SEE ALSO
1997              
1998             =over
1999              
2000             L<Spreadsheet::Read> - generic Spreadsheet reader
2001              
2002             L<Spreadsheet::ParseExcel> - Excel binary files from 2003 and earlier
2003              
2004             L<Spreadsheet::ParseXLSX> - Excel version 2007 and later
2005              
2006             L<Spreadsheet::XLSX> - Excel version 2007 and later (Very rough)
2007              
2008             L<Log::Shiras|https://github.com/jandrew/Log-Shiras>
2009              
2010             =over
2011              
2012             All lines in this package that use Log::Shiras are commented out
2013              
2014             =back
2015              
2016             =back
2017              
2018             =begin html
2019              
2020             <a href="http://www.perlmonks.org/?node_id=706986">
2021             <img src="http://www.perlmonksflair.com/jandrew.jpg" alt="perl monks">
2022             </a>
2023              
2024             =end html
2025              
2026             =cut
2027              
2028             #########1#########2 main pod documentation end 5#########6#########7#########8#########9