File Coverage

lib/Spreadsheet/Reader/ExcelXML/XMLReader/NamedStyles.pm
Criterion Covered Total %
statement 81 88 92.0
branch 34 42 80.9
condition n/a
subroutine 12 12 100.0
pod 3 3 100.0
total 130 145 89.6


line stmt bran cond sub pod time code
1             package Spreadsheet::Reader::ExcelXML::XMLReader::NamedStyles;
2             our $AUTHORITY = 'cpan:JANDREW';
3 4     4   15814 use version; our $VERSION = version->declare('v0.16.8');
  4         6  
  4         32  
4             ###LogSD warn "You uncovered internal logging statements for Spreadsheet::Reader::ExcelXML::XMLReader::NamedStyles-$VERSION";
5              
6 4     4   498 use 5.010;
  4         11  
7 4     4   21 use Moose::Role;
  4         4  
  4         36  
8             requires qw(
9             get_defined_conversion start_the_file_over close_the_file
10             advance_element_position parse_element set_defined_excel_formats
11             current_named_node set_error squash_node
12             parse_excel_format_string good_load
13             );
14 4     4   15300 use Types::Standard qw( Bool HashRef );
  4         10  
  4         73  
15 4     4   2549 use Carp qw( confess );
  4         6  
  4         217  
16 4     4   15 use Clone qw( clone );
  4         9  
  4         2816  
17              
18             #########1 Dispatch Tables & Package Variables 5#########6#########7#########8#########9
19              
20             my $cell_attributes ={
21             Font => 'cell_font',
22             Borders => 'cell_border',
23             Interior => 'cell_fill',
24             'ss:Name' => 'cell_style',
25             NumberFormat => 'cell_coercion',
26             Alignment => 'cell_alignment',
27             };
28              
29             my $xml_from_cell ={
30             cell_font => 'fontId',
31             cell_border => 'borderId',
32             cell_fill => 'fillId',
33             cell_style => 'xfId',
34             cell_coercion => 'numFmtId',
35             cell_alignment => 'alignment',
36             };
37              
38             my $date_keys ={# SpreadsheetML to Excel 2007+ custom date formats
39             'Short Date' => 'yyyy-mm-dd',
40             'Fixed' => '0.00',
41             'ShortDate' => 'yyyy-mm-dd',
42             };
43              
44             #########1 Public Attributes 3#########4#########5#########6#########7#########8#########9
45              
46             has cache_positions =>(
47             isa => Bool,
48             reader => 'should_cache_positions',
49             default => 1,
50             );
51              
52             #########1 Public Methods 3#########4#########5#########6#########7#########8#########9
53              
54             sub get_format{
55 79     79 1 5664 my( $self, $name, $header, $exclude_header ) = @_;
56 79 100       136 my $xml_target_header = $header ? $header : '';#$xml_from_cell->{$header}
57 79 50       131 my $xml_exclude_header = $exclude_header ? $exclude_header : '';
58             ###LogSD my $phone = Log::Shiras::Telephone->new( name_space =>
59             ###LogSD $self->get_all_space . '::get_format', );
60             ###LogSD $phone->talk( level => 'info', message => [
61             ###LogSD "Get defined formats named: $name",
62             ###LogSD ( $header ? "Returning only the values for header: $header - $xml_target_header" : '' ),
63             ###LogSD ( $exclude_header ? "..excluding the values for header: $exclude_header - $xml_exclude_header" : '' ) , ] );
64              
65              
66 79         62 my $target_ref;
67 79         82 my $found_it = 0;
68 79 100       1931 if( $self->_has_styles ){# Check for stored value - when caching implemented
69 75 50       1923 if( !$self->_has_s_name( $name ) ){
70 0         0 $self->set_error( "Style named |$name| is not recorded!" );
71 0         0 return undef;
72             }
73 75         1857 $target_ref = clone( $self->_get_s_name( $name ) );
74             ###LogSD $phone->talk( level => 'debug', message => [
75             ###LogSD "The complete cached style is:", $target_ref, ] );
76 75         460 $found_it = 1;
77             }else{# pull the value the long (hard and slow) way
78 4         8 my $x = 0;
79 4         24 $self->start_the_file_over;
80 4         12 while( !$found_it ){
81             ###LogSD $phone->talk( level => 'debug', message => [
82             ###LogSD "Looking for a 'Style' node on pass: $x", ] );
83 18         66 my ( $result, $node_name, $node_level, $result_ref ) =
84             $self->advance_element_position( 'Style' );
85             ###LogSD $phone->talk( level => 'trace', message =>[
86             ###LogSD "After search for header 'Style' arrived at node -$node_name- with result: $result" ] );
87 18 50       37 last if !$result;
88 18         44 my $xml_ref = $self->parse_element;
89             ###LogSD $phone->talk( level => 'debug', message => [
90             ###LogSD "Parsing the node to find the name", $xml_ref ] );
91 18         52 my $perlized_ref = $self->squash_node( $xml_ref );
92             ###LogSD $phone->talk( level => 'trace', message => [
93             ###LogSD "After squashing:", $perlized_ref ] );
94 18         39 ( my $element_name, $target_ref ) = $self->_transform_element( $perlized_ref );
95 18 100       83 if( $element_name eq $name ){
96             ###LogSD $phone->talk( level => 'trace', message => [
97             ###LogSD "Found requested node named: $element_name" ] );
98 4         6 $found_it = 1;
99 4         12 last;
100             }
101             }
102             }
103              
104             # Restrict the return value based on passed parameters
105 79 50       143 if( $found_it ){
106             ###LogSD $phone->talk( level => 'debug', message => [
107             ###LogSD "The style named -$name- is:", $target_ref ] );
108 79 100       127 if( $header ){
    50          
109 77 100       217 $target_ref = $target_ref->{$header} ? { $header => $target_ref->{$header} } : undef;
110             ###LogSD $phone->talk( level => 'debug', message => [
111             ###LogSD "The style with target header -$header- only is:", $target_ref ] );
112             }elsif( $exclude_header ){
113 0         0 delete $target_ref->{$exclude_header};
114             ###LogSD $phone->talk( level => 'debug', message => [
115             ###LogSD "The style with exclude header -$exclude_header- removed is:", $target_ref ] );
116             }
117             }
118 79         429 return $target_ref;
119             }
120              
121             sub get_default_format{#### Re-use the get_format method for this since we are using names
122 2     2 1 1621 my( $self, $header, $exclude_header ) = @_;
123             ###LogSD my $phone = Log::Shiras::Telephone->new( name_space =>
124             ###LogSD $self->get_all_space . '::get_default_format', );
125 2         4 my $name = 'Default';
126             ###LogSD $phone->talk( level => 'info', message =>[ "Get the 'Default' format" ] );
127 2         8 return $self->get_format( $name, $header, $exclude_header );
128             }
129              
130             sub load_unique_bits{
131 6     6 1 12 my( $self, ) = @_;
132             ###LogSD my $phone = Log::Shiras::Telephone->new( name_space =>
133             ###LogSD $self->get_all_space . '::load_unique_bits', );
134              
135             # Check for empty node and react
136 6         12 my( $result, $node_name, $node_level, $result_ref );
137 6         33 my $current_node = $self->current_node_parsed;
138             ###LogSD $phone->talk( level => 'trace', message =>[
139             ###LogSD "The current node is:", $current_node ] );
140 6 50       27 if( (keys %$current_node)[0] eq 'Styles' ){
141             ###LogSD $phone->talk( level => 'trace', message =>[
142             ###LogSD "Found the Styles node" ] );
143 6         7 $result = 2;
144 6         12 $node_name = 'Styles';
145             }else{
146 0         0 ( $result, $node_name, $node_level, $result_ref ) =
147             $self->advance_element_position( 'Styles' );
148             ###LogSD $phone->talk( level => 'trace', message =>[
149             ###LogSD "After search for header 'Style' arrived at node -$node_name- with result: $result" ] );
150             }
151 6 50       18 if( $result ){
152             ###LogSD $phone->talk( level => 'debug', message => [
153             ###LogSD "Found a Styles node" ] );
154 6         173 $self->good_load( 1 );
155             }else{
156 0         0 $self->set_error( "No 'Styles' element with content found - can't parse this as a styles file" );
157 0         0 return undef;
158             }
159              
160             # Cache nodes as needed (No standard number formats to record)
161 6         11 my ( $success, $top_level_ref );
162 6 100       151 if( $self->should_cache_positions ){
163 5         26 $top_level_ref = $self->parse_element;
164             ###LogSD $phone->talk( level => 'trace', message => [
165             ###LogSD "Parsing the whole thing for caching", $top_level_ref ] );
166 5         35 $self->close_the_file;# Don't need the file open any more!
167              
168 5         21 $top_level_ref = $self->squash_node( $top_level_ref );#, 'numFmts'
169             ###LogSD $phone->talk( level => 'trace', message => [
170             ###LogSD "Squashed ref:", $top_level_ref ] );
171              
172             # Handle the single style case
173 5 50       19 if( exists $top_level_ref->{Style} ){
174             ###LogSD $phone->talk( level => 'debug', message => [
175             ###LogSD "Converting a single style node" ] );
176 0         0 $self->_add_style_element( $top_level_ref->{Style} );
177             }else{
178             ###LogSD $phone->talk( level => 'debug', message => [
179             ###LogSD "Converting a list of style nodes" ] );
180 5         9 for my $style ( @{$top_level_ref->{list}} ){
  5         16  
181             ###LogSD $phone->talk( level => 'debug', message => [
182             ###LogSD "Loading:", $style ] );
183 40         61 $self->_add_style_element( $style );
184             }
185             }
186             ###LogSD $phone->talk( level => 'trace', message => [
187             ###LogSD "Final style ref:", $self->_get_all_cache ] );
188             }
189 6         54 return 1;
190             }
191              
192             #########1 Private Attributes 3#########4#########5#########6#########7#########8#########9
193              
194             has _styles =>(
195             isa => HashRef,
196             traits => ['Hash'],
197             handles =>{
198             _get_s_name => 'get',
199             _set_s_name => 'set',
200             _has_s_name => 'exists',
201             },
202             reader => '_get_all_cache',
203             predicate => '_has_styles'
204             );
205              
206             #########1 Private Methods 3#########4#########5#########6#########7#########8#########9
207              
208             sub _add_style_element{
209 40     40   38 my( $self, $sub_element ) = @_;
210             ###LogSD my $phone = Log::Shiras::Telephone->new( name_space =>
211             ###LogSD $self->get_all_space . '::_load_unique_bits_add_style_element', );
212             ###LogSD $phone->talk( level => 'trace', message =>[
213             ###LogSD "Adding sub element:", $sub_element, "To style ref:", $self->_get_all_cache ] );
214 40         66 ( my $element_name, $sub_element ) = $self->_transform_element( $sub_element );
215 40         1017 $self->_set_s_name( $element_name => $sub_element );
216             ###LogSD $phone->talk( level => 'trace', message => [
217             ###LogSD "Updated style named -$element_name- ref:", $sub_element ] );
218             }
219              
220             sub _transform_element{
221 58     58   51 my( $self, $sub_element ) = @_;
222             ###LogSD my $phone = Log::Shiras::Telephone->new( name_space =>
223             ###LogSD $self->get_all_space . '::_transform_element', );
224             ###LogSD $phone->talk( level => 'trace', message =>[
225             ###LogSD "Transforming sub element:", $sub_element, ] );
226 58         41 my ( $element_name, $new_sub );
227 58         116 for my $key ( keys %$sub_element ){
228             ###LogSD $phone->talk( level => 'debug', message => [
229             ###LogSD "Processing sub element key: $key" ] );
230 168 100       8716 if( $key =~ /\:ID/ ){
    100          
231             ###LogSD $phone->talk( level => 'debug', message => [
232             ###LogSD "Found the style name: $sub_element->{$key}" ] );
233 58         85 $element_name = $sub_element->{$key};
234             }elsif( exists $cell_attributes->{$key} ){
235             ###LogSD $phone->talk( level => 'debug', message => [
236             ###LogSD "Key matches cell attribute: $cell_attributes->{$key}" ] );
237 102 100       154 if( $key =~ /NumberFormat/ ){
238             ###LogSD $phone->talk( level => 'debug', message => [
239             ###LogSD "Found a number format: ", $sub_element->{$key} ] );
240             my $replaced_string =
241             !defined $sub_element->{$key} ? undef :
242             exists( $date_keys->{$sub_element->{$key}->{'ss:Format'}} ) ?
243             $date_keys->{$sub_element->{$key}->{'ss:Format'}} :
244 46 100       132 $sub_element->{$key}->{'ss:Format'} ;
    100          
245             ###LogSD $phone->talk( level => 'debug', message => [
246             ###LogSD "Replace string result: $replaced_string", ] );
247 46 100       74 if( $replaced_string ){
248 27         79 $replaced_string =~ /^(\[[A-Z]{3}\])?(.*)/;######### Take this back out when localization is implemented
249 27         42 $replaced_string = $2;
250             }
251             ###LogSD $phone->talk( level => 'debug', message => [
252             ###LogSD "Processing the date format string: " . ($replaced_string//'undef') ] );
253 46 100       173 $new_sub->{$cell_attributes->{$key}} =
254             defined( $replaced_string ) ?
255             $self->parse_excel_format_string( $replaced_string ) : undef;
256             ###LogSD $phone->talk( level => 'debug', message => [
257             ###LogSD "Finished the excel format parsing" ] );
258             }else{
259 56         119 $new_sub->{$cell_attributes->{$key}} = $sub_element->{$key};
260             }
261             ###LogSD $phone->talk( level => 'debug', message => [
262             ###LogSD "Updated new sub:", $new_sub ] );
263             }
264             }
265             ###LogSD $phone->talk( level => 'debug', message =>[
266             ###LogSD "Returning element name: $element_name" ] );
267             ###LogSD $phone->talk( level => 'trace', message =>[ "..with ref:", $new_sub ] );
268 58         9994 return( $element_name, $new_sub );
269             }
270              
271             #########1 Phinish 3#########4#########5#########6#########7#########8#########9
272              
273 4     4   21 no Moose::Role;
  4         7  
  4         21  
274              
275             1;
276              
277             #########1 Documentation 3#########4#########5#########6#########7#########8#########9
278             __END__
279              
280             =head1 NAME
281              
282             Spreadsheet::Reader::ExcelXML::XMLReader::NamedStyles - Support for Excel 2003 XML Styles files
283              
284             =head1 SYNOPSYS
285              
286             !!!! Example code - will not run standalone !!!!
287              
288             use MooseX::ShortCut::BuildInstance qw( build_instance );
289             use Spreadsheet::Reader::ExcelXML::XMLReader::NamedStyles;
290             use Spreadsheet::Reader::ExcelXML::XMLReader;
291             my $test_instance = build_instance(
292             package => 'StylesInterface',
293             superclasses => ['Spreadsheet::Reader::ExcelXML::XMLReader'],
294             add_roles_in_sequence => [
295             'Spreadsheet::Reader::ExcelXML::XMLReader::PositionStyles',
296             ],
297             file => ! Styles file handle with extracted content !
298             workbook_inst => $workbook_instance,<--- Built elswhere!!!
299             );
300              
301             =head1 DESCRIPTION
302              
303             This role is written to provide the methods 'get_format' and 'get_default_format' for
304             the styles file reading where the styles file elements are called out by name. This
305             generally implies that the styles section was a node in a flat xml file written to the
306             Microsoft (TM) Excel 2003 xml format. The extration should be accomplished external
307             to this instance creation usually with L<Spreadsheet::Reader::ExcelXML::XMLReader/extract_file>.
308              
309             =head2 Requires
310              
311             These are the methods required by this role and their default provider. All
312             methods are imported straight across with no re-naming.
313              
314             =over
315              
316             L<Spreadsheet::Reader::Format::ParseExcelFormatStrings/get_defined_conversion( $position )>
317              
318             l<Spreadsheet::Reader::Format::ParseExcelFormatStrings/parse_excel_format_string( $string, $name )>
319              
320             L<Spreadsheet::Reader::Format::FmtDefault/set_defined_excel_formats( %args )>
321              
322             L<Spreadsheet::Reader::ExcelXML::XMLReader/good_load>
323              
324             L<Spreadsheet::Reader::ExcelXML::XMLReader/start_the_file_over>
325              
326             L<Spreadsheet::Reader::ExcelXML::XMLReader/close_the_file>
327              
328             L<Spreadsheet::Reader::ExcelXML::XMLReader/advance_element_position( $element, [$iterations] )>
329              
330             L<Spreadsheet::Reader::ExcelXML::XMLReader/parse_element>
331              
332             L<Spreadsheet::Reader::ExcelXML::XMLReader/current_named_node>
333              
334             L<Spreadsheet::Reader::ExcelXML::XMLReader/squash_node( $node )>
335              
336             L<Spreadsheet::Reader::ExcelXML::Error/set_error( $error_string )>
337              
338             =back
339              
340             =head2 Method(s)
341              
342             These are the methods mandated by this interface.
343              
344             =head3 get_format( $name, [$header], [$exclude_header] )
345              
346             =over
347              
348             B<Definition:> This will return the styles information from the identified $name in the
349             style node. The target name is usually drawn from the cell data stored in the worksheet.
350             The information is returned as a perl hash ref. Since the styles data is in two tiers it
351             finds all the subtier information for each indicated piece and appends them to the hash
352             ref as values for each type key.
353              
354             B<Accepts position 0:> $name = a (sub) node name indicating which styles node should be
355             returned
356              
357             B<Accepts position 1:> $header = the target header key (use the
358             L<Spreadsheet::Reader::ExcelXML::Cell/Attributes> that are cell formats as the definition
359             of range for this.) It will cause only this header subset to be returned
360              
361             B<Accepts position 2:> $exclude_header = the target header key (use the
362             L<Spreadsheet::Reader::ExcelXML::Cell/Attributes> that are cell formats as the definition
363             of range for this.) It will exclude the header from the returned data set.
364              
365             B<Returns:> a hash ref of data
366              
367             =back
368              
369             =head3 get_default_format( [$header], [$exclude_header] )
370              
371             =over
372              
373             B<Definition:> For any cell that does not have a unquely identified format excel generally
374             stores a default format for the remainder of the sheet. This will return the two
375             tiered default styles information. The information is returned in the same format as the
376             get_format method.
377              
378             B<Accepts position 0:> $header = the target header key (use the
379             L<Spreadsheet::Reader::ExcelXML::Cell/Attributes> that are cell formats as the definition
380             of range for this.) It will cause only this header subset to be returned
381              
382             B<Accepts position 1:> $exclude_header = the target header key (optional at position 2) (use the
383             L<Spreadsheet::Reader::ExcelXML::Cell/Attributes> that are cell formats as the definition
384             of range for this.) It will exclude the header from the returned data set.
385              
386             B<Returns:> a hash ref of data
387              
388             =back
389              
390             =head3 load_unique_bits
391              
392             =over
393              
394             B<Definition:> When the xml file first loads this is available to pull customized data.
395             It mostly pulls metadata and stores it in hidden attributes for use later. If all goes
396             according to plan it sets L<Spreadsheet::Reader::ExcelXML::XMLReader/good_load> to 1.
397              
398             B<Accepts:> Nothing
399              
400             B<Returns:> Nothing
401              
402             =back
403              
404             =head2 Attributes
405              
406             Data passed to new when creating an instance with this role. For
407             modification of this(ese) attribute(s) see the listed 'attribute
408             methods'. For more information on attributes see
409             L<Moose::Manual::Attributes>. The easiest way to modify this(ese)
410             attribute(s) is during instance creation before it is passed to the
411             workbook or parser.
412              
413             =head3 cache_positions
414              
415             =over
416              
417             B<Definition:> Especially for sheets with lots of stored formats the
418             parser can slow way down when accessing each postion. This is
419             because the are not stored sequentially and the reader is a JIT linear
420             parser. To go back it must restart and index through each position till
421             it gets to the right place. This is especially true for excel sheets
422             that have experienced any significant level of manual intervention prior
423             to being read. This attribute sets caching (default on) for styles
424             so the parser builds and stores all the styles settings at the beginning.
425             If the file is cached it will close and release the file handle in order
426             to free up some space. (a small win in exchange for the space taken by
427             the cache).
428              
429             B<Default:> 1 = caching is on
430              
431             B<Range:> 1|0
432              
433             B<Attribute required:> yes
434              
435             B<attribute methods> Methods provided to adjust this attribute
436              
437             =over
438              
439             none - (will be autoset by L<Spreadsheet::Reader::ExcelXML/cache_positions>)
440              
441             =back
442              
443             =back
444              
445             =head1 SUPPORT
446              
447             =over
448              
449             L<github Spreadsheet::Reader::ExcelXML/issues
450             |https://github.com/jandrew/p5-spreadsheet-reader-excelxml/issues>
451              
452             =back
453              
454             =head1 TODO
455              
456             =over
457              
458             B<1.> Nothing yet
459              
460             =back
461              
462             =head1 AUTHOR
463              
464             =over
465              
466             =item Jed Lund
467              
468             =item jandrew@cpan.org
469              
470             =back
471              
472             =head1 COPYRIGHT
473              
474             This program is free software; you can redistribute
475             it and/or modify it under the same terms as Perl itself.
476              
477             The full text of the license can be found in the
478             LICENSE file included with this module.
479              
480             This software is copyrighted (c) 2016 by Jed Lund
481              
482             =head1 DEPENDENCIES
483              
484             =over
485              
486             L<Spreadsheet::Reader::ExcelXML> - the package
487              
488             =back
489              
490             =head1 SEE ALSO
491              
492             =over
493              
494             L<Spreadsheet::Read> - generic Spreadsheet reader
495              
496             L<Spreadsheet::ParseExcel> - Excel binary version 2003 and earlier (.xls files)
497              
498             L<Spreadsheet::XLSX> - Excel version 2007 and later
499              
500             L<Spreadsheet::ParseXLSX> - Excel version 2007 and later
501              
502             L<Log::Shiras|https://github.com/jandrew/Log-Shiras>
503              
504             =over
505              
506             All lines in this package that use Log::Shiras are commented out
507              
508             =back
509              
510             =back
511              
512             =cut
513              
514             #########1#########2 main pod documentation end 5#########6#########7#########8#########9