File Coverage

blib/lib/MsOffice/Word/Template/Engine.pm
Criterion Covered Total %
statement 65 65 100.0
branch 9 10 90.0
condition n/a
subroutine 10 10 100.0
pod 0 2 0.0
total 84 87 96.5


line stmt bran cond sub pod time code
1             package MsOffice::Word::Template::Engine;
2 1     1   922 use 5.024;
  1         4  
3 1     1   8 use Moose;
  1         1  
  1         9  
4 1     1   7311 use MooseX::AbstractMethod;
  1         23026  
  1         7  
5 1     1   12762 use MsOffice::Word::Surgeon::Utils qw(decode_entities);
  1         3  
  1         116  
6              
7 1     1   9 use namespace::clean -except => 'meta';
  1         3  
  1         7  
8              
9             our $VERSION = '2.05';
10              
11             #======================================================================
12             # ATTRIBUTES
13             #======================================================================
14              
15             # passed through the constructor
16             has 'word_template' => (is => 'ro', isa => 'MsOffice::Word::Template', required => 1, weak_ref => 1);
17              
18             # lazily constructed, not received through the constructor
19             has 'xml_regexes' => (is => 'ro', isa => 'ArrayRef[RegexpRef]',
20             lazy => 1, builder => '_xml_regexes', init_arg => undef);
21             has 'compiled_template' => (is => 'ro', isa => 'HashRef',
22             lazy => 1, builder => '_compiled_template', init_arg => undef);
23             has '_constructor_args' => (is => 'bare', isa => 'HashRef', init_arg => undef);
24              
25              
26             #======================================================================
27             # ABSTRACT METHODS -- to be defined in subclasses
28             #======================================================================
29              
30             abstract 'start_tag';
31             abstract 'end_tag';
32             abstract 'compile_template';
33             abstract 'process_part';
34             abstract 'process';
35              
36             #======================================================================
37             # GLOBALS
38             #======================================================================
39              
40             my $XML_COMMENT_FOR_MARKING_DIRECTIVES = '<!--TEMPLATE_DIRECTIVE_ABOVE-->';
41              
42             #======================================================================
43             # INSTANCE CONSTRUCTION
44             #======================================================================
45              
46              
47             sub BUILD {
48 2     2 0 4861 my ($self, $args) = @_;
49 2         9 $self->{_constructor_args} = $args; # stored to be available for lazy attr constructors in subclasses
50             }
51              
52              
53             #======================================================================
54             # LAZY ATTRIBUTE CONSTRUCTORS
55             #======================================================================
56              
57             sub _xml_regexes {
58 2     2   7 my ($self) = @_;
59              
60             # start and end character sequences for a template fragment
61 2         132 my $rx_start = quotemeta $self->start_tag;
62 2         99 my $rx_end = quotemeta $self->end_tag;
63              
64             # Regexes for extracting template directives within the XML.
65             # Such directives are identified through a specific XML comment -- this comment is
66             # inserted by method "template_fragment_for_run()" below.
67             # The (*SKIP) instructions are used to avoid backtracking after a
68             # closing tag for the subexpression has been found. Otherwise the
69             # .*? inside could possibly match across boundaries of the current
70             # XML node, we don't want that.
71              
72             # regex for matching directives to be treated outside the text flow.
73 2         84 my $rx_outside_text_flow = qr{
74             <w:r\b [^>]*> # start run node
75             (?: <w:rPr> .*? </w:rPr> (*SKIP) )? # optional run properties
76             <w:t\b [^>]*> # start text node
77             ($rx_start .*? $rx_end) (*SKIP) # template directive
78             $XML_COMMENT_FOR_MARKING_DIRECTIVES # specific XML comment
79             </w:t> # close text node
80             </w:r> # close run node
81             }sx;
82              
83             # regex for matching paragraphs that contain only a directive
84 2         68 my $rx_paragraph = qr{
85             <w:p\b [^>]*> # start paragraph node
86             (?: <w:pPr> .*? </w:pPr> (*SKIP) )? # optional paragraph properties
87             $rx_outside_text_flow
88             </w:p> # close paragraph node
89             }sx;
90              
91             # regex for matching table rows that contain only a directive in the first cell
92 2         81 my $rx_row = qr{
93             <w:tr\b [^>]*> # start row node
94             <w:tc\b [^>]*> # start cell node
95             (?:<w:tcPr> .*? </w:tcPr> (*SKIP) )? # cell properties
96             $rx_paragraph # paragraph in cell
97             </w:tc> # close cell node
98             (?:<w:tc> .*? </w:tc> (*SKIP) )* # ignore other cells on the same row
99             </w:tr> # close row node
100             }sx;
101              
102 2         106 return [$rx_row, $rx_paragraph, $rx_outside_text_flow];
103             # Note : the order is important -- the most specific regex is tried first, the least specific is tried last
104             }
105              
106              
107              
108             sub _compiled_template {
109 1     1   4 my ($self) = @_;
110              
111 1         57 my $surgeon = $self->word_template->surgeon;
112 1         3 my %compiled_template;
113              
114             # build a compiled template for each document part
115 1         68 foreach my $part_name ($self->word_template->part_names->@*) {
116 7         50788 my $part = $surgeon->part($part_name);
117 7         186 my $template_text = $self->template_text_for_part($part);
118 7         40 $compiled_template{$part_name} = $self->compile_template($template_text);
119             }
120              
121             # build a compiled template for each property file (core.xml, app.xml, custom.xml)
122 1         1027 foreach my $property_file ($self->word_template->property_files->@*) {
123 3 100       5717 if ($surgeon->zip->memberNamed($property_file)) {
124 2         391 my $xml = $surgeon->xml_member($property_file);
125 2         3361 $compiled_template{$property_file} = $self->compile_template($xml);
126             }
127             }
128              
129 1         239 return \%compiled_template;
130              
131             }
132              
133              
134              
135              
136              
137             #======================================================================
138             # COMPILING INNER TEMPLATES
139             #======================================================================
140              
141              
142              
143             sub template_text_for_part {
144 8     8 0 23 my ($self, $part) = @_;
145              
146             # assemble template fragments from all runs in the part into a global template text
147 8         53 $part->cleanup_XML;
148 8         813081 my @template_fragments = map {$self->_template_fragment_for_run($_)} $part->runs->@*;
  82         280889  
149 8         868 my $template_text = join "", @template_fragments;
150              
151             # remove markup around directives, successively for table rows, for paragraphs, and finally
152             # for remaining directives embedded within text runs.
153 8         501 $template_text =~ s/$_/$1/g foreach $self->xml_regexes->@*;
154              
155 8         64 return $template_text;
156             }
157              
158              
159             sub _template_fragment_for_run { # given an instance of Surgeon::Run, build a template fragment
160 82     82   227 my ($self, $run) = @_;
161              
162 82         3401 my $props = $run->props;
163 82         4545 my $data_color = $self->word_template->data_color;
164 82         4024 my $control_color = $self->word_template->control_color;
165              
166             # if this run is highlighted in data or control color, it must be translated into a template directive
167 82 100       827 if ($props =~ s{<w:highlight w:val="($data_color|$control_color)"/>}{}) {
168 41         122 my $color = $1;
169 41         1870 my $xml = $run->xml_before;
170              
171             # re-build the run, removing the highlight, and adding the start/end tags for the template engine
172 41         1921 my $inner_texts = $run->inner_texts;
173 41 50       468 if (@$inner_texts) {
174 41         101 $xml .= "<w:r>"; # opening XML tag for run node
175 41 100       102 $xml .= "<w:rPr>" . $props . "</w:rPr>" if $props; # optional run properties
176 41         73 $xml .= "<w:t>"; # opening XML tag for text node
177 41         2072 $xml .= $self->start_tag; # start a template directive
178 41         108 foreach my $inner_text (@$inner_texts) { # loop over text nodes
179 43         1772 my $txt = $inner_text->literal_text; # just take inner literal text
180 43         498 decode_entities($txt);
181 43         352 $xml .= $txt . "\n";
182             # NOTE : adding "\n" because the template parser may need them for identifying end of comments
183             }
184              
185 41         2251 $xml .= $self->end_tag; # end of template directive
186 41 100       158 $xml .= $XML_COMMENT_FOR_MARKING_DIRECTIVES
187             if $color eq $control_color; # XML comment for marking
188 41         108 $xml .= "</w:t>"; # closing XML tag for text node
189 41         100 $xml .= "</w:r>"; # closing XML tag for run node
190             }
191              
192 41         186 return $xml;
193             }
194              
195             # otherwise this run is just regular MsWord content
196             else {
197 41         203 return $run->as_xml;
198             }
199             }
200              
201              
202              
203              
204             1;
205              
206             __END__
207              
208             =encoding ISO-8859-1
209              
210             =head1 NAME
211              
212             MsOffice::Word::Template::Engine -- abstract class for template engines
213              
214             =head1 DESCRIPTION
215              
216             This abstract class encapsulates functionalities common to all templating engines.
217             Concrete classes such as L<MsOffice::Word::Template::Engine::TT2> inherit from the
218             present class.
219              
220             Templating engines encapsulate internal implementation algorithms; they are not meant to
221             be called from external clients. Methods documented below are just to explain the internal
222             architecture.
223              
224             =head1 METHODS
225              
226             =head2 _compile_templates
227              
228             $self->_compile_templates($word_template);
229              
230             Calls the subclass's concrete method C<compile_template> on each document part.
231              
232             =head2 _template_fragment_for_run
233              
234             Translates a given text run into a fragment suitable to be processed by the template compiler.
235              
236             =head2 xml_regexes
237              
238             Compiles the regexes to be tried on each text run.
239              
240              
241             =head1 COPYRIGHT AND LICENSE
242              
243             Copyright 2020-2024 by Laurent Dami.
244              
245             This program is free software, you can redistribute it and/or modify it under the terms of the Artistic License version 2.0.
246              
247              
248              
249              
250              
251