| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package MsOffice::Word::Template::Engine; |
|
2
|
1
|
|
|
1
|
|
922
|
use 5.024; |
|
|
1
|
|
|
|
|
4
|
|
|
3
|
1
|
|
|
1
|
|
8
|
use Moose; |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
9
|
|
|
4
|
1
|
|
|
1
|
|
7311
|
use MooseX::AbstractMethod; |
|
|
1
|
|
|
|
|
23026
|
|
|
|
1
|
|
|
|
|
7
|
|
|
5
|
1
|
|
|
1
|
|
12762
|
use MsOffice::Word::Surgeon::Utils qw(decode_entities); |
|
|
1
|
|
|
|
|
3
|
|
|
|
1
|
|
|
|
|
116
|
|
|
6
|
|
|
|
|
|
|
|
|
7
|
1
|
|
|
1
|
|
9
|
use namespace::clean -except => 'meta'; |
|
|
1
|
|
|
|
|
3
|
|
|
|
1
|
|
|
|
|
7
|
|
|
8
|
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
our $VERSION = '2.05'; |
|
10
|
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
#====================================================================== |
|
12
|
|
|
|
|
|
|
# ATTRIBUTES |
|
13
|
|
|
|
|
|
|
#====================================================================== |
|
14
|
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
# passed through the constructor |
|
16
|
|
|
|
|
|
|
has 'word_template' => (is => 'ro', isa => 'MsOffice::Word::Template', required => 1, weak_ref => 1); |
|
17
|
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
# lazily constructed, not received through the constructor |
|
19
|
|
|
|
|
|
|
has 'xml_regexes' => (is => 'ro', isa => 'ArrayRef[RegexpRef]', |
|
20
|
|
|
|
|
|
|
lazy => 1, builder => '_xml_regexes', init_arg => undef); |
|
21
|
|
|
|
|
|
|
has 'compiled_template' => (is => 'ro', isa => 'HashRef', |
|
22
|
|
|
|
|
|
|
lazy => 1, builder => '_compiled_template', init_arg => undef); |
|
23
|
|
|
|
|
|
|
has '_constructor_args' => (is => 'bare', isa => 'HashRef', init_arg => undef); |
|
24
|
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
#====================================================================== |
|
27
|
|
|
|
|
|
|
# ABSTRACT METHODS -- to be defined in subclasses |
|
28
|
|
|
|
|
|
|
#====================================================================== |
|
29
|
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
abstract 'start_tag'; |
|
31
|
|
|
|
|
|
|
abstract 'end_tag'; |
|
32
|
|
|
|
|
|
|
abstract 'compile_template'; |
|
33
|
|
|
|
|
|
|
abstract 'process_part'; |
|
34
|
|
|
|
|
|
|
abstract 'process'; |
|
35
|
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
#====================================================================== |
|
37
|
|
|
|
|
|
|
# GLOBALS |
|
38
|
|
|
|
|
|
|
#====================================================================== |
|
39
|
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
my $XML_COMMENT_FOR_MARKING_DIRECTIVES = '<!--TEMPLATE_DIRECTIVE_ABOVE-->'; |
|
41
|
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
#====================================================================== |
|
43
|
|
|
|
|
|
|
# INSTANCE CONSTRUCTION |
|
44
|
|
|
|
|
|
|
#====================================================================== |
|
45
|
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
sub BUILD { |
|
48
|
2
|
|
|
2
|
0
|
4861
|
my ($self, $args) = @_; |
|
49
|
2
|
|
|
|
|
9
|
$self->{_constructor_args} = $args; # stored to be available for lazy attr constructors in subclasses |
|
50
|
|
|
|
|
|
|
} |
|
51
|
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
#====================================================================== |
|
54
|
|
|
|
|
|
|
# LAZY ATTRIBUTE CONSTRUCTORS |
|
55
|
|
|
|
|
|
|
#====================================================================== |
|
56
|
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
sub _xml_regexes { |
|
58
|
2
|
|
|
2
|
|
7
|
my ($self) = @_; |
|
59
|
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
# start and end character sequences for a template fragment |
|
61
|
2
|
|
|
|
|
132
|
my $rx_start = quotemeta $self->start_tag; |
|
62
|
2
|
|
|
|
|
99
|
my $rx_end = quotemeta $self->end_tag; |
|
63
|
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
# Regexes for extracting template directives within the XML. |
|
65
|
|
|
|
|
|
|
# Such directives are identified through a specific XML comment -- this comment is |
|
66
|
|
|
|
|
|
|
# inserted by method "template_fragment_for_run()" below. |
|
67
|
|
|
|
|
|
|
# The (*SKIP) instructions are used to avoid backtracking after a |
|
68
|
|
|
|
|
|
|
# closing tag for the subexpression has been found. Otherwise the |
|
69
|
|
|
|
|
|
|
# .*? inside could possibly match across boundaries of the current |
|
70
|
|
|
|
|
|
|
# XML node, we don't want that. |
|
71
|
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
# regex for matching directives to be treated outside the text flow. |
|
73
|
2
|
|
|
|
|
84
|
my $rx_outside_text_flow = qr{ |
|
74
|
|
|
|
|
|
|
<w:r\b [^>]*> # start run node |
|
75
|
|
|
|
|
|
|
(?: <w:rPr> .*? </w:rPr> (*SKIP) )? # optional run properties |
|
76
|
|
|
|
|
|
|
<w:t\b [^>]*> # start text node |
|
77
|
|
|
|
|
|
|
($rx_start .*? $rx_end) (*SKIP) # template directive |
|
78
|
|
|
|
|
|
|
$XML_COMMENT_FOR_MARKING_DIRECTIVES # specific XML comment |
|
79
|
|
|
|
|
|
|
</w:t> # close text node |
|
80
|
|
|
|
|
|
|
</w:r> # close run node |
|
81
|
|
|
|
|
|
|
}sx; |
|
82
|
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
# regex for matching paragraphs that contain only a directive |
|
84
|
2
|
|
|
|
|
68
|
my $rx_paragraph = qr{ |
|
85
|
|
|
|
|
|
|
<w:p\b [^>]*> # start paragraph node |
|
86
|
|
|
|
|
|
|
(?: <w:pPr> .*? </w:pPr> (*SKIP) )? # optional paragraph properties |
|
87
|
|
|
|
|
|
|
$rx_outside_text_flow |
|
88
|
|
|
|
|
|
|
</w:p> # close paragraph node |
|
89
|
|
|
|
|
|
|
}sx; |
|
90
|
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
# regex for matching table rows that contain only a directive in the first cell |
|
92
|
2
|
|
|
|
|
81
|
my $rx_row = qr{ |
|
93
|
|
|
|
|
|
|
<w:tr\b [^>]*> # start row node |
|
94
|
|
|
|
|
|
|
<w:tc\b [^>]*> # start cell node |
|
95
|
|
|
|
|
|
|
(?:<w:tcPr> .*? </w:tcPr> (*SKIP) )? # cell properties |
|
96
|
|
|
|
|
|
|
$rx_paragraph # paragraph in cell |
|
97
|
|
|
|
|
|
|
</w:tc> # close cell node |
|
98
|
|
|
|
|
|
|
(?:<w:tc> .*? </w:tc> (*SKIP) )* # ignore other cells on the same row |
|
99
|
|
|
|
|
|
|
</w:tr> # close row node |
|
100
|
|
|
|
|
|
|
}sx; |
|
101
|
|
|
|
|
|
|
|
|
102
|
2
|
|
|
|
|
106
|
return [$rx_row, $rx_paragraph, $rx_outside_text_flow]; |
|
103
|
|
|
|
|
|
|
# Note : the order is important -- the most specific regex is tried first, the least specific is tried last |
|
104
|
|
|
|
|
|
|
} |
|
105
|
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
sub _compiled_template { |
|
109
|
1
|
|
|
1
|
|
4
|
my ($self) = @_; |
|
110
|
|
|
|
|
|
|
|
|
111
|
1
|
|
|
|
|
57
|
my $surgeon = $self->word_template->surgeon; |
|
112
|
1
|
|
|
|
|
3
|
my %compiled_template; |
|
113
|
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
# build a compiled template for each document part |
|
115
|
1
|
|
|
|
|
68
|
foreach my $part_name ($self->word_template->part_names->@*) { |
|
116
|
7
|
|
|
|
|
50788
|
my $part = $surgeon->part($part_name); |
|
117
|
7
|
|
|
|
|
186
|
my $template_text = $self->template_text_for_part($part); |
|
118
|
7
|
|
|
|
|
40
|
$compiled_template{$part_name} = $self->compile_template($template_text); |
|
119
|
|
|
|
|
|
|
} |
|
120
|
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
# build a compiled template for each property file (core.xml, app.xml, custom.xml) |
|
122
|
1
|
|
|
|
|
1027
|
foreach my $property_file ($self->word_template->property_files->@*) { |
|
123
|
3
|
100
|
|
|
|
5717
|
if ($surgeon->zip->memberNamed($property_file)) { |
|
124
|
2
|
|
|
|
|
391
|
my $xml = $surgeon->xml_member($property_file); |
|
125
|
2
|
|
|
|
|
3361
|
$compiled_template{$property_file} = $self->compile_template($xml); |
|
126
|
|
|
|
|
|
|
} |
|
127
|
|
|
|
|
|
|
} |
|
128
|
|
|
|
|
|
|
|
|
129
|
1
|
|
|
|
|
239
|
return \%compiled_template; |
|
130
|
|
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
} |
|
132
|
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
|
|
137
|
|
|
|
|
|
|
#====================================================================== |
|
138
|
|
|
|
|
|
|
# COMPILING INNER TEMPLATES |
|
139
|
|
|
|
|
|
|
#====================================================================== |
|
140
|
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
sub template_text_for_part { |
|
144
|
8
|
|
|
8
|
0
|
23
|
my ($self, $part) = @_; |
|
145
|
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
# assemble template fragments from all runs in the part into a global template text |
|
147
|
8
|
|
|
|
|
53
|
$part->cleanup_XML; |
|
148
|
8
|
|
|
|
|
813081
|
my @template_fragments = map {$self->_template_fragment_for_run($_)} $part->runs->@*; |
|
|
82
|
|
|
|
|
280889
|
|
|
149
|
8
|
|
|
|
|
868
|
my $template_text = join "", @template_fragments; |
|
150
|
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
# remove markup around directives, successively for table rows, for paragraphs, and finally |
|
152
|
|
|
|
|
|
|
# for remaining directives embedded within text runs. |
|
153
|
8
|
|
|
|
|
501
|
$template_text =~ s/$_/$1/g foreach $self->xml_regexes->@*; |
|
154
|
|
|
|
|
|
|
|
|
155
|
8
|
|
|
|
|
64
|
return $template_text; |
|
156
|
|
|
|
|
|
|
} |
|
157
|
|
|
|
|
|
|
|
|
158
|
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
sub _template_fragment_for_run { # given an instance of Surgeon::Run, build a template fragment |
|
160
|
82
|
|
|
82
|
|
227
|
my ($self, $run) = @_; |
|
161
|
|
|
|
|
|
|
|
|
162
|
82
|
|
|
|
|
3401
|
my $props = $run->props; |
|
163
|
82
|
|
|
|
|
4545
|
my $data_color = $self->word_template->data_color; |
|
164
|
82
|
|
|
|
|
4024
|
my $control_color = $self->word_template->control_color; |
|
165
|
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
# if this run is highlighted in data or control color, it must be translated into a template directive |
|
167
|
82
|
100
|
|
|
|
827
|
if ($props =~ s{<w:highlight w:val="($data_color|$control_color)"/>}{}) { |
|
168
|
41
|
|
|
|
|
122
|
my $color = $1; |
|
169
|
41
|
|
|
|
|
1870
|
my $xml = $run->xml_before; |
|
170
|
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
# re-build the run, removing the highlight, and adding the start/end tags for the template engine |
|
172
|
41
|
|
|
|
|
1921
|
my $inner_texts = $run->inner_texts; |
|
173
|
41
|
50
|
|
|
|
468
|
if (@$inner_texts) { |
|
174
|
41
|
|
|
|
|
101
|
$xml .= "<w:r>"; # opening XML tag for run node |
|
175
|
41
|
100
|
|
|
|
102
|
$xml .= "<w:rPr>" . $props . "</w:rPr>" if $props; # optional run properties |
|
176
|
41
|
|
|
|
|
73
|
$xml .= "<w:t>"; # opening XML tag for text node |
|
177
|
41
|
|
|
|
|
2072
|
$xml .= $self->start_tag; # start a template directive |
|
178
|
41
|
|
|
|
|
108
|
foreach my $inner_text (@$inner_texts) { # loop over text nodes |
|
179
|
43
|
|
|
|
|
1772
|
my $txt = $inner_text->literal_text; # just take inner literal text |
|
180
|
43
|
|
|
|
|
498
|
decode_entities($txt); |
|
181
|
43
|
|
|
|
|
352
|
$xml .= $txt . "\n"; |
|
182
|
|
|
|
|
|
|
# NOTE : adding "\n" because the template parser may need them for identifying end of comments |
|
183
|
|
|
|
|
|
|
} |
|
184
|
|
|
|
|
|
|
|
|
185
|
41
|
|
|
|
|
2251
|
$xml .= $self->end_tag; # end of template directive |
|
186
|
41
|
100
|
|
|
|
158
|
$xml .= $XML_COMMENT_FOR_MARKING_DIRECTIVES |
|
187
|
|
|
|
|
|
|
if $color eq $control_color; # XML comment for marking |
|
188
|
41
|
|
|
|
|
108
|
$xml .= "</w:t>"; # closing XML tag for text node |
|
189
|
41
|
|
|
|
|
100
|
$xml .= "</w:r>"; # closing XML tag for run node |
|
190
|
|
|
|
|
|
|
} |
|
191
|
|
|
|
|
|
|
|
|
192
|
41
|
|
|
|
|
186
|
return $xml; |
|
193
|
|
|
|
|
|
|
} |
|
194
|
|
|
|
|
|
|
|
|
195
|
|
|
|
|
|
|
# otherwise this run is just regular MsWord content |
|
196
|
|
|
|
|
|
|
else { |
|
197
|
41
|
|
|
|
|
203
|
return $run->as_xml; |
|
198
|
|
|
|
|
|
|
} |
|
199
|
|
|
|
|
|
|
} |
|
200
|
|
|
|
|
|
|
|
|
201
|
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
|
|
204
|
|
|
|
|
|
|
1; |
|
205
|
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
__END__ |
|
207
|
|
|
|
|
|
|
|
|
208
|
|
|
|
|
|
|
=encoding ISO-8859-1 |
|
209
|
|
|
|
|
|
|
|
|
210
|
|
|
|
|
|
|
=head1 NAME |
|
211
|
|
|
|
|
|
|
|
|
212
|
|
|
|
|
|
|
MsOffice::Word::Template::Engine -- abstract class for template engines |
|
213
|
|
|
|
|
|
|
|
|
214
|
|
|
|
|
|
|
=head1 DESCRIPTION |
|
215
|
|
|
|
|
|
|
|
|
216
|
|
|
|
|
|
|
This abstract class encapsulates functionalities common to all templating engines. |
|
217
|
|
|
|
|
|
|
Concrete classes such as L<MsOffice::Word::Template::Engine::TT2> inherit from the |
|
218
|
|
|
|
|
|
|
present class. |
|
219
|
|
|
|
|
|
|
|
|
220
|
|
|
|
|
|
|
Templating engines encapsulate internal implementation algorithms; they are not meant to |
|
221
|
|
|
|
|
|
|
be called from external clients. Methods documented below are just to explain the internal |
|
222
|
|
|
|
|
|
|
architecture. |
|
223
|
|
|
|
|
|
|
|
|
224
|
|
|
|
|
|
|
=head1 METHODS |
|
225
|
|
|
|
|
|
|
|
|
226
|
|
|
|
|
|
|
=head2 _compile_templates |
|
227
|
|
|
|
|
|
|
|
|
228
|
|
|
|
|
|
|
$self->_compile_templates($word_template); |
|
229
|
|
|
|
|
|
|
|
|
230
|
|
|
|
|
|
|
Calls the subclass's concrete method C<compile_template> on each document part. |
|
231
|
|
|
|
|
|
|
|
|
232
|
|
|
|
|
|
|
=head2 _template_fragment_for_run |
|
233
|
|
|
|
|
|
|
|
|
234
|
|
|
|
|
|
|
Translates a given text run into a fragment suitable to be processed by the template compiler. |
|
235
|
|
|
|
|
|
|
|
|
236
|
|
|
|
|
|
|
=head2 xml_regexes |
|
237
|
|
|
|
|
|
|
|
|
238
|
|
|
|
|
|
|
Compiles the regexes to be tried on each text run. |
|
239
|
|
|
|
|
|
|
|
|
240
|
|
|
|
|
|
|
|
|
241
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
|
242
|
|
|
|
|
|
|
|
|
243
|
|
|
|
|
|
|
Copyright 2020-2024 by Laurent Dami. |
|
244
|
|
|
|
|
|
|
|
|
245
|
|
|
|
|
|
|
This program is free software, you can redistribute it and/or modify it under the terms of the Artistic License version 2.0. |
|
246
|
|
|
|
|
|
|
|
|
247
|
|
|
|
|
|
|
|
|
248
|
|
|
|
|
|
|
|
|
249
|
|
|
|
|
|
|
|
|
250
|
|
|
|
|
|
|
|
|
251
|
|
|
|
|
|
|
|