File Coverage

blib/lib/MsOffice/Word/Template.pm
Criterion Covered Total %
statement 36 36 100.0
branch 5 8 62.5
condition 0 2 0.0
subroutine 8 8 100.0
pod 1 1 100.0
total 50 55 90.9


line stmt bran cond sub pod time code
1             package MsOffice::Word::Template;
2 3     3   21731 use 5.024;
  3         14  
3 3     3   2205 use Moose;
  3         1596300  
  3         28  
4 3     3   31878 use MooseX::StrictConstructor;
  3         126378  
  3         14  
5 3     3   37504 use Carp qw(croak);
  3         6  
  3         284  
6 3     3   2284 use MsOffice::Word::Surgeon 2.0;
  3         1960024  
  3         487  
7              
8             # syntactic sugar for attributes
9             sub has_inner ($@) {my $attr = shift; has($attr => @_, init_arg => undef, lazy => 1, builder => "_$attr")}
10              
11 3     3   34 use namespace::clean -except => 'meta';
  3         7  
  3         38  
12              
13             our $VERSION = '2.05';
14              
15             #======================================================================
16             # ATTRIBUTES
17             #======================================================================
18              
19             # constructor attributes for interacting with MsWord
20             # See also BUILDARGS: the constructor can also take a "docx" arg
21             # that will be automatically translated into a "surgeon" attribute
22             has 'surgeon' => (is => 'ro', isa => 'MsOffice::Word::Surgeon', required => 1);
23             has 'data_color' => (is => 'ro', isa => 'Str', default => "yellow");
24             has 'control_color' => (is => 'ro', isa => 'Str', default => "green");
25             has 'part_names' => (is => 'ro', isa => 'ArrayRef[Str]', lazy => 1,
26             default => sub {[keys shift->surgeon->parts->%*]});
27             has 'property_files'=> (is => 'ro', isa => 'ArrayRef[Str]',
28             default => sub {[qw(docProps/core.xml docProps/app.xml docProps/custom.xml)]});
29              
30             # constructor attributes for building a templating engine
31             has 'engine_class' => (is => 'ro', isa => 'Str', default => 'TT2');
32             has 'engine_args' => (is => 'ro', isa => 'ArrayRef', default => sub {[]});
33              
34             # attributes lazily constructed by the module -- not received through the constructor
35             has_inner 'engine' => (is => 'ro', isa => 'MsOffice::Word::Template::Engine');
36              
37              
38             #======================================================================
39             # BUILDING INSTANCES
40             #======================================================================
41              
42             # syntactic sugar for supporting ->new($surgeon) instead of ->new(surgeon => $surgeon)
43             around BUILDARGS => sub {
44             my $orig = shift;
45             my $class = shift;
46              
47             # if there is a unique arg without any keyword ...
48             if ( @_ == 1) {
49              
50             # if the unique arg is an instance of Surgeon, it's the "surgeon" parameter
51             unshift @_, 'surgeon' if $_[0]->isa('MsOffice::Word::Surgeon');
52              
53             # if the unique arg is a string, it's the "docx" parameter
54             unshift @_, 'docx' if $_[0] && !ref $_[0];
55             }
56              
57             # translate the "docx" parameter into a "surgeon" parameter
58             my %args = @_;
59             if (my $docx = delete $args{docx}) {
60             $args{surgeon} = MsOffice::Word::Surgeon->new(docx => $docx);
61             }
62              
63             # now call the regular Moose method
64             return $class->$orig(%args);
65             };
66              
67              
68             #======================================================================
69             # LAZY ATTRIBUTE CONSTRUCTORS
70             #======================================================================
71              
72              
73             sub _engine {
74 2     2   7 my ($self) = @_;
75              
76             # instantiate the templating engine
77 2         76 my $engine_class = $self->engine_class;
78 2         5 my $engine;
79             my @load_errors;
80             CLASS:
81 2         8 for my $class ("MsOffice::Word::Template::Engine::$engine_class", $engine_class) {
82 2 50 0     207 eval "require $class; 1" or push @load_errors, $@ and next CLASS;
83 2 50       115 $engine = $class->new(word_template => $self,
84             $self->engine_args->@*) and last CLASS;
85             }
86 2 50       27 $engine or die "could not load engine class '$engine_class'", @load_errors;
87              
88 2         85 return $engine;
89             }
90              
91              
92              
93             #======================================================================
94             # PROCESSING THE TEMPLATE
95             #======================================================================
96              
97             sub process {
98 2     2 1 5544 my ($self, $vars) = @_;
99              
100             # create a clone of the original
101 2         89 my $new_doc = $self->surgeon->clone;
102              
103             # process each package part
104 2         8689 foreach my $part_name ($self->part_names->@*) {
105 14         2018 my $new_doc_part = $new_doc->part($part_name);
106 14         37017 my $new_contents = $self->engine->process_part($part_name, $new_doc_part, $vars);
107 14         650 $new_doc_part->contents($new_contents);
108             }
109              
110             # process the property files (core.xml, app.xml, custom.xml -- if present in the original word template)
111 2         260 foreach my $property_file ($self->property_files->@*) {
112 6 100       2493 if ($self->surgeon->zip->memberNamed($property_file)) {
113 4         761 my $new_contents = $self->engine->process($property_file, $vars);
114 4         20 $new_doc->xml_member($property_file, $new_contents);
115             }
116             }
117              
118 2         286 return $new_doc;
119             }
120              
121              
122             1;
123              
124             __END__
125              
126             =encoding ISO-8859-1
127              
128             =head1 NAME
129              
130             MsOffice::Word::Template - generate Microsoft Word documents from Word templates
131              
132             =head1 SYNOPSIS
133              
134             my $template = MsOffice::Word::Template->new($filename);
135             my $new_doc = $template->process(\%data);
136             $new_doc->save_as($path_for_new_doc);
137              
138             =head1 DESCRIPTION
139              
140             =head2 Purpose
141              
142             This module treats a Microsoft Word document as a template for generating other documents. The idea is
143             similar to the "mail merge" functionality in Word, but with much richer possibilities. The
144             whole power of a Perl templating engine can be exploited, for example for
145              
146             =over
147              
148             =item *
149              
150             dealing with complex, nested datastructures
151              
152             =item *
153              
154             using control directives for loops, conditionals, subroutines, etc.
155              
156             =item *
157              
158             defining custom data processing functions or macros
159              
160             =back
161              
162              
163             Template authors just use basic highlighting in MsWord to
164             mark the templating directives :
165              
166             =over
167              
168             =item *
169              
170             fragments highlighted in B<yellow> are interpreted as I<data>
171             directives, i.e. the template result will be inserted at that point in
172             the document, keeping the current formatting properties (bold, italic,
173             font, etc.).
174              
175             =item *
176              
177             fragments highlighted in B<green> are interpreted as I<control>
178             directives that do not directly generate content, like loops, conditionals,
179             etc. Paragraphs or table rows around such directives are dismissed,
180             in order to avoid empty paragraphs or empty rows in the resulting document.
181              
182             =back
183              
184             The syntax of data and control directives depends on the backend
185             templating engine. The default engine is the L<Perl Template Toolkit|Template>;
186             other engines can be specified as subclasses -- see the L</TEMPLATE ENGINE> section below.
187              
188              
189             =head2 Status
190              
191             This distribution is a major refactoring
192             of the first version, together with a refactoring of
193             L<MsOffice::Word::Surgeon>. New features include support for headers
194             and footers, for metadata and for image insertion. The internal
195             object-oriented structure has been redesigned.
196              
197             This module has been used successfully for a pilot project in my
198             organization, generating quite complex documents from deeply nested
199             datastructures. However it has not been used yet at large scale in
200             production, so it is quite likely that some teething problems may still be
201             discovered. If you use this module, please keep me informed of your
202             difficulties, tricks, suggestions, etc.
203              
204              
205             =head1 METHODS
206              
207             =head2 new
208              
209             my $template = MsOffice::Word::Template->new($docx);
210             # or : my $template = MsOffice::Word::Template->new($surgeon); # an instance of MsOffice::Word::Surgeon
211             # or : my $template = MsOffice::Word::Template->new(docx => $docx, %options);
212              
213             In its simplest form, the constructor takes a single argument which
214             is either a string (path to a F<docx> document), or an instance of
215             L<MsOffice::Word::Surgeon>. Otherwise the constructor takes a list of named parameters,
216             which can be
217              
218              
219             =over
220              
221             =item docx
222              
223             path to a MsWord document in F<docx> format. This will automatically create
224             an instance of L<MsOffice::Word::Surgeon> and pass it to the constructor
225             through the C<surgeon> keyword.
226              
227             =item surgeon
228              
229             an instance of L<MsOffice::Word::Surgeon>. This is a mandatory parameter, either
230             directly through the C<surgeon> keyword, or indirectly through the C<docx> keyword.
231              
232             =item data_color
233              
234             the Word highlight color for marking data directives (default : yellow)
235              
236             =item control_color
237              
238             the Word highlight color for marking control directives (default : green).
239             Such directives should produce no content. They are treated outside of the regular text flow.
240              
241             =item part_names
242              
243             an arrayref to the list of package parts to be processed as templates within the C<.docx>
244             ZIP archive. The default list is the main document (C<document.xml>), together with all
245             headers and footers found in the ZIP archive.
246              
247             =item property_files
248              
249             an arrayref to the list of property files (i.e. metadata) to be processed as templates within the C<.docx>
250             ZIP archive. For historical reasons, MsWord has three different XML files for storing document
251             properties : C<core.xml>, C<app.xml> and C<custom.xml> : the default list contains those
252             three files. Supply an empty list if you don't want any document property to be processed.
253              
254              
255             =back
256              
257             In addition to the attributes above, other attributes can be passed to the
258             constructor for specifying a templating engine different from the
259             default L<Perl Template Toolkit|Template>.
260             These are described in section L</TEMPLATE ENGINE> below.
261              
262              
263             =head2 process
264              
265             my $new_doc = $template->process(\%data);
266             $new_doc->save_as($path_for_new_doc);
267              
268             Processes the template on a given data tree, and returns a new document
269             (actually, a new instance of L<MsOffice::Word::Surgeon>).
270             That document can then be saved using L<MsOffice::Word::Surgeon/save_as>.
271              
272              
273             =head1 AUTHORING TEMPLATES
274              
275             =head2 Textual content
276              
277             A template is just a regular Word document, in which the highlighted
278             fragments represent templating directives.
279              
280             The data directives, i.e. the "holes" to be filled, must be highlighted
281             in B<yellow>. Such zones must contain the names of variables to fill the
282             holes. If the template engine supports it, names of variables can be paths
283             into a complex datastructure, with dots separating the levels, like
284             C<foo.3.bar.-1> -- see L<Template::Manual::Directive/GET> and
285             L<Template::Manual::Variables> if you are using the Template Toolkit.
286              
287             Control directives like C<IF>, C<FOREACH>, etc. must be highlighted in
288             B<green>. When seeing a green zone, the system will remove XML markup for
289             the surrounding text and run nodes. If the directive is the only content
290             of the paragraph, then the paragraph node is also removed. If this
291             occurs within the first cell of a table row, the markup for that row is also
292             removed. This mechanism ensures that the final result will not contain
293             empty paragraphs or empty rows at places corresponding to control directives.
294              
295             In consequence of this distinction between yellow and green
296             highlights, templating zones cannot mix data directives with control
297             directives : a data directive within a green zone would generate output
298             outside of the regular XML flow (paragraph nodes, run nodes and text
299             nodes), and therefore MsWord would generate an error when trying to
300             open such content. There is a workaround, however : data directives
301             within a green zone will work if they I<also generate the appropriate markup>
302             for paragraph nodes, run nodes and text nodes.
303              
304             To highlight using LibreOffice, set the Character Highlighting to Export As
305             "Highlighting" instead of the default "Shading". See
306             L<LibreOffice help for MS Office|https://help.libreoffice.org/7.5/en-US/text/shared/optionen/01130200.html>.
307              
308              
309             See also L<MsOffice::Word::Template::Engine::TT2> for
310             additional advice on authoring templates based on the
311             L<Template Toolkit|Template>.
312              
313              
314             =head2 Images
315              
316             Insertion of generated images such as barcodes is done in two steps:
317              
318             =over
319              
320             =item *
321              
322             the template must contain a I<placeholder image> : this is an arbitrary image,
323             positioned within the document through usual MsWord commands, including alignment
324             instructions, border, etc. That image must be given an I<alternative text> (see
325             L<MsOffice documentation|https://support.microsoft.com/en-us/office/add-alternative-text-to-a-shape-picture-chart-smartart-graphic-or-other-object-44989b2a-903c-4d9a-b742-6a75b451c669>). That text
326             will be used as a unique identifier for the image.
327              
328             =item *
329              
330             somewhere in the document (it doesn't matter where), a directive
331             must replace the placeholder image by a generated image.
332             For example for a barcode, the TT2 directive looks like :
333              
334             [[ PROCESS barcode type="QRCode" img="my_image_name" content="some value for the QR code" ]]
335              
336             See L<MsOffice::Word::Template::Engine::TT2/barcodes> for details. The source
337             code can be used as an example of how to implement other image generating blocks.
338              
339             =back
340              
341             =head2 Metadata (also known as "document properties" in MsWord parlance)
342              
343             MsWord documents store metadata, also called "document properties". Each property
344             has a name and a value. A number of property names are builtin, like 'author' or 'description';
345             other custom properties can be defined. Properties are edited from the MsWord
346             "Backstage view" (the screen displayed after a click on the File tab).
347              
348             For feeding values into document properties, just use the regular syntax of
349             the templating engine. For example with the default Template Toolkit engine,
350             directives are enclosed in C<'[% '> and C<' %]'>; so you can write
351              
352             [% path.to.subject.data %]
353              
354             within the 'subject' property of the MsWord template, and the resulting document
355             will have its subject filled with the given data path.
356              
357             Obviously, the reason for this different mechanism is that MsWord has no support
358             for highlighting contents in property values.
359              
360             Unfortunately, this mechanism only works for document properties of type 'string'.
361             MsWord would not allow specific templating syntax within fields of type
362             boolean, number or date.
363              
364              
365              
366             =head1 TEMPLATE ENGINE
367              
368             This module invokes a backend I<templating engine> for interpreting the
369             template directives. The default engine is
370             L<MsOffice::Word::Template::Engine::TT2>, built on top of
371             L<Template Toolkit|Template>. Another engine supplied in this distribution is
372             L<MsOffice::Word::Template::Engine::Mustache>, mostly as an example.
373             To implement another engine, just subclass
374             L<MsOffice::Word::Template::Engine>.
375              
376             To use an engine different from the default, the following arguments
377             must be supplied to the L</new> method :
378              
379             =over
380              
381             =item engine_class
382              
383             The name of the engine class. If the class sits within the L<MsOffice::Word::Template::Engine>
384             namespace, just the suffix is sufficient; otherwise, specify the fully qualified class name.
385              
386             =item engine_args
387              
388             An optional list of parameters that may be used for initializing the engine
389              
390             =back
391              
392             After initialization the engine will receive a C<compile_template> method call for each part in the
393             C<.docx> package. The default parts to be handled are the main document body (C<document.xml>), and
394             all headers and footers. A different list of package parts can be supplied through the
395             C<part_names> argument to the constructor.
396              
397             In addition to the package parts, templates are also compiled for the I<property> files that contain
398             metadata such as author name, subject, description, etc. The list of files can be controlled through
399             the C<property_files> argument to the constructor.
400              
401             When processing templates, the engine must make sure that ampersand
402             characters and angle brackets are automatically replaced by the
403             corresponding HTML entities (otherwise the resulting XML would be
404             incorrect and could not be opened by Microsoft Word).
405             The L<Mustache engine|MsOffice::Word::Template::Engine::Mustache> does this
406             automatically.
407             The L<Template Toolkit engine|MsOffice::Word::Template::Engine::TT2>
408             would normally require
409             explicitly adding an C<html> filter at each directive :
410              
411             [% foo.bar | html %]
412              
413             but thanks to the L<Template::AutoFilter>
414             module, this is performed automatically.
415              
416             =head1 TROUBLESHOOTING
417              
418             If a document generated by this module cannot open in Word, it is probably because the XML
419             generated by your template is not balanced (opening and closing tags do not match) and is therefore not valid.
420             For example a template like this :
421              
422             This paragraph [[ IF condition ]]
423             may have problems
424             [[END]]
425              
426             is likely to generate incorrect XML, because the IF statement starts in the middle
427             of a paragraph and closes at a different paragraph -- therefore when the I<condition>
428             evaluates to false, the XML tag for closing the initial paragraph will be missing.
429              
430             Compound directives like IF .. END, FOREACH .. END, TRY .. CATCH .. END should therefore
431             be balanced, either all within the same paragraph, or each directive on a separate
432             paragraph. Examples like this should be successful :
433              
434             This paragraph [[ IF condition ]]has an optional part[[ ELSE ]]or an alternative[[ END ]].
435            
436             [[ SWITCH result ]]
437             [[ CASE 123 ]]
438             Not a big deal.
439             [[ CASE 789 ]]
440             You won the lottery.
441             [[ END ]]
442              
443              
444              
445             =head1 AUTHOR
446              
447             Laurent Dami, E<lt>dami AT cpan DOT orgE<gt>
448              
449             =head1 COPYRIGHT AND LICENSE
450              
451             Copyright 2020-2024 by Laurent Dami.
452              
453             This program is free software; you can redistribute it and/or modify it under the terms of the Artistic License version 2.0.
454              
455              
456