File Coverage

blib/lib/MsOffice/Word/Surgeon/Text.pm
Criterion Covered Total %
statement 70 78 89.7
branch 17 24 70.8
condition 7 10 70.0
subroutine 14 15 93.3
pod 4 4 100.0
total 112 131 85.5


line stmt bran cond sub pod time code
1             package MsOffice::Word::Surgeon::Text;
2 1     1   21 use 5.24.0;
  1         4  
3 1     1   7 use Moose;
  1         2  
  1         8  
4 1     1   5164 use MooseX::StrictConstructor;
  1         2  
  1         11  
5 1     1   3378 use MsOffice::Word::Surgeon::Utils qw(maybe_preserve_spaces is_at_run_level);
  1         3  
  1         87  
6 1     1   8 use Carp qw(croak);
  1         2  
  1         61  
7              
8 1     1   8 use namespace::clean -except => 'meta';
  1         2  
  1         15  
9              
10              
11             our $VERSION = '2.01';
12              
13             #======================================================================
14             # ATTRIBUTES
15             #======================================================================
16              
17             has 'xml_before' => (is => 'ro', isa => 'Str');
18             has 'literal_text' => (is => 'ro', isa => 'Str', required => 1);
19              
20             #======================================================================
21             # METHODS
22             #======================================================================
23              
24              
25             sub as_xml {
26 2594     2594 1 4315 my $self = shift;
27              
28 2594   100     78389 my $xml = $self->xml_before // '';
29 2594         75395 my $lit_txt = $self->literal_text;
30 2594 100 66     9161 if (defined $lit_txt && $lit_txt ne '') {
31 2587         6550 my $space_attr = maybe_preserve_spaces($lit_txt);
32 2587         8409 $xml .= "<w:t$space_attr>$lit_txt</w:t>";
33             }
34 2594         8025 return $xml;
35             }
36              
37              
38              
39             sub merge {
40 270     270 1 469 my ($self, $next_text) = @_;
41              
42 270 50       7666 !$next_text->xml_before
43             or croak "cannot merge -- next text contains xml before the text : "
44             . $next_text->xml_before;
45              
46 270         8280 $self->{literal_text} .= $next_text->literal_text;
47              
48             }
49              
50              
51             sub replace {
52 1046     1046 1 2686 my ($self, $pattern, $replacement, %args) = @_;
53              
54 1046         1642 my $xml = "";
55 1046         1604 my $current_text_node;
56 1046         30356 my $xml_before = $self->xml_before;
57              
58             # closure to make sure that $xml_before is used only once
59             my $maybe_xml_before = sub {
60 1037 100   1037   2400 my @r = $xml_before ? (xml_before => $xml_before) : ();
61 1037         1653 $xml_before = undef;
62 1037         3395 return @r;
63 1046         3587 };
64              
65             # closure to create a new text node
66             my $mk_new_text = sub {
67 1036     1036   1771 my ($literal_text) = @_;
68 1036         1838 return MsOffice::Word::Surgeon::Text->new(
69             $maybe_xml_before->(),
70             literal_text => $literal_text,
71             );
72 1046         2797 };
73              
74             # closure to create a new run node for enclosing a text node
75             my $add_new_run = sub {
76 0     0   0 my ($text_node) = @_;
77             my $run = MsOffice::Word::Surgeon::Run->new(
78             xml_before => '',
79             props => $args{run}->props,
80 0         0 inner_texts => [$text_node],
81             );
82 0         0 $xml .= $run->as_xml;
83 1046         2609 };
84              
85             # closure to add text to the current text node
86             my $add_to_current_text_node = sub {
87 1047     1047   1935 my ($txt_to_add) = @_;
88 1047   66     3315 $current_text_node //= $mk_new_text->('');
89 1047         749626 $current_text_node->{literal_text} .= $txt_to_add;
90 1046         2419 };
91              
92             # closure to clear the current text node
93             my $maybe_clear_current_text_node = sub {
94 1046 100   1046   2336 if ($current_text_node) {
95 1036 50       2715 if (is_at_run_level($xml)) {
96 0         0 $add_new_run->($current_text_node);
97             }
98             else {
99 1036         2333 $xml .= $current_text_node->as_xml;
100             }
101 1036         3849 $current_text_node = undef;
102             }
103 1046         2480 };
104              
105             # find pattern within $self, each match becomes a fragment to handle
106 1046         9415 my @fragments = split qr[($pattern)], $self->{literal_text}, -1;
107 1046         2901 my $txt_after_last_match = pop @fragments;
108              
109             # loop to handle each match
110 1046         3613 while (my ($txt_before, $matched) = splice (@fragments, 0, 2)) {
111              
112             # new contents to replace the matched fragment
113 16 100       101 my $replacement_contents
    50          
114             = !ref $replacement ? $replacement
115             : $replacement->(matched => $matched,
116             (!$txt_before ? $maybe_xml_before->() : ()),
117             %args);
118              
119 16         89 my $replacement_is_xml = $replacement_contents =~ /^</;
120 16 50       37 if ($replacement_is_xml) {
121             # if there was text before the match, add it as a new run
122 0 0       0 if ($txt_before) {
123 0         0 $maybe_clear_current_text_node->();
124 0         0 $add_new_run->($mk_new_text->($txt_before));
125             }
126              
127             # add the xml that replaces the match
128 0         0 $xml .= $replacement_contents;
129             }
130             else { # $replacement_contents is not xml but just literal text
131 16   50     65 $add_to_current_text_node->(($txt_before // '') . $replacement_contents);
132             }
133             }
134              
135             # handle remaining contents after the last match
136 1046 100       2305 if ($txt_after_last_match) {
137 1031         2217 $add_to_current_text_node->($txt_after_last_match);
138             }
139 1046         2877 $maybe_clear_current_text_node->();
140 1046 100       101421 if ($xml_before) {
141 10 50       22 !$xml or croak "internal error : Text::xml_before was ignored during replacements";
142 10         15 $xml = $xml_before;
143             }
144              
145 1046         12617 return $xml;
146             }
147              
148              
149              
150             sub to_uppercase {
151 1     1 1 3 my $self = shift;
152              
153             # split text fragments around HTML entities
154 1         15 my @fragments = split /(&\w+?;)/, $self->{literal_text};
155 1         4 my $txt_after_last_entity = pop @fragments;
156 1         3 my $txt_upcase = "";
157              
158             # assemble upcased text fragments
159 1         10 while (my ($txt_before, $entity) = splice (@fragments, 0, 2)) {
160 1         11 $txt_upcase .= uc($txt_before) . $entity;
161             }
162 1         3 $txt_upcase .= uc($txt_after_last_entity);
163              
164             # return the upcased text
165 1         5 $self->{literal_text} = $txt_upcase;
166             }
167              
168              
169             1;
170              
171             __END__
172              
173             =encoding ISO-8859-1
174              
175             =head1 NAME
176              
177             MsOffice::Word::Surgeon::Text - internal representation for a node of literal text
178              
179             =head1 DESCRIPTION
180              
181             This is used internally by L<MsOffice::Word::Surgeon> for storing
182             a chunk of literal text in a MsWord document. It loosely corresponds to
183             a C<< <w:t> >> node in OOXML, but may also contain an anonymous XML
184             fragment which is the part of the document just before the C<< <w:t> >>
185             node -- used for reconstructing the complete document after having changed
186             the contents of some text nodes.
187              
188              
189             =head1 METHODS
190              
191             =head2 new
192              
193             my $text_node = MsOffice::Word::Surgeon::Text(
194             xml_before => $xml_string,
195             literal_text => $text_string,
196             );
197              
198             Constructor for a new text object. Arguments are :
199              
200             =over
201              
202             =item xml_before
203              
204             A string containing arbitrary XML preceding that text node in the complete document.
205             The string may be empty but must be present.
206              
207              
208             =item literal_text
209              
210             A string of literal text.
211              
212             =back
213              
214              
215              
216             =head2 as_xml
217              
218             my $xml = $text_node->as_xml;
219              
220             Returns the XML representation of that text node.
221             The attribute C<< xml:space="preserve" >> is automatically added
222             if the literal text starts of ends with a space character.
223              
224              
225             =head2 merge
226              
227             $text_node->merge($next_text_node);
228              
229             Merge the contents of C<$next_text_node> together with the current text node.
230             This is only possible if the next text node has
231             an empty C<xml_before> attribute; if this condition is not met,
232             an exception is raised.
233              
234             =head2 replace
235              
236             my $xml = $text_node->replace($pattern, $replacement_callback, %args);
237              
238             Replaces all occurrences of C<$pattern> within the text node by
239             a new string computed by C<$replacement_callback>, and returns a new xml
240             string corresponding to the result of all these replacements. This is the
241             internal implementation for public method
242             L<MsOffice::Word::Surgeon/replace>.
243              
244             =head2 to_uppercase
245              
246             Puts the literal text within the node into uppercase letters.
247              
248              
249             =head1 AUTHOR
250              
251             Laurent Dami, E<lt>dami AT cpan DOT org<gt>
252              
253             =head1 COPYRIGHT AND LICENSE
254              
255             Copyright 2019-2022 by Laurent Dami.
256              
257             This library is free software; you can redistribute it and/or modify
258             it under the same terms as Perl itself.
259