File Coverage

blib/lib/I22r/Translate/Filter/Literal.pm
Criterion Covered Total %
statement 49 55 89.0
branch 8 16 50.0
condition 6 13 46.1
subroutine 6 7 85.7
pod 0 2 0.0
total 69 93 74.1


line stmt bran cond sub pod time code
1             package I22r::Translate::Filter::Literal;
2 3     3   689 use Carp;
  3         5  
  3         271  
3 3     3   883 use Moose;
  3         490765  
  3         63  
4             with 'I22r::Translate::Filter';
5              
6             our $VERSION = '0.96';
7              
8             our $DOUBLE_BRACES_WRAP_LITERAL = 1;
9             our $DOUBLE_BRACES_WRAP_PARAMETER_LITERAL = 1;
10             our $HTML_ENTITIES_ARE_LITERAL = 1;
11              
12             sub apply {
13 26     26 0 1557 my ($self, $req, $key) = @_;
14              
15 26         1059 local $_ = $req->text->{$key};
16 26   50     224 my $keymap = $self->{map}{$key} //= { __keys__ => [] };
17              
18 26 50       75 if ($DOUBLE_BRACES_WRAP_PARAMETER_LITERAL) {
19 26         58 s[ ( \{\{ _\d+ \}\} ) ]
20 0         0 [ $self->_literal_transform( $keymap, $1 ) ]gexs;
21             }
22              
23 26 50       61 if ($HTML_ENTITIES_ARE_LITERAL) {
24 26         44 s[ (&[#]?\w+;) ][
25 0         0 $self->_literal_transform( $keymap, $1 )
26             ]gexs;
27             }
28              
29 26 50       58 if ($DOUBLE_BRACES_WRAP_LITERAL) {
30 26         123 s[ \{\{ ([^_].*?) \}\} ]
31             [
32 14         73 $self->_literal_transform( $keymap, $1, ['{{','}}'] )
33             ]gexs;
34             }
35              
36             s{ (\[lit(?:eral)?\]) (.*?) (\[/lit(?:eral)?\]) }
37 2         13 [ $self->_literal_transform( $keymap, $2, [$1,$3] )
38 26         61 ]gexs;
39              
40 26         55 s{ (<\s*span(?:[^>]*) lang=["']..['"](?:[^>]*)>)
41             (.*?)
42             (<\s*/\s*span\s*>) }
43 1         7 [ $self->_literal_transform( $keymap, $2, [$1,$3] ) ]gexs;
44              
45 26         1061 $req->text->{$key} = $_;
46 26         152 return;
47             }
48              
49             sub unapply {
50 26     26 0 4445 my ($self, $req, $key) = @_;
51              
52             ### remove next line when bare filter tests are fixed. See ...::Filter::HTML
53 26         2278 $req->text->{$key} = $self->_unapply( $req, $key, $req->text->{$key}, 1 );
54 26 100       972 if ($req->results->{$key}) {
55             $req->results->{$key}{text} =
56 21         723 $self->_unapply( $req, $key, $req->results->{$key}->text, 0 );;
57             }
58             }
59              
60             sub _unapply {
61 47     47   82 my ($self, $req, $key, $topic, $apply_m2) = @_;
62 47         65 local $_ = $topic;
63              
64 47         76 my $keymap = $self->{map}{$key};
65              
66 47         49 foreach my $enc (reverse @{$keymap->{__keys__}}) {
  47         102  
67 30         46 my $mapping = $keymap->{$enc};
68 30 50       66 next if !defined $mapping;
69              
70 30         45 my $element = $mapping->[1];
71 30 50 66     116 if ($apply_m2 && $mapping->[2]) {
72 17         50 $element = $mapping->[2][0] . $element . $mapping->[2][1];
73             }
74 30 50       83 if ($enc ne lc $enc) {
75             # sometimes external translator will change case of the
76             # literal placeholder, e.g. _XZX_ => _xzx_
77 30         43 my $lc_enc = lc $enc;
78 30         360 s/(?<!#|_)$lc_enc(?!#|_)/$enc/i;
79             }
80             s/(?<!#|_)$enc(?!#|_)/$element/ or
81             s/(?<!#)$enc(?!#)/$element/ or
82             $self->_untransform2($_,$enc,$element) or
83 30 0 33     335 do {
      33        
84 0         0 carp "Could not find place to restore ",
85             "literal text $element with encoding ",
86             "$enc in translated text result $_\n";
87             };
88             }
89 47         1798 return $_;
90             }
91              
92             sub _literal_transform {
93 17     17   52 my ($self, $map,$element,$unmapping) = @_;
94 17   50     81 my $mapping = [ 'literal', $element, $unmapping // ['', ''] ];
95             # my $subst = '###';
96             # while (defined $map->{$subst}) {
97             # $subst .= '#';
98             # }
99              
100 17         27 my $subst = "_XZX_";
101 17         66 while (defined $map->{$subst}) {
102 0         0 $subst =~ s/^_/_XZ/;
103             }
104              
105 17         38 $map->{$subst} = $mapping;
106 17         25 push @{$map->{__keys__}}, $subst;
  17         50  
107 17         88 return $subst;
108             }
109              
110             sub _untransform2 {
111 0     0     my ($string, $encoding, $replacement) = @_;
112 0           return 0;
113             }
114              
115             1;
116              
117             __END__
118              
119             =head1 NAME
120              
121             I22r::Translate::Filter::Literal - protect text in input to I22r::Translate
122              
123             =head1 SYNOPSIS
124              
125             I22r::Translate->config(
126             ...,
127             filter => [ 'Literal' ]
128             );
129              
130             $t = I22r::Translate->translate_string(
131             src => ..., dest => ..., text => 'string with Proper Nouns',
132             filter => [ 'Literal' ] );
133              
134             =head1 DESCRIPTION
135              
136             A preprocessing and postprocessing filter that recognizes words or
137             phrases with particular markup, and prevents that text from
138             being altered in a translation engine.
139              
140             Sometimes, content that you wish to translate may contain words
141             or phrases that you I<don't> want to translate.
142              
143             My friend Paul Fisher lives in Key West.
144              
145             The French word for cat is "chat".
146              
147             If you wished to translate these sentences into, say, Spanish,
148             you would probably B<not> want some of those words to be translated,
149             including all the proper nouns and the "foreign" word which also
150             happens to have the same spelling as an English word. That is, you
151             would much prefer a translation output like
152              
153             Mi amigo Paul Fisher vive en Key West.
154              
155             La palabra francesa para gato es "chat".
156              
157             rather than
158              
159             Mi amigo Pablo Pescador vive en Clave Oeste.
160              
161             La palabra francesa para gato es "charlar".
162              
163             The C<I22r::Translate::Filter::Literal> filter will recognize
164             certain markup in translation input and interpret it as an
165             instruction to hide certain words from the translation engine.
166             The untranslated words will then be (hopefully) restored to
167             the correct place in the translated output.
168              
169             =head1 MARKUP
170              
171             The C<I22r::Translate::Filter::Literal> filter recognizes
172             any of the following ways to protect parts of the input
173             from being seen by the translators:
174              
175             =head2 double braces
176              
177             Parts of the input enclosed in a set of double braces will
178             be protected from the translator.
179              
180             The French word for cat is {{"chat"}}.
181              
182             =head2 lit and literal pseudo tags
183              
184             Text enclosed in C<[lit]...[/lit]> or C<[literal]...[/literal]>
185             tokens will be protected from the translator.
186              
187             My friend Mr. [lit]Wong[/lit] lives in [literal]Los Angeles[/literal].
188              
189             =head2 span tag with lang attribute
190              
191             Text inside a C<< <span> >> tag with an attribute called C<< lang >>
192             will be protected from the translator. This is somewhat of a convention
193             for identifying the source language of some text in an HTML document,
194             and it wouldn't be sensible for a translator to render text inside those
195             tags in another language.
196              
197             The French word for "hat" is <span lang="fr">"chapeau"</span>.
198              
199             Note that if your input uses this construction and you also intend to
200             pass translation input through the L<I22r::Translate::Filter::HTML>
201             filter, you should include this filter first, or the
202             C<< <span>...</span> >> tags will not be visible to this filter.
203             That is, you should specify
204              
205             filter => [ 'Literal', 'HTML' ]
206              
207             rather than
208              
209             filter => [ 'HTML', 'Literal' ]
210              
211              
212             These markup specifications are kind of arbitrary. More may be
213             added and some may be removed in future releases of this module.
214             Send me a note (C<< mob at cpan.org >>) if you have an opinion
215             one way or the other about what is a good way to specify
216             protected text.
217              
218             =head1 SEE ALSO
219              
220             L<I22r::Translate::Filter>, L<I22r::Translate::Filter::HTML>,
221             L<I22r::Translate>.
222              
223             =cut