File Coverage

blib/lib/Localizer/Style/Maketext.pm
Criterion Covered Total %
statement 103 131 78.6
branch 42 74 56.7
condition 11 21 52.3
subroutine 9 9 100.0
pod 0 2 0.0
total 165 237 69.6


line stmt bran cond sub pod time code
1             package Localizer::Style::Maketext;
2 5     5   26518 use strict;
  5         11  
  5         498  
3 4     4   20 use warnings;
  4         9  
  4         259  
4 4     4   19 use utf8;
  4         8  
  4         204  
5 4     4   118 use 5.010_001;
  4         11  
  4         6280  
6              
7             sub DEBUG () { 0 }
8              
9 3     3 0 2150 sub new { bless {}, shift }
10              
11             sub compile {
12 17     17 0 1814 my ($self, $msgid, $fmt, $functions) = @_;
13 17         40 my $code = $self->_compile($msgid, $fmt, $functions);
14 16         46 return $code;
15             }
16              
17             # Based on Locale::Maketext::_compile
18             sub _compile {
19             # This big scary routine compiles an entry.
20             # It returns either a coderef if there's brackety bits in this, or
21             # otherwise a ref to a scalar.
22              
23 17     17   24 my $msgid = $_[1];
24 17         21 my $string_to_compile = $_[2]; # There are taint issues using regex on @_ - perlbug 60378,27344
25 17         20 my $functions = $_[3];
26              
27             # The while() regex is more expensive than this check on strings that don't need a compile.
28             # this op causes a ~2% speed hit for strings that need compile and a 250% speed improvement
29             # on strings that don't need compiling.
30 17 100       78 return \"$string_to_compile" if($string_to_compile !~ m/[\[~\]]/ms); # return a string ref if chars [~] are not in the string
31              
32 10   33     30 my $target = ref($_[0]) || $_[0];
33              
34 10         12 my(@code);
35 10         21 my(@c) = (''); # "chunks" -- scratch.
36 10         14 my $call_count = 0;
37 10         14 my $big_pile = '';
38             {
39 10         10 my $in_group = 0; # start out outside a group
  10         13  
40 10         22 my($m, @params); # scratch
41              
42 10         51 while($string_to_compile =~ # Iterate over chunks.
43             m/(
44             [^\~\[\]]+ # non-~[] stuff (Capture everything else here)
45             |
46             ~. # ~[, ~], ~~, ~other
47             |
48             \[ # [ presumably opening a group
49             |
50             \] # ] presumably closing a group
51             |
52             ~ # terminal ~ ?
53             |
54             $
55             )/xgs
56             ) {
57 60         71 DEBUG>2 and warn qq{ "$1"\n};
58              
59 60 100 100     324 if($1 eq '[' or $1 eq '') { # "[" or end
    100          
    50          
    0          
    0          
    0          
    0          
    0          
60             # Whether this is "[" or end, force processing of any
61             # preceding literal.
62 22 50       42 if($in_group) {
63 0 0       0 if($1 eq '') {
64 0         0 $target->_die_pointing($string_to_compile, 'Unterminated bracket group');
65             }
66             else {
67 0         0 $target->_die_pointing($string_to_compile, 'You can\'t nest bracket groups');
68             }
69             }
70             else {
71 22 100       42 if ($1 eq '') {
72 9         11 DEBUG>2 and warn " [end-string]\n";
73             }
74             else {
75 13         20 $in_group = 1;
76             }
77 22 50       46 die "How come \@c is empty?? in <$string_to_compile>" unless @c; # sanity
78 22 100       91 if(length $c[-1]) {
79             # Now actually processing the preceding literal
80 10         16 $big_pile .= $c[-1];
81 10 100       28 if ($c[-1] !~ m/[^\x20-\x7E]/s) { # ASCII very safe chars
82             # normal case -- all very safe chars
83 7         12 $c[-1] =~ s/'/\\'/g;
84 7         15 push @code, q{ '} . $c[-1] . "',\n";
85 7         30 $c[-1] = ''; # reuse this slot
86             }
87             else {
88 3         10 $c[-1] =~ s/\\\\/\\/g;
89 3         11 push @code, ' $c[' . $#c . "],\n";
90 3         19 push @c, ''; # new chunk
91             }
92             }
93             # else just ignore the empty string.
94             }
95              
96             }
97             elsif($1 eq ']') { # "]"
98             # close group -- go back in-band
99 14 100       34 if($in_group) {
100 13         19 $in_group = 0;
101              
102 13         10 DEBUG>2 and warn " --Closing group [$c[-1]]\n";
103              
104             # And now process the group...
105              
106 13 50 33     113 if(!length($c[-1]) or $c[-1] =~ m/^\s+$/s) {
107 0         0 DEBUG>2 and warn " -- (Ignoring)\n";
108 0         0 $c[-1] = ''; # reset out chink
109 0         0 next;
110             }
111              
112             #$c[-1] =~ s/^\s+//s;
113             #$c[-1] =~ s/\s+$//s;
114 13         47 ($m,@params) = split(/,/, $c[-1], -1); # was /\s*,\s*/
115              
116             # A bit of a hack -- we've turned "~,"'s into DELs, so turn
117             # 'em into real commas here.
118 13         29 foreach($m, @params) { tr/\x7F/,/ }
  20         44  
119              
120             # Special-case handling of some method names:
121 13 100 100     115 if($m eq '_*' or $m =~ m/^_(-?\d+)$/s) {
    100          
    100          
122             # Treat [_1,...] as [,_1,...], etc.
123 9         17 unshift @params, $m;
124 9         12 $m = '';
125             }
126             elsif($m eq '*') {
127 1         3 $m = 'quant'; # "*" for "times": "4 cars" is 4 times "cars"
128             }
129             elsif($m eq '#') {
130 1         2 $m = 'numf'; # "#" for "number": [#,_1] for "the number _1"
131             }
132              
133             # Most common case: a simple, legal-looking method name
134 13 100       53 if($m eq '') {
    50          
135             # 0-length method name means to just interpolate:
136 9         13 push @code, ' (';
137             }
138             elsif($m =~ /^\w+$/s
139             # exclude anything fancy, especially fully-qualified module names
140             ) {
141 4 50       13 unless (exists $functions->{$m}) {
142 0         0 Carp::confess("Language resource compilation error. Unknown function: '${m}'");
143             }
144 4         13 push @code, ' $functions->{"' . $m . '"}->(';
145             }
146             else {
147             # TODO: implement something? or just too icky to consider?
148 0         0 $target->_die_pointing(
149             $string_to_compile,
150             "Can't use \"$m\" as a method name in bracket group",
151             2 + length($c[-1])
152             );
153             }
154              
155 13         17 pop @c; # we don't need that chunk anymore
156 13         17 ++$call_count;
157              
158 13         26 foreach my $p (@params) {
159 16 100       72 if($p eq '_*') {
    100          
    50          
160             # Meaning: all parameters except $_[0]
161 2         5 $code[-1] .= ' @_[1 .. $#_], ';
162             # and yes, that does the right thing for all @_ < 3
163             }
164             elsif($p =~ m/^_(-?\d+)$/s) {
165             # _3 meaning $_[3]
166 10         43 $code[-1] .= '$_[' . (0 + $1) . '], ';
167             }
168             elsif($p !~ m/[^\x20-\x7E]/s) { # ASCII very safe chars
169             # Normal case: a literal containing only safe characters
170 4         9 $p =~ s/'/\\'/g;
171 4         15 $code[-1] .= q{'} . $p . q{', };
172             }
173             else {
174             # Stow it on the chunk-stack, and just refer to that.
175 0         0 push @c, $p;
176 0         0 push @code, ' $c[' . $#c . '], ';
177             }
178             }
179 13         22 $code[-1] .= "),\n";
180              
181 13         54 push @c, '';
182             }
183             else {
184 1         5 $target->_die_pointing($string_to_compile, q{Unbalanced ']'});
185             }
186              
187             }
188             elsif(substr($1,0,1) ne '~') {
189             # it's stuff not containing "~" or "[" or "]"
190             # i.e., a literal blob
191 24         39 my $text = $1;
192 24         41 $text =~ s/\\/\\\\/g;
193 24         106 $c[-1] .= $text;
194              
195             }
196             elsif($1 eq '~~') { # "~~"
197 0         0 $c[-1] .= '~';
198              
199             }
200             elsif($1 eq '~[') { # "~["
201 0         0 $c[-1] .= '[';
202              
203             }
204             elsif($1 eq '~]') { # "~]"
205 0         0 $c[-1] .= ']';
206              
207             }
208             elsif($1 eq '~,') { # "~,"
209 0 0       0 if($in_group) {
210             # This is a hack, based on the assumption that no-one will actually
211             # want a DEL inside a bracket group. Let's hope that's it's true.
212 0         0 $c[-1] .= "\x7F";
213             }
214             else {
215 0         0 $c[-1] .= '~,';
216             }
217              
218             }
219             elsif($1 eq '~') { # possible only at string-end, it seems.
220 0         0 $c[-1] .= '~';
221              
222             }
223             else {
224             # It's a "~X" where X is not a special character.
225             # Consider it a literal ~ and X.
226 0         0 my $text = $1;
227 0         0 $text =~ s/\\/\\\\/g;
228 0         0 $c[-1] .= $text;
229             }
230             }
231             }
232              
233 9 50       22 if($call_count) {
234 9         23 undef $big_pile; # Well, nevermind that.
235             }
236             else {
237             # It's all literals! Ahwell, that can happen.
238             # So don't bother with the eval. Return a SCALAR reference.
239 0         0 return \$big_pile;
240             }
241              
242 9 50 33     57 die q{Last chunk isn't null??} if @c and length $c[-1]; # sanity
243 9         12 DEBUG and warn scalar(@c), " chunks under closure\n";
244 9 50       46 if(@code == 0) { # not possible?
    50          
245 0         0 DEBUG and warn "Empty code\n";
246 0         0 return \'';
247             }
248             elsif(@code > 1) { # most cases, presumably!
249 9         20 unshift @code, "join '',\n";
250             }
251 9         24 unshift @code, qq!#line 1 "${msgid}"\n!;
252 9         31 unshift @code, "use strict; sub {\n";
253 9         22 push @code, "}\n";
254              
255 9         11 DEBUG and warn @code;
256 4     4   25 my $sub = eval(join '', @code); ## no critic.
  4         13  
  4         843  
  9         684  
257 9 50       108 die "Language resource compilation error: $@ while evalling" . join('', @code) if $@; # Should be impossible.
258 9         36 return $sub;
259             }
260              
261             #--------------------------------------------------------------------------
262              
263             sub _die_pointing {
264             # This is used by _compile to throw a fatal error
265 1     1   2 my $target = shift; # class name
266             # ...leaving $_[0] the error-causing text, and $_[1] the error message
267              
268 1         4 my $i = index($_[0], "\n");
269              
270 1         1 my $pointy;
271 1 50       6 my $pos = pos($_[0]) - (defined($_[2]) ? $_[2] : 0) - 1;
272 1 50       3 if($pos < 1) {
273 0         0 $pointy = "^=== near there\n";
274             }
275             else { # we need to space over
276 1         3 my $first_tab = index($_[0], "\t");
277 1 50 33     8 if($pos > 2 and ( -1 == $first_tab or $first_tab > pos($_[0]))) {
      33        
278             # No tabs, or the first tab is harmlessly after where we will point to,
279             # AND we're far enough from the margin that we can draw a proper arrow.
280 1         5 $pointy = ('=' x $pos) . "^ near there\n";
281             }
282             else {
283             # tabs screw everything up!
284 0         0 $pointy = substr($_[0],0,$pos);
285 0         0 $pointy =~ tr/\t //cd;
286             # make everything into whitespace, but preserving tabs
287 0         0 $pointy .= "^=== near there\n";
288             }
289             }
290              
291 1         4 my $errmsg = "$_[1], in\:\n$_[0]";
292              
293 1 50       3 if($i == -1) {
    0          
294             # No newline.
295 1         3 $errmsg .= "\n" . $pointy;
296             }
297             elsif($i == (length($_[0]) - 1) ) {
298             # Already has a newline at end.
299 0         0 $errmsg .= $pointy;
300             }
301             else {
302             # don't bother with the pointy bit, I guess.
303             }
304 1         184 Carp::croak( "$errmsg via $target, as used" );
305             }
306              
307             1;
308             __END__