File Coverage

blib/lib/Locale/Maketext/Utils/Phrase/Norm/Ellipsis.pm
Criterion Covered Total %
statement 66 73 90.4
branch 27 28 96.4
condition 3 3 100.0
subroutine 3 3 100.0
pod 0 1 0.0
total 99 108 91.6


line stmt bran cond sub pod time code
1             package Locale::Maketext::Utils::Phrase::Norm::Ellipsis;
2              
3 4     4   1944 use strict;
  4         4  
  4         89  
4 4     4   11 use warnings;
  4         4  
  4         2852  
5              
6             sub normalize_maketext_string {
7 86     86 0 71 my ($filter) = @_;
8              
9 86         138 my $string_sr = $filter->get_string_sr();
10              
11             # 1. placeholder for BN w/ empty string args: ',,'
12 86         78 while ( ${$string_sr} =~ m/(\[.*?\])/g ) { # see note about this regex in Consider.pm
  182         525  
13 96         131 my $bn_match = $1;
14 96 50       166 if ( $bn_match =~ m/[,]{2,}/ ) {
15 0         0 my $bn_match_tmp = $bn_match;
16 0         0 $bn_match_tmp =~ s/([,]{2,})/my $n=CORE::length("$1");"MULTI_COMMA_IN_BN_$n"/ge;
  0         0  
  0         0  
17 0         0 ${$string_sr} =~ s/\Q$bn_match\E/$bn_match_tmp/;
  0         0  
18             }
19             }
20              
21             # 2. look for multi's
22 86 100       59 if ( ${$string_sr} =~ s/(?:[.]{2,}|[,]{2,})/…/g ) {
  86         518  
23 8         19 $filter->add_warning('multiple period/comma instead of ellipsis character');
24             }
25              
26             # 3. restore placeholder
27 86         67 ${$string_sr} =~ s/MULTI_COMMA_IN_BN_([0-9]+)/"," x "$1"/eg;
  86         103  
  0         0  
28              
29             # TODO: output,latin so this occurrence is more rare:
30             # if ( ${$string_sr} =~ s/([,.]{2,})/\[comment,should “$1” here be an ellipsis?\]/g ) {
31             # $filter->add_warning('multiple concurrent period and comma');
32             # }
33              
34 86 100       60 if ( ${$string_sr} =~ s/^(|\xc2\xa0|\[output\,nbsp\])…/ …/ ) {
  86         194  
35 8         18 $filter->add_warning('initial ellipsis should be preceded by a normal space');
36             }
37              
38             # 1. placeholders for legit ones
39 86         80 my %l;
40 86         55 my $copy = ${$string_sr};
  86         79  
41 86 100       70 if ( ${$string_sr} =~ s/((?:\x20|\xc2\xa0|\[output\,nbsp\])…[\!\?\.\:])$/ELLIPSIS_END/ ) { # final
  86         279  
42 8         23 $l{'ELLIPSIS_END'} = $1;
43             }
44              
45 86 100       62 if ( ${$string_sr} =~ s/^( …(?:\x20|\xc2\xa0|\[output\,nbsp\]))/ELLIPSIS_START/ ) { # initial
  86         155  
46 6         14 $l{'ELLIPSIS_START'} = $1;
47             }
48              
49 86         65 while ( ${$string_sr} =~ m/(\(|\x20|\xc2\xa0|\[output\,nbsp\])…(\)|\x20|\xc2\xa0|\[output\,nbsp\])/g ) {
  148         439  
50 62         35 ${$string_sr} =~ s/(\(|\x20|\xc2\xa0|\[output\,nbsp\])…(\)|\x20|\xc2\xa0|\[output\,nbsp\])/ELLIPSIS_MEDIAL/;
  62         247  
51 62         43 push @{ $l{'ELLIPSIS_MEDIAL'} }, [ $1, $2 ];
  62         136  
52             }
53              
54             # 2. mark any remaining ones (that are not legit)
55 86 100       69 if ( ${$string_sr} =~ s/\A …(?!\x20|\xc2\xa0|\[output\,nbsp\])/ … / ) {
  86         153  
56 8         19 $filter->add_warning('initial ellipsis should be followed by a normal space or a non-break-space (in bracket notation or character form)');
57             }
58              
59 86 100       54 if ( ${$string_sr} =~ s/…(?:\x20|\xc2\xa0|\[output\,nbsp\]|\s)+\z/…/ ) {
  86         194  
60 8         17 $filter->add_warning('final ellipsis should be followed by a valid punctuation mark or nothing');
61             }
62              
63 86 100 100     62 if ( ${$string_sr} =~ m/…\z/ && ${$string_sr} !~ m/(?:\x20|\xc2\xa0|\[output\,nbsp\])…\z/ ) {
  86         179  
  16         73  
64 8         3 ${$string_sr} =~ s/…$/ …/;
  8         22  
65 8         14 $filter->add_warning('final ellipsis should be preceded by a normal space or a non-break-space (in bracket notation or character form)');
66             }
67              
68 86         80 my $medial_prob = 0;
69 86 100       65 if ( ${$string_sr} =~ s/(.{1})((?:(?
  86         813  
70 8         9 $medial_prob++;
71             }
72              
73 86 100       69 if ( ${$string_sr} =~ s/(.{2})…(?!\x20|\xc2\xa0|\[output\,nbsp\]|\z)(.{1})/$1… $2/g ) {
  86         190  
74 8         7 $medial_prob++;
75             }
76              
77 86 100       115 if ($medial_prob) {
78 8         22 $filter->add_warning('medial ellipsis should be surrounded on each side by a parenthesis or normal space or a non-break-space (in bracket notation or character form)');
79             }
80              
81             # 3. reconstruct the valid ones
82 86 100       140 ${$string_sr} =~ s/ELLIPSIS_END/$l{'ELLIPSIS_END'}/ if exists $l{'ELLIPSIS_END'};
  8         31  
83 86 100       119 ${$string_sr} =~ s/ELLIPSIS_START/$l{'ELLIPSIS_START'}/ if exists $l{'ELLIPSIS_START'};
  6         23  
84 86 100       140 if ( exists $l{'ELLIPSIS_MEDIAL'} ) {
85 14         12 for my $medial ( @{ $l{'ELLIPSIS_MEDIAL'} } ) {
  14         20  
86 62         38 ${$string_sr} =~ s/ELLIPSIS_MEDIAL/$medial->[0]…$medial->[1]/;
  62         181  
87             }
88             }
89              
90 86         154 return $filter->return_value;
91             }
92              
93             1;
94              
95             __END__