line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Locale::Maketext::Utils::Phrase::Norm::WhiteSpace; |
2
|
|
|
|
|
|
|
|
3
|
4
|
|
|
4
|
|
3539
|
use strict; |
|
4
|
|
|
|
|
11
|
|
|
4
|
|
|
|
|
135
|
|
4
|
4
|
|
|
4
|
|
21
|
use warnings; |
|
4
|
|
|
|
|
8
|
|
|
4
|
|
|
|
|
95
|
|
5
|
|
|
|
|
|
|
|
6
|
4
|
|
|
4
|
|
10060
|
use Encode (); |
|
4
|
|
|
|
|
66257
|
|
|
4
|
|
|
|
|
4037
|
|
7
|
|
|
|
|
|
|
|
8
|
|
|
|
|
|
|
# The two whitespace characters a phrase is allowed to contain: SPACE (\x20)
# and NO-BREAK SPACE, written here as its UTF-8 byte sequence (\xc2\xa0).
my $space_and_no_break_space = qr/(?:\x20|\xc2\xa0)/;

# regex is made from the Unicode code points from: `unichars '\p{WhiteSpace}'` (sans SPACE and NO-BREAK SPACE)
my $disallowed_whitespace = qr/(?:\x09|\x0a|\x0b|\x0c|\x0d|\xc2\x85|\xe1\x9a\x80|\xe1\xa0\x8e|\xe2\x80\x80|\xe2\x80\x81|\xe2\x80\x82|\xe2\x80\x83|\xe2\x80\x84|\xe2\x80\x85|\xe2\x80\x86|\xe2\x80\x87|\xe2\x80\x88|\xe2\x80\x89|\xe2\x80\x8a|\xe2\x80\xa8|\xe2\x80\xa9|\xe2\x80\xaf|\xe2\x81\x9f|\xe3\x80\x80)/;

# regex is made from the Unicode code points from: `uninames invisible`
my $invisible = qr/(?:\xe2\x80\x8b|\xe2\x81\xa2|\xe2\x81\xa3|\xe2\x81\xa4)/;

# regex is made from the Unicode code points from: `unichars '\p{Control}'`
my $control =
  qr/(?:\x00|\x01|\x02|\x03|\x04|\x05|\x06|\x07|\x08|\x09|\x0a|\x0b|\x0c|\x0d|\x0e|\x0f|\x10|\x11|\x12|\x13|\x14|\x15|\x16|\x17|\x18|\x19|\x1a|\x1b|\x1c|\x1d|\x1e|\x1f|\x7f|\xc2\x80|\xc2\x81|\xc2\x82|\xc2\x83|\xc2\x84|\xc2\x85|\xc2\x86|\xc2\x87|\xc2\x88|\xc2\x89|\xc2\x8a|\xc2\x8b|\xc2\x8c|\xc2\x8d|\xc2\x8e|\xc2\x8f|\xc2\x90|\xc2\x91|\xc2\x92|\xc2\x93|\xc2\x94|\xc2\x95|\xc2\x96|\xc2\x97|\xc2\x98|\xc2\x99|\xc2\x9a|\xc2\x9b|\xc2\x9c|\xc2\x9d|\xc2\x9e|\xc2\x9f)/;

# normalize_maketext_string($filter)
#
# Normalizes the whitespace of the string held by $filter, editing it in
# place through the scalar reference returned by $filter->get_string_sr()
# and reporting each kind of problem once via $filter->add_violation():
#
#   1. every disallowed whitespace, invisible, or control character is
#      replaced by a "[comment,invalid char UxHHHH]" marker;
#   2. leading SPACE / NO-BREAK SPACE is removed (a single normal space is
#      kept in front of a leading ellipsis);
#   3. trailing SPACE / NO-BREAK SPACE is removed;
#   4. every internal run of two or more SPACE / NO-BREAK SPACE characters
#      is collapsed to one normal space.
#
# All patterns are UTF-8 byte sequences, so the string is presumably an
# undecoded byte string -- TODO confirm against the caller.
#
# Returns whatever $filter->return_value returns.
sub normalize_maketext_string {
    my ($filter) = @_;

    my $string_sr = $filter->get_string_sr();

    # detect any whitespace-ish characters that are not ' ' or "\xC2\xA0" (non-break-space)
    # Each offender is swapped for a visible marker carrying its code point
    # in hex, so the problem shows up in the normalized phrase.
    my $replaced_invalid = ${$string_sr} =~ s{
        ( $disallowed_whitespace | $invisible | $control )
    }{
        my $uh = sprintf( '%04X', unpack( 'U', Encode::decode_utf8($1) ) );
        "[comment,invalid char Ux$uh]"
    }gexms;
    if ($replaced_invalid) {
        $filter->add_violation('Invalid whitespace, control, or invisible characters');
    }

    # The only WS possible after that is $space_and_no_break_space

    # remove beginning and trailing white space
    # A string that already begins with exactly one normal space followed by
    # an ellipsis is left alone. There is deliberately no /x on this guard,
    # so the space in the pattern is a literal space.
    if ( ${$string_sr} !~ m/\A \xE2\x80\xA6/ms && ${$string_sr} =~ s/\A($space_and_no_break_space+)//xms ) {
        my $startswith = $1;

        # What was stripped sat in front of an ellipsis: put a single normal
        # space back, and flag the case where a NO-BREAK SPACE was used.
        if ( substr( ${$string_sr}, 0, 3 ) eq "\xE2\x80\xA6" ) {
            if ( $startswith =~ m/\xc2\xa0/ ) {
                $filter->add_violation('Beginning ellipsis space should be a normal space');
            }
            ${$string_sr} = " ${$string_sr}";
        }

        $filter->add_violation('Beginning white space');
    }

    if ( ${$string_sr} =~ s/(?:$space_and_no_break_space)+\z//xms ) {
        $filter->add_violation('Trailing white space');
    }

    # collapse internal white space into a single space
    # /g is required so that every run is collapsed, not just the first one.
    # The explicit (?:...) group keeps "{2,}" unambiguously a quantifier
    # rather than something that looks like a hash subscript.
    if ( ${$string_sr} =~ s/(?:$space_and_no_break_space){2,}/ /xmsg ) {
        $filter->add_violation('Multiple internal white space');
    }

    return $filter->return_value;
}
57
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
1; |
59
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
__END__ |