| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package Encode::DoubleEncodedUTF8; |
|
2
|
|
|
|
|
|
|
|
|
3
|
2
|
|
|
2
|
|
24260
|
use strict; |
|
|
2
|
|
|
|
|
5
|
|
|
|
2
|
|
|
|
|
78
|
|
|
4
|
2
|
|
|
2
|
|
11
|
use base qw( Encode::Encoding ); |
|
|
2
|
|
|
|
|
3
|
|
|
|
2
|
|
|
|
|
8855
|
|
|
5
|
2
|
|
|
2
|
|
33094
|
use Encode 2.12 (); |
|
|
2
|
|
|
|
|
84
|
|
|
|
2
|
|
|
|
|
538
|
|
|
6
|
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
our $VERSION = '0.05'; |
|
8
|
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
__PACKAGE__->Define('utf-8-de'); |
|
10
|
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
my $latin1_as_utf8 = "[\xC2\xC3][\x80-\xBF]"; |
|
12
|
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
# (Taken from Test::utf8 module) |
|
14
|
|
|
|
|
|
|
# A Regexp string to match valid UTF8 bytes |
|
15
|
|
|
|
|
|
|
# this info comes from page 78 of "The Unicode Standard 4.0" |
|
16
|
|
|
|
|
|
|
# published by the Unicode Consortium |
|
17
|
|
|
|
|
|
|
my $valid_utf8_regexp = <<'.' ; |
|
18
|
|
|
|
|
|
|
[\x{00}-\x{7f}] |
|
19
|
|
|
|
|
|
|
| [\x{c2}-\x{df}][\x{80}-\x{bf}] |
|
20
|
|
|
|
|
|
|
| \x{e0} [\x{a0}-\x{bf}][\x{80}-\x{bf}] |
|
21
|
|
|
|
|
|
|
| [\x{e1}-\x{ec}][\x{80}-\x{bf}][\x{80}-\x{bf}] |
|
22
|
|
|
|
|
|
|
| \x{ed} [\x{80}-\x{9f}][\x{80}-\x{bf}] |
|
23
|
|
|
|
|
|
|
| [\x{ee}-\x{ef}][\x{80}-\x{bf}][\x{80}-\x{bf}] |
|
24
|
|
|
|
|
|
|
| \x{f0} [\x{90}-\x{bf}][\x{80}-\x{bf}] |
|
25
|
|
|
|
|
|
|
| [\x{f1}-\x{f3}][\x{80}-\x{bf}][\x{80}-\x{bf}][\x{80}-\x{bf}] |
|
26
|
|
|
|
|
|
|
| \x{f4} [\x{80}-\x{8f}][\x{80}-\x{bf}][\x{80}-\x{bf}] |
|
27
|
|
|
|
|
|
|
. |
|
28
|
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
sub decode { |
|
30
|
6
|
|
|
6
|
1
|
174
|
my($obj, $buf, $chk) = @_; |
|
31
|
|
|
|
|
|
|
|
|
32
|
6
|
|
|
|
|
54
|
$buf =~ s{((?:$latin1_as_utf8){2,3})}{ _check_utf8_bytes($1) }ego; |
|
|
7
|
|
|
|
|
18
|
|
|
33
|
6
|
50
|
|
|
|
22
|
$_[1] = '' if $chk; # this is what in-place edit means |
|
34
|
|
|
|
|
|
|
|
|
35
|
6
|
|
|
|
|
28
|
Encode::decode_utf8($buf); |
|
36
|
|
|
|
|
|
|
} |
|
37
|
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
sub _check_utf8_bytes { |
|
39
|
7
|
|
|
7
|
|
18
|
my $bytes = shift; |
|
40
|
7
|
|
|
|
|
11
|
my $copy = $bytes; |
|
41
|
|
|
|
|
|
|
|
|
42
|
7
|
|
|
|
|
10
|
my $possible_utf8 = ''; |
|
43
|
7
|
|
|
|
|
35
|
while ($copy =~ s/^(.)(.)//) { |
|
44
|
20
|
|
|
|
|
202
|
$possible_utf8 .= chr( (ord($1) << 6 & 0xff) | ord($2) ) |
|
45
|
|
|
|
|
|
|
} |
|
46
|
|
|
|
|
|
|
|
|
47
|
7
|
50
|
|
|
|
115
|
$possible_utf8 =~ /$valid_utf8_regexp/xo ? $possible_utf8 : $bytes; |
|
48
|
|
|
|
|
|
|
} |
|
49
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
sub encode { |
|
51
|
2
|
|
|
2
|
|
13
|
use Carp; |
|
|
2
|
|
|
|
|
5
|
|
|
|
2
|
|
|
|
|
235
|
|
|
52
|
0
|
|
|
0
|
1
|
|
Carp::croak("utf-8-de doesn't support encode() ... Why do you want to do that?"); |
|
53
|
|
|
|
|
|
|
} |
|
54
|
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
1; |
|
56
|
|
|
|
|
|
|
__END__ |