File Coverage

blib/lib/Encode/DoubleEncodedUTF8.pm
Criterion Covered Total %
statement 23 24 95.8
branch 2 4 50.0
condition n/a
subroutine 6 7 85.7
pod 2 2 100.0
total 33 37 89.1


line stmt bran cond sub pod time code
1             package Encode::DoubleEncodedUTF8;
2              
3 2     2   24260 use strict;
  2         5  
  2         78  
4 2     2   11 use base qw( Encode::Encoding );
  2         3  
  2         8855  
5 2     2   33094 use Encode 2.12 ();
  2         84  
  2         538  
6              
7             our $VERSION = '0.05';
8              
9             __PACKAGE__->Define('utf-8-de');
10              
11             my $latin1_as_utf8 = "[\xC2\xC3][\x80-\xBF]";
12              
13             # (Taken from Test::utf8 module)
14             # A Regexp string to match valid UTF8 bytes
15             # this info comes from page 78 of "The Unicode Standard 4.0"
16             # published by the Unicode Consortium
17             my $valid_utf8_regexp = <<'.' ;
18             [\x{00}-\x{7f}]
19             | [\x{c2}-\x{df}][\x{80}-\x{bf}]
20             | \x{e0} [\x{a0}-\x{bf}][\x{80}-\x{bf}]
21             | [\x{e1}-\x{ec}][\x{80}-\x{bf}][\x{80}-\x{bf}]
22             | \x{ed} [\x{80}-\x{9f}][\x{80}-\x{bf}]
23             | [\x{ee}-\x{ef}][\x{80}-\x{bf}][\x{80}-\x{bf}]
24             | \x{f0} [\x{90}-\x{bf}][\x{80}-\x{bf}]
25             | [\x{f1}-\x{f3}][\x{80}-\x{bf}][\x{80}-\x{bf}][\x{80}-\x{bf}]
26             | \x{f4} [\x{80}-\x{8f}][\x{80}-\x{bf}][\x{80}-\x{bf}]
27             .
28              
29             sub decode {
30 6     6 1 174 my($obj, $buf, $chk) = @_;
31              
32 6         54 $buf =~ s{((?:$latin1_as_utf8){2,3})}{ _check_utf8_bytes($1) }ego;
  7         18  
33 6 50       22 $_[1] = '' if $chk; # this is what in-place edit means
34              
35 6         28 Encode::decode_utf8($buf);
36             }
37              
38             sub _check_utf8_bytes {
39 7     7   18 my $bytes = shift;
40 7         11 my $copy = $bytes;
41              
42 7         10 my $possible_utf8 = '';
43 7         35 while ($copy =~ s/^(.)(.)//) {
44 20         202 $possible_utf8 .= chr( (ord($1) << 6 & 0xff) | ord($2) )
45             }
46              
47 7 50       115 $possible_utf8 =~ /$valid_utf8_regexp/xo ? $possible_utf8 : $bytes;
48             }
49              
50             sub encode {
51 2     2   13 use Carp;
  2         5  
  2         235  
52 0     0 1   Carp::croak("utf-8-de doesn't support encode() ... Why do you want to do that?");
53             }
54              
55             1;
56             __END__