File Coverage

blib/lib/Encode/DoubleEncodedUTF8.pm

Criterion	Covered	Total	%
statement	23	24	95.8
branch	2	4	50.0
condition			n/a
subroutine	6	7	85.7
pod	2	2	100.0
total	33	37	89.1

line	stmt	bran	sub	pod	time	code
1						package Encode::DoubleEncodedUTF8;
2
3	2		2		24260	use strict;
	2				5
	2				78
4	2		2		11	use base qw( Encode::Encoding );
	2				3
	2				8855
5	2		2		33094	use Encode 2.12 ();
	2				84
	2				538
6
7						our $VERSION = '0.05';
8
9						__PACKAGE__->Define('utf-8-de');
10
11						my $latin1_as_utf8 = "[\xC2\xC3][\x80-\xBF]";
12
13						# (Taken from Test::utf8 module)
14						# A Regexp string to match valid UTF8 bytes
15						# this info comes from page 78 of "The Unicode Standard 4.0"
16						# published by the Unicode Consortium
17						my $valid_utf8_regexp = <<'.' ;
18						[\x{00}-\x{7f}]
19						\| [\x{c2}-\x{df}][\x{80}-\x{bf}]
20						\| \x{e0} [\x{a0}-\x{bf}][\x{80}-\x{bf}]
21						\| [\x{e1}-\x{ec}][\x{80}-\x{bf}][\x{80}-\x{bf}]
22						\| \x{ed} [\x{80}-\x{9f}][\x{80}-\x{bf}]
23						\| [\x{ee}-\x{ef}][\x{80}-\x{bf}][\x{80}-\x{bf}]
24						\| \x{f0} [\x{90}-\x{bf}][\x{80}-\x{bf}]
25						\| [\x{f1}-\x{f3}][\x{80}-\x{bf}][\x{80}-\x{bf}][\x{80}-\x{bf}]
26						\| \x{f4} [\x{80}-\x{8f}][\x{80}-\x{bf}][\x{80}-\x{bf}]
27						.
28
29						sub decode {
30	6		6	1	174	my($obj, $buf, $chk) = @_;
31
32	6				54	$buf =~ s{((?:$latin1_as_utf8){2,3})}{ _check_utf8_bytes($1) }ego;
	7				18
33	6	50			22	$_[1] = '' if $chk; # this is what in-place edit means
34
35	6				28	Encode::decode_utf8($buf);
36						}
37
38						sub _check_utf8_bytes {
39	7		7		18	my $bytes = shift;
40	7				11	my $copy = $bytes;
41
42	7				10	my $possible_utf8 = '';
43	7				35	while ($copy =~ s/^(.)(.)//) {
44	20				202	$possible_utf8 .= chr( (ord($1) << 6 & 0xff) \| ord($2) )
45						}
46
47	7	50			115	$possible_utf8 =~ /$valid_utf8_regexp/xo ? $possible_utf8 : $bytes;
48						}
49
50						sub encode {
51	2		2		13	use Carp;
	2				5
	2				235
52	0		0	1		Carp::croak("utf-8-de doesn't support encode() ... Why do you want to do that?");
53						}
54
55						1;
56						__END__