File Coverage

blib/lib/Encode/ZapCP1252.pm
Criterion Covered Total %
statement 38 38 100.0
branch 15 20 75.0
condition n/a
subroutine 9 9 100.0
pod 0 2 0.0
total 62 69 89.8


line stmt bran cond sub pod time code
1             package Encode::ZapCP1252;
2              
3 3     3   74560 use strict;
  3         5  
  3         140  
4             require Exporter;
5 3     3   17 use vars qw($VERSION @ISA @EXPORT);
  3         5  
  3         229  
6 3     3   89 use 5.006_002;
  3         15  
  3         265  
7              
8             $VERSION = '0.33';
9             @ISA = qw(Exporter);
10             @EXPORT = qw(zap_cp1252 fix_cp1252);
11 3     3   18 use constant PERL588 => $] >= 5.008_008;
  3         6  
  3         1574  
12             require Encode if PERL588;
13              
14             our %ascii_for = (
15             # http://en.wikipedia.org/wiki/Windows-1252
16             "\x80" => 'e', # EURO SIGN
17             "\x82" => ',', # SINGLE LOW-9 QUOTATION MARK
18             "\x83" => 'f', # LATIN SMALL LETTER F WITH HOOK
19             "\x84" => ',,', # DOUBLE LOW-9 QUOTATION MARK
20             "\x85" => '...', # HORIZONTAL ELLIPSIS
21             "\x86" => '+', # DAGGER
22             "\x87" => '++', # DOUBLE DAGGER
23             "\x88" => '^', # MODIFIER LETTER CIRCUMFLEX ACCENT
24             "\x89" => '%', # PER MILLE SIGN
25             "\x8a" => 'S', # LATIN CAPITAL LETTER S WITH CARON
26             "\x8b" => '<', # SINGLE LEFT-POINTING ANGLE QUOTATION MARK
27             "\x8c" => 'OE', # LATIN CAPITAL LIGATURE OE
28             "\x8e" => 'Z', # LATIN CAPITAL LETTER Z WITH CARON
29             "\x91" => "'", # LEFT SINGLE QUOTATION MARK
30             "\x92" => "'", # RIGHT SINGLE QUOTATION MARK
31             "\x93" => '"', # LEFT DOUBLE QUOTATION MARK
32             "\x94" => '"', # RIGHT DOUBLE QUOTATION MARK
33             "\x95" => '*', # BULLET
34             "\x96" => '-', # EN DASH
35             "\x97" => '--', # EM DASH
36             "\x98" => '~', # SMALL TILDE
37             "\x99" => '(tm)', # TRADE MARK SIGN
38             "\x9a" => 's', # LATIN SMALL LETTER S WITH CARON
39             "\x9b" => '>', # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
40             "\x9c" => 'oe', # LATIN SMALL LIGATURE OE
41             "\x9e" => 'z', # LATIN SMALL LETTER Z WITH CARON
42             "\x9f" => 'Y', # LATIN CAPITAL LETTER Y WITH DIAERESIS
43             );
44              
45             our %utf8_for = (
46             # http://en.wikipedia.org/wiki/Windows-1252
47             "\x80" => '€', # EURO SIGN
48             "\x82" => ',', # SINGLE LOW-9 QUOTATION MARK
49             "\x83" => 'ƒ', # LATIN SMALL LETTER F WITH HOOK
50             "\x84" => '„', # DOUBLE LOW-9 QUOTATION MARK
51             "\x85" => '…', # HORIZONTAL ELLIPSIS
52             "\x86" => '†', # DAGGER
53             "\x87" => '‡', # DOUBLE DAGGER
54             "\x88" => 'ˆ', # MODIFIER LETTER CIRCUMFLEX ACCENT
55             "\x89" => '‰', # PER MILLE SIGN
56             "\x8a" => 'Š', # LATIN CAPITAL LETTER S WITH CARON
57             "\x8b" => '‹', # SINGLE LEFT-POINTING ANGLE QUOTATION MARK
58             "\x8c" => 'Œ', # LATIN CAPITAL LIGATURE OE
59             "\x8e" => 'Ž', # LATIN CAPITAL LETTER Z WITH CARON
60             "\x91" => '‘', # LEFT SINGLE QUOTATION MARK
61             "\x92" => '’', # RIGHT SINGLE QUOTATION MARK
62             "\x93" => '“', # LEFT DOUBLE QUOTATION MARK
63             "\x94" => '”', # RIGHT DOUBLE QUOTATION MARK
64             "\x95" => '•', # BULLET
65             "\x96" => '–', # EN DASH
66             "\x97" => '—', # EM DASH
67             "\x98" => '˜', # SMALL TILDE
68             "\x99" => '™', # TRADE MARK SIGN
69             "\x9a" => 'š', # LATIN SMALL LETTER S WITH CARON
70             "\x9b" => '›', # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
71             "\x9c" => 'œ', # LATIN SMALL LIGATURE OE
72             "\x9e" => 'ž', # LATIN SMALL LETTER Z WITH CARON
73             "\x9f" => 'Ÿ', # LATIN CAPITAL LETTER Y WITH DIAERESIS
74             );
75              
76             BEGIN {
77 3 50   3   15 my $proto = $] >= 5.010000 ? '_' : '$';
78 3     8 0 207 eval "sub zap_cp1252($proto) { unshift \@_, \\%ascii_for; &_tweakit; }";
  8         2823  
  8         25  
79 3     9 0 1371 eval "sub fix_cp1252($proto) { unshift \@_, \\%utf8_for; &_tweakit; }";
  9         2722  
  9         29  
80             }
81              
82             sub _tweakit {
83 17     17   22 my $table = shift;
84 17 100       58 return unless defined $_[0];
85 15 100       38 local $_[0] = $_[0] if defined wantarray;
86 15 100       66 if (PERL588 && Encode::is_utf8($_[0])) {
87 5         13 _tweak_decoded($table, $_[0]);
88             } else {
89 10 50       42 $_[0] =~ s{([\x80-\x9f])}{$table->{$1} || $1}emxsg;
  270         893  
90             }
91 15 100       87 return $_[0] if defined wantarray;
92             }
93              
94             sub _tweak_decoded {
95 5     5   7 my $table = shift;
96 5         8 local $@;
97             # First, try to replace in the decoded string.
98 5         9 eval {
99 5         32 $_[0] =~ s{([\x80-\x9f])}{
100 81 50       3451 $table->{$1} ? Encode::decode('UTF-8', $table->{$1}) : $1
101             }emxsg
102             };
103 5 100       980 if (my $err = $@) {
104             # If we got a "Malformed UTF-8 character" error, then someone
105             # likely turned on the utf8 flag without decoding. So turn it off.
106             # and try again.
107 2 50       9 die if $err !~ /Malformed/;
108 2         5 Encode::_utf8_off($_[0]);
109 2 50       9 $_[0] =~ s/([\x80-\x9f])/$table->{$1} || $1/emxsg;
  54         200  
110 2         11 Encode::_utf8_on($_[0]);
111             }
112             }
113              
114             1;
115             __END__