File Coverage

blib/lib/Encode/Simple.pm
Criterion Covered Total %
statement 155 157 98.7
branch 35 54 64.8
condition 14 43 32.5
subroutine 26 26 100.0
pod 8 8 100.0
total 238 288 82.6


line stmt bran cond sub pod time code
1             package Encode::Simple;
2              
3 2     2   54392 use strict;
  2         16  
  2         51  
4 2     2   9 use warnings;
  2         2  
  2         35  
5 2     2   7 use Carp ();
  2         9  
  2         27  
6 2     2   532 use Encode ();
  2         16805  
  2         38  
7 2     2   10 use Exporter 'import';
  2         3  
  2         160  
8              
9             our $VERSION = '1.000';
10              
11             our @EXPORT = qw(encode encode_utf8 decode decode_utf8);
12             our @EXPORT_OK = qw(encode_lax encode_utf8_lax decode_lax decode_utf8_lax);
13             our %EXPORT_TAGS = (
14             all => [@EXPORT, @EXPORT_OK],
15             strict => [qw(encode encode_utf8 decode decode_utf8)],
16             lax => [qw(encode_lax encode_utf8_lax decode_lax decode_utf8_lax)],
17             utf8 => [qw(encode_utf8 encode_utf8_lax decode_utf8 decode_utf8_lax)],
18             );
19              
20 2     2   10 use constant HAS_UNICODE_UTF8 => do { local $@; !!eval { require Unicode::UTF8; 1 } };
  2         2  
  2         3  
  2         2  
  2         3  
  2         8  
  1         49  
21 2     2   294 use constant MASK_STRICT => Encode::FB_CROAK | Encode::LEAVE_SRC;
  2         4  
  2         76  
22 2     2   8 use constant MASK_LAX => Encode::FB_DEFAULT | Encode::LEAVE_SRC;
  2         4  
  2         175  
23              
24             my %ENCODINGS;
25              
26             sub encode {
27 5     5 1 1347 my ($encoding, $input) = @_;
28 5 50       12 return undef unless defined $input;
29 5   66     13 my $obj = $ENCODINGS{$encoding} || _find_encoding($encoding);
30 5         6 my ($output, $error);
31 2     2   11 { local $@; use warnings FATAL => 'utf8'; # Encode::Unicode throws warnings in this category
  2         4  
  2         316  
  5         6  
  5         6  
32 5 100 50     6 unless (eval { $output = $obj->encode("$input", MASK_STRICT); 1 }) { $error = $@ || 'Error' }
  5         52  
  1         3  
  4         9  
33             }
34 5 100       15 _rethrow($error) if defined $error;
35 1         4 return $output;
36             }
37              
38             sub encode_lax {
39 4     4 1 1077 my ($encoding, $input) = @_;
40 4 50       10 return undef unless defined $input;
41 4   33     9 my $obj = $ENCODINGS{$encoding} || _find_encoding($encoding);
42 4         6 my ($output, $error);
43 2     2   10 { local $@; no warnings 'utf8'; # Encode::Unicode throws warnings in this category
  2         4  
  2         244  
  4         5  
  4         4  
44 4 50 0     5 unless (eval { $output = $obj->encode("$input", MASK_LAX); 1 }) { $error = $@ || 'Error' }
  4         17  
  4         12  
  0         0  
45             }
46 4 50       6 _rethrow($error) if defined $error;
47 4         13 return $output;
48             }
49              
50             sub encode_utf8 {
51 4     4 1 73 my ($input) = @_;
52 4 50       12 return undef unless defined $input;
53 4         6 my ($output, $error);
54 4         5 if (HAS_UNICODE_UTF8) {
55 2     2   11 local $@; use warnings FATAL => 'utf8'; # Unicode::UTF8 throws warnings in this category
  2         3  
  2         309  
56             unless (eval { $output = Unicode::UTF8::encode_utf8($input); 1 }) { $error = $@ || 'Error' }
57             } else {
58 4   33     7 my $obj = $ENCODINGS{'UTF-8'} || _find_encoding('UTF-8');
59 4         6 local $@;
60 4 100 50     19 unless (eval { $output = $obj->encode("$input", MASK_STRICT); 1 }) { $error = $@ || 'Error' }
  3         15  
  2         5  
  3         9  
61             }
62 3 100       10 _rethrow($error) if defined $error;
63 1         3 return $output;
64             }
65              
66             sub encode_utf8_lax {
67 4     4 1 9 my ($input) = @_;
68 4 50       13 return undef unless defined $input;
69 4         5 my ($output, $error);
70 4         5 if (HAS_UNICODE_UTF8) {
71 2     2   27 local $@; no warnings 'utf8'; # Unicode::UTF8 throws warnings in this category
  2         3  
  2         317  
72             unless (eval { $output = Unicode::UTF8::encode_utf8($input); 1 }) { $error = $@ || 'Error' }
73             } else {
74 4   33     9 my $obj = $ENCODINGS{'UTF-8'} || _find_encoding('UTF-8');
75 4         5 local $@;
76 4 50 0     11 unless (eval { $output = $obj->encode("$input", MASK_LAX); 1 }) { $error = $@ || 'Error' }
  4         18  
  2         6  
  2         4  
77             }
78 4 50       13 _rethrow($error) if defined $error;
79 2         6 return $output;
80             }
81              
82             sub decode {
83 2     2 1 5 my ($encoding, $input) = @_;
84 2 50       6 return undef unless defined $input;
85 2   33     6 my $obj = $ENCODINGS{$encoding} || _find_encoding($encoding);
86 2         2 my ($output, $error);
87 2     2   11 { local $@; use warnings FATAL => 'utf8'; # Encode::Unicode throws warnings in this category
  2         2  
  2         255  
  2         2  
  2         3  
88 2 100 50     4 unless (eval { $output = $obj->decode("$input", MASK_STRICT); 1 }) { $error = $@ || 'Error' }
  2         12  
  1         7  
  1         11  
89             }
90 2 100       7 _rethrow($error) if defined $error;
91 1         3 return $output;
92             }
93              
94             sub decode_lax {
95 2     2 1 4 my ($encoding, $input) = @_;
96 2 50       7 return undef unless defined $input;
97 2   33     5 my $obj = $ENCODINGS{$encoding} || _find_encoding($encoding);
98 2         3 my ($output, $error);
99 2     2   12 { local $@; no warnings 'utf8'; # Encode::Unicode throws warnings in this category
  2         3  
  2         226  
  2         2  
  2         3  
100 2 50 0     4 unless (eval { $output = $obj->decode("$input", MASK_LAX); 1 }) { $error = $@ || 'Error' }
  2         9  
  2         11  
  0         0  
101             }
102 2 50       4 _rethrow($error) if defined $error;
103 2         6 return $output;
104             }
105              
106             sub decode_utf8 {
107 4     4 1 8 my ($input) = @_;
108 4 50       10 return undef unless defined $input;
109 4         6 my ($output, $error);
110 4         5 if (HAS_UNICODE_UTF8) {
111 2     2   10 local $@; use warnings FATAL => 'utf8'; # Unicode::UTF8 throws warnings in this category
  2         3  
  2         291  
112             unless (eval { $output = Unicode::UTF8::decode_utf8($input); 1 }) { $error = $@ || 'Error' }
113             } else {
114 4   33     6 my $obj = $ENCODINGS{'UTF-8'} || _find_encoding('UTF-8');
115 4         6 local $@;
116 4 100 50     13 unless (eval { $output = $obj->decode("$input", MASK_STRICT); 1 }) { $error = $@ || 'Error' }
  3         12  
  2         10  
  3         17  
117             }
118 3 100       8 _rethrow($error) if defined $error;
119 1         4 return $output;
120             }
121              
122             sub decode_utf8_lax {
123 4     4 1 8 my ($input) = @_;
124 4 50       11 return undef unless defined $input;
125 4         7 my ($output, $error);
126 4         6 if (HAS_UNICODE_UTF8) {
127 2     2   11 local $@; no warnings 'utf8'; # Unicode::UTF8 throws warnings in this category
  2         3  
  2         551  
128             unless (eval { $output = Unicode::UTF8::decode_utf8($input); 1 }) { $error = $@ || 'Error' }
129             } else {
130 4   33     7 my $obj = $ENCODINGS{'UTF-8'} || _find_encoding('UTF-8');
131 4         6 local $@;
132 4 50 0     9 unless (eval { $output = $obj->decode("$input", MASK_LAX); 1 }) { $error = $@ || 'Error' }
  4         15  
  2         11  
  2         3  
133             }
134 4 50       14 _rethrow($error) if defined $error;
135 2         6 return $output;
136             }
137              
138             sub _find_encoding {
139 3     3   5 my ($encoding) = @_;
140 3 50       6 Carp::croak('Encoding name should not be undef') unless defined $encoding;
141 3         7 my $obj = Encode::find_encoding($encoding);
142 3 50       3132 Carp::croak("Unknown encoding '$encoding'") unless defined $obj;
143 3         14 return $ENCODINGS{$encoding} = $obj;
144             }
145              
146             sub _rethrow {
147 9     9   14 my ($error) = @_;
148 9 50 33     64 die $error if ref $error or $error =~ m/\n(?!\z)/;
149 9         51 $error =~ s/ at .+? line [0-9]+\.\n\z//;
150 9         941 Carp::croak($error);
151             }
152              
153             1;
154              
155             =head1 NAME
156              
157             Encode::Simple - Encode and decode text, simply
158              
159             =head1 SYNOPSIS
160              
161             use Encode::Simple qw(encode encode_lax encode_utf8 decode decode_lax decode_utf8);
162             my $bytes = encode 'Shift_JIS', $characters;
163             my $bytes = encode_lax 'ASCII', $characters;
164             my $bytes = encode_utf8 $characters;
165             my $characters = decode 'cp1252', $bytes;
166             my $characters = decode_lax 'UTF-8', $bytes;
167             my $characters = decode_utf8 $bytes;
168              
169             =head1 DESCRIPTION
170              
171             This module is a simple wrapper around L that presents L and
172             L functions with straightforward behavior and error handling. See
173             L for a list of supported encodings.
174              
175             =head1 FUNCTIONS
176              
177             All functions are exported by name, as well as via the tags C<:all>,
178             C<:strict>, C<:lax>, and C<:utf8>. By default, L, L,
179             L, and L are exported as in L.
180              
181             =head2 encode
182              
183             my $bytes = encode $encoding, $characters;
184              
185             Encodes the input string of characters into a byte string using C<$encoding>.
186             Throws an exception if the input string contains characters that are not valid
187             or possible to represent in C<$encoding>.
188              
189             =head2 encode_lax
190              
191             my $bytes = encode_lax $encoding, $characters;
192              
193             Encodes the input string of characters into a byte string using C<$encoding>,
194             encoding any invalid characters as a substitution character (the substitution
195             character used depends on the encoding). Note that some encoders do not respect
196             this option and may throw an exception anyway, this notably includes
197             L (but not UTF-8).
198              
199             =head2 encode_utf8
200              
201             my $bytes = encode_utf8 $characters;
202              
203             Encodes the input string of characters into a UTF-8 byte string. Throws an
204             exception if the input string contains characters that are not valid or
205             possible to represent in UTF-8.
206              
207             This function will use the more consistent and efficient
208             L if installed, and is otherwise equivalent to
209             L with an encoding of C.
210              
211             =head2 encode_utf8_lax
212              
213             my $bytes = encode_utf8_lax $characters;
214              
215             Encodes the input string of characters into a UTF-8 byte string, encoding any
216             invalid characters as the Unicode replacement character C, represented
217             in UTF-8 as the three bytes C<0xEFBFBD>.
218              
219             This function will use the more consistent and efficient
220             L if installed, and is otherwise equivalent to
221             L with an encoding of C.
222              
223             =head2 decode
224              
225             my $characters = decode $encoding, $bytes;
226              
227             Decodes the input byte string into a string of characters using C<$encoding>.
228             Throws an exception if the input bytes are not valid for C<$encoding>.
229              
230             =head2 decode_lax
231              
232             my $characters = decode_lax $encoding, $bytes;
233              
234             Decodes the input byte string into a string of characters using C<$encoding>,
235             decoding any malformed bytes to the Unicode replacement character (U+FFFD).
236             Note that some encoders do not respect this option and may throw an exception
237             anyway, this notably includes L (but not UTF-8).
238              
239             =head2 decode_utf8
240              
241             my $characters = decode_utf8 $bytes;
242              
243             Decodes the input UTF-8 byte string into a string of characters. Throws an
244             exception if the input bytes are not valid for UTF-8.
245              
246             This function will use the more consistent and efficient
247             L if installed, and is otherwise equivalent to
248             L with an encoding of C.
249              
250             =head2 decode_utf8_lax
251              
252             my $characters = decode_utf8_lax $bytes;
253              
254             Decodes the input UTF-8 byte string into a string of characters, decoding any
255             malformed bytes to the Unicode replacement character C.
256              
257             This function will use the more consistent and efficient
258             L if installed, and is otherwise equivalent to
259             L with an encoding of C.
260              
261             =head1 BUGS
262              
263             Report any issues on the public bugtracker.
264              
265             =head1 AUTHOR
266              
267             Dan Book
268              
269             =head1 COPYRIGHT AND LICENSE
270              
271             This software is Copyright (c) 2018 by Dan Book.
272              
273             This is free software, licensed under:
274              
275             The Artistic License 2.0 (GPL Compatible)
276              
277             =head1 SEE ALSO
278              
279             L