line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# |
2
|
|
|
|
|
|
|
# $Id: IMAPUTF7.pm 3398 2009-04-21 13:18:16Z makholm $ |
3
|
|
|
|
|
|
|
# |
4
|
|
|
|
|
|
|
package Encode::IMAPUTF7; |
5
|
2
|
|
|
2
|
|
84491
|
use strict; |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
74
|
|
6
|
2
|
|
|
2
|
|
11
|
no warnings 'redefine'; |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
77
|
|
7
|
2
|
|
|
2
|
|
12
|
use base qw(Encode::Encoding); |
|
2
|
|
|
|
|
7
|
|
|
2
|
|
|
|
|
2735
|
|
8
|
|
|
|
|
|
|
__PACKAGE__->Define('IMAP-UTF-7', 'imap-utf-7'); |
9
|
|
|
|
|
|
|
our $VERSION = '1.05'; |
10
|
2
|
|
|
2
|
|
38421
|
use MIME::Base64; |
|
2
|
|
|
|
|
1705
|
|
|
2
|
|
|
|
|
137
|
|
11
|
2
|
|
|
2
|
|
15
|
use Encode; |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
1365
|
|
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
# |
14
|
|
|
|
|
|
|
# Algorithms taken from Unicode::String by Gisle Aas |
15
|
|
|
|
|
|
|
# Code directly borrowed from Encode::Unicode::UTF7 by Dan Kogai |
16
|
|
|
|
|
|
|
# |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
# Directly from the definition in RFC2060: |
19
|
|
|
|
|
|
|
# Ampersand (\x26) is represented as a special case |
20
|
|
|
|
|
|
|
my $re_asis = qr/(?:[\x20-\x25\x27-\x7e])/; # printable US-ASCII except "&" represents itself |
21
|
|
|
|
|
|
|
my $re_encoded = qr/(?:[^\x20-\x7e])/; # Everything else are represented by modified base64 |
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
my $e_utf16 = find_encoding("UTF-16BE"); |
24
|
|
|
|
|
|
|
|
25
|
0
|
|
|
0
|
1
|
0
|
sub needs_lines { 1 }; |
26
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
sub encode($$;$) { |
28
|
6
|
|
|
6
|
|
1079
|
my ( $obj, $str, $chk ) = @_; |
29
|
6
|
|
|
|
|
68
|
my $len = length($str); |
30
|
6
|
|
|
|
|
18
|
pos($str) = 0; |
31
|
6
|
|
|
|
|
12
|
my $bytes = ''; |
32
|
6
|
|
|
|
|
20
|
while ( pos($str) < $len ) { |
33
|
739
|
100
|
|
|
|
3492
|
if ( $str =~ /\G($re_asis+)/ogc ) { |
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
34
|
367
|
|
|
|
|
1114
|
$bytes .= $1; |
35
|
|
|
|
|
|
|
} elsif ( $str =~ /\G&/ogc ) { |
36
|
5
|
|
|
|
|
13
|
$bytes .= "&-"; |
37
|
|
|
|
|
|
|
} elsif ( $str =~ /\G($re_encoded+)/ogsc ) { |
38
|
367
|
|
|
|
|
642
|
my $s = $1; |
39
|
367
|
|
|
|
|
2452
|
my $base64 = encode_base64( $e_utf16->encode($s), '' ); |
40
|
367
|
|
|
|
|
1312
|
$base64 =~ s/=+$//; |
41
|
367
|
|
|
|
|
741
|
$base64 =~ s/\//,/g; |
42
|
367
|
|
|
|
|
1568
|
$bytes .= "&$base64-"; |
43
|
|
|
|
|
|
|
} else { |
44
|
0
|
|
|
|
|
0
|
die "This should not happen! (pos=" . pos($str) . ")"; |
45
|
|
|
|
|
|
|
} |
46
|
|
|
|
|
|
|
} |
47
|
6
|
50
|
|
|
|
16
|
$_[1] = '' if $chk; |
48
|
6
|
|
|
|
|
74
|
return $bytes; |
49
|
|
|
|
|
|
|
} |
50
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
sub decode($$;$) { |
52
|
1
|
|
|
1
|
|
134
|
my ( $obj, $bytes, $chk ) = @_; |
53
|
1
|
|
|
|
|
114
|
my $len = length($bytes); |
54
|
1
|
|
|
|
|
2
|
my $str = ""; |
55
|
1
|
|
|
|
|
3
|
pos($bytes) = 0; |
56
|
1
|
|
|
|
|
6
|
while ( pos($bytes) < $len ) { |
57
|
726
|
100
|
|
|
|
2566
|
if ( $bytes =~ /\G([^&]+)/ogc ) { |
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
58
|
363
|
|
|
|
|
840
|
$str .= $1; |
59
|
|
|
|
|
|
|
} elsif ( $bytes =~ /\G\&-/ogc ) { |
60
|
0
|
|
|
|
|
0
|
$str .= "&"; |
61
|
|
|
|
|
|
|
} elsif ( $bytes =~ /\G\&([A-Za-z0-9+,]+)-?/ogsc ) { |
62
|
363
|
|
|
|
|
526
|
my $base64 = $1; |
63
|
363
|
|
|
|
|
914
|
$base64 =~ s/,/\//g; |
64
|
363
|
|
|
|
|
682
|
my $pad = length($base64) % 4; |
65
|
363
|
100
|
|
|
|
895
|
$base64 .= "=" x ( 4 - $pad ) if $pad; |
66
|
363
|
|
|
|
|
2632
|
$str .= $e_utf16->decode( decode_base64($base64) ); |
67
|
|
|
|
|
|
|
} elsif ( $bytes =~ /\G\&/ogc ) { |
68
|
0
|
0
|
|
|
|
0
|
$^W and warn "Bad IMAP-UTF7 data escape"; |
69
|
0
|
|
|
|
|
0
|
$str .= "&"; |
70
|
|
|
|
|
|
|
} else { |
71
|
0
|
|
|
|
|
0
|
die "This should not happen " . pos($bytes); |
72
|
|
|
|
|
|
|
} |
73
|
|
|
|
|
|
|
} |
74
|
1
|
50
|
|
|
|
4
|
$_[1] = '' if $chk; |
75
|
1
|
|
|
|
|
45
|
return $str; |
76
|
|
|
|
|
|
|
} |
77
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
1; |
80
|
|
|
|
|
|
|
__END__ |
81
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
=head1 NAME |
83
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
Encode::IMAPUTF7 - modification of UTF-7 encoding for IMAP |
85
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
=head1 SYNOPSIS |
87
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
use Encode qw/encode decode/; |
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
print encode('IMAP-UTF-7', 'Répertoire'); |
91
|
|
|
|
|
|
|
print decode('IMAP-UTF-7', R&AOk-pertoire'); |
92
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
=head1 ABSTRACT |
94
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
IMAP mailbox names are encoded in a modified UTF7 when names contains |
96
|
|
|
|
|
|
|
international characters outside of the printable ASCII range. The |
97
|
|
|
|
|
|
|
modified UTF-7 encoding is defined in RFC2060 (section 5.1.3). |
98
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
There is another CPAN module with same purpose, Unicode::IMAPUtf7. However, it |
100
|
|
|
|
|
|
|
works correctly only with strings, which encoded form does not |
101
|
|
|
|
|
|
|
contain plus sign. For example, the Cyrillic string |
102
|
|
|
|
|
|
|
\x{043f}\x{0440}\x{0435}\x{0434}\x{043b}\x{043e}\x{0433} is represented in UTF-7 as |
103
|
|
|
|
|
|
|
+BD8EQAQ1BDQEOwQ+BDM- Note the second plus sign 4 characters before the end. |
104
|
|
|
|
|
|
|
Unicode::IMAPUtf7 encodes the above string as +BD8EQAQ1BDQEOwQ&BDM- |
105
|
|
|
|
|
|
|
which is not valid modified UTF-7 (the ampersand and |
106
|
|
|
|
|
|
|
the plus are swapped). The problem is solved by the current module, |
107
|
|
|
|
|
|
|
which is slightly modified Encode::Unicode::UTF7 and has nothing common with |
108
|
|
|
|
|
|
|
Unicode::IMAPUtf7. |
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
=head1 RFC2060 - section 5.1.3 - Mailbox International Naming Convention |
111
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
By convention, international mailbox names are specified using a |
113
|
|
|
|
|
|
|
modified version of the UTF-7 encoding described in [UTF-7]. The |
114
|
|
|
|
|
|
|
purpose of these modifications is to correct the following problems |
115
|
|
|
|
|
|
|
with UTF-7: |
116
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
1) UTF-7 uses the "+" character for shifting; this conflicts with |
118
|
|
|
|
|
|
|
the common use of "+" in mailbox names, in particular USENET |
119
|
|
|
|
|
|
|
newsgroup names. |
120
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
2) UTF-7's encoding is BASE64 which uses the "/" character; this |
122
|
|
|
|
|
|
|
conflicts with the use of "/" as a popular hierarchy delimiter. |
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
3) UTF-7 prohibits the unencoded usage of "\"; this conflicts with |
125
|
|
|
|
|
|
|
the use of "\" as a popular hierarchy delimiter. |
126
|
|
|
|
|
|
|
|
127
|
|
|
|
|
|
|
4) UTF-7 prohibits the unencoded usage of "~"; this conflicts with |
128
|
|
|
|
|
|
|
the use of "~" in some servers as a home directory indicator. |
129
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
5) UTF-7 permits multiple alternate forms to represent the same |
131
|
|
|
|
|
|
|
string; in particular, printable US-ASCII chararacters can be |
132
|
|
|
|
|
|
|
represented in encoded form. |
133
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
In modified UTF-7, printable US-ASCII characters except for "&" |
135
|
|
|
|
|
|
|
represent themselves; that is, characters with octet values 0x20-0x25 |
136
|
|
|
|
|
|
|
and 0x27-0x7e. The character "&" (0x26) is represented by the two- |
137
|
|
|
|
|
|
|
octet sequence "&-". |
138
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
All other characters (octet values 0x00-0x1f, 0x7f-0xff, and all |
140
|
|
|
|
|
|
|
Unicode 16-bit octets) are represented in modified BASE64, with a |
141
|
|
|
|
|
|
|
further modification from [UTF-7] that "," is used instead of "/". |
142
|
|
|
|
|
|
|
Modified BASE64 MUST NOT be used to represent any printing US-ASCII |
143
|
|
|
|
|
|
|
character which can represent itself. |
144
|
|
|
|
|
|
|
|
145
|
|
|
|
|
|
|
"&" is used to shift to modified BASE64 and "-" to shift back to US- |
146
|
|
|
|
|
|
|
ASCII. All names start in US-ASCII, and MUST end in US-ASCII (that |
147
|
|
|
|
|
|
|
is, a name that ends with a Unicode 16-bit octet MUST end with a "- |
148
|
|
|
|
|
|
|
"). |
149
|
|
|
|
|
|
|
|
150
|
|
|
|
|
|
|
For example, here is a mailbox name which mixes English, Japanese, |
151
|
|
|
|
|
|
|
and Chinese text: ~peter/mail/&ZeVnLIqe-/&U,BTFw- |
152
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
=head1 REQUESTS & BUGS |
154
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
Please report any requests, suggestions or bugs via the RT bug-tracking system |
156
|
|
|
|
|
|
|
at http://rt.cpan.org/ or email to bug-Encode-IMAPUTF7@rt.cpan.org. |
157
|
|
|
|
|
|
|
|
158
|
|
|
|
|
|
|
http://rt.cpan.org/NoAuth/Bugs.html?Dist=Encode-IMAPUTF7 is the RT queue for Encode::IMAPUTF7. |
159
|
|
|
|
|
|
|
Please check to see if your bug has already been reported. |
160
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
=head1 COPYRIGHT |
162
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
Copyright 2005 Sava Chankov |
164
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
Sava Chankov, sava@cpan.org |
166
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
This software may be freely copied and distributed under the same |
168
|
|
|
|
|
|
|
terms and conditions as Perl. |
169
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
=head1 AUTHORS |
171
|
|
|
|
|
|
|
|
172
|
|
|
|
|
|
|
Peter Makholm E<lt>peter@makholm.netE<gt>, current maintainer |
173
|
|
|
|
|
|
|
|
174
|
|
|
|
|
|
|
Sava Chankov E<lt>sava@cpan.orgE<gt>, original author |
175
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
=head1 SEE ALSO |
177
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
perl(1), Encode. |
179
|
|
|
|
|
|
|
|
180
|
|
|
|
|
|
|
=cut |