File Coverage

blib/lib/Encode/IMAPUTF7.pm
Criterion Covered Total %
statement 44 50 88.0
branch 13 22 59.0
condition n/a
subroutine 7 8 87.5
pod 1 1 100.0
total 65 81 80.2


line stmt bran cond sub pod time code
1             #
2             # $Id: IMAPUTF7.pm 3398 2009-04-21 13:18:16Z makholm $
3             #
4             package Encode::IMAPUTF7;
5 2     2   84491 use strict;
  2         4  
  2         74  
6 2     2   11 no warnings 'redefine';
  2         4  
  2         77  
7 2     2   12 use base qw(Encode::Encoding);
  2         7  
  2         2735  
8             __PACKAGE__->Define('IMAP-UTF-7', 'imap-utf-7');
9             our $VERSION = '1.05';
10 2     2   38421 use MIME::Base64;
  2         1705  
  2         137  
11 2     2   15 use Encode;
  2         4  
  2         1365  
12              
13             #
14             # Algorithms taken from Unicode::String by Gisle Aas
15             # Code directly borrowed from Encode::Unicode::UTF7 by Dan Kogai
16             #
17              
18             # Directly from the definition in RFC2060:
19             # Ampersand (\x26) is represented as a special case
20             my $re_asis = qr/(?:[\x20-\x25\x27-\x7e])/; # printable US-ASCII except "&" represents itself
21             my $re_encoded = qr/(?:[^\x20-\x7e])/; # Everything else are represented by modified base64
22              
23             my $e_utf16 = find_encoding("UTF-16BE");
24              
25 0     0 1 0 sub needs_lines { 1 };
26              
27             sub encode($$;$) {
28 6     6   1079 my ( $obj, $str, $chk ) = @_;
29 6         68 my $len = length($str);
30 6         18 pos($str) = 0;
31 6         12 my $bytes = '';
32 6         20 while ( pos($str) < $len ) {
33 739 100       3492 if ( $str =~ /\G($re_asis+)/ogc ) {
    100          
    50          
34 367         1114 $bytes .= $1;
35             } elsif ( $str =~ /\G&/ogc ) {
36 5         13 $bytes .= "&-";
37             } elsif ( $str =~ /\G($re_encoded+)/ogsc ) {
38 367         642 my $s = $1;
39 367         2452 my $base64 = encode_base64( $e_utf16->encode($s), '' );
40 367         1312 $base64 =~ s/=+$//;
41 367         741 $base64 =~ s/\//,/g;
42 367         1568 $bytes .= "&$base64-";
43             } else {
44 0         0 die "This should not happen! (pos=" . pos($str) . ")";
45             }
46             }
47 6 50       16 $_[1] = '' if $chk;
48 6         74 return $bytes;
49             }
50              
51             sub decode($$;$) {
52 1     1   134 my ( $obj, $bytes, $chk ) = @_;
53 1         114 my $len = length($bytes);
54 1         2 my $str = "";
55 1         3 pos($bytes) = 0;
56 1         6 while ( pos($bytes) < $len ) {
57 726 100       2566 if ( $bytes =~ /\G([^&]+)/ogc ) {
    50          
    50          
    0          
58 363         840 $str .= $1;
59             } elsif ( $bytes =~ /\G\&-/ogc ) {
60 0         0 $str .= "&";
61             } elsif ( $bytes =~ /\G\&([A-Za-z0-9+,]+)-?/ogsc ) {
62 363         526 my $base64 = $1;
63 363         914 $base64 =~ s/,/\//g;
64 363         682 my $pad = length($base64) % 4;
65 363 100       895 $base64 .= "=" x ( 4 - $pad ) if $pad;
66 363         2632 $str .= $e_utf16->decode( decode_base64($base64) );
67             } elsif ( $bytes =~ /\G\&/ogc ) {
68 0 0       0 $^W and warn "Bad IMAP-UTF7 data escape";
69 0         0 $str .= "&";
70             } else {
71 0         0 die "This should not happen " . pos($bytes);
72             }
73             }
74 1 50       4 $_[1] = '' if $chk;
75 1         45 return $str;
76             }
77              
78              
79             1;
80             __END__
81              
82             =head1 NAME
83              
84             Encode::IMAPUTF7 - modification of UTF-7 encoding for IMAP
85              
86             =head1 SYNOPSIS
87              
88             use Encode qw/encode decode/;
89              
90             print encode('IMAP-UTF-7', 'Répertoire');
91             print decode('IMAP-UTF-7', R&AOk-pertoire');
92              
93             =head1 ABSTRACT
94              
95             IMAP mailbox names are encoded in a modified UTF7 when names contains
96             international characters outside of the printable ASCII range. The
97             modified UTF-7 encoding is defined in RFC2060 (section 5.1.3).
98              
99             There is another CPAN module with same purpose, Unicode::IMAPUtf7. However, it
100             works correctly only with strings, which encoded form does not
101             contain plus sign. For example, the Cyrillic string
102             \x{043f}\x{0440}\x{0435}\x{0434}\x{043b}\x{043e}\x{0433} is represented in UTF-7 as
103             +BD8EQAQ1BDQEOwQ+BDM- Note the second plus sign 4 characters before the end.
104             Unicode::IMAPUtf7 encodes the above string as +BD8EQAQ1BDQEOwQ&BDM-
105             which is not valid modified UTF-7 (the ampersand and
106             the plus are swapped). The problem is solved by the current module,
107             which is slightly modified Encode::Unicode::UTF7 and has nothing common with
108             Unicode::IMAPUtf7.
109              
110             =head1 RFC2060 - section 5.1.3 - Mailbox International Naming Convention
111              
112             By convention, international mailbox names are specified using a
113             modified version of the UTF-7 encoding described in [UTF-7]. The
114             purpose of these modifications is to correct the following problems
115             with UTF-7:
116              
117             1) UTF-7 uses the "+" character for shifting; this conflicts with
118             the common use of "+" in mailbox names, in particular USENET
119             newsgroup names.
120              
121             2) UTF-7's encoding is BASE64 which uses the "/" character; this
122             conflicts with the use of "/" as a popular hierarchy delimiter.
123              
124             3) UTF-7 prohibits the unencoded usage of "\"; this conflicts with
125             the use of "\" as a popular hierarchy delimiter.
126              
127             4) UTF-7 prohibits the unencoded usage of "~"; this conflicts with
128             the use of "~" in some servers as a home directory indicator.
129              
130             5) UTF-7 permits multiple alternate forms to represent the same
131             string; in particular, printable US-ASCII chararacters can be
132             represented in encoded form.
133              
134             In modified UTF-7, printable US-ASCII characters except for "&"
135             represent themselves; that is, characters with octet values 0x20-0x25
136             and 0x27-0x7e. The character "&" (0x26) is represented by the two-
137             octet sequence "&-".
138              
139             All other characters (octet values 0x00-0x1f, 0x7f-0xff, and all
140             Unicode 16-bit octets) are represented in modified BASE64, with a
141             further modification from [UTF-7] that "," is used instead of "/".
142             Modified BASE64 MUST NOT be used to represent any printing US-ASCII
143             character which can represent itself.
144              
145             "&" is used to shift to modified BASE64 and "-" to shift back to US-
146             ASCII. All names start in US-ASCII, and MUST end in US-ASCII (that
147             is, a name that ends with a Unicode 16-bit octet MUST end with a "-
148             ").
149              
150             For example, here is a mailbox name which mixes English, Japanese,
151             and Chinese text: ~peter/mail/&ZeVnLIqe-/&U,BTFw-
152              
153             =head1 REQUESTS & BUGS
154              
155             Please report any requests, suggestions or bugs via the RT bug-tracking system
156             at http://rt.cpan.org/ or email to bug-Encode-IMAPUTF7@rt.cpan.org.
157              
158             http://rt.cpan.org/NoAuth/Bugs.html?Dist=Encode-IMAPUTF7 is the RT queue for Encode::IMAPUTF7.
159             Please check to see if your bug has already been reported.
160              
161             =head1 COPYRIGHT
162              
163             Copyright 2005 Sava Chankov
164              
165             Sava Chankov, sava@cpan.org
166              
167             This software may be freely copied and distributed under the same
168             terms and conditions as Perl.
169              
170             =head1 AUTHORS
171              
172             Peter Makholm E<lt>peter@makholm.netE<gt>, current maintainer
173              
174             Sava Chankov E<lt>sava@cpan.orgE<gt>, original author
175              
176             =head1 SEE ALSO
177              
178             perl(1), Encode.
179              
180             =cut