File Coverage

lib/Sisimai/String.pm
Criterion Covered Total %
statement 80 83 96.3
branch 22 28 78.5
condition 20 26 76.9
subroutine 12 12 100.0
pod 5 6 83.3
total 139 155 89.6


line stmt bran cond sub pod time code
1             package Sisimai::String;
2 85     85   71119 use feature ':5.10';
  85         176  
  85         6305  
3 85     85   494 use strict;
  85         194  
  85         1630  
4 85     85   358 use warnings;
  85         185  
  85         2019  
5 85     85   49984 use Encode;
  85         877035  
  85         6052  
6 85     85   45098 use Digest::SHA;
  85         264556  
  85         7434  
7              
8             my $EncodingsC = [qw/big5-eten gb2312/];
9             my $EncodingsE = [qw/iso-8859-1/];
10             my $EncodingsJ = [qw/7bit-jis iso-2022-jp euc-jp shiftjis/];
11 85     85   41493 use Encode::Guess; Encode::Guess->add_suspects(@$EncodingsC, @$EncodingsE, @$EncodingsJ);
  85         346109  
  85         318  
12 17     17 0 193 sub encodenames { return [@$EncodingsC, @$EncodingsE, @$EncodingsJ] };
13              
14             sub token {
15             # Create the message token from an addresser and a recipient
16             # @param [String] addr1 A sender's email address
17             # @param [String] addr2 A recipient's email address
18             # @param [Integer] epoch Machine time of the email bounce
19             # @return [String] Message token(MD5 hex digest) or empty string
20             # if the any argument is missing
21             # @see http://en.wikipedia.org/wiki/ASCII
22             # @see https://metacpan.org/pod/Digest::MD5
23 2968   50 2968 1 75180 my $class = shift || return '';
24 2968   100     6552 my $addr1 = shift || return '';
25 2967   100     5868 my $addr2 = shift || return '';
26 2966   100     5270 my $epoch = shift // return '';
27              
28             # Format: STX(0x02) Sender-Address RS(0x1e) Recipient-Address ETX(0x03)
29 2965         41570 return Digest::SHA::sha1_hex(sprintf("\x02%s\x1e%s\x1e%d\x03", lc $addr1, lc $addr2, $epoch));
30             }
31              
32             sub is_8bit {
33             # The argument is 8-bit text or not
34             # @param [String] argv1 Any string to be checked
35             # @return [Integer] 0: ASCII Characters only
36             # 1: Including 8-bit character
37 2710     2710 1 4279 my $class = shift;
38 2710   50     5438 my $argv1 = shift // return undef;
39              
40 2710 50       6593 return undef unless ref $argv1 eq 'SCALAR';
41 2710 100       9021 return 1 unless $$argv1 =~ /\A[\x00-\x7f]+\z/;
42 2670         7369 return 0;
43             }
44              
45             sub sweep {
46             # Clean the string out
47             # @param [String] argv1 String to be cleaned
48             # @return [Scalar] Cleaned out string
49             # @example Clean up text
50             # sweep(' neko ') #=> 'neko'
51 3115     3115 1 4972 my $class = shift;
52 3115   100     6418 my $argv1 = shift // return undef;
53              
54 3114         4812 chomp $argv1;
55 3114         8128 $argv1 =~ y/ //s;
56 3114         5127 $argv1 =~ y/\t//d;
57 3114 100       10512 $argv1 =~ s/\A //g if index($argv1, ' ') == 0;
58 3114 100       9307 $argv1 =~ s/ \z//g if substr($argv1, -1, 1) eq ' ';
59 3114         5953 $argv1 =~ s/ [-]{2,}[^ \t].+\z//;
60 3114         9081 return $argv1;
61             }
62              
63             sub to_plain {
64             # Convert given HTML text to plain text
65             # @param [Scalar] argv1 HTML text(reference to string)
66             # @param [Integer] loose Loose check flag
67             # @return [Scalar] Plain text(reference to string)
68 3     3 1 7109 my $class = shift;
69 3   50     9 my $argv1 = shift // return \'';
70 3   100     16 my $loose = shift // 0;
71 3 50       8 return \'' unless ref $argv1 eq 'SCALAR';
72              
73 3         6 my $plain = $$argv1;
74 3         12 state $match = {
75             'html' => qr|].+?|sim,
76             'body' => qr|.+.*].+|sim,
77             };
78              
79 3 100 100     68 if( $loose || $plain =~ $match->{'html'} || $plain =~ $match->{'body'} ) {
      66        
80             # ...
81             # 1. Remove ...
82             # 2. Remove
83             # 3. ... to " http://... "
84             # 4. ... to " Value "
85 2         12 $plain =~ s|.*?||gsim;
86 2         8 $plain =~ s|.*?||gsim;
87 2         18 $plain =~ s|(.*?)| [$2]($1) |gsim;
88 2         14 $plain =~ s|(.*?)| [$2](mailto:$1) |gsim;
89              
90 2         19 $plain =~ s/<[^<@>]+?>\s*/ /g; # Delete HTML tags except
91 2         5 $plain =~ s/</
92 2         4 $plain =~ s/>/>/g; # Convert to right angle brackets
93 2         3 $plain =~ s/&/&/g; # Convert to "&"
94 2         3 $plain =~ s/"/"/g; # Convert to '"'
95 2         3 $plain =~ s/'/'/g; # Convert to "'"
96 2         3 $plain =~ s/ / /g; # Convert to ' '
97              
98 2 50       6 if( length($$argv1) > length($plain) ) {
99 2         5 $plain =~ y/ //s;
100 2         4 $plain .= "\n"
101             }
102             }
103 3         12 return \$plain;
104             }
105              
106             sub to_utf8 {
107             # Convert given string to UTF-8
108             # @param [String] argv1 String to be converted
109             # @param [String] argv2 Encoding name before converting
110             # @return [String] UTF-8 Encoded string
111 60     60 1 123 my $class = shift;
112 60   50     172 my $argv1 = shift || return \'';
113 60         119 my $argv2 = shift;
114              
115 60         109 my $tobeutf8ed = $$argv1;
116 60   50     202 my $encodefrom = lc $argv2 || '';
117 60         98 my $hasencoded = undef;
118 60         551 my $hasguessed = Encode::Guess->guess($tobeutf8ed);
119 60 100       67048 my $encodingto = ref $hasguessed ? lc($hasguessed->name) : '';
120 60         169 state $dontencode = qr/\A(?>utf[-]?8|(?:us[-])?ascii)\z/;
121              
122 60 50       181 if( $encodefrom ) {
123             # The 2nd argument is a encoding name of the 1st argument
124 60         87 while(1) {
125             # Encode a given string when the encoding of the string is neigther
126             # utf8 nor ascii.
127 60 100       508 last if $encodefrom =~ $dontencode;
128 59 100       303 last if $encodingto =~ $dontencode;
129              
130 49         89 eval {
131             # Try to convert the string to UTF-8
132 49         228 Encode::from_to($tobeutf8ed, $encodefrom, 'utf8');
133 49         9332 $hasencoded = 1;
134             };
135 49         113 last;
136             }
137             }
138 60 100       280 return \$tobeutf8ed if $hasencoded;
139 11 50       51 return \$tobeutf8ed unless $encodingto;
140 11 50       106 return \$tobeutf8ed if $encodingto =~ $dontencode;
141              
142             # a. The 2nd argument was not given or failed to convert from $encodefrom to UTF-8
143             # b. Guessed encoding name is available, try to encode using it.
144             # c. Encode a given string when the encoding of the string is neigther utf8 nor ascii.
145 0           eval { Encode::from_to($tobeutf8ed, $encodingto, 'utf8') };
  0            
146 0           return \$tobeutf8ed;
147             }
148              
149             1;
150             __END__