line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Sisimai::String; |
2
|
85
|
|
|
85
|
|
69074
|
use feature ':5.10'; |
|
85
|
|
|
|
|
153
|
|
|
85
|
|
|
|
|
6382
|
|
3
|
85
|
|
|
85
|
|
481
|
use strict; |
|
85
|
|
|
|
|
163
|
|
|
85
|
|
|
|
|
1546
|
|
4
|
85
|
|
|
85
|
|
377
|
use warnings; |
|
85
|
|
|
|
|
162
|
|
|
85
|
|
|
|
|
1925
|
|
5
|
85
|
|
|
85
|
|
47681
|
use Encode; |
|
85
|
|
|
|
|
838973
|
|
|
85
|
|
|
|
|
5879
|
|
6
|
85
|
|
|
85
|
|
41319
|
use Digest::SHA; |
|
85
|
|
|
|
|
250686
|
|
|
85
|
|
|
|
|
6711
|
|
7
|
|
|
|
|
|
|
|
8
|
|
|
|
|
|
|
my $EncodingsC = [qw/big5-eten gb2312/]; |
9
|
|
|
|
|
|
|
my $EncodingsE = [qw/iso-8859-1/]; |
10
|
|
|
|
|
|
|
my $EncodingsJ = [qw/7bit-jis iso-2022-jp euc-jp shiftjis/]; |
11
|
85
|
|
|
85
|
|
39375
|
use Encode::Guess; Encode::Guess->add_suspects(@$EncodingsC, @$EncodingsE, @$EncodingsJ); |
|
85
|
|
|
|
|
329215
|
|
|
85
|
|
|
|
|
314
|
|
12
|
17
|
|
|
17
|
0
|
171
|
sub encodenames { return [@$EncodingsC, @$EncodingsE, @$EncodingsJ] }; |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
sub token { |
15
|
|
|
|
|
|
|
# Create the message token from an addresser and a recipient |
16
|
|
|
|
|
|
|
# @param [String] addr1 A sender's email address |
17
|
|
|
|
|
|
|
# @param [String] addr2 A recipient's email address |
18
|
|
|
|
|
|
|
# @param [Integer] epoch Machine time of the email bounce |
19
|
|
|
|
|
|
|
# @return [String] Message token(MD5 hex digest) or empty string |
20
|
|
|
|
|
|
|
# if the any argument is missing |
21
|
|
|
|
|
|
|
# @see http://en.wikipedia.org/wiki/ASCII |
22
|
|
|
|
|
|
|
# @see https://metacpan.org/pod/Digest::MD5 |
23
|
2968
|
|
50
|
2968
|
1
|
73702
|
my $class = shift || return ''; |
24
|
2968
|
|
100
|
|
|
5738
|
my $addr1 = shift || return ''; |
25
|
2967
|
|
100
|
|
|
5177
|
my $addr2 = shift || return ''; |
26
|
2966
|
|
100
|
|
|
5560
|
my $epoch = shift // return ''; |
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
# Format: STX(0x02) Sender-Address RS(0x1e) Recipient-Address ETX(0x03) |
29
|
2965
|
|
|
|
|
43063
|
return Digest::SHA::sha1_hex(sprintf("\x02%s\x1e%s\x1e%d\x03", lc $addr1, lc $addr2, $epoch)); |
30
|
|
|
|
|
|
|
} |
31
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
sub is_8bit { |
33
|
|
|
|
|
|
|
# The argument is 8-bit text or not |
34
|
|
|
|
|
|
|
# @param [String] argv1 Any string to be checked |
35
|
|
|
|
|
|
|
# @return [Integer] 0: ASCII Characters only |
36
|
|
|
|
|
|
|
# 1: Including 8-bit character |
37
|
2715
|
|
|
2715
|
1
|
3790
|
my $class = shift; |
38
|
2715
|
|
50
|
|
|
5348
|
my $argv1 = shift // return undef; |
39
|
|
|
|
|
|
|
|
40
|
2715
|
50
|
|
|
|
8015
|
return undef unless ref $argv1 eq 'SCALAR'; |
41
|
2715
|
100
|
|
|
|
9538
|
return 1 unless $$argv1 =~ /\A[\x00-\x7f]+\z/; |
42
|
2675
|
|
|
|
|
7595
|
return 0; |
43
|
|
|
|
|
|
|
} |
44
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
sub sweep { |
46
|
|
|
|
|
|
|
# Clean the string out |
47
|
|
|
|
|
|
|
# @param [String] argv1 String to be cleaned |
48
|
|
|
|
|
|
|
# @return [Scalar] Cleaned out string |
49
|
|
|
|
|
|
|
# @example Clean up text |
50
|
|
|
|
|
|
|
# sweep(' neko ') #=> 'neko' |
51
|
3115
|
|
|
3115
|
1
|
5163
|
my $class = shift; |
52
|
3115
|
|
100
|
|
|
6104
|
my $argv1 = shift // return undef; |
53
|
|
|
|
|
|
|
|
54
|
3114
|
|
|
|
|
5407
|
chomp $argv1; |
55
|
3114
|
|
|
|
|
8972
|
$argv1 =~ y/ //s; |
56
|
3114
|
|
|
|
|
5104
|
$argv1 =~ y/\t//d; |
57
|
3114
|
100
|
|
|
|
10654
|
$argv1 =~ s/\A //g if index($argv1, ' ') == 0; |
58
|
3114
|
100
|
|
|
|
9508
|
$argv1 =~ s/ \z//g if substr($argv1, -1, 1) eq ' '; |
59
|
3114
|
|
|
|
|
5990
|
$argv1 =~ s/ [-]{2,}[^ \t].+\z//; |
60
|
3114
|
|
|
|
|
8651
|
return $argv1; |
61
|
|
|
|
|
|
|
} |
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
sub to_plain { |
64
|
|
|
|
|
|
|
# Convert given HTML text to plain text |
65
|
|
|
|
|
|
|
# @param [Scalar] argv1 HTML text(reference to string) |
66
|
|
|
|
|
|
|
# @param [Integer] loose Loose check flag |
67
|
|
|
|
|
|
|
# @return [Scalar] Plain text(reference to string) |
68
|
24
|
|
|
24
|
1
|
6710
|
my $class = shift; |
69
|
24
|
|
50
|
|
|
81
|
my $argv1 = shift // return \''; |
70
|
24
|
|
100
|
|
|
120
|
my $loose = shift // 0; |
71
|
24
|
50
|
|
|
|
80
|
return \'' unless ref $argv1 eq 'SCALAR'; |
72
|
|
|
|
|
|
|
|
73
|
24
|
|
|
|
|
55
|
my $plain = $$argv1; |
74
|
24
|
|
|
|
|
69
|
state $match = { |
75
|
|
|
|
|
|
|
'html' => qr|].+?|sim, |
76
|
|
|
|
|
|
|
'body' => qr|.+.*].+|sim, |
77
|
|
|
|
|
|
|
}; |
78
|
|
|
|
|
|
|
|
79
|
24
|
100
|
100
|
|
|
1997
|
if( $loose || $plain =~ $match->{'html'} || $plain =~ $match->{'body'} ) { |
|
|
|
66
|
|
|
|
|
80
|
|
|
|
|
|
|
# ... |
81
|
|
|
|
|
|
|
# 1. Remove ... |
82
|
|
|
|
|
|
|
# 2. Remove |
83
|
|
|
|
|
|
|
# 3. ... to " http://... " |
84
|
|
|
|
|
|
|
# 4. ... to " Value " |
85
|
23
|
|
|
|
|
4134
|
$plain =~ s|.*?||gsim; |
86
|
23
|
|
|
|
|
4565
|
$plain =~ s|.*?||gsim; |
87
|
23
|
|
|
|
|
6155
|
$plain =~ s|(.*?)| [$2]($1) |gsim; |
88
|
23
|
|
|
|
|
3446
|
$plain =~ s|(.*?)| [$2](mailto:$1) |gsim; |
89
|
|
|
|
|
|
|
|
90
|
23
|
|
|
|
|
12099
|
$plain =~ s/<[^<@>]+?>\s*/ /g; # Delete HTML tags except |
91
|
23
|
|
|
|
|
100
|
$plain =~ s/</
|
92
|
23
|
|
|
|
|
57
|
$plain =~ s/>/>/g; # Convert to right angle brackets |
93
|
23
|
|
|
|
|
59
|
$plain =~ s/&/&/g; # Convert to "&" |
94
|
23
|
|
|
|
|
71
|
$plain =~ s/"/"/g; # Convert to '"' |
95
|
23
|
|
|
|
|
49
|
$plain =~ s/'/'/g; # Convert to "'" |
96
|
23
|
|
|
|
|
170
|
$plain =~ s/ / /g; # Convert to ' ' |
97
|
|
|
|
|
|
|
|
98
|
23
|
50
|
|
|
|
105
|
if( length($$argv1) > length($plain) ) { |
99
|
23
|
|
|
|
|
221
|
$plain =~ y/ //s; |
100
|
23
|
|
|
|
|
54
|
$plain .= "\n" |
101
|
|
|
|
|
|
|
} |
102
|
|
|
|
|
|
|
} |
103
|
24
|
|
|
|
|
132
|
return \$plain; |
104
|
|
|
|
|
|
|
} |
105
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
sub to_utf8 { |
107
|
|
|
|
|
|
|
# Convert given string to UTF-8 |
108
|
|
|
|
|
|
|
# @param [String] argv1 String to be converted |
109
|
|
|
|
|
|
|
# @param [String] argv2 Encoding name before converting |
110
|
|
|
|
|
|
|
# @return [String] UTF-8 Encoded string |
111
|
60
|
|
|
60
|
1
|
153
|
my $class = shift; |
112
|
60
|
|
50
|
|
|
252
|
my $argv1 = shift || return \''; |
113
|
60
|
|
|
|
|
140
|
my $argv2 = shift; |
114
|
|
|
|
|
|
|
|
115
|
60
|
|
|
|
|
111
|
my $tobeutf8ed = $$argv1; |
116
|
60
|
|
50
|
|
|
206
|
my $encodefrom = lc $argv2 || ''; |
117
|
60
|
|
|
|
|
95
|
my $hasencoded = undef; |
118
|
60
|
|
|
|
|
515
|
my $hasguessed = Encode::Guess->guess($tobeutf8ed); |
119
|
60
|
100
|
|
|
|
65507
|
my $encodingto = ref $hasguessed ? lc($hasguessed->name) : ''; |
120
|
60
|
|
|
|
|
135
|
state $dontencode = qr/\A(?>utf[-]?8|(?:us[-])?ascii)\z/; |
121
|
|
|
|
|
|
|
|
122
|
60
|
50
|
|
|
|
187
|
if( $encodefrom ) { |
123
|
|
|
|
|
|
|
# The 2nd argument is a encoding name of the 1st argument |
124
|
60
|
|
|
|
|
117
|
while(1) { |
125
|
|
|
|
|
|
|
# Encode a given string when the encoding of the string is neigther |
126
|
|
|
|
|
|
|
# utf8 nor ascii. |
127
|
60
|
100
|
|
|
|
494
|
last if $encodefrom =~ $dontencode; |
128
|
59
|
100
|
|
|
|
298
|
last if $encodingto =~ $dontencode; |
129
|
|
|
|
|
|
|
|
130
|
49
|
|
|
|
|
105
|
eval { |
131
|
|
|
|
|
|
|
# Try to convert the string to UTF-8 |
132
|
49
|
|
|
|
|
214
|
Encode::from_to($tobeutf8ed, $encodefrom, 'utf8'); |
133
|
49
|
|
|
|
|
9312
|
$hasencoded = 1; |
134
|
|
|
|
|
|
|
}; |
135
|
49
|
|
|
|
|
121
|
last; |
136
|
|
|
|
|
|
|
} |
137
|
|
|
|
|
|
|
} |
138
|
60
|
100
|
|
|
|
337
|
return \$tobeutf8ed if $hasencoded; |
139
|
11
|
50
|
|
|
|
37
|
return \$tobeutf8ed unless $encodingto; |
140
|
11
|
50
|
|
|
|
116
|
return \$tobeutf8ed if $encodingto =~ $dontencode; |
141
|
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
# a. The 2nd argument was not given or failed to convert from $encodefrom to UTF-8 |
143
|
|
|
|
|
|
|
# b. Guessed encoding name is available, try to encode using it. |
144
|
|
|
|
|
|
|
# c. Encode a given string when the encoding of the string is neigther utf8 nor ascii. |
145
|
0
|
|
|
|
|
|
eval { Encode::from_to($tobeutf8ed, $encodingto, 'utf8') }; |
|
0
|
|
|
|
|
|
|
146
|
0
|
|
|
|
|
|
return \$tobeutf8ed; |
147
|
|
|
|
|
|
|
} |
148
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
1; |
150
|
|
|
|
|
|
|
__END__ |