| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package Sisimai::String; |
|
2
|
83
|
|
|
83
|
|
56477
|
use feature ':5.10'; |
|
|
83
|
|
|
|
|
132
|
|
|
|
83
|
|
|
|
|
6141
|
|
|
3
|
83
|
|
|
83
|
|
465
|
use strict; |
|
|
83
|
|
|
|
|
166
|
|
|
|
83
|
|
|
|
|
1423
|
|
|
4
|
83
|
|
|
83
|
|
324
|
use warnings; |
|
|
83
|
|
|
|
|
154
|
|
|
|
83
|
|
|
|
|
1890
|
|
|
5
|
83
|
|
|
83
|
|
40236
|
use Encode; |
|
|
83
|
|
|
|
|
723381
|
|
|
|
83
|
|
|
|
|
5515
|
|
|
6
|
83
|
|
|
83
|
|
35753
|
use Digest::SHA; |
|
|
83
|
|
|
|
|
217998
|
|
|
|
83
|
|
|
|
|
6214
|
|
|
7
|
|
|
|
|
|
|
|
|
8
|
|
|
|
|
|
|
my $EncodingsC = [qw/big5-eten gb2312/]; |
|
9
|
|
|
|
|
|
|
my $EncodingsE = [qw/iso-8859-1/]; |
|
10
|
|
|
|
|
|
|
my $EncodingsJ = [qw/7bit-jis iso-2022-jp euc-jp shiftjis/]; |
|
11
|
83
|
|
|
83
|
|
36177
|
use Encode::Guess; Encode::Guess->add_suspects(@$EncodingsC, @$EncodingsE, @$EncodingsJ); |
|
|
83
|
|
|
|
|
284631
|
|
|
|
83
|
|
|
|
|
283
|
|
|
12
|
17
|
|
|
17
|
0
|
188
|
sub encodenames { return [@$EncodingsC, @$EncodingsE, @$EncodingsJ] }; |
|
13
|
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
sub token { |
|
15
|
|
|
|
|
|
|
# Create the message token from an addresser and a recipient |
|
16
|
|
|
|
|
|
|
# @param [String] addr1 A sender's email address |
|
17
|
|
|
|
|
|
|
# @param [String] addr2 A recipient's email address |
|
18
|
|
|
|
|
|
|
# @param [Integer] epoch Machine time of the email bounce |
|
19
|
|
|
|
|
|
|
# @return [String] Message token (SHA-1 hex digest) or empty string |
|
20
|
|
|
|
|
|
|
# if any argument is missing |
|
21
|
|
|
|
|
|
|
# @see http://en.wikipedia.org/wiki/ASCII |
|
22
|
|
|
|
|
|
|
# @see https://metacpan.org/pod/Digest::SHA |
|
23
|
2938
|
|
50
|
2938
|
1
|
66043
|
my $class = shift || return ''; |
|
24
|
2938
|
|
100
|
|
|
5224
|
my $addr1 = shift || return ''; |
|
25
|
2937
|
|
100
|
|
|
5363
|
my $addr2 = shift || return ''; |
|
26
|
2936
|
|
100
|
|
|
5680
|
my $epoch = shift // return ''; |
|
27
|
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
# Format: STX(0x02) Sender-Address RS(0x1e) Recipient-Address RS(0x1e) Epoch ETX(0x03) |
|
29
|
2935
|
|
|
|
|
38970
|
return Digest::SHA::sha1_hex(sprintf("\x02%s\x1e%s\x1e%d\x03", lc $addr1, lc $addr2, $epoch)); |
|
30
|
|
|
|
|
|
|
} |
|
31
|
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
sub is_8bit { |
|
33
|
|
|
|
|
|
|
# The argument is 8-bit text or not |
|
34
|
|
|
|
|
|
|
# @param [Scalar] argv1 Any string to be checked (reference to string) |
|
35
|
|
|
|
|
|
|
# @return [Integer] 0: ASCII Characters only |
|
36
|
|
|
|
|
|
|
# 1: Including 8-bit character |
|
37
|
2685
|
|
|
2685
|
1
|
3936
|
my $class = shift; |
|
38
|
2685
|
|
50
|
|
|
5043
|
my $argv1 = shift // return undef; |
|
39
|
|
|
|
|
|
|
|
|
40
|
2685
|
50
|
|
|
|
6858
|
return undef unless ref $argv1 eq 'SCALAR'; |
|
41
|
2685
|
100
|
|
|
|
9643
|
return 1 unless $$argv1 =~ /\A[\x00-\x7f]+\z/; |
|
42
|
2645
|
|
|
|
|
7206
|
return 0; |
|
43
|
|
|
|
|
|
|
} |
|
44
|
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
sub sweep { |
|
46
|
|
|
|
|
|
|
# Clean the string out |
|
47
|
|
|
|
|
|
|
# @param [String] argv1 String to be cleaned |
|
48
|
|
|
|
|
|
|
# @return [Scalar] Cleaned out string |
|
49
|
|
|
|
|
|
|
# @example Clean up text |
|
50
|
|
|
|
|
|
|
# sweep(' neko ') #=> 'neko' |
|
51
|
3085
|
|
|
3085
|
1
|
6012
|
my $class = shift; |
|
52
|
3085
|
|
100
|
|
|
5908
|
my $argv1 = shift // return undef; |
|
53
|
|
|
|
|
|
|
|
|
54
|
3084
|
|
|
|
|
4699
|
chomp $argv1; |
|
55
|
3084
|
|
|
|
|
7188
|
$argv1 =~ y/ //s; |
|
56
|
3084
|
|
|
|
|
5587
|
$argv1 =~ y/\t//d; |
|
57
|
3084
|
100
|
|
|
|
9181
|
$argv1 =~ s/\A //g if index($argv1, ' ') == 0; |
|
58
|
3084
|
100
|
|
|
|
8530
|
$argv1 =~ s/ \z//g if substr($argv1, -1, 1) eq ' '; |
|
59
|
3084
|
|
|
|
|
6188
|
$argv1 =~ s/ [-]{2,}[^ \t].+\z//; |
|
60
|
3084
|
|
|
|
|
6946
|
return $argv1; |
|
61
|
|
|
|
|
|
|
} |
|
62
|
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
sub to_plain { |
|
64
|
|
|
|
|
|
|
# Convert given HTML text to plain text |
|
65
|
|
|
|
|
|
|
# @param [Scalar] argv1 HTML text(reference to string) |
|
66
|
|
|
|
|
|
|
# @param [Integer] loose Loose check flag |
|
67
|
|
|
|
|
|
|
# @return [Scalar] Plain text(reference to string) |
|
68
|
24
|
|
|
24
|
1
|
5493
|
my $class = shift; |
|
69
|
24
|
|
50
|
|
|
71
|
my $argv1 = shift // return \''; |
|
70
|
24
|
|
100
|
|
|
95
|
my $loose = shift // 0; |
|
71
|
24
|
50
|
|
|
|
91
|
return \'' unless ref $argv1 eq 'SCALAR'; |
|
72
|
|
|
|
|
|
|
|
|
73
|
24
|
|
|
|
|
43
|
my $plain = $$argv1; |
|
74
|
24
|
|
|
|
|
60
|
state $match = { |
|
75
|
|
|
|
|
|
|
'html' => qr|].+?|sim, |
|
76
|
|
|
|
|
|
|
'body' => qr|.+.*].+|sim, |
|
77
|
|
|
|
|
|
|
}; |
|
78
|
|
|
|
|
|
|
|
|
79
|
24
|
100
|
100
|
|
|
1620
|
if( $loose || $plain =~ $match->{'html'} || $plain =~ $match->{'body'} ) { |
|
|
|
|
66
|
|
|
|
|
|
80
|
|
|
|
|
|
|
# ... |
|
81
|
|
|
|
|
|
|
# 1. Remove ... |
|
82
|
|
|
|
|
|
|
# 2. Remove |
|
83
|
|
|
|
|
|
|
# 3. ... to " http://... " |
|
84
|
|
|
|
|
|
|
# 4. ... to " Value " |
|
85
|
23
|
|
|
|
|
8336
|
$plain =~ s|.+||gsim; |
|
86
|
23
|
|
|
|
|
7859
|
$plain =~ s|.+||gsim; |
|
87
|
23
|
|
|
|
|
5118
|
$plain =~ s|(.*?)| [$2]($1) |gsim; |
|
88
|
23
|
|
|
|
|
2825
|
$plain =~ s|(.*?)| [$2](mailto:$1) |gsim; |
|
89
|
|
|
|
|
|
|
|
|
90
|
23
|
|
|
|
|
9806
|
$plain =~ s/<[^<@>]+?>\s*/ /g; # Delete HTML tags except bracketed text containing "@" (e.g. an email address) |
|
91
|
23
|
|
|
|
|
76
|
$plain =~ s/&lt;/</g; # Convert to left angle brackets |
|
|
92
|
23
|
|
|
|
|
53
|
$plain =~ s/&gt;/>/g; # Convert to right angle brackets |
|
93
|
23
|
|
|
|
|
57
|
$plain =~ s/&amp;/&/g; # Convert to "&" |
|
94
|
23
|
|
|
|
|
74
|
$plain =~ s/&quot;/"/g; # Convert to '"' |
|
95
|
23
|
|
|
|
|
43
|
$plain =~ s/&apos;/'/g; # Convert to "'" |
|
96
|
23
|
|
|
|
|
154
|
$plain =~ s/&nbsp;/ /g; # Convert to ' ' |
|
97
|
|
|
|
|
|
|
|
|
98
|
23
|
50
|
|
|
|
106
|
if( length($$argv1) > length($plain) ) { |
|
99
|
23
|
|
|
|
|
186
|
$plain =~ y/ //s; |
|
100
|
23
|
|
|
|
|
46
|
$plain .= "\n" |
|
101
|
|
|
|
|
|
|
} |
|
102
|
|
|
|
|
|
|
} |
|
103
|
24
|
|
|
|
|
136
|
return \$plain; |
|
104
|
|
|
|
|
|
|
} |
|
105
|
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
sub to_utf8 { |
|
107
|
|
|
|
|
|
|
# Convert given string to UTF-8 |
|
108
|
|
|
|
|
|
|
# @param [Scalar] argv1 String to be converted (reference to string) |
|
109
|
|
|
|
|
|
|
# @param [String] argv2 Encoding name before converting |
|
110
|
|
|
|
|
|
|
# @return [Scalar] UTF-8 encoded string (reference to string) |
|
111
|
60
|
|
|
60
|
1
|
125
|
my $class = shift; |
|
112
|
60
|
|
50
|
|
|
174
|
my $argv1 = shift || return \''; |
|
113
|
60
|
|
|
|
|
122
|
my $argv2 = shift; |
|
114
|
|
|
|
|
|
|
|
|
115
|
60
|
|
|
|
|
113
|
my $tobeutf8ed = $$argv1; |
|
116
|
60
|
|
50
|
|
|
183
|
my $encodefrom = lc $argv2 || ''; |
|
117
|
60
|
|
|
|
|
89
|
my $hasencoded = undef; |
|
118
|
60
|
|
|
|
|
517
|
my $hasguessed = Encode::Guess->guess($tobeutf8ed); |
|
119
|
60
|
100
|
|
|
|
53500
|
my $encodingto = ref $hasguessed ? lc($hasguessed->name) : ''; |
|
120
|
60
|
|
|
|
|
123
|
state $dontencode = qr/\A(?>utf[-]?8|(?:us[-])?ascii)\z/; |
|
121
|
|
|
|
|
|
|
|
|
122
|
60
|
50
|
|
|
|
199
|
if( $encodefrom ) { |
|
123
|
|
|
|
|
|
|
# The 2nd argument is an encoding name of the 1st argument |
|
124
|
60
|
|
|
|
|
98
|
while(1) { |
|
125
|
|
|
|
|
|
|
# Encode a given string when the encoding of the string is neither |
|
126
|
|
|
|
|
|
|
# utf8 nor ascii. |
|
127
|
60
|
100
|
|
|
|
440
|
last if $encodefrom =~ $dontencode; |
|
128
|
59
|
100
|
|
|
|
282
|
last if $encodingto =~ $dontencode; |
|
129
|
|
|
|
|
|
|
|
|
130
|
49
|
|
|
|
|
101
|
eval { |
|
131
|
|
|
|
|
|
|
# Try to convert the string to UTF-8 |
|
132
|
49
|
|
|
|
|
197
|
Encode::from_to($tobeutf8ed, $encodefrom, 'utf8'); |
|
133
|
49
|
|
|
|
|
7504
|
$hasencoded = 1; |
|
134
|
|
|
|
|
|
|
}; |
|
135
|
49
|
|
|
|
|
105
|
last; |
|
136
|
|
|
|
|
|
|
} |
|
137
|
|
|
|
|
|
|
} |
|
138
|
60
|
100
|
|
|
|
272
|
return \$tobeutf8ed if $hasencoded; |
|
139
|
11
|
50
|
|
|
|
30
|
return \$tobeutf8ed unless $encodingto; |
|
140
|
11
|
50
|
|
|
|
92
|
return \$tobeutf8ed if $encodingto =~ $dontencode; |
|
141
|
|
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
# a. The 2nd argument was not given or failed to convert from $encodefrom to UTF-8 |
|
143
|
|
|
|
|
|
|
# b. Guessed encoding name is available, try to encode using it. |
|
144
|
|
|
|
|
|
|
# c. Encode a given string when the encoding of the string is neither utf8 nor ascii. |
|
145
|
0
|
|
|
|
|
|
eval { Encode::from_to($tobeutf8ed, $encodingto, 'utf8') }; |
|
|
0
|
|
|
|
|
|
|
|
146
|
0
|
|
|
|
|
|
return \$tobeutf8ed; |
|
147
|
|
|
|
|
|
|
} |
|
148
|
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
1; |
|
150
|
|
|
|
|
|
|
__END__ |