File Coverage

lib/Sisimai/String.pm
Criterion Covered Total %
statement 80 83 96.3
branch 22 28 78.5
condition 20 26 76.9
subroutine 12 12 100.0
pod 5 6 83.3
total 139 155 89.6


line stmt bran cond sub pod time code
1             package Sisimai::String;
2 83     83   56477 use feature ':5.10';
  83         132  
  83         6141  
3 83     83   465 use strict;
  83         166  
  83         1423  
4 83     83   324 use warnings;
  83         154  
  83         1890  
5 83     83   40236 use Encode;
  83         723381  
  83         5515  
6 83     83   35753 use Digest::SHA;
  83         217998  
  83         6214  
7              
8             my $EncodingsC = [qw/big5-eten gb2312/];
9             my $EncodingsE = [qw/iso-8859-1/];
10             my $EncodingsJ = [qw/7bit-jis iso-2022-jp euc-jp shiftjis/];
11 83     83   36177 use Encode::Guess; Encode::Guess->add_suspects(@$EncodingsC, @$EncodingsE, @$EncodingsJ);
  83         284631  
  83         283  
12 17     17 0 188 sub encodenames { return [@$EncodingsC, @$EncodingsE, @$EncodingsJ] };
13              
14             sub token {
15             # Create the message token from an addresser and a recipient
16             # @param [String] addr1 A sender's email address
17             # @param [String] addr2 A recipient's email address
18             # @param [Integer] epoch Machine time of the email bounce
19             # @return [String] Message token(MD5 hex digest) or empty string
20             # if the any argument is missing
21             # @see http://en.wikipedia.org/wiki/ASCII
22             # @see https://metacpan.org/pod/Digest::MD5
23 2938   50 2938 1 66043 my $class = shift || return '';
24 2938   100     5224 my $addr1 = shift || return '';
25 2937   100     5363 my $addr2 = shift || return '';
26 2936   100     5680 my $epoch = shift // return '';
27              
28             # Format: STX(0x02) Sender-Address RS(0x1e) Recipient-Address ETX(0x03)
29 2935         38970 return Digest::SHA::sha1_hex(sprintf("\x02%s\x1e%s\x1e%d\x03", lc $addr1, lc $addr2, $epoch));
30             }
31              
32             sub is_8bit {
33             # The argument is 8-bit text or not
34             # @param [String] argv1 Any string to be checked
35             # @return [Integer] 0: ASCII Characters only
36             # 1: Including 8-bit character
37 2685     2685 1 3936 my $class = shift;
38 2685   50     5043 my $argv1 = shift // return undef;
39              
40 2685 50       6858 return undef unless ref $argv1 eq 'SCALAR';
41 2685 100       9643 return 1 unless $$argv1 =~ /\A[\x00-\x7f]+\z/;
42 2645         7206 return 0;
43             }
44              
45             sub sweep {
46             # Clean the string out
47             # @param [String] argv1 String to be cleaned
48             # @return [Scalar] Cleaned out string
49             # @example Clean up text
50             # sweep(' neko ') #=> 'neko'
51 3085     3085 1 6012 my $class = shift;
52 3085   100     5908 my $argv1 = shift // return undef;
53              
54 3084         4699 chomp $argv1;
55 3084         7188 $argv1 =~ y/ //s;
56 3084         5587 $argv1 =~ y/\t//d;
57 3084 100       9181 $argv1 =~ s/\A //g if index($argv1, ' ') == 0;
58 3084 100       8530 $argv1 =~ s/ \z//g if substr($argv1, -1, 1) eq ' ';
59 3084         6188 $argv1 =~ s/ [-]{2,}[^ \t].+\z//;
60 3084         6946 return $argv1;
61             }
62              
63             sub to_plain {
64             # Convert given HTML text to plain text
65             # @param [Scalar] argv1 HTML text(reference to string)
66             # @param [Integer] loose Loose check flag
67             # @return [Scalar] Plain text(reference to string)
68 24     24 1 5493 my $class = shift;
69 24   50     71 my $argv1 = shift // return \'';
70 24   100     95 my $loose = shift // 0;
71 24 50       91 return \'' unless ref $argv1 eq 'SCALAR';
72              
73 24         43 my $plain = $$argv1;
74 24         60 state $match = {
75             'html' => qr|].+?|sim,
76             'body' => qr|.+.*].+|sim,
77             };
78              
79 24 100 100     1620 if( $loose || $plain =~ $match->{'html'} || $plain =~ $match->{'body'} ) {
      66        
80             # ...
81             # 1. Remove ...
82             # 2. Remove
83             # 3. ... to " http://... "
84             # 4. ... to " Value "
85 23         8336 $plain =~ s|.+||gsim;
86 23         7859 $plain =~ s|.+||gsim;
87 23         5118 $plain =~ s|(.*?)| [$2]($1) |gsim;
88 23         2825 $plain =~ s|(.*?)| [$2](mailto:$1) |gsim;
89              
90 23         9806 $plain =~ s/<[^<@>]+?>\s*/ /g; # Delete HTML tags except
91 23         76 $plain =~ s/</
92 23         53 $plain =~ s/>/>/g; # Convert to right angle brackets
93 23         57 $plain =~ s/&/&/g; # Convert to "&"
94 23         74 $plain =~ s/"/"/g; # Convert to '"'
95 23         43 $plain =~ s/'/'/g; # Convert to "'"
96 23         154 $plain =~ s/ / /g; # Convert to ' '
97              
98 23 50       106 if( length($$argv1) > length($plain) ) {
99 23         186 $plain =~ y/ //s;
100 23         46 $plain .= "\n"
101             }
102             }
103 24         136 return \$plain;
104             }
105              
106             sub to_utf8 {
107             # Convert given string to UTF-8
108             # @param [String] argv1 String to be converted
109             # @param [String] argv2 Encoding name before converting
110             # @return [String] UTF-8 Encoded string
111 60     60 1 125 my $class = shift;
112 60   50     174 my $argv1 = shift || return \'';
113 60         122 my $argv2 = shift;
114              
115 60         113 my $tobeutf8ed = $$argv1;
116 60   50     183 my $encodefrom = lc $argv2 || '';
117 60         89 my $hasencoded = undef;
118 60         517 my $hasguessed = Encode::Guess->guess($tobeutf8ed);
119 60 100       53500 my $encodingto = ref $hasguessed ? lc($hasguessed->name) : '';
120 60         123 state $dontencode = qr/\A(?>utf[-]?8|(?:us[-])?ascii)\z/;
121              
122 60 50       199 if( $encodefrom ) {
123             # The 2nd argument is a encoding name of the 1st argument
124 60         98 while(1) {
125             # Encode a given string when the encoding of the string is neigther
126             # utf8 nor ascii.
127 60 100       440 last if $encodefrom =~ $dontencode;
128 59 100       282 last if $encodingto =~ $dontencode;
129              
130 49         101 eval {
131             # Try to convert the string to UTF-8
132 49         197 Encode::from_to($tobeutf8ed, $encodefrom, 'utf8');
133 49         7504 $hasencoded = 1;
134             };
135 49         105 last;
136             }
137             }
138 60 100       272 return \$tobeutf8ed if $hasencoded;
139 11 50       30 return \$tobeutf8ed unless $encodingto;
140 11 50       92 return \$tobeutf8ed if $encodingto =~ $dontencode;
141              
142             # a. The 2nd argument was not given or failed to convert from $encodefrom to UTF-8
143             # b. Guessed encoding name is available, try to encode using it.
144             # c. Encode a given string when the encoding of the string is neigther utf8 nor ascii.
145 0           eval { Encode::from_to($tobeutf8ed, $encodingto, 'utf8') };
  0            
146 0           return \$tobeutf8ed;
147             }
148              
149             1;
150             __END__