File Coverage

lib/Sisimai/Message.pm
Criterion Covered Total %
statement 189 196 96.4
branch 76 100 76.0
condition 26 35 74.2
subroutine 15 15 100.0
pod 1 5 20.0
total 307 351 87.4


line stmt bran cond sub pod time code
1             package Sisimai::Message;
2 80     80   153995 use feature ':5.10';
  80         170  
  80         5644  
3 80     80   394 use strict;
  80         120  
  80         1277  
4 80     80   316 use warnings;
  80         113  
  80         1730  
5 80     80   20517 use Sisimai::RFC5322;
  80         157  
  80         2238  
6 80     80   24286 use Sisimai::Address;
  80         185  
  80         2400  
7 80     80   21204 use Sisimai::String;
  80         235  
  80         2891  
8 80     80   29795 use Sisimai::Order;
  80         179  
  80         2100  
9 80     80   435 use Sisimai::Lhost;
  80         144  
  80         1207  
10 80     80   29354 use Sisimai::MIME;
  80         225  
  80         3709  
11             use Class::Accessor::Lite (
12 80         772 'new' => 0,
13             'rw' => [
14             'from', # [String] UNIX From line
15             'header', # [Hash] Header part of an email
16             'ds', # [Array] Parsed data by Sisimai::Lhost
17             'rfc822', # [Hash] Header part of the original message
18             'catch' # [Any] The results returned by hook method
19             ]
20 80     80   580 );
  80         143  
21              
22             my $ToBeLoaded = [];
23             my $TryOnFirst = [];
24              
25             sub new {
26             # Constructor of Sisimai::Message
27             # @param [Hash] argvs Email text data
28             # @options argvs [String] data Entire email message
29             # @options argvs [Array] load User defined MTA module list
30             # @options argvs [Array] order The order of MTA modules
31             # @options argvs [Code] hook Reference to callback method
32             # @return [Sisimai::Message] Structured email data or Undef if each
33             # value of the arguments are missing
34 2792     2792 1 19410 my $class = shift;
35 2792         6444 my $argvs = { @_ };
36 2792         3960 my $param = {};
37 2792   50     6621 my $email = $argvs->{'data'} || return undef;
38 2792         12422 my $thing = { 'from' => '', 'header' => {}, 'rfc822' => '', 'ds' => [], 'catch' => undef };
39              
40             # 1. Load specified MTA modules
41 2792         5820 for my $e ('load', 'order') {
42             # Order of MTA modules
43 5584 100       11979 next unless exists $argvs->{ $e };
44 1 50       5 next unless ref $argvs->{ $e } eq 'ARRAY';
45 1 50       2 next unless scalar @{ $argvs->{ $e } };
  1         3  
46 1         2 $param->{ $e } = $argvs->{ $e };
47             }
48 2792         8235 $ToBeLoaded = __PACKAGE__->load(%$param);
49              
50             # 2. Split email data to headers and a body part.
51 2792 50       7412 return undef unless my $aftersplit = __PACKAGE__->divideup(\$email);
52              
53             # 3. Convert email headers from text to hash reference
54 2792         5427 $thing->{'from'} = $aftersplit->[0];
55 2792         8036 $thing->{'header'} = __PACKAGE__->makemap(\$aftersplit->[1]);
56              
57             # 4. Decode and rewrite the "Subject:" header
58 2792 50       6299 if( $thing->{'header'}->{'subject'} ) {
59             # Decode MIME-Encoded "Subject:" header
60 2792         4466 my $s = $thing->{'header'}->{'subject'};
61 2792 100       14955 my $q = Sisimai::MIME->is_mimeencoded(\$s) ? Sisimai::MIME->mimedecode([split(/[ ]/, $s)]) : $s;
62              
63             # Remove "Fwd:" string from the "Subject:" header
64 2792 100       8272 if( lc($q) =~ /\A[ \t]*fwd?:[ ]*(.*)\z/ ) {
65             # Delete quoted strings, quote symbols(>)
66 29         99 $q = $1;
67 29         299 $aftersplit->[2] =~ s/^[>]+[ ]//gm;
68 29         136 $aftersplit->[2] =~ s/^[>]$//gm;
69             }
70 2792         5613 $thing->{'header'}->{'subject'} = $q;
71             }
72              
73             # 5. Rewrite message body for detecting the bounce reason
74 2792         14027 $TryOnFirst = Sisimai::Order->make($thing->{'header'}->{'subject'});
75 2792   100     15513 $param = { 'hook' => $argvs->{'hook'} || undef, 'mail' => $thing, 'body' => \$aftersplit->[2] };
76 2792 100       10474 return undef unless my $bouncedata = __PACKAGE__->parse(%$param);
77 2783 50       8102 return undef unless keys %$bouncedata;
78              
79             # 6. Rewrite headers of the original message in the body part
80 2783         11320 $thing->{ $_ } = $bouncedata->{ $_ } for ('ds', 'catch', 'rfc822');
81 2783   66     7813 my $r = $bouncedata->{'rfc822'} || $aftersplit->[2];
82 2783 100       10073 $thing->{'rfc822'} = ref $r ? $r : __PACKAGE__->makemap(\$r, 1);
83              
84 2783         18215 return bless($thing, $class);
85             }
86              
87             sub load {
88             # Load MTA modules which specified at 'order' and 'load' in the argument
89             # @param [Hash] argvs Module information to be loaded
90             # @options argvs [Array] load User defined MTA module list
91             # @options argvs [Array] order The order of MTA modules
92             # @return [Array] Module list
93             # @since v4.20.0
94 2793     2793 0 4847 my $class = shift;
95 2793         4037 my $argvs = { @_ };
96              
97 2793         3606 my @modulelist;
98 2793         3637 my $tobeloaded = [];
99              
100 2793         4060 for my $e ('load', 'order') {
101             # The order of MTA modules specified by user
102 5586 100       10224 next unless exists $argvs->{ $e };
103 1 50       3 next unless ref $argvs->{ $e } eq 'ARRAY';
104 1 50       1 next unless scalar @{ $argvs->{ $e } };
  1         3  
105              
106 1 50       2 push @modulelist, @{ $argvs->{'order'} } if $e eq 'order';
  1         3  
107 1 50       2 next unless $e eq 'load';
108              
109             # Load user defined MTA module
110 0         0 for my $v ( @{ $argvs->{'load'} } ) {
  0         0  
111             # Load user defined MTA module
112 0         0 eval {
113 0         0 (my $modulepath = $v) =~ s|::|/|g;
114 0         0 require $modulepath.'.pm';
115             };
116 0 0       0 next if $@;
117 0         0 push @$tobeloaded, $v;
118             }
119             }
120              
121 2793         4376 for my $e ( @modulelist ) {
122             # Append the custom order of MTA modules
123 6 50       7 next if grep { $e eq $_ } @$tobeloaded;
  15         18  
124 6         8 push @$tobeloaded, $e;
125             }
126 2793         6584 return $tobeloaded;
127             }
128              
129             sub divideup {
130             # Divide email data up headers and a body part.
131             # @param [String] email Email data
132             # @return [Array] Email data after split
133             # @since v4.14.0
134 2792     2792 0 3823 my $class = shift;
135 2792   50     5186 my $email = shift // return undef;
136 2792         6445 my $block = ['', '', '']; # 0:From, 1:Header, 2:Body
137              
138 2792 100       26349 $$email =~ s/\r\n/\n/gm if rindex($$email, "\r\n") > -1;
139 2792 50       108053 $$email =~ s/[ \t]+$//gm if $$email =~ /[ \t]+$/;
140              
141 2792         20814 ($block->[1], $block->[2]) = split(/\n\n/, $$email, 2);
142 2792 50       6546 return undef unless $block->[1];
143 2792 50       5324 return undef unless $block->[2];
144              
145 2792 100       7303 if( substr($block->[1], 0, 5) eq 'From ' ) {
146             # From MAILER-DAEMON Tue Feb 11 00:00:00 2014
147 387         1457 $block->[0] = [split(/\n/, $block->[1], 2)]->[0];
148 387         1036 $block->[0] =~ y/\r\n//d;
149             } else {
150             # Set pseudo UNIX From line
151 2405         3835 $block->[0] = 'MAILER-DAEMON Tue Feb 11 00:00:00 2014';
152             }
153              
154 2792 50       10774 $block->[1] .= "\n" unless $block->[1] =~ /\n\z/;
155 2792         6777 $block->[2] .= "\n";
156 2792         6577 return $block;
157             }
158              
159             sub makemap {
160             # Convert a text including email headers to a hash reference
161             # @param [String] argv0 Email header data
162             # @param [Bool] argv1 Decode "Subject:" header
163             # @return [Hash] Structured email header data
164             # @since v4.25.6
165 5550     5550 0 9041 my $class = shift;
166 5550   50     12294 my $argv0 = shift || return {};
167 5550   100     11564 my $argv1 = shift || 0;
168              
169 5550         10983 $$argv0 =~ s/^[>]+[ ]//mg; # Remove '>' indent symbol of forwarded message
170              
171             # Select and convert all the headers in $argv0. The following regular expression
172             # is based on https://gist.github.com/xtetsuji/b080e1f5551d17242f6415aba8a00239
173 5550         145847 my $firstpairs = { $$argv0 =~ /^([\w-]+):[ ]*(.*?)\n(?![\s\t])/gms };
174 5550         16277 my $headermaps = { 'subject' => '' };
175 5550         7511 my $recvheader = [];
176              
177              
178 5550         64518 $headermaps->{ lc $_ } = $firstpairs->{ $_ } for keys %$firstpairs;
179 5550         15514 for my $e ( values %$headermaps ) { $e =~ s/\n\s+/ /; $e =~ y/\t / /s }
  66642         80153  
  66642         83551  
180              
181 5550 100       15951 if( $$argv0 =~ /^Received:/m ) {
182             # Capture values of each Received: header
183 4858         32427 $recvheader = [$$argv0 =~ /^Received:[ ]*(.*?)\n(?![\s\t])/gms];
184 4858         8250 for my $e ( @$recvheader ) { $e =~ s/\n\s+/ /; $e =~ y/\n\t / /s }
  9462         23984  
  9462         17305  
185             }
186 5550         7820 $headermaps->{'received'} = $recvheader;
187              
188 5550 100       17967 return $headermaps unless $argv1;
189 2758 100       7391 return $headermaps unless length $headermaps->{'subject'};
190              
191             # Convert MIME-Encoded subject
192 2537 100       8962 if( Sisimai::String->is_8bit(\$headermaps->{'subject'}) ) {
193             # The value of ``Subject'' header is including multibyte character,
194             # is not MIME-Encoded text.
195 34         67 eval {
196             # Remove invalid byte sequence
197 34         156 Encode::decode_utf8($headermaps->{'subject'});
198 34         1055 Encode::encode_utf8($headermaps->{'subject'});
199             };
200 34 50       285 $headermaps->{'subject'} = 'MULTIBYTE CHARACTERS HAVE BEEN REMOVED' if $@;
201              
202             } else {
203             # MIME-Encoded subject field or ASCII characters only
204 2503         4385 my $r = [];
205 2503 100       6774 if( Sisimai::MIME->is_mimeencoded(\$headermaps->{'subject'}) ) {
206             # split the value of Subject by $borderline
207 241         765 for my $v ( split(/ /, $headermaps->{'subject'}) ) {
208             # Insert value to the array if the string is MIME encoded text
209 306 100       777 push @$r, $v if Sisimai::MIME->is_mimeencoded(\$v);
210             }
211             } else {
212             # Subject line is not MIME encoded
213 2262         4633 $r = [$headermaps->{'subject'}];
214             }
215 2503         6614 $headermaps->{'subject'} = Sisimai::MIME->mimedecode($r);
216             }
217 2537         12024 return $headermaps;
218             }
219              
220             sub parse {
221             # Parse bounce mail with each MTA module
222             # @param [Hash] argvs Processing message entity.
223             # @param options argvs [Hash] mail Email message entity
224             # @param options mail [String] from From line of mbox
225             # @param options mail [Hash] header Email header data
226             # @param options mail [String] rfc822 Original message part
227             # @param options mail [Array] ds Delivery status list(parsed data)
228             # @param options argvs [String] body Email message body
229             # @param options argvs [Code] hook Hook method to be called
230             # @return [Hash] Parsed and structured bounce mails
231 2792     2792 0 4857 my $class = shift;
232 2792         6397 my $argvs = { @_ };
233              
234 2792   50     6725 my $mailheader = $argvs->{'mail'}->{'header'} || return '';
235 2792   50     5804 my $bodystring = $argvs->{'body'} || return '';
236 2792   100     6876 my $hookmethod = $argvs->{'hook'} || undef;
237 2792         3181 my $havecaught = undef;
238              
239 2792         3191 state $defaultset = Sisimai::Order->another;
240 2792         3443 state $lhosttable = Sisimai::Lhost->path;
241              
242 2792   100     6115 $mailheader->{'from'} //= '';
243 2792   50     4900 $mailheader->{'subject'} //= '';
244 2792   100     6203 $mailheader->{'content-type'} //= '';
245              
246             # Decode BASE64 Encoded message body
247 2792   100     7575 my $mesgformat = lc($mailheader->{'content-type'} || '');
248 2792   100     8453 my $ctencoding = lc($mailheader->{'content-transfer-encoding'} || '');
249 2792 100 66     11255 if( index($mesgformat, 'text/plain') == 0 || index($mesgformat, 'text/html') == 0 ) {
250             # Content-Type: text/plain; charset=UTF-8
251 438 100       1655 if( $ctencoding eq 'base64' ) {
    100          
252             # Content-Transfer-Encoding: base64
253 5         18 $bodystring = Sisimai::MIME->base64d($bodystring);
254              
255             } elsif( $ctencoding eq 'quoted-printable' ) {
256             # Content-Transfer-Encoding: quoted-printable
257 45         143 $bodystring = Sisimai::MIME->qprintd($bodystring);
258             }
259              
260             # Content-Type: text/html;...
261 438 50       1286 $bodystring = Sisimai::String->to_plain($bodystring, 1) if $mesgformat =~ m|text/html;?|;
262             } else {
263             # NOT text/plain
264 2354 100       5333 if( index($mesgformat, 'multipart/') == 0 ) {
265             # In case of Content-Type: multipart/*
266 1928         5556 my $p = Sisimai::MIME->makeflat($mailheader->{'content-type'}, $bodystring);
267 1928 100       5942 $bodystring = $p if length $$p;
268             }
269             }
270 2792         13820 $$bodystring =~ tr/\r//d;
271              
272 2792 100       6289 if( ref $hookmethod eq 'CODE' ) {
273             # Call hook method
274 548         2420 my $p = { 'headers' => $mailheader, 'message' => $$bodystring };
275 548         1260 eval { $havecaught = $hookmethod->($p) };
  548         2015  
276 548 50       11137 warn sprintf(" ***warning: Something is wrong in hook method:%s", $@) if $@;
277             }
278              
279 2792         3770 my $haveloaded = {};
280 2792         3222 my $parseddata = undef;
281 2792         3766 my $modulename = '';
282 2792         3422 PARSER: while(1) {
283             # 1. User-Defined Module
284             # 2. MTA Module Candidates to be tried on first
285             # 3. Sisimai::Lhost::*
286             # 4. Sisimai::RFC3464
287             # 5. Sisimai::ARF
288             # 6. Sisimai::RFC3834
289 2792         4990 USER_DEFINED: for my $r ( @$ToBeLoaded ) {
290             # Call user defined MTA modules
291 1 50       2 next if exists $haveloaded->{ $r };
292 1         3 $parseddata = $r->make($mailheader, $bodystring);
293 1         3 $haveloaded->{ $r } = 1;
294 1         2 $modulename = $r;
295 1 50       4 last(PARSER) if $parseddata;
296             }
297              
298 2791         5300 TRY_ON_FIRST_AND_DEFAULTS: for my $r ( @$TryOnFirst, @$defaultset ) {
299             # Try MTA module candidates
300 17425 100       24200 next if exists $haveloaded->{ $r };
301 16838         373979 require $lhosttable->{ $r };
302 16838         83145 $parseddata = $r->make($mailheader, $bodystring);
303 16838         24204 $haveloaded->{ $r } = 1;
304 16838         16544 $modulename = $r;
305 16838 100       24874 last(PARSER) if $parseddata;
306             }
307              
308 181 50       428 unless( $haveloaded->{'Sisimai::RFC3464'} ) {
309             # When the all of Sisimai::Lhost::* modules did not return bounce
310             # data, call Sisimai::RFC3464;
311 181         4290 require Sisimai::RFC3464;
312 181         1091 $parseddata = Sisimai::RFC3464->make($mailheader, $bodystring);
313 181         319 $modulename = 'RFC3464';
314 181 100       460 last(PARSER) if $parseddata;
315             }
316              
317 50 50       145 unless( $haveloaded->{'Sisimai::ARF'} ) {
318             # Feedback Loop message
319 50         1840 require Sisimai::ARF;
320 50 100       323 $parseddata = Sisimai::ARF->make($mailheader, $bodystring) if Sisimai::ARF->is_arf($mailheader);
321 50 100       173 last(PARSER) if $parseddata;
322             }
323              
324 15 50       45 unless( $haveloaded->{'Sisimai::RFC3834'} ) {
325             # Try to parse the message as auto reply message defined in RFC3834
326 15         1257 require Sisimai::RFC3834;
327 15         82 $parseddata = Sisimai::RFC3834->make($mailheader, $bodystring);
328 15         30 $modulename = 'RFC3834';
329 15 100       42 last(PARSER) if $parseddata;
330             }
331              
332 9         14 last; # as of now, we have no sample email for coding this block
333             } # End of while(PARSER)
334 2792 100       6447 return undef unless $parseddata;
335              
336 2783         4482 $parseddata->{'catch'} = $havecaught;
337 2783         10300 $modulename =~ s/\A.+:://;
338 2783   66     4664 $_->{'agent'} ||= $modulename for @{ $parseddata->{'ds'} };
  2783         12383  
339 2783         13491 return $parseddata;
340             }
341              
342             1;
343             __END__