File Coverage

lib/Sisimai/Message.pm
Criterion Covered Total %
statement 199 206 96.6
branch 82 106 77.3
condition 24 32 75.0
subroutine 15 15 100.0
pod 1 5 20.0
total 321 364 88.1


line stmt bran cond sub pod time code
1             package Sisimai::Message;
2 82     82   174233 use feature ':5.10';
  82         236  
  82         6128  
3 82     82   466 use strict;
  82         142  
  82         1675  
4 82     82   380 use warnings;
  82         132  
  82         1942  
5 82     82   24822 use Sisimai::RFC5322;
  82         173  
  82         2543  
6 82     82   27949 use Sisimai::Address;
  82         197  
  82         2503  
7 82     82   22146 use Sisimai::String;
  82         243  
  82         3136  
8 82     82   34364 use Sisimai::Order;
  82         254  
  82         2913  
9 82     82   531 use Sisimai::Lhost;
  82         208  
  82         1574  
10 82     82   36600 use Sisimai::MIME;
  82         252  
  82         4230  
11             use Class::Accessor::Lite (
12 82         1054 'new' => 0,
13             'rw' => [
14             'from', # [String] UNIX From line
15             'header', # [Hash] Header part of an email
16             'ds', # [Array] Parsed data by Sisimai::Lhost
17             'rfc822', # [Hash] Header part of the original message
18             'catch' # [Any] The results returned by hook method
19             ]
20 82     82   655 );
  82         221  
21              
22             my $ToBeLoaded = [];
23             my $TryOnFirst = [];
24              
25             sub new {
26             # Constructor of Sisimai::Message
27             # @param [Hash] argvs Email text data
28             # @options argvs [String] data Entire email message
29             # @options argvs [Array] load User defined MTA module list
30             # @options argvs [Array] order The order of MTA modules
31             # @options argvs [Code] hook Reference to callback method
32             # @return [Sisimai::Message] Structured email data or Undef if each
33             # value of the arguments are missing
34 2822     2822 1 24480 my $class = shift;
35 2822         6671 my $argvs = { @_ };
36 2822         4440 my $param = {};
37 2822   50     6403 my $email = $argvs->{'data'} || return undef;
38 2822         11941 my $thing = { 'from' => '', 'header' => {}, 'rfc822' => '', 'ds' => [], 'catch' => undef };
39              
40             # 1. Load specified MTA modules
41 2822         6285 for my $e ('load', 'order') {
42             # Order of MTA modules
43 5644 100       11647 next unless exists $argvs->{ $e };
44 1 50       5 next unless ref $argvs->{ $e } eq 'ARRAY';
45 1 50       2 next unless scalar @{ $argvs->{ $e } };
  1         3  
46 1         3 $param->{ $e } = $argvs->{ $e };
47             }
48 2822         7990 $ToBeLoaded = __PACKAGE__->load(%$param);
49              
50             # 2. Split email data to headers and a body part.
51 2822 50       6440 return undef unless my $aftersplit = __PACKAGE__->divideup(\$email);
52              
53             # 3. Convert email headers from text to hash reference
54 2822         5455 $thing->{'from'} = $aftersplit->[0];
55 2822         7032 $thing->{'header'} = __PACKAGE__->makemap(\$aftersplit->[1]);
56              
57             # 4. Decode and rewrite the "Subject:" header
58 2822 50       7180 if( $thing->{'header'}->{'subject'} ) {
59             # Decode MIME-Encoded "Subject:" header
60 2822         5205 my $s = $thing->{'header'}->{'subject'};
61 2822 100       14829 my $q = Sisimai::MIME->is_mimeencoded(\$s) ? Sisimai::MIME->mimedecode([split(/[ ]/, $s)]) : $s;
62              
63             # Remove "Fwd:" string from the "Subject:" header
64 2822 100       9159 if( lc($q) =~ /\A[ \t]*fwd?:[ ]*(.*)\z/ ) {
65             # Delete quoted strings, quote symbols(>)
66 29         163 $q = $1;
67 29         435 $aftersplit->[2] =~ s/^[>]+[ ]//gm;
68 29         196 $aftersplit->[2] =~ s/^[>]$//gm;
69             }
70 2822         6271 $thing->{'header'}->{'subject'} = $q;
71             }
72              
73             # 5. Rewrite message body for detecting the bounce reason
74 2822         12716 $TryOnFirst = Sisimai::Order->make($thing->{'header'}->{'subject'});
75 2822   100     15290 $param = { 'hook' => $argvs->{'hook'} || undef, 'mail' => $thing, 'body' => \$aftersplit->[2] };
76 2822 100       10891 return undef unless my $bouncedata = __PACKAGE__->parse(%$param);
77 2813 50       8261 return undef unless keys %$bouncedata;
78              
79             # 6. Rewrite headers of the original message in the body part
80 2813         11382 $thing->{ $_ } = $bouncedata->{ $_ } for ('ds', 'catch', 'rfc822');
81 2813   66     8893 my $r = $bouncedata->{'rfc822'} || $aftersplit->[2];
82 2813 100       10139 $thing->{'rfc822'} = ref $r ? $r : __PACKAGE__->makemap(\$r, 1);
83              
84 2813         18537 return bless($thing, $class);
85             }
86              
87             sub load {
88             # Load MTA modules which specified at 'order' and 'load' in the argument
89             # @param [Hash] argvs Module information to be loaded
90             # @options argvs [Array] load User defined MTA module list
91             # @options argvs [Array] order The order of MTA modules
92             # @return [Array] Module list
93             # @since v4.20.0
94 2823     2823 0 4998 my $class = shift;
95 2823         3931 my $argvs = { @_ };
96              
97 2823         3575 my @modulelist;
98 2823         3464 my $tobeloaded = [];
99              
100 2823         3926 for my $e ('load', 'order') {
101             # The order of MTA modules specified by user
102 5646 100       10366 next unless exists $argvs->{ $e };
103 1 50       4 next unless ref $argvs->{ $e } eq 'ARRAY';
104 1 50       2 next unless scalar @{ $argvs->{ $e } };
  1         3  
105              
106 1 50       3 push @modulelist, @{ $argvs->{'order'} } if $e eq 'order';
  1         3  
107 1 50       3 next unless $e eq 'load';
108              
109             # Load user defined MTA module
110 0         0 for my $v ( @{ $argvs->{'load'} } ) {
  0         0  
111             # Load user defined MTA module
112 0         0 eval {
113 0         0 (my $modulepath = $v) =~ s|::|/|g;
114 0         0 require $modulepath.'.pm';
115             };
116 0 0       0 next if $@;
117 0         0 push @$tobeloaded, $v;
118             }
119             }
120              
121 2823         4693 for my $e ( @modulelist ) {
122             # Append the custom order of MTA modules
123 6 50       9 next if grep { $e eq $_ } @$tobeloaded;
  15         22  
124 6         10 push @$tobeloaded, $e;
125             }
126 2823         6384 return $tobeloaded;
127             }
128              
129             sub divideup {
130             # Divide email data up headers and a body part.
131             # @param [String] email Email data
132             # @return [Array] Email data after split
133             # @since v4.14.0
134 2822     2822 0 3926 my $class = shift;
135 2822   50     6147 my $email = shift // return undef;
136 2822         6945 my $block = ['', '', '']; # 0:From, 1:Header, 2:Body
137              
138 2822 100       30576 $$email =~ s/\r\n/\n/gm if rindex($$email, "\r\n") > -1;
139 2822 50       131024 $$email =~ s/[ \t]+$//gm if $$email =~ /[ \t]+$/;
140              
141 2822         24725 ($block->[1], $block->[2]) = split(/\n\n/, $$email, 2);
142 2822 50       7327 return undef unless $block->[1];
143 2822 50       6075 return undef unless $block->[2];
144              
145 2822 100       7742 if( substr($block->[1], 0, 5) eq 'From ' ) {
146             # From MAILER-DAEMON Tue Feb 11 00:00:00 2014
147 387         2017 $block->[0] = [split(/\n/, $block->[1], 2)]->[0];
148 387         1468 $block->[0] =~ y/\r\n//d;
149             } else {
150             # Set pseudo UNIX From line
151 2435         3521 $block->[0] = 'MAILER-DAEMON Tue Feb 11 00:00:00 2014';
152             }
153 2822 50       12122 $block->[1] .= "\n" unless $block->[1] =~ /\n\z/;
154              
155 2822         5370 for my $e ('image/', 'application/', 'text/html') {
156             # https://github.com/sisimai/p5-sisimai/issues/492, Reduce email size
157 8466         9369 my $p0 = 0;
158 8466         8623 my $p1 = 0;
159 8466 100       12750 my $ep = $e eq 'text/html' ? '' : "--\n";
160 8466         11234 while(1) {
161             # Remove each part from "Content-Type: image/..." to "--\n" (the end of each boundary)
162 8884 100       56881 $p0 = index($block->[2], 'Content-Type: '.$e, $p0); last if $p0 < 0;
  8884         17950  
163 614 100       6293 $p1 = index($block->[2], $ep, $p0 + 32); last if $p1 < 0;
  614         1568  
164 418         1095 substr($block->[2], $p0, $p1 - $p0, '');
165             }
166             }
167 2822         6348 $block->[2] .= "\n";
168 2822         8069 return $block;
169             }
170              
171             sub makemap {
172             # Convert a text including email headers to a hash reference
173             # @param [String] argv0 Email header data
174             # @param [Bool] argv1 Decode "Subject:" header
175             # @return [Hash] Structured email header data
176             # @since v4.25.6
177 5610     5610 0 7524 my $class = shift;
178 5610   50     10064 my $argv0 = shift || return {};
179 5610   100     12175 my $argv1 = shift || 0;
180              
181 5610         13164 $$argv0 =~ s/^[>]+[ ]//mg; # Remove '>' indent symbol of forwarded message
182              
183             # Select and convert all the headers in $argv0. The following regular expression
184             # is based on https://gist.github.com/xtetsuji/b080e1f5551d17242f6415aba8a00239
185 5610         181572 my $firstpairs = { $$argv0 =~ /^([\w-]+):[ ]*(.*?)\n(?![\s\t])/gms };
186 5610         20473 my $headermaps = { 'subject' => '' };
187 5610         9598 my $recvheader = [];
188              
189              
190 5610         78752 $headermaps->{ lc $_ } = $firstpairs->{ $_ } for keys %$firstpairs;
191 5610         18403 for my $e ( values %$headermaps ) { $e =~ s/\n\s+/ /; $e =~ y/\t / /s }
  67152         99530  
  67152         108124  
192              
193 5610 100       18825 if( $$argv0 =~ /^Received:/m ) {
194             # Capture values of each Received: header
195 4918         38761 $recvheader = [$$argv0 =~ /^Received:[ ]*(.*?)\n(?![\s\t])/gms];
196 4918         19228 for my $e ( @$recvheader ) { $e =~ s/\n\s+/ /; $e =~ y/\n\t / /s }
  9570         30792  
  9570         22594  
197             }
198 5610         9363 $headermaps->{'received'} = $recvheader;
199              
200 5610 100       22524 return $headermaps unless $argv1;
201 2788 100       6206 return $headermaps unless length $headermaps->{'subject'};
202              
203             # Convert MIME-Encoded subject
204 2562 100       9839 if( Sisimai::String->is_8bit(\$headermaps->{'subject'}) ) {
205             # The value of ``Subject'' header is including multibyte character,
206             # is not MIME-Encoded text.
207 34         120 eval {
208             # Remove invalid byte sequence
209 34         183 Encode::decode_utf8($headermaps->{'subject'});
210 34         1332 Encode::encode_utf8($headermaps->{'subject'});
211             };
212 34 50       415 $headermaps->{'subject'} = 'MULTIBYTE CHARACTERS HAVE BEEN REMOVED' if $@;
213              
214             } else {
215             # MIME-Encoded subject field or ASCII characters only
216 2528         3948 my $r = [];
217 2528 100       7223 if( Sisimai::MIME->is_mimeencoded(\$headermaps->{'subject'}) ) {
218             # split the value of Subject by $borderline
219 241         1076 for my $v ( split(/ /, $headermaps->{'subject'}) ) {
220             # Insert value to the array if the string is MIME encoded text
221 306 100       729 push @$r, $v if Sisimai::MIME->is_mimeencoded(\$v);
222             }
223             } else {
224             # Subject line is not MIME encoded
225 2287         5176 $r = [$headermaps->{'subject'}];
226             }
227 2528         6449 $headermaps->{'subject'} = Sisimai::MIME->mimedecode($r);
228             }
229 2562         12841 return $headermaps;
230             }
231              
232             sub parse {
233             # Parse bounce mail with each MTA module
234             # @param [Hash] argvs Processing message entity.
235             # @param options argvs [Hash] mail Email message entity
236             # @param options mail [String] from From line of mbox
237             # @param options mail [Hash] header Email header data
238             # @param options mail [String] rfc822 Original message part
239             # @param options mail [Array] ds Delivery status list(parsed data)
240             # @param options argvs [String] body Email message body
241             # @param options argvs [Code] hook Hook method to be called
242             # @return [Hash] Parsed and structured bounce mails
243 2822     2822 0 4431 my $class = shift;
244 2822         6264 my $argvs = { @_ };
245              
246 2822   50     7108 my $mailheader = $argvs->{'mail'}->{'header'} || return '';
247 2822   50     5893 my $bodystring = $argvs->{'body'} || return '';
248 2822   100     7899 my $hookmethod = $argvs->{'hook'} || undef;
249 2822         3314 my $havecaught = undef;
250              
251 2822         3457 state $defaultset = Sisimai::Order->another;
252 2822         3777 state $lhosttable = Sisimai::Lhost->path;
253              
254 2822   100     5541 $mailheader->{'from'} //= '';
255 2822   50     4879 $mailheader->{'subject'} //= '';
256 2822   100     5985 $mailheader->{'content-type'} //= '';
257              
258             # Decode BASE64 Encoded message body
259 2822   100     8833 my $mesgformat = lc($mailheader->{'content-type'} || '');
260 2822   100     8275 my $ctencoding = lc($mailheader->{'content-transfer-encoding'} || '');
261 2822 100       7163 if( index($mesgformat, 'text/') == 0 ) {
262             # Content-Type: text/plain; charset=UTF-8
263 438 100       1905 if( $ctencoding eq 'base64' ) {
    100          
264             # Content-Transfer-Encoding: base64
265 5         34 $bodystring = Sisimai::MIME->base64d($bodystring);
266              
267             } elsif( $ctencoding eq 'quoted-printable' ) {
268             # Content-Transfer-Encoding: quoted-printable
269 45         297 $bodystring = Sisimai::MIME->qprintd($bodystring);
270             }
271              
272             # Content-Type: text/html;...
273 438 50       1285 $bodystring = Sisimai::String->to_plain($bodystring, 1) if $mesgformat =~ m|text/html;?|;
274             } else {
275             # NOT text/plain
276 2384 100       5060 if( index($mesgformat, 'multipart/') == 0 ) {
277             # In case of Content-Type: multipart/*
278 1958         5620 my $p = Sisimai::MIME->makeflat($mailheader->{'content-type'}, $bodystring);
279 1958 100       5556 $bodystring = $p if length $$p;
280             }
281             }
282 2822         15695 $$bodystring =~ tr/\r//d;
283              
284 2822 100       6976 if( ref $hookmethod eq 'CODE' ) {
285             # Call hook method
286 553         2181 my $p = { 'headers' => $mailheader, 'message' => $$bodystring };
287 553         920 eval { $havecaught = $hookmethod->($p) };
  553         1429  
288 553 50       11112 warn sprintf(" ***warning: Something is wrong in hook method:%s", $@) if $@;
289             }
290              
291 2822         4283 my $haveloaded = {};
292 2822         4075 my $parseddata = undef;
293 2822         4305 my $modulename = '';
294 2822         4290 PARSER: while(1) {
295             # 1. User-Defined Module
296             # 2. MTA Module Candidates to be tried on first
297             # 3. Sisimai::Lhost::*
298             # 4. Sisimai::RFC3464
299             # 5. Sisimai::ARF
300             # 6. Sisimai::RFC3834
301 2822         5062 USER_DEFINED: for my $r ( @$ToBeLoaded ) {
302             # Call user defined MTA modules
303 1 50       4 next if exists $haveloaded->{ $r };
304 1         4 $parseddata = $r->make($mailheader, $bodystring);
305 1         15 $haveloaded->{ $r } = 1;
306 1         2 $modulename = $r;
307 1 50       4 last(PARSER) if $parseddata;
308             }
309              
310 2821         5788 TRY_ON_FIRST_AND_DEFAULTS: for my $r ( @$TryOnFirst, @$defaultset ) {
311             # Try MTA module candidates
312 17455 100       29028 next if exists $haveloaded->{ $r };
313 16868         439989 require $lhosttable->{ $r };
314 16868         98224 $parseddata = $r->make($mailheader, $bodystring);
315 16868         28669 $haveloaded->{ $r } = 1;
316 16868         19180 $modulename = $r;
317 16868 100       28932 last(PARSER) if $parseddata;
318             }
319              
320 181 50       553 unless( $haveloaded->{'Sisimai::RFC3464'} ) {
321             # When the all of Sisimai::Lhost::* modules did not return bounce
322             # data, call Sisimai::RFC3464;
323 181         5226 require Sisimai::RFC3464;
324 181         1154 $parseddata = Sisimai::RFC3464->make($mailheader, $bodystring);
325 181         391 $modulename = 'RFC3464';
326 181 100       611 last(PARSER) if $parseddata;
327             }
328              
329 50 50       235 unless( $haveloaded->{'Sisimai::ARF'} ) {
330             # Feedback Loop message
331 50         2091 require Sisimai::ARF;
332 50 100       383 $parseddata = Sisimai::ARF->make($mailheader, $bodystring) if Sisimai::ARF->is_arf($mailheader);
333 50 100       199 last(PARSER) if $parseddata;
334             }
335              
336 15 50       55 unless( $haveloaded->{'Sisimai::RFC3834'} ) {
337             # Try to parse the message as auto reply message defined in RFC3834
338 15         1348 require Sisimai::RFC3834;
339 15         225 $parseddata = Sisimai::RFC3834->make($mailheader, $bodystring);
340 15         40 $modulename = 'RFC3834';
341 15 100       53 last(PARSER) if $parseddata;
342             }
343              
344 9         16 last; # as of now, we have no sample email for coding this block
345             } # End of while(PARSER)
346 2822 100       5600 return undef unless $parseddata;
347              
348 2813         4619 $parseddata->{'catch'} = $havecaught;
349 2813         11672 $modulename =~ s/\A.+:://;
350 2813   66     4098 $_->{'agent'} ||= $modulename for @{ $parseddata->{'ds'} };
  2813         12959  
351 2813         13537 return $parseddata;
352             }
353              
354             1;
355             __END__