File Coverage

lib/Sisimai/Message.pm
Criterion Covered Total %
statement 189 196 96.4
branch 76 100 76.0
condition 24 32 75.0
subroutine 15 15 100.0
pod 1 5 20.0
total 305 348 87.6


line stmt bran cond sub pod time code
1             package Sisimai::Message;
2 82     82   168674 use feature ':5.10';
  82         175  
  82         6009  
3 82     82   463 use strict;
  82         147  
  82         1429  
4 82     82   373 use warnings;
  82         146  
  82         1872  
5 82     82   23393 use Sisimai::RFC5322;
  82         162  
  82         2615  
6 82     82   25517 use Sisimai::Address;
  82         180  
  82         2425  
7 82     82   20908 use Sisimai::String;
  82         235  
  82         2914  
8 82     82   31674 use Sisimai::Order;
  82         195  
  82         2379  
9 82     82   464 use Sisimai::Lhost;
  82         187  
  82         1440  
10 82     82   33677 use Sisimai::MIME;
  82         241  
  82         3906  
11             use Class::Accessor::Lite (
12 82         809 'new' => 0,
13             'rw' => [
14             'from', # [String] UNIX From line
15             'header', # [Hash] Header part of an email
16             'ds', # [Array] Parsed data by Sisimai::Lhost
17             'rfc822', # [Hash] Header part of the original message
18             'catch' # [Any] The results returned by hook method
19             ]
20 82     82   593 );
  82         151  
21              
22             my $ToBeLoaded = [];
23             my $TryOnFirst = [];
24              
25             sub new {
26             # Constructor of Sisimai::Message
27             # @param [Hash] argvs Email text data
28             # @options argvs [String] data Entire email message
29             # @options argvs [Array] load User defined MTA module list
30             # @options argvs [Array] order The order of MTA modules
31             # @options argvs [Code] hook Reference to callback method
32             # @return [Sisimai::Message] Structured email data or Undef if each
33             # value of the arguments are missing
34 2822     2822 1 24185 my $class = shift;
35 2822         6636 my $argvs = { @_ };
36 2822         4984 my $param = {};
37 2822   50     6665 my $email = $argvs->{'data'} || return undef;
38 2822         11665 my $thing = { 'from' => '', 'header' => {}, 'rfc822' => '', 'ds' => [], 'catch' => undef };
39              
40             # 1. Load specified MTA modules
41 2822         6099 for my $e ('load', 'order') {
42             # Order of MTA modules
43 5644 100       12676 next unless exists $argvs->{ $e };
44 1 50       5 next unless ref $argvs->{ $e } eq 'ARRAY';
45 1 50       3 next unless scalar @{ $argvs->{ $e } };
  1         4  
46 1         3 $param->{ $e } = $argvs->{ $e };
47             }
48 2822         8942 $ToBeLoaded = __PACKAGE__->load(%$param);
49              
50             # 2. Split email data to headers and a body part.
51 2822 50       7097 return undef unless my $aftersplit = __PACKAGE__->divideup(\$email);
52              
53             # 3. Convert email headers from text to hash reference
54 2822         5519 $thing->{'from'} = $aftersplit->[0];
55 2822         8540 $thing->{'header'} = __PACKAGE__->makemap(\$aftersplit->[1]);
56              
57             # 4. Decode and rewrite the "Subject:" header
58 2822 50       6882 if( $thing->{'header'}->{'subject'} ) {
59             # Decode MIME-Encoded "Subject:" header
60 2822         5023 my $s = $thing->{'header'}->{'subject'};
61 2822 100       14558 my $q = Sisimai::MIME->is_mimeencoded(\$s) ? Sisimai::MIME->mimedecode([split(/[ ]/, $s)]) : $s;
62              
63             # Remove "Fwd:" string from the "Subject:" header
64 2822 100       8569 if( lc($q) =~ /\A[ \t]*fwd?:[ ]*(.*)\z/ ) {
65             # Delete quoted strings, quote symbols(>)
66 29         111 $q = $1;
67 29         374 $aftersplit->[2] =~ s/^[>]+[ ]//gm;
68 29         158 $aftersplit->[2] =~ s/^[>]$//gm;
69             }
70 2822         5710 $thing->{'header'}->{'subject'} = $q;
71             }
72              
73             # 5. Rewrite message body for detecting the bounce reason
74 2822         14460 $TryOnFirst = Sisimai::Order->make($thing->{'header'}->{'subject'});
75 2822   100     17365 $param = { 'hook' => $argvs->{'hook'} || undef, 'mail' => $thing, 'body' => \$aftersplit->[2] };
76 2822 100       10852 return undef unless my $bouncedata = __PACKAGE__->parse(%$param);
77 2813 50       8743 return undef unless keys %$bouncedata;
78              
79             # 6. Rewrite headers of the original message in the body part
80 2813         11606 $thing->{ $_ } = $bouncedata->{ $_ } for ('ds', 'catch', 'rfc822');
81 2813   66     7490 my $r = $bouncedata->{'rfc822'} || $aftersplit->[2];
82 2813 100       10575 $thing->{'rfc822'} = ref $r ? $r : __PACKAGE__->makemap(\$r, 1);
83              
84 2813         19161 return bless($thing, $class);
85             }
86              
87             sub load {
88             # Load MTA modules which specified at 'order' and 'load' in the argument
89             # @param [Hash] argvs Module information to be loaded
90             # @options argvs [Array] load User defined MTA module list
91             # @options argvs [Array] order The order of MTA modules
92             # @return [Array] Module list
93             # @since v4.20.0
94 2823     2823 0 5289 my $class = shift;
95 2823         4421 my $argvs = { @_ };
96              
97 2823         3681 my @modulelist;
98 2823         4254 my $tobeloaded = [];
99              
100 2823         4363 for my $e ('load', 'order') {
101             # The order of MTA modules specified by user
102 5646 100       11029 next unless exists $argvs->{ $e };
103 1 50       4 next unless ref $argvs->{ $e } eq 'ARRAY';
104 1 50       2 next unless scalar @{ $argvs->{ $e } };
  1         5  
105              
106 1 50       4 push @modulelist, @{ $argvs->{'order'} } if $e eq 'order';
  1         4  
107 1 50       4 next unless $e eq 'load';
108              
109             # Load user defined MTA module
110 0         0 for my $v ( @{ $argvs->{'load'} } ) {
  0         0  
111             # Load user defined MTA module
112 0         0 eval {
113 0         0 (my $modulepath = $v) =~ s|::|/|g;
114 0         0 require $modulepath.'.pm';
115             };
116 0 0       0 next if $@;
117 0         0 push @$tobeloaded, $v;
118             }
119             }
120              
121 2823         5284 for my $e ( @modulelist ) {
122             # Append the custom order of MTA modules
123 6 50       10 next if grep { $e eq $_ } @$tobeloaded;
  15         24  
124 6         10 push @$tobeloaded, $e;
125             }
126 2823         6433 return $tobeloaded;
127             }
128              
129             sub divideup {
130             # Divide email data up headers and a body part.
131             # @param [String] email Email data
132             # @return [Array] Email data after split
133             # @since v4.14.0
134 2822     2822 0 3822 my $class = shift;
135 2822   50     6448 my $email = shift // return undef;
136 2822         6286 my $block = ['', '', '']; # 0:From, 1:Header, 2:Body
137              
138 2822 100       30251 $$email =~ s/\r\n/\n/gm if rindex($$email, "\r\n") > -1;
139 2822 50       129273 $$email =~ s/[ \t]+$//gm if $$email =~ /[ \t]+$/;
140              
141 2822         22271 ($block->[1], $block->[2]) = split(/\n\n/, $$email, 2);
142 2822 50       7031 return undef unless $block->[1];
143 2822 50       5492 return undef unless $block->[2];
144              
145 2822 100       7747 if( substr($block->[1], 0, 5) eq 'From ' ) {
146             # From MAILER-DAEMON Tue Feb 11 00:00:00 2014
147 387         1793 $block->[0] = [split(/\n/, $block->[1], 2)]->[0];
148 387         1442 $block->[0] =~ y/\r\n//d;
149             } else {
150             # Set pseudo UNIX From line
151 2435         3903 $block->[0] = 'MAILER-DAEMON Tue Feb 11 00:00:00 2014';
152             }
153              
154 2822 50       11155 $block->[1] .= "\n" unless $block->[1] =~ /\n\z/;
155 2822         7877 $block->[2] .= "\n";
156 2822         7739 return $block;
157             }
158              
159             sub makemap {
160             # Convert a text including email headers to a hash reference
161             # @param [String] argv0 Email header data
162             # @param [Bool] argv1 Decode "Subject:" header
163             # @return [Hash] Structured email header data
164             # @since v4.25.6
165 5610     5610 0 11011 my $class = shift;
166 5610   50     12124 my $argv0 = shift || return {};
167 5610   100     13315 my $argv1 = shift || 0;
168              
169 5610         13030 $$argv0 =~ s/^[>]+[ ]//mg; # Remove '>' indent symbol of forwarded message
170              
171             # Select and convert all the headers in $argv0. The following regular expression
172             # is based on https://gist.github.com/xtetsuji/b080e1f5551d17242f6415aba8a00239
173 5610         174169 my $firstpairs = { $$argv0 =~ /^([\w-]+):[ ]*(.*?)\n(?![\s\t])/gms };
174 5610         19646 my $headermaps = { 'subject' => '' };
175 5610         8746 my $recvheader = [];
176              
177              
178 5610         74333 $headermaps->{ lc $_ } = $firstpairs->{ $_ } for keys %$firstpairs;
179 5610         19584 for my $e ( values %$headermaps ) { $e =~ s/\n\s+/ /; $e =~ y/\t / /s }
  67302         95698  
  67302         102264  
180              
181 5610 100       18498 if( $$argv0 =~ /^Received:/m ) {
182             # Capture values of each Received: header
183 4918         38054 $recvheader = [$$argv0 =~ /^Received:[ ]*(.*?)\n(?![\s\t])/gms];
184 4918         10466 for my $e ( @$recvheader ) { $e =~ s/\n\s+/ /; $e =~ y/\n\t / /s }
  9570         29402  
  9570         21097  
185             }
186 5610         9851 $headermaps->{'received'} = $recvheader;
187              
188 5610 100       21109 return $headermaps unless $argv1;
189 2788 100       6599 return $headermaps unless length $headermaps->{'subject'};
190              
191             # Convert MIME-Encoded subject
192 2567 100       9315 if( Sisimai::String->is_8bit(\$headermaps->{'subject'}) ) {
193             # The value of ``Subject'' header is including multibyte character,
194             # is not MIME-Encoded text.
195 34         73 eval {
196             # Remove invalid byte sequence
197 34         158 Encode::decode_utf8($headermaps->{'subject'});
198 34         1278 Encode::encode_utf8($headermaps->{'subject'});
199             };
200 34 50       382 $headermaps->{'subject'} = 'MULTIBYTE CHARACTERS HAVE BEEN REMOVED' if $@;
201              
202             } else {
203             # MIME-Encoded subject field or ASCII characters only
204 2533         4048 my $r = [];
205 2533 100       7733 if( Sisimai::MIME->is_mimeencoded(\$headermaps->{'subject'}) ) {
206             # split the value of Subject by $borderline
207 241         958 for my $v ( split(/ /, $headermaps->{'subject'}) ) {
208             # Insert value to the array if the string is MIME encoded text
209 306 100       1066 push @$r, $v if Sisimai::MIME->is_mimeencoded(\$v);
210             }
211             } else {
212             # Subject line is not MIME encoded
213 2292         4883 $r = [$headermaps->{'subject'}];
214             }
215 2533         6683 $headermaps->{'subject'} = Sisimai::MIME->mimedecode($r);
216             }
217 2567         12878 return $headermaps;
218             }
219              
220             sub parse {
221             # Parse bounce mail with each MTA module
222             # @param [Hash] argvs Processing message entity.
223             # @param options argvs [Hash] mail Email message entity
224             # @param options mail [String] from From line of mbox
225             # @param options mail [Hash] header Email header data
226             # @param options mail [String] rfc822 Original message part
227             # @param options mail [Array] ds Delivery status list(parsed data)
228             # @param options argvs [String] body Email message body
229             # @param options argvs [Code] hook Hook method to be called
230             # @return [Hash] Parsed and structured bounce mails
231 2822     2822 0 4331 my $class = shift;
232 2822         6279 my $argvs = { @_ };
233              
234 2822   50     7159 my $mailheader = $argvs->{'mail'}->{'header'} || return '';
235 2822   50     5601 my $bodystring = $argvs->{'body'} || return '';
236 2822   100     7408 my $hookmethod = $argvs->{'hook'} || undef;
237 2822         3503 my $havecaught = undef;
238              
239 2822         3587 state $defaultset = Sisimai::Order->another;
240 2822         4242 state $lhosttable = Sisimai::Lhost->path;
241              
242 2822   100     5804 $mailheader->{'from'} //= '';
243 2822   50     5338 $mailheader->{'subject'} //= '';
244 2822   100     6125 $mailheader->{'content-type'} //= '';
245              
246             # Decode BASE64 Encoded message body
247 2822   100     8291 my $mesgformat = lc($mailheader->{'content-type'} || '');
248 2822   100     9062 my $ctencoding = lc($mailheader->{'content-transfer-encoding'} || '');
249 2822 100       7246 if( index($mesgformat, 'text/') == 0 ) {
250             # Content-Type: text/plain; charset=UTF-8
251 438 100       1933 if( $ctencoding eq 'base64' ) {
    100          
252             # Content-Transfer-Encoding: base64
253 5         20 $bodystring = Sisimai::MIME->base64d($bodystring);
254              
255             } elsif( $ctencoding eq 'quoted-printable' ) {
256             # Content-Transfer-Encoding: quoted-printable
257 45         215 $bodystring = Sisimai::MIME->qprintd($bodystring);
258             }
259              
260             # Content-Type: text/html;...
261 438 50       1358 $bodystring = Sisimai::String->to_plain($bodystring, 1) if $mesgformat =~ m|text/html;?|;
262             } else {
263             # NOT text/plain
264 2384 100       5717 if( index($mesgformat, 'multipart/') == 0 ) {
265             # In case of Content-Type: multipart/*
266 1958         5798 my $p = Sisimai::MIME->makeflat($mailheader->{'content-type'}, $bodystring);
267 1958 100       5714 $bodystring = $p if length $$p;
268             }
269             }
270 2822         16377 $$bodystring =~ tr/\r//d;
271              
272 2822 100       6371 if( ref $hookmethod eq 'CODE' ) {
273             # Call hook method
274 553         2266 my $p = { 'headers' => $mailheader, 'message' => $$bodystring };
275 553         1041 eval { $havecaught = $hookmethod->($p) };
  553         1761  
276 553 50       11471 warn sprintf(" ***warning: Something is wrong in hook method:%s", $@) if $@;
277             }
278              
279 2822         4622 my $haveloaded = {};
280 2822         3859 my $parseddata = undef;
281 2822         4031 my $modulename = '';
282 2822         3295 PARSER: while(1) {
283             # 1. User-Defined Module
284             # 2. MTA Module Candidates to be tried on first
285             # 3. Sisimai::Lhost::*
286             # 4. Sisimai::RFC3464
287             # 5. Sisimai::ARF
288             # 6. Sisimai::RFC3834
289 2822         5703 USER_DEFINED: for my $r ( @$ToBeLoaded ) {
290             # Call user defined MTA modules
291 1 50       5 next if exists $haveloaded->{ $r };
292 1         5 $parseddata = $r->make($mailheader, $bodystring);
293 1         2 $haveloaded->{ $r } = 1;
294 1         3 $modulename = $r;
295 1 50       5 last(PARSER) if $parseddata;
296             }
297              
298 2821         5705 TRY_ON_FIRST_AND_DEFAULTS: for my $r ( @$TryOnFirst, @$defaultset ) {
299             # Try MTA module candidates
300 17455 100       28581 next if exists $haveloaded->{ $r };
301 16868         421810 require $lhosttable->{ $r };
302 16868         101559 $parseddata = $r->make($mailheader, $bodystring);
303 16868         29255 $haveloaded->{ $r } = 1;
304 16868         19113 $modulename = $r;
305 16868 100       29213 last(PARSER) if $parseddata;
306             }
307              
308 181 50       569 unless( $haveloaded->{'Sisimai::RFC3464'} ) {
309             # When the all of Sisimai::Lhost::* modules did not return bounce
310             # data, call Sisimai::RFC3464;
311 181         4908 require Sisimai::RFC3464;
312 181         1190 $parseddata = Sisimai::RFC3464->make($mailheader, $bodystring);
313 181         423 $modulename = 'RFC3464';
314 181 100       606 last(PARSER) if $parseddata;
315             }
316              
317 50 50       197 unless( $haveloaded->{'Sisimai::ARF'} ) {
318             # Feedback Loop message
319 50         1920 require Sisimai::ARF;
320 50 100       429 $parseddata = Sisimai::ARF->make($mailheader, $bodystring) if Sisimai::ARF->is_arf($mailheader);
321 50 100       203 last(PARSER) if $parseddata;
322             }
323              
324 15 50       47 unless( $haveloaded->{'Sisimai::RFC3834'} ) {
325             # Try to parse the message as auto reply message defined in RFC3834
326 15         1244 require Sisimai::RFC3834;
327 15         200 $parseddata = Sisimai::RFC3834->make($mailheader, $bodystring);
328 15         44 $modulename = 'RFC3834';
329 15 100       47 last(PARSER) if $parseddata;
330             }
331              
332 9         16 last; # as of now, we have no sample email for coding this block
333             } # End of while(PARSER)
334 2822 100       6309 return undef unless $parseddata;
335              
336 2813         4816 $parseddata->{'catch'} = $havecaught;
337 2813         11052 $modulename =~ s/\A.+:://;
338 2813   66     4159 $_->{'agent'} ||= $modulename for @{ $parseddata->{'ds'} };
  2813         13268  
339 2813         14553 return $parseddata;
340             }
341              
342             1;
343             __END__