File Coverage

lib/Sisimai/ARF.pm
Criterion Covered Total %
statement 114 118 96.6
branch 72 82 87.8
condition 25 38 65.7
subroutine 8 8 100.0
pod 0 3 0.0
total 219 249 87.9


line stmt bran cond sub pod time code
1             package Sisimai::ARF;
2 7     7   4637 use feature ':5.10';
  7         13  
  7         605  
3 7     7   42 use strict;
  7         14  
  7         163  
4 7     7   33 use warnings;
  7         13  
  7         200  
5 7     7   429 use Sisimai::Lhost;
  7         17  
  7         162  
6 7     7   37 use Sisimai::RFC5322;
  7         13  
  7         13830  
7              
8 2     2 0 832 sub description { return 'Abuse Feedback Reporting Format' }
9             sub is_arf {
10             # Email is a Feedback-Loop message or not
11             # @param [Hash] heads Email header including "Content-Type", "From" and "Subject" field
12             # @return [Integer] 1: Feedback Loop
13             # 0: is not Feedback loop
14 131     131 0 227 my $class = shift;
15 131   50     308 my $heads = shift || return 0;
16 131         180 my $match = 0;
17              
18 131         223 state $reportfrom = qr/(?:staff[@]hotmail[.]com|complaints[@]email-abuse[.]amazonses[.]com)\z/;
19              
20 131 100       706 if( $heads->{'content-type'} =~ /report-type=["]?feedback-report["]?/ ) {
    100          
21             # Content-Type: multipart/report; report-type=feedback-report; ...
22 101         187 $match = 1;
23              
24             } elsif( index($heads->{'content-type'}, 'multipart/mixed') > -1 ) {
25             # Microsoft (Hotmail, MSN, Live, Outlook) uses its own report format.
26             # Amazon SES Complaints bounces
27 19         123 my $p = Sisimai::Address->s3s4($heads->{'from'});
28 19 100 66     265 if( $p =~ $reportfrom && index($heads->{'subject'}, 'complaint about message from ') > -1 ) {
29             # From: staff@hotmail.com
30             # From: complaints@email-abuse.amazonses.com
31             # Subject: complaint about message from 192.0.2.1
32 15         33 $match = 1;
33             }
34             }
35 131         511 return $match;
36             }
37              
38             sub make {
39             # Detect an error for Feedback Loop
40             # @param [Hash] mhead Message headers of a bounce email
41             # @param [String] mbody Message body of a bounce email
42             # @return [Hash] Bounce data list and message/rfc822 part
43             # @return [Undef] failed to parse or the arguments are missing
44 82     82 0 456 my $class = shift;
45 82   100     272 my $mhead = shift // return undef;
46 81   50     196 my $mbody = shift // return undef;
47 81 50       225 return undef unless is_arf(undef, $mhead);
48              
49             # http://tools.ietf.org/html/rfc5965
50             # http://en.wikipedia.org/wiki/Feedback_loop_(email)
51             # http://en.wikipedia.org/wiki/Abuse_Reporting_Format
52             #
53             # Netease DMARC uses: This is a spf/dkim authentication-failure report for an email message received from IP
54             # OpenDMARC 1.3.0 uses: This is an authentication failure report for an email message received from IP
55             # Abusix ARF uses: this is an autogenerated email abuse complaint regarding your network.
56 81         172 state $startingof = {
57             'rfc822' => ['Content-Type: message/rfc822', 'Content-Type: text/rfc822-headers'],
58             'report' => ['Content-Type: message/feedback-report'],
59             };
60 81         134 state $markingsof = {
61             'message' => qr{\A(?>
62             [Tt]his[ ]is[ ]a[ ][^ ]+[ ](?:email[ ])?[Aa]buse[ ][Rr]eport
63             |[Tt]his[ ]is[ ]an[ ]email[ ]abuse[ ]report
64             |[Tt]his[ ]is[ ](?:
65             a[ ][^ ]+[ ]authentication[ -]failure[ ]report
66             |an[ ]authentication[ -]failure[ ]report
67             |an[ ]autogenerated[ ]email[ ]abuse[ ]complaint
68             |an?[ ][^ ]+[ ]report[ ]for
69             )
70             )
71             }x,
72             };
73 81         183 state $indicators = Sisimai::Lhost->INDICATORS;
74 81         188 state $longfields = Sisimai::RFC5322->LONGFIELDS;
75 81         179 state $rfc822head = Sisimai::RFC5322->HEADERFIELDS;
76              
77 81         334 my $dscontents = [Sisimai::Lhost->DELIVERYSTATUS];
78 81         232 my $rfc822part = ''; # (String) message/rfc822-headers part
79 81         125 my $previousfn = ''; # (String) Previous field name
80 81         141 my $readcursor = 0; # (Integer) Points the current cursor position
81 81         99 my $recipients = 0; # (Integer) The number of 'Final-Recipient' header
82 81         131 my $rcptintext = ''; # (String) Recipient address in the message body
83 81         283 my $commondata = {
84             'diagnosis' => '', # Error message
85             'from' => '', # Original-Mail-From:
86             'rhost' => '', # Reporting-MTA:
87             };
88 81         388 my $arfheaders = {
89             'feedbacktype' => '', # Feedback-Type:
90             'rhost' => '', # Source-IP:
91             'agent' => '', # User-Agent:
92             'date' => '', # Arrival-Date:
93             'authres' => '', # Authentication-Results:
94             };
95 81         177 my $v = undef;
96              
97             # 3.1. Required Fields
98             #
99             # The following report header fields MUST appear exactly once:
100             #
101             # o "Feedback-Type" contains the type of feedback report (as defined
102             # in the corresponding IANA registry and later in this memo). This
103             # is intended to let report parsers distinguish among different
104             # types of reports.
105             #
106             # o "User-Agent" indicates the name and version of the software
107             # program that generated the report. The format of this field MUST
108             # follow section 14.43 of [HTTP]. This field is for documentation
109             # only; there is no registry of user agent names or versions, and
110             # report receivers SHOULD NOT expect user agent names to belong to a
111             # known set.
112             #
113             # o "Version" indicates the version of specification that the report
114             # generator is using to generate the report. The version number in
115             # this specification is set to "1".
116             #
117 81         931 for my $e ( split("\n", $$mbody) ) {
118             # Read each line between the start of the message and the start of rfc822 part.
119              
120             # This is an email abuse report for an email message with the
121             # message-id of 0000-000000000000000000000000000000000@mx
122             # received from IP address 192.0.2.1 on
123             # Thu, 29 Apr 2010 00:00:00 +0900 (JST)
124 2511 100 33     8198 $commondata->{'diagnosis'} ||= $e if $e =~ $markingsof->{'message'};
125              
126 2511 100       3497 unless( $readcursor ) {
127             # Beginning of the bounce message or message/delivery-status part
128 538 100       1319 $readcursor |= $indicators->{'deliverystatus'} if index($e, $startingof->{'report'}->[0]) == 0;
129             }
130              
131 2511 100       3638 unless( $readcursor & $indicators->{'message-rfc822'} ) {
132             # Beginning of the original message part
133 1238 100 100     4060 if( index($e, $startingof->{'rfc822'}->[0]) == 0 ||
134             index($e, $startingof->{'rfc822'}->[1]) == 0 ) {
135 76         164 $readcursor |= $indicators->{'message-rfc822'};
136 76         123 next;
137             }
138             }
139              
140 2435 100       3582 if( $readcursor & $indicators->{'message-rfc822'} ) {
141             # message/rfc822 OR text/rfc822-headers part
142 1273 100       3437 if( $e =~ /X-HmXmrOriginalRecipient:[ ]*(.+)\z/ ) {
    100          
    100          
143             # Microsoft ARF: original recipient.
144 15         63 $dscontents->[-1]->{'recipient'} = Sisimai::Address->s3s4($1);
145 15         39 $recipients++;
146              
147             # The "X-HmXmrOriginalRecipient" header appears only once so
148             # we take this opportunity to hard-code ARF headers missing in
149             # Microsoft's implementation.
150 15         30 $arfheaders->{'feedbacktype'} = 'abuse';
151 15         36 $arfheaders->{'agent'} = 'Microsoft Junk Mail Reporting Program';
152              
153             } elsif( $e =~ /\AFrom:[ ]*(.+)\z/ ) {
154             # Microsoft ARF: original sender.
155 71   66     289 $commondata->{'from'} ||= Sisimai::Address->s3s4($1);
156 71         132 $previousfn = 'from';
157              
158             } elsif( $e =~ /\A[ \t]+/ ) {
159             # Continued line from the previous line
160 207 100       401 if( $previousfn eq 'from' ) {
161             # Multiple lines at From: field
162 5         15 $commondata->{'from'} .= $e;
163 5         12 next;
164              
165             } else {
166 202 50       360 $rfc822part .= $e."\n" if exists $longfields->{ $previousfn };
167 202 50       445 next if length $e;
168             }
169 0 0       0 $rcptintext .= $e if $previousfn eq 'to';
170              
171             } else {
172             # Get required headers only
173 980         2674 my($lhs, $rhs) = split(/:[ ]*/, $e, 2);
174 980 100 100     2607 next unless $lhs = lc($lhs || '');
175              
176 859         920 $previousfn = '';
177 859 100       1911 next unless exists $rfc822head->{ $lhs };
178              
179 309         369 $previousfn = $lhs;
180 309         600 $rfc822part .= $e."\n";
181 309 100       693 $rcptintext = $rhs if $lhs eq 'to';
182             }
183             } else {
184             # message/feedback-report part
185 1162 100       1820 next unless $readcursor & $indicators->{'deliverystatus'};
186 705 100       1168 next unless length $e;
187              
188             # Feedback-Type: abuse
189             # User-Agent: SomeGenerator/1.0
190             # Version: 0.1
191             # Original-Mail-From:
192             # Original-Rcpt-To:
193             # Received-Date: Thu, 29 Apr 2009 00:00:00 JST
194             # Source-IP: 192.0.2.1
195 644         743 $v = $dscontents->[-1];
196              
197 644 100 100     4418 if( $e =~ /\AOriginal-Rcpt-To:[ ]+[<]?(.+)[>]?\z/ ||
    100          
    100          
    100          
    100          
    50          
    100          
    100          
198             $e =~ /\ARedacted-Address:[ ]([^ ].+[@])\z/ ) {
199             # Original-Rcpt-To header field is optional and may appear any
200             # number of times as appropriate:
201             # Original-Rcpt-To:
202             # Redacted-Address: localpart@
203 71 100       199 if( $v->{'recipient'} ) {
204             # There are multiple recipient addresses in the message body.
205 35         110 push @$dscontents, Sisimai::Lhost->DELIVERYSTATUS;
206 35         67 $v = $dscontents->[-1];
207             }
208 71         261 $v->{'recipient'} = Sisimai::Address->s3s4($1);
209 71         136 $recipients++;
210              
211             } elsif( $e =~ /\AFeedback-Type:[ ]*([^ ]+)\z/ ) {
212             # The header field MUST appear exactly once.
213             # Feedback-Type: abuse
214 66         310 $arfheaders->{'feedbacktype'} = $1;
215              
216             } elsif( $e =~ /\AAuthentication-Results:[ ]*(.+)\z/ ) {
217             # "Authentication-Results" indicates the result of one or more
218             # authentication checks run by the report generator.
219             #
220             # Authentication-Results: mail.example.com;
221             # spf=fail smtp.mail=somespammer@example.com
222 25         102 $arfheaders->{'authres'} = $1;
223              
224             } elsif( $e =~ /\AUser-Agent:[ ]*(.+)\z/ ) {
225             # The header field MUST appear exactly once.
226             # User-Agent: SomeGenerator/1.0
227 66         215 $arfheaders->{'agent'} = $1;
228              
229             } elsif( $e =~ /\A(?:Received|Arrival)-Date:[ ]*(.+)\z/ ) {
230             # Arrival-Date header is optional and MUST NOT appear more than
231             # once.
232             # Received-Date: Thu, 29 Apr 2010 00:00:00 JST
233             # Arrival-Date: Thu, 29 Apr 2010 00:00:00 +0000
234 51         209 $arfheaders->{'date'} = $1;
235              
236             } elsif( $e =~ /\AReporting-MTA:[ ]*dns;[ ]*(.+)\z/ ) {
237             # The header is optional and MUST NOT appear more than once.
238             # Reporting-MTA: dns; mx.example.jp
239 0         0 $commondata->{'rhost'} = $1;
240              
241             } elsif( $e =~ /\ASource-I[Pp]:[ ]*(.+)\z/ ) {
242             # The header is optional and MUST NOT appear more than once.
243             # Source-IP: 192.0.2.45
244 46         151 $arfheaders->{'rhost'} = $1;
245              
246             } elsif( $e =~ /\AOriginal-Mail-From:[ ]*(.+)\z/ ) {
247             # the header is optional and MUST NOT appear more than once.
248             # Original-Mail-From:
249 50   33     385 $commondata->{'from'} ||= Sisimai::Address->s3s4($1);
250             }
251             } # End of if: rfc822
252             }
253              
254 81 50 66     491 if( ($arfheaders->{'feedbacktype'} eq 'auth-failure' ) && $arfheaders->{'authres'} ) {
255             # Append the value of Authentication-Results header
256 15         69 $commondata->{'diagnosis'} .= ' '.$arfheaders->{'authres'}
257             }
258              
259 81 100       204 unless( $recipients ) {
260             # The original recipient address was not found
261 30 100       171 if( $rfc822part =~ /^To: (.+[@].+)$/m ) {
262             # pick the address from To: header in message/rfc822 part.
263 15         59 $dscontents->[-1]->{'recipient'} = Sisimai::Address->s3s4($1);
264              
265             } else {
266             # Insert pseudo recipient address when there is no valid recipient
267             # address in the message.
268 15         125 $dscontents->[-1]->{'recipient'} = Sisimai::Address->undisclosed('r');
269             }
270 30         69 $recipients = 1;
271             }
272              
273 81 50       288 unless( $rfc822part =~ /\bFrom: [^ ]+[@][^ ]+\b/ ) {
274             # There is no "From:" header in the original message
275             # Append the value of "Original-Mail-From" value as a sender address.
276 81 100       340 $rfc822part .= 'From: '.$commondata->{'from'}."\n" if $commondata->{'from'};
277             }
278              
279 81 100       331 if( $mhead->{'subject'} =~ /complaint about message from (\d{1,3}[.]\d{1,3}[.]\d{1,3}[.]\d{1,3})/ ) {
280             # Microsoft ARF: remote host address.
281 15         65 $arfheaders->{'rhost'} = $1;
282             $commondata->{'diagnosis'} = sprintf(
283             "This is a Microsoft email abuse report for an email message received from IP %s on %s",
284 15         87 $arfheaders->{'rhost'}, $mhead->{'date'});
285             }
286              
287 81         208 for my $e ( @$dscontents ) {
288             # AOL = http://forums.cpanel.net/f43/aol-brutal-work-71473.html
289 116 100       362 $e->{'recipient'} = Sisimai::Address->s3s4($rcptintext) if $e->{'recipient'} =~ /\A[^ ]+[@]\z/;
290 116   66     1606 $e->{ $_ } ||= $arfheaders->{ $_ } for keys %$arfheaders;
291 116         269 delete $e->{'authres'};
292              
293 116         254 $e->{'softbounce'} = -1;
294 116   33     479 $e->{'diagnosis'} ||= $commondata->{'diagnosis'};
295 116         609 $e->{'diagnosis'} = Sisimai::String->sweep($e->{'diagnosis'});
296 116   66     422 $e->{'date'} ||= $mhead->{'date'};
297 116         219 $e->{'reason'} = 'feedback';
298 116         183 $e->{'command'} = '';
299 116         178 $e->{'action'} = '';
300 116         155 $e->{'agent'} = 'Feedback-Loop';
301              
302             # Get the remote IP address from the message body
303 116 100       312 next if $e->{'rhost'};
304 20 50       129 if( $commondata->{'rhost'} ) {
    50          
305             # The value of "Reporting-MTA" header
306 0         0 $e->{'rhost'} = $commondata->{'rhost'};
307              
308             } elsif( $e->{'diagnosis'} =~ /\breceived from IP address ([^ ]+)/ ) {
309             # This is an email abuse report for an email message received
310             # from IP address 24.64.1.1 on Thu, 29 Apr 2010 00:00:00 +0000
311 0         0 $e->{'rhost'} = $1;
312             }
313             }
314 81         597 return { 'ds' => $dscontents, 'rfc822' => $rfc822part };
315             }
316              
317             1;
318             __END__