File Coverage

blib/lib/Mail/DeliveryStatus/BounceParser.pm
Criterion Covered Total %
statement 414 484 85.5
branch 208 270 77.0
condition 349 417 83.6
subroutine 31 32 96.8
pod 9 16 56.2
total 1011 1219 82.9


line stmt bran cond sub pod time code
1             package Mail::DeliveryStatus::BounceParser;
2              
3             =head1 NAME
4              
5             Mail::DeliveryStatus::BounceParser - Perl extension to analyze bounce messages
6              
7             =head1 SYNOPSIS
8              
9             use Mail::DeliveryStatus::BounceParser;
10              
11             # $message is \*io or $fh or "entire\nmessage" or \@lines
12             my $bounce = eval { Mail::DeliveryStatus::BounceParser->new($message); };
13              
14             if ($@) {
15             # couldn't parse.
16             }
17              
18             my @addresses = $bounce->addresses; # email address strings
19             my @reports = $bounce->reports; # Mail::Header objects
20             my $orig_message_id = $bounce->orig_message_id; #
21             my $orig_message = $bounce->orig_message; # Mail::Internet object
22              
23             =head1 ABSTRACT
24              
25             Mail::DeliveryStatus::BounceParser analyzes RFC822 bounce messages and returns
26             a structured description of the addresses that bounced and the reason they
27             bounced; it also returns information about the original returned message
28             including the Message-ID. It works best with RFC1892 delivery reports, but
29             will gamely attempt to understand any bounce message no matter what MTA
30             generated it.
31              
32             =head1 DESCRIPTION
33              
34             Meng Wong wrote this for the Listbox v2 project; good mailing list managers
35             handle bounce messages so listowners don't have to. The best mailing list
36             managers figure out exactly what is going on with each subscriber so the
37             appropriate action can be taken.
38              
39             =cut
40              
41 49     49   983177 use 5.006;
  49         182  
42 49     49   252 use strict;
  49         91  
  49         1122  
43 49     49   228 use warnings;
  49         97  
  49         2752  
44              
45             our $VERSION = '1.541';
46             $VERSION = eval $VERSION;
47              
48 49     49   47209 use MIME::Parser;
  49         6607329  
  49         1920  
49 49     49   28399 use Mail::DeliveryStatus::Report;
  49         116  
  49         1494  
50 49     49   242 use vars qw($EMAIL_ADDR_REGEX);
  49         83  
  49         178740  
51              
52             $EMAIL_ADDR_REGEX = qr{
53             # Avoid using something like Email::Valid
54             # Full rfc(2)822 compliance isn't exactly what we want, and this seems to work
55             # for most real world cases
56             (?:<|^|\s) # Space, or the start of a string
57             ([^\s\/<]+ # some non-space, non-/ characters; none are <
58             \@ # at sign (duh)
59             (?:[-\w]+\.)+[-\w]+) # word characters or hypens organized into
60             # at least two dot-separated words
61             (?:$|\s|>) # then the end
62             }sx;
63              
64             my $Not_An_Error = qr/
65             \b delayed \b
66             | \b warning \b
67             | transient.{0,20}\serror
68             | Your \s message .{0,100} was \s delivered \s to \s the \s following \s recipient
69             /six;
70              
71             # added "permanent fatal errors" - fix for bug #41874
72             my $Really_An_Error = qr/this is a permanent error|permanent fatal errors/i;
73              
74             my $Returned_Message_Below = qr/(
75             (?:original|returned) \s message \s (?:follows|below)
76             | (?: this \s is \s a \s copy \s of
77             | below \s this \s line \s is \s a \s copy
78             ) .{0,100} \s message\.?
79             | message \s header \s follows
80             | ^ (?:return-path|received|from):
81             )\s+/sixm;
82              
83             my @Preprocessors = qw(
84             p_ims
85             p_aol_senderblock
86             p_novell_groupwise
87             p_plain_smtp_transcript
88             p_xdelivery_status
89             );
90              
91             =head2 parse
92              
93             my $bounce = Mail::DeliveryStatus::BounceParser->parse($message, \%arg);
94              
95             OPTIONS. If you pass BounceParser->new(..., {log=>sub { ... }}) That will be
96             used as a logging callback.
97              
98             NON-BOUNCES. If the message is recognizably a vacation autoresponse, or is a
99             report of a transient nonfatal error, or a spam or virus autoresponse, you'll
100             still get back a C<$bounce>, but its C<< $bounce->is_bounce() >> will return
101             false.
102              
103             It is possible that some bounces are not really bounces; such as
104             anything that apears to have a 2XX status code. To include such
105             non-bounces in the reports, pass the option {report_non_bounces=>1}.
106              
107             For historical reasons, C is an alias for the C method.
108              
109             =cut
110              
111             sub parse {
112 132     132 1 110733 my ($class, $data, $arg) = @_;
113             # my $bounce = Mail::DeliveryStatus::BounceParser->new( \*STDIN | $fh |
114             # "entire\nmessage" | ["array","of","lines"] );
115              
116 132         1471 my $parser = MIME::Parser->new;
117 132         23807 $parser->output_to_core(1);
118 132         1774 $parser->decode_headers(1);
119              
120 132         4721 my $message;
121              
122 132 50       841 if (not $data) {
    50          
    0          
123 0 0       0 print STDERR "BounceParser: expecting bounce mesage on STDIN\n" if -t STDIN;
124 0         0 $message = $parser->parse(\*STDIN);
125             } elsif (not ref $data) {
126 132         766 $message = $parser->parse_data($data);
127             } elsif (ref $data eq "ARRAY") {
128 0         0 $message = $parser->parse_data($data);
129             } else {
130 0         0 $message = $parser->parse($data);
131             }
132              
133             my $self = bless {
134             reports => [],
135             is_bounce => 1,
136             log => $arg->{log},
137             parser => $parser,
138             orig_message_id => undef,
139             prefer_final_recipient => $arg->{prefer_final_recipient},
140 132         3909415 }, $class;
141              
142 132 50       637 $self->log(
    100          
143             "received message with type "
144             . (defined($message->effective_type) ? $message->effective_type : "undef")
145             . ", subject "
146             . (defined($message->head->get("subject")) ? $message->head->get("subject") : "CAN'T GET SUBJECT")
147             );
148              
149             # before we even start to analyze the bounce, we recognize certain special
150             # cases, and rewrite them to be intelligible to us
151 132         661 foreach my $preprocessor (@Preprocessors) {
152 660 100       35928 if (my $newmessage = $self->$preprocessor($message)) {
153 6         33 $message = $newmessage;
154             }
155             }
156              
157 132         15410 $self->{message} = $message;
158              
159 132 100       493 $self->log(
160             "now the message is type "
161             . $message->effective_type
162             . ", subject "
163             . (defined($message->head->get("subject")) ? $message->head->get("subject") : "CAN'T GET SUBJECT")
164             );
165              
166 132         715 my $first_part = _first_non_multi_part($message);
167              
168             # Deal with some common C/R systems like TMDA
169             {
170 132 50 33     453 last unless ($message->head->get("x-delivery-agent")
171             and $message->head->get("X-Delivery-Agent") =~ /^TMDA/);
172 0         0 $self->log("looks like a challenge/response autoresponse; ignoring.");
173 0         0 $self->{type} = "Challenge / Response system autoreply";
174 0         0 $self->{is_bounce} = 0;
175 0         0 return $self;
176             }
177              
178             {
179 132 100 66     460 last unless ($message->head->get("X-Bluebottle-Request") and $first_part->stringify_body =~ /This account is protected by Bluebottle/);
  132         481  
180 1         1389 $self->log("looks like a challenge/response autoresponse; ignoring.");
181 1         5 $self->{type} = "Challenge / Response system autoreply";
182 1         2 $self->{is_bounce} = 0;
183 1         5 return $self;
184             }
185              
186             {
187 132 100 100     4042 last unless defined $first_part and $first_part->stringify_body =~ /Your server requires confirmation/;
  131         967  
188 1         1523 $self->log("Looks like a challenge/response autoresponse; ignoring.");
189 1         4 $self->{type} = "Challenge / Response system autoreply";
190 1         2 $self->{is_bounce} = 0;
191 1         7 return $self;
192             }
193              
194             {
195 131 100 100     4110 last unless defined $first_part and $first_part->stringify_body =~ /Please add yourself to my Boxbe Guest List/;
  130         894  
196 1         2028 $self->log("Looks like a challenge/response autoresponse; ignoring.");
197 1         5 $self->{type} = "Challenge / Response system autoreply";
198 1         2 $self->{is_bounce} = 0;
199             }
200              
201             {
202 130 100 100     208436 last unless defined $first_part and $first_part->stringify_body =~ /This\s+is\s+a\s+one-time\s+automated\s+message\s+to\s+confirm\s+that\s+you're\s+listed\s+on\s+my\s+Boxbe\s+Guest\s+List/;
  130         833  
203 1         1738 $self->log("Looks like a challenge/response autoresponse; ignoring.");
204 1         4 $self->{type} = "Challenge / Response system autoreply";
205 1         3 $self->{is_bounce} = 0;
206             }
207              
208             # we'll deem autoreplies to be usually less than a certain size.
209              
210             # Some vacation autoreplies are (sigh) multipart/mixed, with an additional
211             # part containing a pointless disclaimer; some are multipart/alternative,
212             # with a pointless HTML part saying the exact same thing. (Messages in
213             # this latter category have the decency to self-identify with things like
214             # ' 215             # 5.5.2653.12">', so we know to avoid such software in future.) So look
216             # at the first part of a multipart message (recursively, down the tree).
217              
218             {
219 130 100       195321 last if $message->effective_type eq 'multipart/report';
  130         544  
220 19 100 66     2579 last if !$first_part || $first_part->effective_type ne 'text/plain';
221 17         2090 my $string = $first_part->as_string;
222 17 100       32477 last if length($string) > 3000;
223             # added return receipt (fix for bug #41870)
224 14 100       3975 last if $string !~ /auto.{0,20}(reply|response)|return receipt|vacation|(out|away|on holiday).*office/i;
225 1         4 $self->log("looks like a vacation autoreply, ignoring.");
226 1         4 $self->{type} = "vacation autoreply";
227 1         3 $self->{is_bounce} = 0;
228 1         6 return $self;
229             }
230              
231             # vacation autoreply tagged in the subject
232             {
233 130 100       195194 last if $message->effective_type eq 'multipart/report';
  129         494  
234 18 100 66     2399 last if !$first_part || $first_part->effective_type ne 'text/plain';
235 16         1837 my $subject = $message->head->get('Subject');
236 16 100       637 last if !defined($subject);
237 15 100       96 last if $subject !~ /^AUTO/;
238 1 50       5 last if $subject !~ /is out of the office/;
239 1         4 $self->log("looks like a vacation autoreply, ignoring.");
240 1         4 $self->{type} = "vacation autoreply";
241 1         2 $self->{is_bounce} = 0;
242 1         6 return $self;
243             }
244              
245             # Polish auto-reply
246             {
247 129 100       19656 last if $message->effective_type eq 'multipart/report';
  128         470  
248 17 100 66     2112 last if !$first_part || $first_part->effective_type ne 'text/plain';
249 15         1809 my $subject = $message->head->get('Subject');
250 15 100       598 last if !defined($subject);
251 14 100       91 last if $subject !~ /Automatyczna\s+odpowied/;
252 1         13 $self->log("looks like a polish autoreply, ignoring.");
253 1         5 $self->{type} = "polish autoreply";
254 1         3 $self->{is_bounce} = 0;
255 1         9 return $self;
256             }
257              
258             # "Email address changed but your message has been forwarded"
259             {
260 128 100       18276 last if $message->effective_type eq 'multipart/report';
  127         18856  
  127         455  
261 16 100 66     2310 last if !$first_part || $first_part->effective_type ne 'text/plain';
262 14         1576 my $string = $first_part->as_string;
263 14 100       32791 last if length($string) > 3000;
264 12 50       2254 last if $string
265             !~ /(address .{0,60} changed | domain .{0,40} retired) .*
266             (has\s*been|was|have|will\s*be) \s* (forwarded|delivered)/six;
267 0         0 $self->log('looks like an address-change autoreply, ignoring');
268 0         0 $self->{type} = 'informational address-change autoreply';
269 0         0 $self->{is_bounce} = 0;
270 0         0 return $self;
271             }
272              
273             # Network Associates WebShield SMTP V4.5 MR1a on cpwebshield intercepted a
274             # mail from which caused the Content Filter
275             # Block extension COM to be triggered.
276 127 50 100     18561 if ($message->effective_type eq "text/plain"
      66        
277             and (length $message->as_string) < 3000
278             and $message->bodyhandle->as_string
279             =~ m/norton\sassociates\swebshield|content\s+filter/ix
280             ) {
281 0         0 $self->log("looks like a virus/spam block, ignoring.");
282 0         0 $self->{type} = "virus/spam false positive";
283 0         0 $self->{is_bounce} = 0;
284 0         0 return $self;
285             }
286              
287             # nonfatal errors usually say they're transient
288              
289 127 50 66     43212 if ($message->effective_type eq "text/plain"
290             and $message->bodyhandle->as_string =~ /transient.*error/is) {
291 0         0 $self->log("seems like a nonfatal error, ignoring.");
292 0         0 $self->{is_bounce} = 0;
293 0         0 return $self;
294             }
295              
296             # nonfatal errors usually say they're transient, but sometimes they do it
297             # straight out and sometimes it's wrapped in a multipart/report.
298             #
299             # Be careful not to examine a returned body for the transient-only signature:
300             # $Not_An_Error can match the single words 'delayed' and 'warning', which
301             # could quite reasonably occur in the body of the returned message. This
302             # also means it's worth additionally checking for a regex that gives a very
303             # strong indication that the error was permanent.
304             {
305 127         20081 my $part_for_maybe_transient;
  127         241  
306 127 100       442 $part_for_maybe_transient = $message
307             if $message->effective_type eq "text/plain";
308             ($part_for_maybe_transient)
309 127 100 100     19559 = grep { $_->effective_type eq "text/plain" } $message->parts
  9         2000  
310             if $message->effective_type =~ /multipart/
311             && $message->effective_type ne 'multipart/report';
312              
313 127 100       37803 if ($part_for_maybe_transient) {
314 13         52 my $string = $part_for_maybe_transient->bodyhandle->as_string;
315 13         190 my $transient_pos = _match_position($string, $Not_An_Error);
316 13 100       72 last unless defined $transient_pos;
317 1         3 my $permanent_pos = _match_position($string, $Really_An_Error);
318 1         4 my $orig_msg_pos = _match_position($string, $Returned_Message_Below);
319 1 50       5 last if _position_before($permanent_pos, $orig_msg_pos);
320 0 0       0 if (_position_before($transient_pos, $orig_msg_pos)) {
321 0         0 $self->log("transient error, ignoring.");
322 0         0 $self->{is_bounce} = 0;
323 0         0 return $self;
324             }
325             }
326             }
327              
328             # In all cases we will read the message body to try to pull out a message-id.
329 127 100       499 if ($message->effective_type =~ /multipart/) {
330             # "Internet Mail Service" sends multipart/mixed which still has a
331             # message/rfc822 in it
332 116 100       18626 if (
333             my ($orig_message) =
334 341         27463 grep { $_->effective_type eq "message/rfc822" } $message->parts
335             ) {
336             # see MIME::Entity regarding REPLACE
337 95         10629 my $orig_message_id = $orig_message->parts(0)->head->get("message-id");
338 95 100       4547 if ($orig_message_id) {
339 94         1412 $orig_message_id =~ s/(\r|\n)*$//g;
340 94         715 $self->log("extracted original message-id [$orig_message_id] from the original rfc822/message");
341             } else {
342 1         5 $self->log("Couldn't extract original message-id from the original rfc822/message");
343             }
344 95         334 $self->{orig_message_id} = $orig_message_id;
345 95         411 $self->{orig_message} = $orig_message->parts(0);
346             }
347              
348             # todo: handle pennwomen-la@v2.listbox.com/200209/19/1032468832.1444_1.frodo
349             # which is a multipart/mixed containing an application/tnef instead of a
350             # message/rfc822. yow!
351              
352 116 100 100     3724 if (! $self->{orig_message_id}
353             and
354             my ($rfc822_headers) =
355 59         5135 grep { lc $_->effective_type eq "text/rfc822-headers" } $message->parts
356             ) {
357 16         1749 my $orig_head = Mail::Header->new($rfc822_headers->body);
358 16         45604 my $message_id = $orig_head->get("message-id");
359 16 100       560 if ($message_id) {
360 15         60 chomp ($self->{orig_message_id} = $orig_head->get("message-id"));
361 15         555 $self->{orig_header} = $orig_head;
362 15         113 $self->log("extracted original message-id $self->{orig_message_id} from text/rfc822-headers");
363             }
364             }
365             }
366              
367 127 100       2436 if (! $self->{orig_message_id}) {
368 18 100 100     91 if ($message->bodyhandle and $message->bodyhandle->as_string =~ /Message-ID: (\S+)/i) {
369 5         176 $self->{orig_message_id} = $1;
370 5         42 $self->log("found a message-id $self->{orig_message_id} in the body.");
371             }
372             }
373              
374 127 100       809 if (! $self->{orig_message_id}) {
375 13         49 $self->log("couldn't find original message id.");
376             }
377              
378             #
379             # try to extract email addresses to identify members.
380             # we will also try to extract reasons as much as we can.
381             #
382              
383 127 100       529 if ($message->effective_type eq "multipart/report") {
    100          
    100          
384             my ($delivery_status) =
385 111         18801 grep { $_->effective_type eq "message/delivery-status" } $message->parts;
  332         27316  
386              
387 111         12068 my %global = ("reporting-mta" => undef, "arrival-date" => undef);
388              
389 111         207 my ($seen_action_expanded, $seen_action_failed);
390              
391             # Some MTAs generate malformed multipart/report messages with no
392             # message/delivery-status part; don't die in such cases.
393             my $delivery_status_body
394 111   100     230 = eval { $delivery_status->bodyhandle->as_string } || '';
395              
396             # Used to be \n\n, but now we allow any number of newlines between
397             # individual per-recipient fields to deal with stupid bug with the IIS SMTP
398             # service. RFC1894 (2.1, 2.3) is not 100% clear about whether more than
399             # one line is allowed - it just says "preceded by a blank line". We very
400             # well may put an upper bound on this in the future.
401             #
402             # See t/iis-multiple-bounce.t
403 111         2165 foreach my $para (split /\n{2,}/, $delivery_status_body) {
404              
405             # See t/surfcontrol-extra-newline.t - deal with bug #21249
406 135         2999 $para =~ s/\A\n+//g;
407             # added the following line as part of fix for #41874
408 135         784 $para =~ s/\r/ /g;
409              
410 135         1777 my $report = Mail::DeliveryStatus::Report->new([split /\n/, $para]);
411              
412             # Removed a $report->combine here - doesn't seem to work without a tag
413             # anyway... not sure what that was for. - wby 20060823
414              
415             # Unfold so message doesn't wrap over multiple lines
416 135         93048 $report->unfold;
417              
418             # Some MTAs send unsought delivery-status notifications indicating
419             # success; others send RFC1892/RFC3464 delivery status notifications
420             # for transient failures.
421 135 100 66     7454 if (defined $report->get('Action') and lc $report->get('Action')) {
422 113         412 my $action = lc $report->get('Action');
423 113         309 $action =~ s/^\s+//;
424 113 50       736 if ($action =~ s/^\s*([a-z]+)\b.*/$1/s) {
425             # In general, assume that anything other than 'failed' is a
426             # non-bounce; but 'expanded' is handled after the end of this
427             # foreach loop, because it might be followed by another
428             # per-recipient group that says 'failed'.
429 113 50       583 if ($action eq 'expanded') {
    100          
430 0         0 $seen_action_expanded = 1;
431             } elsif ($action eq 'failed') {
432 112         251 $seen_action_failed = 1;
433             } else {
434 1         6 $self->log("message/delivery-status says 'Action: \L$1'");
435 1         3 $self->{type} = 'delivery-status \L$1';
436 1         3 $self->{is_bounce} = 0;
437 1         13 return $self;
438             }
439             }
440             }
441              
442 134         387 for my $hdr (qw(Reporting-MTA Arrival-Date)) {
443 268   100     15637 my $val = $global{$hdr} ||= $report->get($hdr);
444 268 50       801 if (defined($val)) {
445 268         955 $report->replace($hdr => $val)
446             }
447             }
448              
449 134         13700 my $email;
450              
451 134 100       474 if ($self->{prefer_final_recipient}) {
452 2   66     8 $email = $report->get("final-recipient")
453             || $report->get("original-recipient");
454             } else {
455 132   100     469 $email = $report->get("original-recipient")
456             || $report->get("final-recipient");
457             }
458              
459 134 100       615 next unless $email;
460              
461             # $self->log("email = \"$email\"") if $DEBUG > 3;
462              
463             # Diagnostic-Code: smtp; 550 5.1.1 User unknown
464 112         430 my $reason = $report->get("diagnostic-code");
465              
466 112         564 $email =~ s/[^;]+;\s*//; # strip leading RFC822; or LOCAL; or system;
467 112 50       505 if (defined $reason) {
468 112         526 $reason =~ s/[^;]+;\s*//; # strip leading X-Postfix;
469             }
470              
471 112         445 $email = _cleanup_email($email);
472              
473 112         481 $report->replace(email => $email);
474 112 50       12250 if (defined $reason) {
475 112         393 $report->replace(reason => $reason);
476             } else {
477 0         0 $report->delete("reason");
478             }
479              
480 112         12857 my $status = $report->get('Status');
481 112 50       402 $report->replace(Status => $status) if $status =~ s/ \(permanent failure\)$//;
482              
483 112 50       326 if ($status) {
484             # RFC 1893... prefer Status: if it exists and is something we know
485             # about
486             # Not 100% sure about 5.1.0...
487 112 100       1177 if ($status =~ /^5\.1\.[01]$/) {
    100          
    100          
    100          
    100          
488 9         31 $report->replace(std_reason => "user_unknown");
489             } elsif ($status eq "5.1.2") {
490 1         4 $report->replace(std_reason => "domain_error");
491             } elsif ($status eq "5.2.1") {
492 2         7 $report->replace(std_reason => "user_disabled");
493             } elsif ($status eq "5.2.2") {
494 1         4 $report->replace(std_reason => "over_quota");
495             } elsif ($status eq "5.4.4") {
496 1         3 $report->replace(std_reason => "domain_error");
497             } else {
498 98         399 $report->replace(
499             std_reason => _std_reason($report->get("diagnostic-code"))
500             );
501             }
502             } else {
503 0         0 $report->replace(
504             std_reason => _std_reason($report->get("diagnostic-code"))
505             );
506             }
507 112         12950 my $diag_code = $report->get("diagnostic-code");
508              
509 112         376 my $host;
510 112 50       510 if (defined $diag_code) {
511 112         532 ($host) = $diag_code =~ /\bhost\s+(\S+)/;
512             }
513              
514 112 100       560 $report->replace(host => ($host)) if $host;
515              
516 112         1606 my ($code);
517              
518 112 50       321 if (defined $diag_code) {
519 112         935 ($code) = $diag_code =~
520             m/ ( ( [245] \d{2} ) \s | \s ( [245] \d{2} ) (?!\.) ) /x;
521             }
522              
523 112 50 66     621 if (!$code && $status && $status =~ /\A([245])\.?([0-9])\.?([0-9])/) {
      66        
524 13         57 $code = "$1$2$3";
525             }
526              
527 112 50       344 if ($code) {
528 112         378 $report->replace(smtp_code => $code);
529             }
530              
531 112 100       12017 if (not $report->get("host")) {
532 99         418 my $email = $report->get("email");
533 99 50       430 if (defined $email) {
534 99         461 my $host = ($email =~ /\@(.+)/)[0];
535 99 100       487 $report->replace(host => $host) if $host;
536             }
537             }
538              
539 112 100 66     10545 if ($report->get("smtp_code") and ($report->get("smtp_code") =~ /^2../)) {
540 1         5 $self->log(
541             "smtp code is "
542             . $report->get("smtp_code")
543             . "; no_problemo."
544             );
545              
546             }
547              
548 112 50       412 unless ($arg->{report_non_bounces}) {
549 112 50       393 if ($report->get("std_reason") eq "no_problemo") {
550 0         0 $self->log(
551             "not actually a bounce: " . $report->get("diagnostic-code")
552             );
553 0         0 next;
554             }
555             }
556              
557 112         215 push @{$self->{reports}},
  112         722  
558             Mail::DeliveryStatus::Report->new([ split /\n/, $report->as_string ]
559             );
560             }
561              
562 110 50 33     128963 if ($seen_action_expanded && !$seen_action_failed) {
563             # We've seen at least one 'Action: expanded' DSN-field, but no
564             # 'Action: failed'
565 0         0 $self->log(q[message/delivery-status says 'Action: expanded']);
566 0         0 $self->{type} = 'delivery-status expanded';
567 0         0 $self->{is_bounce} = 0;
568 0         0 return $self;
569             }
570              
571             } elsif ($message->effective_type =~ /multipart/) {
572             # but not a multipart/report. look through each non-message/* section.
573             # See t/corpus/exchange.unknown.msg
574              
575 5 100       1405 my @delivery_status_parts = grep { $_->effective_type =~ m{text/plain}
  9         877  
576             and not $_->is_multipart
577             } $message->parts;
578              
579             # $self->log("error parts: @{[ map { $_->bodyhandle->as_string }
580             # @delivery_status_parts ]}") if $DEBUG > 3;
581              
582 5         797 push @{$self->{reports}}, $self->_extract_reports(@delivery_status_parts);
  5         29  
583              
584             } elsif ($message->effective_type =~ m{text/plain}) {
585             # handle plain-text responses
586              
587             # This used to just take *any* part, even if the only part wasn't a
588             # text/plain part
589             #
590             # We may have to specifically allow some other types, but in my testing, all
591             # the messages that get here and are actual bounces are text/plain
592             # wby - 20060907
593              
594             # they usually say "returned message" somewhere, and we can split on that,
595             # above and below.
596 10   50     3030 my $body_string = $message->bodyhandle->as_string || '';
597              
598 10 100       1198 if ($body_string =~ $Returned_Message_Below) {
    50          
599 5         29 my ($stuff_before, $stuff_splitted, $stuff_after) =
600             split $Returned_Message_Below, $message->bodyhandle->as_string, 2;
601             # $self->log("splitting on \"$stuff_splitted\", " . length($stuff_before)
602             # . " vs " . length($stuff_after) . " bytes.") if $DEBUG > 3;
603 5         541 push @{$self->{reports}}, $self->_extract_reports($stuff_before);
  5         38  
604 5         27 $self->{orig_text} = $stuff_after;
605             } elsif ($body_string =~ /(.+)\n\n(.+?Message-ID:.+)/is) {
606 0         0 push @{$self->{reports}}, $self->_extract_reports($1);
  0         0  
607 0         0 $self->{orig_text} = $2;
608             } else {
609 5         12 push @{$self->{reports}}, $self->_extract_reports($body_string);
  5         33  
610 5         24 $self->{orig_text} = $body_string;
611             }
612             }
613 126         1746 return $self;
614             }
615              
616 49     49   224879 BEGIN { *new = \&parse };
617              
618             =head2 log
619              
620             $bounce->log($messages);
621              
622             If a logging callback has been given, the message will be passed to it.
623              
624             =cut
625              
626             sub log {
627 415     415 1 85496 my ($self, @log) = @_;
628 415 50       2072 if (ref $self->{log} eq "CODE") {
629 0         0 $self->{log}->(@_);
630             }
631 415         1108 return 1;
632             }
633              
634             sub _extract_reports {
635 15     15   49 my $self = shift;
636             # input: either a list of MIME parts, or just a chunk of text.
637              
638 15 50       89 if (@_ > 1) { return map { _extract_reports($_) } @_ }
  0         0  
  0         0  
639              
640 15         42 my $text = shift;
641              
642 15 100       64 $text = $text->bodyhandle->as_string if ref $text;
643              
644 15         50 my %by_email;
645              
646             # we'll assume that the text is made up of:
647             # blah blah 0
648             # email@address 1
649             # blah blah 1
650             # email@address 2
651             # blah blah 2
652             #
653              
654             # we'll break it up accordingly, and first try to detect a reason for email 1
655             # in section 1; if there's no reason returned, we'll look in section 0. and
656             # we'll keep going that way for each address.
657              
658 15 100       57 return unless $text;
659 13         590 my @split = split($EMAIL_ADDR_REGEX, $text);
660              
661 13         70 foreach my $i (0 .. $#split) {
662             # only interested in the odd numbered elements, which are the email
663             # addressess.
664 39 100       151 next if $i % 2 == 0;
665              
666 13         87 my $email = _cleanup_email($split[$i]);
667              
668 13 50       87 if ($split[$i-1] =~ /they are not accepting mail from/) {
669             # aol airmail sender block
670 0         0 next;
671             }
672              
673 13 100       71 if($split[$i-1] =~ /A message sent by/) {
674             # sender block
675 1         3 next;
676             }
677              
678 12         25 my $std_reason = "unknown";
679 12 50       91 $std_reason = _std_reason($split[$i+1]) if $#split > $i;
680 12 100       73 $std_reason = _std_reason($split[$i-1]) if $std_reason eq "unknown";
681              
682             # todo:
683             # if we can't figure out the reason, if we're in the delivery-status part,
684             # go back up into the text part and try extract_report() on that.
685              
686             next if (
687             exists $by_email{$email}
688             and $by_email{$email}->{std_reason}
689 12 50 66     74 ne "unknown" and $std_reason eq "unknown"
      33        
690             );
691              
692 12         34 my $reason = $split[$i-1];
693 12         42 $reason =~ s/(.*?). (Your mail to the following recipients could not be delivered)/$2/;
694              
695 12         97 $self->log("extracted a reason [$reason]");
696 12         144 $by_email{$email} = {
697             email => $email,
698             raw => join ("", @split[$i-1..$i+1]),
699             std_reason => $std_reason,
700             reason => $reason
701             };
702             }
703              
704 13         30 my @toreturn;
705              
706 13         49 foreach my $email (keys %by_email) {
707 11         146 my $report = Mail::DeliveryStatus::Report->new();
708 11         556 $report->modify(1);
709 11         231 $report->header_hashref($by_email{$email});
710 11         10053 push @toreturn, $report;
711             }
712              
713 13         81 return @toreturn;
714             }
715              
716             =head2 is_bounce
717              
718             if ($bounce->is_bounce) { ... }
719              
720             This method returns true if the bounce parser thought the message was a bounce,
721             and false otherwise.
722              
723             =cut
724              
725 124     124 1 86292 sub is_bounce { return shift->{is_bounce}; }
726              
727             =head2 reports
728              
729             Each $report returned by $bounce->reports() is basically a Mail::Header object
730             with a few modifications. It includes the email address bouncing, and the
731             reason for the bounce.
732              
733             Consider an RFC1892 error report of the form
734              
735             Reporting-MTA: dns; hydrant.pobox.com
736             Arrival-Date: Fri, 4 Oct 2002 16:49:32 -0400 (EDT)
737              
738             Final-Recipient: rfc822; bogus3@dumbo.pobox.com
739             Action: failed
740             Status: 5.0.0
741             Diagnostic-Code: X-Postfix; host dumbo.pobox.com[208.210.125.24] said: 550
742             : Nonexistent Mailbox
743              
744             Each "header" above is available through the usual get() mechanism.
745              
746             print $report->get('reporting_mta'); # 'some.host.com'
747             print $report->get('arrival-date'); # 'Fri, 4 Oct 2002 16:49:32 -0400 (EDT)'
748             print $report->get('final-recipient'); # 'rfc822; bogus3@dumbo.pobox.com'
749             print $report->get('action'); # "failed"
750             print $report->get('status'); # "5.0.0"
751             print $report->get('diagnostic-code'); # X-Postfix; ...
752              
753             # BounceParser also inserts a few interpretations of its own:
754             print $report->get('email'); # 'bogus3@dumbo.pobox.com'
755             print $report->get('std_reason'); # 'user_unknown'
756             print $report->get('reason'); # host [199.248.185.2] said: 550 5.1.1 unknown or illegal user: somebody@uss.com
757             print $report->get('host'); # dumbo.pobox.com
758             print $report->get('smtp_code'); # 550
759              
760             print $report->get('raw') || # the original unstructured text
761             $report->as_string; # the original structured text
762              
763             Probably the two most useful fields are "email" and "std_reason", the
764             standardized reason. At this time BounceParser returns the following
765             standardized reasons:
766              
767             user_unknown
768             over_quota
769             user_disabled
770             domain_error
771             spam
772             message_too_large
773             unknown
774             no_problemo
775              
776             The "spam" standard reason indicates that the message bounced because
777             the recipient considered it spam.
778              
779             (no_problemo will only appear if you set {report_non_bounces=>1})
780              
781             If the bounce message is not structured according to RFC1892,
782             BounceParser will still try to return as much information as it can;
783             in particular, you can count on "email" and "std_reason" to be
784             present.
785              
786             =cut
787              
788 153     153 1 2311 sub reports { return @{shift->{reports}} }
  153         588  
789              
790             =head2 addresses
791              
792             Returns a list of the addresses which appear to be bouncing. Each member of
793             the list is an email address string of the form 'foo@bar.com'.
794              
795             =cut
796              
797 41     41 1 989 sub addresses { return map { $_->get("email") } shift->reports; }
  44         255  
798              
799             =head2 orig_message_id
800              
801             If possible, returns the message-id of the original message as a string.
802              
803             =cut
804              
805 5     5 1 32 sub orig_message_id { return shift->{orig_message_id}; }
806              
807             =head2 orig_message
808              
809             If the original message was included in the bounce, it'll be available here as
810             a message/rfc822 MIME entity.
811              
812             my $orig_message = $bounce->orig_message;
813              
814             =cut
815              
816 2     2 1 14 sub orig_message { return shift->{orig_message} }
817              
818             =head2 orig_header
819              
820             If only the original headers were returned in the text/rfc822-headers chunk,
821             they'll be available here as a Mail::Header entity.
822              
823             =cut
824              
825 1     1 1 6 sub orig_header { return shift->{orig_header} }
826              
827             =head2 orig_text
828              
829             If the bounce message was poorly structured, the above two methods won't return
830             anything --- instead, you get back a block of text that may or may not
831             approximate the original message. No guarantees. Good luck.
832              
833             =cut
834              
835 1     1 1 6 sub orig_text { return shift->{orig_text} }
836              
837             =head1 CAVEATS
838              
839             Bounce messages are generally meant to be read by humans, not computers. A
840             poorly formatted bounce message may fool BounceParser into spreading its net
841             too widely and returning email addresses that didn't actually bounce. Before
842             you do anything with the email addresses you get back, confirm that it makes
843             sense that they might be bouncing --- for example, it doesn't make sense for
844             the sender of the original message to show up in the addresses list, but it
845             could if the bounce message is sufficiently misformatted.
846              
847             Still, please report all bugs!
848              
849             =head1 FREE-FLOATING ANXIETY
850              
851             Some bizarre MTAs construct bounce messages using the original headers of the
852             original message. If your application relies on the assumption that all
853             Message-IDs are unique, you need to watch out for these MTAs and program
854             defensively; before doing anything with the Message-ID of a bounce message,
855             first confirm that you haven't already seen it; if you have, change it to
856             something else that you make up on the spot, such as
857             "".
858              
859             =head1 BUGS
860              
861             BounceParser assumes a sanely constructed bounce message. Input from the real
862             world may cause BounceParser to barf and die horribly when we violate one of
863             MIME::Entity's assumptions; this is why you should always call it inside an
864             eval { }.
865              
866             =head2 TODO
867              
868             Provide some translation of the SMTP and DSN error codes into English. Review
869             RFC1891 and RFC1893.
870              
871             =head1 KNOWN TO WORK WITH
872              
873             We understand bounce messages generated by the following MTAs / organizations:
874              
875             Postfix
876             Sendmail
877             Exim
878             AOL
879             Yahoo
880             Hotmail
881             AOL's AirMail sender-blocking
882             Microsoft Exchange*
883             Qmail*
884             Novell Groupwise*
885              
886             * Items marked with an asterisk currently may return incomplete information.
887              
888             =head1 SEE ALSO
889              
890             Used by http://listbox.com/ --- if you like BounceParser and you know it,
891             consider Listbox for your mailing list needs!
892              
893             SVN repository and email list information at:
894             http://emailproject.perl.org/
895              
896             RFC1892 and RFC1894
897              
898             =head1 RANDOM OBSERVATION
899              
900             Schwern's modules have the Alexandre Dumas property.
901              
902             =head1 AUTHOR
903              
904             Original author: Meng Weng Wong, Emengwong+bounceparser@pobox.comE
905              
906             Current maintainer: Ricardo SIGNES, Erjbs@cpan.orgE
907              
908             Massive contributions to the 1.5xx series were made by William Yardley and
909             Michael Stevens. Ricardo mostly just helped out and managed releases.
910              
911             =head1 COPYRIGHT AND LICENSE
912              
913             Copyright (C) 2003-2006, IC Group, Inc.
914             pobox.com permanent email forwarding with spam filtering
915             listbox.com mailing list services for announcements and discussion
916              
917             This library is free software; you can redistribute it and/or modify
918             it under the same terms as Perl itself.
919              
920             =head1 WITH A SHOUT OUT TO
921              
922             coraline, Fletch, TorgoX, mjd, a-mused, Masque, gbarr,
923             sungo, dngor, and all the other hoopy froods on #perl
924              
925             =cut
926              
927             sub _std_reason {
928 116     116   273 local $_ = shift;
929              
930 116 50       456 if (!defined $_) {
931 0         0 return "unknown";
932             }
933              
934 116 100       1260 if (/(?:domain|host|service)\s+(?:not\s+found|unknown|not\s+known)/i) {
935 1         4 return "domain_error"
936             }
937              
938 115 100       522 if (/sorry,\s+that\s+domain\s+isn't\s+in\s+my\s+list\s+of\s+allowed\s+rcpthosts/i) {
939 1         5 return "domain_error";
940             }
941              
942 114 100 33     4597 if (
      33        
      66        
      66        
      66        
      66        
      100        
      100        
      100        
      100        
943             /try.again.later/is or
944             /mailbox\b.*\bfull/ or
945             /storage/i or
946             /quota/i or
947             /\s552\s/ or
948             /\s#?5\.2\.2\s/ or # rfc 1893
949             /User\s+mailbox\s+exceeds\s+allowed\s+size/i or
950             /Mailbox\s+size\s+limit\s+exceeded/i or
951             /max\s+message\s+size\s+exceeded/i or
952             /Benutzer\s+hat\s+zuviele\s+Mails\s+auf\s+dem\s+Server/i or
953             /exceeded\s+its\s+disk\s+space\s+limit/i
954             ) {
955 6         42 return "over_quota";
956             }
957              
958 108         627 my $user_re =
959             qr'(?: mailbox | user | recipient | address (?: ee)?
960             | customer | account | e-?mail | ? )'ix;
961              
962 108 100 100     66445 if (
      100        
      66        
      66        
      33        
      33        
      66        
      66        
      100        
      100        
      66        
      100        
      66        
      66        
      66        
      66        
      66        
      33        
      66        
      66        
      100        
      66        
      100        
      100        
      100        
      100        
      100        
      66        
      100        
      100        
      66        
      100        
      100        
      100        
      66        
      100        
      100        
      100        
963             /\s \(? \#? 5\.1\.[01] \)? \s/x or # rfc 1893
964             /$user_re\s+(?:\S+\s+)? (?:is\s+)? # Generic
965             (?: (?: un|not\s+) (?: known | recognized )
966             | [dw]oes\s?n[o']?t
967             (?: exist|found ) | disabled | expired ) /ix or
968             /no\s+(?:such)\s+?$user_re/i or # Gmail and other (mofified for bug #41874)
969             /unrouteable address/i or # bug #41874
970             /inactive user/i or # Outblaze
971             /unknown local part/i or # Exim(?)
972             /user\s+doesn't\s+have\s+a/i or # Yahoo!
973             /account\s+has\s+been\s+(?:disabled|suspended)/i or # Yahoo!
974             /$user_re\s+(?:suspended|discontinued)/i or # everyone.net / other?
975             /unknown\s+$user_re/i or # Generic
976             /$user_re\s+(?:is\s+)?(?:inactive|unavailable)/i or # Hotmail, others?
977             /(?:(?:in|not\s+a\s+)?valid|no such)\s$user_re/i or # Various
978             /$user_re\s+(?:was\s+)?not\s+found/i or # AOL, generic
979             /$user_re \s+ (?:is\s+)? (?:currently\s+)? # ATT, generic
980             (?:suspended|unavailable)/ix or
981             /address is administratively disabled/i or # Unknown
982             /no $user_re\s+(?:here\s+)?by that name/i or # Unknown
983             /? is invalid/i or # Unknown
984             /address.*not known here/i or # Unknown
985             /recipient\s+(?:address\s+)?rejected/i or # Cox, generic
986             /not\s+listed\s+in\s+Domino/i or # Domino
987             /account not activated/i or # usa.net
988             /not\s+our\s+customer/i or # Comcast
989             /doesn't handle mail for that user/i or # mailfoundry
990             /$user_re\s+does\s+not\s+exist/i or
991             /Recipient\s+?\s+does\s+not\s+exist/i or
992             /recipient\s+no\s+longer\s+on\s+server/i or # me.com
993             /is\s+not\s+a\s+known\s+user\s+on\s+this\s+system/i or # cam.ac.uk
994             /Rcpt\s+?\s+does\s+not\s+exist/i or
995             /Mailbox\s+not\s+available/i or
996             /No\s+mailbox\s+found/i or
997             /?\s+is\s+a\s+deactivated\s+mailbox/i or
998             /Recipient\s+does\s+not\s+exist\s+on\s+this\s+system/i or
999             /user\s+mail-box\s+not\s+found/i or
1000             /No\s+mail\s+box\s+available\s+for\s+this\s+user/i or
1001             /User\s+\[\S+\]\s+does\s+not\s+exist/i or
1002             /email\s+account\s+that\s+you\s+tried\s+to\s+reach\s+is\s+disabled/i or
1003             /not\s+an\s+active\s+address\s+at\s+this\s+host/i or
1004             /not\s+a\s+known\s+user/i or
1005             /BAD_RECIPIENT/i
1006             ) {
1007 30         214 return "user_unknown";
1008             }
1009              
1010 78 100 33     3964 if (
      33        
      33        
      33        
      66        
      100        
      66        
      66        
      66        
      100        
      100        
1011             /domain\s+syntax/i or
1012             /timed\s+out/i or
1013             /route\s+to\s+host/i or
1014             /connection\s+refused/i or
1015             /no\s+data\s+record\s+of\s+requested\s+type/i or
1016             /Malformed name server reply/i or
1017             /as\s+a\s+relay,\s+but\s+I\s+have\s+not\s+been\s+configured\s+to\s+let/i or
1018             /550\s+relay\s+not\s+permitted/i or
1019             /550\s+relaying\s+denied/i or
1020             /Relay\s+access\s+denied/i or
1021             /Relaying\s+denied/i or
1022             /No\s+such\s+domain\s+at\s+this\s+location/i
1023             ) {
1024 8         91 return "domain_error";
1025             }
1026              
1027 70 100 100     8118 if (
      100        
      100        
      100        
      100        
      100        
      100        
      100        
      100        
      100        
      100        
      100        
      100        
      100        
      100        
      100        
      100        
      100        
      100        
      100        
      100        
      100        
      100        
      100        
      100        
      100        
      100        
      100        
      66        
      66        
      100        
      100        
      100        
      100        
      100        
      100        
      100        
      100        
      100        
      100        
      100        
      100        
      100        
      100        
      100        
1028             /Blocked\s+by\s+SpamAssassin/i or
1029             /spam\s+rejection/i or
1030             /identified\s+SPAM,\s+message\s+permanently\s+rejected/i or
1031             /Mail\s+appears\s+to\s+be\s+unsolicited/i or
1032             /Message\s+rejected\s+as\s+spam\s+by\s+Content\s+Filtering/i or
1033             /message\s+looks\s+like\s+SPAM\s+to\s+me/i or
1034             /NOT\s+JUNKEMAILFILTER/i or
1035             /your\s+message\s+has\s+triggered\s+a\s+SPAM\s+block/i or
1036             /Spam\s+detected/i or
1037             /Message\s+looks\s+like\s+spam/i or
1038             /Message\s+content\s+rejected,\s+UBE/i or
1039             /Blocked\s+using\s+spam\s+pattern/i or
1040             /Client\s+host\s+\S+\s+blocked\s+using/i or
1041             /breaches\s+local\s+URIBL\s+policy/i or
1042             /Your\s+email\s+had\s+spam-like\s+header\s+contents/i or
1043             /detected\s+as\s+spam/i or
1044             /Denied\s+due\s+to\s+spam\s+list/i or
1045             /appears\s+to\s+be\s+unsolicited/i or
1046             /antispam\s+checks/i or
1047             /Probable\s+Spam/i or
1048             /ESETS_SMTP\s+\(spam\)/i or
1049             /this\s+message\s+appears\s+to\s+be\s+spam/i or
1050             /Spam\s+score\s+\(\S+\)\s+too\s+high/i or
1051             /matches\s+a\s+profile\s+the\s+Internet\s+community\s+may\s+consider\s+spam/i or
1052             /accepted\s+due\s+to\s+spam\s+filter/i or
1053             /content\s+filter\s+rejection/i or
1054             /using\s+a\s+mass\s+mailer/i or
1055             /Spam\s+email/i or
1056             /Spam\s+content/i or
1057             (/CONTENT\s+REJECT/i and /dspam\s+check/i) or
1058             /this\s+email\s+is\s+spam/i or
1059             /rejected\s+as\s+spam/i or
1060             /MCSpamSignature/i or
1061             /identified\s+as\s+spam/i or
1062             /Spamming\s+not\s+allowed/i or
1063             /classified\s+as\s+spam/i or
1064             /Message\s+refused\s+by\s+MailMarshal\s+SpamProfiler/i or
1065             /Your\s+email\s+appears\s+similar\s+to\s+spam/i or
1066             /This\s+message\s+scored\s+\S+\s+spam\s+points\s+and\s+has\s+been\s+rejected/i or
1067             /Spam\s+Blocked/i or
1068             /bulk\s+e?mail/i or
1069             /probably\s+spam/i or
1070             /appears\s+to\s+be\s+SPAM/i or
1071             /SPAM NOT ACCEPTED/i or
1072             /5.9.8\s+spam/i
1073             ) {
1074 47         427 return "spam";
1075             }
1076              
1077 23 100 100     482 if (
      100        
      100        
      100        
1078             /RESOLVER.RST.RecipSizeLimit/i or
1079             /exceeds\s+size\s+limit/i or
1080             /Message\s+too\s+big/i or
1081             /RESOLVER.RST.SendSizeLimit/i or
1082             /Message\s+Rejected\s+Class=size/i
1083             ) {
1084 6         73 return "message_too_large";
1085             }
1086              
1087 17         302 return "unknown";
1088             }
1089              
1090             # ---------------------------------------------------------------------
1091             # preprocessors
1092             # ---------------------------------------------------------------------
1093              
1094             sub p_ims {
1095 132     132 0 294 my $self = shift;
1096 132         273 my $message = shift;
1097              
1098             # Mangle Exchange messages into a format we like better
1099             # see t/corpus/exchange.unknown.msg
1100              
1101             return
1102 132 100 100     491 unless ($message->head->get("X-Mailer")||'') =~ /Internet Mail Service/i;
1103              
1104 1 50       48 if ($message->is_multipart) {
1105             return unless my ($error_part)
1106 1 50       154 = grep { $_->effective_type eq "text/plain" } $message->parts;
  2         253  
1107              
1108 1 50       112 return unless my ($actual_error)
1109             = $error_part->as_string
1110             =~ /did not reach the following recipient\S+\s*(.*)/is;
1111              
1112 1 50       1477 if (my $io = $error_part->open("w")) {
1113 1         68 $io->print($actual_error);
1114 1         9 $io->close;
1115             }
1116              
1117             } else {
1118              
1119 0 0       0 return unless my ($actual_error)
1120             = $message->bodyhandle->as_string
1121             =~ /did not reach the following recipient\S+\s*(.*)/is;
1122              
1123 0         0 my ($stuff_before, $stuff_after)
1124             = split /^(?=Message-ID:|Received:)/m, $message->bodyhandle->as_string;
1125              
1126 0         0 $stuff_before =~ s/.*did not reach the following recipient.*?$//ism;
1127 0         0 $self->log("rewrote IMS into plain/report.");
1128 0         0 return $self->new_plain_report($message, $stuff_before, $stuff_after);
1129             }
1130              
1131 1         11 return $message;
1132             }
1133              
1134             sub p_aol_senderblock {
1135 132     132 0 308 my $self = shift;
1136 132         309 my $message = shift;
1137              
1138 132 100 100     582 return unless ($message->head->get("Mailer")||'') =~ /AirMail/i;
1139 2 50       84 return unless $message->effective_type eq "text/plain";
1140 2 50       170 return unless $message->bodyhandle->as_string =~ /Your mail to the following recipients could not be delivered because they are not accepting mail/i;
1141              
1142 2         43 my ($host) = $message->head->get("From") =~ /\@(\S+)>/;
1143              
1144 2         74 my $rejector;
1145             my @new_output;
1146 2         9 for (split /\n/, $message->bodyhandle->as_string) {
1147              
1148             # "Sorry luser@example.com. Your mail to the...
1149             # Get rid of this so that the module doesn't create a report for
1150             # *your* address.
1151 6         42 s/Sorry \S+?@\S+?\.//g;
1152              
1153 6 50       20 if (/because they are not accepting mail from (\S+?):?/i) {
1154 0         0 $rejector = $1;
1155 0         0 push @new_output, $_;
1156 0         0 next;
1157             }
1158 6 100       23 if (/^\s*(\S+)\s*$/) {
1159 2         6 my $recipient = $1;
1160 2 50       8 if ($recipient =~ /\@/) {
1161 0         0 push @new_output, $_;
1162 0         0 next;
1163             }
1164 2         16 s/^(\s*)(\S+)(\s*)$/$1$2\@$host$3/;
1165 2         4 push @new_output, $_;
1166 2         6 next;
1167             }
1168 4         9 push @new_output, $_;
1169 4         8 next;
1170             }
1171              
1172 2         7 push @new_output, ("# rewritten by BounceParser: p_aol_senderblock()", "");
1173 2 50       10 if (my $io = $message->open("w")) {
1174 2         133 $io->print(join "\n", @new_output);
1175 2         15 $io->close;
1176             }
1177 2         19 return $message;
1178             }
1179              
1180             sub p_novell_groupwise {
1181              
1182             # renamed from p_novell_groupwise_5_2 - hopefully we can deal with most / all
1183             # versions and create test cases / fixes when we can't
1184             #
1185             # See t/various-unknown.t and t/corpus/novell-*.msg for some recent examples.
1186              
1187 132     132 0 293 my $self = shift;
1188 132         268 my $message = shift;
1189              
1190 132 100 100     472 return unless ($message->head->get("X-Mailer")||'') =~ /Novell Groupwise/i;
1191 1 50       45 return unless $message->effective_type eq "multipart/mixed";
1192             return unless my ($error_part)
1193 1 50       143 = grep { $_->effective_type eq "text/plain" } $message->parts;
  1         11  
1194              
1195 1         165 my ($host) = $message->head->get("From") =~ /\@(\S+)>?/;
1196              
1197             # A lot of times, Novell returns just the LHS; this makes it difficult /
1198             # impossible in many cases to guess the recipient address. MBP makes an
1199             # attempt here.
1200 1         41 my @new_output;
1201 1         6 for (split /\n/, $error_part->bodyhandle->as_string) {
1202 3 100       25 if (/^(\s*)(\S+)(\s+\(.*\))$/) {
1203 1         5 my ($space, $recipient, $reason) = ($1, $2, $3);
1204 1 50       6 if ($recipient =~ /\@/) {
1205 1         3 push @new_output, $_;
1206 1         3 next;
1207             }
1208 0         0 $_ = join "", $space, "$2\@$host", $reason;
1209 0         0 push @new_output, $_; next;
  0         0  
1210             }
1211 2         4 push @new_output, $_; next;
  2         4  
1212             }
1213              
1214 1         5 push @new_output,
1215             ("# rewritten by BounceParser: p_novell_groupwise()", "");
1216              
1217 1 50       5 if (my $io = $error_part->open("w")) {
1218 1         75 $io->print(join "\n", @new_output);
1219 1         8 $io->close;
1220             }
1221 1         11 return $message;
1222             }
1223              
1224             sub p_plain_smtp_transcript {
1225 132     132 0 305 my ($self, $message) = (shift, shift);
1226              
1227             # sometimes, we have a proper smtp transcript;
1228             # that means we have enough information to mark the message up into a proper
1229             # multipart/report!
1230             #
1231             # pennwomen-la@v2.listbox.com/200209/19/1032468752.1444_1.frodo
1232             # The original message was received at Thu, 19 Sep 2002 13:51:36 -0700 (MST)
1233             # from daemon@localhost
1234             #
1235             # ----- The following addresses had permanent fatal errors -----
1236             #
1237             # (expanded from: )
1238             #
1239             # ----- Transcript of session follows -----
1240             # ... while talking to smtp-local.primenet.com.:
1241             # >>> RCPT To:
1242             # <<< 550 ... User unknown
1243             # 550 ... User unknown
1244             # ----- Message header follows -----
1245             #
1246             # what we'll do is mark it back up into a proper multipart/report.
1247              
1248 132 100       480 return unless $message->effective_type eq "text/plain";
1249              
1250 16 100       1792 return unless $message->bodyhandle->as_string
1251             =~ /The following addresses had permanent fatal errors/;
1252              
1253 2 50       28 return unless $message->bodyhandle->as_string
1254             =~ /Transcript of session follows/;
1255              
1256 2 50       27 return unless $message->bodyhandle->as_string =~ /Message .* follows/;
1257              
1258 2         31 my ($stuff_before, $stuff_after)
1259             = split /^.*Message (?:header|body) follows.*$/im,
1260             $message->bodyhandle->as_string, 2;
1261              
1262 2         79 my %by_email = $self->_analyze_smtp_transcripts($stuff_before);
1263              
1264 2         7 my @paras = _construct_delivery_status_paras(\%by_email);
1265              
1266 2         6 my @new_output;
1267 2         7 my ($reporting_mta) = _cleanup_email($message->head->get("From")) =~ /\@(\S+)/;
1268              
1269 2         11 chomp (my $arrival_date = $message->head->get("Date"));
1270              
1271 2 50       74 push @new_output, "Reporting-MTA: $reporting_mta" if $reporting_mta;
1272 2 50       11 push @new_output, "Arrival-Date: $arrival_date" if $arrival_date;
1273 2         6 push @new_output, "";
1274 2         4 push @new_output, map { @$_, "" } @paras;
  2         8  
1275              
1276 2         18 return $self->new_multipart_report(
1277             $message,
1278             $stuff_before,
1279             join("\n", @new_output),
1280             $stuff_after
1281             );
1282             }
1283              
1284             sub _construct_delivery_status_paras {
1285 2     2   4 my %by_email = %{shift()};
  2         5  
1286              
1287 2         4 my @new_output;
1288              
1289 2         9 foreach my $email (sort keys %by_email) {
1290             # Final-Recipient: RFC822; robinbw@aol.com
1291             # Action: failed
1292             # Status: 2.0.0
1293             # Remote-MTA: DNS; air-xj03.mail.aol.com
1294             # Diagnostic-Code: SMTP; 250 OK
1295             # Last-Attempt-Date: Thu, 19 Sep 2002 16:53:10 -0400 (EDT)
1296              
1297             push @new_output, [
1298             "Final-Recipient: RFC822; $email",
1299             "Action: failed",
1300             "Status: 5.0.0",
1301 2 50       15 ($by_email{$email}->{host} ? ("Remote-MTA: DNS; $by_email{$email}->{host}") : ()),
1302             _construct_diagnostic_code(\%by_email, $email),
1303             ];
1304              
1305             }
1306              
1307 2         6 return @new_output;
1308             }
1309              
1310             sub _construct_diagnostic_code {
1311 2     2   4 my %by_email = %{shift()};
  2         5  
1312 2         4 my $email = shift;
1313             join (" ",
1314             "Diagnostic-Code: X-BounceParser;",
1315             ($by_email{$email}->{'host'} ? "host $by_email{$email}->{'host'} said:" : ()),
1316             ($by_email{$email}->{'smtp_code'}),
1317 2 50       12 (join ", ", @{ $by_email{$email}->{'errors'} || [] }));
  2 50       18  
1318             }
1319              
1320             sub _analyze_smtp_transcripts {
1321 2     2   5 my $self = shift;
1322 2         5 my $plain_smtp_transcript = shift;
1323              
1324 2         4 my (%by_email, $email, $smtp_code, @error_strings, $host);
1325              
1326             # parse the text part for the actual SMTP transcript
1327 2         130 for (split /\n\n|(?=>>>)/, $plain_smtp_transcript) {
1328 6 50       24 $email = _cleanup_email($1) if /RCPT TO:\s*(\S+)/im;
1329              
1330 6 100       132 if (/The\s+following\s+addresses\s+had\s+permanent\s+fatal\s+errors\s+-----\s+\?/im) {
1331 2         9 $email = _cleanup_email($1);
1332             }
1333              
1334 6 100       23 $by_email{$email}->{host} = $host if $email;
1335              
1336 6 100       24 if (/while talking to (\S+)/im) {
1337 2         4 $host = $1;
1338 2         8 $host =~ s/[.:;]+$//g;
1339             }
1340              
1341 6 100       21 if (/<<< (\d\d\d) (.*)/m) {
1342 2         7 $by_email{$email}->{smtp_code} = $1;
1343 2         5 push @{$by_email{$email}->{errors}}, $2;
  2         11  
1344             }
1345              
1346 6 50       26 if (/^(\d\d\d)\b.*(<\S+\@\S+>)\.*\s+(.+)/m) {
1347 0         0 $email = _cleanup_email($2);
1348 0         0 $by_email{$email}->{smtp_code} = $1;
1349 0         0 push @{$by_email{$email}->{errors}}, $3;
  0         0  
1350             }
1351             }
1352 2         6 delete $by_email{''};
1353 2         9 return %by_email;
1354             }
1355              
1356             # ------------------------------------------------------------
1357              
1358             sub new_plain_report {
1359 0     0 0 0 my ($self, $message, $error_text, $orig_message) = @_;
1360              
1361 0         0 $orig_message =~ s/^\s+//;
1362              
1363 0         0 my $newmessage = $message->dup();
1364 0         0 $newmessage->make_multipart("plain-report");
1365 0         0 $newmessage->parts([]);
1366 0         0 $newmessage->attach(Type => "text/plain", Data => $error_text);
1367              
1368 0         0 my $orig_message_mime = MIME::Entity->build(Type => "multipart/transitory");
1369              
1370 0         0 $orig_message_mime->add_part($self->{parser}->parse_data($orig_message));
1371              
1372 0         0 $orig_message_mime->head->mime_attr("content-type" => "message/rfc822");
1373 0         0 $newmessage->add_part($orig_message_mime);
1374              
1375 0         0 $self->log("created new plain-report message.");
1376              
1377 0         0 return $newmessage;
1378             }
1379              
1380             # ------------------------------------------------------------
1381              
1382             sub new_multipart_report {
1383 2     2 0 7 my ($self, $message, $error_text, $delivery_status, $orig_message) = @_;
1384              
1385 2         17 $orig_message =~ s/^\s+//;
1386              
1387 2         11 my $newmessage = $message->dup();
1388 2         1009 $newmessage->make_multipart("report");
1389 2         3765 $newmessage->parts([]);
1390 2         39 $newmessage->attach(
1391             Type => "text/plain",
1392             Data => $error_text
1393             );
1394 2         1957 $newmessage->attach(
1395             Type => "message/delivery-status",
1396             Data => $delivery_status
1397             );
1398              
1399 2         2213 my $orig_message_mime
1400             = MIME::Entity->build(Type => "multipart/transitory", Top => 0);
1401              
1402 2         16588 $orig_message_mime->add_part($self->{parser}->parse_data($orig_message));
1403              
1404 2         12375 $orig_message_mime->head->mime_attr("content-type" => "message/rfc822");
1405 2         512 $newmessage->add_part($orig_message_mime);
1406              
1407 2         24 $self->log("created new multipart-report message.");
1408              
1409 2         22 return $newmessage;
1410             }
1411              
1412             # ------------------------------------------------------------
1413              
1414             sub _cleanup_email {
1415 129     129   321 my $email = shift;
1416 129         388 for ($email) {
1417 129         378 chomp;
1418             # Get rid of parens around addresses like (luser@example.com)
1419             # Got rid of earlier /\(.*\)/ - not sure what that was about - wby
1420 129         527 tr/[()]//d;
1421 129         321 s/^To:\s*//i;
1422 129         411 s/[.:;]+$//;
1423 129         329 s/<(.+)>/$1/;
1424             # IMS hack: c=US;a= ;p=NDC;o=ORANGE;dda:SMTP=slpark@msx.ndc.mc.uci.edu; on
1425             # Thu, 19 Sep...
1426 129         272 s/.*:SMTP=//;
1427 129         287 s/^\s+//;
1428 129         311 s/\s+$//;
1429             # hack to get rid of stuff like "luser@example.com...User"
1430 129         294 s/\.{3}\S+//;
1431             # SMTP:foo@example.com
1432 129         385 s/^SMTP://;
1433             }
1434 129         350 return $email;
1435             }
1436              
1437             sub p_xdelivery_status {
1438 132     132 0 291 my ($self, $message) = @_;
1439              
1440             # This seems to be caused by something called "XWall v3.31", which
1441             # (according to Google) is a "firewall that protects your Exchange
1442             # server from viruses, spam mail and dangerous attachments". Shame it
1443             # doesn't protect the rest of the world from gratuitously broken MIME
1444             # types.
1445              
1446 132         647 for ($message->parts_DFS) {
1447 697 100       80688 $_->effective_type('message/delivery-status')
1448             if $_->effective_type eq 'message/xdelivery-status';
1449             }
1450             }
1451              
1452             sub _first_non_multi_part {
1453 132     132   287 my ($entity) = @_;
1454              
1455 132         244 my $part = $entity;
1456 132   100     587 $part = $part->parts(0) or return while $part->is_multipart;
1457 131         39333 return $part;
1458             }
1459              
1460             sub _position_before {
1461 1     1   3 my ($pos_a, $pos_b) = @_;
1462 1 50 33     18 return 1 if defined($pos_a) && (!defined($pos_b) || $pos_a < $pos_b);
      33        
1463 0         0 return;
1464             }
1465              
1466             # Return the position in $string at which $regex first matches, or undef if
1467             # no match.
1468             sub _match_position {
1469 15     15   38 my ($string, $regex) = @_;
1470 15 100       2805 return $string =~ $regex ? $-[0] : undef;
1471             }
1472              
1473             1;