File Coverage

blib/lib/Mail/DeliveryStatus/BounceParser.pm
Criterion Covered Total %
statement 414 484 85.5
branch 207 270 76.6
condition 350 417 83.9
subroutine 31 32 96.8
pod 9 16 56.2
total 1011 1219 82.9


line stmt bran cond sub pod time code
1             package Mail::DeliveryStatus::BounceParser;
2              
3             =head1 NAME
4              
5             Mail::DeliveryStatus::BounceParser - Perl extension to analyze bounce messages
6              
7             =head1 SYNOPSIS
8              
9             use Mail::DeliveryStatus::BounceParser;
10              
11             # $message is \*io or $fh or "entire\nmessage" or \@lines
12             my $bounce = eval { Mail::DeliveryStatus::BounceParser->new($message); };
13              
14             if ($@) {
15             # couldn't parse.
16             }
17              
18             my @addresses = $bounce->addresses; # email address strings
19             my @reports = $bounce->reports; # Mail::Header objects
20             my $orig_message_id = $bounce->orig_message_id; # <ABCD.1234@mx.example.com>
21             my $orig_message = $bounce->orig_message; # Mail::Internet object
22              
23             =head1 ABSTRACT
24              
25             Mail::DeliveryStatus::BounceParser analyzes RFC822 bounce messages and returns
26             a structured description of the addresses that bounced and the reason they
27             bounced; it also returns information about the original returned message
28             including the Message-ID. It works best with RFC1892 delivery reports, but
29             will gamely attempt to understand any bounce message no matter what MTA
30             generated it.
31              
32             =head1 DESCRIPTION
33              
34             Meng Wong wrote this for the Listbox v2 project; good mailing list managers
35             handle bounce messages so listowners don't have to. The best mailing list
36             managers figure out exactly what is going on with each subscriber so the
37             appropriate action can be taken.
38              
39             =cut
40              
41 34     34   2857054 use 5.006;
  34         381  
42 34     34   162 use strict;
  34         59  
  34         671  
43 34     34   179 use warnings;
  34         53  
  34         1609  
44              
45             our $VERSION = '1.542';
46             $VERSION = eval $VERSION;
47              
48 34     34   18209 use MIME::Parser;
  34         3390228  
  34         1121  
49 34     34   15666 use Mail::DeliveryStatus::Report;
  34         88  
  34         916  
50 34     34   182 use vars qw($EMAIL_ADDR_REGEX);
  34         62  
  34         100978  
51              
52             $EMAIL_ADDR_REGEX = qr{
53             # Avoid using something like Email::Valid
54             # Full rfc(2)822 compliance isn't exactly what we want, and this seems to work
55             # for most real world cases
56             (?:<|^|\s) # Space, or the start of a string
57             ([^\s\/<]+ # some non-space, non-/ characters; none are <
58             \@ # at sign (duh)
59             (?:[-\w]+\.)+[-\w]+) # word characters or hypens organized into
60             # at least two dot-separated words
61             (?:$|\s|>) # then the end
62             }sx;
63              
64             my $Not_An_Error = qr/
65             \b delayed \b
66             | \b warning \b
67             | transient.{0,20}\serror
68             | Your \s message .{0,100} was \s delivered \s to \s the \s following \s recipient
69             /six;
70              
71             # added "permanent fatal errors" - fix for bug #41874
72             my $Really_An_Error = qr/this is a permanent error|permanent fatal errors/i;
73              
74             my $Returned_Message_Below = qr/(
75             (?:original|returned) \s message \s (?:follows|below)
76             | (?: this \s is \s a \s copy \s of
77             | below \s this \s line \s is \s a \s copy
78             ) .{0,100} \s message\.?
79             | message \s header \s follows
80             | ^ (?:return-path|received|from):
81             )\s+/sixm;
82              
83             my @Preprocessors = qw(
84             p_ims
85             p_aol_senderblock
86             p_novell_groupwise
87             p_plain_smtp_transcript
88             p_xdelivery_status
89             );
90              
91             =head2 parse
92              
93             my $bounce = Mail::DeliveryStatus::BounceParser->parse($message, \%arg);
94              
95             OPTIONS. If you pass BounceParser->new(..., {log=>sub { ... }}) That will be
96             used as a logging callback. If C<< $message >> is undefined, will parse STDIN.
97              
98             NON-BOUNCES. If the message is recognizably a vacation autoresponse, or is a
99             report of a transient nonfatal error, or a spam or virus autoresponse, you'll
100             still get back a C<$bounce>, but its C<< $bounce->is_bounce() >> will return
101             false.
102              
103             It is possible that some bounces are not really bounces; such as
104             anything that appears to have a 2XX status code. To include such
105             non-bounces in the reports, pass the option {report_non_bounces=>1}.
106              
107             For historical reasons, C<new> is an alias for the C<parse> method.
108              
109             =cut
110              
111             sub parse {
112 133     133 1 373287 my ($class, $data, $arg) = @_;
113             # my $bounce = Mail::DeliveryStatus::BounceParser->new( \*STDIN | $fh |
114             # "entire\nmessage" | ["array","of","lines"] );
115              
116 133         1221 my $parser = MIME::Parser->new;
117 133         19517 $parser->output_to_core(1);
118 133         1701 $parser->decode_headers(1);
119              
120 133         4683 my $message;
121              
122 133 50       705 if (not $data) {
    50          
    0          
123 0 0       0 print STDERR "BounceParser: expecting bounce message on STDIN\n" if -t STDIN;
124 0         0 $message = $parser->parse(\*STDIN);
125             } elsif (not ref $data) {
126 133         602 $message = $parser->parse_data($data);
127             } elsif (ref $data eq "ARRAY") {
128 0         0 $message = $parser->parse_data($data);
129             } else {
130 0         0 $message = $parser->parse($data);
131             }
132              
133             my $self = bless {
134             reports => [],
135             is_bounce => 1,
136             log => $arg->{log},
137             parser => $parser,
138             orig_message_id => undef,
139             prefer_final_recipient => $arg->{prefer_final_recipient},
140 133         2596015 }, $class;
141              
142 133 50       548 $self->log(
    100          
143             "received message with type "
144             . (defined($message->effective_type) ? $message->effective_type : "undef")
145             . ", subject "
146             . (defined($message->head->get("subject")) ? $message->head->get("subject") : "CAN'T GET SUBJECT")
147             );
148              
149             # before we even start to analyze the bounce, we recognize certain special
150             # cases, and rewrite them to be intelligible to us
151 133         529 foreach my $preprocessor (@Preprocessors) {
152 665 100       30984 if (my $newmessage = $self->$preprocessor($message)) {
153 6         32 $message = $newmessage;
154             }
155             }
156              
157 133         15008 $self->{message} = $message;
158              
159 133 100       449 $self->log(
160             "now the message is type "
161             . $message->effective_type
162             . ", subject "
163             . (defined($message->head->get("subject")) ? $message->head->get("subject") : "CAN'T GET SUBJECT")
164             );
165              
166 133         584 my $first_part = _first_non_multi_part($message);
167              
168             # Deal with some common C/R systems like TMDA
169             {
170 133 50 33     424 last unless ($message->head->get("x-delivery-agent")
171             and $message->head->get("X-Delivery-Agent") =~ /^TMDA/);
172 0         0 $self->log("looks like a challenge/response autoresponse; ignoring.");
173 0         0 $self->{type} = "Challenge / Response system autoreply";
174 0         0 $self->{is_bounce} = 0;
175 0         0 return $self;
176             }
177              
178             {
179 133 100 66     442 last unless ($message->head->get("X-Bluebottle-Request") and $first_part->stringify_body =~ /This account is protected by Bluebottle/);
  133         452  
180 1         1137 $self->log("looks like a challenge/response autoresponse; ignoring.");
181 1         3 $self->{type} = "Challenge / Response system autoreply";
182 1         2 $self->{is_bounce} = 0;
183 1         5 return $self;
184             }
185              
186             {
187 133 100 100     3828 last unless defined $first_part and $first_part->stringify_body =~ /Your server requires confirmation/;
  132         818  
188 1         1313 $self->log("Looks like a challenge/response autoresponse; ignoring.");
189 1         4 $self->{type} = "Challenge / Response system autoreply";
190 1         2 $self->{is_bounce} = 0;
191 1         6 return $self;
192             }
193              
194             {
195 132 100 100     3647 last unless defined $first_part and $first_part->stringify_body =~ /Please add yourself to my Boxbe Guest List/;
  131         681  
196 1         1575 $self->log("Looks like a challenge/response autoresponse; ignoring.");
197 1         4 $self->{type} = "Challenge / Response system autoreply";
198 1         3 $self->{is_bounce} = 0;
199             }
200              
201             {
202 131 100 100     175557 last unless defined $first_part and $first_part->stringify_body =~ /This\s+is\s+a\s+one-time\s+automated\s+message\s+to\s+confirm\s+that\s+you're\s+listed\s+on\s+my\s+Boxbe\s+Guest\s+List/;
  131         701  
203 1         1512 $self->log("Looks like a challenge/response autoresponse; ignoring.");
204 1         4 $self->{type} = "Challenge / Response system autoreply";
205 1         3 $self->{is_bounce} = 0;
206             }
207              
208             # we'll deem autoreplies to be usually less than a certain size.
209              
210             # Some vacation autoreplies are (sigh) multipart/mixed, with an additional
211             # part containing a pointless disclaimer; some are multipart/alternative,
212             # with a pointless HTML part saying the exact same thing. (Messages in
213             # this latter category have the decency to self-identify with things like
214             # '<META NAME="Generator" CONTENT="MS Exchange Server version
215             # 5.5.2653.12">', so we know to avoid such software in future.) So look
216             # at the first part of a multipart message (recursively, down the tree).
217              
218             {
219 131 100       165395 last if $message->effective_type eq 'multipart/report';
  131         491  
220 19 100 66     2418 last if !$first_part || $first_part->effective_type ne 'text/plain';
221 17         1933 my $string = $first_part->as_string;
222 17 100       27396 last if length($string) > 3000;
223             # added return receipt (fix for bug #41870)
224 14 100       2616 last if $string !~ /auto.{0,20}(reply|response)|return receipt|vacation|(out|away|on holiday).*office/i;
225 1         5 $self->log("looks like a vacation autoreply, ignoring.");
226 1         3 $self->{type} = "vacation autoreply";
227 1         2 $self->{is_bounce} = 0;
228 1         5 return $self;
229             }
230              
231             # vacation autoreply tagged in the subject
232             {
233 131 100       165281 last if $message->effective_type eq 'multipart/report';
  130         432  
234 18 100 66     2244 last if !$first_part || $first_part->effective_type ne 'text/plain';
235 16         1737 my $subject = $message->head->get('Subject');
236 16 100       615 last if !defined($subject);
237 15 100       94 last if $subject !~ /^AUTO/;
238 1 50       5 last if $subject !~ /is out of the office/;
239 1         5 $self->log("looks like a vacation autoreply, ignoring.");
240 1         4 $self->{type} = "vacation autoreply";
241 1         2 $self->{is_bounce} = 0;
242 1         5 return $self;
243             }
244              
245             # Polish auto-reply
246             {
247 130 100       19442 last if $message->effective_type eq 'multipart/report';
  129         425  
248 17 100 66     1943 last if !$first_part || $first_part->effective_type ne 'text/plain';
249 15         1688 my $subject = $message->head->get('Subject');
250 15 100       599 last if !defined($subject);
251 14 100       75 last if $subject !~ /Automatyczna\s+odpowied/;
252 1         5 $self->log("looks like a polish autoreply, ignoring.");
253 1         4 $self->{type} = "polish autoreply";
254 1         2 $self->{is_bounce} = 0;
255 1         5 return $self;
256             }
257              
258             # "Email address changed but your message has been forwarded"
259             {
260 129 100       17516 last if $message->effective_type eq 'multipart/report';
  128         17574  
  128         438  
261 16 100 66     1862 last if !$first_part || $first_part->effective_type ne 'text/plain';
262 14         1513 my $string = $first_part->as_string;
263 14 100       24045 last if length($string) > 3000;
264 12 50       1476 last if $string
265             !~ /(address .{0,60} changed | domain .{0,40} retired) .*
266             (has\s*been|was|have|will\s*be) \s* (forwarded|delivered)/six;
267 0         0 $self->log('looks like an address-change autoreply, ignoring');
268 0         0 $self->{type} = 'informational address-change autoreply';
269 0         0 $self->{is_bounce} = 0;
270 0         0 return $self;
271             }
272              
273             # Network Associates WebShield SMTP V4.5 MR1a on cpwebshield intercepted a
274             # mail from <owner-aftermba@v2.listbox.com> which caused the Content Filter
275             # Block extension COM to be triggered.
276 128 50 100     18163 if ($message->effective_type eq "text/plain"
      66        
277             and (length $message->as_string) < 3000
278             and $message->bodyhandle->as_string
279             =~ m/norton\sassociates\swebshield|content\s+filter/ix
280             ) {
281 0         0 $self->log("looks like a virus/spam block, ignoring.");
282 0         0 $self->{type} = "virus/spam false positive";
283 0         0 $self->{is_bounce} = 0;
284 0         0 return $self;
285             }
286              
287             # nonfatal errors usually say they're transient
288              
289 128 50 66     39365 if ($message->effective_type eq "text/plain"
290             and $message->bodyhandle->as_string =~ /transient.*error/is) {
291 0         0 $self->log("seems like a nonfatal error, ignoring.");
292 0         0 $self->{is_bounce} = 0;
293 0         0 return $self;
294             }
295              
296             # nonfatal errors usually say they're transient, but sometimes they do it
297             # straight out and sometimes it's wrapped in a multipart/report.
298             #
299             # Be careful not to examine a returned body for the transient-only signature:
300             # $Not_An_Error can match the single words 'delayed' and 'warning', which
301             # could quite reasonably occur in the body of the returned message. This
302             # also means it's worth additionally checking for a regex that gives a very
303             # strong indication that the error was permanent.
304             {
305 128         19020 my $part_for_maybe_transient;
  128         262  
306 128 100       531 $part_for_maybe_transient = $message
307             if $message->effective_type eq "text/plain";
308             ($part_for_maybe_transient)
309 128 100 100     19491 = grep { $_->effective_type eq "text/plain" } $message->parts
  9         1957  
310             if $message->effective_type =~ /multipart/
311             && $message->effective_type ne 'multipart/report';
312              
313 128 100       36472 if ($part_for_maybe_transient) {
314 13         51 my $string = $part_for_maybe_transient->bodyhandle->as_string;
315 13         162 my $transient_pos = _match_position($string, $Not_An_Error);
316 13 100       75 last unless defined $transient_pos;
317 1         2 my $permanent_pos = _match_position($string, $Really_An_Error);
318 1         3 my $orig_msg_pos = _match_position($string, $Returned_Message_Below);
319 1 50       4 last if _position_before($permanent_pos, $orig_msg_pos);
320 0 0       0 if (_position_before($transient_pos, $orig_msg_pos)) {
321 0         0 $self->log("transient error, ignoring.");
322 0         0 $self->{is_bounce} = 0;
323 0         0 return $self;
324             }
325             }
326             }
327              
328             # In all cases we will read the message body to try to pull out a message-id.
329 128 100       440 if ($message->effective_type =~ /multipart/) {
330             # "Internet Mail Service" sends multipart/mixed which still has a
331             # message/rfc822 in it
332 117 100       18082 if (
333             my ($orig_message) =
334 344         27405 grep { $_->effective_type eq "message/rfc822" } $message->parts
335             ) {
336             # see MIME::Entity regarding REPLACE
337 95         10283 my $orig_message_id = $orig_message->parts(0)->head->get("message-id");
338 95 100       4567 if ($orig_message_id) {
339 94         1093 $orig_message_id =~ s/(\r|\n)*$//g;
340 94         705 $self->log("extracted original message-id [$orig_message_id] from the original rfc822/message");
341             } else {
342 1         5 $self->log("Couldn't extract original message-id from the original rfc822/message");
343             }
344 95         321 $self->{orig_message_id} = $orig_message_id;
345 95         328 $self->{orig_message} = $orig_message->parts(0);
346             }
347              
348             # todo: handle pennwomen-la@v2.listbox.com/200209/19/1032468832.1444_1.frodo
349             # which is a multipart/mixed containing an application/tnef instead of a
350             # message/rfc822. yow!
351              
352 117 100 100     3890 if (! $self->{orig_message_id}
353             and
354             my ($rfc822_headers) =
355 62         4951 grep { lc $_->effective_type eq "text/rfc822-headers" } $message->parts
356             ) {
357 17         1896 my $orig_head = Mail::Header->new($rfc822_headers->body);
358 17         44995 my $message_id = $orig_head->get("message-id");
359 17 50       614 if ($message_id) {
360 17         56 chomp ($self->{orig_message_id} = $orig_head->get("message-id"));
361 17         559 $self->{orig_header} = $orig_head;
362 17         96 $self->log("extracted original message-id $self->{orig_message_id} from text/rfc822-headers");
363             }
364             }
365             }
366              
367 128 100       2380 if (! $self->{orig_message_id}) {
368 17 100 100     87 if ($message->bodyhandle and $message->bodyhandle->as_string =~ /Message-ID: (\S+)/i) {
369 5         135 $self->{orig_message_id} = $1;
370 5         33 $self->log("found a message-id $self->{orig_message_id} in the body.");
371             }
372             }
373              
374 128 100       719 if (! $self->{orig_message_id}) {
375 12         51 $self->log("couldn't find original message id.");
376             }
377              
378             #
379             # try to extract email addresses to identify members.
380             # we will also try to extract reasons as much as we can.
381             #
382              
383 128 100       446 if ($message->effective_type eq "multipart/report") {
    100          
    100          
384             my ($delivery_status) =
385 112         18057 grep { $_->effective_type eq "message/delivery-status" } $message->parts;
  335         26863  
386              
387 112         12051 my %global = ("reporting-mta" => undef, "arrival-date" => undef);
388              
389 112         250 my ($seen_action_expanded, $seen_action_failed);
390              
391             # Some MTAs generate malformed multipart/report messages with no
392             # message/delivery-status part; don't die in such cases.
393             my $delivery_status_body
394 112   100     246 = eval { $delivery_status->bodyhandle->as_string } || '';
395              
396             # Used to be \n\n, but now we allow any number of newlines between
397             # individual per-recipient fields to deal with stupid bug with the IIS SMTP
398             # service. RFC1894 (2.1, 2.3) is not 100% clear about whether more than
399             # one line is allowed - it just says "preceded by a blank line". We very
400             # well may put an upper bound on this in the future.
401             #
402             # See t/iis-multiple-bounce.t
403 112         2041 foreach my $para (split /\n{2,}/, $delivery_status_body) {
404              
405             # See t/surfcontrol-extra-newline.t - deal with bug #21249
406 136         3406 $para =~ s/\A\n+//g;
407             # added the following line as part of fix for #41874
408 136         785 $para =~ s/\r/ /g;
409              
410 136         1589 my $report = Mail::DeliveryStatus::Report->new([split /\n/, $para]);
411              
412             # Removed a $report->combine here - doesn't seem to work without a tag
413             # anyway... not sure what that was for. - wby 20060823
414              
415             # Unfold so message doesn't wrap over multiple lines
416 136         90105 $report->unfold;
417              
418             # Some MTAs send unsought delivery-status notifications indicating
419             # success; others send RFC1892/RFC3464 delivery status notifications
420             # for transient failures.
421 136 100 66     6682 if (defined $report->get('Action') and lc $report->get('Action')) {
422 114         388 my $action = lc $report->get('Action');
423 114         321 $action =~ s/^\s+//;
424 114 50       682 if ($action =~ s/^\s*([a-z]+)\b.*/$1/s) {
425             # In general, assume that anything other than 'failed' is a
426             # non-bounce; but 'expanded' is handled after the end of this
427             # foreach loop, because it might be followed by another
428             # per-recipient group that says 'failed'.
429 114 50       663 if ($action eq 'expanded') {
    100          
430 0         0 $seen_action_expanded = 1;
431             } elsif ($action eq 'failed') {
432 113         260 $seen_action_failed = 1;
433             } else {
434 1         8 $self->log("message/delivery-status says 'Action: \L$1'");
435 1         4 $self->{type} = "delivery-status \L$1";
436 1         3 $self->{is_bounce} = 0;
437 1         12 return $self;
438             }
439             }
440             }
441              
442 135         377 for my $hdr (qw(Reporting-MTA Arrival-Date)) {
443 270   100     15265 my $val = $global{$hdr} ||= $report->get($hdr);
444 270 50       727 if (defined($val)) {
445 270         1078 $report->replace($hdr => $val)
446             }
447             }
448              
449 135         13064 my $email;
450              
451 135 100       452 if ($self->{prefer_final_recipient}) {
452 2   66     6 $email = $report->get("final-recipient")
453             || $report->get("original-recipient");
454             } else {
455 133   100     421 $email = $report->get("original-recipient")
456             || $report->get("final-recipient");
457             }
458              
459 135 100       488 next unless $email;
460              
461             # $self->log("email = \"$email\"") if $DEBUG > 3;
462              
463             # Diagnostic-Code: smtp; 550 5.1.1 User unknown
464 113         316 my $reason = $report->get("diagnostic-code");
465              
466 113         536 $email =~ s/[^;]+;\s*//; # strip leading RFC822; or LOCAL; or system;
467 113 50       362 if (defined $reason) {
468 113         515 $reason =~ s/[^;]+;\s*//; # strip leading X-Postfix;
469             }
470              
471 113         453 $email = _cleanup_email($email);
472              
473 113         699 $report->replace(email => $email);
474 113 50       12089 if (defined $reason) {
475 113         319 $report->replace(reason => $reason);
476             } else {
477 0         0 $report->delete("reason");
478             }
479              
480 113         12109 my $status = $report->get('Status');
481 113 50       444 $report->replace(Status => $status) if $status =~ s/ \(permanent failure\)$//;
482              
483 113 50       332 if ($status) {
484             # RFC 1893... prefer Status: if it exists and is something we know
485             # about
486             # Not 100% sure about 5.1.0...
487 113 100       818 if ($status =~ /^5\.1\.[01]$/) {
    100          
    100          
    100          
    100          
488 9         27 $report->replace(std_reason => "user_unknown");
489             } elsif ($status eq "5.1.2") {
490 1         3 $report->replace(std_reason => "domain_error");
491             } elsif ($status eq "5.2.1") {
492 2         6 $report->replace(std_reason => "user_disabled");
493             } elsif ($status eq "5.2.2") {
494 1         4 $report->replace(std_reason => "over_quota");
495             } elsif ($status eq "5.4.4") {
496 1         2 $report->replace(std_reason => "domain_error");
497             } else {
498 99         299 $report->replace(
499             std_reason => _std_reason($report->get("diagnostic-code"))
500             );
501             }
502             } else {
503 0         0 $report->replace(
504             std_reason => _std_reason($report->get("diagnostic-code"))
505             );
506             }
507 113         13003 my $diag_code = $report->get("diagnostic-code");
508              
509 113         224 my $host;
510 113 50       331 if (defined $diag_code) {
511 113         391 ($host) = $diag_code =~ /\bhost\s+(\S+)/;
512             }
513              
514 113 100       350 $report->replace(host => ($host)) if $host;
515              
516 113         1533 my ($code);
517              
518 113 50       359 if (defined $diag_code) {
519 113         738 ($code) = $diag_code =~
520             m/ ( ( [245] \d{2} ) \s | \s ( [245] \d{2} ) (?!\.) ) /x;
521             }
522              
523 113 50 66     478 if (!$code && $status && $status =~ /\A([245])\.?([0-9])\.?([0-9])/) {
      66        
524 13         55 $code = "$1$2$3";
525             }
526              
527 113 50       329 if ($code) {
528 113         334 $report->replace(smtp_code => $code);
529             }
530              
531 113 100       12048 if (not $report->get("host")) {
532 100         243 my $email = $report->get("email");
533 100 50       305 if (defined $email) {
534 100         419 my $host = ($email =~ /\@(.+)/)[0];
535 100 100       384 $report->replace(host => $host) if $host;
536             }
537             }
538              
539 113 100 66     10163 if ($report->get("smtp_code") and ($report->get("smtp_code") =~ /^2../)) {
540 1         48 $self->log(
541             "smtp code is "
542             . $report->get("smtp_code")
543             . "; no_problemo."
544             );
545              
546             }
547              
548 113 50       351 unless ($arg->{report_non_bounces}) {
549 113 50       327 if ($report->get("std_reason") eq "no_problemo") {
550 0         0 $self->log(
551             "not actually a bounce: " . $report->get("diagnostic-code")
552             );
553 0         0 next;
554             }
555             }
556              
557 113         214 push @{$self->{reports}},
  113         572  
558             Mail::DeliveryStatus::Report->new([ split /\n/, $report->as_string ]
559             );
560             }
561              
562 111 50 33     126711 if ($seen_action_expanded && !$seen_action_failed) {
563             # We've seen at least one 'Action: expanded' DSN-field, but no
564             # 'Action: failed'
565 0         0 $self->log(q[message/delivery-status says 'Action: expanded']);
566 0         0 $self->{type} = 'delivery-status expanded';
567 0         0 $self->{is_bounce} = 0;
568 0         0 return $self;
569             }
570              
571             } elsif ($message->effective_type =~ /multipart/) {
572             # but not a multipart/report. look through each non-message/* section.
573             # See t/corpus/exchange.unknown.msg
574              
575 5 100       1373 my @delivery_status_parts = grep { $_->effective_type =~ m{text/plain}
  9         862  
576             and not $_->is_multipart
577             } $message->parts;
578              
579             # $self->log("error parts: @{[ map { $_->bodyhandle->as_string }
580             # @delivery_status_parts ]}") if $DEBUG > 3;
581              
582 5         760 push @{$self->{reports}}, $self->_extract_reports(@delivery_status_parts);
  5         30  
583              
584             } elsif ($message->effective_type =~ m{text/plain}) {
585             # handle plain-text responses
586              
587             # This used to just take *any* part, even if the only part wasn't a
588             # text/plain part
589             #
590             # We may have to specifically allow some other types, but in my testing, all
591             # the messages that get here and are actual bounces are text/plain
592             # wby - 20060907
593              
594             # they usually say "returned message" somewhere, and we can split on that,
595             # above and below.
596 10   50     2874 my $body_string = $message->bodyhandle->as_string || '';
597              
598 10 100       867 if ($body_string =~ $Returned_Message_Below) {
    50          
599 5         25 my ($stuff_before, $stuff_splitted, $stuff_after) =
600             split $Returned_Message_Below, $message->bodyhandle->as_string, 2;
601             # $self->log("splitting on \"$stuff_splitted\", " . length($stuff_before)
602             # . " vs " . length($stuff_after) . " bytes.") if $DEBUG > 3;
603 5         401 push @{$self->{reports}}, $self->_extract_reports($stuff_before);
  5         28  
604 5         24 $self->{orig_text} = $stuff_after;
605             } elsif ($body_string =~ /(.+)\n\n(.+?Message-ID:.+)/is) {
606 0         0 push @{$self->{reports}}, $self->_extract_reports($1);
  0         0  
607 0         0 $self->{orig_text} = $2;
608             } else {
609 5         12 push @{$self->{reports}}, $self->_extract_reports($body_string);
  5         23  
610 5         32 $self->{orig_text} = $body_string;
611             }
612             }
613 127         1310 return $self;
614             }
615              
616 34     34   136402 BEGIN { *new = \&parse };
617              
618             =head2 log
619              
620             $bounce->log($messages);
621              
622             If a logging callback has been given, the message will be passed to it.
623              
624             =cut
625              
626             sub log {
627 418     418 1 80995 my ($self, @log) = @_;
628 418 50       1619 if (ref $self->{log} eq "CODE") {
629 0         0 $self->{log}->(@_);
630             }
631 418         1032 return 1;
632             }
633              
634             sub _extract_reports {
635 15     15   36 my $self = shift;
636             # input: either a list of MIME parts, or just a chunk of text.
637              
638 15 50       54 if (@_ > 1) { return map { _extract_reports($_) } @_ }
  0         0  
  0         0  
639              
640 15         91 my $text = shift;
641              
642 15 100       58 $text = $text->bodyhandle->as_string if ref $text;
643              
644 15         53 my %by_email;
645              
646             # we'll assume that the text is made up of:
647             # blah blah 0
648             # email@address 1
649             # blah blah 1
650             # email@address 2
651             # blah blah 2
652             #
653              
654             # we'll break it up accordingly, and first try to detect a reason for email 1
655             # in section 1; if there's no reason returned, we'll look in section 0. and
656             # we'll keep going that way for each address.
657              
658 15 100       46 return unless $text;
659 13         443 my @split = split($EMAIL_ADDR_REGEX, $text);
660              
661 13         83 foreach my $i (0 .. $#split) {
662             # only interested in the odd numbered elements, which are the email
663             # addressess.
664 39 100       128 next if $i % 2 == 0;
665              
666 13         53 my $email = _cleanup_email($split[$i]);
667              
668 13 50       107 if ($split[$i-1] =~ /they are not accepting mail from/) {
669             # aol airmail sender block
670 0         0 next;
671             }
672              
673 13 100       53 if($split[$i-1] =~ /A message sent by/) {
674             # sender block
675 1         3 next;
676             }
677              
678 12         27 my $std_reason = "unknown";
679 12 50       67 $std_reason = _std_reason($split[$i+1]) if $#split > $i;
680 12 100       69 $std_reason = _std_reason($split[$i-1]) if $std_reason eq "unknown";
681              
682             # todo:
683             # if we can't figure out the reason, if we're in the delivery-status part,
684             # go back up into the text part and try extract_report() on that.
685              
686             next if (
687             exists $by_email{$email}
688             and $by_email{$email}->{std_reason}
689 12 50 66     64 ne "unknown" and $std_reason eq "unknown"
      33        
690             );
691              
692 12         33 my $reason = $split[$i-1];
693 12         47 $reason =~ s/(.*?). (Your mail to the following recipients could not be delivered)/$2/;
694              
695 12         65 $self->log("extracted a reason [$reason]");
696 12         149 $by_email{$email} = {
697             email => $email,
698             raw => join ("", @split[$i-1..$i+1]),
699             std_reason => $std_reason,
700             reason => $reason
701             };
702             }
703              
704 13         28 my @toreturn;
705              
706 13         47 foreach my $email (keys %by_email) {
707 11         117 my $report = Mail::DeliveryStatus::Report->new();
708 11         518 $report->modify(1);
709 11         150 $report->header_hashref($by_email{$email});
710 11         9242 push @toreturn, $report;
711             }
712              
713 13         66 return @toreturn;
714             }
715              
716             =head2 is_bounce
717              
718             if ($bounce->is_bounce) { ... }
719              
720             This method returns true if the bounce parser thought the message was a bounce,
721             and false otherwise.
722              
723             =cut
724              
725 130     130 1 9680 sub is_bounce { return shift->{is_bounce}; }
726              
727             =head2 reports
728              
729             Each $report returned by $bounce->reports() is basically a Mail::Header object
730             with a few modifications. It includes the email address bouncing, and the
731             reason for the bounce.
732              
733             Consider an RFC1892 error report of the form
734              
735             Reporting-MTA: dns; hydrant.pobox.com
736             Arrival-Date: Fri, 4 Oct 2002 16:49:32 -0400 (EDT)
737              
738             Final-Recipient: rfc822; bogus3@dumbo.pobox.com
739             Action: failed
740             Status: 5.0.0
741             Diagnostic-Code: X-Postfix; host dumbo.pobox.com[208.210.125.24] said: 550
742             <bogus3@dumbo.pobox.com>: Nonexistent Mailbox
743              
744             Each "header" above is available through the usual get() mechanism.
745              
746             print $report->get('reporting_mta'); # 'some.host.com'
747             print $report->get('arrival-date'); # 'Fri, 4 Oct 2002 16:49:32 -0400 (EDT)'
748             print $report->get('final-recipient'); # 'rfc822; bogus3@dumbo.pobox.com'
749             print $report->get('action'); # "failed"
750             print $report->get('status'); # "5.0.0"
751             print $report->get('diagnostic-code'); # X-Postfix; ...
752              
753             # BounceParser also inserts a few interpretations of its own:
754             print $report->get('email'); # 'bogus3@dumbo.pobox.com'
755             print $report->get('std_reason'); # 'user_unknown'
756             print $report->get('reason'); # host [199.248.185.2] said: 550 5.1.1 unknown or illegal user: somebody@uss.com
757             print $report->get('host'); # dumbo.pobox.com
758             print $report->get('smtp_code'); # 550
759              
760             print $report->get('raw') || # the original unstructured text
761             $report->as_string; # the original structured text
762              
763             Probably the two most useful fields are "email" and "std_reason", the
764             standardized reason. At this time BounceParser returns the following
765             standardized reasons:
766              
767             user_unknown
768             over_quota
769             user_disabled
770             domain_error
771             spam
772             message_too_large
773             unknown
774             no_problemo
775              
776             The "spam" standard reason indicates that the message bounced because
777             the recipient considered it spam.
778              
779             (no_problemo will only appear if you set {report_non_bounces=>1})
780              
781             If the bounce message is not structured according to RFC1892,
782             BounceParser will still try to return as much information as it can;
783             in particular, you can count on "email" and "std_reason" to be
784             present.
785              
786             =cut
787              
788 206     206 1 115138 sub reports { return @{shift->{reports}} }
  206         728  
789              
790             =head2 addresses
791              
792             Returns a list of the addresses which appear to be bouncing. Each member of
793             the list is an email address string of the form 'foo@bar.com'.
794              
795             =cut
796              
797 78     78 1 23904 sub addresses { return map { $_->get("email") } shift->reports; }
  84         299  
798              
799             =head2 orig_message_id
800              
801             If possible, returns the message-id of the original message as a string.
802              
803             =cut
804              
805 5     5 1 656 sub orig_message_id { return shift->{orig_message_id}; }
806              
807             =head2 orig_message
808              
809             If the original message was included in the bounce, it'll be available here as
810             a message/rfc822 MIME entity.
811              
812             my $orig_message = $bounce->orig_message;
813              
814             =cut
815              
816 2     2 1 13 sub orig_message { return shift->{orig_message} }
817              
818             =head2 orig_header
819              
820             If only the original headers were returned in the text/rfc822-headers chunk,
821             they'll be available here as a Mail::Header entity.
822              
823             =cut
824              
825 1     1 1 4 sub orig_header { return shift->{orig_header} }
826              
827             =head2 orig_text
828              
829             If the bounce message was poorly structured, the above two methods won't return
830             anything --- instead, you get back a block of text that may or may not
831             approximate the original message. No guarantees. Good luck.
832              
833             =cut
834              
835 1     1 1 4 sub orig_text { return shift->{orig_text} }
836              
837             =head1 CAVEATS
838              
839             Bounce messages are generally meant to be read by humans, not computers. A
840             poorly formatted bounce message may fool BounceParser into spreading its net
841             too widely and returning email addresses that didn't actually bounce. Before
842             you do anything with the email addresses you get back, confirm that it makes
843             sense that they might be bouncing --- for example, it doesn't make sense for
844             the sender of the original message to show up in the addresses list, but it
845             could if the bounce message is sufficiently misformatted.
846              
847             Still, please report all bugs!
848              
849             =head1 FREE-FLOATING ANXIETY
850              
851             Some bizarre MTAs construct bounce messages using the original headers of the
852             original message. If your application relies on the assumption that all
853             Message-IDs are unique, you need to watch out for these MTAs and program
854             defensively; before doing anything with the Message-ID of a bounce message,
855             first confirm that you haven't already seen it; if you have, change it to
856             something else that you make up on the spot, such as
857             "<antibogus-TIMESTAMP-PID-COUNT@LOCALHOST>".
858              
859             =head1 BUGS
860              
861             BounceParser assumes a sanely constructed bounce message. Input from the real
862             world may cause BounceParser to barf and die horribly when we violate one of
863             MIME::Entity's assumptions; this is why you should always call it inside an
864             eval { }.
865              
866             =head2 TODO
867              
868             Provide some translation of the SMTP and DSN error codes into English. Review
869             RFC1891 and RFC1893.
870              
871             =head1 KNOWN TO WORK WITH
872              
873             We understand bounce messages generated by the following MTAs / organizations:
874              
875             Postfix
876             Sendmail
877             Exim
878             AOL
879             Yahoo
880             Hotmail
881             AOL's AirMail sender-blocking
882             Microsoft Exchange*
883             Qmail*
884             Novell Groupwise*
885              
886             * Items marked with an asterisk currently may return incomplete information.
887              
888             =head1 SEE ALSO
889              
890             Used by http://listbox.com/ --- if you like BounceParser and you know it,
891             consider Listbox for your mailing list needs!
892              
893             SVN repository and email list information at:
894             http://emailproject.perl.org/
895              
896             RFC1892 and RFC1894
897              
898             =head1 RANDOM OBSERVATION
899              
900             Schwern's modules have the Alexandre Dumas property.
901              
902             =head1 AUTHOR
903              
904             Original author: Meng Weng Wong, E<lt>mengwong+bounceparser@pobox.comE<gt>
905              
906             Current maintainer: Ricardo SIGNES, E<lt>rjbs@cpan.orgE<gt>
907              
908             Massive contributions to the 1.5xx series were made by William Yardley and
909             Michael Stevens. Ricardo mostly just helped out and managed releases.
910              
911             =head1 COPYRIGHT AND LICENSE
912              
913             Copyright (C) 2003-2006, IC Group, Inc.
914             pobox.com permanent email forwarding with spam filtering
915             listbox.com mailing list services for announcements and discussion
916              
917             This library is free software; you can redistribute it and/or modify
918             it under the same terms as Perl itself.
919              
920             =head1 WITH A SHOUT OUT TO
921              
922             coraline, Fletch, TorgoX, mjd, a-mused, Masque, gbarr,
923             sungo, dngor, and all the other hoopy froods on #perl
924              
925             =cut
926              
927             sub _std_reason {
928 117     117   288 local $_ = shift;
929              
930 117 50       435 if (!defined $_) {
931 0         0 return "unknown";
932             }
933              
934 117 100       1151 if (/(?:domain|host|service)\s+(?:not\s+found|unknown|not\s+known)/i) {
935 1         7 return "domain_error"
936             }
937              
938 116 100       538 if (/sorry,\s+that\s+domain\s+isn't\s+in\s+my\s+list\s+of\s+allowed\s+rcpthosts/i) {
939 1         5 return "domain_error";
940             }
941              
942 115 100 33     2987 if (
      33        
      66        
      66        
      66        
      66        
      100        
      100        
      100        
      100        
943             /try.again.later/is or
944             /mailbox\b.*\bfull/ or
945             /storage/i or
946             /quota/i or
947             /\s552\s/ or
948             /\s#?5\.2\.2\s/ or # rfc 1893
949             /User\s+mailbox\s+exceeds\s+allowed\s+size/i or
950             /Mailbox\s+size\s+limit\s+exceeded/i or
951             /max\s+message\s+size\s+exceeded/i or
952             /Benutzer\s+hat\s+zuviele\s+Mails\s+auf\s+dem\s+Server/i or
953             /exceeded\s+its\s+disk\s+space\s+limit/i
954             ) {
955 6         24 return "over_quota";
956             }
957              
958 109         635 my $user_re =
959             qr'(?: mailbox | user | recipient | address (?: ee)?
960             | customer | account | e-?mail | <? $EMAIL_ADDR_REGEX >? )'ix;
961              
962 109 100 100     46057 if (
      100        
      66        
      66        
      33        
      33        
      66        
      66        
      100        
      100        
      66        
      100        
      66        
      66        
      66        
      66        
      66        
      33        
      66        
      66        
      100        
      66        
      100        
      100        
      100        
      100        
      100        
      66        
      100        
      100        
      66        
      100        
      100        
      100        
      66        
      100        
      100        
      100        
963             /\s \(? \#? 5\.1\.[01] \)? \s/x or # rfc 1893
964             /$user_re\s+(?:\S+\s+)? (?:is\s+)? # Generic
965             (?: (?: un|not\s+) (?: known | recognized )
966             | [dw]oes\s?n[o']?t
967             (?: exist|found ) | disabled | expired ) /ix or
968             /no\s+(?:such)\s+?$user_re/i or # Gmail and other (mofified for bug #41874)
969             /unrouteable address/i or # bug #41874
970             /inactive user/i or # Outblaze
971             /unknown local part/i or # Exim(?)
972             /user\s+doesn't\s+have\s+a/i or # Yahoo!
973             /account\s+has\s+been\s+(?:disabled|suspended)/i or # Yahoo!
974             /$user_re\s+(?:suspended|discontinued)/i or # everyone.net / other?
975             /unknown\s+$user_re/i or # Generic
976             /$user_re\s+(?:is\s+)?(?:inactive|unavailable)/i or # Hotmail, others?
977             /(?:(?:in|not\s+a\s+)?valid|no such)\s$user_re/i or # Various
978             /$user_re\s+(?:was\s+)?not\s+found/i or # AOL, generic
979             /$user_re \s+ (?:is\s+)? (?:currently\s+)? # ATT, generic
980             (?:suspended|unavailable)/ix or
981             /address is administratively disabled/i or # Unknown
982             /no $user_re\s+(?:here\s+)?by that name/i or # Unknown
983             /<?$EMAIL_ADDR_REGEX>? is invalid/i or # Unknown
984             /address.*not known here/i or # Unknown
985             /recipient\s+(?:address\s+)?rejected/i or # Cox, generic
986             /not\s+listed\s+in\s+Domino/i or # Domino
987             /account not activated/i or # usa.net
988             /not\s+our\s+customer/i or # Comcast
989             /doesn't handle mail for that user/i or # mailfoundry
990             /$user_re\s+does\s+not\s+exist/i or
991             /Recipient\s+<?$EMAIL_ADDR_REGEX>?\s+does\s+not\s+exist/i or
992             /recipient\s+no\s+longer\s+on\s+server/i or # me.com
993             /is\s+not\s+a\s+known\s+user\s+on\s+this\s+system/i or # cam.ac.uk
994             /Rcpt\s+<?$EMAIL_ADDR_REGEX>?\s+does\s+not\s+exist/i or
995             /Mailbox\s+not\s+available/i or
996             /No\s+mailbox\s+found/i or
997             /<?$EMAIL_ADDR_REGEX>?\s+is\s+a\s+deactivated\s+mailbox/i or
998             /Recipient\s+does\s+not\s+exist\s+on\s+this\s+system/i or
999             /user\s+mail-box\s+not\s+found/i or
1000             /No\s+mail\s+box\s+available\s+for\s+this\s+user/i or
1001             /User\s+\[\S+\]\s+does\s+not\s+exist/i or
1002             /email\s+account\s+that\s+you\s+tried\s+to\s+reach\s+is\s+disabled/i or
1003             /not\s+an\s+active\s+address\s+at\s+this\s+host/i or
1004             /not\s+a\s+known\s+user/i or
1005             /BAD_RECIPIENT/i
1006             ) {
1007 30         232 return "user_unknown";
1008             }
1009              
1010 79 100 33     2623 if (
      33        
      33        
      33        
      66        
      100        
      66        
      66        
      66        
      100        
      100        
1011             /domain\s+syntax/i or
1012             /timed\s+out/i or
1013             /route\s+to\s+host/i or
1014             /connection\s+refused/i or
1015             /no\s+data\s+record\s+of\s+requested\s+type/i or
1016             /Malformed name server reply/i or
1017             /as\s+a\s+relay,\s+but\s+I\s+have\s+not\s+been\s+configured\s+to\s+let/i or
1018             /550\s+relay\s+not\s+permitted/i or
1019             /550\s+relaying\s+denied/i or
1020             /Relay\s+access\s+denied/i or
1021             /Relaying\s+denied/i or
1022             /No\s+such\s+domain\s+at\s+this\s+location/i
1023             ) {
1024 8         86 return "domain_error";
1025             }
1026              
1027 71 100 100     4522 if (
      100        
      100        
      100        
      100        
      100        
      100        
      100        
      100        
      100        
      100        
      100        
      100        
      100        
      100        
      100        
      100        
      100        
      100        
      100        
      100        
      100        
      100        
      100        
      100        
      100        
      100        
      100        
      66        
      100        
      100        
      100        
      100        
      100        
      100        
      100        
      100        
      100        
      100        
      100        
      100        
      100        
      100        
      100        
      100        
1028             /Blocked\s+by\s+SpamAssassin/i or
1029             /spam\s+rejection/i or
1030             /identified\s+SPAM,\s+message\s+permanently\s+rejected/i or
1031             /Mail\s+appears\s+to\s+be\s+unsolicited/i or
1032             /Message\s+rejected\s+as\s+spam\s+by\s+Content\s+Filtering/i or
1033             /message\s+looks\s+like\s+SPAM\s+to\s+me/i or
1034             /NOT\s+JUNKEMAILFILTER/i or
1035             /your\s+message\s+has\s+triggered\s+a\s+SPAM\s+block/i or
1036             /Spam\s+detected/i or
1037             /Message\s+looks\s+like\s+spam/i or
1038             /Message\s+content\s+rejected,\s+UBE/i or
1039             /Blocked\s+using\s+spam\s+pattern/i or
1040             /Client\s+host\s+\S+\s+blocked\s+using/i or
1041             /breaches\s+local\s+URIBL\s+policy/i or
1042             /Your\s+email\s+had\s+spam-like\s+header\s+contents/i or
1043             /detected\s+as\s+spam/i or
1044             /Denied\s+due\s+to\s+spam\s+list/i or
1045             /appears\s+to\s+be\s+unsolicited/i or
1046             /antispam\s+checks/i or
1047             /Probable\s+Spam/i or
1048             /ESETS_SMTP\s+\(spam\)/i or
1049             /this\s+message\s+appears\s+to\s+be\s+spam/i or
1050             /Spam\s+score\s+\(\S+\)\s+too\s+high/i or
1051             /matches\s+a\s+profile\s+the\s+Internet\s+community\s+may\s+consider\s+spam/i or
1052             /accepted\s+due\s+to\s+spam\s+filter/i or
1053             /content\s+filter\s+rejection/i or
1054             /using\s+a\s+mass\s+mailer/i or
1055             /Spam\s+email/i or
1056             /Spam\s+content/i or
1057             (/CONTENT\s+REJECT/i and /dspam\s+check/i) or
1058             /this\s+email\s+is\s+spam/i or
1059             /rejected\s+as\s+spam/i or
1060             /MCSpamSignature/i or
1061             /identified\s+as\s+spam/i or
1062             /Spamming\s+not\s+allowed/i or
1063             /classified\s+as\s+spam/i or
1064             /Message\s+refused\s+by\s+MailMarshal\s+SpamProfiler/i or
1065             /Your\s+email\s+appears\s+similar\s+to\s+spam/i or
1066             /This\s+message\s+scored\s+\S+\s+spam\s+points\s+and\s+has\s+been\s+rejected/i or
1067             /Spam\s+Blocked/i or
1068             /bulk\s+e?mail/i or
1069             /probably\s+spam/i or
1070             /appears\s+to\s+be\s+SPAM/i or
1071             /SPAM NOT ACCEPTED/i or
1072             /5.9.8\s+spam/i
1073             ) {
1074 48         389 return "spam";
1075             }
1076              
1077 23 100 100     310 if (
      100        
      100        
      100        
1078             /RESOLVER.RST.RecipSizeLimit/i or
1079             /exceeds\s+size\s+limit/i or
1080             /Message\s+too\s+big/i or
1081             /RESOLVER.RST.SendSizeLimit/i or
1082             /Message\s+Rejected\s+Class=size/i
1083             ) {
1084 6         52 return "message_too_large";
1085             }
1086              
1087 17         144 return "unknown";
1088             }
1089              
1090             # ---------------------------------------------------------------------
1091             # preprocessors
1092             # ---------------------------------------------------------------------
1093              
1094             sub p_ims {
1095 133     133 0 315 my $self = shift;
1096 133         270 my $message = shift;
1097              
1098             # Mangle Exchange messages into a format we like better
1099             # see t/corpus/exchange.unknown.msg
1100              
1101             return
1102 133 100 100     391 unless ($message->head->get("X-Mailer")||'') =~ /Internet Mail Service/i;
1103              
1104 1 50       46 if ($message->is_multipart) {
1105             return unless my ($error_part)
1106 1 50       146 = grep { $_->effective_type eq "text/plain" } $message->parts;
  2         139  
1107              
1108 1 50       110 return unless my ($actual_error)
1109             = $error_part->as_string
1110             =~ /did not reach the following recipient\S+\s*(.*)/is;
1111              
1112 1 50       1347 if (my $io = $error_part->open("w")) {
1113 1         68 $io->print($actual_error);
1114 1         7 $io->close;
1115             }
1116              
1117             } else {
1118              
1119 0 0       0 return unless my ($actual_error)
1120             = $message->bodyhandle->as_string
1121             =~ /did not reach the following recipient\S+\s*(.*)/is;
1122              
1123 0         0 my ($stuff_before, $stuff_after)
1124             = split /^(?=Message-ID:|Received:)/m, $message->bodyhandle->as_string;
1125              
1126 0         0 $stuff_before =~ s/.*did not reach the following recipient.*?$//ism;
1127 0         0 $self->log("rewrote IMS into plain/report.");
1128 0         0 return $self->new_plain_report($message, $stuff_before, $stuff_after);
1129             }
1130              
1131 1         10 return $message;
1132             }
1133              
1134             sub p_aol_senderblock {
1135 133     133 0 306 my $self = shift;
1136 133         260 my $message = shift;
1137              
1138 133 100 100     400 return unless ($message->head->get("Mailer")||'') =~ /AirMail/i;
1139 2 50       78 return unless $message->effective_type eq "text/plain";
1140 2 50       180 return unless $message->bodyhandle->as_string =~ /Your mail to the following recipients could not be delivered because they are not accepting mail/i;
1141              
1142 2         37 my ($host) = $message->head->get("From") =~ /\@(\S+)>/;
1143              
1144 2         80 my $rejector;
1145             my @new_output;
1146 2         6 for (split /\n/, $message->bodyhandle->as_string) {
1147              
1148             # "Sorry luser@example.com. Your mail to the...
1149             # Get rid of this so that the module doesn't create a report for
1150             # *your* address.
1151 6         35 s/Sorry \S+?@\S+?\.//g;
1152              
1153 6 50       19 if (/because they are not accepting mail from (\S+?):?/i) {
1154 0         0 $rejector = $1;
1155 0         0 push @new_output, $_;
1156 0         0 next;
1157             }
1158 6 100       18 if (/^\s*(\S+)\s*$/) {
1159 2         5 my $recipient = $1;
1160 2 50       8 if ($recipient =~ /\@/) {
1161 0         0 push @new_output, $_;
1162 0         0 next;
1163             }
1164 2         15 s/^(\s*)(\S+)(\s*)$/$1$2\@$host$3/;
1165 2         15 push @new_output, $_;
1166 2         5 next;
1167             }
1168 4         7 push @new_output, $_;
1169 4         8 next;
1170             }
1171              
1172 2         6 push @new_output, ("# rewritten by BounceParser: p_aol_senderblock()", "");
1173 2 50       7 if (my $io = $message->open("w")) {
1174 2         137 $io->print(join "\n", @new_output);
1175 2         16 $io->close;
1176             }
1177 2         17 return $message;
1178             }
1179              
1180             sub p_novell_groupwise {
1181              
1182             # renamed from p_novell_groupwise_5_2 - hopefully we can deal with most / all
1183             # versions and create test cases / fixes when we can't
1184             #
1185             # See t/various-unknown.t and t/corpus/novell-*.msg for some recent examples.
1186              
1187 133     133 0 300 my $self = shift;
1188 133         252 my $message = shift;
1189              
1190 133 100 100     345 return unless ($message->head->get("X-Mailer")||'') =~ /Novell Groupwise/i;
1191 1 50       41 return unless $message->effective_type eq "multipart/mixed";
1192             return unless my ($error_part)
1193 1 50       135 = grep { $_->effective_type eq "text/plain" } $message->parts;
  1         10  
1194              
1195 1         132 my ($host) = $message->head->get("From") =~ /\@(\S+)>?/;
1196              
1197             # A lot of times, Novell returns just the LHS; this makes it difficult /
1198             # impossible in many cases to guess the recipient address. MBP makes an
1199             # attempt here.
1200 1         40 my @new_output;
1201 1         4 for (split /\n/, $error_part->bodyhandle->as_string) {
1202 3 100       22 if (/^(\s*)(\S+)(\s+\(.*\))$/) {
1203 1         5 my ($space, $recipient, $reason) = ($1, $2, $3);
1204 1 50       5 if ($recipient =~ /\@/) {
1205 1         3 push @new_output, $_;
1206 1         3 next;
1207             }
1208 0         0 $_ = join "", $space, "$2\@$host", $reason;
1209 0         0 push @new_output, $_; next;
  0         0  
1210             }
1211 2         5 push @new_output, $_; next;
  2         3  
1212             }
1213              
1214 1         4 push @new_output,
1215             ("# rewritten by BounceParser: p_novell_groupwise()", "");
1216              
1217 1 50       4 if (my $io = $error_part->open("w")) {
1218 1         72 $io->print(join "\n", @new_output);
1219 1         9 $io->close;
1220             }
1221 1         10 return $message;
1222             }
1223              
1224             sub p_plain_smtp_transcript {
1225 133     133 0 411 my ($self, $message) = (shift, shift);
1226              
1227             # sometimes, we have a proper smtp transcript;
1228             # that means we have enough information to mark the message up into a proper
1229             # multipart/report!
1230             #
1231             # pennwomen-la@v2.listbox.com/200209/19/1032468752.1444_1.frodo
1232             # The original message was received at Thu, 19 Sep 2002 13:51:36 -0700 (MST)
1233             # from daemon@localhost
1234             #
1235             # ----- The following addresses had permanent fatal errors -----
1236             # <friedman@primenet.com>
1237             # (expanded from: <friedman@primenet.com>)
1238             #
1239             # ----- Transcript of session follows -----
1240             # ... while talking to smtp-local.primenet.com.:
1241             # >>> RCPT To:<friedman@smtp-local.primenet.com>
1242             # <<< 550 <friedman@smtp-local.primenet.com>... User unknown
1243             # 550 <friedman@primenet.com>... User unknown
1244             # ----- Message header follows -----
1245             #
1246             # what we'll do is mark it back up into a proper multipart/report.
1247              
1248 133 100       384 return unless $message->effective_type eq "text/plain";
1249              
1250 16 100       1691 return unless $message->bodyhandle->as_string
1251             =~ /The following addresses had permanent fatal errors/;
1252              
1253 2 50       29 return unless $message->bodyhandle->as_string
1254             =~ /Transcript of session follows/;
1255              
1256 2 50       25 return unless $message->bodyhandle->as_string =~ /Message .* follows/;
1257              
1258 2         30 my ($stuff_before, $stuff_after)
1259             = split /^.*Message (?:header|body) follows.*$/im,
1260             $message->bodyhandle->as_string, 2;
1261              
1262 2         47 my %by_email = $self->_analyze_smtp_transcripts($stuff_before);
1263              
1264 2         9 my @paras = _construct_delivery_status_paras(\%by_email);
1265              
1266 2         4 my @new_output;
1267 2         7 my ($reporting_mta) = _cleanup_email($message->head->get("From")) =~ /\@(\S+)/;
1268              
1269 2         9 chomp (my $arrival_date = $message->head->get("Date"));
1270              
1271 2 50       85 push @new_output, "Reporting-MTA: $reporting_mta" if $reporting_mta;
1272 2 50       10 push @new_output, "Arrival-Date: $arrival_date" if $arrival_date;
1273 2         7 push @new_output, "";
1274 2         4 push @new_output, map { @$_, "" } @paras;
  2         8  
1275              
1276 2         16 return $self->new_multipart_report(
1277             $message,
1278             $stuff_before,
1279             join("\n", @new_output),
1280             $stuff_after
1281             );
1282             }
1283              
1284             sub _construct_delivery_status_paras {
1285 2     2   4 my %by_email = %{shift()};
  2         6  
1286              
1287 2         5 my @new_output;
1288              
1289 2         8 foreach my $email (sort keys %by_email) {
1290             # Final-Recipient: RFC822; robinbw@aol.com
1291             # Action: failed
1292             # Status: 2.0.0
1293             # Remote-MTA: DNS; air-xj03.mail.aol.com
1294             # Diagnostic-Code: SMTP; 250 OK
1295             # Last-Attempt-Date: Thu, 19 Sep 2002 16:53:10 -0400 (EDT)
1296              
1297             push @new_output, [
1298             "Final-Recipient: RFC822; $email",
1299             "Action: failed",
1300             "Status: 5.0.0",
1301 2 50       15 ($by_email{$email}->{host} ? ("Remote-MTA: DNS; $by_email{$email}->{host}") : ()),
1302             _construct_diagnostic_code(\%by_email, $email),
1303             ];
1304              
1305             }
1306              
1307 2         6 return @new_output;
1308             }
1309              
1310             sub _construct_diagnostic_code {
1311 2     2   4 my %by_email = %{shift()};
  2         7  
1312 2         5 my $email = shift;
1313             join (" ",
1314             "Diagnostic-Code: X-BounceParser;",
1315             ($by_email{$email}->{'host'} ? "host $by_email{$email}->{'host'} said:" : ()),
1316             ($by_email{$email}->{'smtp_code'}),
1317 2 50       10 (join ", ", @{ $by_email{$email}->{'errors'} || [] }));
  2 50       15  
1318             }
1319              
1320             sub _analyze_smtp_transcripts {
1321 2     2   5 my $self = shift;
1322 2         5 my $plain_smtp_transcript = shift;
1323              
1324 2         5 my (%by_email, $email, $smtp_code, @error_strings, $host);
1325              
1326             # parse the text part for the actual SMTP transcript
1327 2         121 for (split /\n\n|(?=>>>)/, $plain_smtp_transcript) {
1328 6 50       20 $email = _cleanup_email($1) if /RCPT TO:\s*(\S+)/im;
1329              
1330 6 100       19 if (/The\s+following\s+addresses\s+had\s+permanent\s+fatal\s+errors\s+-----\s+\<?(.*)\>?/im) {
1331 2         7 $email = _cleanup_email($1);
1332             }
1333              
1334 6 100       21 $by_email{$email}->{host} = $host if $email;
1335              
1336 6 100       18 if (/while talking to (\S+)/im) {
1337 2         6 $host = $1;
1338 2         10 $host =~ s/[.:;]+$//g;
1339             }
1340              
1341 6 100       18 if (/<<< (\d\d\d) (.*)/m) {
1342 2         7 $by_email{$email}->{smtp_code} = $1;
1343 2         4 push @{$by_email{$email}->{errors}}, $2;
  2         9  
1344             }
1345              
1346 6 50       22 if (/^(\d\d\d)\b.*(<\S+\@\S+>)\.*\s+(.+)/m) {
1347 0         0 $email = _cleanup_email($2);
1348 0         0 $by_email{$email}->{smtp_code} = $1;
1349 0         0 push @{$by_email{$email}->{errors}}, $3;
  0         0  
1350             }
1351             }
1352 2         5 delete $by_email{''};
1353 2         9 return %by_email;
1354             }
1355              
1356             # ------------------------------------------------------------
1357              
1358             sub new_plain_report {
1359 0     0 0 0 my ($self, $message, $error_text, $orig_message) = @_;
1360              
1361 0         0 $orig_message =~ s/^\s+//;
1362              
1363 0         0 my $newmessage = $message->dup();
1364 0         0 $newmessage->make_multipart("plain-report");
1365 0         0 $newmessage->parts([]);
1366 0         0 $newmessage->attach(Type => "text/plain", Data => $error_text);
1367              
1368 0         0 my $orig_message_mime = MIME::Entity->build(Type => "multipart/transitory");
1369              
1370 0         0 $orig_message_mime->add_part($self->{parser}->parse_data($orig_message));
1371              
1372 0         0 $orig_message_mime->head->mime_attr("content-type" => "message/rfc822");
1373 0         0 $newmessage->add_part($orig_message_mime);
1374              
1375 0         0 $self->log("created new plain-report message.");
1376              
1377 0         0 return $newmessage;
1378             }
1379              
1380             # ------------------------------------------------------------
1381              
1382             sub new_multipart_report {
1383 2     2 0 8 my ($self, $message, $error_text, $delivery_status, $orig_message) = @_;
1384              
1385 2         18 $orig_message =~ s/^\s+//;
1386              
1387 2         11 my $newmessage = $message->dup();
1388 2         834 $newmessage->make_multipart("report");
1389 2         3729 $newmessage->parts([]);
1390 2         39 $newmessage->attach(
1391             Type => "text/plain",
1392             Data => $error_text
1393             );
1394 2         2124 $newmessage->attach(
1395             Type => "message/delivery-status",
1396             Data => $delivery_status
1397             );
1398              
1399 2         1865 my $orig_message_mime
1400             = MIME::Entity->build(Type => "multipart/transitory", Top => 0);
1401              
1402 2         1318 $orig_message_mime->add_part($self->{parser}->parse_data($orig_message));
1403              
1404 2         11617 $orig_message_mime->head->mime_attr("content-type" => "message/rfc822");
1405 2         523 $newmessage->add_part($orig_message_mime);
1406              
1407 2         27 $self->log("created new multipart-report message.");
1408              
1409 2         22 return $newmessage;
1410             }
1411              
1412             # ------------------------------------------------------------
1413              
1414             sub _cleanup_email {
1415 130     130   343 my $email = shift;
1416 130         446 for ($email) {
1417 130         320 chomp;
1418             # Get rid of parens around addresses like (luser@example.com)
1419             # Got rid of earlier /\(.*\)/ - not sure what that was about - wby
1420 130         429 tr/[()]//d;
1421 130         355 s/^To:\s*//i;
1422 130         418 s/[.:;]+$//;
1423 130         428 s/<(.+)>/$1/;
1424             # IMS hack: c=US;a= ;p=NDC;o=ORANGE;dda:SMTP=slpark@msx.ndc.mc.uci.edu; on
1425             # Thu, 19 Sep...
1426 130         719 s/.*:SMTP=//;
1427 130         295 s/^\s+//;
1428 130         295 s/\s+$//;
1429             # hack to get rid of stuff like "luser@example.com...User"
1430 130         284 s/\.{3}\S+//;
1431             # SMTP:foo@example.com
1432 130         332 s/^SMTP://;
1433             }
1434 130         411 return $email;
1435             }
1436              
1437             sub p_xdelivery_status {
1438 133     133 0 379 my ($self, $message) = @_;
1439              
1440             # This seems to be caused by something called "XWall v3.31", which
1441             # (according to Google) is a "firewall that protects your Exchange
1442             # server from viruses, spam mail and dangerous attachments". Shame it
1443             # doesn't protect the rest of the world from gratuitously broken MIME
1444             # types.
1445              
1446 133         607 for ($message->parts_DFS) {
1447 701 100       76627 $_->effective_type('message/delivery-status')
1448             if $_->effective_type eq 'message/xdelivery-status';
1449             }
1450             }
1451              
1452             sub _first_non_multi_part {
1453 133     133   334 my ($entity) = @_;
1454              
1455 133         272 my $part = $entity;
1456 133   100     475 $part = $part->parts(0) or return while $part->is_multipart;
1457 132         37768 return $part;
1458             }
1459              
1460             sub _position_before {
1461 1     1   3 my ($pos_a, $pos_b) = @_;
1462 1 50 33     13 return 1 if defined($pos_a) && (!defined($pos_b) || $pos_a < $pos_b);
      33        
1463 0         0 return;
1464             }
1465              
1466             # Return the position in $string at which $regex first matches, or undef if
1467             # no match.
1468             sub _match_position {
1469 15     15   45 my ($string, $regex) = @_;
1470 15 100       2643 return $string =~ $regex ? $-[0] : undef;
1471             }
1472              
1473             1;