File Coverage

blib/lib/Mail/DeliveryStatus/BounceParser.pm
Criterion Covered Total %
statement 414 484 85.5
branch 207 270 76.6
condition 350 417 83.9
subroutine 31 32 96.8
pod 9 16 56.2
total 1011 1219 82.9


line stmt bran cond sub pod time code
1             package Mail::DeliveryStatus::BounceParser;
2              
3             =head1 NAME
4              
5             Mail::DeliveryStatus::BounceParser - Perl extension to analyze bounce messages
6              
7             =head1 SYNOPSIS
8              
9             use Mail::DeliveryStatus::BounceParser;
10              
11             # $message is \*io or $fh or "entire\nmessage" or \@lines
12             my $bounce = eval { Mail::DeliveryStatus::BounceParser->new($message); };
13              
14             if ($@) {
15             # couldn't parse.
16             }
17              
18             my @addresses = $bounce->addresses; # email address strings
19             my @reports = $bounce->reports; # Mail::Header objects
20             my $orig_message_id = $bounce->orig_message_id; #
21             my $orig_message = $bounce->orig_message; # Mail::Internet object
22              
23             =head1 ABSTRACT
24              
25             Mail::DeliveryStatus::BounceParser analyzes RFC822 bounce messages and returns
26             a structured description of the addresses that bounced and the reason they
27             bounced; it also returns information about the original returned message
28             including the Message-ID. It works best with RFC1892 delivery reports, but
29             will gamely attempt to understand any bounce message no matter what MTA
30             generated it.
31              
32             =head1 DESCRIPTION
33              
34             Meng Wong wrote this for the Listbox v2 project; good mailing list managers
35             handle bounce messages so listowners don't have to. The best mailing list
36             managers figure out exactly what is going on with each subscriber so the
37             appropriate action can be taken.
38              
39             =cut
40              
41 33     33   2209086 use 5.006;
  33         427  
42 33     33   219 use strict;
  33         75  
  33         860  
43 33     33   184 use warnings;
  33         108  
  33         2009  
44              
45             our $VERSION = '1.543';
46             $VERSION = eval $VERSION;
47              
48 33     33   21819 use MIME::Parser;
  33         3817797  
  33         1356  
49 33     33   17008 use Mail::DeliveryStatus::Report;
  33         109  
  33         1046  
50 33     33   201 use vars qw($EMAIL_ADDR_REGEX);
  33         75  
  33         112214  
51              
52             $EMAIL_ADDR_REGEX = qr{
53             # Avoid using something like Email::Valid
54             # Full rfc(2)822 compliance isn't exactly what we want, and this seems to work
55             # for most real world cases
56             (?:<|^|\s) # Space, or the start of a string
57             ([^\s\/<]+ # some non-space, non-/ characters; none are <
58             \@ # at sign (duh)
59             (?:[-\w]+\.)+[-\w]+) # word characters or hypens organized into
60             # at least two dot-separated words
61             (?:$|\s|>) # then the end
62             }sx;
63              
64             my $Not_An_Error = qr/
65             \b delayed \b
66             | \b warning \b
67             | transient.{0,20}\serror
68             | Your \s message .{0,100} was \s delivered \s to \s the \s following \s recipient
69             /six;
70              
71             # added "permanent fatal errors" - fix for bug #41874
72             my $Really_An_Error = qr/this is a permanent error|permanent fatal errors/i;
73              
74             my $Returned_Message_Below = qr/(
75             (?:original|returned) \s message \s (?:follows|below)
76             | (?: this \s is \s a \s copy \s of
77             | below \s this \s line \s is \s a \s copy
78             ) .{0,100} \s message\.?
79             | message \s header \s follows
80             | ^ (?:return-path|received|from):
81             )\s+/sixm;
82              
83             my @Preprocessors = qw(
84             p_ims
85             p_aol_senderblock
86             p_novell_groupwise
87             p_plain_smtp_transcript
88             p_xdelivery_status
89             );
90              
91             =head2 parse
92              
93             my $bounce = Mail::DeliveryStatus::BounceParser->parse($message, \%arg);
94              
95             OPTIONS. If you pass BounceParser->new(..., {log=>sub { ... }}) That will be
96             used as a logging callback. If C<< $message >> is undefined, will parse STDIN.
97              
98             NON-BOUNCES. If the message is recognizably a vacation autoresponse, or is a
99             report of a transient nonfatal error, or a spam or virus autoresponse, you'll
100             still get back a C<$bounce>, but its C<< $bounce->is_bounce() >> will return
101             false.
102              
103             It is possible that some bounces are not really bounces; such as
104             anything that appears to have a 2XX status code. To include such
105             non-bounces in the reports, pass the option {report_non_bounces=>1}.
106              
107             For historical reasons, C is an alias for the C method.
108              
109             =cut
110              
111             sub parse {
112 132     132 1 266636 my ($class, $data, $arg) = @_;
113             # my $bounce = Mail::DeliveryStatus::BounceParser->new( \*STDIN | $fh |
114             # "entire\nmessage" | ["array","of","lines"] );
115              
116 132         1030 my $parser = MIME::Parser->new;
117 132         18720 $parser->output_to_core(1);
118 132         1616 $parser->decode_headers(1);
119              
120 132         4492 my $message;
121              
122 132 50       843 if (not $data) {
    50          
    0          
123 0 0       0 print STDERR "BounceParser: expecting bounce message on STDIN\n" if -t STDIN;
124 0         0 $message = $parser->parse(\*STDIN);
125             } elsif (not ref $data) {
126 132         598 $message = $parser->parse_data($data);
127             } elsif (ref $data eq "ARRAY") {
128 0         0 $message = $parser->parse_data($data);
129             } else {
130 0         0 $message = $parser->parse($data);
131             }
132              
133             my $self = bless {
134             reports => [],
135             is_bounce => 1,
136             log => $arg->{log},
137             parser => $parser,
138             orig_message_id => undef,
139             prefer_final_recipient => $arg->{prefer_final_recipient},
140 132         2735315 }, $class;
141              
142 132 50       525 $self->log(
    100          
143             "received message with type "
144             . (defined($message->effective_type) ? $message->effective_type : "undef")
145             . ", subject "
146             . (defined($message->head->get("subject")) ? $message->head->get("subject") : "CAN'T GET SUBJECT")
147             );
148              
149             # before we even start to analyze the bounce, we recognize certain special
150             # cases, and rewrite them to be intelligible to us
151 132         533 foreach my $preprocessor (@Preprocessors) {
152 660 100       32677 if (my $newmessage = $self->$preprocessor($message)) {
153 6         26 $message = $newmessage;
154             }
155             }
156              
157 132         15860 $self->{message} = $message;
158              
159 132 100       469 $self->log(
160             "now the message is type "
161             . $message->effective_type
162             . ", subject "
163             . (defined($message->head->get("subject")) ? $message->head->get("subject") : "CAN'T GET SUBJECT")
164             );
165              
166 132         585 my $first_part = _first_non_multi_part($message);
167              
168             # Deal with some common C/R systems like TMDA
169             {
170 132 50 33     373 last unless ($message->head->get("x-delivery-agent")
171             and $message->head->get("X-Delivery-Agent") =~ /^TMDA/);
172 0         0 $self->log("looks like a challenge/response autoresponse; ignoring.");
173 0         0 $self->{type} = "Challenge / Response system autoreply";
174 0         0 $self->{is_bounce} = 0;
175 0         0 return $self;
176             }
177              
178             {
179 132 100 66     475 last unless ($message->head->get("X-Bluebottle-Request") and $first_part->stringify_body =~ /This account is protected by Bluebottle/);
  132         410  
180 1         1443 $self->log("looks like a challenge/response autoresponse; ignoring.");
181 1         4 $self->{type} = "Challenge / Response system autoreply";
182 1         3 $self->{is_bounce} = 0;
183 1         6 return $self;
184             }
185              
186             {
187 132 100 100     4044 last unless defined $first_part and $first_part->stringify_body =~ /Your server requires confirmation/;
  131         794  
188 1         1641 $self->log("Looks like a challenge/response autoresponse; ignoring.");
189 1         4 $self->{type} = "Challenge / Response system autoreply";
190 1         4 $self->{is_bounce} = 0;
191 1         8 return $self;
192             }
193              
194             {
195 131 100 100     4047 last unless defined $first_part and $first_part->stringify_body =~ /Please add yourself to my Boxbe Guest List/;
  130         676  
196 1         2051 $self->log("Looks like a challenge/response autoresponse; ignoring.");
197 1         5 $self->{type} = "Challenge / Response system autoreply";
198 1         4 $self->{is_bounce} = 0;
199             }
200              
201             {
202 130 100 100     184048 last unless defined $first_part and $first_part->stringify_body =~ /This\s+is\s+a\s+one-time\s+automated\s+message\s+to\s+confirm\s+that\s+you're\s+listed\s+on\s+my\s+Boxbe\s+Guest\s+List/;
  130         732  
203 1         1876 $self->log("Looks like a challenge/response autoresponse; ignoring.");
204 1         5 $self->{type} = "Challenge / Response system autoreply";
205 1         3 $self->{is_bounce} = 0;
206             }
207              
208             # we'll deem autoreplies to be usually less than a certain size.
209              
210             # Some vacation autoreplies are (sigh) multipart/mixed, with an additional
211             # part containing a pointless disclaimer; some are multipart/alternative,
212             # with a pointless HTML part saying the exact same thing. (Messages in
213             # this latter category have the decency to self-identify with things like
214             # ' 215             # 5.5.2653.12">', so we know to avoid such software in future.) So look
216             # at the first part of a multipart message (recursively, down the tree).
217              
218             {
219 130 100       174862 last if $message->effective_type eq 'multipart/report';
  130         487  
220 18 100 66     2728 last if !$first_part || $first_part->effective_type ne 'text/plain';
221 17         2153 my $string = $first_part->as_string;
222 17 100       31967 last if length($string) > 3000;
223             # added return receipt (fix for bug #41870)
224 14 100       2976 last if $string !~ /auto.{0,20}(reply|response)|return receipt|vacation|(out|away|on holiday).*office/i;
225 1         5 $self->log("looks like a vacation autoreply, ignoring.");
226 1         5 $self->{type} = "vacation autoreply";
227 1         4 $self->{is_bounce} = 0;
228 1         6 return $self;
229             }
230              
231             # vacation autoreply tagged in the subject
232             {
233 130 100       174558 last if $message->effective_type eq 'multipart/report';
  129         454  
234 17 100 66     2658 last if !$first_part || $first_part->effective_type ne 'text/plain';
235 16         2007 my $subject = $message->head->get('Subject');
236 16 100       734 last if !defined($subject);
237 15 100       121 last if $subject !~ /^AUTO/;
238 1 50       4 last if $subject !~ /is out of the office/;
239 1         4 $self->log("looks like a vacation autoreply, ignoring.");
240 1         6 $self->{type} = "vacation autoreply";
241 1         2 $self->{is_bounce} = 0;
242 1         5 return $self;
243             }
244              
245             # Polish auto-reply
246             {
247 129 100       19829 last if $message->effective_type eq 'multipart/report';
  128         474  
248 16 100 66     2093 last if !$first_part || $first_part->effective_type ne 'text/plain';
249 15         1980 my $subject = $message->head->get('Subject');
250 15 100       648 last if !defined($subject);
251 14 100       90 last if $subject !~ /Automatyczna\s+odpowied/;
252 1         5 $self->log("looks like a polish autoreply, ignoring.");
253 1         4 $self->{type} = "polish autoreply";
254 1         3 $self->{is_bounce} = 0;
255 1         6 return $self;
256             }
257              
258             # "Email address changed but your message has been forwarded"
259             {
260 128 100       18550 last if $message->effective_type eq 'multipart/report';
  127         18083  
  127         423  
261 15 100 66     1897 last if !$first_part || $first_part->effective_type ne 'text/plain';
262 14         1678 my $string = $first_part->as_string;
263 14 100       27471 last if length($string) > 3000;
264 12 50       1716 last if $string
265             !~ /(address .{0,60} changed | domain .{0,40} retired) .*
266             (has\s*been|was|have|will\s*be) \s* (forwarded|delivered)/six;
267 0         0 $self->log('looks like an address-change autoreply, ignoring');
268 0         0 $self->{type} = 'informational address-change autoreply';
269 0         0 $self->{is_bounce} = 0;
270 0         0 return $self;
271             }
272              
273             # Network Associates WebShield SMTP V4.5 MR1a on cpwebshield intercepted a
274             # mail from which caused the Content Filter
275             # Block extension COM to be triggered.
276 127 50 100     18211 if ($message->effective_type eq "text/plain"
      66        
277             and (length $message->as_string) < 3000
278             and $message->bodyhandle->as_string
279             =~ m/norton\sassociates\swebshield|content\s+filter/ix
280             ) {
281 0         0 $self->log("looks like a virus/spam block, ignoring.");
282 0         0 $self->{type} = "virus/spam false positive";
283 0         0 $self->{is_bounce} = 0;
284 0         0 return $self;
285             }
286              
287             # nonfatal errors usually say they're transient
288              
289 127 50 66     41500 if ($message->effective_type eq "text/plain"
290             and $message->bodyhandle->as_string =~ /transient.*error/is) {
291 0         0 $self->log("seems like a nonfatal error, ignoring.");
292 0         0 $self->{is_bounce} = 0;
293 0         0 return $self;
294             }
295              
296             # nonfatal errors usually say they're transient, but sometimes they do it
297             # straight out and sometimes it's wrapped in a multipart/report.
298             #
299             # Be careful not to examine a returned body for the transient-only signature:
300             # $Not_An_Error can match the single words 'delayed' and 'warning', which
301             # could quite reasonably occur in the body of the returned message. This
302             # also means it's worth additionally checking for a regex that gives a very
303             # strong indication that the error was permanent.
304             {
305 127         20116 my $part_for_maybe_transient;
  127         273  
306 127 100       555 $part_for_maybe_transient = $message
307             if $message->effective_type eq "text/plain";
308             ($part_for_maybe_transient)
309 127 100 100     19759 = grep { $_->effective_type eq "text/plain" } $message->parts
  7         1536  
310             if $message->effective_type =~ /multipart/
311             && $message->effective_type ne 'multipart/report';
312              
313 127 100       37582 if ($part_for_maybe_transient) {
314 13         56 my $string = $part_for_maybe_transient->bodyhandle->as_string;
315 13         188 my $transient_pos = _match_position($string, $Not_An_Error);
316 13 100       69 last unless defined $transient_pos;
317 1         4 my $permanent_pos = _match_position($string, $Really_An_Error);
318 1         4 my $orig_msg_pos = _match_position($string, $Returned_Message_Below);
319 1 50       8 last if _position_before($permanent_pos, $orig_msg_pos);
320 0 0       0 if (_position_before($transient_pos, $orig_msg_pos)) {
321 0         0 $self->log("transient error, ignoring.");
322 0         0 $self->{is_bounce} = 0;
323 0         0 return $self;
324             }
325             }
326             }
327              
328             # In all cases we will read the message body to try to pull out a message-id.
329 127 100       457 if ($message->effective_type =~ /multipart/) {
330             # "Internet Mail Service" sends multipart/mixed which still has a
331             # message/rfc822 in it
332 116 100       18642 if (
333             my ($orig_message) =
334 342         28227 grep { $_->effective_type eq "message/rfc822" } $message->parts
335             ) {
336             # see MIME::Entity regarding REPLACE
337 95         11078 my $orig_message_id = $orig_message->parts(0)->head->get("message-id");
338 95 100       4767 if ($orig_message_id) {
339 94         1129 $orig_message_id =~ s/(\r|\n)*$//g;
340 94         632 $self->log("extracted original message-id [$orig_message_id] from the original rfc822/message");
341             } else {
342 1         4 $self->log("Couldn't extract original message-id from the original rfc822/message");
343             }
344 95         352 $self->{orig_message_id} = $orig_message_id;
345 95         411 $self->{orig_message} = $orig_message->parts(0);
346             }
347              
348             # todo: handle pennwomen-la@v2.listbox.com/200209/19/1032468832.1444_1.frodo
349             # which is a multipart/mixed containing an application/tnef instead of a
350             # message/rfc822. yow!
351              
352 116 100 100     3752 if (! $self->{orig_message_id}
353             and
354             my ($rfc822_headers) =
355 60         4989 grep { lc $_->effective_type eq "text/rfc822-headers" } $message->parts
356             ) {
357 17         2021 my $orig_head = Mail::Header->new($rfc822_headers->body);
358 17         50392 my $message_id = $orig_head->get("message-id");
359 17 50       674 if ($message_id) {
360 17         110 chomp ($self->{orig_message_id} = $orig_head->get("message-id"));
361 17         631 $self->{orig_header} = $orig_head;
362 17         101 $self->log("extracted original message-id $self->{orig_message_id} from text/rfc822-headers");
363             }
364             }
365             }
366              
367 127 100       2411 if (! $self->{orig_message_id}) {
368 16 100 100     69 if ($message->bodyhandle and $message->bodyhandle->as_string =~ /Message-ID: (\S+)/i) {
369 5         162 $self->{orig_message_id} = $1;
370 5         37 $self->log("found a message-id $self->{orig_message_id} in the body.");
371             }
372             }
373              
374 127 100       750 if (! $self->{orig_message_id}) {
375 11         62 $self->log("couldn't find original message id.");
376             }
377              
378             #
379             # try to extract email addresses to identify members.
380             # we will also try to extract reasons as much as we can.
381             #
382              
383 127 100       405 if ($message->effective_type eq "multipart/report") {
    100          
    100          
384             my ($delivery_status) =
385 112         18759 grep { $_->effective_type eq "message/delivery-status" } $message->parts;
  335         28134  
386              
387 112         12951 my %global = ("reporting-mta" => undef, "arrival-date" => undef);
388              
389 112         289 my ($seen_action_expanded, $seen_action_failed);
390              
391             # Some MTAs generate malformed multipart/report messages with no
392             # message/delivery-status part; don't die in such cases.
393             my $delivery_status_body
394 112   100     270 = eval { $delivery_status->bodyhandle->as_string } || '';
395              
396             # Used to be \n\n, but now we allow any number of newlines between
397             # individual per-recipient fields to deal with stupid bug with the IIS SMTP
398             # service. RFC1894 (2.1, 2.3) is not 100% clear about whether more than
399             # one line is allowed - it just says "preceded by a blank line". We very
400             # well may put an upper bound on this in the future.
401             #
402             # See t/iis-multiple-bounce.t
403 112         2060 foreach my $para (split /\n{2,}/, $delivery_status_body) {
404              
405             # See t/surfcontrol-extra-newline.t - deal with bug #21249
406 136         3810 $para =~ s/\A\n+//g;
407             # added the following line as part of fix for #41874
408 136         781 $para =~ s/\r/ /g;
409              
410 136         1631 my $report = Mail::DeliveryStatus::Report->new([split /\n/, $para]);
411              
412             # Removed a $report->combine here - doesn't seem to work without a tag
413             # anyway... not sure what that was for. - wby 20060823
414              
415             # Unfold so message doesn't wrap over multiple lines
416 136         94587 $report->unfold;
417              
418             # Some MTAs send unsought delivery-status notifications indicating
419             # success; others send RFC1892/RFC3464 delivery status notifications
420             # for transient failures.
421 136 100 66     6984 if (defined $report->get('Action') and lc $report->get('Action')) {
422 114         373 my $action = lc $report->get('Action');
423 114         354 $action =~ s/^\s+//;
424 114 50       637 if ($action =~ s/^\s*([a-z]+)\b.*/$1/s) {
425             # In general, assume that anything other than 'failed' is a
426             # non-bounce; but 'expanded' is handled after the end of this
427             # foreach loop, because it might be followed by another
428             # per-recipient group that says 'failed'.
429 114 50       565 if ($action eq 'expanded') {
    100          
430 0         0 $seen_action_expanded = 1;
431             } elsif ($action eq 'failed') {
432 113         274 $seen_action_failed = 1;
433             } else {
434 1         7 $self->log("message/delivery-status says 'Action: \L$1'");
435 1         4 $self->{type} = "delivery-status \L$1";
436 1         2 $self->{is_bounce} = 0;
437 1         11 return $self;
438             }
439             }
440             }
441              
442 135         416 for my $hdr (qw(Reporting-MTA Arrival-Date)) {
443 270   100     16003 my $val = $global{$hdr} ||= $report->get($hdr);
444 270 50       695 if (defined($val)) {
445 270         840 $report->replace($hdr => $val)
446             }
447             }
448              
449 135         13721 my $email;
450              
451 135 100       483 if ($self->{prefer_final_recipient}) {
452 2   66     7 $email = $report->get("final-recipient")
453             || $report->get("original-recipient");
454             } else {
455 133   100     418 $email = $report->get("original-recipient")
456             || $report->get("final-recipient");
457             }
458              
459 135 100       529 next unless $email;
460              
461             # $self->log("email = \"$email\"") if $DEBUG > 3;
462              
463             # Diagnostic-Code: smtp; 550 5.1.1 User unknown
464 113         408 my $reason = $report->get("diagnostic-code");
465              
466 113         567 $email =~ s/[^;]+;\s*//; # strip leading RFC822; or LOCAL; or system;
467 113 50       368 if (defined $reason) {
468 113         425 $reason =~ s/[^;]+;\s*//; # strip leading X-Postfix;
469             }
470              
471 113         431 $email = _cleanup_email($email);
472              
473 113         775 $report->replace(email => $email);
474 113 50       12965 if (defined $reason) {
475 113         405 $report->replace(reason => $reason);
476             } else {
477 0         0 $report->delete("reason");
478             }
479              
480 113         13091 my $status = $report->get('Status');
481 113 50       487 $report->replace(Status => $status) if $status =~ s/ \(permanent failure\)$//;
482              
483 113 50       336 if ($status) {
484             # RFC 1893... prefer Status: if it exists and is something we know
485             # about
486             # Not 100% sure about 5.1.0...
487 113 100       826 if ($status =~ /^5\.1\.[01]$/) {
    100          
    100          
    100          
    100          
488 9         37 $report->replace(std_reason => "user_unknown");
489             } elsif ($status eq "5.1.2") {
490 1         6 $report->replace(std_reason => "domain_error");
491             } elsif ($status eq "5.2.1") {
492 2         5 $report->replace(std_reason => "user_disabled");
493             } elsif ($status eq "5.2.2") {
494 1         4 $report->replace(std_reason => "over_quota");
495             } elsif ($status eq "5.4.4") {
496 1         4 $report->replace(std_reason => "domain_error");
497             } else {
498 99         312 $report->replace(
499             std_reason => _std_reason($report->get("diagnostic-code"))
500             );
501             }
502             } else {
503 0         0 $report->replace(
504             std_reason => _std_reason($report->get("diagnostic-code"))
505             );
506             }
507 113         13692 my $diag_code = $report->get("diagnostic-code");
508              
509 113         233 my $host;
510 113 50       352 if (defined $diag_code) {
511 113         415 ($host) = $diag_code =~ /\bhost\s+(\S+)/;
512             }
513              
514 113 100       404 $report->replace(host => ($host)) if $host;
515              
516 113         1665 my ($code);
517              
518 113 50       361 if (defined $diag_code) {
519 113         757 ($code) = $diag_code =~
520             m/ ( ( [245] \d{2} ) \s | \s ( [245] \d{2} ) (?!\.) ) /x;
521             }
522              
523 113 50 66     543 if (!$code && $status && $status =~ /\A([245])\.?([0-9])\.?([0-9])/) {
      66        
524 13         62 $code = "$1$2$3";
525             }
526              
527 113 50       352 if ($code) {
528 113         350 $report->replace(smtp_code => $code);
529             }
530              
531 113 100       12818 if (not $report->get("host")) {
532 100         289 my $email = $report->get("email");
533 100 50       364 if (defined $email) {
534 100         488 my $host = ($email =~ /\@(.+)/)[0];
535 100 100       418 $report->replace(host => $host) if $host;
536             }
537             }
538              
539 113 100 66     10990 if ($report->get("smtp_code") and ($report->get("smtp_code") =~ /^2../)) {
540 1         12 $self->log(
541             "smtp code is "
542             . $report->get("smtp_code")
543             . "; no_problemo."
544             );
545              
546             }
547              
548 113 50       387 unless ($arg->{report_non_bounces}) {
549 113 50       323 if ($report->get("std_reason") eq "no_problemo") {
550 0         0 $self->log(
551             "not actually a bounce: " . $report->get("diagnostic-code")
552             );
553 0         0 next;
554             }
555             }
556              
557 113         219 push @{$self->{reports}},
  113         660  
558             Mail::DeliveryStatus::Report->new([ split /\n/, $report->as_string ]
559             );
560             }
561              
562 111 50 33     132270 if ($seen_action_expanded && !$seen_action_failed) {
563             # We've seen at least one 'Action: expanded' DSN-field, but no
564             # 'Action: failed'
565 0         0 $self->log(q[message/delivery-status says 'Action: expanded']);
566 0         0 $self->{type} = 'delivery-status expanded';
567 0         0 $self->{is_bounce} = 0;
568 0         0 return $self;
569             }
570              
571             } elsif ($message->effective_type =~ /multipart/) {
572             # but not a multipart/report. look through each non-message/* section.
573             # See t/corpus/exchange.unknown.msg
574              
575 4 100       1254 my @delivery_status_parts = grep { $_->effective_type =~ m{text/plain}
  7         810  
576             and not $_->is_multipart
577             } $message->parts;
578              
579             # $self->log("error parts: @{[ map { $_->bodyhandle->as_string }
580             # @delivery_status_parts ]}") if $DEBUG > 3;
581              
582 4         710 push @{$self->{reports}}, $self->_extract_reports(@delivery_status_parts);
  4         26  
583              
584             } elsif ($message->effective_type =~ m{text/plain}) {
585             # handle plain-text responses
586              
587             # This used to just take *any* part, even if the only part wasn't a
588             # text/plain part
589             #
590             # We may have to specifically allow some other types, but in my testing, all
591             # the messages that get here and are actual bounces are text/plain
592             # wby - 20060907
593              
594             # they usually say "returned message" somewhere, and we can split on that,
595             # above and below.
596 10   50     3184 my $body_string = $message->bodyhandle->as_string || '';
597              
598 10 100       994 if ($body_string =~ $Returned_Message_Below) {
    50          
599 5         31 my ($stuff_before, $stuff_splitted, $stuff_after) =
600             split $Returned_Message_Below, $message->bodyhandle->as_string, 2;
601             # $self->log("splitting on \"$stuff_splitted\", " . length($stuff_before)
602             # . " vs " . length($stuff_after) . " bytes.") if $DEBUG > 3;
603 5         460 push @{$self->{reports}}, $self->_extract_reports($stuff_before);
  5         32  
604 5         31 $self->{orig_text} = $stuff_after;
605             } elsif ($body_string =~ /(.+)\n\n(.+?Message-ID:.+)/is) {
606 0         0 push @{$self->{reports}}, $self->_extract_reports($1);
  0         0  
607 0         0 $self->{orig_text} = $2;
608             } else {
609 5         13 push @{$self->{reports}}, $self->_extract_reports($body_string);
  5         29  
610 5         25 $self->{orig_text} = $body_string;
611             }
612             }
613 126         1419 return $self;
614             }
615              
616 33     33   154028 BEGIN { *new = \&parse };
617              
618             =head2 log
619              
620             $bounce->log($messages);
621              
622             If a logging callback has been given, the message will be passed to it.
623              
624             =cut
625              
626             sub log {
627 415     415 1 86123 my ($self, @log) = @_;
628 415 50       1688 if (ref $self->{log} eq "CODE") {
629 0         0 $self->{log}->(@_);
630             }
631 415         1006 return 1;
632             }
633              
634             sub _extract_reports {
635 14     14   58 my $self = shift;
636             # input: either a list of MIME parts, or just a chunk of text.
637              
638 14 50       68 if (@_ > 1) { return map { _extract_reports($_) } @_ }
  0         0  
  0         0  
639              
640 14         109 my $text = shift;
641              
642 14 100       85 $text = $text->bodyhandle->as_string if ref $text;
643              
644 14         81 my %by_email;
645              
646             # we'll assume that the text is made up of:
647             # blah blah 0
648             # email@address 1
649             # blah blah 1
650             # email@address 2
651             # blah blah 2
652             #
653              
654             # we'll break it up accordingly, and first try to detect a reason for email 1
655             # in section 1; if there's no reason returned, we'll look in section 0. and
656             # we'll keep going that way for each address.
657              
658 14 100       56 return unless $text;
659 13         527 my @split = split($EMAIL_ADDR_REGEX, $text);
660              
661 13         124 foreach my $i (0 .. $#split) {
662             # only interested in the odd numbered elements, which are the email
663             # addressess.
664 39 100       174 next if $i % 2 == 0;
665              
666 13         71 my $email = _cleanup_email($split[$i]);
667              
668 13 50       125 if ($split[$i-1] =~ /they are not accepting mail from/) {
669             # aol airmail sender block
670 0         0 next;
671             }
672              
673 13 100       67 if($split[$i-1] =~ /A message sent by/) {
674             # sender block
675 1         3 next;
676             }
677              
678 12         29 my $std_reason = "unknown";
679 12 50       68 $std_reason = _std_reason($split[$i+1]) if $#split > $i;
680 12 100       74 $std_reason = _std_reason($split[$i-1]) if $std_reason eq "unknown";
681              
682             # todo:
683             # if we can't figure out the reason, if we're in the delivery-status part,
684             # go back up into the text part and try extract_report() on that.
685              
686             next if (
687             exists $by_email{$email}
688             and $by_email{$email}->{std_reason}
689 12 50 66     88 ne "unknown" and $std_reason eq "unknown"
      33        
690             );
691              
692 12         45 my $reason = $split[$i-1];
693 12         50 $reason =~ s/(.*?). (Your mail to the following recipients could not be delivered)/$2/;
694              
695 12         81 $self->log("extracted a reason [$reason]");
696 12         146 $by_email{$email} = {
697             email => $email,
698             raw => join ("", @split[$i-1..$i+1]),
699             std_reason => $std_reason,
700             reason => $reason
701             };
702             }
703              
704 13         33 my @toreturn;
705              
706 13         54 foreach my $email (keys %by_email) {
707 11         139 my $report = Mail::DeliveryStatus::Report->new();
708 11         547 $report->modify(1);
709 11         174 $report->header_hashref($by_email{$email});
710 11         10684 push @toreturn, $report;
711             }
712              
713 13         80 return @toreturn;
714             }
715              
716             =head2 is_bounce
717              
718             if ($bounce->is_bounce) { ... }
719              
720             This method returns true if the bounce parser thought the message was a bounce,
721             and false otherwise.
722              
723             =cut
724              
725 129     129 1 7067 sub is_bounce { return shift->{is_bounce}; }
726              
727             =head2 reports
728              
729             Each $report returned by $bounce->reports() is basically a Mail::Header object
730             with a few modifications. It includes the email address bouncing, and the
731             reason for the bounce.
732              
733             Consider an RFC1892 error report of the form
734              
735             Reporting-MTA: dns; hydrant.pobox.com
736             Arrival-Date: Fri, 4 Oct 2002 16:49:32 -0400 (EDT)
737              
738             Final-Recipient: rfc822; bogus3@dumbo.pobox.com
739             Action: failed
740             Status: 5.0.0
741             Diagnostic-Code: X-Postfix; host dumbo.pobox.com[208.210.125.24] said: 550
742             : Nonexistent Mailbox
743              
744             Each "header" above is available through the usual get() mechanism.
745              
746             print $report->get('reporting_mta'); # 'some.host.com'
747             print $report->get('arrival-date'); # 'Fri, 4 Oct 2002 16:49:32 -0400 (EDT)'
748             print $report->get('final-recipient'); # 'rfc822; bogus3@dumbo.pobox.com'
749             print $report->get('action'); # "failed"
750             print $report->get('status'); # "5.0.0"
751             print $report->get('diagnostic-code'); # X-Postfix; ...
752              
753             # BounceParser also inserts a few interpretations of its own:
754             print $report->get('email'); # 'bogus3@dumbo.pobox.com'
755             print $report->get('std_reason'); # 'user_unknown'
756             print $report->get('reason'); # host [199.248.185.2] said: 550 5.1.1 unknown or illegal user: somebody@uss.com
757             print $report->get('host'); # dumbo.pobox.com
758             print $report->get('smtp_code'); # 550
759              
760             print $report->get('raw') || # the original unstructured text
761             $report->as_string; # the original structured text
762              
763             Probably the two most useful fields are "email" and "std_reason", the
764             standardized reason. At this time BounceParser returns the following
765             standardized reasons:
766              
767             user_unknown
768             over_quota
769             user_disabled
770             domain_error
771             spam
772             message_too_large
773             unknown
774             no_problemo
775              
776             The "spam" standard reason indicates that the message bounced because
777             the recipient considered it spam.
778              
779             (no_problemo will only appear if you set {report_non_bounces=>1})
780              
781             If the bounce message is not structured according to RFC1892,
782             BounceParser will still try to return as much information as it can;
783             in particular, you can count on "email" and "std_reason" to be
784             present.
785              
786             =cut
787              
788 204     204 1 69360 sub reports { return @{shift->{reports}} }
  204         701  
789              
790             =head2 addresses
791              
792             Returns a list of the addresses which appear to be bouncing. Each member of
793             the list is an email address string of the form 'foo@bar.com'.
794              
795             =cut
796              
797 78     78 1 22832 sub addresses { return map { $_->get("email") } shift->reports; }
  84         289  
798              
799             =head2 orig_message_id
800              
801             If possible, returns the message-id of the original message as a string.
802              
803             =cut
804              
805 5     5 1 643 sub orig_message_id { return shift->{orig_message_id}; }
806              
807             =head2 orig_message
808              
809             If the original message was included in the bounce, it'll be available here as
810             a message/rfc822 MIME entity.
811              
812             my $orig_message = $bounce->orig_message;
813              
814             =cut
815              
816 2     2 1 13 sub orig_message { return shift->{orig_message} }
817              
818             =head2 orig_header
819              
820             If only the original headers were returned in the text/rfc822-headers chunk,
821             they'll be available here as a Mail::Header entity.
822              
823             =cut
824              
825 1     1 1 8 sub orig_header { return shift->{orig_header} }
826              
827             =head2 orig_text
828              
829             If the bounce message was poorly structured, the above two methods won't return
830             anything --- instead, you get back a block of text that may or may not
831             approximate the original message. No guarantees. Good luck.
832              
833             =cut
834              
835 1     1 1 5 sub orig_text { return shift->{orig_text} }
836              
837             =head1 CAVEATS
838              
839             Bounce messages are generally meant to be read by humans, not computers. A
840             poorly formatted bounce message may fool BounceParser into spreading its net
841             too widely and returning email addresses that didn't actually bounce. Before
842             you do anything with the email addresses you get back, confirm that it makes
843             sense that they might be bouncing --- for example, it doesn't make sense for
844             the sender of the original message to show up in the addresses list, but it
845             could if the bounce message is sufficiently misformatted.
846              
847             Still, please report all bugs!
848              
849             =head1 FREE-FLOATING ANXIETY
850              
851             Some bizarre MTAs construct bounce messages using the original headers of the
852             original message. If your application relies on the assumption that all
853             Message-IDs are unique, you need to watch out for these MTAs and program
854             defensively; before doing anything with the Message-ID of a bounce message,
855             first confirm that you haven't already seen it; if you have, change it to
856             something else that you make up on the spot, such as
857             "".
858              
859             =head1 BUGS
860              
861             BounceParser assumes a sanely constructed bounce message. Input from the real
862             world may cause BounceParser to barf and die horribly when we violate one of
863             MIME::Entity's assumptions; this is why you should always call it inside an
864             eval { }.
865              
866             =head2 TODO
867              
868             Provide some translation of the SMTP and DSN error codes into English. Review
869             RFC1891 and RFC1893.
870              
871             =head1 KNOWN TO WORK WITH
872              
873             We understand bounce messages generated by the following MTAs / organizations:
874              
875             Postfix
876             Sendmail
877             Exim
878             AOL
879             Yahoo
880             Hotmail
881             AOL's AirMail sender-blocking
882             Microsoft Exchange*
883             Qmail*
884             Novell Groupwise*
885              
886             * Items marked with an asterisk currently may return incomplete information.
887              
888             =head1 SEE ALSO
889              
890             Used by http://listbox.com/ --- if you like BounceParser and you know it,
891             consider Listbox for your mailing list needs!
892              
893             SVN repository and email list information at:
894             http://emailproject.perl.org/
895              
896             RFC1892 and RFC1894
897              
898             =head1 RANDOM OBSERVATION
899              
900             Schwern's modules have the Alexandre Dumas property.
901              
902             =head1 AUTHOR
903              
904             Original author: Meng Weng Wong, Emengwong+bounceparser@pobox.comE
905              
906             Current maintainer: Ricardo SIGNES, Erjbs@cpan.orgE
907              
908             Massive contributions to the 1.5xx series were made by William Yardley and
909             Michael Stevens. Ricardo mostly just helped out and managed releases.
910              
911             =head1 COPYRIGHT AND LICENSE
912              
913             Copyright (C) 2003-2006, IC Group, Inc.
914             pobox.com permanent email forwarding with spam filtering
915             listbox.com mailing list services for announcements and discussion
916              
917             This library is free software; you can redistribute it and/or modify
918             it under the same terms as Perl itself.
919              
920             =head1 WITH A SHOUT OUT TO
921              
922             coraline, Fletch, TorgoX, mjd, a-mused, Masque, gbarr,
923             sungo, dngor, and all the other hoopy froods on #perl
924              
925             =cut
926              
927             sub _std_reason {
928 117     117   349 local $_ = shift;
929              
930 117 50       406 if (!defined $_) {
931 0         0 return "unknown";
932             }
933              
934 117 100       1160 if (/(?:domain|host|service)\s+(?:not\s+found|unknown|not\s+known)/i) {
935 1         5 return "domain_error"
936             }
937              
938 116 100       618 if (/sorry,\s+that\s+domain\s+isn't\s+in\s+my\s+list\s+of\s+allowed\s+rcpthosts/i) {
939 1         6 return "domain_error";
940             }
941              
942 115 100 33     3377 if (
      33        
      66        
      66        
      66        
      66        
      100        
      100        
      100        
      100        
943             /try.again.later/is or
944             /mailbox\b.*\bfull/ or
945             /storage/i or
946             /quota/i or
947             /\s552\s/ or
948             /\s#?5\.2\.2\s/ or # rfc 1893
949             /User\s+mailbox\s+exceeds\s+allowed\s+size/i or
950             /Mailbox\s+size\s+limit\s+exceeded/i or
951             /max\s+message\s+size\s+exceeded/i or
952             /Benutzer\s+hat\s+zuviele\s+Mails\s+auf\s+dem\s+Server/i or
953             /exceeded\s+its\s+disk\s+space\s+limit/i
954             ) {
955 6         29 return "over_quota";
956             }
957              
958 109         732 my $user_re =
959             qr'(?: mailbox | user | recipient | address (?: ee)?
960             | customer | account | e-?mail | ? )'ix;
961              
962 109 100 100     51061 if (
      100        
      66        
      66        
      33        
      33        
      66        
      66        
      100        
      100        
      66        
      100        
      66        
      66        
      66        
      66        
      66        
      33        
      66        
      66        
      100        
      66        
      100        
      100        
      100        
      100        
      100        
      66        
      100        
      100        
      66        
      100        
      100        
      100        
      66        
      100        
      100        
      100        
963             /\s \(? \#? 5\.1\.[01] \)? \s/x or # rfc 1893
964             /$user_re\s+(?:\S+\s+)? (?:is\s+)? # Generic
965             (?: (?: un|not\s+) (?: known | recognized )
966             | [dw]oes\s?n[o']?t
967             (?: exist|found ) | disabled | expired ) /ix or
968             /no\s+(?:such)\s+?$user_re/i or # Gmail and other (mofified for bug #41874)
969             /unrouteable address/i or # bug #41874
970             /inactive user/i or # Outblaze
971             /unknown local part/i or # Exim(?)
972             /user\s+doesn't\s+have\s+a/i or # Yahoo!
973             /account\s+has\s+been\s+(?:disabled|suspended)/i or # Yahoo!
974             /$user_re\s+(?:suspended|discontinued)/i or # everyone.net / other?
975             /unknown\s+$user_re/i or # Generic
976             /$user_re\s+(?:is\s+)?(?:inactive|unavailable)/i or # Hotmail, others?
977             /(?:(?:in|not\s+a\s+)?valid|no such)\s$user_re/i or # Various
978             /$user_re\s+(?:was\s+)?not\s+found/i or # AOL, generic
979             /$user_re \s+ (?:is\s+)? (?:currently\s+)? # ATT, generic
980             (?:suspended|unavailable)/ix or
981             /address is administratively disabled/i or # Unknown
982             /no $user_re\s+(?:here\s+)?by that name/i or # Unknown
983             /? is invalid/i or # Unknown
984             /address.*not known here/i or # Unknown
985             /recipient\s+(?:address\s+)?rejected/i or # Cox, generic
986             /not\s+listed\s+in\s+Domino/i or # Domino
987             /account not activated/i or # usa.net
988             /not\s+our\s+customer/i or # Comcast
989             /doesn't handle mail for that user/i or # mailfoundry
990             /$user_re\s+does\s+not\s+exist/i or
991             /Recipient\s+?\s+does\s+not\s+exist/i or
992             /recipient\s+no\s+longer\s+on\s+server/i or # me.com
993             /is\s+not\s+a\s+known\s+user\s+on\s+this\s+system/i or # cam.ac.uk
994             /Rcpt\s+?\s+does\s+not\s+exist/i or
995             /Mailbox\s+not\s+available/i or
996             /No\s+mailbox\s+found/i or
997             /?\s+is\s+a\s+deactivated\s+mailbox/i or
998             /Recipient\s+does\s+not\s+exist\s+on\s+this\s+system/i or
999             /user\s+mail-box\s+not\s+found/i or
1000             /No\s+mail\s+box\s+available\s+for\s+this\s+user/i or
1001             /User\s+\[\S+\]\s+does\s+not\s+exist/i or
1002             /email\s+account\s+that\s+you\s+tried\s+to\s+reach\s+is\s+disabled/i or
1003             /not\s+an\s+active\s+address\s+at\s+this\s+host/i or
1004             /not\s+a\s+known\s+user/i or
1005             /BAD_RECIPIENT/i
1006             ) {
1007 30         254 return "user_unknown";
1008             }
1009              
1010 79 100 33     2395 if (
      33        
      33        
      33        
      66        
      100        
      66        
      66        
      66        
      100        
      100        
1011             /domain\s+syntax/i or
1012             /timed\s+out/i or
1013             /route\s+to\s+host/i or
1014             /connection\s+refused/i or
1015             /no\s+data\s+record\s+of\s+requested\s+type/i or
1016             /Malformed name server reply/i or
1017             /as\s+a\s+relay,\s+but\s+I\s+have\s+not\s+been\s+configured\s+to\s+let/i or
1018             /550\s+relay\s+not\s+permitted/i or
1019             /550\s+relaying\s+denied/i or
1020             /Relay\s+access\s+denied/i or
1021             /Relaying\s+denied/i or
1022             /No\s+such\s+domain\s+at\s+this\s+location/i
1023             ) {
1024 8         95 return "domain_error";
1025             }
1026              
1027 71 100 100     4806 if (
      100        
      100        
      100        
      100        
      100        
      100        
      100        
      100        
      100        
      100        
      100        
      100        
      100        
      100        
      100        
      100        
      100        
      100        
      100        
      100        
      100        
      100        
      100        
      100        
      100        
      100        
      100        
      66        
      100        
      100        
      100        
      100        
      100        
      100        
      100        
      100        
      100        
      100        
      100        
      100        
      100        
      100        
      100        
      100        
1028             /Blocked\s+by\s+SpamAssassin/i or
1029             /spam\s+rejection/i or
1030             /identified\s+SPAM,\s+message\s+permanently\s+rejected/i or
1031             /Mail\s+appears\s+to\s+be\s+unsolicited/i or
1032             /Message\s+rejected\s+as\s+spam\s+by\s+Content\s+Filtering/i or
1033             /message\s+looks\s+like\s+SPAM\s+to\s+me/i or
1034             /NOT\s+JUNKEMAILFILTER/i or
1035             /your\s+message\s+has\s+triggered\s+a\s+SPAM\s+block/i or
1036             /Spam\s+detected/i or
1037             /Message\s+looks\s+like\s+spam/i or
1038             /Message\s+content\s+rejected,\s+UBE/i or
1039             /Blocked\s+using\s+spam\s+pattern/i or
1040             /Client\s+host\s+\S+\s+blocked\s+using/i or
1041             /breaches\s+local\s+URIBL\s+policy/i or
1042             /Your\s+email\s+had\s+spam-like\s+header\s+contents/i or
1043             /detected\s+as\s+spam/i or
1044             /Denied\s+due\s+to\s+spam\s+list/i or
1045             /appears\s+to\s+be\s+unsolicited/i or
1046             /antispam\s+checks/i or
1047             /Probable\s+Spam/i or
1048             /ESETS_SMTP\s+\(spam\)/i or
1049             /this\s+message\s+appears\s+to\s+be\s+spam/i or
1050             /Spam\s+score\s+\(\S+\)\s+too\s+high/i or
1051             /matches\s+a\s+profile\s+the\s+Internet\s+community\s+may\s+consider\s+spam/i or
1052             /accepted\s+due\s+to\s+spam\s+filter/i or
1053             /content\s+filter\s+rejection/i or
1054             /using\s+a\s+mass\s+mailer/i or
1055             /Spam\s+email/i or
1056             /Spam\s+content/i or
1057             (/CONTENT\s+REJECT/i and /dspam\s+check/i) or
1058             /this\s+email\s+is\s+spam/i or
1059             /rejected\s+as\s+spam/i or
1060             /MCSpamSignature/i or
1061             /identified\s+as\s+spam/i or
1062             /Spamming\s+not\s+allowed/i or
1063             /classified\s+as\s+spam/i or
1064             /Message\s+refused\s+by\s+MailMarshal\s+SpamProfiler/i or
1065             /Your\s+email\s+appears\s+similar\s+to\s+spam/i or
1066             /This\s+message\s+scored\s+\S+\s+spam\s+points\s+and\s+has\s+been\s+rejected/i or
1067             /Spam\s+Blocked/i or
1068             /bulk\s+e?mail/i or
1069             /probably\s+spam/i or
1070             /appears\s+to\s+be\s+SPAM/i or
1071             /SPAM NOT ACCEPTED/i or
1072             /5.9.8\s+spam/i
1073             ) {
1074 48         403 return "spam";
1075             }
1076              
1077 23 100 100     369 if (
      100        
      100        
      100        
1078             /RESOLVER.RST.RecipSizeLimit/i or
1079             /exceeds\s+size\s+limit/i or
1080             /Message\s+too\s+big/i or
1081             /RESOLVER.RST.SendSizeLimit/i or
1082             /Message\s+Rejected\s+Class=size/i
1083             ) {
1084 6         63 return "message_too_large";
1085             }
1086              
1087 17         177 return "unknown";
1088             }
1089              
1090             # ---------------------------------------------------------------------
1091             # preprocessors
1092             # ---------------------------------------------------------------------
1093              
1094             sub p_ims {
1095 132     132 0 284 my $self = shift;
1096 132         250 my $message = shift;
1097              
1098             # Mangle Exchange messages into a format we like better
1099             # see t/corpus/exchange.unknown.msg
1100              
1101             return
1102 132 100 100     418 unless ($message->head->get("X-Mailer")||'') =~ /Internet Mail Service/i;
1103              
1104 1 50       47 if ($message->is_multipart) {
1105             return unless my ($error_part)
1106 1 50       165 = grep { $_->effective_type eq "text/plain" } $message->parts;
  2         139  
1107              
1108 1 50       132 return unless my ($actual_error)
1109             = $error_part->as_string
1110             =~ /did not reach the following recipient\S+\s*(.*)/is;
1111              
1112 1 50       1515 if (my $io = $error_part->open("w")) {
1113 1         69 $io->print($actual_error);
1114 1         7 $io->close;
1115             }
1116              
1117             } else {
1118              
1119 0 0       0 return unless my ($actual_error)
1120             = $message->bodyhandle->as_string
1121             =~ /did not reach the following recipient\S+\s*(.*)/is;
1122              
1123 0         0 my ($stuff_before, $stuff_after)
1124             = split /^(?=Message-ID:|Received:)/m, $message->bodyhandle->as_string;
1125              
1126 0         0 $stuff_before =~ s/.*did not reach the following recipient.*?$//ism;
1127 0         0 $self->log("rewrote IMS into plain/report.");
1128 0         0 return $self->new_plain_report($message, $stuff_before, $stuff_after);
1129             }
1130              
1131 1         10 return $message;
1132             }
1133              
1134             sub p_aol_senderblock {
1135 132     132 0 313 my $self = shift;
1136 132         295 my $message = shift;
1137              
1138 132 100 100     440 return unless ($message->head->get("Mailer")||'') =~ /AirMail/i;
1139 2 50       98 return unless $message->effective_type eq "text/plain";
1140 2 50       219 return unless $message->bodyhandle->as_string =~ /Your mail to the following recipients could not be delivered because they are not accepting mail/i;
1141              
1142 2         79 my ($host) = $message->head->get("From") =~ /\@(\S+)>/;
1143              
1144 2         110 my $rejector;
1145             my @new_output;
1146 2         7 for (split /\n/, $message->bodyhandle->as_string) {
1147              
1148             # "Sorry luser@example.com. Your mail to the...
1149             # Get rid of this so that the module doesn't create a report for
1150             # *your* address.
1151 6         44 s/Sorry \S+?@\S+?\.//g;
1152              
1153 6 50       21 if (/because they are not accepting mail from (\S+?):?/i) {
1154 0         0 $rejector = $1;
1155 0         0 push @new_output, $_;
1156 0         0 next;
1157             }
1158 6 100       25 if (/^\s*(\S+)\s*$/) {
1159 2         7 my $recipient = $1;
1160 2 50       10 if ($recipient =~ /\@/) {
1161 0         0 push @new_output, $_;
1162 0         0 next;
1163             }
1164 2         18 s/^(\s*)(\S+)(\s*)$/$1$2\@$host$3/;
1165 2         6 push @new_output, $_;
1166 2         6 next;
1167             }
1168 4         8 push @new_output, $_;
1169 4         10 next;
1170             }
1171              
1172 2         8 push @new_output, ("# rewritten by BounceParser: p_aol_senderblock()", "");
1173 2 50       9 if (my $io = $message->open("w")) {
1174 2         183 $io->print(join "\n", @new_output);
1175 2         24 $io->close;
1176             }
1177 2         23 return $message;
1178             }
1179              
1180             sub p_novell_groupwise {
1181              
1182             # renamed from p_novell_groupwise_5_2 - hopefully we can deal with most / all
1183             # versions and create test cases / fixes when we can't
1184             #
1185             # See t/various-unknown.t and t/corpus/novell-*.msg for some recent examples.
1186              
1187 132     132 0 278 my $self = shift;
1188 132         284 my $message = shift;
1189              
1190 132 100 100     352 return unless ($message->head->get("X-Mailer")||'') =~ /Novell Groupwise/i;
1191 1 50       55 return unless $message->effective_type eq "multipart/mixed";
1192             return unless my ($error_part)
1193 1 50       143 = grep { $_->effective_type eq "text/plain" } $message->parts;
  1         11  
1194              
1195 1         126 my ($host) = $message->head->get("From") =~ /\@(\S+)>?/;
1196              
1197             # A lot of times, Novell returns just the LHS; this makes it difficult /
1198             # impossible in many cases to guess the recipient address. MBP makes an
1199             # attempt here.
1200 1         38 my @new_output;
1201 1         4 for (split /\n/, $error_part->bodyhandle->as_string) {
1202 3 100       24 if (/^(\s*)(\S+)(\s+\(.*\))$/) {
1203 1         5 my ($space, $recipient, $reason) = ($1, $2, $3);
1204 1 50       4 if ($recipient =~ /\@/) {
1205 1         3 push @new_output, $_;
1206 1         3 next;
1207             }
1208 0         0 $_ = join "", $space, "$2\@$host", $reason;
1209 0         0 push @new_output, $_; next;
  0         0  
1210             }
1211 2         5 push @new_output, $_; next;
  2         3  
1212             }
1213              
1214 1         4 push @new_output,
1215             ("# rewritten by BounceParser: p_novell_groupwise()", "");
1216              
1217 1 50       4 if (my $io = $error_part->open("w")) {
1218 1         70 $io->print(join "\n", @new_output);
1219 1         8 $io->close;
1220             }
1221 1         11 return $message;
1222             }
1223              
1224             sub p_plain_smtp_transcript {
1225 132     132 0 399 my ($self, $message) = (shift, shift);
1226              
1227             # sometimes, we have a proper smtp transcript;
1228             # that means we have enough information to mark the message up into a proper
1229             # multipart/report!
1230             #
1231             # pennwomen-la@v2.listbox.com/200209/19/1032468752.1444_1.frodo
1232             # The original message was received at Thu, 19 Sep 2002 13:51:36 -0700 (MST)
1233             # from daemon@localhost
1234             #
1235             # ----- The following addresses had permanent fatal errors -----
1236             #
1237             # (expanded from: )
1238             #
1239             # ----- Transcript of session follows -----
1240             # ... while talking to smtp-local.primenet.com.:
1241             # >>> RCPT To:
1242             # <<< 550 ... User unknown
1243             # 550 ... User unknown
1244             # ----- Message header follows -----
1245             #
1246             # what we'll do is mark it back up into a proper multipart/report.
1247              
1248 132 100       404 return unless $message->effective_type eq "text/plain";
1249              
1250 16 100       1883 return unless $message->bodyhandle->as_string
1251             =~ /The following addresses had permanent fatal errors/;
1252              
1253 2 50       23 return unless $message->bodyhandle->as_string
1254             =~ /Transcript of session follows/;
1255              
1256 2 50       22 return unless $message->bodyhandle->as_string =~ /Message .* follows/;
1257              
1258 2         28 my ($stuff_before, $stuff_after)
1259             = split /^.*Message (?:header|body) follows.*$/im,
1260             $message->bodyhandle->as_string, 2;
1261              
1262 2         47 my %by_email = $self->_analyze_smtp_transcripts($stuff_before);
1263              
1264 2         7 my @paras = _construct_delivery_status_paras(\%by_email);
1265              
1266 2         5 my @new_output;
1267 2         6 my ($reporting_mta) = _cleanup_email($message->head->get("From")) =~ /\@(\S+)/;
1268              
1269 2         6 chomp (my $arrival_date = $message->head->get("Date"));
1270              
1271 2 50       82 push @new_output, "Reporting-MTA: $reporting_mta" if $reporting_mta;
1272 2 50       10 push @new_output, "Arrival-Date: $arrival_date" if $arrival_date;
1273 2         5 push @new_output, "";
1274 2         4 push @new_output, map { @$_, "" } @paras;
  2         7  
1275              
1276 2         18 return $self->new_multipart_report(
1277             $message,
1278             $stuff_before,
1279             join("\n", @new_output),
1280             $stuff_after
1281             );
1282             }
1283              
1284             sub _construct_delivery_status_paras {
1285 2     2   5 my %by_email = %{shift()};
  2         5  
1286              
1287 2         5 my @new_output;
1288              
1289 2         9 foreach my $email (sort keys %by_email) {
1290             # Final-Recipient: RFC822; robinbw@aol.com
1291             # Action: failed
1292             # Status: 2.0.0
1293             # Remote-MTA: DNS; air-xj03.mail.aol.com
1294             # Diagnostic-Code: SMTP; 250 OK
1295             # Last-Attempt-Date: Thu, 19 Sep 2002 16:53:10 -0400 (EDT)
1296              
1297             push @new_output, [
1298             "Final-Recipient: RFC822; $email",
1299             "Action: failed",
1300             "Status: 5.0.0",
1301 2 50       14 ($by_email{$email}->{host} ? ("Remote-MTA: DNS; $by_email{$email}->{host}") : ()),
1302             _construct_diagnostic_code(\%by_email, $email),
1303             ];
1304              
1305             }
1306              
1307 2         8 return @new_output;
1308             }
1309              
1310             sub _construct_diagnostic_code {
1311 2     2   4 my %by_email = %{shift()};
  2         4  
1312 2         4 my $email = shift;
1313             join (" ",
1314             "Diagnostic-Code: X-BounceParser;",
1315             ($by_email{$email}->{'host'} ? "host $by_email{$email}->{'host'} said:" : ()),
1316             ($by_email{$email}->{'smtp_code'}),
1317 2 50       10 (join ", ", @{ $by_email{$email}->{'errors'} || [] }));
  2 50       12  
1318             }
1319              
1320             sub _analyze_smtp_transcripts {
1321 2     2   5 my $self = shift;
1322 2         3 my $plain_smtp_transcript = shift;
1323              
1324 2         4 my (%by_email, $email, $smtp_code, @error_strings, $host);
1325              
1326             # parse the text part for the actual SMTP transcript
1327 2         127 for (split /\n\n|(?=>>>)/, $plain_smtp_transcript) {
1328 6 50       19 $email = _cleanup_email($1) if /RCPT TO:\s*(\S+)/im;
1329              
1330 6 100       18 if (/The\s+following\s+addresses\s+had\s+permanent\s+fatal\s+errors\s+-----\s+\?/im) {
1331 2         5 $email = _cleanup_email($1);
1332             }
1333              
1334 6 100       34 $by_email{$email}->{host} = $host if $email;
1335              
1336 6 100       17 if (/while talking to (\S+)/im) {
1337 2         24 $host = $1;
1338 2         10 $host =~ s/[.:;]+$//g;
1339             }
1340              
1341 6 100       20 if (/<<< (\d\d\d) (.*)/m) {
1342 2         11 $by_email{$email}->{smtp_code} = $1;
1343 2         3 push @{$by_email{$email}->{errors}}, $2;
  2         8  
1344             }
1345              
1346 6 50       22 if (/^(\d\d\d)\b.*(<\S+\@\S+>)\.*\s+(.+)/m) {
1347 0         0 $email = _cleanup_email($2);
1348 0         0 $by_email{$email}->{smtp_code} = $1;
1349 0         0 push @{$by_email{$email}->{errors}}, $3;
  0         0  
1350             }
1351             }
1352 2         4 delete $by_email{''};
1353 2         10 return %by_email;
1354             }
1355              
1356             # ------------------------------------------------------------
1357              
1358             sub new_plain_report {
1359 0     0 0 0 my ($self, $message, $error_text, $orig_message) = @_;
1360              
1361 0         0 $orig_message =~ s/^\s+//;
1362              
1363 0         0 my $newmessage = $message->dup();
1364 0         0 $newmessage->make_multipart("plain-report");
1365 0         0 $newmessage->parts([]);
1366 0         0 $newmessage->attach(Type => "text/plain", Data => $error_text);
1367              
1368 0         0 my $orig_message_mime = MIME::Entity->build(Type => "multipart/transitory");
1369              
1370 0         0 $orig_message_mime->add_part($self->{parser}->parse_data($orig_message));
1371              
1372 0         0 $orig_message_mime->head->mime_attr("content-type" => "message/rfc822");
1373 0         0 $newmessage->add_part($orig_message_mime);
1374              
1375 0         0 $self->log("created new plain-report message.");
1376              
1377 0         0 return $newmessage;
1378             }
1379              
1380             # ------------------------------------------------------------
1381              
1382             sub new_multipart_report {
1383 2     2 0 9 my ($self, $message, $error_text, $delivery_status, $orig_message) = @_;
1384              
1385 2         11 $orig_message =~ s/^\s+//;
1386              
1387 2         9 my $newmessage = $message->dup();
1388 2         809 $newmessage->make_multipart("report");
1389 2         4139 $newmessage->parts([]);
1390 2         36 $newmessage->attach(
1391             Type => "text/plain",
1392             Data => $error_text
1393             );
1394 2         2081 $newmessage->attach(
1395             Type => "message/delivery-status",
1396             Data => $delivery_status
1397             );
1398              
1399 2         1886 my $orig_message_mime
1400             = MIME::Entity->build(Type => "multipart/transitory", Top => 0);
1401              
1402 2         1296 $orig_message_mime->add_part($self->{parser}->parse_data($orig_message));
1403              
1404 2         11273 $orig_message_mime->head->mime_attr("content-type" => "message/rfc822");
1405 2         509 $newmessage->add_part($orig_message_mime);
1406              
1407 2         22 $self->log("created new multipart-report message.");
1408              
1409 2         13 return $newmessage;
1410             }
1411              
1412             # ------------------------------------------------------------
1413              
1414             sub _cleanup_email {
1415 130     130   338 my $email = shift;
1416 130         366 for ($email) {
1417 130         338 chomp;
1418             # Get rid of parens around addresses like (luser@example.com)
1419             # Got rid of earlier /\(.*\)/ - not sure what that was about - wby
1420 130         473 tr/[()]//d;
1421 130         392 s/^To:\s*//i;
1422 130         439 s/[.:;]+$//;
1423 130         493 s/<(.+)>/$1/;
1424             # IMS hack: c=US;a= ;p=NDC;o=ORANGE;dda:SMTP=slpark@msx.ndc.mc.uci.edu; on
1425             # Thu, 19 Sep...
1426 130         729 s/.*:SMTP=//;
1427 130         343 s/^\s+//;
1428 130         362 s/\s+$//;
1429             # hack to get rid of stuff like "luser@example.com...User"
1430 130         324 s/\.{3}\S+//;
1431             # SMTP:foo@example.com
1432 130         357 s/^SMTP://;
1433             }
1434 130         421 return $email;
1435             }
1436              
1437             sub p_xdelivery_status {
1438 132     132 0 385 my ($self, $message) = @_;
1439              
1440             # This seems to be caused by something called "XWall v3.31", which
1441             # (according to Google) is a "firewall that protects your Exchange
1442             # server from viruses, spam mail and dangerous attachments". Shame it
1443             # doesn't protect the rest of the world from gratuitously broken MIME
1444             # types.
1445              
1446 132         513 for ($message->parts_DFS) {
1447 698 100       80605 $_->effective_type('message/delivery-status')
1448             if $_->effective_type eq 'message/xdelivery-status';
1449             }
1450             }
1451              
1452             sub _first_non_multi_part {
1453 132     132   307 my ($entity) = @_;
1454              
1455 132         250 my $part = $entity;
1456 132   100     448 $part = $part->parts(0) or return while $part->is_multipart;
1457 131         39494 return $part;
1458             }
1459              
1460             sub _position_before {
1461 1     1   6 my ($pos_a, $pos_b) = @_;
1462 1 50 33     15 return 1 if defined($pos_a) && (!defined($pos_b) || $pos_a < $pos_b);
      33        
1463 0         0 return;
1464             }
1465              
1466             # Return the position in $string at which $regex first matches, or undef if
1467             # no match.
1468             sub _match_position {
1469 15     15   54 my ($string, $regex) = @_;
1470 15 100       2734 return $string =~ $regex ? $-[0] : undef;
1471             }
1472              
1473             1;