File Coverage

lib/Sisimai/Data.pm
Criterion Covered Total %
statement 198 202 98.0
branch 104 132 78.7
condition 71 100 71.0
subroutine 15 15 100.0
pod 2 4 50.0
total 390 453 86.0


line stmt bran cond sub pod time code
1             package Sisimai::Data;
2 76     76   206822 use feature ':5.10';
  76         247  
  76         6122  
3 76     76   425 use strict;
  76         138  
  76         1319  
4 76     76   296 use warnings;
  76         107  
  76         1762  
5 76     76   6306 use Sisimai::Address;
  76         134  
  76         1472  
6 76     76   6096 use Sisimai::String;
  76         151  
  76         1997  
7 76     76   28475 use Sisimai::Reason;
  76         157  
  76         2042  
8 76     76   20989 use Sisimai::Rhost;
  76         155  
  76         1900  
9 76     76   22914 use Sisimai::Time;
  76         210  
  76         658  
10 76     76   35890 use Sisimai::DateTime;
  76         227  
  76         2323  
11 76     76   27211 use Sisimai::SMTP::Error;
  76         175  
  76         4099  
12             use Class::Accessor::Lite (
13 76         903 'new' => 0,
14             'rw' => [
15             'catch', # [?] Results generated by hook method
16             'token', # [String] Message token/MD5 Hex digest value
17             'lhost', # [String] local host name/Local MTA
18             'rhost', # [String] Remote host name/Remote MTA
19             'alias', # [String] Alias of the recipient address
20             'listid', # [String] List-Id header of each ML
21             'reason', # [String] Bounce reason
22             'action', # [String] The value of Action: header
23             'origin', # [String] Email path as a data source
24             'subject', # [String] UTF-8 Subject text
25             'timestamp', # [Sisimai::Time] Date: header in the original message
26             'addresser', # [Sisimai::Address] From address
27             'recipient', # [Sisimai::Address] Recipient address which bounced
28             'messageid', # [String] Message-Id: header
29             'replycode', # [String] SMTP Reply Code
30             'smtpagent', # [String] Module(Engine) name
31             'softbounce', # [Integer] 1 = Soft bounce, 0 = Hard bounce, -1 = ?
32             'smtpcommand', # [String] The last SMTP command
33             'destination', # [String] The domain part of the "recipinet"
34             'senderdomain', # [String] The domain part of the "addresser"
35             'feedbacktype', # [String] Feedback Type
36             'diagnosticcode', # [String] Diagnostic-Code: Header
37             'diagnostictype', # [String] The 1st part of Diagnostic-Code: Header
38             'deliverystatus', # [String] Delivery Status(DSN)
39             'timezoneoffset', # [Integer] Time zone offset(seconds)
40             ]
41 76     76   418 );
  76         133  
42              
43             sub new {
44             # Constructor of Sisimai::Data
45             # @param [Hash] argvs Data
46             # @return [Sisimai::Data] Structured email data
47 2933     2933 0 4257 my $class = shift;
48 2933         27251 my $argvs = { @_ };
49              
50             # Create email address object
51 2933         10520 my $as = Sisimai::Address->make($argvs->{'addresser'});
52 2933         9242 my $ar = Sisimai::Address->make({ 'address' => $argvs->{'recipient'} });
53 2933 100       8826 return undef unless ref $as eq 'Sisimai::Address';
54 2932 50       5309 return undef unless ref $ar eq 'Sisimai::Address';
55              
56             my $thing = {
57             'addresser' => $as,
58             'recipient' => $ar,
59             'senderdomain' => $as->host,
60             'destination' => $ar->host,
61             'alias' => $argvs->{'alias'} || $ar->alias,
62 2932   66     7858 'token' => Sisimai::String->token($as->address, $ar->address, $argvs->{'timestamp'}),
63             };
64              
65             # Create Sisimai::Time object
66 2932         10862 $thing->{'timestamp'} = Sisimai::Time->new($argvs->{'timestamp'});
67 2932   50     234723 $thing->{'timezoneoffset'} = $argvs->{'timezoneoffset'} // '+0000';
68              
69             # Callback method
70 2932   100     11267 $thing->{'catch'} = $argvs->{'catch'} // undef;
71              
72 2932         12421 my @v1 = (qw|
73             listid subject messageid smtpagent diagnosticcode diagnostictype deliverystatus
74             reason lhost rhost smtpcommand feedbacktype action softbounce replycode origin
75             |);
76 2932   100     56155 $thing->{ $_ } = $argvs->{ $_ } // '' for @v1;
77 2932   50     9458 $thing->{'replycode'} ||= Sisimai::SMTP::Reply->find($argvs->{'diagnosticcode'}) || '';
      66        
78              
79 2932         16771 return bless($thing, __PACKAGE__);
80             }
81              
82             sub make {
83             # Another constructor of Sisimai::Data
84             # @param [Hash] argvs
85             # @option argvs [Sisimai::Message] data Data Object
86             # @option argvs [Integeer] delivered Include "delivered" status or not
87             # @return [Array, Undef] List of Sisimai::Data or Undef if the
88             # argument is not Sisimai::Message object
89 2779     2779 1 4178794 my $class = shift;
90 2779         6709 my $argvs = { @_ };
91              
92 2779 100       6824 return undef unless exists $argvs->{'data'};
93 2778 50       7307 return undef unless ref $argvs->{'data'} eq 'Sisimai::Message';
94 2778 50       7523 return undef unless $argvs->{'data'}->ds;
95 2778 50       13311 return undef unless $argvs->{'data'}->rfc822;
96              
97 2778         11094 state $retryindex = Sisimai::Reason->retry;
98 2778         3665 state $rfc822head = Sisimai::RFC5322->HEADERFIELDS('all');
99              
100 2778   100     7858 my $delivered1 = $argvs->{'delivered'} // 0;
101 2778         4355 my $messageobj = $argvs->{'data'};
102 2778         4780 my $rfc822data = $messageobj->rfc822;
103 2778         9042 my $objectlist = [];
104              
105 2778         3564 LOOP_DELIVERY_STATUS: for my $e ( @{ $messageobj->ds } ) {
  2778         4508  
106             # Create parameters for new() constructor.
107             my $p = {
108             'catch' => $messageobj->catch // undef,
109             'lhost' => $e->{'lhost'} // '',
110             'rhost' => $e->{'rhost'} // '',
111             'alias' => $e->{'alias'} // '',
112             'action' => $e->{'action'} // '',
113             'reason' => $e->{'reason'} // '',
114             'replycode' => $e->{'replycode'} // '',
115             'smtpagent' => $e->{'agent'} // '',
116             'recipient' => $e->{'recipient'} // '',
117             'softbounce' => $e->{'softbounce'} // '',
118             'smtpcommand' => $e->{'command'} // '',
119             'feedbacktype' => $e->{'feedbacktype'} // '',
120             'diagnosticcode' => $e->{'diagnosis'} // '',
121             'diagnostictype' => $e->{'spec'} // '',
122 2944   100     11985 'deliverystatus' => $e->{'status'} // '',
      100        
      100        
      50        
      50        
      50        
      50        
      50        
      50        
      50        
      50        
      50        
      50        
      50        
      50        
123             };
124 2944 100       65895 unless( $delivered1 ) {
125             # Skip if the value of "deliverystatus" begins with "2." such as 2.1.5
126 2431 100       6305 next if index($p->{'deliverystatus'}, '2.') == 0;
127             }
128              
129             EMAIL_ADDRESS: {
130             # Detect email address from message/rfc822 part
131 2932         3304 for my $f ( @{ $rfc822head->{'addresser'} } ) {
  2932         3126  
  2932         6356  
132             # Check each header in message/rfc822 part
133 4392         6375 my $h = lc $f;
134 4392 100       7559 next unless exists $rfc822data->{ $h };
135 2737 50       5314 next unless $rfc822data->{ $h };
136              
137 2737   50     12954 my $j = Sisimai::Address->find($rfc822data->{ $h }) || [];
138 2737 50       5701 next unless scalar @$j;
139 2737         8275 $p->{'addresser'} = $j->[0];
140 2737         4772 last;
141             }
142              
143 2932 100       6539 unless( $p->{'addresser'} ) {
144             # Fallback: Get the sender address from the header of the bounced
145             # email if the address is not set at loop above.
146 195   50     977 my $j = Sisimai::Address->find($messageobj->{'header'}->{'to'}) || [];
147 195 50       1129 $p->{'addresser'} = $j->[0] if scalar @$j;
148             }
149             }
150 2932 50       5447 next unless $p->{'addresser'};
151 2932 50       5468 next unless $p->{'recipient'};
152              
153             TIMESTAMP: {
154             # Convert from a time stamp or a date string to a machine time.
155 2932         3626 my $datestring = undef;
  2932         4292  
156 2932         3564 my $zoneoffset = 0;
157 2932 100       2948 my @datevalues; push @datevalues, $e->{'date'} if $e->{'date'};
  2932         6056  
158              
159             # Date information did not exist in message/delivery-status part,...
160 2932         3505 for my $f ( @{ $rfc822head->{'date'} } ) {
  2932         5355  
161             # Get the value of Date header or other date related header.
162 11728 100       18244 next unless $rfc822data->{ $f };
163 2552         3886 push @datevalues, $rfc822data->{ $f };
164             }
165              
166             # Set "date" getting from the value of "Date" in the bounce message
167 2932 100       6853 push @datevalues, $messageobj->{'header'}->{'date'} if scalar(@datevalues) < 2;
168              
169 2932         6295 while( my $v = shift @datevalues ) {
170             # Parse each date value in the array
171 2932         17170 $datestring = Sisimai::DateTime->parse($v);
172 2932 50       7191 last if $datestring;
173             }
174              
175 2932 50 33     17586 if( defined $datestring && $datestring =~ /\A(.+)[ ]+([-+]\d{4})\z/ ) {
176             # Get the value of timezone offset from $datestring
177             # Wed, 26 Feb 2014 06:05:48 -0500
178 2932         7067 $datestring = $1;
179 2932         11172 $zoneoffset = Sisimai::DateTime->tz2second($2);
180 2932         6733 $p->{'timezoneoffset'} = $2;
181             }
182              
183 2932         3989 eval {
184             # Convert from the date string to an object then calculate time
185             # zone offset.
186 2932         15856 my $t = Sisimai::Time->strptime($datestring, '%a, %d %b %Y %T');
187 2932   50     200987 $p->{'timestamp'} = ($t->epoch - $zoneoffset) // undef;
188             };
189             }
190 2932 50       32944 next unless defined $p->{'timestamp'};
191              
192             OTHER_TEXT_HEADERS: {
193             # Scan "Received:" header of the original message
194 2932   50     3354 my $recvheader = $argvs->{'data'}->{'header'}->{'received'} || [];
  2932         8810  
195 2932 100       6230 if( scalar @$recvheader ) {
196             # Get localhost and remote host name from Received header.
197 2784   100     6336 $e->{'lhost'} ||= shift @{ Sisimai::RFC5322->received($recvheader->[0]) };
  1034         3860  
198 2784   100     7101 $e->{'rhost'} ||= pop @{ Sisimai::RFC5322->received($recvheader->[-1]) };
  780         1926  
199             }
200              
201 2932         5135 for my $v ('rhost', 'lhost') {
202 5864         10203 $p->{ $v } =~ y/[]()//d; # Remove square brackets and curly brackets from the host variable
203 5864         10012 $p->{ $v } =~ s/\A.+=//; # Remove string before "="
204 5864 50       11658 chop $p->{ $v } if substr($p->{ $v }, -1, 1) eq "\r"; # Remove CR at the end of the value
205              
206             # Check space character in each value and get the first element
207 5864 100       10782 $p->{ $v } = (split(' ', $p->{ $v }, 2))[0] if rindex($p->{ $v }, ' ') > -1;
208 5864 100       10531 chop $p->{ $v } if substr($p->{ $v }, -1, 1) eq '.'; # Remove "." at the end of the value
209             }
210              
211             # Subject: header of the original message
212 2932   50     7639 $p->{'subject'} = $rfc822data->{'subject'} // '';
213 2932 50       5804 chop $p->{'subject'} if substr($p->{'subject'}, -1, 1) eq "\r";
214              
215 2932 100 100     10846 if( $p->{'listid'} = $rfc822data->{'list-id'} // '' ) {
216             # Get the value of List-Id header: "List name "
217 32 100       243 $p->{'listid'} = $1 if $p->{'listid'} =~ /\A.*([<].+[>]).*\z/;
218 32         77 $p->{'listid'} =~ y/<>//d;
219 32 50       129 chop $p->{'listid'} if substr($p->{'listid'}, -1, 1) eq "\r";
220 32 50       93 $p->{'listid'} = '' if rindex($p->{'listid'}, ' ') > -1;
221             }
222              
223 2932 100 100     8886 if( $p->{'messageid'} = $rfc822data->{'message-id'} // '' ) {
224             # Leave only string inside of angle brackets(<>)
225 2512 100       5834 $p->{'messageid'} = $1 if $p->{'messageid'} =~ /\A([^ ]+)[ ].*/;
226 2512 100       16091 $p->{'messageid'} = $1 if $p->{'messageid'} =~ /[<]([^ ]+?)[>]/;
227             }
228              
229             CHECK_DELIVERY_STATUS_VALUE: {
230             # Cleanup the value of "Diagnostic-Code:" header
231 2932 50       4398 chop $p->{'diagnosticcode'} if substr($p->{'diagnosticcode'}, -1, 1) eq "\r";
  2932         6556  
232              
233 2932 100       5021 if( $p->{'diagnosticcode'} ) {
234             # Count the number of D.S.N. and SMTP Reply Code
235 2880         15280 my $vs = Sisimai::SMTP::Status->find($p->{'diagnosticcode'});
236 2880         12201 my $vr = Sisimai::SMTP::Reply->find($p->{'diagnosticcode'});
237 2880         3999 my $vm = 0;
238              
239 2880 100       4694 if( $vs ) {
240             # How many times does the D.S.N. appeared
241 1388         19364 $vm += 1 while $p->{'diagnosticcode'} =~ /\b\Q$vs\E\b/g;
242 1388 100       5454 $p->{'deliverystatus'} = $vs if $vs =~ /\A[45][.][1-9][.][1-9]\z/;
243             }
244              
245 2880 100       5277 if( $vr ) {
246             # How many times does the SMTP reply code appeared
247 2040         20910 $vm += 1 while $p->{'diagnosticcode'} =~ /\b$vr\b/g;
248 2040   66     7110 $p->{'replycode'} ||= $vr;
249             }
250              
251 2880 100       5874 if( $vm > 2 ) {
252             # Build regular expression for removing string like '550-5.1.1'
253             # from the value of "diagnosticcode"
254 181         2709 my $re = qr/[ ]$vr[- ](?:\Q$vs\E)?/;
255              
256             # 550-5.7.1 [192.0.2.222] Our system has detected that this message is
257             # 550-5.7.1 likely unsolicited mail. To reduce the amount of spam sent to Gmail,
258             # 550-5.7.1 this message has been blocked. Please visit
259             # 550 5.7.1 https://support.google.com/mail/answer/188131 for more information.
260 181         1820 $p->{'diagnosticcode'} =~ s/$re/ /g;
261 181         673 $p->{'diagnosticcode'} =~ s|.+||i;
262 181         668 $p->{'diagnosticcode'} = Sisimai::String->sweep($p->{'diagnosticcode'});
263             }
264             }
265 2932 50 0     6385 $p->{'diagnostictype'} ||= 'X-UNIX' if $p->{'reason'} eq 'mailererror';
266 2932 100 100     10415 $p->{'diagnostictype'} ||= 'SMTP' unless $p->{'reason'} =~ /\A(?:feedback|vacation)\z/;
267             }
268              
269             # Check the value of SMTP command
270 2932 100       8712 $p->{'smtpcommand'} = '' unless $p->{'smtpcommand'} =~ /\A(?:EHLO|HELO|MAIL|RCPT|DATA|QUIT)\z/;
271 2932         5066 $p->{'origin'} = $argvs->{'origin'}; # Set the path to the original email
272              
273             # Check "Action:" field
274 2932 100       6579 next if length $p->{'action'};
275 1332 100 100     7472 if( $p->{'reason'} eq 'expired' ) {
    100          
276             # Action: delayed
277 80         202 $p->{'action'} = 'delayed';
278              
279             } elsif( index($p->{'deliverystatus'}, '5') == 0 || index($p->{'deliverystatus'}, '4') == 0 ) {
280             # Action: failed
281 590         1131 $p->{'action'} = 'failed';
282             }
283             }
284 2932 50       18560 next unless my $o = __PACKAGE__->new(%$p);
285              
286 2932 100 100     11532 if( $o->reason eq '' || exists $retryindex->{ $o->reason } ) {
287             # Decide the reason of email bounce
288 2425 100       14572 my $r; $r = Sisimai::Rhost->get($o) if Sisimai::Rhost->match($o->rhost);
  2425         5582  
289 2425 100 100     7299 $r ||= Sisimai::Rhost->get($o, $o->destination) if Sisimai::Rhost->match($o->destination);
290 2425   66     15380 $r ||= Sisimai::Reason->get($o);
291 2425   50     5285 $r ||= 'undefined';
292 2425         4868 $o->reason($r);
293             }
294              
295 2932 100 100     18542 if( $o->reason eq 'delivered' || $o->reason eq 'feedback' || $o->reason eq 'vacation' ) {
      100        
296             # The value of reason is "delivered", "vacation" or "feedback".
297 146         1428 $o->softbounce(-1);
298 146 100       772 $o->replycode('') unless $o->reason eq 'delivered';
299              
300             } else {
301             # Bounce message which reason is "feedback" or "vacation" does
302             # not have the value of "deliverystatus".
303 2786 50       32496 unless( length $o->softbounce ) {
304             # Set the value of softbounce
305 2786         15211 my $textasargv = $p->{'deliverystatus'}.' '.$p->{'diagnosticcode'};
306 2786 100       8016 substr($textasargv, 0, 1, '') if substr($textasargv, 0, 1) eq ' ';
307 2786         4979 my $softorhard = Sisimai::SMTP::Error->soft_or_hard($o->reason, $textasargv);
308              
309 2786 50       4718 if( $softorhard ) {
310             # Returned value is "soft" or "hard"
311 2786 100       8236 $o->softbounce($softorhard eq 'soft' ? 1 : 0);
312              
313             } else {
314             # Returned value is an empty string or undef
315 0         0 $o->softbounce(-1);
316             }
317             }
318              
319 2786 100       14515 unless( $o->deliverystatus ) {
320             # Set pseudo status code
321 611         2892 my $textasargv = $o->replycode.' '.$p->{'diagnosticcode'};
322 611 100       3982 substr($textasargv, 0, 1, '') if substr($textasargv, 0, 1) eq ' ';
323              
324 611         1573 my $getchecked = Sisimai::SMTP::Error->is_permanent($textasargv);
325 611 100       1422 my $tmpfailure = defined $getchecked ? ( $getchecked == 1 ? 0 : 1 ) : 0;
    100          
326              
327 611 50       1348 if( my $pseudocode = Sisimai::SMTP::Status->code($o->reason, $tmpfailure) ) {
328             # Set the value of "deliverystatus" and "softbounce".
329 611         1323 $o->deliverystatus($pseudocode);
330 611 50       2919 if( $o->softbounce == -1 ) {
331             # Set the value of "softbounce" again when the value is -1
332 0 0       0 if( my $softorhard = Sisimai::SMTP::Error->soft_or_hard($o->reason, $pseudocode) ) {
333             # Returned value is "soft" or "hard"
334 0 0       0 $o->softbounce($softorhard eq 'soft' ? 1 : 0);
335              
336             } else {
337             # Returned value is an empty string or undef
338 0         0 $o->softbounce(-1);
339             }
340             }
341             }
342             }
343              
344 2786 100       13037 if( $o->replycode ) {
345             # Check both of the first digit of "deliverystatus" and "replycode"
346 2036         8285 my $d1 = substr($o->deliverystatus, 0, 1);
347 2036         7528 my $r1 = substr($o->replycode, 0, 1);
348 2036 100       7818 $o->replycode('') unless $d1 eq $r1;
349             }
350             }
351 2932         24335 push @$objectlist, $o;
352              
353             } # End of for(LOOP_DELIVERY_STATUS)
354              
355 2778         10554 return $objectlist;
356             }
357              
358             sub damn {
359             # Convert from object to hash reference
360             # @return [Hash] Data in Hash reference
361 1276     1276 1 1533218 my $self = shift;
362 1276         1727 my $data = undef;
363              
364 1276         2271 eval {
365 1276         1904 my $v = {};
366 1276         2072 state $stringdata = [qw|
367             token lhost rhost listid alias reason subject messageid smtpagent
368             smtpcommand destination diagnosticcode senderdomain deliverystatus
369             timezoneoffset feedbacktype diagnostictype action replycode catch
370             softbounce origin
371             |];
372              
373 1276         2454 for my $e ( @$stringdata ) {
374             # Copy string data
375 28072   100     122881 $v->{ $e } = $self->$e // '';
376             }
377 1276         6691 $v->{'addresser'} = $self->addresser->address;
378 1276         11206 $v->{'recipient'} = $self->recipient->address;
379 1276         8499 $v->{'timestamp'} = $self->timestamp->epoch;
380 1276         14797 $data = $v;
381             };
382 1276         19006 return $data;
383             }
384              
385             sub dump {
386             # Data dumper
387             # @param [String] type Data format: json, yaml
388             # @return [String, Undef] Dumped data or Undef if the value of first
389             # argument is neither "json" nor "yaml"
390 639     639 0 6879076 my $self = shift;
391 639   50     2436 my $type = shift || 'json';
392 639 50       3969 return undef unless $type =~ /\A(?:json|yaml)\z/;
393              
394 639         2151 my $referclass = 'Sisimai::Data::'.uc($type);
395 639         1416 my $modulepath = 'Sisimai/Data/'.uc($type).'.pm';
396              
397 639         4448 require $modulepath;
398 639         2451 return $referclass->dump($self);
399             }
400              
401             1;
402             __END__