File Coverage

lib/Sisimai/Data.pm
Criterion Covered Total %
statement 202 206 98.0
branch 104 132 78.7
condition 71 100 71.0
subroutine 15 15 100.0
pod 2 4 50.0
total 394 457 86.2


line stmt bran cond sub pod time code
1             package Sisimai::Data;
2 78     78   254791 use feature ':5.10';
  78         280  
  78         6277  
3 78     78   481 use strict;
  78         143  
  78         1445  
4 78     78   354 use warnings;
  78         141  
  78         1910  
5 78     78   7835 use Sisimai::Address;
  78         157  
  78         1726  
6 78     78   6914 use Sisimai::String;
  78         177  
  78         2196  
7 78     78   32353 use Sisimai::Reason;
  78         180  
  78         2349  
8 78     78   22386 use Sisimai::Rhost;
  78         200  
  78         2143  
9 78     78   25348 use Sisimai::Time;
  78         197  
  78         750  
10 78     78   38154 use Sisimai::DateTime;
  78         206  
  78         2447  
11 78     78   28079 use Sisimai::SMTP::Error;
  78         193  
  78         4688  
12             use Class::Accessor::Lite (
13 78         831 'new' => 0,
14             'rw' => [
15             'catch', # [?] Results generated by hook method
16             'token', # [String] Message token/MD5 Hex digest value
17             'lhost', # [String] local host name/Local MTA
18             'rhost', # [String] Remote host name/Remote MTA
19             'alias', # [String] Alias of the recipient address
20             'listid', # [String] List-Id header of each ML
21             'reason', # [String] Bounce reason
22             'action', # [String] The value of Action: header
23             'origin', # [String] Email path as a data source
24             'subject', # [String] UTF-8 Subject text
25             'timestamp', # [Sisimai::Time] Date: header in the original message
26             'addresser', # [Sisimai::Address] From address
27             'recipient', # [Sisimai::Address] Recipient address which bounced
28             'messageid', # [String] Message-Id: header
29             'replycode', # [String] SMTP Reply Code
30             'smtpagent', # [String] Module(Engine) name
31             'softbounce', # [Integer] 1 = Soft bounce, 0 = Hard bounce, -1 = ?
32             'smtpcommand', # [String] The last SMTP command
33             'destination', # [String] The domain part of the "recipinet"
34             'senderdomain', # [String] The domain part of the "addresser"
35             'feedbacktype', # [String] Feedback Type
36             'diagnosticcode', # [String] Diagnostic-Code: Header
37             'diagnostictype', # [String] The 1st part of Diagnostic-Code: Header
38             'deliverystatus', # [String] Delivery Status(DSN)
39             'timezoneoffset', # [Integer] Time zone offset(seconds)
40             ]
41 78     78   489 );
  78         132  
42              
43             sub new {
44             # Constructor of Sisimai::Data
45             # @param [Hash] argvs Data
46             # @return [Sisimai::Data] Structured email data
47 2963     2963 0 4694 my $class = shift;
48 2963         28681 my $argvs = { @_ };
49              
50             # Create email address object
51 2963         10571 my $as = Sisimai::Address->make($argvs->{'addresser'});
52 2963         11155 my $ar = Sisimai::Address->make({ 'address' => $argvs->{'recipient'} });
53 2963 100       8916 return undef unless ref $as eq 'Sisimai::Address';
54 2962 50       5903 return undef unless ref $ar eq 'Sisimai::Address';
55              
56             my $thing = {
57             'addresser' => $as,
58             'recipient' => $ar,
59             'senderdomain' => $as->host,
60             'destination' => $ar->host,
61             'alias' => $argvs->{'alias'} || $ar->alias,
62 2962   66     8806 'token' => Sisimai::String->token($as->address, $ar->address, $argvs->{'timestamp'}),
63             };
64              
65             # Create Sisimai::Time object
66 2962         12213 $thing->{'timestamp'} = Sisimai::Time->new($argvs->{'timestamp'});
67 2962   50     243974 $thing->{'timezoneoffset'} = $argvs->{'timezoneoffset'} // '+0000';
68              
69             # Callback method
70 2962   100     11721 $thing->{'catch'} = $argvs->{'catch'} // undef;
71              
72 2962         12973 my @v1 = (qw|
73             listid subject messageid smtpagent diagnosticcode diagnostictype deliverystatus
74             reason lhost rhost smtpcommand feedbacktype action softbounce replycode origin
75             |);
76 2962   100     65389 $thing->{ $_ } = $argvs->{ $_ } // '' for @v1;
77 2962   50     9611 $thing->{'replycode'} ||= Sisimai::SMTP::Reply->find($argvs->{'diagnosticcode'}) || '';
      66        
78              
79 2962         19365 return bless($thing, __PACKAGE__);
80             }
81              
82             sub make {
83             # Another constructor of Sisimai::Data
84             # @param [Hash] argvs
85             # @option argvs [Sisimai::Message] data Data Object
86             # @option argvs [Integeer] delivered Include "delivered" status or not
87             # @return [Array, Undef] List of Sisimai::Data or Undef if the
88             # argument is not Sisimai::Message object
89 2809     2809 1 4698836 my $class = shift;
90 2809         7205 my $argvs = { @_ };
91              
92 2809 100       7350 return undef unless exists $argvs->{'data'};
93 2808 50       7540 return undef unless ref $argvs->{'data'} eq 'Sisimai::Message';
94 2808 50       6542 return undef unless $argvs->{'data'}->ds;
95 2808 50       15079 return undef unless $argvs->{'data'}->rfc822;
96              
97 2808         11488 state $retryindex = Sisimai::Reason->retry;
98 2808         3567 state $rfc822head = Sisimai::RFC5322->HEADERFIELDS('all');
99              
100 2808   100     8435 my $delivered1 = $argvs->{'delivered'} // 0;
101 2808         4467 my $messageobj = $argvs->{'data'};
102 2808         5306 my $rfc822data = $messageobj->rfc822;
103 2808         9964 my $objectlist = [];
104              
105 2808         3966 LOOP_DELIVERY_STATUS: for my $e ( @{ $messageobj->ds } ) {
  2808         5129  
106             # Create parameters for new() constructor.
107             my $p = {
108             'catch' => $messageobj->catch // undef,
109             'lhost' => $e->{'lhost'} // '',
110             'rhost' => $e->{'rhost'} // '',
111             'alias' => $e->{'alias'} // '',
112             'action' => $e->{'action'} // '',
113             'reason' => $e->{'reason'} // '',
114             'replycode' => $e->{'replycode'} // '',
115             'smtpagent' => $e->{'agent'} // '',
116             'recipient' => $e->{'recipient'} // '',
117             'softbounce' => $e->{'softbounce'} // '',
118             'smtpcommand' => $e->{'command'} // '',
119             'feedbacktype' => $e->{'feedbacktype'} // '',
120             'diagnosticcode' => $e->{'diagnosis'} // '',
121             'diagnostictype' => $e->{'spec'} // '',
122 2974   100     13244 'deliverystatus' => $e->{'status'} // '',
      100        
      100        
      50        
      50        
      50        
      50        
      50        
      50        
      50        
      50        
      50        
      50        
      50        
      50        
123             };
124 2974 100       67740 unless( $delivered1 ) {
125             # Skip if the value of "deliverystatus" begins with "2." such as 2.1.5
126 2461 100       6633 next if index($p->{'deliverystatus'}, '2.') == 0;
127             }
128              
129             EMAIL_ADDRESS: {
130             # Detect email address from message/rfc822 part
131 2962         3633 for my $f ( @{ $rfc822head->{'addresser'} } ) {
  2962         3758  
  2962         6390  
132             # Check each header in message/rfc822 part
133 4422         7339 my $h = lc $f;
134 4422 100       9450 next unless exists $rfc822data->{ $h };
135 2767 50       5730 next unless $rfc822data->{ $h };
136              
137 2767   50     13825 my $j = Sisimai::Address->find($rfc822data->{ $h }) || [];
138 2767 50       5125 next unless scalar @$j;
139 2767         9965 $p->{'addresser'} = $j->[0];
140 2767         5394 last;
141             }
142              
143 2962 100       6344 unless( $p->{'addresser'} ) {
144             # Fallback: Get the sender address from the header of the bounced
145             # email if the address is not set at loop above.
146 195   50     1238 my $j = Sisimai::Address->find($messageobj->{'header'}->{'to'}) || [];
147 195 50       1280 $p->{'addresser'} = $j->[0] if scalar @$j;
148             }
149             }
150 2962 50       5679 next unless $p->{'addresser'};
151 2962 50       5600 next unless $p->{'recipient'};
152              
153             TIMESTAMP: {
154             # Convert from a time stamp or a date string to a machine time.
155 2962         3784 my $datestring = undef;
  2962         4755  
156 2962         3827 my $zoneoffset = 0;
157 2962 100       3459 my @datevalues; push @datevalues, $e->{'date'} if $e->{'date'};
  2962         7414  
158              
159             # Date information did not exist in message/delivery-status part,...
160 2962         3544 for my $f ( @{ $rfc822head->{'date'} } ) {
  2962         6211  
161             # Get the value of Date header or other date related header.
162 11848 100       20548 next unless $rfc822data->{ $f };
163 2582         4917 push @datevalues, $rfc822data->{ $f };
164             }
165              
166             # Set "date" getting from the value of "Date" in the bounce message
167 2962 100       8148 push @datevalues, $messageobj->{'header'}->{'date'} if scalar(@datevalues) < 2;
168              
169 2962         6830 while( my $v = shift @datevalues ) {
170             # Parse each date value in the array
171 2962         15306 $datestring = Sisimai::DateTime->parse($v);
172 2962 50       8094 last if $datestring;
173             }
174              
175 2962 50 33     19523 if( defined $datestring && $datestring =~ /\A(.+)[ ]+([-+]\d{4})\z/ ) {
176             # Get the value of timezone offset from $datestring
177             # Wed, 26 Feb 2014 06:05:48 -0500
178 2962         7562 $datestring = $1;
179 2962         9729 $zoneoffset = Sisimai::DateTime->tz2second($2);
180 2962         7245 $p->{'timezoneoffset'} = $2;
181             }
182              
183 2962         4616 eval {
184             # Convert from the date string to an object then calculate time
185             # zone offset.
186 2962         15970 my $t = Sisimai::Time->strptime($datestring, '%a, %d %b %Y %T');
187 2962   50     206875 $p->{'timestamp'} = ($t->epoch - $zoneoffset) // undef;
188             };
189             }
190 2962 50       35550 next unless defined $p->{'timestamp'};
191              
192             OTHER_TEXT_HEADERS: {
193             # Scan "Received:" header of the original message
194 2962   50     4191 my $recvheader = $argvs->{'data'}->{'header'}->{'received'} || [];
  2962         8795  
195 2962 100       6599 if( scalar @$recvheader ) {
196             # Get localhost and remote host name from Received header.
197 2814   100     7182 $e->{'lhost'} ||= shift @{ Sisimai::RFC5322->received($recvheader->[0]) };
  1034         3810  
198 2814   100     6560 $e->{'rhost'} ||= pop @{ Sisimai::RFC5322->received($recvheader->[-1]) };
  780         2165  
199             }
200              
201 2962         5182 for my $v ('rhost', 'lhost') {
202 5924         10712 $p->{ $v } =~ y/[]()//d; # Remove square brackets and curly brackets from the host variable
203 5924         10422 $p->{ $v } =~ s/\A.+=//; # Remove string before "="
204 5924 50       13408 chop $p->{ $v } if substr($p->{ $v }, -1, 1) eq "\r"; # Remove CR at the end of the value
205              
206             # Check space character in each value and get the first element
207 5924 100       13421 $p->{ $v } = (split(' ', $p->{ $v }, 2))[0] if rindex($p->{ $v }, ' ') > -1;
208 5924 100       12236 chop $p->{ $v } if substr($p->{ $v }, -1, 1) eq '.'; # Remove "." at the end of the value
209             }
210              
211             # Subject: header of the original message
212 2962   50     8551 $p->{'subject'} = $rfc822data->{'subject'} // '';
213 2962 50       6746 chop $p->{'subject'} if substr($p->{'subject'}, -1, 1) eq "\r";
214              
215 2962 100 100     13270 if( $p->{'listid'} = $rfc822data->{'list-id'} // '' ) {
216             # Get the value of List-Id header: "List name "
217 32 100       251 $p->{'listid'} = $1 if $p->{'listid'} =~ /\A.*([<].+[>]).*\z/;
218 32         79 $p->{'listid'} =~ y/<>//d;
219 32 50       144 chop $p->{'listid'} if substr($p->{'listid'}, -1, 1) eq "\r";
220 32 50       129 $p->{'listid'} = '' if rindex($p->{'listid'}, ' ') > -1;
221             }
222              
223 2962 100 100     10624 if( $p->{'messageid'} = $rfc822data->{'message-id'} // '' ) {
224             # Leave only string inside of angle brackets(<>)
225 2542 100       7092 $p->{'messageid'} = $1 if $p->{'messageid'} =~ /\A([^ ]+)[ ].*/;
226 2542 100       16688 $p->{'messageid'} = $1 if $p->{'messageid'} =~ /[<]([^ ]+?)[>]/;
227             }
228              
229             CHECK_DELIVERY_STATUS_VALUE: {
230             # Cleanup the value of "Diagnostic-Code:" header
231 2962 50       4085 chop $p->{'diagnosticcode'} if substr($p->{'diagnosticcode'}, -1, 1) eq "\r";
  2962         6997  
232              
233 2962 100       6266 if( $p->{'diagnosticcode'} ) {
234             # Count the number of D.S.N. and SMTP Reply Code
235 2910         14663 my $vs = Sisimai::SMTP::Status->find($p->{'diagnosticcode'});
236 2910         12767 my $vr = Sisimai::SMTP::Reply->find($p->{'diagnosticcode'});
237 2910         4293 my $vm = 0;
238              
239 2910 100       5630 if( $vs ) {
240             # How many times does the D.S.N. appeared
241 1388         22510 $vm += 1 while $p->{'diagnosticcode'} =~ /\b\Q$vs\E\b/g;
242 1388 100       6435 $p->{'deliverystatus'} = $vs if $vs =~ /\A[45][.][1-9][.][1-9]\z/;
243             }
244              
245 2910 100       5155 if( $vr ) {
246             # How many times does the SMTP reply code appeared
247 2070         22451 $vm += 1 while $p->{'diagnosticcode'} =~ /\b$vr\b/g;
248 2070   66     8090 $p->{'replycode'} ||= $vr;
249             }
250              
251 2910 100       6815 if( $vm > 2 ) {
252             # Build regular expression for removing string like '550-5.1.1'
253             # from the value of "diagnosticcode"
254 181         3746 my $re0 = qr/;?[ ]$vr[- ](?:\Q$vs\E)?/;
255 181         2796 my $re1 = qr/;?[ ][45]\d\d[- ](?:\Q$vs\E)?/;
256 181         2759 my $re2 = qr/;?[ ]$vr[- ](?:[45][.]\d[.]\d+)?/;
257              
258             # 550-5.7.1 [192.0.2.222] Our system has detected that this message is
259             # 550-5.7.1 likely unsolicited mail. To reduce the amount of spam sent to Gmail,
260             # 550-5.7.1 this message has been blocked. Please visit
261             # 550 5.7.1 https://support.google.com/mail/answer/188131 for more information.
262 181         2534 $p->{'diagnosticcode'} =~ s/$re0/ /g;
263 181         2580 $p->{'diagnosticcode'} =~ s/$re1/ /g;
264 181         906 $p->{'diagnosticcode'} =~ s/$re2/ /g;
265 181         581 $p->{'diagnosticcode'} =~ s|.+||i;
266 181         773 $p->{'diagnosticcode'} = Sisimai::String->sweep($p->{'diagnosticcode'});
267             }
268             }
269 2962 50 0     6221 $p->{'diagnostictype'} ||= 'X-UNIX' if $p->{'reason'} eq 'mailererror';
270 2962 100 100     11531 $p->{'diagnostictype'} ||= 'SMTP' unless $p->{'reason'} =~ /\A(?:feedback|vacation)\z/;
271             }
272              
273             # Check the value of SMTP command
274 2962 100       10631 $p->{'smtpcommand'} = '' unless $p->{'smtpcommand'} =~ /\A(?:EHLO|HELO|MAIL|RCPT|DATA|QUIT)\z/;
275 2962         6304 $p->{'origin'} = $argvs->{'origin'}; # Set the path to the original email
276              
277             # Check "Action:" field
278 2962 100       6568 next if length $p->{'action'};
279 1332 100 100     7357 if( $p->{'reason'} eq 'expired' ) {
    100          
280             # Action: delayed
281 80         206 $p->{'action'} = 'delayed';
282              
283             } elsif( index($p->{'deliverystatus'}, '5') == 0 || index($p->{'deliverystatus'}, '4') == 0 ) {
284             # Action: failed
285 590         1181 $p->{'action'} = 'failed';
286             }
287             }
288 2962 50       19729 next unless my $o = __PACKAGE__->new(%$p);
289              
290 2962 100 100     12997 if( $o->reason eq '' || exists $retryindex->{ $o->reason } ) {
291             # Decide the reason of email bounce
292 2455 100       16625 my $r; $r = Sisimai::Rhost->get($o) if Sisimai::Rhost->match($o->rhost);
  2455         5409  
293 2455 100 100     8970 $r ||= Sisimai::Rhost->get($o, $o->destination) if Sisimai::Rhost->match($o->destination);
294 2455   66     16295 $r ||= Sisimai::Reason->get($o);
295 2455   50     5609 $r ||= 'undefined';
296 2455         6094 $o->reason($r);
297             }
298              
299 2962 100 100     20911 if( $o->reason eq 'delivered' || $o->reason eq 'feedback' || $o->reason eq 'vacation' ) {
      100        
300             # The value of reason is "delivered", "vacation" or "feedback".
301 146         1742 $o->softbounce(-1);
302 146 100       832 $o->replycode('') unless $o->reason eq 'delivered';
303              
304             } else {
305             # Bounce message which reason is "feedback" or "vacation" does
306             # not have the value of "deliverystatus".
307 2816 50       38458 unless( length $o->softbounce ) {
308             # Set the value of softbounce
309 2816         16167 my $textasargv = $p->{'deliverystatus'}.' '.$p->{'diagnosticcode'};
310 2816 100       7297 substr($textasargv, 0, 1, '') if substr($textasargv, 0, 1) eq ' ';
311 2816         6906 my $softorhard = Sisimai::SMTP::Error->soft_or_hard($o->reason, $textasargv);
312              
313 2816 50       5367 if( $softorhard ) {
314             # Returned value is "soft" or "hard"
315 2816 100       7441 $o->softbounce($softorhard eq 'soft' ? 1 : 0);
316              
317             } else {
318             # Returned value is an empty string or undef
319 0         0 $o->softbounce(-1);
320             }
321             }
322              
323 2816 100       17068 unless( $o->deliverystatus ) {
324             # Set pseudo status code
325 611         3287 my $textasargv = $o->replycode.' '.$p->{'diagnosticcode'};
326 611 100       4450 substr($textasargv, 0, 1, '') if substr($textasargv, 0, 1) eq ' ';
327              
328 611         2101 my $getchecked = Sisimai::SMTP::Error->is_permanent($textasargv);
329 611 100       1501 my $tmpfailure = defined $getchecked ? ( $getchecked == 1 ? 0 : 1 ) : 0;
    100          
330              
331 611 50       1529 if( my $pseudocode = Sisimai::SMTP::Status->code($o->reason, $tmpfailure) ) {
332             # Set the value of "deliverystatus" and "softbounce".
333 611         1605 $o->deliverystatus($pseudocode);
334 611 50       3367 if( $o->softbounce == -1 ) {
335             # Set the value of "softbounce" again when the value is -1
336 0 0       0 if( my $softorhard = Sisimai::SMTP::Error->soft_or_hard($o->reason, $pseudocode) ) {
337             # Returned value is "soft" or "hard"
338 0 0       0 $o->softbounce($softorhard eq 'soft' ? 1 : 0);
339              
340             } else {
341             # Returned value is an empty string or undef
342 0         0 $o->softbounce(-1);
343             }
344             }
345             }
346             }
347              
348 2816 100       14696 if( $o->replycode ) {
349             # Check both of the first digit of "deliverystatus" and "replycode"
350 2066         10118 my $d1 = substr($o->deliverystatus, 0, 1);
351 2066         8719 my $r1 = substr($o->replycode, 0, 1);
352 2066 100       9249 $o->replycode('') unless $d1 eq $r1;
353             }
354             }
355 2962         25263 push @$objectlist, $o;
356              
357             } # End of for(LOOP_DELIVERY_STATUS)
358              
359 2808         10479 return $objectlist;
360             }
361              
362             sub damn {
363             # Convert from object to hash reference
364             # @return [Hash] Data in Hash reference
365 1286     1286 1 1949321 my $self = shift;
366 1286         2313 my $data = undef;
367              
368 1286         1923 eval {
369 1286         2022 my $v = {};
370 1286         1904 state $stringdata = [qw|
371             token lhost rhost listid alias reason subject messageid smtpagent
372             smtpcommand destination diagnosticcode senderdomain deliverystatus
373             timezoneoffset feedbacktype diagnostictype action replycode catch
374             softbounce origin
375             |];
376              
377 1286         2762 for my $e ( @$stringdata ) {
378             # Copy string data
379 28292   100     139758 $v->{ $e } = $self->$e // '';
380             }
381 1286         6759 $v->{'addresser'} = $self->addresser->address;
382 1286         12202 $v->{'recipient'} = $self->recipient->address;
383 1286         9210 $v->{'timestamp'} = $self->timestamp->epoch;
384 1286         14656 $data = $v;
385             };
386 1286         20179 return $data;
387             }
388              
389             sub dump {
390             # Data dumper
391             # @param [String] type Data format: json, yaml
392             # @return [String, Undef] Dumped data or Undef if the value of first
393             # argument is neither "json" nor "yaml"
394 644     644 0 8577860 my $self = shift;
395 644   50     1989 my $type = shift || 'json';
396 644 50       3852 return undef unless $type =~ /\A(?:json|yaml)\z/;
397              
398 644         2031 my $referclass = 'Sisimai::Data::'.uc($type);
399 644         1407 my $modulepath = 'Sisimai/Data/'.uc($type).'.pm';
400              
401 644         4857 require $modulepath;
402 644         2724 return $referclass->dump($self);
403             }
404              
405             1;
406             __END__