File Coverage

lib/Sisimai/RFC5322.pm
Criterion Covered Total %
statement 77 77 100.0
branch 35 40 87.5
condition 12 17 70.5
subroutine 10 10 100.0
pod 3 6 50.0
total 137 150 91.3


line stmt bran cond sub pod time code
1             package Sisimai::RFC5322;
2 86     86   69369 use feature ':5.10';
  86         212  
  86         6038  
3 86     86   447 use strict;
  86         139  
  86         1608  
4 86     86   452 use warnings;
  86         181  
  86         6567  
5 86         133801 use constant HEADERTABLE => {
6             'messageid' => ['message-id'],
7             'subject' => ['subject'],
8             'listid' => ['list-id'],
9             'date' => [qw|date posted-date posted resent-date|],
10             'addresser' => [qw|from return-path reply-to errors-to reverse-path x-postfix-sender envelope-from x-envelope-from|],
11             'recipient' => [qw|to delivered-to forward-path envelope-to x-envelope-to resent-to apparently-to|],
12 86     86   557 };
  86         174  
13              
14             # Regular expression of valid RFC-5322 email address()
15             my $Re = { 'rfc5322' => undef, 'ignored' => undef, 'domain' => undef, };
16             BUILD_REGULAR_EXPRESSIONS: {
17             # See http://www.ietf.org/rfc/rfc5322.txt
18             # or http://www.ex-parrot.com/pdw/Mail-RFC822-Address.html ...
19             # addr-spec = local-part "@" domain
20             # local-part = dot-atom / quoted-string / obs-local-part
21             # domain = dot-atom / domain-literal / obs-domain
22             # domain-literal = [CFWS] "[" *([FWS] dcontent) [FWS] "]" [CFWS]
23             # dcontent = dtext / quoted-pair
24             # dtext = NO-WS-CTL / ; Non white space controls
25             # %d33-90 / ; The rest of the US-ASCII
26             # %d94-126 ; characters not including "[",
27             # ; "]", or "\"
28             my $atom = qr;[a-zA-Z0-9_!#\$\%&'*+/=?\^`{}~|\-]+;o;
29             my $quoted_string = qr/"(?:\\[^\r\n]|[^\\"])*"/o;
30             my $domain_literal = qr/\[(?:\\[\x01-\x09\x0B-\x0c\x0e-\x7f]|[\x21-\x5a\x5e-\x7e])*\]/o;
31             my $dot_atom = qr/$atom(?:[.]$atom)*/o;
32             my $local_part = qr/(?:$dot_atom|$quoted_string)/o;
33             my $domain = qr/(?:$dot_atom|$domain_literal)/o;
34              
35             $Re->{'rfc5322'} = qr/\A$local_part[@]$domain\z/o;
36             $Re->{'ignored'} = qr/\A$local_part[.]*[@]$domain\z/o;
37             $Re->{'domain'} = qr/\A$domain\z/o;
38             }
39              
40             my $HEADERINDEX = {};
41             BUILD_FLATTEN_RFC822HEADER_LIST: {
42             # Convert $HEADER: hash reference to flatten hash reference for being
43             # called from Sisimai::Lhost::*
44             for my $v ( values %{ HEADERTABLE() } ) {
45             $HEADERINDEX->{ $_ } = 1 for @$v;
46             }
47             }
48              
49             sub HEADERFIELDS {
50             # Grouped RFC822 headers
51             # @param [String] group RFC822 Header group name
52             # @return [Array,Hash] RFC822 Header list
53 82     82 0 33843 my $class = shift;
54 82   100     278 my $group = shift || return $HEADERINDEX;
55 78 100       322 return HEADERTABLE->{ $group } if exists HEADERTABLE->{ $group };
56 77         257 return HEADERTABLE;
57             }
58              
59             sub LONGFIELDS {
60             # Fields that might be long
61             # @return [Hash] Long filed(email header) list
62 4     4 0 21362 return { 'to' => 1, 'from' => 1, 'subject' => 1, 'message-id' => 1 };
63             }
64              
65             sub is_emailaddress {
66             # Check that the argument is an email address or not
67             # @param [String] email Email address string
68             # @return [Integer] 0: Not email address
69             # 1: Email address
70 6356     6356 1 18548 my $class = shift;
71 6356   50     10766 my $email = shift // return 0;
72              
73 6356 50       31612 return 0 if $email =~ /(?:[\x00-\x1f]|\x1f)/;
74 6356 50       10995 return 0 if length $email > 254;
75 6356 100       49117 return 1 if $email =~ $Re->{'ignored'};
76 33         167 return 0;
77             }
78              
79             sub is_mailerdaemon {
80             # Check that the argument is mailer-daemon or not
81             # @param [String] email Email address
82             # @return [Integer] 0: Not mailer-daemon
83             # 1: Mailer-daemon
84 204     204 0 155942 my $class = shift;
85 204   50     453 my $email = shift // return 0;
86 204         267 state $match = qr{(?>
87             (?:mailer-daemon|postmaster)[@]
88             |[<(](?:mailer-daemon|postmaster)[)>]
89             |\A(?:mailer-daemon|postmaster)\z
90             |[ ]?mailer-daemon[ ]
91             )
92             }x;
93 204 100       2048 return 1 if lc($email) =~ $match;
94 153         386 return 0;
95             }
96              
97             sub received {
98             # Convert Received headers to a structured data
99             # @param [String] argv1 Received header
100             # @return [Array] Received header as a structured data
101 2091     2091 1 34252 my $class = shift;
102 2091   50     4180 my $argv1 = shift || return [];
103 2091         2839 my $hosts = [];
104 2091         6359 my $value = { 'from' => '', 'by' => '' };
105              
106             # Received: (qmail 10000 invoked by uid 999); 24 Apr 2013 00:00:00 +0900
107 2091 100       7582 return [] if $argv1 =~ /qmail[ \t]+.+invoked[ \t]+/;
108              
109 1973 100       21404 if( $argv1 =~ /\Afrom[ \t]+(.+)[ \t]+by[ \t]+([^ ]+)/ ) {
    100          
110             # Received: from localhost (localhost)
111             # by nijo.example.jp (V8/cf) id s1QB5ma0018057;
112             # Wed, 26 Feb 2014 06:05:48 -0500
113 1601         4285 $value->{'from'} = $1;
114 1601         3554 $value->{'by'} = $2;
115              
116             } elsif( $argv1 =~ /\bby[ \t]+([^ ]+)(.+)/ ) {
117             # Received: by 10.70.22.98 with SMTP id c2mr1838265pdf.3; Fri, 18 Jul 2014
118             # 00:31:02 -0700 (PDT)
119 314         1343 $value->{'from'} = $1.$2;
120 314         698 $value->{'by'} = $1;
121             }
122              
123 1973 100       5787 if( $value->{'from'} =~ / / ) {
124             # Received: from [10.22.22.222] (smtp-gateway.kyoto.ocn.ne.jp [192.0.2.222])
125             # (authenticated bits=0)
126             # by nijo.example.jp (V8/cf) with ESMTP id s1QB5ka0018055;
127             # Wed, 26 Feb 2014 06:05:47 -0500
128 1654         5312 my @received = split(' ', $value->{'from'});
129 1654         2497 my @namelist;
130             my @addrlist;
131 1654         2190 my $hostname = '';
132 1654         2158 my $hostaddr = '';
133              
134 1654         3106 for my $e ( @received ) {
135             # Received: from [10.22.22.222] (smtp-gateway.kyoto.ocn.ne.jp [192.0.2.222])
136 7661 100       14477 if( $e =~ /\A[(\[]\d+[.]\d+[.]\d+[.]\d+[)\]]\z/ ) {
137             # [192.0.2.1] or (192.0.2.1)
138 243         574 $e =~ y/[]()//d;
139 243         622 push @addrlist, $e;
140              
141             } else {
142             # hostname
143 7418         8597 $e =~ y/()//d;
144 7418         11090 push @namelist, $e;
145             }
146             }
147              
148 1654         2687 for my $e ( @namelist ) {
149             # 1. Hostname takes priority over all other IP addresses
150 1991 100       5771 next unless rindex($e, '.') > -1;
151 1593         1906 $hostname = $e;
152 1593         2495 last;
153             }
154              
155 1654 100       3087 unless( $hostname ) {
156             # 2. Use IP address as a remote host name
157 61         132 for my $e ( @addrlist ) {
158             # Skip if the address is a private address
159 23 50       102 next if index($e, '10.') == 0;
160 23 100       80 next if index($e, '127.') == 0;
161 18 50       63 next if index($e, '192.168.') == 0;
162 18 50       63 next if $e =~ /\A172[.](?:1[6-9]|2[0-9]|3[0-1])[.]/;
163 18         42 $hostaddr = $e;
164 18         23 last;
165             }
166             }
167 1654   100     6364 $value->{'from'} = $hostname || $hostaddr || $addrlist[-1];
168             }
169              
170 1973         2954 for my $e ('from', 'by') {
171             # Copy entries into $hosts
172 3946 100       6870 next unless defined $value->{ $e };
173 3908         5340 $value->{ $e } =~ y/()[];?//d;
174 3908         7216 push @$hosts, $value->{ $e };
175             }
176 1973         8503 return $hosts;
177             }
178              
179             sub fillet {
180             # Split given entire message body into error message lines and the original
181             # message part only include email headers
182             # @param [String] mbody Entire message body
183             # @param [Regexp] regex Regular expression of the message/rfc822 or the
184             # beginning of the original message part
185             # @return [Array] [Error message lines, The original message]
186             # @since v4.25.5
187 2894     2894 1 6529 my $class = shift;
188 2894   50     7118 my $mbody = shift || return undef;
189 2894   50     6338 my $regex = shift || return undef;
190              
191 2894   100     32631 my ($a, $b) = split($regex, $$mbody, 2); $b ||= '';
  2894         8931  
192 2894 100       5915 if( length $b ) {
193             # Remove blank lines, the message body of the original message, and
194             # append "\n" at the end of the original message headers
195             # 1. Remove leading blank lines
196             # 2. Remove text after the first blank line: \n\n
197             # 3. Append "\n" at the end of test block when the last character is not "\n"
198 2278         11023 $b =~ s/\A[\r\n\s]+//m;
199 2278 100       11463 substr($b, index($b, "\n\n") + 1, length($b), '') if index($b, "\n\n") > 0;
200 2278 100       8314 $b .= "\n" unless $b =~ /\n\z/;
201             }
202 2894         9686 return [$a, $b];
203             }
204              
205             1;
206             __END__