File Coverage

lib/Sisimai/RFC5322.pm
Criterion Covered Total %
statement 77 77 100.0
branch 35 40 87.5
condition 12 17 70.5
subroutine 10 10 100.0
pod 3 6 50.0
total 137 150 91.3


line stmt bran cond sub pod time code
1             package Sisimai::RFC5322;
2 86     86   68905 use feature ':5.10';
  86         152  
  86         6201  
3 86     86   472 use strict;
  86         155  
  86         1807  
4 86     86   425 use warnings;
  86         184  
  86         7064  
5 86         139295 use constant HEADERTABLE => {
6             'messageid' => ['message-id'],
7             'subject' => ['subject'],
8             'listid' => ['list-id'],
9             'date' => [qw|date posted-date posted resent-date|],
10             'addresser' => [qw|from return-path reply-to errors-to reverse-path x-postfix-sender envelope-from x-envelope-from|],
11             'recipient' => [qw|to delivered-to forward-path envelope-to x-envelope-to resent-to apparently-to|],
12 86     86   557 };
  86         192  
13              
14             # Regular expression of valid RFC-5322 email address()
15             my $Re = { 'rfc5322' => undef, 'ignored' => undef, 'domain' => undef, };
16             BUILD_REGULAR_EXPRESSIONS: {
17             # See http://www.ietf.org/rfc/rfc5322.txt
18             # or http://www.ex-parrot.com/pdw/Mail-RFC822-Address.html ...
19             # addr-spec = local-part "@" domain
20             # local-part = dot-atom / quoted-string / obs-local-part
21             # domain = dot-atom / domain-literal / obs-domain
22             # domain-literal = [CFWS] "[" *([FWS] dcontent) [FWS] "]" [CFWS]
23             # dcontent = dtext / quoted-pair
24             # dtext = NO-WS-CTL / ; Non white space controls
25             # %d33-90 / ; The rest of the US-ASCII
26             # %d94-126 ; characters not including "[",
27             # ; "]", or "\"
28             my $atom = qr;[a-zA-Z0-9_!#\$\%&'*+/=?\^`{}~|\-]+;o;
29             my $quoted_string = qr/"(?:\\[^\r\n]|[^\\"])*"/o;
30             my $domain_literal = qr/\[(?:\\[\x01-\x09\x0B-\x0c\x0e-\x7f]|[\x21-\x5a\x5e-\x7e])*\]/o;
31             my $dot_atom = qr/$atom(?:[.]$atom)*/o;
32             my $local_part = qr/(?:$dot_atom|$quoted_string)/o;
33             my $domain = qr/(?:$dot_atom|$domain_literal)/o;
34              
35             $Re->{'rfc5322'} = qr/\A$local_part[@]$domain\z/o;
36             $Re->{'ignored'} = qr/\A$local_part[.]*[@]$domain\z/o;
37             $Re->{'domain'} = qr/\A$domain\z/o;
38             }
39              
40             my $HEADERINDEX = {};
41             BUILD_FLATTEN_RFC822HEADER_LIST: {
42             # Convert $HEADER: hash reference to flatten hash reference for being
43             # called from Sisimai::Lhost::*
44             for my $v ( values %{ HEADERTABLE() } ) {
45             $HEADERINDEX->{ $_ } = 1 for @$v;
46             }
47             }
48              
49             sub HEADERFIELDS {
50             # Grouped RFC822 headers
51             # @param [String] group RFC822 Header group name
52             # @return [Array,Hash] RFC822 Header list
53 82     82 0 33636 my $class = shift;
54 82   100     511 my $group = shift || return $HEADERINDEX;
55 78 100       495 return HEADERTABLE->{ $group } if exists HEADERTABLE->{ $group };
56 77         202 return HEADERTABLE;
57             }
58              
59             sub LONGFIELDS {
60             # Fields that might be long
61             # @return [Hash] Long filed(email header) list
62 4     4 0 21452 return { 'to' => 1, 'from' => 1, 'subject' => 1, 'message-id' => 1 };
63             }
64              
65             sub is_emailaddress {
66             # Check that the argument is an email address or not
67             # @param [String] email Email address string
68             # @return [Integer] 0: Not email address
69             # 1: Email address
70 6356     6356 1 19213 my $class = shift;
71 6356   50     10012 my $email = shift // return 0;
72              
73 6356 50       32029 return 0 if $email =~ /(?:[\x00-\x1f]|\x1f)/;
74 6356 50       11159 return 0 if length $email > 254;
75 6356 100       50171 return 1 if $email =~ $Re->{'ignored'};
76 33         189 return 0;
77             }
78              
79             sub is_mailerdaemon {
80             # Check that the argument is mailer-daemon or not
81             # @param [String] email Email address
82             # @return [Integer] 0: Not mailer-daemon
83             # 1: Mailer-daemon
84 204     204 0 167289 my $class = shift;
85 204   50     449 my $email = shift // return 0;
86 204         275 state $match = qr{(?>
87             (?:mailer-daemon|postmaster)[@]
88             |[<(](?:mailer-daemon|postmaster)[)>]
89             |\A(?:mailer-daemon|postmaster)\z
90             |[ ]?mailer-daemon[ ]
91             )
92             }x;
93 204 100       2123 return 1 if lc($email) =~ $match;
94 153         364 return 0;
95             }
96              
97             sub received {
98             # Convert Received headers to a structured data
99             # @param [String] argv1 Received header
100             # @return [Array] Received header as a structured data
101 2091     2091 1 33160 my $class = shift;
102 2091   50     3819 my $argv1 = shift || return [];
103 2091         2736 my $hosts = [];
104 2091         5421 my $value = { 'from' => '', 'by' => '' };
105              
106             # Received: (qmail 10000 invoked by uid 999); 24 Apr 2013 00:00:00 +0900
107 2091 100       6759 return [] if $argv1 =~ /qmail[ \t]+.+invoked[ \t]+/;
108              
109 1973 100       21479 if( $argv1 =~ /\Afrom[ \t]+(.+)[ \t]+by[ \t]+([^ ]+)/ ) {
    100          
110             # Received: from localhost (localhost)
111             # by nijo.example.jp (V8/cf) id s1QB5ma0018057;
112             # Wed, 26 Feb 2014 06:05:48 -0500
113 1601         4386 $value->{'from'} = $1;
114 1601         2968 $value->{'by'} = $2;
115              
116             } elsif( $argv1 =~ /\bby[ \t]+([^ ]+)(.+)/ ) {
117             # Received: by 10.70.22.98 with SMTP id c2mr1838265pdf.3; Fri, 18 Jul 2014
118             # 00:31:02 -0700 (PDT)
119 314         1218 $value->{'from'} = $1.$2;
120 314         638 $value->{'by'} = $1;
121             }
122              
123 1973 100       5355 if( $value->{'from'} =~ / / ) {
124             # Received: from [10.22.22.222] (smtp-gateway.kyoto.ocn.ne.jp [192.0.2.222])
125             # (authenticated bits=0)
126             # by nijo.example.jp (V8/cf) with ESMTP id s1QB5ka0018055;
127             # Wed, 26 Feb 2014 06:05:47 -0500
128 1654         5888 my @received = split(' ', $value->{'from'});
129 1654         2408 my @namelist;
130             my @addrlist;
131 1654         2019 my $hostname = '';
132 1654         1866 my $hostaddr = '';
133              
134 1654         2746 for my $e ( @received ) {
135             # Received: from [10.22.22.222] (smtp-gateway.kyoto.ocn.ne.jp [192.0.2.222])
136 7661 100       13580 if( $e =~ /\A[(\[]\d+[.]\d+[.]\d+[.]\d+[)\]]\z/ ) {
137             # [192.0.2.1] or (192.0.2.1)
138 243         519 $e =~ y/[]()//d;
139 243         666 push @addrlist, $e;
140              
141             } else {
142             # hostname
143 7418         8442 $e =~ y/()//d;
144 7418         11113 push @namelist, $e;
145             }
146             }
147              
148 1654         2355 for my $e ( @namelist ) {
149             # 1. Hostname takes priority over all other IP addresses
150 1991 100       4179 next unless rindex($e, '.') > -1;
151 1593         2024 $hostname = $e;
152 1593         1906 last;
153             }
154              
155 1654 100       2920 unless( $hostname ) {
156             # 2. Use IP address as a remote host name
157 61         185 for my $e ( @addrlist ) {
158             # Skip if the address is a private address
159 23 50       106 next if index($e, '10.') == 0;
160 23 100       179 next if index($e, '127.') == 0;
161 18 50       70 next if index($e, '192.168.') == 0;
162 18 50       95 next if $e =~ /\A172[.](?:1[6-9]|2[0-9]|3[0-1])[.]/;
163 18         34 $hostaddr = $e;
164 18         32 last;
165             }
166             }
167 1654   100     5889 $value->{'from'} = $hostname || $hostaddr || $addrlist[-1];
168             }
169              
170 1973         3222 for my $e ('from', 'by') {
171             # Copy entries into $hosts
172 3946 100       6484 next unless defined $value->{ $e };
173 3908         5200 $value->{ $e } =~ y/()[];?//d;
174 3908         7134 push @$hosts, $value->{ $e };
175             }
176 1973         8439 return $hosts;
177             }
178              
179             sub fillet {
180             # Split given entire message body into error message lines and the original
181             # message part only include email headers
182             # @param [String] mbody Entire message body
183             # @param [Regexp] regex Regular expression of the message/rfc822 or the
184             # beginning of the original message part
185             # @return [Array] [Error message lines, The original message]
186             # @since v4.25.5
187 2894     2894 1 7098 my $class = shift;
188 2894   50     6585 my $mbody = shift || return undef;
189 2894   50     6311 my $regex = shift || return undef;
190              
191 2894   100     34403 my ($a, $b) = split($regex, $$mbody, 2); $b ||= '';
  2894         9607  
192 2894 100       5778 if( length $b ) {
193             # Remove blank lines, the message body of the original message, and
194             # append "\n" at the end of the original message headers
195             # 1. Remove leading blank lines
196             # 2. Remove text after the first blank line: \n\n
197             # 3. Append "\n" at the end of test block when the last character is not "\n"
198 2278         11154 $b =~ s/\A[\r\n\s]+//m;
199 2278 100       10952 substr($b, index($b, "\n\n") + 1, length($b), '') if index($b, "\n\n") > 0;
200 2278 100       8036 $b .= "\n" unless $b =~ /\n\z/;
201             }
202 2894         10533 return [$a, $b];
203             }
204              
205             1;
206             __END__