File Coverage

blib/lib/Mail/SpamAssassin/Constants.pm
Criterion Covered Total %
statement 84 84 100.0
branch n/a
condition n/a
subroutine 27 27 100.0
pod n/a
total 111 111 100.0


line stmt bran cond sub pod time code
1             # Constants used in many parts of the SpamAssassin codebase.
2             #
3             # TODO! we need to reimplement parts of the RESERVED regexp!
4              
5             # <@LICENSE>
6             # Licensed to the Apache Software Foundation (ASF) under one or more
7             # contributor license agreements. See the NOTICE file distributed with
8             # this work for additional information regarding copyright ownership.
9             # The ASF licenses this file to you under the Apache License, Version 2.0
10             # (the "License"); you may not use this file except in compliance with
11             # the License. You may obtain a copy of the License at:
12             #
13             # http://www.apache.org/licenses/LICENSE-2.0
14             #
15             # Unless required by applicable law or agreed to in writing, software
16             # distributed under the License is distributed on an "AS IS" BASIS,
17             # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18             # See the License for the specific language governing permissions and
19             # limitations under the License.
20             # </@LICENSE>
21              
22              
23             use strict;
24 41     41   388 use warnings;
  41         74  
  41         1123  
25 41     41   255 use re 'taint';
  41         81  
  41         1195  
26 41     41   208  
  41         72  
  41         1260  
27             use Exporter ();
28 41     41   217 our @ISA = qw(Exporter);
  41         63  
  41         6476  
29              
30             our(@BAYES_VARS, @IP_VARS, @SA_VARS, %EXPORT_TAGS, @EXPORT_OK);
31              
32             # NOTE: unless these lists must be available at BEGIN (compile) time, it is simpler to drop the BEGIN block and initialize them with plain "our" statements.
33             BEGIN {
34             @IP_VARS = qw(
35 41     41   177 IP_IN_RESERVED_RANGE IP_PRIVATE LOCALHOST IPV4_ADDRESS IP_ADDRESS
36             );
37             @BAYES_VARS = qw(
38 41         80 DUMP_MAGIC DUMP_TOKEN DUMP_BACKUP
39             );
40             # These are generic constants that may be used across several modules
41             @SA_VARS = qw(
42 41         111 HARVEST_DNSBL_PRIORITY MBX_SEPARATOR
43             MAX_BODY_LINE_LENGTH MAX_HEADER_KEY_LENGTH MAX_HEADER_VALUE_LENGTH
44             MAX_HEADER_LENGTH ARITH_EXPRESSION_LEXER AI_TIME_UNKNOWN
45             CHARSETS_LIKELY_TO_FP_AS_CAPS MAX_URI_LENGTH RULENAME_RE IS_RULENAME
46             META_RULES_MATCHING_RE
47             );
48              
49             %EXPORT_TAGS = (
50 41         453 bayes => [ @BAYES_VARS ],
51             ip => [ @IP_VARS ],
52             sa => [ @SA_VARS ],
53             all => [ @BAYES_VARS, @IP_VARS, @SA_VARS ],
54             );
55              
56             @EXPORT_OK = ( @BAYES_VARS, @IP_VARS, @SA_VARS );
57 41         1162 }
58              
59             # BAYES_VARS
60             use constant DUMP_MAGIC => 1;
61 41     41   255 use constant DUMP_TOKEN => 2;
  41         84  
  41         2728  
62 41     41   230 use constant DUMP_SEEN => 4;
  41         71  
  41         2006  
63 41     41   211 use constant DUMP_BACKUP => 8;
  41         86  
  41         1881  
64 41     41   202  
  41         71  
  41         12667  
65             # IP_VARS
66             # ---------------------------------------------------------------------------
67             # Initialize a regexp for private IPs, i.e. ones that could be
68             # used inside a company and be the first or second relay hit by
69             # a message. Some companies use these internally and translate
70             # them using a NAT firewall. These are listed in the RBL as invalid
71             # originators -- which is true, if you receive the mail directly
72             # from them; however we do not, so we should ignore them.
73             #
74             # sources:
75             # IANA = <https://www.iana.org/numbers>,
76             # 5735 = <https://tools.ietf.org/html/rfc5735>
77             # 6598 = <https://tools.ietf.org/html/rfc6598>
78             # 4193 = <https://tools.ietf.org/html/rfc4193>
79             # CYMRU = <https://www.team-cymru.com/bogon-reference.html>
80             #
81             # This includes:
82             # host-local address space 127.0.0.0/8 and ::1,
83             # link-local address space 169.254.0.0/16 and fe80::/10,
84             # private-use address space 10.0.0.0/8, 172.16.0.0/12, 192.168.0.0/16,
85             # TODO (not yet matched by the regexp below): Unique Local Unicast Addresses fc00::/7 (RFC 4193)
86             # shared address space 100.64.0.0/10 (RFC 6598 - for use in CGN),
87             # IPv4-mapped IPv6 address ::ffff:0:0/96 (RFC 3513)
88             #
89             use constant IP_PRIVATE => qr{^(?:
90 41         2387 (?: # IPv4 addresses
91             10| # 10.0.0.0/8 Private Use (5735, 1918)
92             127| # 127.0.0.0/8 Host-local (5735, 1122)
93             169\.254| # 169.254.0.0/16 Link-local (5735, 3927)
94             172\.(?:1[6-9]|2[0-9]|3[01])| # 172.16.0.0/12 Private Use (5735, 1918)
95             192\.168| # 192.168.0.0/16 Private Use (5735, 1918)
96             100\.(?:6[4-9]|[7-9][0-9]|1[01][0-9]|12[0-7]) # 100.64.0.0/10 CGN (6598)
97             )\..*
98             |
99             (?: # IPv6 addresses
100             # don't use \b here, it hits on :'s
101             (?:IPv6: # with optional prefix
102             | (?<![a-f0-9:])
103             )
104             (?:
105             # IPv4 mapped in IPv6
106             # note the colon after the 12th byte in each here
107             (?:
108             # first 6 (12 bytes) non-zero
109             (?:0{1,4}:){5} ffff:
110             |
111             # leading zeros omitted (note {0,5} not {1,5})
112             ::(?:0{1,4}:){0,4} ffff:
113             |
114             # trailing zeros (in the first 6) omitted
115             (?:0{1,4}:){1,4}: ffff:
116             |
117             # 0000 in second up to (including) fifth omitted
118             0{1,4}::(?:0{1,4}:){1,3} ffff:
119             |
120             # 0000 in third up to (including) fifth omitted
121             (?:0{1,4}:){2}:0{1,2}: ffff:
122             |
123             # 0000 in fourth up to (including) fifth omitted
124             (?:0{1,4}:){3}:0: ffff:
125             |
126             # 0000 in fifth omitted
127             (?:0{1,4}:){4}: ffff:
128             )
129             # and the IPv4 address appended to all of the 12 bytes above
130             (?:
131             10|
132             127|
133             169\.254|
134             172\.(?:1[6-9]|2[0-9]|3[01])|
135             192\.168|
136             100\.(?:6[4-9]|[7-9][0-9]|1[01][0-9]|12[0-7])
137             )\..*
138              
139             | # or IPv6 link-local address space, fe80::/10
140             fe[89ab][0-9a-f]:.*
141              
142             | # or the host-local ::1 addr, as a pure IPv6 address
143              
144             # all 8 (16 bytes) of them present
145             (?:0{1,4}:){7} 0{0,3}1
146             |
147             # leading zeros omitted
148             :(?::0{1,4}){0,6}: 0{0,3}1
149             |
150             # 0000 in second up to (including) seventh omitted
151             0{1,4}:(?::0{1,4}){0,5}: 0{0,3}1
152             |
153             # 0000 in third up to (including) seventh omitted
154             (?:0{1,4}:){2}(?::0{1,4}){0,4}: 0{0,3}1
155             |
156             # 0000 in fourth up to (including) seventh omitted
157             (?:0{1,4}:){3}(?::0{1,4}){0,3}: 0{0,3}1
158             |
159             # 0000 in fifth up to (including) seventh omitted
160             (?:0{1,4}:){4}(?::0{1,4}){0,2}: 0{0,3}1
161             |
162             # 0000 in sixth up to (including) seventh omitted
163             (?:0{1,4}:){5}(?::0{1,4}){0,1}: 0{0,3}1
164             |
165             # 0000 in seventh omitted
166             (?:0{1,4}:){6}: 0{0,3}1
167             )
168             (?![a-f0-9:])
169             )
170             )}oxi;
171 41     41   270  
  41         76  
172             # backward compatibility
173             use constant IP_IN_RESERVED_RANGE => IP_PRIVATE;
174 41     41   214  
  41         77  
  41         7664  
175             # ---------------------------------------------------------------------------
176             # match the various ways of saying "localhost".
177              
178             use constant LOCALHOST => qr/
179 41         5278 (?:
180             # as a string
181             localhost(?:\.localdomain)?
182             |
183             \b(?<!:) # ensure no "::" IPv6 marker before this one
184             # plain IPv4
185             127\.0\.0\.1 \b
186             |
187             # IPv6 addresses
188             # don't use \b here, it hits on :'s
189             (?:IPv6: # with optional prefix
190             | (?<![a-f0-9:])
191             )
192             (?:
193             # IPv4 mapped in IPv6
194             # note the colon after the 12th byte in each here
195             (?:
196             # first 6 (12 bytes) non-zero
197             (?:0{1,4}:){5} ffff:
198             |
199             # leading zeros omitted (note {0,5} not {1,5})
200             ::(?:0{1,4}:){0,4} ffff:
201             |
202             # trailing zeros (in the first 6) omitted
203             (?:0{1,4}:){1,4}: ffff:
204             |
205             # 0000 in second up to (including) fifth omitted
206             0{1,4}::(?:0{1,4}:){1,3} ffff:
207             |
208             # 0000 in third up to (including) fifth omitted
209             (?:0{1,4}:){2}:0{1,2}: ffff:
210             |
211             # 0000 in fourth up to (including) fifth omitted
212             (?:0{1,4}:){3}:0: ffff:
213             |
214             # 0000 in fifth omitted
215             (?:0{1,4}:){4}: ffff:
216             )
217             # and the IPv4 address appended to all of the 12 bytes above
218             127\.0\.0\.1 # no \b, we check later
219              
220             | # or (separately) a pure IPv6 address
221              
222             # all 8 (16 bytes) of them present
223             (?:0{1,4}:){7} 0{0,3}1
224             |
225             # leading zeros omitted
226             :(?::0{1,4}){0,6}: 0{0,3}1
227             |
228             # 0000 in second up to (including) seventh omitted
229             0{1,4}:(?::0{1,4}){0,5}: 0{0,3}1
230             |
231             # 0000 in third up to (including) seventh omitted
232             (?:0{1,4}:){2}(?::0{1,4}){0,4}: 0{0,3}1
233             |
234             # 0000 in fourth up to (including) seventh omitted
235             (?:0{1,4}:){3}(?::0{1,4}){0,3}: 0{0,3}1
236             |
237             # 0000 in fifth up to (including) seventh omitted
238             (?:0{1,4}:){4}(?::0{1,4}){0,2}: 0{0,3}1
239             |
240             # 0000 in sixth up to (including) seventh omitted
241             (?:0{1,4}:){5}(?::0{1,4}){0,1}: 0{0,3}1
242             |
243             # 0000 in seventh omitted
244             (?:0{1,4}:){6}: 0{0,3}1
245             )
246             (?![a-f0-9:])
247             )
248             /oxi;
249 41     41   279  
  41         76  
250             # ---------------------------------------------------------------------------
251             # an IP address, in IPv4 format only.
252             #
253             use constant IPV4_ADDRESS => qr/\b
254 41         14241 (?:1\d\d|2[0-4]\d|25[0-5]|[1-9]\d|\d)\.
255             (?:1\d\d|2[0-4]\d|25[0-5]|[1-9]\d|\d)\.
256             (?:1\d\d|2[0-4]\d|25[0-5]|[1-9]\d|\d)\.
257             (?:1\d\d|2[0-4]\d|25[0-5]|[1-9]\d|\d)
258             \b/ox;
259 41     41   247  
  41         75  
260             # ---------------------------------------------------------------------------
261             # an IP address, in IPv4, IPv4-mapped-in-IPv6, or IPv6 format. NOTE: cannot
262             # just refer to $IPV4_ADDRESS, due to perl bug reported in nesting qr//s. :(
263             #
264             use constant IP_ADDRESS => qr/
265 41         1961 (?:
266             \b(?<!:) # ensure no "::" IPv6 marker before this one
267             # plain IPv4, as above
268             (?:1\d\d|2[0-4]\d|25[0-5]|[1-9]\d|\d)\.
269             (?:1\d\d|2[0-4]\d|25[0-5]|[1-9]\d|\d)\.
270             (?:1\d\d|2[0-4]\d|25[0-5]|[1-9]\d|\d)\.
271             (?:1\d\d|2[0-4]\d|25[0-5]|[1-9]\d|\d)\b
272             |
273             # IPv6 addresses
274             # don't use \b here, it hits on :'s
275             (?:IPv6: # with optional prefix
276             | (?<![a-f0-9:])
277             )
278             (?:
279             # IPv4 mapped in IPv6
280             # note the colon after the 12th byte in each here
281             (?:
282             # all of the first 6 groups (12 bytes) present (none omitted)
283             (?:[a-f0-9]{1,4}:){6}
284             |
285             # leading zeros omitted (note {0,5} not {1,5})
286             ::(?:[a-f0-9]{1,4}:){0,5}
287             |
288             # trailing zeros (in the first 6) omitted
289             (?:[a-f0-9]{1,4}:){1,5}:
290             |
291             # 0000 in second up to (including) fifth omitted
292             [a-f0-9]{1,4}::(?:[a-f0-9]{1,4}:){1,4}
293             |
294             # 0000 in third up to (including) fifth omitted
295             (?:[a-f0-9]{1,4}:){2}:(?:[a-f0-9]{1,4}:){1,3}
296             |
297             # 0000 in fourth up to (including) fifth omitted
298             (?:[a-f0-9]{1,4}:){3}:(?:[a-f0-9]{1,4}:){1,2}
299             |
300             # 0000 in fifth omitted
301             (?:[a-f0-9]{1,4}:){4}:[a-f0-9]{1,4}:
302             )
303             # and the IPv4 address appended to all of the 12 bytes above
304             (?:1\d\d|2[0-4]\d|25[0-5]|[1-9]\d|\d)\.
305             (?:1\d\d|2[0-4]\d|25[0-5]|[1-9]\d|\d)\.
306             (?:1\d\d|2[0-4]\d|25[0-5]|[1-9]\d|\d)\.
307             (?:1\d\d|2[0-4]\d|25[0-5]|[1-9]\d|\d) # no \b, we check later
308              
309             | # or (separately) a pure IPv6 address
310              
311             # all 8 (16 bytes) of them present
312             (?:[a-f0-9]{1,4}:){7}[a-f0-9]{1,4}
313             |
314             # leading zeros omitted
315             :(?::[a-f0-9]{1,4}){1,7}
316             |
317             # trailing zeros omitted
318             (?:[a-f0-9]{1,4}:){1,7}:
319             |
320             # 0000 in second up to (including) seventh omitted
321             [a-f0-9]{1,4}:(?::[a-f0-9]{1,4}){1,6}
322             |
323             # 0000 in third up to (including) seventh omitted
324             (?:[a-f0-9]{1,4}:){2}(?::[a-f0-9]{1,4}){1,5}
325             |
326             # 0000 in fourth up to (including) seventh omitted
327             (?:[a-f0-9]{1,4}:){3}(?::[a-f0-9]{1,4}){1,4}
328             |
329             # 0000 in fifth up to (including) seventh omitted
330             (?:[a-f0-9]{1,4}:){4}(?::[a-f0-9]{1,4}){1,3}
331             |
332             # 0000 in sixth up to (including) seventh omitted
333             (?:[a-f0-9]{1,4}:){5}(?::[a-f0-9]{1,4}){1,2}
334             |
335             # 0000 in seventh omitted
336             (?:[a-f0-9]{1,4}:){6}:[a-f0-9]{1,4}
337             |
338             # :: (the unspecified address 0:0:0:0:0:0:0:0)
339             # dos: I don't expect to see this address in a header, and
340             # it may cause non-address strings to match, but we'll
341             # include it for now since it is valid
342             ::
343             )
344             (?![a-f0-9:])
345             )
346             /oxi;
347 41     41   273  
  41         69  
348             # ---------------------------------------------------------------------------
349              
350             use constant HARVEST_DNSBL_PRIORITY => 500;
351 41     41   200  
  41         4006  
  41         4140  
352             # regular expression that matches message separators in The University of
353             # Washington's MBX mailbox format
354             use constant MBX_SEPARATOR => qr/^([\s\d]\d-[a-zA-Z]{3}-\d{4}\s\d{2}:\d{2}:\d{2}.*),(\d+);([\da-f]{12})-(\w{8})\r?$/;
355 41     41   260 # $1 = datestamp (str)
  41         111  
  41         2069  
356             # $2 = size of message in bytes (int)
357             # $3 = message status - binary (hex)
358             # $4 = message ID (hex)
359              
360             # ---------------------------------------------------------------------------
361             # values used for internal message representations
362              
363             # maximum byte length of lines in the body
364             use constant MAX_BODY_LINE_LENGTH => 2048;
365 41     41   223 # maximum byte length of a header key
  41         96  
  41         1770  
366             use constant MAX_HEADER_KEY_LENGTH => 256;
367 41     41   232 # maximum byte length of a header value including continued lines
  41         74  
  41         1804  
368             use constant MAX_HEADER_VALUE_LENGTH => 8192;
369 41     41   209 # maximum byte length of entire header
  41         65  
  41         1672  
370             use constant MAX_HEADER_LENGTH => 65536;
371 41     41   214  
  41         78  
  41         1832  
372             # maximum byte length of any given URI
373             use constant MAX_URI_LENGTH => 8192;
374 41     41   303  
  41         81  
  41         7679  
375             # used for meta rules and "if" conditionals in Conf::Parser
376             use constant ARITH_EXPRESSION_LEXER => qr/(?:
377 41         1774 [\-\+\d\.]+| # A Number
378             \w[\w\:]*| # Rule or Class Name
379             [\(\)]| # Parens
380             \|\|| # Boolean OR
381             \&\&| # Boolean AND
382             \^| # Boolean XOR
383             !(?!=)| # Boolean NOT
384             >=?| # GT or GTE
385             <=?| # LT or LTE
386             ==| # EQ
387             !=| # NEQ
388             [\+\-\*\/]| # Mathematical Operator
389             [\?:] # ? : Operator
390             )/ox;
391 41     41   253  
  41         66  
392             # ArchiveIterator
393              
394             # if AI doesn't read in the message in the first pass to see if the received
395             # date makes the message useful or not, we need to mark it so that in the
396             # second pass (when the message is actually read + processed) the received
397             # date is calculated. this value signifies "unknown" from the first pass.
398             use constant AI_TIME_UNKNOWN => 0;
399 41     41   185  
  41         64  
  41         5295  
400             # Charsets which use capital letters heavily in their encoded representation.
401             use constant CHARSETS_LIKELY_TO_FP_AS_CAPS => qr{[-_a-z0-9]*(?:
402 41         2390 koi|jp|jis|euc|gb|big5|isoir|cp1251|windows-1251|georgianps|pt154|tis
403             )[-_a-z0-9]*}ix;
404 41     41   251  
  41         69  
405             # Allowed rulename format
406             use constant RULENAME_RE => qr([_a-zA-Z][_a-zA-Z0-9]{0,127});
407 41     41   204 # Exact match
  41         71  
  41         2318  
408             use constant IS_RULENAME => qr/^${\(RULENAME_RE)}$/;
409 41     41   196  
  41         201  
  41         74  
  41         3158  
410             # meta function rules_matching(), takes argument RULENAME_RE with glob *? characters
411             use constant META_RULES_MATCHING_RE => qr/(?<!_)\brules_matching\(\s*([_a-zA-Z*?][_a-zA-Z0-9*?]{0,127})\s*\)/;
412 41     41   210  
  41         116  
  41         2753  
413             1;