File Coverage

blib/lib/Mail/SpamAssassin/Constants.pm
Criterion Covered Total %
statement 74 74 100.0
branch n/a
condition n/a
subroutine 24 24 100.0
pod n/a
total 98 98 100.0


line stmt bran cond sub pod time code
1             # Constants used in many parts of the SpamAssassin codebase.
2             #
3             # TODO! we need to reimplement parts of the RESERVED regexp!
4              
5             # <@LICENSE>
6             # Licensed to the Apache Software Foundation (ASF) under one or more
7             # contributor license agreements. See the NOTICE file distributed with
8             # this work for additional information regarding copyright ownership.
9             # The ASF licenses this file to you under the Apache License, Version 2.0
10             # (the "License"); you may not use this file except in compliance with
11             # the License. You may obtain a copy of the License at:
12             #
13             # http://www.apache.org/licenses/LICENSE-2.0
14             #
15             # Unless required by applicable law or agreed to in writing, software
16             # distributed under the License is distributed on an "AS IS" BASIS,
17             # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18             # See the License for the specific language governing permissions and
19             # limitations under the License.
20             # </@LICENSE>
21              
22             package Mail::SpamAssassin::Constants;
23              
24 40     40   266 use strict;
  40         86  
  40         1353  
25 40     40   229 use warnings;
  40         83  
  40         1458  
26 40     40   236 use re 'taint';
  40         203  
  40         1386  
27              
28 40     40   267 use Exporter ();
  40         96  
  40         7032  
29             our @ISA = qw(Exporter);
30              
31             our(@BAYES_VARS, @IP_VARS, @SA_VARS, %EXPORT_TAGS, @EXPORT_OK);
32              
33             # NOTE: Unless these need to be available at BEGIN time, it is simpler to move this out of the BEGIN block and use a plain "our" statement.
34             BEGIN {
35 40     40   193 @IP_VARS = qw(
36             IP_IN_RESERVED_RANGE IP_PRIVATE LOCALHOST IPV4_ADDRESS IP_ADDRESS
37             );
38 40         118 @BAYES_VARS = qw(
39             DUMP_MAGIC DUMP_TOKEN DUMP_BACKUP
40             );
41             # These are generic constants that may be used across several modules
42 40         158 @SA_VARS = qw(
43             HARVEST_DNSBL_PRIORITY MBX_SEPARATOR
44             MAX_BODY_LINE_LENGTH MAX_HEADER_KEY_LENGTH MAX_HEADER_VALUE_LENGTH
45             MAX_HEADER_LENGTH ARITH_EXPRESSION_LEXER AI_TIME_UNKNOWN
46             CHARSETS_LIKELY_TO_FP_AS_CAPS MAX_URI_LENGTH
47             );
48              
49 40         461 %EXPORT_TAGS = (
50             bayes => [ @BAYES_VARS ],
51             ip => [ @IP_VARS ],
52             sa => [ @SA_VARS ],
53             all => [ @BAYES_VARS, @IP_VARS, @SA_VARS ],
54             );
55              
56 40         1326 @EXPORT_OK = ( @BAYES_VARS, @IP_VARS, @SA_VARS );
57             }
58              
59             # BAYES_VARS
60 40     40   265 use constant DUMP_MAGIC => 1;
  40         72  
  40         2646  
61 40     40   245 use constant DUMP_TOKEN => 2;
  40         88  
  40         2012  
62 40     40   256 use constant DUMP_SEEN => 4;
  40         76  
  40         2102  
63 40     40   225 use constant DUMP_BACKUP => 8;
  40         73  
  40         13152  
64              
65             # IP_VARS
66             # ---------------------------------------------------------------------------
67             # Initialize a regexp for private IPs, i.e. ones that could be
68             # used inside a company and be the first or second relay hit by
69             # a message. Some companies use these internally and translate
70             # them using a NAT firewall. These are listed in the RBL as invalid
71             # originators -- which is true, if you receive the mail directly
72             # from them; however we do not, so we should ignore them.
73             #
74             # sources:
75             # IANA = <http://www.iana.org/numbers>,
76             # 5735 = <http://tools.ietf.org/html/rfc5735>
77             # 6598 = <http://tools.ietf.org/html/rfc6598>
78             # 4193 = <http://tools.ietf.org/html/rfc4193>
79             # CYMRU = <http://www.cymru.com/Documents/bogon-list.html>
80             #
81             # This includes:
82             # host-local address space 127.0.0.0/8 and ::1,
83             # link-local address space 169.254.0.0/16 and fe80::/10,
84             # private-use address space 10.0.0.0/8, 172.16.0.0/12, 192.168.0.0/16,
85             # TODO: Unique Local Unicast Addresses fc00::/7 (RFC 4193)
86             # shared address space 100.64.0.0/10 (RFC 6598 - for use in CGN),
87             # IPv4-mapped IPv6 address ::ffff:0:0/96 (RFC 3513)
88             #
89 40         2357 use constant IP_PRIVATE => qr{^(?:
90             (?: # IPv4 addresses
91             10| # 10.0.0.0/8 Private Use (5735, 1918)
92             127| # 127.0.0.0/8 Host-local (5735, 1122)
93             169\.254| # 169.254.0.0/16 Link-local (5735, 3927)
94             172\.(?:1[6-9]|2[0-9]|3[01])| # 172.16.0.0/12 Private Use (5735, 1918)
95             192\.168| # 192.168.0.0/16 Private Use (5735, 1918)
96             100\.(?:6[4-9]|[7-9][0-9]|1[01][0-9]|12[0-7]) # 100.64.0.0/10 CGN (6598)
97             )\..*
98             |
99             (?: # IPv6 addresses
100             # don't use \b here, it hits on :'s
101             (?:IPv6: # with optional prefix
102             | (?<![a-f0-9:])
103             )
104             (?:
105             # IPv4 mapped in IPv6
106             # note the colon after the 12th byte in each here
107             (?:
108             # all of the first 6 groups (12 bytes) written out: five zero groups, then ffff
109             (?:0{1,4}:){5} ffff:
110             |
111             # leading zeros omitted (note {0,4} not {1,4})
112             ::(?:0{1,4}:){0,4} ffff:
113             |
114             # trailing zeros (in the first 6) omitted
115             (?:0{1,4}:){1,4}: ffff:
116             |
117             # 0000 in second up to (including) fifth omitted
118             0{1,4}::(?:0{1,4}:){1,3} ffff:
119             |
120             # 0000 in third up to (including) fifth omitted
121             (?:0{1,4}:){2}:0{1,2}: ffff:
122             |
123             # 0000 in fourth up to (including) fifth omitted
124             (?:0{1,4}:){3}:0: ffff:
125             |
126             # 0000 in fifth omitted
127             (?:0{1,4}:){4}: ffff:
128             )
129             # and the IPv4 address appended to all of the 12 bytes above
130             (?:
131             10|
132             127|
133             169\.254|
134             172\.(?:1[6-9]|2[0-9]|3[01])|
135             192\.168|
136             100\.(?:6[4-9]|[7-9][0-9]|1[01][0-9]|12[0-7])
137             )\..*
138              
139             | # or IPv6 link-local address space, fe80::/10
140             fe[89ab][0-9a-f]:.*
141              
142             | # or the host-local ::1 addr, as a pure IPv6 address
143              
144             # all 8 (16 bytes) of them present
145             (?:0{1,4}:){7} 0{0,3}1
146             |
147             # leading zeros omitted
148             :(?::0{1,4}){0,6}: 0{0,3}1
149             |
150             # 0000 in second up to (including) seventh omitted
151             0{1,4}:(?::0{1,4}){0,5}: 0{0,3}1
152             |
153             # 0000 in third up to (including) seventh omitted
154             (?:0{1,4}:){2}(?::0{1,4}){0,4}: 0{0,3}1
155             |
156             # 0000 in fourth up to (including) seventh omitted
157             (?:0{1,4}:){3}(?::0{1,4}){0,3}: 0{0,3}1
158             |
159             # 0000 in fifth up to (including) seventh omitted
160             (?:0{1,4}:){4}(?::0{1,4}){0,2}: 0{0,3}1
161             |
162             # 0000 in sixth up to (including) seventh omitted
163             (?:0{1,4}:){5}(?::0{1,4}){0,1}: 0{0,3}1
164             |
165             # 0000 in seventh omitted
166             (?:0{1,4}:){6}: 0{0,3}1
167             )
168             (?![a-f0-9:])
169             )
170 40     40   324 )}oxi;
  40         86  
171              
172             # backward compatibility
173 40     40   242 use constant IP_IN_RESERVED_RANGE => IP_PRIVATE;
  40         82  
  40         9092  
174              
175             # ---------------------------------------------------------------------------
176             # match the various ways of saying "localhost".
177              
178 40         5795 use constant LOCALHOST => qr/
179             (?:
180             # as a string
181             localhost(?:\.localdomain)?
182             |
183             \b(?<!:) # ensure no "::" IPv6 marker before this one
184             # plain IPv4
185             127\.0\.0\.1 \b
186             |
187             # IPv6 addresses
188             # don't use \b here, it hits on :'s
189             (?:IPv6: # with optional prefix
190             | (?<![a-f0-9:])
191             )
192             (?:
193             # IPv4 mapped in IPv6
194             # note the colon after the 12th byte in each here
195             (?:
196             # all of the first 6 groups (12 bytes) written out: five zero groups, then ffff
197             (?:0{1,4}:){5} ffff:
198             |
199             # leading zeros omitted (note {0,4} not {1,4})
200             ::(?:0{1,4}:){0,4} ffff:
201             |
202             # trailing zeros (in the first 6) omitted
203             (?:0{1,4}:){1,4}: ffff:
204             |
205             # 0000 in second up to (including) fifth omitted
206             0{1,4}::(?:0{1,4}:){1,3} ffff:
207             |
208             # 0000 in third up to (including) fifth omitted
209             (?:0{1,4}:){2}:0{1,2}: ffff:
210             |
211             # 0000 in fourth up to (including) fifth omitted
212             (?:0{1,4}:){3}:0: ffff:
213             |
214             # 0000 in fifth omitted
215             (?:0{1,4}:){4}: ffff:
216             )
217             # and the IPv4 address appended to all of the 12 bytes above
218             127\.0\.0\.1 # no \b, we check later
219              
220             | # or (separately) a pure IPv6 address
221              
222             # all 8 (16 bytes) of them present
223             (?:0{1,4}:){7} 0{0,3}1
224             |
225             # leading zeros omitted
226             :(?::0{1,4}){0,6}: 0{0,3}1
227             |
228             # 0000 in second up to (including) seventh omitted
229             0{1,4}:(?::0{1,4}){0,5}: 0{0,3}1
230             |
231             # 0000 in third up to (including) seventh omitted
232             (?:0{1,4}:){2}(?::0{1,4}){0,4}: 0{0,3}1
233             |
234             # 0000 in fourth up to (including) seventh omitted
235             (?:0{1,4}:){3}(?::0{1,4}){0,3}: 0{0,3}1
236             |
237             # 0000 in fifth up to (including) seventh omitted
238             (?:0{1,4}:){4}(?::0{1,4}){0,2}: 0{0,3}1
239             |
240             # 0000 in sixth up to (including) seventh omitted
241             (?:0{1,4}:){5}(?::0{1,4}){0,1}: 0{0,3}1
242             |
243             # 0000 in seventh omitted
244             (?:0{1,4}:){6}: 0{0,3}1
245             )
246             (?![a-f0-9:])
247             )
248 40     40   276 /oxi;
  40         71  
249              
250             # ---------------------------------------------------------------------------
251             # an IP address, in IPv4 format only.
252             #
253 40         15851 use constant IPV4_ADDRESS => qr/\b
254             (?:1\d\d|2[0-4]\d|25[0-5]|[1-9]\d|\d)\.
255             (?:1\d\d|2[0-4]\d|25[0-5]|[1-9]\d|\d)\.
256             (?:1\d\d|2[0-4]\d|25[0-5]|[1-9]\d|\d)\.
257             (?:1\d\d|2[0-4]\d|25[0-5]|[1-9]\d|\d)
258 40     40   282 \b/ox;
  40         92  
259              
260             # ---------------------------------------------------------------------------
261             # an IP address, in IPv4, IPv4-mapped-in-IPv6, or IPv6 format. NOTE: cannot
262             # just refer to $IPV4_ADDRESS, due to perl bug reported in nesting qr//s. :(
263             #
264 40         2392 use constant IP_ADDRESS => qr/
265             (?:
266             \b(?<!:) # ensure no "::" IPv6 marker before this one
267             # plain IPv4, as above
268             (?:1\d\d|2[0-4]\d|25[0-5]|[1-9]\d|\d)\.
269             (?:1\d\d|2[0-4]\d|25[0-5]|[1-9]\d|\d)\.
270             (?:1\d\d|2[0-4]\d|25[0-5]|[1-9]\d|\d)\.
271             (?:1\d\d|2[0-4]\d|25[0-5]|[1-9]\d|\d)\b
272             |
273             # IPv6 addresses
274             # don't use \b here, it hits on :'s
275             (?:IPv6: # with optional prefix
276             | (?<![a-f0-9:])
277             )
278             (?:
279             # IPv4 mapped in IPv6
280             # note the colon after the 12th byte in each here
281             (?:
282             # all of the first 6 groups (12 bytes) present, none omitted
283             (?:[a-f0-9]{1,4}:){6}
284             |
285             # leading zeros omitted (note {0,5} not {1,5})
286             ::(?:[a-f0-9]{1,4}:){0,5}
287             |
288             # trailing zeros (in the first 6) omitted
289             (?:[a-f0-9]{1,4}:){1,5}:
290             |
291             # 0000 in second up to (including) fifth omitted
292             [a-f0-9]{1,4}::(?:[a-f0-9]{1,4}:){1,4}
293             |
294             # 0000 in third up to (including) fifth omitted
295             (?:[a-f0-9]{1,4}:){2}:(?:[a-f0-9]{1,4}:){1,3}
296             |
297             # 0000 in fourth up to (including) fifth omitted
298             (?:[a-f0-9]{1,4}:){3}:(?:[a-f0-9]{1,4}:){1,2}
299             |
300             # 0000 in fifth omitted
301             (?:[a-f0-9]{1,4}:){4}:[a-f0-9]{1,4}:
302             )
303             # and the IPv4 address appended to all of the 12 bytes above
304             (?:1\d\d|2[0-4]\d|25[0-5]|[1-9]\d|\d)\.
305             (?:1\d\d|2[0-4]\d|25[0-5]|[1-9]\d|\d)\.
306             (?:1\d\d|2[0-4]\d|25[0-5]|[1-9]\d|\d)\.
307             (?:1\d\d|2[0-4]\d|25[0-5]|[1-9]\d|\d) # no \b, we check later
308              
309             | # or (separately) a pure IPv6 address
310              
311             # all 8 (16 bytes) of them present
312             (?:[a-f0-9]{1,4}:){7}[a-f0-9]{1,4}
313             |
314             # leading zeros omitted
315             :(?::[a-f0-9]{1,4}){1,7}
316             |
317             # trailing zeros omitted
318             (?:[a-f0-9]{1,4}:){1,7}:
319             |
320             # 0000 in second up to (including) seventh omitted
321             [a-f0-9]{1,4}:(?::[a-f0-9]{1,4}){1,6}
322             |
323             # 0000 in third up to (including) seventh omitted
324             (?:[a-f0-9]{1,4}:){2}(?::[a-f0-9]{1,4}){1,5}
325             |
326             # 0000 in fourth up to (including) seventh omitted
327             (?:[a-f0-9]{1,4}:){3}(?::[a-f0-9]{1,4}){1,4}
328             |
329             # 0000 in fifth up to (including) seventh omitted
330             (?:[a-f0-9]{1,4}:){4}(?::[a-f0-9]{1,4}){1,3}
331             |
332             # 0000 in sixth up to (including) seventh omitted
333             (?:[a-f0-9]{1,4}:){5}(?::[a-f0-9]{1,4}){1,2}
334             |
335             # 0000 in seventh omitted
336             (?:[a-f0-9]{1,4}:){6}:[a-f0-9]{1,4}
337             |
338             # :: (the unspecified address 0:0:0:0:0:0:0:0)
339             # dos: I don't expect to see this address in a header, and
340             # it may cause non-address strings to match, but we'll
341             # include it for now since it is valid
342             ::
343             )
344             (?![a-f0-9:])
345             )
346 40     40   293 /oxi;
  40         73  
347              
348             # ---------------------------------------------------------------------------
349              
350 40     40   253 use constant HARVEST_DNSBL_PRIORITY => 500;
  40         85  
  40         4833  
351              
352             # regular expression that matches message separators in The University of
353             # Washington's MBX mailbox format
354 40     40   281 use constant MBX_SEPARATOR => qr/^([\s\d]\d-[a-zA-Z]{3}-\d{4}\s\d{2}:\d{2}:\d{2}.*),(\d+);([\da-f]{12})-(\w{8})\r?$/;
  40         78  
  40         2298  
355             # $1 = datestamp (str)
356             # $2 = size of message in bytes (int)
357             # $3 = message status - binary (hex)
358             # $4 = message ID (hex)
359              
360             # ---------------------------------------------------------------------------
361             # values used for internal message representations
362              
363             # maximum byte length of lines in the body
364 40     40   242 use constant MAX_BODY_LINE_LENGTH => 2048;
  40         86  
  40         1906  
365             # maximum byte length of a header key
366 40     40   254 use constant MAX_HEADER_KEY_LENGTH => 256;
  40         289  
  40         2001  
367             # maximum byte length of a header value including continued lines
368 40     40   233 use constant MAX_HEADER_VALUE_LENGTH => 8192;
  40         68  
  40         1905  
369             # maximum byte length of entire header
370 40     40   254 use constant MAX_HEADER_LENGTH => 65536;
  40         80  
  40         1960  
371              
372             # maximum byte length of any given URI
373 40     40   236 use constant MAX_URI_LENGTH => 8192;
  40         74  
  40         8322  
374              
375             # used for meta rules and "if" conditionals in Conf::Parser
376 40         1975 use constant ARITH_EXPRESSION_LEXER => qr/(?:
377             [\-\+\d\.]+| # A Number
378             \w[\w\:]*| # Rule or Class Name
379             [\(\)]| # Parens
380             \|\|| # Boolean OR
381             \&\&| # Boolean AND
382             \^| # Boolean XOR
383             !(?!=)| # Boolean NOT
384             >=?| # GT or EQ
385             <=?| # LT or EQ
386             ==| # EQ
387             !=| # NEQ
388             [\+\-\*\/]| # Mathematical Operator
389             [\?:] # ? : Operator
390 40     40   282 )/ox;
  40         82  
391              
392             # ArchiveIterator
393              
394             # if AI doesn't read in the message in the first pass to see if the received
395             # date makes the message useful or not, we need to mark it so that in the
396             # second pass (when the message is actually read + processed) the received
397             # date is calculated. this value signifies "unknown" from the first pass.
398 40     40   205 use constant AI_TIME_UNKNOWN => 0;
  40         69  
  40         5791  
399              
400             # Charsets which use capital letters heavily in their encoded representation.
401 40         2449 use constant CHARSETS_LIKELY_TO_FP_AS_CAPS => qr{[-_a-z0-9]*(?:
402             koi|jp|jis|euc|gb|big5|isoir|cp1251|windows-1251|georgianps|pt154|tis
403 40     40   322 )[-_a-z0-9]*}ix;
  40         79  
404              
405             1;