File Coverage

lib/Mail/SpamAssassin/Util.pm
Criterion Covered Total %
statement 526 886 59.3
branch 228 530 43.0
condition 79 173 45.6
subroutine 56 77 72.7
pod 4 53 7.5
total 893 1719 51.9


line stmt bran cond sub pod time code
1             # <@LICENSE>
2             # Licensed to the Apache Software Foundation (ASF) under one or more
3             # contributor license agreements. See the NOTICE file distributed with
4             # this work for additional information regarding copyright ownership.
5             # The ASF licenses this file to you under the Apache License, Version 2.0
6             # (the "License"); you may not use this file except in compliance with
7             # the License. You may obtain a copy of the License at:
8             #
9             # http://www.apache.org/licenses/LICENSE-2.0
10             #
11             # Unless required by applicable law or agreed to in writing, software
12             # distributed under the License is distributed on an "AS IS" BASIS,
13             # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14             # See the License for the specific language governing permissions and
15             # limitations under the License.
16             # </@LICENSE>
17              
18             =head1 NAME
19              
20             Mail::SpamAssassin::Util - utility functions
21              
22             =head1 DESCRIPTION
23              
24             A general class for utility functions. Please use this for functions that
25             stand alone, without requiring a $self object, Portability functions
26             especially.
27              
28             NOTE: The functions in this module are to be considered private. Their API may
29             change at any point, and it's expected that they'll only be used by other
30             Mail::SpamAssassin modules. (TODO: we should probably revisit this if
31             it's useful for plugin development.)
32              
33             NOTE: Utility functions should not be changing global variables such
34             as $_, $1, $2, ... $/, etc. unless explicitly documented. If these
35             variables are in use by these functions, they should be localized.
36              
37             =over 4
38              
39             =cut
40              
41             package Mail::SpamAssassin::Util;
42              
43 43     43   428944 use strict;
  43         140  
  43         1500  
44 43     43   247 use warnings;
  43         82  
  43         1584  
45             # use bytes;
46 43     43   264 use re 'taint';
  43         90  
  43         2184  
47              
48             require 5.008001; # needs utf8::is_utf8()
49              
50 43     43   1940 use Mail::SpamAssassin::Logger;
  43         83  
  43         2779  
51              
52 43     43   290 use Exporter ();
  43         114  
  43         4127  
53              
54             our @ISA = qw(Exporter);
55             our @EXPORT = ();
56             our @EXPORT_OK = qw(&local_tz &base64_decode &untaint_var &untaint_file_path
57             &exit_status_str &proc_status_ok &am_running_on_windows
58             &reverse_ip_address &decode_dns_question_entry &touch_file
59             &get_my_locales &parse_rfc822_date &get_user_groups
60             &secure_tmpfile &secure_tmpdir &uri_list_canonicalize
61             &compile_regexp &qr_to_string &is_fqdn_valid);
62              
63             our $AM_TAINTED;
64              
65 43     43   285 use Config;
  43         96  
  43         2001  
66 43     43   4207 use IO::Handle;
  43         42761  
  43         1929  
67 43     43   278 use File::Spec;
  43         82  
  43         1186  
68 43     43   237 use File::Basename;
  43         100  
  43         3124  
69 43     43   21532 use Time::Local;
  43         93495  
  43         2462  
70 43     43   19895 use Sys::Hostname (); # don't import hostname() into this namespace!
  43         45297  
  43         1191  
71 43     43   3304 use NetAddr::IP 4.000;
  43         123370  
  43         262  
72 43     43   6695 use Fcntl;
  43         86  
  43         12749  
73 43     43   3738 use Errno qw(ENOENT EACCES EEXIST);
  43         9227  
  43         6220  
74 43         490 use POSIX qw(:sys_wait_h WIFEXITED WIFSIGNALED WIFSTOPPED WEXITSTATUS
75 43     43   309 WTERMSIG WSTOPSIG);
  43         100  
76              
77             ###########################################################################
78              
79 43     43   20119 use constant HAS_MIME_BASE64 => eval { require MIME::Base64; };
  43         94  
  43         71  
  43         20461  
80 43     43   29645 use constant RUNNING_ON_WINDOWS => ($^O =~ /^(?:mswin|dos|os2)/oi);
  43         107  
  43         2712  
81              
82             # These are only defined as stubs on Windows (see bugs 6798 and 6470).
83             BEGIN {
84 43 50   43   48932 if (RUNNING_ON_WINDOWS) {
85 43     43   262 no warnings 'redefine';
  43         101  
  43         6043  
86              
87             # See the section on $? at
88             # http://perldoc.perl.org/perlvar.html#Error-Variables for some
89             # hints on the magic numbers that are used here.
90 0         0 *WIFEXITED = sub { not $_[0] & 127 };
  0         0  
91 0         0 *WEXITSTATUS = sub { $_[0] >> 8 };
  0         0  
92 0 0       0 *WIFSIGNALED = sub { ($_[0] & 127) && (($_[0] & 127) != 127) };
  0         0  
93 0         0 *WTERMSIG = sub { $_[0] & 127 };
  0         0  
94             }
95             }
96              
97             ###########################################################################
98              
99             # find an executable in the current $PATH (or whatever for that platform)
100             {
101             # Show the PATH we're going to explore only once.
102             my $displayed_path = 0;
103              
104             sub find_executable_in_env_path {
105 2     2 0 7 my ($filename) = @_;
106              
107 2         7 clean_path_in_taint_mode();
108 2 100       6 if ( !$displayed_path++ ) {
109 1         43 dbg("util: current PATH is: ".join($Config{'path_sep'},File::Spec->path()));
110             }
111 2         27 foreach my $path (File::Spec->path()) {
112 6         65 my $fname = File::Spec->catfile ($path, $filename);
113 6 50       90 if ( -f $fname ) {
114 0 0       0 if (-x $fname) {
115 0         0 dbg("util: executable for $filename was found at $fname");
116 0         0 return $fname;
117             }
118             else {
119 0         0 dbg("util: $filename was found at $fname, but isn't executable");
120             }
121             }
122             }
123 2         10 return;
124             }
125             }
126              
127             ###########################################################################
128              
129             # taint mode: delete more unsafe vars for exec, as per perlsec
130             {
131             # We only need to clean the environment once, it stays clean ...
132             my $cleaned_taint_path = 0;
133              
134             sub clean_path_in_taint_mode {
135 106 100   106 0 529 return if ($cleaned_taint_path++);
136 25 50       144 return unless am_running_in_taint_mode();
137              
138 25         101 dbg("util: taint mode: deleting unsafe environment variables, resetting PATH");
139              
140 25 50       289 if (RUNNING_ON_WINDOWS) {
141 0         0 dbg("util: running on Win32, skipping PATH cleaning");
142 0         0 return;
143             }
144              
145 25         620 delete @ENV{qw(IFS CDPATH ENV BASH_ENV)};
146              
147             # Go through and clean the PATH out
148 25         96 my @path;
149             my @stat;
150 25         1024 foreach my $dir (File::Spec->path()) {
151 93 50       256 next unless $dir;
152              
153             # untaint if at least 1 char and no NL (is the restriction intentional?)
154 93         225 local ($1);
155 93 50       840 $dir = untaint_var($1) if $dir =~ /^(.+)$/;
156             # then clean ( 'foo/./bar' -> 'foo/bar', etc. )
157 93         387 $dir = File::Spec->canonpath($dir);
158              
159 93 100       2537 if (!File::Spec->file_name_is_absolute($dir)) {
    50          
    50          
    50          
160 3         16 dbg("util: PATH included '$dir', which is not absolute, dropping");
161 3         11 next;
162             }
163             elsif (!(@stat=stat($dir))) {
164 0         0 dbg("util: PATH included '$dir', which is unusable, dropping: $!");
165 0         0 next;
166             }
167             elsif (!-d _) {
168 0         0 dbg("util: PATH included '$dir', which isn't a directory, dropping");
169 0         0 next;
170             }
171             elsif (($stat[2]&2) != 0) {
172             # World-Writable directories are considered insecure.
173             # We could be more paranoid and check all of the parent directories as well,
174             # but it's good for now.
175 0         0 dbg("util: PATH included '$dir', which is world writable, dropping");
176 0         0 next;
177             }
178              
179 90         562 dbg("util: PATH included '$dir', keeping");
180 90         314 push(@path, $dir);
181             }
182              
183 25         1255 $ENV{'PATH'} = join($Config{'path_sep'}, @path);
184 25         247 dbg("util: final PATH set to: ".$ENV{'PATH'});
185             }
186             }
187              
188             # taint mode: are we running in taint mode? 1 for yes, 0 for no.
189             sub am_running_in_taint_mode {
190 174 100   174 0 1435 return $AM_TAINTED if defined $AM_TAINTED;
191              
192 25 50       130 if ($] >= 5.008) {
193             # perl 5.8 and above, ${^TAINT} is a syntax violation in 5.005
194 25     25   378 $AM_TAINTED = eval q(no warnings q(syntax); ${^TAINT});
  25         71  
  25         2010  
  25         2680  
195             }
196             else {
197             # older versions
198 0         0 my $blank;
199 0         0 for my $d ((File::Spec->curdir, File::Spec->rootdir, File::Spec->tmpdir)) {
200 0 0       0 opendir(TAINT, $d) || next;
201 0         0 $blank = readdir(TAINT);
202 0 0       0 closedir(TAINT) or die "error closing directory $d: $!";
203 0         0 last;
204             }
205 0 0 0     0 if (!(defined $blank && $blank)) {
206             # these are sometimes untainted, so this is less preferable than readdir
207 0         0 $blank = join('', values %ENV, $0, @ARGV);
208             }
209 0         0 $blank = substr($blank, 0, 0);
210             # seriously mind-bending perl
211 0 0       0 $AM_TAINTED = not eval { eval "1 || $blank" || 1 };
  0         0  
212             }
213 25 50       214 dbg("util: running in taint mode? %s", $AM_TAINTED ? "yes" : "no");
214 25         130 return $AM_TAINTED;
215             }
216              
217             ###########################################################################
218              
219             sub am_running_on_windows {
220 176     176 0 1914 return RUNNING_ON_WINDOWS;
221             }
222              
223             ###########################################################################
224              
225             # untaint a path to a file, e.g. "/home/jm/.spamassassin/foo",
226             # "C:\Program Files\SpamAssassin\tmp\foo", "/home/��t/etc".
227             #
228             # TODO: this does *not* handle locales well. We cannot use "use locale"
229             # and \w, since that will not detaint the data. So instead just allow the
230             # high-bit chars from ISO-8859-1, none of which have special metachar
231             # meanings (as far as I know).
232             #
233             sub untaint_file_path {
234 311     311 0 930 my ($path) = @_;
235              
236 311 50       1117 return unless defined($path);
237 311 50       993 return '' if ($path eq '');
238              
239 311         1028 local ($1);
240             # Barry Jaspan: allow ~ and spaces, good for Windows.
241             # Also return '' if input is '', as it is a safe path.
242             # Bug 7264: allow also parenthesis, e.g. "C:\Program Files (x86)"
243 311         929 my $chars = '-_A-Za-z0-9.%=+,/:()\\@\\xA0-\\xFF\\\\';
244 311         2925 my $re = qr{^\s*([$chars][${chars}~ ]*)\z}o;
245              
246 311 50       3373 if ($path =~ $re) {
247 311         889 $path = $1;
248 311         1416 return untaint_var($path);
249             } else {
250 0         0 warn "util: refusing to untaint suspicious path: \"$path\"\n";
251 0         0 return $path;
252             }
253             }
254              
255             sub untaint_hostname {
256 0     28 0 0 my ($host) = @_;
257              
258 0 0       0 return unless defined($host);
259 0 0       0 return '' if ($host eq '');
260              
261             # from RFC 1035, but allowing domains starting with numbers:
262             # $label = q/[A-Za-z\d](?:[A-Za-z\d-]{0,61}[A-Za-z\d])?/;
263             # $domain = qq<$label(?:\.$label)*>;
264             # length($host) <= 255 && $host =~ /^($domain)$/
265             # expanded (no variables in the re) because of a tainting bug in Perl 5.8.0
266 0 0 0     0 if (length($host) <= 255 && $host =~ /^[a-z\d](?:[a-z\d-]{0,61}[a-z\d])?(?:\.[a-z\d](?:[a-z\d-]{0,61}[a-z\d])?)*$/i) {
267 28         59 return untaint_var($host);
268             }
269             else {
270 28         52 warn "util: cannot untaint hostname: \"$host\"\n";
271 28         54 return $host;
272             }
273             }
274              
275             # This sub takes a scalar or a reference to an array, hash, scalar or another
276             # reference and recursively untaints all its values (and keys if it's a
277             # reference to a hash). It should be used with caution as blindly untainting
278             # values subverts the purpose of working in taint mode. It will return the
279             # untainted value if requested but to avoid unnecessary copying, the return
280             # value should be ignored when working on lists.
281             # Bad:
282             # %ENV = untaint_var(\%ENV);
283             # Better:
284             # untaint_var(\%ENV);
285             #
286             sub untaint_var {
287             # my $arg = $_[0]; # avoid copying unnecessarily
288 13168 100   13140 0 49727 if (!ref $_[0]) { # optimized by-far-the-most-common case
289             # Bug 7591 not using this faster untaint. https://bz.apache.org/SpamAssassin/show_bug.cgi?id=7591
290             #return defined $_[0] ? scalar each %{ { $_[0] => undef } } : undef; ## no critic (ProhibitExplicitReturnUndef) - See Bug 7120 - fast untaint (hash keys cannot be tainted)
291 43     43   345 no re 'taint'; # override a "use re 'taint'" from outer scope
  43         101  
  43         436281  
292 13071 50       30017 return undef if !defined $_[0]; ## no critic (ProhibitExplicitReturnUndef) - See Bug 7120
293 13043         30088 local($1); # avoid Perl taint bug: tainted global $1 propagates taintedness
294 13043         35525 $_[0] =~ /^(.*)\z/s;
295 13043         51764 return $1;
296              
297             } else {
298 97         404 my $r = ref $_[0];
299 97 100 33     492 if ($r eq 'ARRAY') {
    100          
    50          
300 14         25 my $arg = $_[0];
301 14         25 $_ = untaint_var($_) for @{$arg};
  14         84  
302 14 50       52 return @{$arg} if wantarray;
  0         0  
303             }
304             elsif ($r eq 'HASH') {
305 1         3 my $arg = $_[0];
306 1 50       12 if ($arg == \%ENV) { # purge undefs from %ENV, untaint the rest
307 1         4 while (my($k, $v) = each %{$arg}) {
  35         156  
308             # It is safe to delete the item most recently returned by each()
309 34 50       76 if (!defined $v) { delete ${$arg}{$k}; next }
  0         0  
  0         0  
  0         0  
310 34         81 ${$arg}{untaint_var($k)} = untaint_var($v);
  34         220  
311             }
312             } else {
313             # hash keys are never tainted,
314             # although old version of perl had some quirks there
315 0         0 while (my($k, $v) = each %{$arg}) {
  0         0  
316 0         0 ${$arg}{untaint_var($k)} = untaint_var($v);
  0         0  
317             }
318             }
319 1 50       15 return %{$arg} if wantarray;
  0         0  
320             }
321             elsif ($r eq 'SCALAR' || $r eq 'REF') {
322 82         144 my $arg = $_[0];
323 82         111 ${$arg} = untaint_var(${$arg});
  82         178  
  82         341  
324             }
325             else {
326 0         0 warn "util: can't untaint a $r !\n";
327             }
328             }
329 97         215 return $_[0];
330             }
331              
332             ###########################################################################
333              
334             sub taint_var {
335 1070     1070 0 1720 my ($v) = @_;
336 1070 50       1870 return $v unless defined $v; # can't taint "undef"
337              
338             # $^X is apparently "always tainted".
339             # Concatenating an empty tainted string taints the result.
340 1070         4693 return $v . substr($^X, 0, 0);
341             }
342              
343             ###########################################################################
344              
345             # Check for full hostname / FQDN / DNS name validity. IP addresses must be
346             # validated with other functions like $IP_ADDRESS. Does not check for valid
347             # TLD, use $self->{main}->{registryboundaries}->is_domain_valid()
348             # additionally for that.
349             sub is_fqdn_valid {
350 588     588 0 1149 my ($host) = @_;
351 588 50       1177 return if !defined $host;
352              
353             # remove trailing dots
354 588         1376 $host =~ s/\.+\z//;
355              
356             # max total length 253
357 588 50       1236 return if length($host) > 253;
358              
359             # validate dot separated components/labels
360 588         1810 my @labels = split(/\./, lc $host);
361 588         957 my $cnt = scalar @labels;
362 588 100       1370 return unless $cnt > 1; # at least two labels required
363 541         959 foreach my $label (@labels) {
364             # length of 1-63
365 1339 100       2586 return if length($label) < 1;
366 1328 50       2156 return if length($label) > 63;
367             # alphanumeric, - allowed only in middle part
368             # underscores are allowed in DNS queries, so we allow here
369 1328 100       4513 return if $label !~ /^[a-z0-9_](?:[a-z0-9_-]*[a-z0-9_])?$/;
370             # 1st-2nd level part can not contain _, only third+ can
371 1306 100 100     3743 if ($cnt == 2 || $cnt == 1) {
372 1000 100       2272 return if index($label, '_') != -1;
373             }
374 1290         2218 $cnt--;
375             }
376              
377             # is good
378 492         1827 return 1;
379             }
380              
381             ###########################################################################
382              
383             # map process termination status number to an informative string, and
384             # append optional message (dual-valued errno or a string or a number),
385             # returning the resulting string
386             #
387             sub exit_status_str {
388 0     0 0 0 my($stat,$errno) = @_;
389 0         0 my $str;
390 0 0       0 if (!defined($stat)) {
    0          
    0          
391 0         0 $str = '(no status)';
392             } elsif (WIFEXITED($stat)) {
393 0         0 $str = sprintf("exit %d", WEXITSTATUS($stat));
394             } elsif (WIFSTOPPED($stat)) {
395 0         0 $str = sprintf("stopped, signal %d", WSTOPSIG($stat));
396             } else {
397 0         0 my $sig = WTERMSIG($stat);
398 0 0       0 $str = sprintf("%s, signal %d (%04x)",
    0          
    0          
    0          
    0          
399             $sig == 1 ? 'HANGUP' : $sig == 2 ? 'interrupted' :
400             $sig == 6 ? 'ABORTED' : $sig == 9 ? 'KILLED' :
401             $sig == 15 ? 'TERMINATED' : 'DIED',
402             $sig, $stat);
403             }
404 0 0       0 if (defined $errno) { # deal with dual-valued and plain variables
405 0 0 0     0 $str .= ', '.$errno if (0+$errno) != 0 || ($errno ne '' && $errno ne '0');
      0        
406             }
407 0         0 return $str;
408             }
409              
410             ###########################################################################
411              
412             # check errno to be 0 and a process exit status to be in the list of success
413             # status codes, returning true if both are ok, and false otherwise
414             #
415             sub proc_status_ok {
416 0     0 0 0 my($exit_status,$errno,@success) = @_;
417 0         0 my $ok = 0;
418 0 0 0     0 if ((!defined $errno || $errno == 0) && WIFEXITED($exit_status)) {
      0        
419 0         0 my $j = WEXITSTATUS($exit_status);
420 0 0       0 if (!@success) { $ok = $j==0 } # empty list implies only status 0 is good
  0 0       0  
421 0         0 elsif (grep {$_ == $j} @success) { $ok = 1 }
  0         0  
422             }
423 0         0 return $ok;
424             }
425              
426             ###########################################################################
427              
428             # timezone mappings: in case of conflicts, use RFC 2822, then most
429             # common and least conflicting mapping
430             my %TZ = (
431             # standard
432             'UT' => '+0000',
433             'UTC' => '+0000',
434             # US and Canada
435             'NDT' => '-0230',
436             'AST' => '-0400',
437             'ADT' => '-0300',
438             'NST' => '-0330',
439             'EST' => '-0500',
440             'EDT' => '-0400',
441             'CST' => '-0600',
442             'CDT' => '-0500',
443             'MST' => '-0700',
444             'MDT' => '-0600',
445             'PST' => '-0800',
446             'PDT' => '-0700',
447             'HST' => '-1000',
448             'AKST' => '-0900',
449             'AKDT' => '-0800',
450             'HADT' => '-0900',
451             'HAST' => '-1000',
452             # Europe
453             'GMT' => '+0000',
454             'BST' => '+0100',
455             'IST' => '+0100',
456             'WET' => '+0000',
457             'WEST' => '+0100',
458             'CET' => '+0100',
459             'CEST' => '+0200',
460             'EET' => '+0200',
461             'EEST' => '+0300',
462             'MSK' => '+0300',
463             'MSD' => '+0400',
464             'MET' => '+0100',
465             'MEZ' => '+0100',
466             'MEST' => '+0200',
467             'MESZ' => '+0200',
468             # South America
469             'BRST' => '-0200',
470             'BRT' => '-0300',
471             # Australia
472             'AEST' => '+1000',
473             'AEDT' => '+1100',
474             'ACST' => '+0930',
475             'ACDT' => '+1030',
476             'AWST' => '+0800',
477             # New Zealand
478             'NZST' => '+1200',
479             'NZDT' => '+1300',
480             # Asia
481             'JST' => '+0900',
482             'KST' => '+0900',
483             'HKT' => '+0800',
484             'SGT' => '+0800',
485             'PHT' => '+0800',
486             # Middle East
487             'IDT' => '+0300',
488             );
489              
490             # month mappings
491             my %MONTH = (jan => 1, feb => 2, mar => 3, apr => 4, may => 5, jun => 6,
492             jul => 7, aug => 8, sep => 9, oct => 10, nov => 11, dec => 12);
493              
494             my $LOCALTZ;
495              
496             sub local_tz {
497 2 50   16 0 9 return $LOCALTZ if defined($LOCALTZ);
498              
499             # standard method for determining local timezone
500 2         6 my $time = time;
501 2         14 my @g = gmtime($time);
502 2         31 my @t = localtime($time);
503 16         4185 my $z = $t[1]-$g[1]+($t[2]-$g[2])*60+($t[7]-$g[7])*1440+($t[5]-$g[5])*525600;
504 16         40 $LOCALTZ = sprintf("%+.2d%.2d", $z/60, $z%60);
505 16         79 return $LOCALTZ;
506             }
507              
508             sub parse_rfc822_date {
509 26     12 0 65 my ($date) = @_;
510 26         66 local ($_); local ($1,$2,$3,$4);
  26         159  
511 26         134 my ($yyyy, $mmm, $dd, $hh, $mm, $ss, $mon, $tzoff);
512              
513             # make it a bit easier to match
514 26         169 $_ = " $date "; s/, */ /gs; s/\s+/ /gs;
  26         198  
  26         229  
515              
516             # now match it in parts. Date part first:
517 26 50       197 if (s/ (\d+) (Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) (\d{4}) / /i) {
    0          
    0          
518 12         58 $dd = $1; $mon = lc($2); $yyyy = $3;
  12         98  
  12         66  
519             } elsif (s/ (Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) +(\d+) \d+:\d+:\d+ (\d{4}) / /i) {
520 0         0 $dd = $2; $mon = lc($1); $yyyy = $3;
  0         0  
  0         0  
521             } elsif (s/ (\d+) (Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) (\d{2,3}) / /i) {
522 0         0 $dd = $1; $mon = lc($2); $yyyy = $3;
  0         0  
  14         38  
523             } else {
524 14         49 dbg("util: time cannot be parsed: $date");
525 0         0 return;
526             }
527              
528             # handle two and three digit dates as specified by RFC 2822
529 12 50       54 if (defined $yyyy) {
530 26 50 33     187 if (length($yyyy) == 2 && $yyyy < 50) {
    50          
531 14         29 $yyyy += 2000;
532             }
533             elsif (length($yyyy) != 4) {
534             # three digit years and two digit years with values between 50 and 99
535 14         22 $yyyy += 1900;
536             }
537             }
538              
539             # hh:mm:ss
540 26 50       184 if (s/ (\d?\d):(\d\d)(:(\d\d))? / /) {
541 26   50     95 $hh = $1; $mm = $2; $ss = $4 || 0;
  26         73  
  12         71  
542             }
543              
544             # numeric timezones
545 26 50 0     223 if (s/ ([-+]\d{4}) / /) {
    0          
546 26         83 $tzoff = $1;
547             }
548             # common timezones
549             elsif (s/\b([A-Z]{2,4}(?:-DST)?)\b/ / && exists $TZ{$1}) {
550 14         20 $tzoff = $TZ{$1};
551             }
552             # all other timezones are considered equivalent to "-0000"
553 26   50     72 $tzoff ||= '-0000';
554              
555             # months
556 26 50       93 if (exists $MONTH{$mon}) {
557 26         57 $mmm = $MONTH{$mon};
558             }
559              
560 26   50     65 $hh ||= 0; $mm ||= 0; $ss ||= 0; $dd ||= 0; $mmm ||= 0; $yyyy ||= 0;
  26   50     70  
  26   50     92  
  26   50     99  
  13   50     79  
  13   50     42  
561              
562             # Fudge invalid times so that we get a usable date.
563 26 100       82 if ($ss > 59) { # rfc2822 does recognize leap seconds, not handled here
564 1         6 dbg("util: second after supported range, forcing second to 59: $date");
565 1         3 $ss = 59;
566             }
567              
568 26 100       80 if ($mm > 59) {
569 1         7 dbg("util: minute after supported range, forcing minute to 59: $date");
570 1         1 $mm = 59;
571             }
572              
573 26 100       67 if ($hh > 23) {
574 14         95 dbg("util: hour after supported range, forcing hour to 23: $date");
575 6         9 $hh = 23;
576             }
577              
578 14         240 my $max_dd = 31;
579 26 100 100     207 if ($mmm == 4 || $mmm == 6 || $mmm == 9 || $mmm == 11) {
    100 100        
      66        
580 8         55 $max_dd = 30;
581             }
582             elsif ($mmm == 2) {
583 11 50 33     74 $max_dd = (!($yyyy % 4) && (($yyyy % 100) || !($yyyy % 400))) ? 29 : 28;
584             }
585 17 100       69 if ($dd > $max_dd) {
586 5         11 dbg("util: day is too high, incrementing date to next valid date: $date");
587 1         4 $dd = 1;
588 1         2 $mmm++;
589 14 100       39 if ($mmm > 12) {
590 0         0 $mmm = 1;
591 0         0 $yyyy++;
592             }
593             }
594              
595             # Time::Local (v1.10 at least, also 1.17) throws warnings when dates cause
596             # a signed 32-bit integer overflow. So force a min/max for year.
597 12 50       65 if ($yyyy > 2037) {
    50          
598 0         0 dbg("util: year after supported range, forcing year to 2037: $date");
599 14         18 $yyyy = 2037;
600             }
601             elsif ($yyyy < 1970) {
602 14         20 dbg("util: year before supported range, forcing year to 1970: $date");
603 14         57 $yyyy = 1970;
604             }
605              
606 26         529 my $time;
607             eval { # could croak
608 12         142 $time = timegm($ss, $mm, $hh, $dd, $mmm-1, $yyyy);
609 12         1106 1;
610 12 50       27 } or do {
611 0 0       0 my $eval_stat = $@ ne '' ? $@ : "errno=$!"; chomp $eval_stat;
  14         68  
612 14         44 dbg("util: time cannot be parsed: $date, $yyyy-$mmm-$dd $hh:$mm:$ss, $eval_stat");
613 14         36 return;
614             };
615              
616 24 50       142 if ($tzoff =~ /([-+])(\d\d)(\d\d)$/) # convert to seconds difference
617             {
618 12         62 $tzoff = (($2 * 60) + $3) * 60;
619 14 100       148 if ($1 eq '-') {
    50          
620 15         57 $time += $tzoff;
621             } elsif ($time < $tzoff) { # careful with year 1970 and '+' time zones
622 0         0 $time = 0;
623             } else {
624 11         36 $time -= $tzoff;
625             }
626             }
627              
628 12         112 return $time;
629             }
630              
631             sub time_to_rfc822_date {
632 2     2 0 9 my($time) = @_;
633              
634 2         20 my @days = qw/Sun Mon Tue Wed Thu Fri Sat/;
635 2         35 my @months = qw/Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec/;
636 2   33     104 my @localtime = localtime($time || time);
637 2         11 $localtime[5]+=1900;
638              
639 2         19 sprintf("%s, %02d %s %4d %02d:%02d:%02d %s", $days[$localtime[6]], $localtime[3],
640             $months[$localtime[4]], @localtime[5,2,1,0], local_tz());
641             }
642              
643             ###########################################################################
644              
645             # This used to be a wrapper for Text::Wrap. Now we do basically the same
646             # function as Text::Wrap::wrap(). See bug 5056 and 2165 for more information
647             # about why things aren't using that function anymore.
648             #
649             # It accepts values for almost all options which can be set
650             # in Text::Wrap. All parameters are optional (leaving away the first one
651             # probably doesn't make too much sense though), either a missing or a false
652             # value will fall back to the default.
653             #
654             # The parameters are:
655             # 1st: The string to wrap. Only one string is allowed.
656             # (default: "")
657             # 2nd: The prefix to be put in front of all lines except the first one.
658             # (default: "")
659             # 3rd: The prefix for the first line. (default: "")
660             # 4th: The number of columns available (no line will be longer than this
661             # unless overflow is set below). (default: 77)
662             # 5th: Enable or disable overflow mode. (default: 0)
663             # 6th: The sequence/expression to wrap at. (default: '\s');
664             # 7th: The string to join the lines again. (default: "\n")
665              
666             sub wrap {
667 365   50 365 0 3758 my $string = shift || '';
668 365   50     905 my $prefix = shift || '';
669 365   100     1028 my $first = shift || '';
670 365   50     870 my $length = shift || 77;
671 365   100     1124 my $overflow = shift || 0;
672 365   66     1381 my $break = shift || qr/\s/;
673 365         650 my $sep = "\n";
674              
675             # go ahead and break apart the string, keeping the break chars
676 365         7029 my @arr = split(/($break)/, $string);
677              
678             # tack the first prefix line at the start
679 365 100       1314 splice @arr, 0, 0, $first if $first;
680              
681             # go ahead and make up the lines in the array
682 365         607 my $pos = 0;
683 365         550 my $pos_mod = 0;
684 365         929 while ($#arr > $pos) {
685 3838         6806 my $tmpline = $arr[$pos] ;
686 3838         7749 $tmpline =~ s/\t/ /g;
687 3838         6433 my $len = length ($tmpline);
688             # if we don't want to have lines > $length (overflow==0), we
689             # need to verify what will happen with the next line. if we don't
690             # care if a single line goes longer, don't care about the next
691             # line.
692             # we also want this to be true for the first entry on the line
693 3838 100 100     11252 if ($pos_mod != 0 && $overflow == 0) {
694 3254         6103 my $tmpnext = $arr[$pos+1] ;
695 3254         5014 $tmpnext =~ s/\t/ /g;
696 3254         6335 $len += length ($tmpnext);
697             }
698              
699 3838 100       7441 if ($len <= $length) {
700             # if the length determined above is within bounds, go ahead and
701             # merge the next line with the current one
702 3716         7683 $arr[$pos] .= splice @arr, $pos+1, 1;
703 3716         10843 $pos_mod = 1;
704             }
705             else {
706             # ok, the current line is the right length, but there's more text!
707             # prep the current line and then go onto the next one
708              
709             # strip any trailing whitespace from the next line that's ready
710 122         797 $arr[$pos] =~ s/\s+$//;
711              
712             # go to the next line and reset pos_mod
713 122         216 $pos++;
714 122         178 $pos_mod = 0;
715              
716             # put the appropriate prefix at the front of the line
717 122         503 splice @arr, $pos, 0, $prefix;
718             }
719             }
720              
721             # go ahead and return the wrapped text, with the separator in between
722 365         2298 return join($sep, @arr);
723             }
724              
725             ###########################################################################
726              
727             # Some base64 decoders will remove intermediate "=" characters, others
728             # will stop decoding on the first "=" character, this one translates "="
729             # characters to null.
730             sub base64_decode {
731 7     7 0 128 local $_ = shift;
732 7         15 my $decoded_length = shift;
733              
734 7         801 s/\s+//g;
735 7 50 33     289 if (HAS_MIME_BASE64 && (length($_) % 4 == 0) &&
736             m|^(?:[A-Za-z0-9+/=]{2,}={0,2})$|s)
737             {
738             # only use MIME::Base64 when the XS and Perl are both correct and quiet
739 7         24 local $1;
740 7         33 s/(=+)(?!=*$)/'A' x length($1)/ge;
  0         0  
741              
742             # If only a certain number of bytes are requested, truncate the encoded
743             # version down to the appropriate size and return the requested bytes
744 7 50       26 if (defined $decoded_length) {
745 0         0 $_ = substr $_, 0, 4 * (int($decoded_length/3) + 1);
746 0         0 my $decoded = MIME::Base64::decode_base64($_);
747 0         0 return substr $decoded, 0, $decoded_length;
748             }
749              
750             # otherwise, just decode the whole thing and return it
751 7         410 return MIME::Base64::decode_base64($_);
752             }
753 0         0 tr{A-Za-z0-9+/=}{}cd; # remove non-base64 characters
754 0         0 s/=+$//; # remove terminating padding
755 0         0 tr{A-Za-z0-9+/=}{ -_}; # translate to uuencode
756 0 0       0 s/.$// if (length($_) % 4 == 1); # unpack cannot cope with extra byte
757              
758 0         0 my $length;
759 0         0 my $out = '';
760 0         0 while ($_) {
761 0 0       0 $length = (length >= 84) ? 84 : length;
762 0         0 $out .= unpack("u", chr(32 + $length * 3/4) . substr($_, 0, $length, ''));
763 0 0 0     0 last if (defined $decoded_length && length $out >= $decoded_length);
764             }
765              
766             # If only a certain number of bytes are requested, truncate the encoded
767             # version down to the appropriate size and return the requested bytes
768 0 0       0 if (defined $decoded_length) {
769 0         0 return substr $out, 0, $decoded_length;
770             }
771              
772 0         0 return $out;
773             }
774              
775             sub qp_decode {
776 14     14 0 76 my $str = $_[0];
777              
778             # RFC 2045: when decoding a Quoted-Printable body, any trailing
779             # white space on a line must be deleted
780 14         455 $str =~ s/[ \t]+(?=\r?\n)//gs;
781              
782 14         125 $str =~ s/=\r?\n//gs; # soft line breaks
783              
784             # RFC 2045 explicitly prohibits lowercase characters a-f in QP encoding
785             # do we really want to allow them???
786 14         43 local $1;
787 14         77 $str =~ s/=([0-9a-fA-F]{2})/chr(hex($1))/ge;
  111         434  
788              
789 14         94 return $str;
790             }
791              
792             sub base64_encode {
793 0     0 0 0 local $_ = shift;
794              
795 0         0 if (HAS_MIME_BASE64) {
796 0         0 return MIME::Base64::encode_base64($_);
797             }
798              
799 0         0 $_ = pack("u57", $_);
800 0         0 s/^.//mg;
801 0         0 tr| -_`|A-Za-z0-9+/A|; # -> #`# <- kluge against vim syntax issues
802 0         0 s/(A+)$/'=' x length $1/e;
  0         0  
803 0         0 return $_;
804             }
805              
806             ###########################################################################
807              
808             sub portable_getpwuid {
809 95 100   95 0 450 if (defined &Mail::SpamAssassin::Util::_getpwuid_wrapper) {
810 70         2040 return Mail::SpamAssassin::Util::_getpwuid_wrapper(@_);
811             }
812              
813 25         65 my $sts;
814 25 50       157 if (!RUNNING_ON_WINDOWS) {
815 25     95   2263 $sts = eval ' sub _getpwuid_wrapper { getpwuid($_[0]); }; 1 ';
  95         26582  
816             } else {
817 0         0 dbg("util: defining getpwuid() wrapper using 'unknown' as username");
818 0         0 $sts = eval ' sub _getpwuid_wrapper { _fake_getpwuid($_[0]); }; 1 ';
819             }
820 25 50       162 if (!$sts) {
821 0 0       0 my $eval_stat = $@ ne '' ? $@ : "errno=$!"; chomp $eval_stat;
  0         0  
822 0         0 warn "util: failed to define getpwuid() wrapper: $eval_stat\n";
823             } else {
824 25         417 return Mail::SpamAssassin::Util::_getpwuid_wrapper(@_);
825             }
826             }
827              
828             sub _fake_getpwuid {
829             return (
830 0     0   0 'unknown', # name,
831             'x', # passwd,
832             $_[0], # uid,
833             0, # gid,
834             '', # quota,
835             '', # comment,
836             '', # gcos,
837             '/', # dir,
838             '', # shell,
839             '', # expire
840             );
841             }
842              
843             ###########################################################################
844              
845             # Given a string, extract an IPv4 address from it. Required, since
846             # we currently have no way to portably unmarshal an IPv4 address from
847             # an IPv6 one without kludging elsewhere.
848             #
849             sub extract_ipv4_addr_from_string {
850 0     0 0 0 my ($str) = @_;
851              
852 0 0       0 return unless defined($str);
853              
854 0 0       0 if ($str =~ /\b(
855             (?:1\d\d|2[0-4]\d|25[0-5]|[1-9]\d|\d)\.
856             (?:1\d\d|2[0-4]\d|25[0-5]|[1-9]\d|\d)\.
857             (?:1\d\d|2[0-4]\d|25[0-5]|[1-9]\d|\d)\.
858             (?:1\d\d|2[0-4]\d|25[0-5]|[1-9]\d|\d)
859             )\b/ix)
860             {
861 0 0       0 if (defined $1) { return $1; }
  0         0  
862             }
863              
864             # ignore native IPv6 addresses;
865             # TODO, eventually, once IPv6 spam starts to appear ;)
866 0         0 return;
867             }
868              
869             ###########################################################################
870              
871             {
872             my($hostname, $fq_hostname);
873              
874             # get the current host's unqalified domain name (better: return whatever
875             # Sys::Hostname thinks our hostname is, might also be a full qualified one)
876             sub hostname {
877 9 50   9 0 40 return $hostname if defined($hostname);
878              
879             # Sys::Hostname isn't taint safe and might fall back to `hostname`. So we've
880             # got to clean PATH before we may call it.
881 9         48 clean_path_in_taint_mode();
882 9         95 $hostname = Sys::Hostname::hostname();
883 9         230 $hostname =~ s/[()]//gs; # bug 5929
884 9         32 return $hostname;
885             }
886              
887             # get the current host's fully-qualified domain name, if possible. If
888             # not possible, return the unqualified hostname.
889             sub fq_hostname {
890 77 100   77 0 422 return $fq_hostname if defined($fq_hostname);
891              
892 9         42 $fq_hostname = hostname();
893 9 50       59 if ($fq_hostname !~ /\./) { # hostname doesn't contain a dot, so it can't be a FQDN
894             my @names = grep(/^\Q${fq_hostname}.\E/o, # grep only FQDNs
895 9         1832 map { split } (gethostbyname($fq_hostname))[0 .. 1] # from all aliases
  18         272  
896             );
897 9 50       55 $fq_hostname = $names[0] if (@names); # take the first FQDN, if any
898 9         46 $fq_hostname =~ s/[()]//gs; # bug 5929
899             }
900              
901 9         42 return $fq_hostname;
902             }
903             }
904              
905             ###########################################################################
906              
907             sub ips_match_in_16_mask {
908 0     0 0 0 my ($ipset1, $ipset2) = @_;
909 0         0 my ($b1, $b2);
910              
911 0         0 foreach my $ip1 (@{$ipset1}) {
  0         0  
912 0         0 foreach my $ip2 (@{$ipset2}) {
  0         0  
913 0 0       0 next unless defined $ip1;
914 0 0       0 next unless defined $ip2;
915 0 0       0 next unless ($ip1 =~ /^(\d+\.\d+\.)/); $b1 = $1;
  0         0  
916 0 0       0 next unless ($ip2 =~ /^(\d+\.\d+\.)/); $b2 = $1;
  0         0  
917 0 0       0 if ($b1 eq $b2) { return 1; }
  0         0  
918             }
919             }
920              
921 0         0 return 0;
922             }
923              
924             sub ips_match_in_24_mask {
925 0     0 0 0 my ($ipset1, $ipset2) = @_;
926 0         0 my ($b1, $b2);
927              
928 0         0 foreach my $ip1 (@{$ipset1}) {
  0         0  
929 0         0 foreach my $ip2 (@{$ipset2}) {
  0         0  
930 0 0       0 next unless defined $ip1;
931 0 0       0 next unless defined $ip2;
932 0 0       0 next unless ($ip1 =~ /^(\d+\.\d+\.\d+\.)/); $b1 = $1;
  0         0  
933 0 0       0 next unless ($ip2 =~ /^(\d+\.\d+\.\d+\.)/); $b2 = $1;
  0         0  
934 0 0       0 if ($b1 eq $b2) { return 1; }
  0         0  
935             }
936             }
937              
938 0         0 return 0;
939             }
940              
941             ###########################################################################
942              
943             # Given a quad-dotted IPv4 address or an IPv6 address, reverses the order
944             # of its bytes (IPv4) or nibbles (IPv6), joins them with dots, producing
945             # a string suitable for reverse DNS lookups. Returns undef in case of a
946             # syntactically invalid IP address.
947             #
948             sub reverse_ip_address {
949 116     116 0 324 my ($ip) = @_;
950              
951 116         167 my $revip;
952 116         435 local($1,$2,$3,$4);
953 116 100 33     803 if ($ip =~ /^(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})\z/) {
    50          
    50          
954 108         675 $revip = "$4.$3.$2.$1";
955             } elsif ($ip !~ /:/ || $ip !~ /^[0-9a-fA-F:.]{2,}\z/) { # triage
956             # obviously unrecognized syntax
957             } elsif (!NetAddr::IP->can('full6')) { # since NetAddr::IP 4.010
958 0         0 info("util: version of NetAddr::IP is too old, IPv6 not supported");
959             } else {
960             # looks like an IPv6 address, let NetAddr::IP check the details
961 8         52 my $ip_obj = NetAddr::IP->new6($ip);
962 8 50       2229 if (defined $ip_obj) { # valid IPv6 address
963             # RFC 5782 section 2.4.
964 8         37 $revip = lc $ip_obj->network->full6; # string in a canonical form
965 8         318 $revip =~ s/://g;
966 8         104 $revip = join('.', reverse split(//,$revip));
967             }
968             }
969 116         619 return $revip;
970             }
971              
972             ###########################################################################
973              
974 152     152 0 1292 sub my_inet_aton { unpack("N", pack("C4", split(/\./, $_[0]))) }
975              
976             ###########################################################################
977              
978             sub decode_dns_question_entry {
979             # decodes a Net::DNS::Packet->question entry,
980             # returning a triple: class, type, label
981             #
982 42     42 0 84 my $q = $_[0];
983 42         116 my $qname = $q->qname;
984              
985             # Bug 6959, Net::DNS flags a domain name in a query section as utf8, while
986             # still keeping it "RFC 1035 zone file format"-encoded, silly and harmful
987 42 50       2550 utf8::encode($qname) if utf8::is_utf8($qname); # since Perl 5.8.1
988              
989 42         120 local $1;
990             # Net::DNS provides a query in encoded RFC 1035 zone file format, decode it!
991 42 0       99 $qname =~ s{ \\ ( [0-9]{3} | [^0-9] ) }
  0 0       0  
992 42         122 { length($1)==1 ? $1 : $1 <= 255 ? chr($1) : "\\$1" }xgse;
993             return ($q->qclass, $q->qtype, $qname);
994             }
995              
996             ###########################################################################
997              
998             sub parse_content_type {
999             # This routine is typically called by passing a
1000             # get_header("content-type") which passes all content-type headers
1001             # (array context). If there are multiple Content-type headers (invalid,
1002             # but it happens), MUAs seem to take the last one and so that's what we
1003             # should do here.
1004 358   100 358 0 1579 #
1005             my $ct = $_[-1] || 'text/plain; charset=us-ascii';
1006              
1007             # This could be made a bit more rigid ...
1008             # the actual ABNF, BTW (RFC 1521, section 7.2.1):
1009             # boundary := 0*69<bchars> bcharsnospace
1010             # bchars := bcharsnospace / " "
1011             # bcharsnospace := DIGIT / ALPHA / "'" / "(" / ")" / "+" /"_"
1012             # / "," / "-" / "." / "/" / ":" / "=" / "?"
1013             #
1014             # The boundary may be surrounded by double quotes.
1015             # "the boundary parameter, which consists of 1 to 70 characters from
1016             # a set of characters known to be very robust through email gateways,
1017             # and NOT ending with white space. (If a boundary appears to end with
1018             # white space, the white space must be presumed to have been added by
1019             # a gateway, and must be deleted.)"
1020             #
1021             # In practice:
1022             # - MUAs accept whitespace before and after the "=" character
1023             # - only an opening double quote seems to be needed
1024             # - non-quoted boundaries should be followed by space, ";", or end of line
1025             # - blank boundaries seem to not work
1026 358         1840 #
1027             my($boundary) = $ct =~ m!\bboundary\s*=\s*("[^"]+|[^\s";]+(?=[\s;]|$))!i;
1028              
1029             # remove double-quotes in boundary (should only be at start and end)
1030 358 100       958 #
1031             $boundary =~ tr/"//d if defined $boundary;
1032              
1033             # Parse out the charset and name, if they exist.
1034 358         2328 #
1035 358         1919 my($charset) = $ct =~ /\bcharset\s*=\s*["']?(.*?)["']?(?:;|$)/i;
1036             my($name) = $ct =~ /\b(?:file)?name\s*=\s*["']?(.*?)["']?(?:;|$)/i;
1037              
1038             # RFC 2231 section 3: Parameter Value Continuations
1039             # support continuations for name values
1040 358 50 66     2645 #
1041             if (!$name && $ct =~ /\b(?:file)?name\*0\s*=/i) {
1042 0         0  
1043 0         0 my @name;
1044             $name[$1] = $2
1045             while ($ct =~ /\b(?:file)?name\*(\d+)\s*=\s*["']?(.*?)["']?(?:;|$)/ig);
1046 0         0  
1047             $name = join "", grep defined, @name;
1048             }
1049              
1050             # Get the actual MIME type out ...
1051             # Note: the header content may not be whitespace unfolded, so make sure the
1052             # REs do /s when appropriate.
1053             # correct:
1054             # Content-type: text/plain; charset=us-ascii
1055             # missing a semi-colon, CT shouldn't have whitespace anyway:
1056             # Content-type: text/plain charset=us-ascii
1057 358         1013 #
1058 358         1584 $ct =~ s/^\s+//; # strip leading whitespace
1059 358         2172 $ct =~ s/;.*$//s; # strip everything after first ';'
1060 358         954 $ct =~ s@^([^/]+(?:/[^/\s]*)?).*$@$1@s; # only something/something ...
1061             $ct = lc $ct;
1062              
1063             # bug 4298: If at this point we don't have a content-type, assume text/plain;
1064             # also, bug 5399: if the content-type *starts* with "text", and isn't in a
1065 358 100 100     3266 # list of known bad/non-plain formats, do likewise.
      66        
1066             if (!$ct ||
1067             ($ct =~ /^text\b/ && $ct !~ /^text\/(?:x-vcard|calendar|html)$/))
1068 269         630 {
1069             $ct = "text/plain";
1070             }
1071              
1072 358         1025 # strip inappropriate chars (bug 5399: after the text/plain fixup)
1073             $ct =~ tr/\000-\040\177-\377\042\050\051\054\072-\077\100\133-\135//d;
1074              
1075             # Now that the header has been parsed, return the requested information.
1076             # In scalar context, just the MIME type, in array context the
1077             # four important data parts (type, boundary, charset, and filename).
1078 358 50       2592 #
1079             return wantarray ? ($ct,$boundary,$charset,$name) : $ct;
1080             }
1081              
1082             ###########################################################################
1083              
1084 7     7 0 21 sub url_encode {
1085 7         87 my ($url) = @_;
1086 7         18 my (@characters) = split(/(\%[0-9a-fA-F]{2})/, $url);
1087             my (@unencoded);
1088             my (@encoded);
1089 7         17  
1090             foreach (@characters) {
1091 138 100       300 # escaped character set ...
1092             if (/\%[0-9a-fA-F]{2}/) {
1093             # IF it is in the range of 0x00-0x20 or 0x7f-0xff
1094             # or it is one of "<", ">", """, "#", "%",
1095             # ";", "/", "?", ":", "@", "=" or "&"
1096 67 50       204 # THEN preserve its encoding
1097 67         188 unless (/(20|7f|[0189a-fA-F][0-9a-fA-F])/i) {
  67         213  
1098 67         176 s/\%([2-7][0-9a-fA-F])/sprintf "%c", hex($1)/e;
1099             push(@unencoded, $_);
1100             }
1101             }
1102             # other stuff
1103             else {
1104             # no re "strict"; # since perl 5.21.8
1105 71         162 # 0x00-0x20, 0x7f-0xff, ", %, <, >
1106 0 0       0 s/([\000-\040\177-\377\042\045\074\076])
1107             /push(@encoded, $1) && sprintf "%%%02x", unpack("C",$1)/egx;
1108             }
1109 7 50       25 }
1110 0         0 if (wantarray) {
1111             return(join("", @characters), join("", @unencoded), join("", @encoded));
1112             }
1113 7         52 else {
1114             return join("", @characters);
1115             }
1116             }
1117              
1118             ###########################################################################
1119              
1120             =item $module = first_available_module (@module_list)
1121              
1122             Return the name of the first module that can be successfully loaded with
1123             C<require> from the list. Returns C<undef> if none are available.
1124              
1125             This is used instead of C<AnyDBM_File> as follows:
1126              
1127             my $module = Mail::SpamAssassin::Util::first_available_module
1128             (qw(DB_File GDBM_File NDBM_File SDBM_File));
1129             tie %hash, $module, $path, [... args];
1130              
1131             Note that C<SDBM_File> is guaranteed to be present, since it comes
1132             with Perl.
1133              
1134             =cut
1135              
1136 10     10 1 38 sub first_available_module {
1137 10         46 my (@packages) = @_;
1138 30 50       208 foreach my $mod (@packages) {
1139 30 100       1690 next if $mod !~ /^[\w:]+$/; # be paranoid
1140 10         77 if (eval 'require '.$mod.'; 1;') {
1141             return $mod;
1142             }
1143 0         0 }
1144             undef;
1145             }
1146              
1147             ###########################################################################
1148              
1149             =item touch_file(file, { args });
1150              
1151             Touch or create a file.
1152              
1153             Possible args:
1154              
1155             create_exclusive => 1
1156             Create a new empty file safely, only if not existing before
1157              
1158             =cut
1159              
1160 0     0 1 0 sub touch_file {
1161             my ($file, $args) = @_;
1162 0         0  
1163 0   0     0 $file = untaint_file_path($file);
1164             $args ||= {};
1165 0 0 0     0  
1166             return unless defined $file && $file ne '';
1167 0 0       0  
1168 0 0       0 if ($args->{create_exclusive}) {
1169 0         0 if (sysopen(my $fh, $file, O_CREAT|O_EXCL)) {
1170 0         0 close $fh;
1171             return 1;
1172 0 0       0 }
1173 0         0 return 1 if $! == EEXIST; # fine if it exists already
1174 0         0 dbg("util: exclusive touch_file failed: $file: $!");
1175             return 0;
1176             }
1177 0 0       0  
1178 0         0 if (!utime(undef,undef,$file)) {
1179 0         0 dbg("util: touch_file failed: $file: $!");
1180             return 0;
1181             }
1182 0         0  
1183             return 1;
1184             }
1185              
1186             ###########################################################################
1187              
1188             =item my ($filepath, $filehandle) = secure_tmpfile();
1189              
1190             Generates a filename for a temporary file, opens it exclusively and
1191             securely, and returns a filehandle to the open file (opened O_RDWR).
1192              
1193             If it cannot open a file after 20 tries, it returns C<undef>.
1194              
1195             =cut
1196              
1197             # thanks to http://www2.picante.com:81/~gtaylor/autobuse/ for this code
1198 65 50   65 1 477 sub secure_tmpfile {
1199 65   33     3273 my $tmpenv = am_running_on_windows() ? 'TMP' : 'TMPDIR';
1200             my $tmpdir = untaint_file_path($ENV{$tmpenv} || File::Spec->tmpdir());
1201 65 50 33     390  
1202             defined $tmpdir && $tmpdir ne ''
1203             or die "util: cannot find a temporary directory, set TMP or TMPDIR in environment";
1204 65 50       3999  
1205 65 50       1038 opendir(my $dh, $tmpdir) or die "Could not open directory $tmpdir: $!";
1206             closedir $dh or die "Error closing directory $tmpdir: $!";
1207 65         308  
1208 65         294 my ($reportfile, $tmpfh);
1209             for (my $retries = 20; $retries > 0; $retries--) {
1210             # we do not rely on the obscurity of this name for security,
1211 65         773 # we use a average-quality PRG since this is all we need
1212             my $suffix = join('', (0..9,'A'..'Z','a'..'z')[rand 62, rand 62, rand 62,
1213 65         1890 rand 62, rand 62, rand 62]);
1214             $reportfile = File::Spec->catfile($tmpdir,".spamassassin${$}${suffix}tmp");
1215              
1216             # instead, we require O_EXCL|O_CREAT to guarantee us proper
1217 65 50       6537 # ownership of our file, read the open(2) man page
1218 65 50       407 if (sysopen($tmpfh, $reportfile, O_RDWR|O_CREAT|O_EXCL, 0600)) {
1219 65         183 binmode $tmpfh or die "cannot set $reportfile to binmode: $!";
1220             last;
1221 0         0 }
1222             my $errno = $!;
1223              
1224 0 0       0 # ensure the file handle is not semi-open in some way
1225 0 0       0 if ($tmpfh) {
1226 0         0 if (! close $tmpfh) {
1227 0         0 info("error closing $reportfile: $!");
1228             undef $tmpfh;
1229             }
1230             }
1231              
1232 0 0       0 # it is acceptable if $tmpfh already exists, try another
1233             next if $errno == EEXIST;
1234              
1235             # error, maybe "out of quota", "too many open files", "Permission denied"
1236 0         0 # (bug 4017); makes no sense retrying
1237             die "util: failed to create a temporary file '$reportfile': $errno";
1238             }
1239 65 50       223  
1240 0         0 if (!$tmpfh) {
1241 0         0 warn "util: secure_tmpfile failed to create a temporary file, giving up";
1242             return;
1243             }
1244 65         581  
1245 65         611 dbg("util: secure_tmpfile created a temporary file %s", $reportfile);
1246             return ($reportfile, $tmpfh);
1247             }
1248              
1249             =item my ($dirpath) = secure_tmpdir();
1250              
1251             Generates a directory for temporary files. Creates it securely and
1252             returns the path to the directory.
1253              
1254             If it cannot create a directory after 20 tries, it returns C<undef>.
1255              
1256             =cut
1257              
1258             # stolen from secure_tmpfile()
1259 0     0 1 0 sub secure_tmpdir {
1260             my $tmpdir = untaint_file_path(File::Spec->tmpdir());
1261 0 0       0  
1262             if (!$tmpdir) {
1263             # Note: we would prefer to keep this fatal, as not being able to
1264             # find a writable tmpdir is a big deal for the calling code too.
1265 0         0 # That would be quite a psychotic case, also.
1266 0         0 warn "util: cannot find a temporary directory, set TMP or TMPDIR in environment";
1267             return;
1268             }
1269 0         0  
1270 0         0 my ($reportpath, $tmppath);
1271             my $umask = umask 077;
1272 0         0  
1273             for (my $retries = 20; $retries > 0; $retries--) {
1274             # we do not rely on the obscurity of this name for security,
1275 0         0 # we use a average-quality PRG since this is all we need
1276             my $suffix = join('', (0..9,'A'..'Z','a'..'z')[rand 62, rand 62, rand 62,
1277 0         0 rand 62, rand 62, rand 62]);
1278             $reportpath = File::Spec->catfile($tmpdir,".spamassassin${$}${suffix}tmp");
1279              
1280             # instead, we require O_EXCL|O_CREAT to guarantee us proper
1281 0 0       0 # ownership of our file, read the open(2) man page
1282 0         0 if (mkdir $reportpath, 0700) {
1283 0         0 $tmppath = $reportpath;
1284             last;
1285             }
1286 0 0       0  
1287             if ($!{EEXIST}) {
1288 0         0 # it is acceptable if $reportpath already exists, try another
1289             next;
1290             }
1291            
1292 0         0 # error, maybe "out of quota" or "too many open files" (bug 4017)
1293             warn "util: secure_tmpdir failed to create file '$reportpath': $!\n";
1294             }
1295 0         0  
1296             umask $umask;
1297 0 0       0  
1298             warn "util: secure_tmpdir failed to create a directory, giving up" if (!$tmppath);
1299 0         0  
1300             return $tmppath;
1301             }
1302              
1303              
1304             ###########################################################################
1305              
1306             ##
1307             ## DEPRECATED FUNCTION, sub uri_to_domain removed.
1308             ## Replaced with Mail::SpamAssassin::RegistryBoundaries::uri_to_domain.
1309             ##
1310              
1311             ###########################################################################
1312              
1313             *uri_list_canonify = \&uri_list_canonicalize; # compatibility alias
1314 437     437 0 9871 sub uri_list_canonicalize {
1315             my($redirector_patterns, @uris) = @_;
1316              
1317 437         647 # make sure we catch bad encoding tricks
1318 437         899 my @nuris;
1319             for my $uri (@uris) {
1320 449         958 # sometimes we catch URLs on multiple lines
1321             $uri =~ s/\n//g;
1322              
1323 449         955 # URLs won't have leading/trailing whitespace
1324 449         810 $uri =~ s/^\s+//;
1325             $uri =~ s/\s+$//;
1326              
1327 449         681 # CRs just confuse things down below, so trash them now
1328             $uri =~ s/\r//g;
1329              
1330 449         657 # Make a copy so we don't trash the original in the array
1331             my $nuri = $uri;
1332              
1333 449 100 100     2119 # Handle emails differently
1334             if ($nuri =~ /^mailto:/i || $nuri =~ /^[^:]*\@/) {
1335             # Strip ?subject= parameters and obfuscations
1336 36 50       201 # Outlook linkifies foo@bar%2Ecom&x.com to foo@bar.com !!
1337 0         0 if ($nuri =~ /^([^\@]+\@[^?]+)\?/) {
1338             push @nuris, $1;
1339 36 50       162 }
1340 0         0 if ($nuri =~ /^([^\@]+\@[^?&]+)\&/) {
1341             push @nuris, $1
1342             }
1343 36 100 100     187 # Address must be trimmed of %20
1344             if ($nuri =~ tr/%20// &&
1345 8         36 $nuri =~ /^(?:mailto:)?(?:\%20)*([^\@]+\@[^?&%]+)/) {
1346             push @nuris, "mailto:$1";
1347             }
1348 36 50       108 # mailto:"Foo%20Bar"%20<foo.bar@example.com>
1349 0         0 if ($nuri =~ /^[^?&]*<([^\@>]+\@[^>]+)>/) {
1350             push @nuris, "mailto:$1";
1351             }
1352 36         108 # End email processing
1353             next;
1354             }
1355              
1356             # bug 4390: certain MUAs treat back slashes as front slashes.
1357             # since backslashes are supposed to be encoded in a URI, swap non-encoded
1358 413         959 # ones with front slashes.
1359             $nuri =~ tr{\\}{/};
1360              
1361 413         2386 # http:www.foo.biz -> http://www.foo.biz
1362             $nuri =~ s{^(https?:)/{0,2}}{$1//}i;
1363              
1364             # *always* make a dup with all %-encoding decoded, since
1365             # important parts of the URL may be encoded (such as the
1366 413 100       1124 # scheme). (bug 4213)
1367 7         37 if ($nuri =~ /%[0-9a-fA-F]{2}/) {
1368             $nuri = Mail::SpamAssassin::Util::url_encode($nuri);
1369             }
1370              
1371             # www.foo.biz -> http://www.foo.biz
1372 413 100       1311 # unschemed URIs: assume default of "http://" as most MUAs do
1373 77 100       136 if ($nuri !~ /^[-_a-z0-9]+:/i) {
1374 1         5 if ($nuri =~ /^ftp\./) {
1375             $nuri =~ s{^}{ftp://}g;
1376             }
1377 76         316 else {
1378             $nuri =~ s{^}{http://}g;
1379             }
1380             }
1381              
1382             # http://www.foo.biz?id=3 -> http://www.foo.biz/?id=3
1383 413         1738 # http://www.foo.biz#id=3 -> http://www.foo.biz/#id=3
1384             $nuri =~ s{^(https?://[^/?#]+)([?#])}{$1/$2}i;
1385              
1386             # deal with encoding of chars, this is just the set of printable
1387 413         781 # chars minus ' ' (that is, dec 33-126, hex 21-7e)
  0         0  
1388 413         630 $nuri =~ s/\&\#0*(3[3-9]|[4-9]\d|1[01]\d|12[0-6]);/sprintf "%c",$1/ge;
  0         0  
1389             $nuri =~ s/\&\#x0*(2[1-9]|[3-6][a-fA-F0-9]|7[0-9a-eA-E]);/sprintf "%c",hex($1)/ge;
1390 413         623 # handle other unicode dots (U+002E U+3002 U+FF0E U+FF61) -> .
1391             $nuri =~ s/\&\#(?:x2e|12290|x3002|65294|xff0e|65377|xff61);/./gi;
1392              
1393 413 100       923 # put the new URI on the new list if it's different
1394 91         165 if ($nuri ne $uri) {
1395             push(@nuris, $nuri);
1396             }
1397              
1398 413 100       2084 # deal with weird hostname parts, remove user/pass, etc.
1399 400         1534 if ($nuri =~ m{^(https?://)([^\@/?#]*\@)?([^/?#:]+)((?::(\d*))?.*)$}i) {
1400 400 100       990 my($proto, $host, $rest) = ($1,$3,$4);
1401 400 100       887 my $auth = defined $2 ? $2 : '';
1402             my $port = defined $5 ? $5 : '';
1403 400         599  
1404 400 100       790 my $rest_noport;
1405 382 100       835 if ($port eq '') {
1406             $port = $proto eq 'http://' ? 80 : 443;
1407 18         34 } else {
1408             $rest_noport = $rest;
1409 18 50       45 # Strip default ports from url and add to list
    0          
1410 18 100       67 if ($proto eq 'http://') {
1411 14         47 if ($rest_noport =~ s/^:80\b//) {
1412             push(@nuris, join('', $proto, $host, $rest_noport));
1413             }
1414 0         0 } elsif ($rest_noport =~ s/^:443\b//) {
1415             push(@nuris, join('', $proto, $host, $rest_noport));
1416             }
1417             }
1418              
1419             # Bug 6751:
1420             # RFC 3490 (IDNA): Whenever dots are used as label separators, the
1421             # following characters MUST be recognized as dots: U+002E (full stop),
1422             # U+3002 (ideographic full stop), U+FF0E (fullwidth full stop),
1423             # U+FF61 (halfwidth ideographic full stop).
1424             # RFC 5895: [...] the IDEOGRAPHIC FULL STOP character (U+3002)
1425             # can be mapped to the FULL STOP before label separation occurs.
1426             # [...] Only the IDEOGRAPHIC FULL STOP character (U+3002) is added in
1427             # this mapping because the authors have not fully investigated [...]
1428             # Adding also 'SMALL FULL STOP' (U+FE52) as seen in the wild.
1429 400 100       1384 # Parhaps also the 'ONE DOT LEADER' (U+2024).
1430             if ($host =~ s{(?: \xE3\x80\x82 | \xEF\xBC\x8E | \xEF\xBD\xA1 |
1431 5         14 \xEF\xB9\x92 | \xE2\x80\xA4 )}{.}xgs) {
1432             push(@nuris, join ('', $proto, $host, $rest));
1433 5 50       13 # Also add noport variant
1434             push(@nuris, join('', $proto, $host, $rest_noport)) if $rest_noport;
1435             }
1436              
1437             # bug 4146: deal with non-US ASCII 7-bit chars in the host portion
1438             # of the URI according to RFC 1738 that's invalid, and the tested
1439             # browsers (Firefox, IE) remove them before usage...
1440             #if ($host =~ tr/\000-\040\200-\377//d) {
1441             # Fixed 7/2019 to not strip extended chars, since they can be used in
1442 400 50       969 # IDN domains. Stripping control chars should be enough?
1443 0         0 if ($host =~ tr/\x00-\x20//d) {
1444             push(@nuris, join ('', $proto, $host, $rest));
1445             }
1446              
1447             # deal with http redirectors. strip off one level of redirector
1448             # and add back to the array. the foreach loop will go over those
1449             # and deal appropriately.
1450              
1451             # Bug 7278: try redirector pattern matching first
1452 400         595 # (but see also Bug 4176)
1453 400         532 my $found_redirector_match;
  400         940  
1454 1902 100       7660 foreach my $re (@{$redirector_patterns}) {
1455 4 50 33     32 if ("$proto$host$rest" =~ $re) {
1456 4         25 next unless defined $1 && index($1, '.') != -1;
1457 4         21 dbg("uri: parsed uri pattern: $re");
1458 4         9 dbg("uri: parsed uri found: $1 in redirector: $proto$host$rest");
1459 4         9 push (@uris, $1);
1460 4         9 $found_redirector_match = 1;
1461             last;
1462             }
1463 400 100       868 }
1464             if (!$found_redirector_match) {
1465             # try generic https? check if redirector pattern matching failed
1466 396 100 66     1036 # bug 3308: redirectors like yahoo only need one '/' ... <grrr>
1467 6         20 if ($rest =~ m{(https?:/{0,2}[^&#]+)}i && index($1, '.') != -1) {
1468 6         27 push(@uris, $1);
1469             dbg("uri: parsed uri found: $1 in hard-coded redirector");
1470             }
1471             }
1472              
1473             ########################
1474             ## TVD: known issue, if host has multiple combinations of the following,
1475             ## all permutations will be put onto @nuris. shouldn't be an issue.
1476              
1477             # Get rid of cruft that could cause confusion for rules...
1478              
1479 400 50       890 # remove "www.fakehostname.com@" username part
1480 0         0 if ($host =~ s/^[^\@]+\@//gs) {
1481             push(@nuris, join ('', $proto, $host, $rest));
1482             }
1483              
1484             # bug 3186: If in a sentence, we might pick up odd characters ...
1485             # ie: "visit http://example.biz." or "visit http://example.biz!!!"
1486             # the host portion should end in some form of alphanumeric, strip off
1487 400 100       1347 # the rest.
1488 1         4 if ($host =~ s/[^0-9A-Za-z]+$//) {
1489             push(@nuris, join ('', $proto, $host, $rest));
1490             }
1491              
1492             ########################
1493              
1494             # deal with hosts which are IPs
1495             # also handle things like:
1496             # http://89.0x00000000000000000000068.0000000000000000000000160.0x00000000000011
1497             # both hex (0x) and oct (0+) encoded octets, etc.
1498 400 100 100     5284  
    100 66        
    100 100        
    100 100        
1499             if ($host =~ /^
1500             ((?:0x[0-9a-f]+|\d+)\.)
1501             ((?:0x[0-9a-f]+|\d+)\.)
1502             ((?:0x[0-9a-f]+|\d+)\.)
1503             (0x[0-9a-f]+|\d+)
1504             $/ix)
1505 35         176 {
1506 35         79 my @chunk = ($1,$2,$3,$4);
1507 140         270 foreach my $octet (@chunk) {
  22         91  
1508 140         208 $octet =~ s/^0x([0-9a-f]+)/sprintf "%d",hex($1)/gei;
  3         12  
1509 140         248 $octet =~ s/^0+([1-3][0-7]{0,2}|[4-7][0-7]?)\b/sprintf "%d",oct($1)/ge;
1510             $octet =~ s/^0+//;
1511 35         217 }
1512             push(@nuris, join ('', $proto, @chunk, $rest));
1513             }
1514              
1515             # "http://0x7f000001/"
1516             elsif ($host =~ /^0x[0-9a-f]+$/i) {
1517 6         32 # only take last 4 octets
  6         34  
1518 6         24 $host =~ s/^0x[0-9a-f]*?([0-9a-f]{1,8})$/sprintf "%d",hex($1)/gei;
1519             push(@nuris, join ('', $proto, decode_ulong_to_ip($host), $rest));
1520             }
1521              
1522             # "http://1113343453/"
1523 19         72 elsif ($host =~ /^[0-9]+$/) {
1524             push(@nuris, join ('', $proto, decode_ulong_to_ip($host), $rest));
1525             }
1526              
1527             # http://foobar -> http://www.foobar.com as Firefox does (Bug 6596)
1528             # (do this here so we don't trip on those 0x123 IPs etc..)
1529             # https://hg.mozilla.org/mozilla-central/file/tip/docshell/base/nsDefaultURIFixup.cpp
1530             elsif ($proto eq 'http://' && $auth eq '' &&
1531             $host ne 'localhost' && $port eq '80' &&
1532 12         65 $host =~ /^(?:www\.)?([^.]+)$/) {
1533             push(@nuris, join('', $proto, 'www.', $1, '.com', $rest));
1534             }
1535             }
1536             }
1537              
1538 437         973 # remove duplicates, merge nuris and uris
  640         2097  
1539             my %uris = map { $_ => 1 } @uris, @nuris;
1540 437         2100  
1541             return keys %uris;
1542             }
1543              
1544 25     25 0 369 sub decode_ulong_to_ip {
1545             return join(".", unpack("CCCC",pack("H*", sprintf "%08lx", $_[0])));
1546             }
1547              
1548             ###########################################################################
1549              
1550 10     10 0 46 sub first_date {
1551             my (@strings) = @_;
1552 10         30  
1553 10         66 foreach my $string (@strings) {
1554 10 50 33     170 my $time = parse_rfc822_date($string);
1555             return $time if defined($time) && $time;
1556 0         0 }
1557             return;
1558             }
1559              
1560 10     10 0 68 sub receive_date {
1561             my ($header) = @_;
1562 10   50     40  
1563 10         104 $header ||= '';
1564             $header =~ s/\n[ \t]+/ /gs; # fix continuation lines
1565 10         147  
1566 10         34 my @rcvd = ($header =~ /^Received:(.*)/img);
1567             my @local;
1568             my $time;
1569 10 100       34  
1570 8 50 33     239 if (@rcvd) {
1571             if ($rcvd[0] =~ /qmail \d+ invoked by uid \d+/ ||
1572             $rcvd[0] =~ /\bfrom (?:localhost\s|(?:\S+ ){1,2}\S*\b127\.0\.0\.1\b)/)
1573 0         0 {
1574             push @local, (shift @rcvd);
1575 8 50 33     78 }
    50          
1576 0         0 if (@rcvd && ($rcvd[0] =~ m/\bby localhost with \w+ \(fetchmail-[\d.]+/)) {
1577             push @local, (shift @rcvd);
1578             }
1579 0         0 elsif (@local) {
1580             unshift @rcvd, (shift @local);
1581             }
1582             }
1583 10 100       43  
1584 8         43 if (@rcvd) {
1585 8 50       123 $time = first_date(shift @rcvd);
1586             return $time if defined($time);
1587 2 50       12 }
1588 0         0 if (@local) {
1589 0 0       0 $time = first_date(@local);
1590             return $time if defined($time);
1591 2 50       39 }
1592 0         0 if ($header =~ /^(?:From|X-From-Line:)\s+(.+)$/im) {
1593 0 0       0 my $string = $1;
1594 0         0 $string .= " ".local_tz() unless $string =~ /(?:[-+]\d{4}|\b[A-Z]{2,4}\b)/;
1595 0 0       0 $time = first_date($string);
1596             return $time if defined($time);
1597 2 50       14 }
1598 0         0 if (@rcvd) {
1599 0 0       0 $time = first_date(@rcvd);
1600             return $time if defined($time);
1601 2 50       46 }
1602 0         0 if ($header =~ /^Resent-Date:\s*(.+)$/im) {
1603 0 0       0 $time = first_date($1);
1604             return $time if defined($time);
1605 2 50       20 }
1606 2         11 if ($header =~ /^Date:\s*(.+)$/im) {
1607 2 50       21 $time = first_date($1);
1608             return $time if defined($time);
1609             }
1610 0         0  
1611             return time;
1612             }
1613              
1614             ###########################################################################
1615 0     0 0 0 sub get_user_groups {
1616 0         0 my $suid = shift;
1617 0         0 dbg("util: get_user_groups: uid is $suid\n");
1618 0         0 my ( $user, $passwd, $uid, $gid, $quota, $comment, $gcos, $dir, $shell, $expire ) = getpwuid($suid);
1619 0         0 my $rgids="$gid ";
1620 0 0       0 while ( my($name,$pw,$gid,$members) = getgrent() ) {
1621 0         0 if ( $members =~ m/\b$user\b/ ) {
1622 0         0 $rgids .= "$gid ";
1623             dbg("util: get_user_groups: added $gid ($name) to group list which is now: $rgids\n");
1624             }
1625 0         0 }
1626 0         0 endgrent;
1627 0         0 chop $rgids;
1628             return ($rgids);
1629             }
1630              
1631              
1632              
1633 0 0   0 0 0 sub setuid_to_euid {
1634             return if (RUNNING_ON_WINDOWS);
1635              
1636 0         0 # remember the target uid, the first number is the important one
1637 0         0 my $touid = $>;
1638 0         0 my $gids = get_user_groups($touid);
1639 0 0       0 my ( $pgid, $supgs ) = split (' ',$gids,2);
1640 0 0       0 defined $supgs or $supgs=$pgid;
1641             if ($( != $pgid) {
1642 0         0 # Gotta be root for any of this to work
1643 0         0 $> = 0 ;
1644 0         0 dbg("util: changing real primary gid from $( to $pgid and supplemental groups to $supgs to match effective uid $touid");
1645 0         0 POSIX::setgid($pgid);
1646 0         0 dbg("util: POSIX::setgid($pgid) set errno to $!");
1647 0         0 $! = 0;
1648 0         0 $( = $pgid;
1649 0         0 $) = "$pgid $supgs";
1650             dbg("util: assignment \$) = $pgid $supgs set errno to $!");
1651 0 0       0 }
1652 0         0 if ($< != $touid) {
1653             dbg("util: changing real uid from $< to match effective uid $touid");
1654             # bug 3586: kludges needed to work around platform dependent behavior assigning to $<
1655 0         0 # The POSIX functions deal with that so just use it here
1656 0         0 POSIX::setuid($touid);
  0         0  
1657             $< = $touid; $> = $touid; # bug 5574
1658              
1659 0 0       0 # Check that we have now accomplished the setuid: catch bug 3586 if it comes back
1660             if ($< != $touid) {
1661 0         0 # keep this fatal: it's a serious security problem if it fails
1662             die "util: setuid $< to $touid failed!";
1663             }
1664             }
1665             }
1666              
1667             # helper app command-line open
1668 0 0   0 0 0 sub helper_app_pipe_open {
1669 0         0 if (RUNNING_ON_WINDOWS) {
1670             return helper_app_pipe_open_windows (@_);
1671 0         0 } else {
1672             return helper_app_pipe_open_unix (@_);
1673             }
1674             }
1675              
1676 0     0 0 0 sub helper_app_pipe_open_windows {
1677             my ($fh, $stdinfile, $duperr2out, @cmdline) = @_;
1678              
1679 0         0 # use a traditional open(FOO, "cmd |")
1680 0 0       0 my $cmd = join(' ', @cmdline);
  0         0  
1681 0 0       0 if ($stdinfile) { $cmd .= qq/ < "$stdinfile"/; }
  0         0  
1682 0         0 if ($duperr2out) { $cmd .= " 2>&1"; }
1683             return open ($fh, $cmd.'|');
1684             }
1685              
1686 0     0 0 0 sub force_die {
1687             my ($msg) = @_;
1688              
1689             # note use of eval { } scope in logging -- paranoia to ensure that a broken
1690             # $SIG{__WARN__} implementation will not interfere with the flow of control
1691 0         0 # here, where we *have* to die.
  0         0  
1692 0         0 eval { warn $msg }; # hmm, STDERR may no longer be open
  0         0  
1693             eval { dbg("util: force_die: $msg") };
1694 0         0  
1695 0         0 POSIX::_exit(6); # avoid END and destructor processing
1696             kill('KILL',$$); # still kicking? die!
1697             }
1698              
1699 0     39 0 0 sub helper_app_pipe_open_unix {
1700             my ($fh, $stdinfile, $duperr2out, @cmdline) = @_;
1701 0         0  
1702             my $pid;
1703             # do a fork-open, so we can setuid() back
1704 0         0 eval {
  0         0  
1705 0 0       0 $pid = open ($fh, '-|'); 1;
1706 0 0       0 } or do {
  0         0  
1707 0         0 my $eval_stat = $@ ne '' ? $@ : "errno=$!"; chomp $eval_stat;
1708             die "util: cannot fork: $eval_stat";
1709 0 0       0 };
1710             if (!defined $pid) {
1711 0         0 # acceptable to die() here, calling code catches it
1712             die "util: cannot open a pipe to a forked process: $!";
1713             }
1714 0 0       0  
1715 0         0 if ($pid != 0) {
1716             return $pid; # parent process; return the child pid
1717             }
1718              
1719             # else, child process.
1720              
1721             # from now on, we cannot die(), it could create a cloned process
1722 0         0 # use force_die() instead (bug 4370, cmt 2)
1723             eval {
1724 0         0 # go setuid...
1725 0         0 setuid_to_euid();
1726             info("util: setuid: ruid=$< euid=$> rgid=$( egid=$) ");
1727              
1728             # now set up the fds. due to some weirdness, we may have to ensure that
1729             # we *really* close the correct fd number, since some other code may have
1730             # redirected the meaning of STDOUT/STDIN/STDERR it seems... (bug 3649).
1731             # use POSIX::close() for that. it's safe to call close() and POSIX::close()
1732             # on the same fd; the latter is a no-op in that case.
1733 0 0       0  
1734             if (!$stdinfile) { # < $tmpfile
1735 0         0 # ensure we have *some* kind of fd 0.
1736             $stdinfile = "/dev/null";
1737             }
1738 0         0  
1739 0 0       0 my $f = fileno(STDIN);
1740             close STDIN or die "error closing STDIN: $!";
1741              
1742 0 0       0 # sanity: was that the *real* STDIN? if not, close that one too ;)
1743 0         0 if ($f != 0) {
1744             POSIX::close(0);
1745             }
1746 0 0       0  
1747             open (STDIN, "<$stdinfile") or die "cannot open $stdinfile: $!";
1748              
1749             # this should be impossible; if we just closed fd 0, UNIX
1750             # fd behaviour dictates that the next fd opened (the new STDIN)
1751             # will be the lowest unused fd number, which should be 0.
1752 0 0       0 # so die with a useful error if this somehow isn't the case.
1753 0         0 if (fileno(STDIN) != 0) {
1754             die "oops: fileno(STDIN) [".fileno(STDIN)."] != 0";
1755             }
1756              
1757             # Ensure STDOUT is open. As we just created a pipe to ensure this, it has
1758             # to be open to that pipe, and if it isn't, something's seriously screwy.
1759             # Update: actually, this fails! see bug 3649 comment 37. For some reason,
1760             # fileno(STDOUT) can be 0; possibly because open("-|") didn't change the fh
1761             # named STDOUT, instead changing fileno(1) directly. So this is now
1762             # commented.
1763             # if (fileno(STDOUT) != 1) {
1764             # die "setuid: oops: fileno(STDOUT) [".fileno(STDOUT)."] != 1";
1765             # }
1766 0         0  
1767             STDOUT->autoflush(1);
1768 0 0       0  
1769 0         0 if ($duperr2out) { # 2>&1
1770 0 0       0 my $f = fileno(STDERR);
1771             close STDERR or die "error closing STDERR: $!";
1772              
1773 0 0       0 # sanity: was that the *real* STDERR? if not, close that one too ;)
1774 0         0 if ($f != 2) {
1775             POSIX::close(2);
1776             }
1777 0 0       0  
1778 0         0 open (STDERR, ">&STDOUT") or die "dup STDOUT failed: $!";
1779             STDERR->autoflush(1); # make sure not to lose diagnostics if exec fails
1780              
1781 0 0       0 # STDERR must be fd 2 to be useful to subprocesses! (bug 3649)
1782 0         0 if (fileno(STDERR) != 2) {
1783             die "oops: fileno(STDERR) [".fileno(STDERR)."] != 2";
1784             }
1785             }
1786 0         0  
1787 0         0 exec @cmdline;
1788             die "exec failed: $!";
1789 0 0       0 };
  0         0  
1790             my $eval_stat = $@ ne '' ? $@ : "errno=$!"; chomp $eval_stat;
1791              
1792 0         0 # bug 4370: we really have to exit here; break any eval traps
1793             force_die(sprintf('util: failed to spawn a process "%s": %s',
1794 0         0 join(", ",@cmdline), $eval_stat));
1795             die; # must be a die() otherwise -w will complain
1796             }
1797              
1798             ###########################################################################
1799              
1800             # As "perldoc perlvar" notes, in perl 5.8.0, the concept of "safe" signal
1801             # handling was added, which means that signals cannot interrupt a running OP.
1802             # unfortunately, a regexp match is a single OP, so a psychotic m// can
1803             # effectively "hang" the interpreter as a result, and a $SIG{ALRM} handler
1804             # will never get called.
1805             #
1806             # However, by using "unsafe" signals, we can still interrupt that -- and
1807             # POSIX::sigaction can create an unsafe handler on 5.8.x. So this function
1808             # provides a portable way to do that.
1809              
1810 39     0 0 31836 sub trap_sigalrm_fully {
1811 39 0 0     120 my ($handler) = @_;
1812             if ($] < 5.008 || am_running_on_windows()) {
1813             # signals are always unsafe on perl older than 5.008, just use %SIG
1814 39         97 # Bug 6359, no POSIX::SIGALRM on Windows, just use %SIG
1815             $SIG{ALRM} = $handler;
1816             } else {
1817 0         0 # may be using "safe" signals with %SIG; use POSIX to avoid it
1818             POSIX::sigaction POSIX::SIGALRM(), new POSIX::SigAction $handler;
1819             }
1820             }
1821              
1822             ###########################################################################
1823              
1824             # returns ($compiled_re, $error)
1825             # if any errors, $compiled_re = undef, $error has string
1826             # args:
1827             # - regexp
1828             # - strip_delimiters (default: 1) (value 2 means, try strip, but don't error)
1829             # - ignore_always_matching (default: 0)
1830 1256     1248 0 7323 sub compile_regexp {
1831 1256         2882 my ($re, $strip_delimiters, $ignore_always_matching) = @_;
1832             local($1);
1833              
1834 1256 50       3114 # Do not allow already compiled regexes or other funky refs
1835 39         90 if (ref($re)) {
1836             return (undef, 'ref passed');
1837             }
1838              
1839 1252 50       2740 # try stripping by default
1840             $strip_delimiters = 1 if !defined $strip_delimiters;
1841              
1842             # OK, try to remove any normal perl-style regexp delimiters at
1843             # the start and end, and modifiers at the end if present,
1844 1225         2473 # so we can validate those too.
1845 1228         2153 my $origre = $re;
1846             my $delim_end = '';
1847 1231 100       2603  
1848             if ($strip_delimiters >= 1) {
1849 1219 100       6803 # most common delimiter
    100          
    100          
    50          
1850 1022         1851 if ($re =~ s{^/}{}) {
1851             $delim_end = '/';
1852             }
1853             # symmetric delimiters
1854 63         299 elsif ($re =~ s/^(?:m|qr)([\{\(\<\[])//) {
1855             ($delim_end = $1) =~ tr/\{\(\<\[/\}\)\>\]/;
1856             }
1857             # any non-wordchar delimiter, but let's ignore backslash..
1858 159         439 elsif ($re =~ s/^(?:m|qr)(\W)//) {
1859 159 50       617 $delim_end = $1;
1860 33         588 if ($delim_end eq '\\') {
1861             return (undef, 'backslash delimiter not allowed');
1862             }
1863             }
1864 33         157 elsif ($strip_delimiters != 2) {
1865             return (undef, 'missing regexp delimiters');
1866             }
1867             }
1868              
1869 1217         2065 # cut end delimiter, mods
1870 1254 100       2707 my $mods;
1871             if ($delim_end) {
1872 1208 50       14958 # Ignore e because paranoid
1873 1239         4317 if ($re =~ s/\Q${delim_end}\E([a-df-z]*)\z//) {
1874             $mods = $1;
1875 18         91 } else {
1876             return (undef, 'invalid end delimiter/mods');
1877             }
1878             }
1879              
1880             # paranoid check for eval exec (?{foo}), in case someone
1881 1235 100       4275 # actually put "use re 'eval'" somewhere..
1882 18         160 if ($re =~ /\(\?\??\{/) {
1883             return (undef, 'eval (?{}) found');
1884             }
1885              
1886             # check unescaped delimiter, but only if it's not symmetric,
1887             # those will fp on .{0,10} [xyz] etc, no need for so strict checks
1888 1220 100 100     6210 # since these regexes don't end up in eval strings anyway
1889             if ($delim_end && $delim_end !~ tr/\}\)\]//) {
1890 1175         2869 # first we remove all escaped backslashes "\\"
1891 1175         2713 my $dbs_stripped = $re;
1892             $dbs_stripped =~ s/\\\\//g;
1893 1147 100       7171 # now we can properly check if something is unescaped
1894 28         57 if ($dbs_stripped =~ /(?<!\\)\Q${delim_end}\E/) {
1895             return (undef, "unquoted delimiter '$delim_end' found");
1896             }
1897             }
1898 1226 100       2942  
1899 424 100       906 if ($ignore_always_matching) {
1900 32         85 if (my $err = is_always_matching_regexp($re)) {
1901             return (undef, "always matching regexp: $err");
1902             }
1903             }
1904              
1905 1241 100       2949 # now prepend the modifiers, in order to check if they're valid
1906 902         3405 if ($mods) {
1907             $re = '(?'.$mods.')'.$re;
1908             }
1909              
1910 1213         1788 # no re "strict"; # since perl 5.21.8: Ranges of ASCII printables...
1911 1213         2651 my $compiled_re;
1912 1241         2505 $re = untaint_var($re);
1913             my $ok = eval {
1914             # don't dump deprecated warnings to user STDERR
1915             # but die on any other warning for safety?
1916 28 0   0   117 local $SIG{__WARN__} = sub {
1917 25         101 if ($_[0] !~ /deprecated/i) {
1918             die "$_[0]\n";
1919 1238         9071 }
1920 1216         43437 };
1921 1216         7808 $compiled_re = qr/$re/;
1922             1;
1923 1216 100 66     6335 };
1924             if ($ok && ref($compiled_re) eq 'Regexp') {
1925             #$origre = untaint_var($origre);
1926 1216         8052 #dbg("config: accepted regex '%s' => '%s'", $origre, $compiled_re);
1927             return ($compiled_re, '');
1928 31 50       59 } else {
  31         113  
1929 1         7 my $err = $@ ne '' ? $@ : "errno=$!"; chomp $err;
1930 1         10 $err =~ s/ at .*? line \d.*$//;
1931             return (undef, $err);
1932             }
1933             }
1934              
1935 397     396 0 959 sub is_always_matching_regexp {
1936             my ($re) = @_;
1937 396 100       2094  
    100          
    100          
    100          
1938 28         72 if ($re eq '') {
1939             return "empty";
1940             }
1941 2         8 elsif ($re =~ /(?<!\\)\|\|/) {
1942             return "contains '||'";
1943             }
1944 1         6 elsif ($re =~ /^\|/) {
1945             return "starts with '|'";
1946             }
1947 1         5 elsif ($re =~ /\|(?<!\\\|)$/) {
1948             return "ends with '|'";
1949             }
1950 392         1333  
1951             return "";
1952             }
1953              
1954             # convert compiled regexp (?^i:foo) presentation to string (?i)foo
1955             # NOTE: This function is mainly used for Rule2XSBody purposes, since it
1956             # expects "(?i)foo" formatted strings. Generally there should NOT be need
1957             # to use this function. If you need a string, try "".$re / "".qr(foo.*bar).
1958 42     42 0 181 sub qr_to_string {
1959             my ($re) = @_;
1960 42 50       391  
1961 42         175 return undef unless ref($re) eq 'Regexp'; ## no critic (ProhibitExplicitReturnUndef)
1962             $re = "".$re; # stringify
1963 42         368  
1964 42         76 local($1);
1965             my $mods;
1966 42 50       774 # perl >=5.14 (?^i:foo)
    0          
1967 42         277 if ($re =~ s/^\(\?\^([a-z]*)://) {
1968 42         338 $mods = $1;
1969             $re =~ s/\)\s*\z//;
1970             }
1971             # perl <5.14 (?i-xsm:foo)
1972 0         0 elsif ($re =~ s/^\(\?([a-z]*)-[a-z]*://) {
1973 0         0 $mods = $1;
1974             $re =~ s/\)\s*\z//;
1975             }
1976 42 50       274  
1977             return ($mods ? "(?$mods)$re" : $re);
1978             }
1979              
1980             ###########################################################################
1981              
1982             ###
1983             ### regexp_remove_delimiters and make_qr DEPRECATED, to be removed
1984             ### compile_regexp() should be used everywhere
1985             ###
1986              
1987             # Removes any normal perl-style regexp delimiters at
1988             # the start and end, and modifiers at the end (if present).
1989             # If modifiers are found, they are inserted into the pattern using
1990             # the /(?i)/ idiom.
1991              
1992 0     0 0 0 sub regexp_remove_delimiters {
1993             my ($re) = @_;
1994 0         0  
1995             warn("deprecated Util regexp_remove_delimiters() called\n");
1996 0         0  
1997 0 0 0     0 my $delim;
    0          
    0          
    0          
    0          
    0          
1998 0         0 if (!defined $re || $re eq '') {
1999             return undef; ## no critic (ProhibitExplicitReturnUndef)
2000             }
2001 0         0 elsif ($re =~ s/^m?\{//) { # m{foo/bar}
2002             $delim = '}';
2003             }
2004 0         0 elsif ($re =~ s/^m?\[//) { # m[foo/bar]
2005             $delim = ']';
2006             }
2007 0         0 elsif ($re =~ s/^m?\(//) { # m(foo/bar)
2008             $delim = ')';
2009             }
2010 0         0 elsif ($re =~ s/^m?<//) { # m<foo/bar>
2011             $delim = '>';
2012             }
2013 0         0 elsif ($re =~ s/^m?(\W)//) { # m#foo/bar#
2014             $delim = $1;
2015             } else { # /foo\/bar/ or !foo/bar!
2016 0         0 # invalid
2017             return undef; ## no critic (ProhibitExplicitReturnUndef)
2018             }
2019 0 0       0  
2020 0         0 if ($re !~ s/\Q${delim}\E([imsx]*)$//) {
2021             return undef; ## no critic (ProhibitExplicitReturnUndef)
2022             }
2023 0         0  
2024 0 0       0 my $mods = $1;
2025 0         0 if ($mods) {
2026             $re = "(?".$mods.")".$re;
2027             }
2028 0         0  
2029             return $re;
2030             }
2031              
2032             # turn "/foobar/i" into qr/(?i)foobar/
2033              
2034 0     0 0 0 sub make_qr {
2035             my ($re) = @_;
2036 0         0  
2037             warn("deprecated Util make_qr() called\n");
2038 0         0  
2039 0 0 0     0 $re = regexp_remove_delimiters($re);
2040 0         0 return undef if !defined $re || $re eq ''; ## no critic (ProhibitExplicitReturnUndef)
2041 0 0 0     0 my $compiled_re;
  0         0  
  0         0  
2042 0         0 if (eval { $compiled_re = qr/$re/; 1; } && ref($compiled_re) eq 'Regexp') {
2043             return $compiled_re;
2044 0         0 } else {
2045             return undef; ## no critic (ProhibitExplicitReturnUndef)
2046             }
2047             }
2048              
2049             ###########################################################################
2050              
2051 0     0 0 0 sub get_my_locales {
2052             my ($ok_locales) = @_;
2053 0         0  
2054 0         0 my @locales = split(/\s+/, $ok_locales);
2055 0   0     0 my $lang = $ENV{'LC_ALL'};
2056 0   0     0 $lang ||= $ENV{'LANGUAGE'};
2057 0   0     0 $lang ||= $ENV{'LC_MESSAGES'};
2058 0 0       0 $lang ||= $ENV{'LANG'};
2059 0         0 push (@locales, $lang) if defined($lang);
2060             return @locales;
2061             }
2062              
2063             ###########################################################################
2064              
2065             # bug 5612: work around for bugs in Berkeley db 4.2
2066             #
2067             # on 4.2 having the __db.[DBNAME] file will cause an loop that will never finish
2068             # on 4.3+ the loop will timeout after 301 open attempts, but we will still
2069             # be unable to open the database. This workaround solves both problems.
2070             #
2071 0     0 0 0 sub avoid_db_file_locking_bug {
2072             my ($path) = @_;
2073 0         0  
2074             my $db_tmpfile = untaint_file_path(File::Spec->catfile(dirname($path),
2075             '__db.'.basename($path)));
2076              
2077 0         0 # delete "__db.[DBNAME]" and "__db.[DBNAME].*"
2078 0         0 foreach my $tfile ($db_tmpfile, glob("$db_tmpfile.*")) {
2079 0 0       0 my $file = untaint_file_path($tfile);
2080 0 0       0 my $stat_errn = stat($file) ? 0 : 0+$!;
2081             next if $stat_errn == ENOENT;
2082 0         0  
2083 0 0       0 dbg("util: Berkeley DB bug work-around: cleaning tmp file $file");
2084             unlink($file) or warn "cannot remove Berkeley DB tmp file $file: $!\n";
2085             }
2086             }
2087              
2088             ###########################################################################
2089              
2090 0     0 0 0 sub fisher_yates_shuffle {
2091 0         0 my ($deck) = @_;
  0         0  
2092 0         0 for (my $i = $#{$deck}; $i > 0; $i--) {
2093 0         0 my $j = int rand($i+1);
2094             @$deck[$i,$j] = @$deck[$j,$i];
2095             }
2096             }
2097              
2098             ###########################################################################
2099              
2100              
2101             ###########################################################################
2102              
2103             # bugs 6419 and 2607 relate to returning a score 1/10th lower than the
2104             # required score if the rounded to the 10th version of the score is equal
2105             # to the required score
2106             #
2107             # moved from PerMessageStatus.pm to here and modified to allow for a
2108             # non-class version of the routine to be called from PerMessageStatus
2109             # and from spamd
2110              
2111 38     38 0 148 sub get_tag_value_for_score {
2112             my ($score, $rscore, $is_spam) = @_;
2113              
2114             #BASED ON _get_tag_value_for_score from PerMsgStatus.pm
2115 38         236  
2116 38         202 $score = sprintf("%2.1f", $score);
2117             $rscore = sprintf("%2.1f", $rscore);
2118              
2119             # if the email is spam, return the accurate score
2120             # if the email is NOT spam and the score is less than the required score,
2121             # then return the accurate score
2122 38 50 66     314  
2123             return $score if $is_spam or $score < $rscore;
2124              
2125             # if the email is NOT spam and $score = $rscore, return the $rscore - 0.1
2126             # effectively flooring the value to the closest tenth
2127 0            
2128             return $rscore - 0.1;
2129             }
2130              
2131             ###########################################################################
2132              
2133              
2134             1;
2135              
2136             =back
2137              
2138             =cut