File Coverage

lib/Mail/SpamAssassin/Util.pm
Criterion Covered Total %
statement 579 895 64.6
branch 230 534 43.0
condition 80 176 45.4
subroutine 62 78 79.4
pod 4 53 7.5
total 955 1736 55.0


line stmt bran cond sub pod time code
1             # <@LICENSE>
2             # Licensed to the Apache Software Foundation (ASF) under one or more
3             # contributor license agreements. See the NOTICE file distributed with
4             # this work for additional information regarding copyright ownership.
5             # The ASF licenses this file to you under the Apache License, Version 2.0
6             # (the "License"); you may not use this file except in compliance with
7             # the License. You may obtain a copy of the License at:
8             #
9             # http://www.apache.org/licenses/LICENSE-2.0
10             #
11             # Unless required by applicable law or agreed to in writing, software
12             # distributed under the License is distributed on an "AS IS" BASIS,
13             # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14             # See the License for the specific language governing permissions and
15             # limitations under the License.
16             # </@LICENSE>
17              
18             =head1 NAME
19              
20             Mail::SpamAssassin::Util - utility functions
21              
22             =head1 DESCRIPTION
23              
24             A general class for utility functions. Please use this for functions that
25             stand alone, without requiring a $self object, Portability functions
26             especially.
27              
28             NOTE: The functions in this module are to be considered private. Their API may
29             change at any point, and it's expected that they'll only be used by other
30             Mail::SpamAssassin modules. (TODO: we should probably revisit this if
31             it's useful for plugin development.)
32              
33             NOTE: Utility functions should not be changing global variables such
34             as $_, $1, $2, ... $/, etc. unless explicitly documented. If these
35             variables are in use by these functions, they should be localized.
36              
37             =over 4
38              
39             =cut
40              
41              
42             use strict;
43 44     44   500080 use warnings;
  44         123  
  44         1343  
44 44     44   227 # use bytes;
  44         88  
  44         1619  
45             use re 'taint';
46 44     44   231  
  44         65  
  44         1907  
47             require 5.008001; # needs utf8::is_utf8()
48              
49             use Mail::SpamAssassin::Logger;
50 44     44   1707  
  44         106  
  44         2590  
51             use Exporter ();
52 44     44   309  
  44         73  
  44         3564  
53             our @ISA = qw(Exporter);
54             our @EXPORT = ();
55             our @EXPORT_OK = qw(&local_tz &base64_decode &untaint_var &untaint_file_path
56             &exit_status_str &proc_status_ok &am_running_on_windows
57             &reverse_ip_address &decode_dns_question_entry &touch_file
58             &get_my_locales &parse_rfc822_date &get_user_groups
59             &secure_tmpfile &secure_tmpdir &uri_list_canonicalize
60             &compile_regexp &qr_to_string &is_fqdn_valid);
61              
62             our $AM_TAINTED;
63              
64             use Config;
65 44     44   253 use IO::Handle;
  44         78  
  44         1854  
66 44     44   3555 use File::Spec;
  44         37386  
  44         1730  
67 44     44   247 use File::Basename;
  44         91  
  44         1189  
68 44     44   209 use Time::Local;
  44         87  
  44         2769  
69 44     44   19859 use Sys::Hostname (); # don't import hostname() into this namespace!
  44         83823  
  44         2317  
70 44     44   17641 use NetAddr::IP 4.000;
  44         40581  
  44         1076  
71 44     44   2830 use Fcntl;
  44         115642  
  44         246  
72 44     44   7375 use Errno qw(ENOENT EACCES EEXIST);
  44         82  
  44         11402  
73 44     44   3047 use POSIX qw(:sys_wait_h WIFEXITED WIFSIGNALED WIFSTOPPED WEXITSTATUS
  44         7870  
  44         5623  
74 44         428 WTERMSIG WSTOPSIG);
75 44     44   256  
  44         71  
76             ###########################################################################
77              
78             use constant HAS_MIME_BASE64 => eval { require MIME::Base64; };
79 44     44   17907 use constant RUNNING_ON_WINDOWS => ($^O =~ /^(?:mswin|dos|os2)/oi);
  44         86  
  44         97  
  44         18192  
80 44     44   27614  
  44         110  
  44         2520  
81             # These are only defined as stubs on Windows (see bugs 6798 and 6470).
82             BEGIN {
83             if (RUNNING_ON_WINDOWS) {
84 44 50   44   43973 no warnings 'redefine';
85 44     44   238  
  44         75  
  44         5478  
86             # See the section on $? at
87             # http://perldoc.perl.org/perlvar.html#Error-Variables for some
88             # hints on the magic numbers that are used here.
89             *WIFEXITED = sub { not $_[0] & 127 };
90 0         0 *WEXITSTATUS = sub { $_[0] >> 8 };
  0         0  
91 0         0 *WIFSIGNALED = sub { ($_[0] & 127) && (($_[0] & 127) != 127) };
  0         0  
92 0 0       0 *WTERMSIG = sub { $_[0] & 127 };
  0         0  
93 0         0 }
  0         0  
94             }
95              
96             ###########################################################################
97              
98             # find an executable in the current $PATH (or whatever for that platform)
99             {
100             # Show the PATH we're going to explore only once.
101             my $displayed_path = 0;
102              
103             my ($filename) = @_;
104              
105 9     9 0 68 clean_path_in_taint_mode();
106             if ( !$displayed_path++ ) {
107 9         19 dbg("util: current PATH is: ".join($Config{'path_sep'},File::Spec->path()));
108 9 100       128 }
109 2         4 foreach my $path (File::Spec->path()) {
110             my $fname = File::Spec->catfile ($path, $filename);
111 2         5 if ( -f $fname ) {
112 2         5 if (-x $fname) {
113 1 50       38 dbg("util: executable for $filename was found at $fname");
114 2 0       20 return $fname;
115 6         45 }
116 6         55 else {
117             dbg("util: $filename was found at $fname, but isn't executable");
118             }
119 0         0 }
120             }
121             return;
122             }
123 0         0 }
124              
125             ###########################################################################
126              
127             # taint mode: delete more unsafe vars for exec, as per perlsec
128             {
129             # We only need to clean the environment once, it stays clean ...
130             my $cleaned_taint_path = 0;
131              
132             return if ($cleaned_taint_path++);
133             return unless am_running_in_taint_mode();
134              
135 75 100   77 0 427 dbg("util: taint mode: deleting unsafe environment variables, resetting PATH");
136 17 50       83  
137             if (RUNNING_ON_WINDOWS) {
138 19         61 dbg("util: running on Win32, skipping PATH cleaning");
139             return;
140 49 50       268 }
141 9         48  
142 9         32 delete @ENV{qw(IFS CDPATH ENV BASH_ENV)};
143              
144             # Go through and clean the PATH out
145 26         417 my @path;
146             my @stat;
147             foreach my $dir (File::Spec->path()) {
148 17         57 next unless $dir;
149              
150 17         684 # untaint if at least 1 char and no NL (is the restriction intentional?)
151 78 50       390 local ($1);
152             $dir = untaint_var($1) if $dir =~ /^(.+)$/;
153             # then clean ( 'foo/./bar' -> 'foo/bar', etc. )
154 78         173 $dir = File::Spec->canonpath($dir);
155 78 50       869  
156             if (!File::Spec->file_name_is_absolute($dir)) {
157 96         310 dbg("util: PATH included '$dir', which is not absolute, dropping");
158             next;
159 96 100       1681 }
    50          
    50          
    50          
160 30         267 elsif (!(@stat=stat($dir))) {
161 30         118 dbg("util: PATH included '$dir', which is unusable, dropping: $!");
162             next;
163             }
164 27         720 elsif (!-d _) {
165 0         0 dbg("util: PATH included '$dir', which isn't a directory, dropping");
166             next;
167             }
168 0         0 elsif (($stat[2]&2) != 0) {
169 0         0 # World-Writable directories are considered insecure.
170             # We could be more paranoid and check all of the parent directories as well,
171             # but it's good for now.
172             dbg("util: PATH included '$dir', which is world writable, dropping");
173             next;
174             }
175 0         0  
176 0         0 dbg("util: PATH included '$dir', keeping");
177             push(@path, $dir);
178             }
179 66         411  
180 66         211 $ENV{'PATH'} = join($Config{'path_sep'}, @path);
181             dbg("util: final PATH set to: ".$ENV{'PATH'});
182             }
183 17         680 }
184 44         306  
185             # taint mode: are we running in taint mode? 1 for yes, 0 for no.
186             return $AM_TAINTED if defined $AM_TAINTED;
187              
188             if ($] >= 5.008) {
189             # perl 5.8 and above, ${^TAINT} is a syntax violation in 5.005
190 160 100   165 0 1084 $AM_TAINTED = eval q(no warnings q(syntax); ${^TAINT});
191             }
192 26 50       473 else {
193             # older versions
194 26     26   418 my $blank;
  26         74  
  26         1811  
  26         1825  
195             for my $d ((File::Spec->curdir, File::Spec->rootdir, File::Spec->tmpdir)) {
196             opendir(TAINT, $d) || next;
197             $blank = readdir(TAINT);
198 43         297 closedir(TAINT) or die "error closing directory $d: $!";
199 9         45 last;
200 9 0       1041 }
201 0         0 if (!(defined $blank && $blank)) {
202 0 0       0 # these are sometimes untainted, so this is less preferable than readdir
203 0         0 $blank = join('', values %ENV, $0, @ARGV);
204             }
205 0 0 0     0 $blank = substr($blank, 0, 0);
206             # seriously mind-bending perl
207 0         0 $AM_TAINTED = not eval { eval "1 || $blank" || 1 };
208             }
209 0         0 dbg("util: running in taint mode? %s", $AM_TAINTED ? "yes" : "no");
210             return $AM_TAINTED;
211 0 0       0 }
  0         0  
212              
213 17 50       143 ###########################################################################
214 17         52  
215             return RUNNING_ON_WINDOWS;
216             }
217              
218             ###########################################################################
219              
220 146     189 0 1569 # untaint a path to a file, e.g. "/home/jm/.spamassassin/foo",
221             # "C:\Program Files\SpamAssassin\tmp\foo", "/home/��t/etc".
222             #
223             # TODO: this does *not* handle locales well. We cannot use "use locale"
224             # and \w, since that will not detaint the data. So instead just allow the
225             # high-bit chars from ISO-8859-1, none of which have special metachar
226             # meanings (as far as I know).
227             #
228             my ($path) = @_;
229              
230             return unless defined($path);
231             return '' if ($path eq '');
232              
233             local ($1);
234 273     295 0 859 # Barry Jaspan: allow ~ and spaces, good for Windows.
235             # Also return '' if input is '', as it is a safe path.
236 273 50       1046 # Bug 7264: allow also parenthesis, e.g. "C:\Program Files (x86)"
237 295 50       1064 my $chars = '-_A-Za-z0-9.%=+,/:()\\@\\xA0-\\xFF\\\\';
238             my $re = qr{^\s*([$chars][${chars}~ ]*)\z}o;
239 313         1024  
240             if ($path =~ $re) {
241             $path = $1;
242             return untaint_var($path);
243 313         943 } else {
244 313         2384 warn "util: refusing to untaint suspicious path: \"$path\"\n";
245             return $path;
246 313 50       2969 }
247 313         821 }
248 313         1818  
249             my ($host) = @_;
250 49         427  
251 49         113 return unless defined($host);
252             return '' if ($host eq '');
253              
254             # from RFC 1035, but allowing domains starting with numbers:
255             # $label = q/[A-Za-z\d](?:[A-Za-z\d-]{0,61}[A-Za-z\d])?/;
256 49     77 0 144 # $domain = qq<$label(?:\.$label)*>;
257             # length($host) <= 255 && $host =~ /^($domain)$/
258 0 0       0 # expanded (no variables in the re) because of a tainting bug in Perl 5.8.0
259 0 0       0 if (length($host) <= 255 && $host =~ /^[a-z\d](?:[a-z\d-]{0,61}[a-z\d])?(?:\.[a-z\d](?:[a-z\d-]{0,61}[a-z\d])?)*$/i) {
260             return untaint_var($host);
261             }
262             else {
263             warn "util: cannot untaint hostname: \"$host\"\n";
264             return $host;
265             }
266 0 0 0     0 }
267 28         50  
268             # This sub takes a scalar or a reference to an array, hash, scalar or another
269             # reference and recursively untaints all its values (and keys if it's a
270 28         48 # reference to a hash). It should be used with caution as blindly untainting
271 28         51 # values subverts the purpose of working in taint mode. It will return the
272             # untainted value if requested but to avoid unnecessary copying, the return
273             # value should be ignored when working on lists.
274             # Bad:
275             # %ENV = untaint_var(\%ENV);
276             # Better:
277             # untaint_var(\%ENV);
278             #
279             # my $arg = $_[0]; # avoid copying unnecessarily
280             if (!ref $_[0]) { # optimized by-far-the-most-common case
281             # Bug 7591 not using this faster untaint. https://bz.apache.org/SpamAssassin/show_bug.cgi?id=7591
282             #return defined $_[0] ? scalar each %{ { $_[0] => undef } } : undef; ## no critic (ProhibitExplicitReturnUndef) - See Bug 7120 - fast untaint (hash keys cannot be tainted)
283             no re 'taint'; # override a "use re 'taint'" from outer scope
284             return undef if !defined $_[0]; ## no critic (ProhibitExplicitReturnUndef) - See Bug 7120
285             local($1); # avoid Perl taint bug: tainted global $1 propagates taintedness
286             $_[0] =~ /^(.*)\z/s;
287             return $1;
288 10996 100   10968 0 45544  
289             } else {
290             my $r = ref $_[0];
291 44     44   335 if ($r eq 'ARRAY') {
  44         101  
  44         393844  
292 10907 50       21360 my $arg = $_[0];
293 10879         19776 $_ = untaint_var($_) for @{$arg};
294 13223         29254 return @{$arg} if wantarray;
295 13215         40711 }
296             elsif ($r eq 'HASH') {
297             my $arg = $_[0];
298 2425         5329 if ($arg == \%ENV) { # purge undefs from %ENV, untaint the rest
299 2425 100 33     6062 while (my($k, $v) = each %{$arg}) {
    100          
    50          
300 2342         7756 # It is safe to delete the item most recently returned by each()
301 14         26 if (!defined $v) { delete ${$arg}{$k}; next }
  14         62  
302 14 50       36 ${$arg}{untaint_var($k)} = untaint_var($v);
  8         9  
303             }
304             } else {
305 9         36 # hash keys are never tainted,
306 9 50       26 # although old version of perl had some quirks there
307 1         5 while (my($k, $v) = each %{$arg}) {
  35         127  
308             ${$arg}{untaint_var($k)} = untaint_var($v);
309 34 50       66 }
  0         0  
  0         0  
  0         0  
310 34         55 }
  34         93  
311             return %{$arg} if wantarray;
312             }
313             elsif ($r eq 'SCALAR' || $r eq 'REF') {
314             my $arg = $_[0];
315 0         0 ${$arg} = untaint_var(${$arg});
  0         0  
316 0         0 }
  0         0  
317             else {
318             warn "util: can't untaint a $r !\n";
319 1 50       6 }
  0         0  
320             }
321             return $_[0];
322 82         303 }
323 82         152  
  82         201  
  82         214  
324             ###########################################################################
325              
326 0         0 my ($v) = @_;
327             return $v unless defined $v; # can't taint "undef"
328              
329 89         240 # $^X is apparently "always tainted".
330             # Concatenating an empty tainted string taints the result.
331             # Bug 7806: use $fh trick to enforce for older Perl
332             my $t = eval { local $/; open my $fh, '<', \""; <$fh>; };
333             $t = '' unless defined $t;
334             return $v . $t . substr($^X, 0, 0);
335 468     2812 0 721 }
336 468 50       796  
337             ###########################################################################
338              
339             # Check for full hostname / FQDN / DNS name validity. IP addresses must be
340             # validated with other functions like $IP_ADDRESS. Does not check for valid
341 476     5   506 # TLD, use $self->{main}->{registryboundaries}->is_domain_valid()
  1184         1886  
  1184         4394  
  1184         7912  
  5         41  
  5         9  
  5         71  
342 1184 50       2468 # additionally for that.
343 1184         5490 my ($host) = @_;
344             return if !defined $host;
345              
346             # remove trailing dots
347             $host =~ s/\.+\z//;
348              
349             # max total length 253
350             return if length($host) > 253;
351              
352             # validate dot separated components/labels
353 912     912 0 10553 my @labels = split(/\./, lc $host);
354 912 50       1986 my $cnt = scalar @labels;
355             return unless $cnt > 1; # at least two labels required
356             foreach my $label (@labels) {
357 912         3253 # length of 1-63
358             return if length($label) < 1;
359             return if length($label) > 63;
360 739 50       1199 # alphanumeric, - allowed only in middle part
361             # underscores are allowed in DNS queries, so we allow here
362             return if $label !~ /^[a-z0-9_](?:[a-z0-9_-]*[a-z0-9_])?$/;
363 739         1316 # 1st-2nd level part can not contain _, only third+ can
364 739         1317 if ($cnt == 2 || $cnt == 1) {
365 739 100       1415 return if index($label, '_') != -1;
366 699         1532 }
367             $cnt--;
368 911 100       1469 }
369 904 50       1493  
370             # is good
371             return 1;
372 896 100       1814 }
373              
374 1692 100 100     2871 ###########################################################################
375 1608 100       2265  
376             # map process termination status number to an informative string, and
377 1686         4102 # append optional message (dual-valued errno or a string or a number),
378             # returning the resulting string
379             #
380             my($stat,$errno) = @_;
381 1463         3434 my $str;
382             if (!defined($stat)) {
383             $str = '(no status)';
384             } elsif (WIFEXITED($stat)) {
385             $str = sprintf("exit %d", WEXITSTATUS($stat));
386             } elsif (WIFSTOPPED($stat)) {
387             $str = sprintf("stopped, signal %d", WSTOPSIG($stat));
388             } else {
389             my $sig = WTERMSIG($stat);
390             $str = sprintf("%s, signal %d (%04x)",
391 1008     543 0 1833 $sig == 1 ? 'HANGUP' : $sig == 2 ? 'interrupted' :
392 1301         1692 $sig == 6 ? 'ABORTED' : $sig == 9 ? 'KILLED' :
393 490 0       1347 $sig == 15 ? 'TERMINATED' : 'DIED',
    0          
    0          
394 0         0 $sig, $stat);
395             }
396 0         0 if (defined $errno) { # deal with dual-valued and plain variables
397             $str .= ', '.$errno if (0+$errno) != 0 || ($errno ne '' && $errno ne '0');
398 0         0 }
399             return $str;
400 0         0 }
401 0 0       0  
    0          
    0          
    0          
    0          
402             ###########################################################################
403              
404             # check errno to be 0 and a process exit status to be in the list of success
405             # status codes, returning true if both are ok, and false otherwise
406             #
407 0 0       0 my($exit_status,$errno,@success) = @_;
408 0 0 0     0 my $ok = 0;
      0        
409             if ((!defined $errno || $errno == 0) && WIFEXITED($exit_status)) {
410 0         0 my $j = WEXITSTATUS($exit_status);
411             if (!@success) { $ok = $j==0 } # empty list implies only status 0 is good
412             elsif (grep {$_ == $j} @success) { $ok = 1 }
413             }
414             return $ok;
415             }
416              
417             ###########################################################################
418              
419 0     0 0 0 # timezone mappings: in case of conflicts, use RFC 2822, then most
420 0         0 # common and least conflicting mapping
421 0 0 0     0 my %TZ = (
      0        
422 0         0 # standard
423 0 0       0 'UT' => '+0000',
  0 0       0  
424 0         0 'UTC' => '+0000',
  0         0  
425             # US and Canada
426 0         0 'NDT' => '-0230',
427             'AST' => '-0400',
428             'ADT' => '-0300',
429             'NST' => '-0330',
430             'EST' => '-0500',
431             'EDT' => '-0400',
432             'CST' => '-0600',
433             'CDT' => '-0500',
434             'MST' => '-0700',
435             'MDT' => '-0600',
436             'PST' => '-0800',
437             'PDT' => '-0700',
438             'HST' => '-1000',
439             'AKST' => '-0900',
440             'AKDT' => '-0800',
441             'HADT' => '-0900',
442             'HAST' => '-1000',
443             # Europe
444             'GMT' => '+0000',
445             'BST' => '+0100',
446             'IST' => '+0100',
447             'WET' => '+0000',
448             'WEST' => '+0100',
449             'CET' => '+0100',
450             'CEST' => '+0200',
451             'EET' => '+0200',
452             'EEST' => '+0300',
453             'MSK' => '+0300',
454             'MSD' => '+0400',
455             'MET' => '+0100',
456             'MEZ' => '+0100',
457             'MEST' => '+0200',
458             'MESZ' => '+0200',
459             # South America
460             'BRST' => '-0200',
461             'BRT' => '-0300',
462             # Australia
463             'AEST' => '+1000',
464             'AEDT' => '+1100',
465             'ACST' => '+0930',
466             'ACDT' => '+1030',
467             'AWST' => '+0800',
468             # New Zealand
469             'NZST' => '+1200',
470             'NZDT' => '+1300',
471             # Asia
472             'JST' => '+0900',
473             'KST' => '+0900',
474             'HKT' => '+0800',
475             'SGT' => '+0800',
476             'PHT' => '+0800',
477             # Middle East
478             'IDT' => '+0300',
479             );
480              
481             # month mappings
482             my %MONTH = (jan => 1, feb => 2, mar => 3, apr => 4, may => 5, jun => 6,
483             jul => 7, aug => 8, sep => 9, oct => 10, nov => 11, dec => 12);
484              
485             my $LOCALTZ;
486              
487             return $LOCALTZ if defined($LOCALTZ);
488              
489             # standard method for determining local timezone
490             my $time = time;
491             my @g = gmtime($time);
492             my @t = localtime($time);
493             my $z = $t[1]-$g[1]+($t[2]-$g[2])*60+($t[7]-$g[7])*1440+($t[5]-$g[5])*525600;
494             $LOCALTZ = sprintf("%+.2d%.2d", $z/60, $z%60);
495             return $LOCALTZ;
496             }
497              
498             my ($date) = @_;
499             local ($_); local ($1,$2,$3,$4);
500 2 50   16 0 9 my ($yyyy, $mmm, $dd, $hh, $mm, $ss, $mon, $tzoff);
501              
502             # make it a bit easier to match
503 2         5 $_ = " $date "; s/, */ /gs; s/\s+/ /gs;
504 2         11  
505 2         34 # now match it in parts. Date part first:
506 16         5190 if (s/ (\d+) (Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) (\d{4}) / /i) {
507 16         29 $dd = $1; $mon = lc($2); $yyyy = $3;
508 16         67 } elsif (s/ (Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) +(\d+) \d+:\d+:\d+ (\d{4}) / /i) {
509             $dd = $2; $mon = lc($1); $yyyy = $3;
510             } elsif (s/ (\d+) (Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) (\d{2,3}) / /i) {
511             $dd = $1; $mon = lc($2); $yyyy = $3;
512 26     12 0 56 } else {
513 26         62 dbg("util: time cannot be parsed: $date");
  26         155  
514 26         120 return;
515             }
516              
517 26         152 # handle two and three digit dates as specified by RFC 2822
  26         125  
  26         219  
518             if (defined $yyyy) {
519             if (length($yyyy) == 2 && $yyyy < 50) {
520 26 50       189 $yyyy += 2000;
    0          
    0          
521 12         57 }
  12         74  
  12         51  
522             elsif (length($yyyy) != 4) {
523 0         0 # three digit years and two digit years with values between 50 and 99
  0         0  
  0         0  
524             $yyyy += 1900;
525 0         0 }
  0         0  
  14         29  
526             }
527 14         41  
528 0         0 # hh:mm:ss
529             if (s/ (\d?\d):(\d\d)(:(\d\d))? / /) {
530             $hh = $1; $mm = $2; $ss = $4 || 0;
531             }
532 12 50       62  
533 26 50 33     151 # numeric timezones
    50          
534 14         21 if (s/ ([-+]\d{4}) / /) {
535             $tzoff = $1;
536             }
537             # common timezones
538 14         14 elsif (s/\b([A-Z]{2,4}(?:-DST)?)\b/ / && exists $TZ{$1}) {
539             $tzoff = $TZ{$1};
540             }
541             # all other timezones are considered equivalent to "-0000"
542             $tzoff ||= '-0000';
543 26 50       162  
544 26   50     89 # months
  26         61  
  12         98  
545             if (exists $MONTH{$mon}) {
546             $mmm = $MONTH{$mon};
547             }
548 26 50 0     154  
    0          
549 26         65 $hh ||= 0; $mm ||= 0; $ss ||= 0; $dd ||= 0; $mmm ||= 0; $yyyy ||= 0;
550              
551             # Fudge invalid times so that we get a usable date.
552             if ($ss > 59) { # rfc2822 does recognize leap seconds, not handled here
553 14         18 dbg("util: second after supported range, forcing second to 59: $date");
554             $ss = 59;
555             }
556 26   50     94  
557             if ($mm > 59) {
558             dbg("util: minute after supported range, forcing minute to 59: $date");
559 26 50       102 $mm = 59;
560 26         63 }
561              
562             if ($hh > 23) {
563 26   50     55 dbg("util: hour after supported range, forcing hour to 23: $date");
  26   50     60  
  26   50     60  
  26   50     73  
  13   50     41  
  13   50     38  
564             $hh = 23;
565             }
566 26 100       78  
567 1         6 my $max_dd = 31;
568 1         1 if ($mmm == 4 || $mmm == 6 || $mmm == 9 || $mmm == 11) {
569             $max_dd = 30;
570             }
571 26 100       85 elsif ($mmm == 2) {
572 1         4 $max_dd = (!($yyyy % 4) && (($yyyy % 100) || !($yyyy % 400))) ? 29 : 28;
573 1         1 }
574             if ($dd > $max_dd) {
575             dbg("util: day is too high, incrementing date to next valid date: $date");
576 26 100       53 $dd = 1;
577 14         66 $mmm++;
578 6         8 if ($mmm > 12) {
579             $mmm = 1;
580             $yyyy++;
581 14         207 }
582 26 100 100     215 }
    100 100        
      66        
583 8         29  
584             # Time::Local (v1.10 at least, also 1.17) throws warnings when dates cause
585             # a signed 32-bit integer overflow. So force a min/max for year.
586 11 50 33     63 if ($yyyy > 2037) {
587             dbg("util: year after supported range, forcing year to 2037: $date");
588 17 100       55 $yyyy = 2037;
589 5         10 }
590 1         2 elsif ($yyyy < 1970) {
591 1         3 dbg("util: year before supported range, forcing year to 1970: $date");
592 14 100       26 $yyyy = 1970;
593 0         0 }
594 0         0  
595             my $time;
596             eval { # could croak
597             $time = timegm($ss, $mm, $hh, $dd, $mmm-1, $yyyy);
598             1;
599             } or do {
600 12 50       116 my $eval_stat = $@ ne '' ? $@ : "errno=$!"; chomp $eval_stat;
    50          
601 0         0 dbg("util: time cannot be parsed: $date, $yyyy-$mmm-$dd $hh:$mm:$ss, $eval_stat");
602 14         15 return;
603             };
604              
605 14         15 if ($tzoff =~ /([-+])(\d\d)(\d\d)$/) # convert to seconds difference
606 14         36 {
607             $tzoff = (($2 * 60) + $3) * 60;
608             if ($1 eq '-') {
609 26         441 $time += $tzoff;
610             } elsif ($time < $tzoff) { # careful with year 1970 and '+' time zones
611 12         122 $time = 0;
612 12         1090 } else {
613 12 50       30 $time -= $tzoff;
614 0 0       0 }
  14         48  
615 14         35 }
616 14         30  
617             return $time;
618             }
619 24 50       105  
620             my($time) = @_;
621 12         61  
622 14 100       157 my @days = qw/Sun Mon Tue Wed Thu Fri Sat/;
    50          
623 15         52 my @months = qw/Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec/;
624             my @localtime = localtime($time || time);
625 0         0 $localtime[5]+=1900;
626              
627 11         36 sprintf("%s, %02d %s %4d %02d:%02d:%02d %s", $days[$localtime[6]], $localtime[3],
628             $months[$localtime[4]], @localtime[5,2,1,0], local_tz());
629             }
630              
631 12         110 ###########################################################################
632              
633             # This used to be a wrapper for Text::Wrap. Now we do basically the same
634             # function as Text::Wrap::wrap(). See bug 5056 and 2165 for more information
635 2     2 0 14 # about why things aren't using that function anymore.
636             #
637 2         10 # It accepts values for almost all options which can be set
638 2         7 # in Text::Wrap. All parameters are optional (leaving away the first one
639 2   33     83 # probably doesn't make too much sense though), either a missing or a false
640 2         8 # value will fall back to the default.
641             #
642 2         21 # The parameters are:
643             # 1st: The string to wrap. Only one string is allowed.
644             # (default: "")
645             # 2nd: The prefix to be put in front of all lines except the first one.
646             # (default: "")
647             # 3rd: The prefix for the first line. (default: "")
648             # 4th: The number of columns available (no line will be longer than this
649             # unless overflow is set below). (default: 77)
650             # 5th: Enable or disable overflow mode. (default: 0)
651             # 6th: The sequence/expression to wrap at. (default: '\s');
652             # 7th: The string to join the lines again. (default: "\n")
653              
654             my $string = shift || '';
655             my $prefix = shift || '';
656             my $first = shift || '';
657             my $length = shift || 77;
658             my $overflow = shift || 0;
659             my $break = shift || qr/\s/;
660             my $sep = "\n";
661              
662             # go ahead and break apart the string, keeping the break chars
663             my @arr = split(/($break)/, $string);
664              
665             # tack the first prefix line at the start
666             splice @arr, 0, 0, $first if $first;
667              
668             # go ahead and make up the lines in the array
669             my $pos = 0;
670 261   50 261 0 2858 my $pos_mod = 0;
671 261   50     517 while ($#arr > $pos) {
672 261   100     640 my $tmpline = $arr[$pos] ;
673 365   50     771 $tmpline =~ s/\t/ /g;
674 365   100     796 my $len = length ($tmpline);
675 365   66     1009 # if we don't want to have lines > $length (overflow==0), we
676 365         542 # need to verify what will happen with the next line. if we don't
677             # care if a single line goes longer, don't care about the next
678             # line.
679 365         5179 # we also want this to be true for the first entry on the line
680             if ($pos_mod != 0 && $overflow == 0) {
681             my $tmpnext = $arr[$pos+1] ;
682 365 100       1084 $tmpnext =~ s/\t/ /g;
683             $len += length ($tmpnext);
684             }
685 365         471  
686 365         1026 if ($len <= $length) {
687 365         885 # if the length determined above is within bounds, go ahead and
688 3202         4726 # merge the next line with the current one
689 3202         5118 $arr[$pos] .= splice @arr, $pos+1, 1;
690 3202         4505 $pos_mod = 1;
691             }
692             else {
693             # ok, the current line is the right length, but there's more text!
694             # prep the current line and then go onto the next one
695              
696 3838 100 100     8051 # strip any trailing whitespace from the next line that's ready
697 3370         4725 $arr[$pos] =~ s/\s+$//;
698 3370         4250  
699 3370         5770 # go to the next line and reset pos_mod
700             $pos++;
701             $pos_mod = 0;
702 3722 100       5527  
703             # put the appropriate prefix at the front of the line
704             splice @arr, $pos, 0, $prefix;
705 3612         5998 }
706 3612         8026 }
707              
708             # go ahead and return the wrapped text, with the separator in between
709             return join($sep, @arr);
710             }
711              
712             ###########################################################################
713 850         1710  
714             # Some base64 decoders will remove intermediate "=" characters, others
715             # will stop decoding on the first "=" character, this one translates "="
716 838         1450 # characters to null.
717 838         1861 local $_ = shift;
718             my $decoded_length = shift;
719              
720 122         423 s/\s+//g;
721             if (HAS_MIME_BASE64 && (length($_) % 4 == 0) &&
722             m|^(?:[A-Za-z0-9+/=]{2,}={0,2})$|s)
723             {
724             # only use MIME::Base64 when the XS and Perl are both correct and quiet
725 273         1374 local $1;
726             s/(=+)(?!=*$)/'A' x length($1)/ge;
727              
728             # If only a certain number of bytes are requested, truncate the encoded
729             # version down to the appropriate size and return the requested bytes
730             if (defined $decoded_length) {
731             $_ = substr $_, 0, 4 * (int($decoded_length/3) + 1);
732             my $decoded = MIME::Base64::decode_base64($_);
733             return substr $decoded, 0, $decoded_length;
734 17     109 0 110 }
735 17         48  
736             # otherwise, just decode the whole thing and return it
737 109         1138 return MIME::Base64::decode_base64($_);
738 7 50 33     232 }
739             tr{A-Za-z0-9+/=}{}cd; # remove non-base64 characters
740             s/=+$//; # remove terminating padding
741             tr{A-Za-z0-9+/=}{ -_}; # translate to uuencode
742 7         25 s/.$// if (length($_) % 4 == 1); # unpack cannot cope with extra byte
743 7         18  
  2         23  
744             my $length;
745             my $out = '';
746             while ($_) {
747 7 50       22 $length = (length >= 84) ? 84 : length;
748 2         5 $out .= unpack("u", chr(32 + $length * 3/4) . substr($_, 0, $length, ''));
749 0         0 last if (defined $decoded_length && length $out >= $decoded_length);
750 2         5 }
751              
752             # If only a certain number of bytes are requested, truncate the encoded
753             # version down to the appropriate size and return the requested bytes
754 5         338 if (defined $decoded_length) {
755             return substr $out, 0, $decoded_length;
756 0         0 }
757 0         0  
758 2         14 return $out;
759 0 0       0 }
760              
761 0         0 my $str = $_[0];
762 0         0  
763 0         0 # RFC 2045: when decoding a Quoted-Printable body, any trailing
764 0 0       0 # white space on a line must be deleted
765 0         0 $str =~ s/[ \t]+(?=\r?\n)//gs;
766 0 0 0     0  
767             $str =~ s/=\r?\n//gs; # soft line breaks
768              
769             # RFC 2045 explicitly prohibits lowercase characters a-f in QP encoding
770             # do we really want to allow them???
771 0 0       0 local $1;
772 0         0 $str =~ s/=([0-9a-fA-F]{2})/chr(hex($1))/ge;
773              
774             return $str;
775 0         0 }
776              
777             local $_ = shift;
778              
779 14     16 0 68 if (HAS_MIME_BASE64) {
780             return MIME::Base64::encode_base64($_);
781             }
782              
783 14         401 $_ = pack("u57", $_);
784             s/^.//mg;
785 14         109 tr| -_`|A-Za-z0-9+/A|; # -> #`# <- kluge against vim syntax issues
786             s/(A+)$/'=' x length $1/e;
787             return $_;
788             }
789 14         34  
790 14         74 ###########################################################################
  111         350  
791              
792 14         99 if (defined &Mail::SpamAssassin::Util::_getpwuid_wrapper) {
793             return Mail::SpamAssassin::Util::_getpwuid_wrapper(@_);
794             }
795              
796 0     0 0 0 my $sts;
797             if (!RUNNING_ON_WINDOWS) {
798 0         0 $sts = eval ' sub _getpwuid_wrapper { getpwuid($_[0]); }; 1 ';
799 0         0 } else {
800             dbg("util: defining getpwuid() wrapper using 'unknown' as username");
801             $sts = eval ' sub _getpwuid_wrapper { _fake_getpwuid($_[0]); }; 1 ';
802 0         0 }
803 0         0 if (!$sts) {
804 0         0 my $eval_stat = $@ ne '' ? $@ : "errno=$!"; chomp $eval_stat;
805 0         0 warn "util: failed to define getpwuid() wrapper: $eval_stat\n";
  0         0  
806 0         0 } else {
807             return Mail::SpamAssassin::Util::_getpwuid_wrapper(@_);
808             }
809             }
810              
811             return (
812 67 100   67 0 252 'unknown', # name,
813 50         1200 'x', # passwd,
814             $_[0], # uid,
815             0, # gid,
816 17         37 '', # quota,
817 46 50       202 '', # comment,
818 37     96   1836 '', # gcos,
  67         18363  
819             '/', # dir,
820 9         20 '', # shell,
821 9         61 '', # expire
822             );
823 26 50       868 }
824 0 0       0  
  0         0  
825 9         60 ###########################################################################
826              
827 17         229 # Given a string, extract an IPv4 address from it. Required, since
828             # we currently have no way to portably unmarshal an IPv4 address from
829             # an IPv6 one without kludging elsewhere.
830             #
831             my ($str) = @_;
832              
833 0     29   0 return unless defined($str);
834              
835             if ($str =~ /\b(
836             (?:1\d\d|2[0-4]\d|25[0-5]|[1-9]\d|\d)\.
837             (?:1\d\d|2[0-4]\d|25[0-5]|[1-9]\d|\d)\.
838             (?:1\d\d|2[0-4]\d|25[0-5]|[1-9]\d|\d)\.
839             (?:1\d\d|2[0-4]\d|25[0-5]|[1-9]\d|\d)
840             )\b/ix)
841             {
842             if (defined $1) { return $1; }
843             }
844              
845             # ignore native IPv6 addresses;
846             # TODO, eventually, once IPv6 spam starts to appear ;)
847             return;
848             }
849              
850             ###########################################################################
851              
852             {
853 9     0 0 131 my($hostname, $fq_hostname);
854              
855 29 0       8303 # get the current host's unqalified domain name (better: return whatever
856             # Sys::Hostname thinks our hostname is, might also be a full qualified one)
857 0 0       0 return $hostname if defined($hostname);
858              
859             # Sys::Hostname isn't taint safe and might fall back to `hostname`. So we've
860             # got to clean PATH before we may call it.
861             clean_path_in_taint_mode();
862             $hostname = Sys::Hostname::hostname();
863             $hostname =~ s/[()]//gs; # bug 5929
864 0 0       0 return $hostname;
  0         0  
865             }
866              
867             # get the current host's fully-qualified domain name, if possible. If
868             # not possible, return the unqualified hostname.
869 0         0 return $fq_hostname if defined($fq_hostname);
870              
871             $fq_hostname = hostname();
872             if ($fq_hostname !~ /\./) { # hostname doesn't contain a dot, so it can't be a FQDN
873             my @names = grep(/^\Q${fq_hostname}.\E/o, # grep only FQDNs
874             map { split } (gethostbyname($fq_hostname))[0 .. 1] # from all aliases
875             );
876             $fq_hostname = $names[0] if (@names); # take the first FQDN, if any
877             $fq_hostname =~ s/[()]//gs; # bug 5929
878             }
879              
880 8 50   8 0 43 return $fq_hostname;
881             }
882             }
883              
884 8         40 ###########################################################################
885 8         54  
886 9         175 my ($ipset1, $ipset2) = @_;
887 9         27 my ($b1, $b2);
888              
889             foreach my $ip1 (@{$ipset1}) {
890             foreach my $ip2 (@{$ipset2}) {
891             next unless defined $ip1;
892             next unless defined $ip2;
893 73 100   73 0 341 next unless ($ip1 =~ /^(\d+\.\d+\.)/); $b1 = $1;
894             next unless ($ip2 =~ /^(\d+\.\d+\.)/); $b2 = $1;
895 9         60 if ($b1 eq $b2) { return 1; }
896 9 50       59 }
897             }
898 13         1615  
  17         300  
899             return 0;
900 9 50       48 }
901 9         192  
902             my ($ipset1, $ipset2) = @_;
903             my ($b1, $b2);
904 10         61  
905             foreach my $ip1 (@{$ipset1}) {
906             foreach my $ip2 (@{$ipset2}) {
907             next unless defined $ip1;
908             next unless defined $ip2;
909             next unless ($ip1 =~ /^(\d+\.\d+\.\d+\.)/); $b1 = $1;
910             next unless ($ip2 =~ /^(\d+\.\d+\.\d+\.)/); $b2 = $1;
911 1     5 0 5 if ($b1 eq $b2) { return 1; }
912 1         3 }
913             }
914 1         6  
  0         0  
915 0         0 return 0;
  0         0  
916 0 0       0 }
917 0 0       0  
918 0 0       0 ###########################################################################
  0         0  
919 0 0       0  
  0         0  
920 0 0       0 # Given a quad-dotted IPv4 address or an IPv6 address, reverses the order
  0         0  
921             # of its bytes (IPv4) or nibbles (IPv6), joins them with dots, producing
922             # a string suitable for reverse DNS lookups. Returns undef in case of a
923             # syntactically invalid IP address.
924 0         0 #
925             my ($ip) = @_;
926              
927             my $revip;
928 0     0 0 0 local($1,$2,$3,$4);
929 0         0 if ($ip =~ /^(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})\z/) {
930             $revip = "$4.$3.$2.$1";
931 0         0 } elsif ($ip !~ /:/ || $ip !~ /^[0-9a-fA-F:.]{2,}\z/) { # triage
  0         0  
932 0         0 # obviously unrecognized syntax
  0         0  
933 0 0       0 } elsif (!NetAddr::IP->can('full6')) { # since NetAddr::IP 4.010
934 0 0       0 info("util: version of NetAddr::IP is too old, IPv6 not supported");
935 0 0       0 } else {
  0         0  
936 0 0       0 # looks like an IPv6 address, let NetAddr::IP check the details
  0         0  
937 0 0       0 my $ip_obj = NetAddr::IP->new6($ip);
  0         0  
938             if (defined $ip_obj) { # valid IPv6 address
939             # RFC 5782 section 2.4.
940             $revip = lc $ip_obj->network->full6; # string in a canonical form
941 0         0 $revip =~ s/://g;
942             $revip = join('.', reverse split(//,$revip));
943             }
944             }
945             return $revip;
946             }
947              
948             ###########################################################################
949              
950              
951             ###########################################################################
952 116     116 0 256  
953             # decodes a Net::DNS::Packet->question entry,
954 116         222 # returning a triple: class, type, label
955 116         373 #
956 116 100 33     650 my $q = $_[0];
    50          
    50          
957 108         526 my $qname = $q->qname;
958              
959             # Bug 6959, Net::DNS flags a domain name in a query section as utf8, while
960             # still keeping it "RFC 1035 zone file format"-encoded, silly and harmful
961 0         0 utf8::encode($qname) if utf8::is_utf8($qname); # since Perl 5.8.1
962              
963             local $1;
964 8         30 # Net::DNS provides a query in encoded RFC 1035 zone file format, decode it!
965 8 50       1969 $qname =~ s{ \\ ( [0-9]{3} | [^0-9] ) }
966             { length($1)==1 ? $1 : $1 <= 255 ? chr($1) : "\\$1" }xgse;
967 8         28 return ($q->qclass, $q->qtype, $qname);
968 8         253 }
969 8         79  
970             ###########################################################################
971              
972 116         506 # This routine is typically called by passing a
973             # get_header("content-type") which passes all content-type headers
974             # (array context). If there are multiple Content-type headers (invalid,
975             # but it happens), MUAs seem to take the last one and so that's what we
976             # should do here.
977 0     0 0 0 #
978             my $ct = $_[-1] || 'text/plain; charset=us-ascii';
979              
980             # This could be made a bit more rigid ...
981             # the actual ABNF, BTW (RFC 1521, section 7.2.1):
982             # boundary := 0*69<bchars> bcharsnospace
983             # bchars := bcharsnospace / " "
984             # bcharsnospace := DIGIT / ALPHA / "'" / "(" / ")" / "+" /"_"
985 0     152 0 0 # / "," / "-" / "." / "/" / ":" / "=" / "?"
986 0         0 #
987             # The boundary may be surrounded by double quotes.
988             # "the boundary parameter, which consists of 1 to 70 characters from
989             # a set of characters known to be very robust through email gateways,
990 152 50       995 # and NOT ending with white space. (If a boundary appears to end with
991             # white space, the white space must be presumed to have been added by
992 42         71 # a gateway, and must be deleted.)"
993             #
994 42 0       94 # In practice:
  42 0       1898  
995 42         102 # - MUAs accept whitespace before and after the "=" character
996             # - only an opening double quote seems to be needed
997             # - non-quoted boundaries should be followed by space, ";", or end of line
998             # - blank boundaries seem to not work
999             #
1000             my($boundary) = $ct =~ m!\bboundary\s*=\s*("[^"]+|[^\s";]+(?=[\s;]|$))!i;
1001              
1002             # remove double-quotes in boundary (should only be at start and end)
1003             #
1004             $boundary =~ tr/"//d if defined $boundary;
1005              
1006             # Parse out the charset and name, if they exist.
1007 331   100 331 0 1195 #
1008             my($charset) = $ct =~ /\bcharset\s*=\s*["']?(.*?)["']?(?:;|$)/i;
1009             my($name) = $ct =~ /\b(?:file)?name\s*=\s*["']?(.*?)["']?(?:;|$)/i;
1010              
1011             # RFC 2231 section 3: Parameter Value Continuations
1012             # support continuations for name values
1013             #
1014             if (!$name && $ct =~ /\b(?:file)?name\*0\s*=/i) {
1015              
1016             my @name;
1017             $name[$1] = $2
1018             while ($ct =~ /\b(?:file)?name\*(\d+)\s*=\s*["']?(.*?)["']?(?:;|$)/ig);
1019              
1020             $name = join "", grep defined, @name;
1021             }
1022              
1023             # Get the actual MIME type out ...
1024             # Note: the header content may not be whitespace unfolded, so make sure the
1025             # REs do /s when appropriate.
1026             # correct:
1027             # Content-type: text/plain; charset=us-ascii
1028             # missing a semi-colon, CT shouldn't have whitespace anyway:
1029 289         1157 # Content-type: text/plain charset=us-ascii
1030             #
1031             $ct =~ s/^\s+//; # strip leading whitespace
1032             $ct =~ s/;.*$//s; # strip everything after first ';'
1033 331 100       794 $ct =~ s@^([^/]+(?:/[^/\s]*)?).*$@$1@s; # only something/something ...
1034             $ct = lc $ct;
1035              
1036             # bug 4298: If at this point we don't have a content-type, assume text/plain;
1037 363         1735 # also, bug 5399: if the content-type *starts* with "text", and isn't in a
1038 363         1686 # list of known bad/non-plain formats, do likewise.
1039             if (!$ct ||
1040             ($ct =~ /^text\b/ && $ct !~ /^text\/(?:x-vcard|calendar|html)$/))
1041             {
1042             $ct = "text/plain";
1043 363 50 66     1801 }
1044              
1045 74         484 # strip inappropriate chars (bug 5399: after the text/plain fixup)
1046 74         373 $ct =~ tr/\000-\040\177-\377\042\050\051\054\072-\077\100\133-\135//d;
1047              
1048             # Now that the header has been parsed, return the requested information.
1049 74         522 # In scalar context, just the MIME type, in array context the
1050             # four important data parts (type, boundary, charset, and filename).
1051             #
1052             return wantarray ? ($ct,$boundary,$charset,$name) : $ct;
1053             }
1054              
1055             ###########################################################################
1056              
1057             my ($url) = @_;
1058             my (@characters) = split(/(\%[0-9a-fA-F]{2})/, $url);
1059             my (@unencoded);
1060 289         631 my (@encoded);
1061 289         1057  
1062 289         1462 foreach (@characters) {
1063 363         833 # escaped character set ...
1064             if (/\%[0-9a-fA-F]{2}/) {
1065             # IF it is in the range of 0x00-0x20 or 0x7f-0xff
1066             # or it is one of "<", ">", """, "#", "%",
1067             # ";", "/", "?", ":", "@", "=" or "&"
1068 363 100 100     2368 # THEN preserve its encoding
      66        
1069             unless (/(20|7f|[0189a-fA-F][0-9a-fA-F])/i) {
1070             s/\%([2-7][0-9a-fA-F])/sprintf "%c", hex($1)/e;
1071 278         768 push(@unencoded, $_);
1072             }
1073             }
1074             # other stuff
1075 363         869 else {
1076             # no re "strict"; # since perl 5.21.8
1077             # 0x00-0x20, 0x7f-0xff, ", %, <, >
1078             s/([\000-\040\177-\377\042\045\074\076])
1079             /push(@encoded, $1) && sprintf "%%%02x", unpack("C",$1)/egx;
1080             }
1081 363 50       2514 }
1082             if (wantarray) {
1083             return(join("", @characters), join("", @unencoded), join("", @encoded));
1084             }
1085             else {
1086             return join("", @characters);
1087 70     77 0 208 }
1088 77         221 }
1089 77         558  
1090             ###########################################################################
1091              
1092 9         22 =item $module = first_available_module (@module_list)
1093              
1094 105 100       210 Return the name of the first module that can be successfully loaded with
1095             C<require> from the list. Returns C<undef> if none are available.
1096              
1097             This is used instead of C<AnyDBM_File> as follows:
1098              
1099 55 50       120 my $module = Mail::SpamAssassin::Util::first_available_module
1100 55         120 (qw(DB_File GDBM_File NDBM_File SDBM_File));
  121         265  
1101 83         180 tie %hash, $module, $path, [... args];
1102              
1103             Note that C<SDBM_File> is guaranteed to be present, since it comes
1104             with Perl.
1105              
1106             =cut
1107              
1108 84         150 my (@packages) = @_;
1109 34 0       99 foreach my $mod (@packages) {
1110             next if $mod !~ /^[\w:]+$/; # be paranoid
1111             if (eval 'require '.$mod.'; 1;') {
1112 37 50       77 return $mod;
1113 38         58 }
1114             }
1115             undef;
1116 3         22 }
1117              
1118             ###########################################################################
1119              
1120             =item touch_file(file, { args });
1121              
1122             Touch or create a file.
1123              
1124             Possible args:
1125              
1126             create_exclusive => 1
1127             Create a new empty file safely, only if not existing before
1128              
1129             =cut
1130              
1131             my ($file, $args) = @_;
1132              
1133             $file = untaint_file_path($file);
1134             $args ||= {};
1135              
1136             return unless defined $file && $file ne '';
1137              
1138             if ($args->{create_exclusive}) {
1139 12     12 1 40 if (sysopen(my $fh, $file, O_CREAT|O_EXCL)) {
1140 6         34 close $fh;
1141 24 50       140 return 1;
1142 22 100       857 }
1143 10         37 return 1 if $! == EEXIST; # fine if it exists already
1144             dbg("util: exclusive touch_file failed: $file: $!");
1145             return 0;
1146 12         71 }
1147              
1148             if (!utime(undef,undef,$file)) {
1149             dbg("util: touch_file failed: $file: $!");
1150             return 0;
1151             }
1152              
1153             return 1;
1154             }
1155              
1156             ###########################################################################
1157              
1158             =item my ($filepath, $filehandle) = secure_tmpfile();
1159              
1160             Generates a filename for a temporary file, opens it exclusively and
1161             securely, and returns a filehandle to the open file (opened O_RDWR).
1162              
1163 12     4 1 576 If it cannot open a file after 20 tries, it returns C<undef>.
1164              
1165 4         19 =cut
1166 0   0     0  
1167             # thanks to http://www2.picante.com:81/~gtaylor/autobuse/ for this code
1168 0 0 0     0 my $tmpenv = am_running_on_windows() ? 'TMP' : 'TMPDIR';
1169             my $tmpdir = untaint_file_path($ENV{$tmpenv} || File::Spec->tmpdir());
1170 0 0       0  
1171 0 0       0 defined $tmpdir && $tmpdir ne ''
1172 0         0 or die "util: cannot find a temporary directory, set TMP or TMPDIR in environment";
1173 0         0  
1174             opendir(my $dh, $tmpdir) or die "Could not open directory $tmpdir: $!";
1175 0 0       0 closedir $dh or die "Error closing directory $tmpdir: $!";
1176 0         0  
1177 0         0 my ($reportfile, $tmpfh);
1178             for (my $retries = 20; $retries > 0; $retries--) {
1179             # we do not rely on the obscurity of this name for security,
1180 0 0       0 # we use a average-quality PRG since this is all we need
1181 0         0 my $suffix = join('', (0..9,'A'..'Z','a'..'z')[rand 62, rand 62, rand 62,
1182 0         0 rand 62, rand 62, rand 62]);
1183             $reportfile = File::Spec->catfile($tmpdir,".spamassassin${$}${suffix}tmp");
1184              
1185 0         0 # instead, we require O_EXCL|O_CREAT to guarantee us proper
1186             # ownership of our file, read the open(2) man page
1187             if (sysopen($tmpfh, $reportfile, O_RDWR|O_CREAT|O_EXCL, 0600)) {
1188             binmode $tmpfh or die "cannot set $reportfile to binmode: $!";
1189             last;
1190             }
1191             my $errno = $!;
1192              
1193             # ensure the file handle is not semi-open in some way
1194             if ($tmpfh) {
1195             if (! close $tmpfh) {
1196             info("error closing $reportfile: $!");
1197             undef $tmpfh;
1198             }
1199             }
1200              
1201 61 50   61 1 442 # it is acceptable if $tmpfh already exists, try another
1202 61   33     3154 next if $errno == EEXIST;
1203              
1204 61 50 33     387 # error, maybe "out of quota", "too many open files", "Permission denied"
1205             # (bug 4017); makes no sense retrying
1206             die "util: failed to create a temporary file '$reportfile': $errno";
1207 65 50       4015 }
1208 65 50       1044  
1209             if (!$tmpfh) {
1210 65         191 warn "util: secure_tmpfile failed to create a temporary file, giving up";
1211 65         403 return;
1212             }
1213              
1214 65         883 dbg("util: secure_tmpfile created a temporary file %s", $reportfile);
1215             return ($reportfile, $tmpfh);
1216 65         2003 }
1217              
1218             =item my ($dirpath) = secure_tmpdir();
1219              
1220 65 50       6418 Generates a directory for temporary files. Creates it securely and
1221 65 50       415 returns the path to the directory.
1222 65         232  
1223             If it cannot create a directory after 20 tries, it returns C<undef>.
1224 4         462  
1225             =cut
1226              
1227 4 0       23 # stolen from secure_tmpfile()
1228 4 0       11 my $tmpdir = untaint_file_path(File::Spec->tmpdir());
1229 0         0  
1230 0         0 if (!$tmpdir) {
1231             # Note: we would prefer to keep this fatal, as not being able to
1232             # find a writable tmpdir is a big deal for the calling code too.
1233             # That would be quite a psychotic case, also.
1234             warn "util: cannot find a temporary directory, set TMP or TMPDIR in environment";
1235 0 0       0 return;
1236             }
1237              
1238             my ($reportpath, $tmppath);
1239 0         0 my $umask = umask 077;
1240              
1241             for (my $retries = 20; $retries > 0; $retries--) {
1242 61 50       244 # we do not rely on the obscurity of this name for security,
1243 0         0 # we use a average-quality PRG since this is all we need
1244 0         0 my $suffix = join('', (0..9,'A'..'Z','a'..'z')[rand 62, rand 62, rand 62,
1245             rand 62, rand 62, rand 62]);
1246             $reportpath = File::Spec->catfile($tmpdir,".spamassassin${$}${suffix}tmp");
1247 65         679  
1248 61         547 # instead, we require O_EXCL|O_CREAT to guarantee us proper
1249             # ownership of our file, read the open(2) man page
1250             if (mkdir $reportpath, 0700) {
1251             $tmppath = $reportpath;
1252             last;
1253             }
1254              
1255             if ($!{EEXIST}) {
1256             # it is acceptable if $reportpath already exists, try another
1257             next;
1258             }
1259            
1260             # error, maybe "out of quota" or "too many open files" (bug 4017)
1261             warn "util: secure_tmpdir failed to create file '$reportpath': $!\n";
1262 0     4 1 0 }
1263              
1264 4 0       19 umask $umask;
1265              
1266             warn "util: secure_tmpdir failed to create a directory, giving up" if (!$tmppath);
1267              
1268 4         24 return $tmppath;
1269 0         0 }
1270              
1271              
1272 0         0 ###########################################################################
1273 0         0  
1274             ##
1275 0         0 ## DEPRECATED FUNCTION, sub uri_to_domain removed.
1276             ## Replaced with Mail::SpamAssassin::RegistryBoundaries::uri_to_domain.
1277             ##
1278 0         0  
1279             ###########################################################################
1280 0         0  
1281             *uri_list_canonify = \&uri_list_canonicalize; # compatibility alias
1282             my($redirector_patterns, @uris) = @_;
1283              
1284 0 0       0 # make sure we catch bad encoding tricks
1285 0         0 my @nuris;
1286 0         0 for my $uri (@uris) {
1287             # sometimes we catch URLs on multiple lines
1288             $uri =~ s/\n//g;
1289 0 0       0  
1290             # URLs won't have leading/trailing whitespace
1291 0         0 $uri =~ s/^\s+//;
1292             $uri =~ s/\s+$//;
1293              
1294             # CRs just confuse things down below, so trash them now
1295 0         0 $uri =~ s/\r//g;
1296              
1297             # Skip some common non-http stuff like #abcdef, ?foobar,
1298 0         0 # /image.gif (but not //foo.com which actually does http)
1299             next if length($uri) <= 1 || $uri =~ m{^(?:[#?&]|/(?!/))};
1300 0 0       0  
1301             # Make a copy so we don't trash the original in the array
1302 0         0 my $nuri = $uri;
1303              
1304             # Handle emails differently
1305             if ($nuri =~ /^mailto:/i || $nuri =~ /^[^:]*\@/) {
1306             # Strip ?subject= parameters and obfuscations
1307             # Outlook linkifies foo@bar%2Ecom&x.com to foo@bar.com !!
1308             if ($nuri =~ /^([^\@]+\@[^?]+)\?/) {
1309             push @nuris, $1;
1310             }
1311             if ($nuri =~ /^([^\@]+\@[^?&]+)\&/) {
1312             push @nuris, $1
1313             }
1314             # Address must be trimmed of %20
1315             if ($nuri =~ tr/%20// &&
1316             $nuri =~ /^(?:mailto:)?(?:\%20)*([^\@]+\@[^?&%]+)/) {
1317 174     174 0 6314 push @nuris, "mailto:$1";
1318             }
1319             # mailto:"Foo%20Bar"%20<foo.bar@example.com>
1320 174         247 if ($nuri =~ /^[^?&]*<([^\@>]+\@[^>]+)>/) {
1321 174         326 push @nuris, "mailto:$1";
1322             }
1323 535         990 # End email processing
1324             next;
1325             }
1326 535         769  
1327 535         855 # bug 4390: certain MUAs treat back slashes as front slashes.
1328             # since backslashes are supposed to be encoded in a URI, swap non-encoded
1329             # ones with front slashes.
1330 537         785 $nuri =~ tr{\\}{/};
1331              
1332             # http:www.foo.biz -> http://www.foo.biz
1333             $nuri =~ s{^(https?:)/{0,2}}{$1//}i;
1334 537 50 33     1409  
1335             # *always* make a dup with all %-encoding decoded, since
1336             # important parts of the URL may be encoded (such as the
1337 537         811 # scheme). (bug 4213)
1338             if ($nuri =~ /%[0-9a-fA-F]{2}/) {
1339             $nuri = Mail::SpamAssassin::Util::url_encode($nuri);
1340 537 100 100     1080 }
1341              
1342             # www.foo.biz -> http://www.foo.biz
1343 374 50       1408 # unschemed URIs: assume default of "http://" as most MUAs do
1344 352         441 if ($nuri !~ /^[-_a-z0-9]+:/i) {
1345             if ($nuri =~ /^ftp\./) {
1346 374 50       1215 $nuri =~ s{^}{ftp://}g;
1347 19         59 }
1348             else {
1349             $nuri =~ s{^}{http://}g;
1350 22 100 100     102 }
1351             }
1352 24         68  
1353             # http://www.foo.biz?id=3 -> http://www.foo.biz/?id=3
1354             # http://www.foo.biz#id=3 -> http://www.foo.biz/#id=3
1355 22 50       64 $nuri =~ s{^(https?://[^/?#]+)([?#])}{$1/$2}i;
1356 19         76  
1357             # deal with encoding of chars, this is just the set of printable
1358             # chars minus ' ' (that is, dec 33-126, hex 21-7e)
1359 25         65 $nuri =~ s/\&\#0*(3[3-9]|[4-9]\d|1[01]\d|12[0-6]);/sprintf "%c",$1/ge;
1360             $nuri =~ s/\&\#x0*(2[1-9]|[3-6][a-fA-F0-9]|7[0-9a-eA-E]);/sprintf "%c",hex($1)/ge;
1361             # handle other unicode dots (U+002E U+3002 U+FF0E U+FF61) -> .
1362             $nuri =~ s/\&\#(?:x2e|12290|x3002|65294|xff0e|65377|xff61);/./gi;
1363              
1364             # put the new URI on the new list if it's different
1365 182         357 if ($nuri ne $uri) {
1366             push(@nuris, $nuri);
1367             }
1368 163         926  
1369             # deal with weird hostname parts, remove user/pass, etc.
1370             if ($nuri =~ m{^(https?://)([^\@/?#]*\@)?([^/?#:]+)((?::(\d*))?.*)$}i) {
1371             my($proto, $host, $rest) = ($1,$3,$4);
1372             my $auth = defined $2 ? $2 : '';
1373 182 100       439 my $port = defined $5 ? $5 : '';
1374 336         592  
1375             my $rest_noport;
1376             if ($port eq '') {
1377             $port = $proto eq 'http://' ? 80 : 443;
1378             } else {
1379 496 100       1704 $rest_noport = $rest;
1380 348 100       730 # Strip default ports from url and add to list
1381 6         19 if ($proto eq 'http://') {
1382             if ($rest_noport =~ s/^:80\b//) {
1383             push(@nuris, join('', $proto, $host, $rest_noport));
1384 348         782 }
1385             } elsif ($rest_noport =~ s/^:443\b//) {
1386             push(@nuris, join('', $proto, $host, $rest_noport));
1387             }
1388             }
1389              
1390 287         704 # Bug 6751:
1391             # RFC 3490 (IDNA): Whenever dots are used as label separators, the
1392             # following characters MUST be recognized as dots: U+002E (full stop),
1393             # U+3002 (ideographic full stop), U+FF0E (fullwidth full stop),
1394 165         265 # U+FF61 (halfwidth ideographic full stop).
  122         391  
1395 496         1262 # RFC 5895: [...] the IDEOGRAPHIC FULL STOP character (U+3002)
  333         453  
1396             # can be mapped to the FULL STOP before label separation occurs.
1397 163         209 # [...] Only the IDEOGRAPHIC FULL STOP character (U+3002) is added in
1398             # this mapping because the authors have not fully investigated [...]
1399             # Adding also 'SMALL FULL STOP' (U+FE52) as seen in the wild.
1400 496 100       704 # Parhaps also the 'ONE DOT LEADER' (U+2024).
1401 22         37 if ($host =~ s{(?: \xE3\x80\x82 | \xEF\xBC\x8E | \xEF\xBD\xA1 |
1402             \xEF\xB9\x92 | \xE2\x80\xA4 )}{.}xgs) {
1403             push(@nuris, join ('', $proto, $host, $rest));
1404             # Also add noport variant
1405 496 100       1085 push(@nuris, join('', $proto, $host, $rest_noport)) if $rest_noport;
1406 496         1135 }
1407 297 100       548  
1408 496 100       1643 # bug 4146: deal with non-US ASCII 7-bit chars in the host portion
1409             # of the URI according to RFC 1738 that's invalid, and the tested
1410 476         1174 # browsers (Firefox, IE) remove them before usage...
1411 476 100       905 #if ($host =~ tr/\000-\040\200-\377//d) {
1412 475 100       865 # Fixed 7/2019 to not strip extended chars, since they can be used in
1413             # IDN domains. Stripping control chars should be enough?
1414 314         341 if ($host =~ tr/\x00-\x20//d) {
1415             push(@nuris, join ('', $proto, $host, $rest));
1416 314 50       447 }
    0          
1417 291 100       514  
1418 23         32 # deal with http redirectors. strip off one level of redirector
1419             # and add back to the array. the foreach loop will go over those
1420             # and deal appropriately.
1421 23         37  
1422             # Bug 7278: try redirector pattern matching first
1423             # (but see also Bug 4176)
1424             my $found_redirector_match;
1425             foreach my $re (@{$redirector_patterns}) {
1426             if ("$proto$host$rest" =~ $re) {
1427             next unless defined $1 && index($1, '.') != -1;
1428             dbg("uri: parsed uri pattern: $re");
1429             dbg("uri: parsed uri found: $1 in redirector: $proto$host$rest");
1430             push (@uris, $1);
1431             $found_redirector_match = 1;
1432             last;
1433             }
1434             }
1435             if (!$found_redirector_match) {
1436 186 100       585 # try generic https? check if redirector pattern matching failed
1437             # bug 3308: redirectors like yahoo only need one '/' ... <grrr>
1438 19         50 if ($rest =~ m{(https?:/{0,2}[^&#]+)}i && index($1, '.') != -1) {
1439             push(@uris, $1);
1440 0 50       0 dbg("uri: parsed uri found: $1 in hard-coded redirector");
1441             }
1442             }
1443              
1444             ########################
1445             ## TVD: known issue, if host has multiple combinations of the following,
1446             ## all permutations will be put onto @nuris. shouldn't be an issue.
1447              
1448             # Get rid of cruft that could cause confusion for rules...
1449 476 50       1315  
1450 5         13 # remove "www.fakehostname.com@" username part
1451             if ($host =~ s/^[^\@]+\@//gs) {
1452             push(@nuris, join ('', $proto, $host, $rest));
1453             }
1454              
1455             # bug 3186: If in a sentence, we might pick up odd characters ...
1456             # ie: "visit http://example.biz." or "visit http://example.biz!!!"
1457             # the host portion should end in some form of alphanumeric, strip off
1458             # the rest.
1459 168         214 if ($host =~ s/[^0-9A-Za-z]+$//) {
1460 476         842 push(@nuris, join ('', $proto, $host, $rest));
  163         316  
1461 1443 100       4191 }
1462 316 50 33     344  
1463 316         591 ########################
1464 1302         4205  
1465 5         21 # deal with hosts which are IPs
1466 5         19 # also handle things like:
1467 5         16 # http://89.0x00000000000000000000068.0000000000000000000000160.0x00000000000011
1468             # both hex (0x) and oct (0+) encoded octets, etc.
1469              
1470 165 100       329 if ($host =~ /^
1471             ((?:0x[0-9a-f]+|\d+)\.)
1472             ((?:0x[0-9a-f]+|\d+)\.)
1473 162 100 66     409 ((?:0x[0-9a-f]+|\d+)\.)
1474 8         18 (0x[0-9a-f]+|\d+)
1475 319         602 $/ix)
1476             {
1477             my @chunk = ($1,$2,$3,$4);
1478             foreach my $octet (@chunk) {
1479             $octet =~ s/^0x([0-9a-f]+)/sprintf "%d",hex($1)/gei;
1480             $octet =~ s/^0+([1-3][0-7]{0,2}|[4-7][0-7]?)\b/sprintf "%d",oct($1)/ge;
1481             $octet =~ s/^0+//;
1482             }
1483             push(@nuris, join ('', $proto, @chunk, $rest));
1484             }
1485              
1486 474 50       905 # "http://0x7f000001/"
1487 0         0 elsif ($host =~ /^0x[0-9a-f]+$/i) {
1488             # only take last 4 octets
1489             $host =~ s/^0x[0-9a-f]*?([0-9a-f]{1,8})$/sprintf "%d",hex($1)/gei;
1490             push(@nuris, join ('', $proto, decode_ulong_to_ip($host), $rest));
1491             }
1492              
1493             # "http://1113343453/"
1494 163 100       462 elsif ($host =~ /^[0-9]+$/) {
1495 314         587 push(@nuris, join ('', $proto, decode_ulong_to_ip($host), $rest));
1496             }
1497              
1498             # http://foobar -> http://www.foobar.com as Firefox does (Bug 6596)
1499             # (do this here so we don't trip on those 0x123 IPs etc..)
1500             # https://hg.mozilla.org/mozilla-central/file/tip/docshell/base/nsDefaultURIFixup.cpp
1501             elsif ($proto eq 'http://' && $auth eq '' &&
1502             $host ne 'localhost' && $port eq '80' &&
1503             $host =~ /^(?:www\.)?([^.]+)$/) {
1504             push(@nuris, join('', $proto, 'www.', $1, '.com', $rest));
1505 163 100 100     1671 }
    100 66        
    100 100        
    100 100        
1506             }
1507             }
1508              
1509             # remove duplicates, merge nuris and uris
1510             my %uris = map { $_ => 1 } @uris, @nuris;
1511              
1512 343         992 return keys %uris;
1513 30         63 }
1514 433         3846  
  29         110  
1515 127         186 return join(".", unpack("CCCC",pack("H*", sprintf "%08lx", $_[0])));
  31         47  
1516 120         188 }
1517              
1518 58         217 ###########################################################################
1519              
1520             my (@strings) = @_;
1521              
1522             foreach my $string (@strings) {
1523             my $time = parse_rfc822_date($string);
1524 6         31 return $time if defined($time) && $time;
  34         77  
1525 13         64 }
1526             return;
1527             }
1528              
1529             my ($header) = @_;
1530 17         47  
1531             $header ||= '';
1532             $header =~ s/\n[ \t]+/ /gs; # fix continuation lines
1533              
1534             my @rcvd = ($header =~ /^Received:(.*)/img);
1535             my @local;
1536             my $time;
1537              
1538             if (@rcvd) {
1539 11         61 if ($rcvd[0] =~ /qmail \d+ invoked by uid \d+/ ||
1540             $rcvd[0] =~ /\bfrom (?:localhost\s|(?:\S+ ){1,2}\S*\b127\.0\.0\.1\b)/)
1541             {
1542             push @local, (shift @rcvd);
1543             }
1544             if (@rcvd && ($rcvd[0] =~ m/\bby localhost with \w+ \(fetchmail-[\d.]+/)) {
1545 174         353 push @local, (shift @rcvd);
  280         774  
1546             }
1547 175         778 elsif (@local) {
1548             unshift @rcvd, (shift @local);
1549             }
1550             }
1551 373     373 0 937  
1552             if (@rcvd) {
1553             $time = first_date(shift @rcvd);
1554             return $time if defined($time);
1555             }
1556             if (@local) {
1557 534     13 0 1407 $time = first_date(@local);
1558             return $time if defined($time);
1559 360         1299 }
1560 13         122 if ($header =~ /^(?:From|X-From-Line:)\s+(.+)$/im) {
1561 10 50 33     131 my $string = $1;
1562             $string .= " ".local_tz() unless $string =~ /(?:[-+]\d{4}|\b[A-Z]{2,4}\b)/;
1563 0         0 $time = first_date($string);
1564             return $time if defined($time);
1565             }
1566             if (@rcvd) {
1567 10     10 0 41 $time = first_date(@rcvd);
1568             return $time if defined($time);
1569 10   50     44 }
1570 10         93 if ($header =~ /^Resent-Date:\s*(.+)$/im) {
1571             $time = first_date($1);
1572 10         132 return $time if defined($time);
1573 10         49 }
1574             if ($header =~ /^Date:\s*(.+)$/im) {
1575             $time = first_date($1);
1576 10 100       33 return $time if defined($time);
1577 8 50 33     202 }
1578              
1579             return time;
1580 0         0 }
1581              
1582 8 50 33     87 ###########################################################################
    50          
1583 0         0 my $suid = shift;
1584             dbg("util: get_user_groups: uid is $suid\n");
1585             my ( $user, $passwd, $uid, $gid, $quota, $comment, $gcos, $dir, $shell, $expire ) = getpwuid($suid);
1586 0         0 my $rgids="$gid ";
1587             while ( my($name,$pw,$gid,$members) = getgrent() ) {
1588             if ( $members =~ m/\b$user\b/ ) {
1589             $rgids .= "$gid ";
1590 10 100       35 dbg("util: get_user_groups: added $gid ($name) to group list which is now: $rgids\n");
1591 8         31 }
1592 8 50       100 }
1593             endgrent;
1594 2 50       9 chop $rgids;
1595 0         0 return ($rgids);
1596 0 0       0 }
1597              
1598 2 50       31  
1599 0         0  
1600 0 0       0 return if (RUNNING_ON_WINDOWS);
1601 0         0  
1602 0 0       0 # remember the target uid, the first number is the important one
1603             my $touid = $>;
1604 2 50       15 my $gids = get_user_groups($touid);
1605 0         0 my ( $pgid, $supgs ) = split (' ',$gids,2);
1606 0 0       0 defined $supgs or $supgs=$pgid;
1607             if ($( != $pgid) {
1608 2 50       13 # Gotta be root for any of this to work
1609 0         0 $> = 0 ;
1610 0 0       0 dbg("util: changing real primary gid from $( to $pgid and supplemental groups to $supgs to match effective uid $touid");
1611             POSIX::setgid($pgid);
1612 2 50       24 dbg("util: POSIX::setgid($pgid) set errno to $!");
1613 2         13 $! = 0;
1614 2 50       17 $( = $pgid;
1615             $) = "$pgid $supgs";
1616             dbg("util: assignment \$) = $pgid $supgs set errno to $!");
1617 0         0 }
1618             if ($< != $touid) {
1619             dbg("util: changing real uid from $< to match effective uid $touid");
1620             # bug 3586: kludges needed to work around platform dependent behavior assigning to $<
1621             # The POSIX functions deal with that so just use it here
1622 0     0 0 0 POSIX::setuid($touid);
1623 0         0 $< = $touid; $> = $touid; # bug 5574
1624 0         0  
1625 0         0 # Check that we have now accomplished the setuid: catch bug 3586 if it comes back
1626 0         0 if ($< != $touid) {
1627 0 0       0 # keep this fatal: it's a serious security problem if it fails
1628 0         0 die "util: setuid $< to $touid failed!";
1629 0         0 }
1630             }
1631             }
1632 0         0  
1633 0         0 # helper app command-line open
1634 0         0 if (RUNNING_ON_WINDOWS) {
1635             return helper_app_pipe_open_windows (@_);
1636             } else {
1637             return helper_app_pipe_open_unix (@_);
1638             }
1639             }
1640 0 0   0 0 0  
1641             my ($fh, $stdinfile, $duperr2out, @cmdline) = @_;
1642              
1643 0         0 # use a traditional open(FOO, "cmd |")
1644 0         0 my $cmd = join(' ', @cmdline);
1645 0         0 if ($stdinfile) { $cmd .= qq/ < "$stdinfile"/; }
1646 0 0       0 if ($duperr2out) { $cmd .= " 2>&1"; }
1647 0 0       0 return open ($fh, $cmd.'|');
1648             }
1649 0         0  
1650 0         0 my ($msg) = @_;
1651 0         0  
1652 0         0 # note use of eval { } scope in logging -- paranoia to ensure that a broken
1653 0         0 # $SIG{__WARN__} implementation will not interfere with the flow of control
1654 0         0 # here, where we *have* to die.
1655 0         0 eval { warn $msg }; # hmm, STDERR may no longer be open
1656 0         0 eval { dbg("util: force_die: $msg") };
1657              
1658 0 0       0 POSIX::_exit(6); # avoid END and destructor processing
1659 0         0 kill('KILL',$$); # still kicking? die!
1660             }
1661              
1662 0         0 my ($fh, $stdinfile, $duperr2out, @cmdline) = @_;
1663 0         0  
  0         0  
1664             my $pid;
1665             # do a fork-open, so we can setuid() back
1666 0 0       0 eval {
1667             $pid = open ($fh, '-|'); 1;
1668 0         0 } or do {
1669             my $eval_stat = $@ ne '' ? $@ : "errno=$!"; chomp $eval_stat;
1670             die "util: cannot fork: $eval_stat";
1671             };
1672             if (!defined $pid) {
1673             # acceptable to die() here, calling code catches it
1674             die "util: cannot open a pipe to a forked process: $!";
1675 0 0   0 0 0 }
1676 0         0  
1677             if ($pid != 0) {
1678 0         0 return $pid; # parent process; return the child pid
1679             }
1680              
1681             # else, child process.
1682              
1683 0     0 0 0 # from now on, we cannot die(), it could create a cloned process
1684             # use force_die() instead (bug 4370, cmt 2)
1685             eval {
1686 0         0 # go setuid...
1687 0 0       0 setuid_to_euid();
  0         0  
1688 0 0       0 info("util: setuid: ruid=$< euid=$> rgid=$( egid=$) ");
  0         0  
1689 0         0  
1690             # now set up the fds. due to some weirdness, we may have to ensure that
1691             # we *really* close the correct fd number, since some other code may have
1692             # redirected the meaning of STDOUT/STDIN/STDERR it seems... (bug 3649).
1693 0     0 0 0 # use POSIX::close() for that. it's safe to call close() and POSIX::close()
1694             # on the same fd; the latter is a no-op in that case.
1695              
1696             if (!$stdinfile) { # < $tmpfile
1697             # ensure we have *some* kind of fd 0.
1698 0         0 $stdinfile = "/dev/null";
  0         0  
1699 0         0 }
  0         0  
1700              
1701 0         0 my $f = fileno(STDIN);
1702 0         0 close STDIN or die "error closing STDIN: $!";
1703              
1704             # sanity: was that the *real* STDIN? if not, close that one too ;)
1705             if ($f != 0) {
1706 0     39 0 0 POSIX::close(0);
1707             }
1708 0         0  
1709             open (STDIN, "<$stdinfile") or die "cannot open $stdinfile: $!";
1710              
1711 0         0 # this should be impossible; if we just closed fd 0, UNIX
  0         0  
1712 0 0       0 # fd behaviour dictates that the next fd opened (the new STDIN)
1713 0 0       0 # will be the lowest unused fd number, which should be 0.
  0         0  
1714 0         0 # so die with a useful error if this somehow isn't the case.
1715             if (fileno(STDIN) != 0) {
1716 0 0       0 die "oops: fileno(STDIN) [".fileno(STDIN)."] != 0";
1717             }
1718 0         0  
1719             # Ensure STDOUT is open. As we just created a pipe to ensure this, it has
1720             # to be open to that pipe, and if it isn't, something's seriously screwy.
1721 0 0       0 # Update: actually, this fails! see bug 3649 comment 37. For some reason,
1722 0         0 # fileno(STDOUT) can be 0; possibly because open("-|") didn't change the fh
1723             # named STDOUT, instead changing fileno(1) directly. So this is now
1724             # commented.
1725             # if (fileno(STDOUT) != 1) {
1726             # die "setuid: oops: fileno(STDOUT) [".fileno(STDOUT)."] != 1";
1727             # }
1728              
1729 0         0 STDOUT->autoflush(1);
1730              
1731 0         0 if ($duperr2out) { # 2>&1
1732 0         0 my $f = fileno(STDERR);
1733             close STDERR or die "error closing STDERR: $!";
1734              
1735             # sanity: was that the *real* STDERR? if not, close that one too ;)
1736             if ($f != 2) {
1737             POSIX::close(2);
1738             }
1739              
1740 0 0       0 open (STDERR, ">&STDOUT") or die "dup STDOUT failed: $!";
1741             STDERR->autoflush(1); # make sure not to lose diagnostics if exec fails
1742 0         0  
1743             # STDERR must be fd 2 to be useful to subprocesses! (bug 3649)
1744             if (fileno(STDERR) != 2) {
1745 0         0 die "oops: fileno(STDERR) [".fileno(STDERR)."] != 2";
1746 0 0       0 }
1747             }
1748              
1749 0 0       0 exec @cmdline;
1750 0         0 die "exec failed: $!";
1751             };
1752             my $eval_stat = $@ ne '' ? $@ : "errno=$!"; chomp $eval_stat;
1753 0 0       0  
1754             # bug 4370: we really have to exit here; break any eval traps
1755             force_die(sprintf('util: failed to spawn a process "%s": %s',
1756             join(", ",@cmdline), $eval_stat));
1757             die; # must be a die() otherwise -w will complain
1758             }
1759 0 0       0  
1760 0         0 ###########################################################################
1761              
1762             # As "perldoc perlvar" notes, in perl 5.8.0, the concept of "safe" signal
1763             # handling was added, which means that signals cannot interrupt a running OP.
1764             # unfortunately, a regexp match is a single OP, so a psychotic m// can
1765             # effectively "hang" the interpreter as a result, and a $SIG{ALRM} handler
1766             # will never get called.
1767             #
1768             # However, by using "unsafe" signals, we can still interrupt that -- and
1769             # POSIX::sigaction can create an unsafe handler on 5.8.x. So this function
1770             # provides a portable way to do that.
1771              
1772             my ($handler) = @_;
1773 0         0 if ($] < 5.008 || am_running_on_windows()) {
1774             # signals are always unsafe on perl older than 5.008, just use %SIG
1775 0 0       0 # Bug 6359, no POSIX::SIGALRM on Windows, just use %SIG
1776 0         0 $SIG{ALRM} = $handler;
1777 0 0       0 } else {
1778             # may be using "safe" signals with %SIG; use POSIX to avoid it
1779             POSIX::sigaction POSIX::SIGALRM(), new POSIX::SigAction $handler;
1780 0 0       0 }
1781 0         0 }
1782              
1783             ###########################################################################
1784 0 0       0  
1785 0         0 # returns ($compiled_re, $error)
1786             # if any errors, $compiled_re = undef, $error has string
1787             # args:
1788 0 0       0 # - regexp
1789 0         0 # - strip_delimiters (default: 1) (value 2 means, try strip, but don't error)
1790             # - ignore_always_matching (default: 0)
1791             my ($re, $strip_delimiters, $ignore_always_matching) = @_;
1792             local($1);
1793 0         0  
1794 0         0 # Do not allow already compiled regexes or other funky refs
1795             if (ref($re)) {
1796 0 0       0 return (undef, 'ref passed');
  0         0  
1797             }
1798              
1799 0         0 # try stripping by default
1800             $strip_delimiters = 1 if !defined $strip_delimiters;
1801 0         0  
1802             # OK, try to remove any normal perl-style regexp delimiters at
1803             # the start and end, and modifiers at the end if present,
1804             # so we can validate those too.
1805             my $origre = $re;
1806             my $delim_end = '';
1807              
1808             if ($strip_delimiters >= 1) {
1809             # most common delimiter
1810             if ($re =~ s{^/}{}) {
1811             $delim_end = '/';
1812             }
1813             # symmetric delimiters
1814             elsif ($re =~ s/^(?:m|qr)([\{\(\<\[])//) {
1815             ($delim_end = $1) =~ tr/\{\(\<\[/\}\)\>\]/;
1816             }
1817 39     0 0 37918 # any non-wordchar delimiter, but let's ignore backslash..
1818 39 0 0     116 elsif ($re =~ s/^(?:m|qr)(\W)//) {
1819             $delim_end = $1;
1820             if ($delim_end eq '\\') {
1821 39         85 return (undef, 'backslash delimiter not allowed');
1822             }
1823             }
1824 0         0 elsif ($strip_delimiters != 2) {
1825             return (undef, 'missing regexp delimiters');
1826             }
1827             }
1828              
1829             # cut end delimiter, mods
1830             my $mods;
1831             if ($delim_end) {
1832             # Ignore e because paranoid
1833             if ($re =~ s/\Q${delim_end}\E([a-df-z]*)\z//) {
1834             $mods = $1;
1835             } else {
1836             return (undef, 'invalid end delimiter/mods');
1837 1047     1039 0 7586 }
1838 1047         2001 }
1839              
1840             # paranoid check for eval exec (?{foo}), in case someone
1841 1047 50       2208 # actually put "use re 'eval'" somewhere..
1842 267         656 if ($re =~ /\(\?\??\{/) {
1843             return (undef, 'eval (?{}) found');
1844             }
1845              
1846 1271 50       2494 # check unescaped delimiter, but only if it's not symmetric,
1847             # those will fp on .{0,10} [xyz] etc, no need for so strict checks
1848             # since these regexes don't end up in eval strings anyway
1849             if ($delim_end && $delim_end !~ tr/\}\)\]//) {
1850             # first we remove all escaped backslashes "\\"
1851 1244         2301 my $dbs_stripped = $re;
1852 1019         1466 $dbs_stripped =~ s/\\\\//g;
1853             # now we can properly check if something is unescaped
1854 1250 100       2289 if ($dbs_stripped =~ /(?<!\\)\Q${delim_end}\E/) {
1855             return (undef, "unquoted delimiter '$delim_end' found");
1856 1238 100       5082 }
    100          
    100          
    50          
1857 1074         1634 }
1858              
1859             if ($ignore_always_matching) {
1860             if (my $err = is_always_matching_regexp($re)) {
1861 280         625 return (undef, "always matching regexp: $err");
1862             }
1863             }
1864              
1865 365         1513 # now prepend the modifiers, in order to check if they're valid
1866 329 50       729 if ($mods) {
1867 45         547 $re = '(?'.$mods.')'.$re;
1868             }
1869              
1870             # no re "strict"; # since perl 5.21.8: Ranges of ASCII printables...
1871 57         245 my $compiled_re;
1872             $re = untaint_var($re);
1873             my $ok = eval {
1874             # don't dump deprecated warnings to user STDERR
1875             # but die on any other warning for safety?
1876 1032         1510 local $SIG{__WARN__} = sub {
1877 1045 100       2047 if ($_[0] !~ /deprecated/i) {
1878             die "$_[0]\n";
1879 999 50       10508 }
1880 1258         3498 };
1881             $compiled_re = qr/$re/;
1882 246         487 1;
1883             };
1884             if ($ok && ref($compiled_re) eq 'Regexp') {
1885             #$origre = untaint_var($origre);
1886             #dbg("config: accepted regex '%s' => '%s'", $origre, $compiled_re);
1887             return ($compiled_re, '');
1888 1254 100       5596 } else {
1889 246         822 my $err = $@ ne '' ? $@ : "errno=$!"; chomp $err;
1890             $err =~ s/ at .*? line \d.*$//;
1891             return (undef, $err);
1892             }
1893             }
1894              
1895 1011 100 100     4241 my ($re) = @_;
1896              
1897 1205         2832 if ($re eq '') {
1898 977         1990 return "empty";
1899             }
1900 1177 100       6134 elsif ($re =~ /(?<!\\)\|\|/) {
1901 244         515 return "contains '||'";
1902             }
1903             elsif ($re =~ /^\|/) {
1904             return "starts with '|'";
1905 1233 100       2528 }
1906 640 100       1935 elsif ($re =~ /\|(?<!\\\|)$/) {
1907 32         79 return "ends with '|'";
1908             }
1909              
1910             return "";
1911             }
1912 1260 100       2800  
1913 748         2457 # convert compiled regexp (?^i:foo) presentation to string (?i)foo
1914             # NOTE: This function is mainly used for Rule2XSBody purposes, since it
1915             # expects "(?i)foo" formatted strings. Generally there should NOT be need
1916             # to use this function. If you need a string, try "".$re / "".qr(foo.*bar).
1917 1004         1339 my ($re) = @_;
1918 1232         2385  
1919 1200         2448 return undef unless ref($re) eq 'Regexp'; ## no critic (ProhibitExplicitReturnUndef)
1920             $re = "".$re; # stringify
1921              
1922             local($1);
1923 256 0   228   581 my $mods;
1924 253         449 # perl >=5.14 (?^i:foo)
1925             if ($re =~ s/^\(\?\^([a-z]*)://) {
1926 1257         6855 $mods = $1;
1927 1235         32436 $re =~ s/\)\s*\z//;
1928 1007         5372 }
1929             # perl <5.14 (?i-xsm:foo)
1930 1007 100 66     4539 elsif ($re =~ s/^\(\?([a-z]*)-[a-z]*://) {
1931             $mods = $1;
1932             $re =~ s/\)\s*\z//;
1933 1235         13004 }
1934              
1935 259 50       1367 return ($mods ? "(?$mods)$re" : $re);
  259         1184  
1936 229         1383 }
1937 1         4  
1938             ###########################################################################
1939              
1940             ###
1941             ### regexp_remove_delimiters and make_qr DEPRECATED, to be removed
1942 397     396 0 759 ### compile_regexp() should be used everywhere
1943             ###
1944 396 100       1757  
    100          
    100          
    100          
1945 28         70 # Removes any normal perl-style regexp delimiters at
1946             # the start and end, and modifiers at the end (if present).
1947             # If modifiers are found, they are inserted into the pattern using
1948 2         12 # the /(?i)/ idiom.
1949              
1950             my ($re) = @_;
1951 1         5  
1952             warn("deprecated Util regexp_remove_delimiters() called\n");
1953              
1954 1         3 my $delim;
1955             if (!defined $re || $re eq '') {
1956             return undef; ## no critic (ProhibitExplicitReturnUndef)
1957 392         1194 }
1958             elsif ($re =~ s/^m?\{//) { # m{foo/bar}
1959             $delim = '}';
1960             }
1961             elsif ($re =~ s/^m?\[//) { # m[foo/bar]
1962             $delim = ']';
1963             }
1964             elsif ($re =~ s/^m?\(//) { # m(foo/bar)
1965 42     42 0 246 $delim = ')';
1966             }
1967 42 50       231 elsif ($re =~ s/^m?<//) { # m<foo/bar>
1968 42         205 $delim = '>';
1969             }
1970 42         183 elsif ($re =~ s/^m?(\W)//) { # m#foo/bar#
1971 42         65 $delim = $1;
1972             } else { # /foo\/bar/ or !foo/bar!
1973 42 50       790 # invalid
    0          
1974 42         267 return undef; ## no critic (ProhibitExplicitReturnUndef)
1975 42         375 }
1976              
1977             if ($re !~ s/\Q${delim}\E([imsx]*)$//) {
1978             return undef; ## no critic (ProhibitExplicitReturnUndef)
1979 0         0 }
1980 0         0  
1981             my $mods = $1;
1982             if ($mods) {
1983 42 50       342 $re = "(?".$mods.")".$re;
1984             }
1985              
1986             return $re;
1987             }
1988              
1989             # turn "/foobar/i" into qr/(?i)foobar/
1990              
1991             my ($re) = @_;
1992              
1993             warn("deprecated Util make_qr() called\n");
1994              
1995             $re = regexp_remove_delimiters($re);
1996             return undef if !defined $re || $re eq ''; ## no critic (ProhibitExplicitReturnUndef)
1997             my $compiled_re;
1998             if (eval { $compiled_re = qr/$re/; 1; } && ref($compiled_re) eq 'Regexp') {
1999 0     0 0 0 return $compiled_re;
2000             } else {
2001 0         0 return undef; ## no critic (ProhibitExplicitReturnUndef)
2002             }
2003 0         0 }
2004 0 0 0     0  
    0          
    0          
    0          
    0          
    0          
2005 0         0 ###########################################################################
2006              
2007             my ($ok_locales) = @_;
2008 0         0  
2009             my @locales = split(/\s+/, $ok_locales);
2010             my $lang = $ENV{'LC_ALL'};
2011 0         0 $lang ||= $ENV{'LANGUAGE'};
2012             $lang ||= $ENV{'LC_MESSAGES'};
2013             $lang ||= $ENV{'LANG'};
2014 0         0 push (@locales, $lang) if defined($lang);
2015             return @locales;
2016             }
2017 0         0  
2018             ###########################################################################
2019              
2020 0         0 # bug 5612: work around for bugs in Berkeley db 4.2
2021             #
2022             # on 4.2 having the __db.[DBNAME] file will cause an loop that will never finish
2023 0         0 # on 4.3+ the loop will timeout after 301 open attempts, but we will still
2024             # be unable to open the database. This workaround solves both problems.
2025             #
2026 0 0       0 my ($path) = @_;
2027 0         0  
2028             my $db_tmpfile = untaint_file_path(File::Spec->catfile(dirname($path),
2029             '__db.'.basename($path)));
2030 0         0  
2031 0 0       0 # delete "__db.[DBNAME]" and "__db.[DBNAME].*"
2032 0         0 foreach my $tfile ($db_tmpfile, glob("$db_tmpfile.*")) {
2033             my $file = untaint_file_path($tfile);
2034             my $stat_errn = stat($file) ? 0 : 0+$!;
2035 0         0 next if $stat_errn == ENOENT;
2036              
2037             dbg("util: Berkeley DB bug work-around: cleaning tmp file $file");
2038             unlink($file) or warn "cannot remove Berkeley DB tmp file $file: $!\n";
2039             }
2040             }
2041 0     0 0 0  
2042             ###########################################################################
2043 0         0  
2044             my ($deck) = @_;
2045 0         0 for (my $i = $#{$deck}; $i > 0; $i--) {
2046 0 0 0     0 my $j = int rand($i+1);
2047 0         0 @$deck[$i,$j] = @$deck[$j,$i];
2048 0 0 0     0 }
  0         0  
  0         0  
2049 0         0 }
2050              
2051 0         0 ###########################################################################
2052              
2053              
2054             ###########################################################################
2055              
2056             # bugs 6419 and 2607 relate to returning a score 1/10th lower than the
2057             # required score if the rounded to the 10th version of the score is equal
2058 0     0 0 0 # to the required score
2059             #
2060 0         0 # moved from PerMessageStatus.pm to here and modified to allow for a
2061 0         0 # non-class version of the routine to be called from PerMessageStatus
2062 0   0     0 # and from spamd
2063 0   0     0  
2064 0   0     0 my ($score, $rscore, $is_spam) = @_;
2065 0 0       0  
2066 0         0 #BASED ON _get_tag_value_for_score from PerMsgStatus.pm
2067              
2068             $score = sprintf("%2.1f", $score);
2069             $rscore = sprintf("%2.1f", $rscore);
2070              
2071             # if the email is spam, return the accurate score
2072             # if the email is NOT spam and the score is less than the required score,
2073             # then return the accurate score
2074              
2075             return $score if $is_spam or $score < $rscore;
2076              
2077             # if the email is NOT spam and $score = $rscore, return the $rscore - 0.1
2078 0     0 0 0 # effectively flooring the value to the closest tenth
2079              
2080 0         0 return $rscore - 0.1;
2081             }
2082              
2083             ###########################################################################
2084 0         0  
2085 0         0  
2086 0 0       0 1;
2087 0 0       0  
2088             =back
2089 0         0  
2090 0 0       0 =cut