File Coverage

lib/Mail/SpamAssassin/Util.pm
Criterion Covered Total %
statement 422 761 55.4
branch 159 414 38.4
condition 57 134 42.5
subroutine 49 71 69.0
pod 3 48 6.2
total 690 1428 48.3


line stmt bran cond sub pod time code
1             # <@LICENSE>
2             # Licensed to the Apache Software Foundation (ASF) under one or more
3             # contributor license agreements. See the NOTICE file distributed with
4             # this work for additional information regarding copyright ownership.
5             # The ASF licenses this file to you under the Apache License, Version 2.0
6             # (the "License"); you may not use this file except in compliance with
7             # the License. You may obtain a copy of the License at:
8             #
9             # http://www.apache.org/licenses/LICENSE-2.0
10             #
11             # Unless required by applicable law or agreed to in writing, software
12             # distributed under the License is distributed on an "AS IS" BASIS,
13             # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14             # See the License for the specific language governing permissions and
15             # limitations under the License.
16             # </@LICENSE>
17              
18             =head1 NAME
19              
20             Mail::SpamAssassin::Util - utility functions
21              
22             =head1 DESCRIPTION
23              
24             A general class for utility functions. Please use this for functions that
25             stand alone, without requiring a $self object, Portability functions
26             especially.
27              
28             NOTE: The functions in this module are to be considered private. Their API may
29             change at any point, and it's expected that they'll only be used by other
30             Mail::SpamAssassin modules. (TODO: we should probably revisit this if
31             it's useful for plugin development.)
32              
33             NOTE: Utility functions should not be changing global variables such
34             as $_, $1, $2, ... $/, etc. unless explicitly documented. If these
35             variables are in use by these functions, they should be localized.
36              
37             =over 4
38              
39             =cut
40              
41             package Mail::SpamAssassin::Util;
42              
43 42     42   361182 use strict;
  42         78  
  42         1275  
44 42     42   208 use warnings;
  42         69  
  42         1283  
45             # use bytes;
46 42     42   212 use re 'taint';
  42         81  
  42         1747  
47              
48             require 5.008001; # needs utf8::is_utf8()
49              
50 42     42   1213 use Mail::SpamAssassin::Logger;
  42         94  
  42         2319  
51              
52 42     42   229 use Exporter ();
  42         79  
  42         3782  
53              
54             our @ISA = qw(Exporter);
55             our @EXPORT = ();
56             our @EXPORT_OK = qw(&local_tz &base64_decode &untaint_var &untaint_file_path
57             &exit_status_str &proc_status_ok &am_running_on_windows
58             &reverse_ip_address &decode_dns_question_entry
59             &get_my_locales &parse_rfc822_date &get_user_groups
60             &secure_tmpfile &secure_tmpdir &uri_list_canonicalize);
61              
62             our $AM_TAINTED;
63              
64 42     42   265 use Config;
  42         84  
  42         2038  
65 42     42   3916 use IO::Handle;
  42         40583  
  42         1925  
66 42     42   279 use File::Spec;
  42         74  
  42         1833  
67 42     42   769 use File::Basename;
  42         119  
  42         3531  
68 42     42   20633 use Time::Local;
  42         64667  
  42         2327  
69 42     42   18647 use Sys::Hostname (); # don't import hostname() into this namespace!
  42         43854  
  42         1165  
70 42     42   24046 use NetAddr::IP 4.000;
  42         995873  
  42         267  
71 42     42   5323 use Fcntl;
  42         95  
  42         11463  
72 42     42   3423 use Errno qw(ENOENT EACCES EEXIST);
  42         9066  
  42         7151  
73 42         549 use POSIX qw(:sys_wait_h WIFEXITED WIFSIGNALED WIFSTOPPED WEXITSTATUS
74 42     42   305 WTERMSIG WSTOPSIG);
  42         78  
75              
76             ###########################################################################
77              
78 42     42   20319 use constant HAS_MIME_BASE64 => eval { require MIME::Base64; };
  42         85  
  42         108  
  42         19107  
79 42     42   30584 use constant RUNNING_ON_WINDOWS => ($^O =~ /^(?:mswin|dos|os2)/oi);
  42         89  
  42         2730  
80              
81             # These are only defined as stubs on Windows (see bugs 6798 and 6470).
82             BEGIN {
83 42 50   42   46834 if (RUNNING_ON_WINDOWS) {
84 42     42   273 no warnings 'redefine';
  42         88  
  42         6056  
85              
86             # See the section on $? at
87             # http://perldoc.perl.org/perlvar.html#Error-Variables for some
88             # hints on the magic numbers that are used here.
89 0         0 *WIFEXITED = sub { not $_[0] & 127 };
  0         0  
90 0         0 *WEXITSTATUS = sub { $_[0] >> 8 };
  0         0  
91 0 0       0 *WIFSIGNALED = sub { ($_[0] & 127) && (($_[0] & 127) != 127) };
  0         0  
92 0         0 *WTERMSIG = sub { $_[0] & 127 };
  0         0  
93             }
94             }
95              
96             ###########################################################################
97              
98             # find an executable in the current $PATH (or whatever for that platform)
99             {
100             # Show the PATH we're going to explore only once.
101             my $displayed_path = 0;
102              
103             sub find_executable_in_env_path {
104 0     0 0 0 my ($filename) = @_;
105              
106 0         0 clean_path_in_taint_mode();
107 0 0       0 if ( !$displayed_path++ ) {
108 0         0 dbg("util: current PATH is: ".join($Config{'path_sep'},File::Spec->path()));
109             }
110 0         0 foreach my $path (File::Spec->path()) {
111 0         0 my $fname = File::Spec->catfile ($path, $filename);
112 0 0       0 if ( -f $fname ) {
113 0 0       0 if (-x $fname) {
114 0         0 dbg("util: executable for $filename was found at $fname");
115 0         0 return $fname;
116             }
117             else {
118 0         0 dbg("util: $filename was found at $fname, but isn't executable");
119             }
120             }
121             }
122 0         0 return;
123             }
124             }
125              
126             ###########################################################################
127              
128             # taint mode: delete more unsafe vars for exec, as per perlsec
129             {
130             # We only need to clean the environment once, it stays clean ...
131             my $cleaned_taint_path = 0;
132              
133             sub clean_path_in_taint_mode {
134 89 100   89 0 512 return if ($cleaned_taint_path++);
135 26 100       155 return unless am_running_in_taint_mode();
136              
137 1         3 dbg("util: taint mode: deleting unsafe environment variables, resetting PATH");
138              
139 1 50       15 if (RUNNING_ON_WINDOWS) {
140 0         0 dbg("util: running on Win32, skipping PATH cleaning");
141 0         0 return;
142             }
143              
144 1         23 delete @ENV{qw(IFS CDPATH ENV BASH_ENV)};
145              
146             # Go through and clean the PATH out
147 1         4 my @path;
148             my @stat;
149 1         34 foreach my $dir (File::Spec->path()) {
150 3 50       9 next unless $dir;
151              
152             # untaint if at least 1 char and no NL (is the restriction intentional?)
153 3         6 local ($1);
154 3 50       25 $dir = untaint_var($1) if $dir =~ /^(.+)$/;
155             # then clean ( 'foo/./bar' -> 'foo/bar', etc. )
156 3         12 $dir = File::Spec->canonpath($dir);
157              
158 3 50       103 if (!File::Spec->file_name_is_absolute($dir)) {
    50          
    50          
    50          
159 0         0 dbg("util: PATH included '$dir', which is not absolute, dropping");
160 0         0 next;
161             }
162             elsif (!(@stat=stat($dir))) {
163 0         0 dbg("util: PATH included '$dir', which is unusable, dropping: $!");
164 0         0 next;
165             }
166             elsif (!-d _) {
167 0         0 dbg("util: PATH included '$dir', which isn't a directory, dropping");
168 0         0 next;
169             }
170             elsif (($stat[2]&2) != 0) {
171             # World-Writable directories are considered insecure.
172             # We could be more paranoid and check all of the parent directories as well,
173             # but it's good for now.
174 0         0 dbg("util: PATH included '$dir', which is world writable, dropping");
175 0         0 next;
176             }
177              
178 3         20 dbg("util: PATH included '$dir', keeping");
179 3         20 push(@path, $dir);
180             }
181              
182 1         52 $ENV{'PATH'} = join($Config{'path_sep'}, @path);
183 1         10 dbg("util: final PATH set to: ".$ENV{'PATH'});
184             }
185             }
186              
187             # taint mode: are we running in taint mode? 1 for yes, 0 for no.
188             sub am_running_in_taint_mode {
189 26 50   26 0 116 return $AM_TAINTED if defined $AM_TAINTED;
190              
191 26 50       126 if ($] >= 5.008) {
192             # perl 5.8 and above, ${^TAINT} is a syntax violation in 5.005
193 26     26   389 $AM_TAINTED = eval q(no warnings q(syntax); ${^TAINT});
  26         68  
  26         1947  
  26         2663  
194             }
195             else {
196             # older versions
197 0         0 my $blank;
198 0         0 for my $d ((File::Spec->curdir, File::Spec->rootdir, File::Spec->tmpdir)) {
199 0 0       0 opendir(TAINT, $d) || next;
200 0         0 $blank = readdir(TAINT);
201 0 0       0 closedir(TAINT) or die "error closing directory $d: $!";
202 0         0 last;
203             }
204 0 0 0     0 if (!(defined $blank && $blank)) {
205             # these are sometimes untainted, so this is less preferable than readdir
206 0         0 $blank = join('', values %ENV, $0, @ARGV);
207             }
208 0         0 $blank = substr($blank, 0, 0);
209             # seriously mind-bending perl
210 0 0       0 $AM_TAINTED = not eval { eval "1 || $blank" || 1 };
  0         0  
211             }
212 26 100       216 dbg("util: running in taint mode? %s", $AM_TAINTED ? "yes" : "no");
213 26         107 return $AM_TAINTED;
214             }
215              
216             ###########################################################################
217              
218             sub am_running_on_windows {
219 101     101 0 853 return RUNNING_ON_WINDOWS;
220             }
221              
222             ###########################################################################
223              
224             # untaint a path to a file, e.g. "/home/jm/.spamassassin/foo",
225             # "C:\Program Files\SpamAssassin\tmp\foo", "/home/��t/etc".
226             #
227             # TODO: this does *not* handle locales well. We cannot use "use locale"
228             # and \w, since that will not detaint the data. So instead just allow the
229             # high-bit chars from ISO-8859-1, none of which have special metachar
230             # meanings (as far as I know).
231             #
232             sub untaint_file_path {
233 296     296 0 818 my ($path) = @_;
234              
235 296 50       773 return unless defined($path);
236 296 50       741 return '' if ($path eq '');
237              
238 296         833 local ($1);
239             # Barry Jaspan: allow ~ and spaces, good for Windows.
240             # Also return '' if input is '', as it is a safe path.
241             # Bug 7264: allow also parenthesis, e.g. "C:\Program Files (x86)"
242 296         1117 my $chars = '-_A-Za-z0-9.%=+,/:()\\@\\xA0-\\xFF\\\\';
243 296         3155 my $re = qr{^\s*([$chars][${chars}~ ]*)\z}o;
244              
245 296 50       2868 if ($path =~ $re) {
246 296         846 $path = $1;
247 296         1023 return untaint_var($path);
248             } else {
249 0         0 warn "util: refusing to untaint suspicious path: \"$path\"\n";
250 0         0 return $path;
251             }
252             }
253              
254             sub untaint_hostname {
255 0     0 0 0 my ($host) = @_;
256              
257 0 0       0 return unless defined($host);
258 0 0       0 return '' if ($host eq '');
259              
260             # from RFC 1035, but allowing domains starting with numbers:
261             # $label = q/[A-Za-z\d](?:[A-Za-z\d-]{0,61}[A-Za-z\d])?/;
262             # $domain = qq<$label(?:\.$label)*>;
263             # length($host) <= 255 && $host =~ /^($domain)$/
264             # expanded (no variables in the re) because of a tainting bug in Perl 5.8.0
265 0 0 0     0 if (length($host) <= 255 && $host =~ /^[a-z\d](?:[a-z\d-]{0,61}[a-z\d])?(?:\.[a-z\d](?:[a-z\d-]{0,61}[a-z\d])?)*$/i) {
266 0         0 return untaint_var($host);
267             }
268             else {
269 0         0 warn "util: cannot untaint hostname: \"$host\"\n";
270 0         0 return $host;
271             }
272             }
273              
274             # This sub takes a scalar or a reference to an array, hash, scalar or another
275             # reference and recursively untaints all its values (and keys if it's a
276             # reference to a hash). It should be used with caution as blindly untainting
277             # values subverts the purpose of working in taint mode. It will return the
278             # untainted value if requested but to avoid unnecessary copying, the return
279             # value should be ignored when working on lists.
280             # Bad:
281             # %ENV = untaint_var(\%ENV);
282             # Better:
283             # untaint_var(\%ENV);
284             #
285             sub untaint_var {
286             # my $arg = $_[0]; # avoid copying unnecessarily
287 20581 100   20581 0 49988 if (!ref $_[0]) { # optimized by-far-the-most-common case
288             # Bug 7591 not using this faster untaint. https://bz.apache.org/SpamAssassin/show_bug.cgi?id=7591
289             #return defined $_[0] ? scalar each %{ { $_[0] => undef } } : undef; ## no critic (ProhibitExplicitReturnUndef) - See Bug 7120 - fast untaint (hash keys cannot be tainted)
290 42     42   361 no re 'taint'; # override a "use re 'taint'" from outer scope
  42         89  
  42         346851  
291 20492 100       35491 return undef if !defined $_[0]; ## no critic (ProhibitExplicitReturnUndef) - See Bug 7120
292 15812         26844 local($1); # avoid Perl taint bug: tainted global $1 propagates taintedness
293 15812         33903 $_[0] =~ /^(.*)\z/s;
294 15812         55688 return $1;
295              
296             } else {
297 89         415 my $r = ref $_[0];
298 89 100 33     626 if ($r eq 'ARRAY') {
    100          
    50          
299 6         13 my $arg = $_[0];
300 6         13 $_ = untaint_var($_) for @{$arg};
  6         61  
301 6 50       28 return @{$arg} if wantarray;
  0         0  
302             }
303             elsif ($r eq 'HASH') {
304 1         6 my $arg = $_[0];
305 1 50       12 if ($arg == \%ENV) { # purge undefs from %ENV, untaint the rest
306 1         6 while (my($k, $v) = each %{$arg}) {
  36         144  
307             # It is safe to delete the item most recently returned by each()
308 35 50       66 if (!defined $v) { delete ${$arg}{$k}; next }
  0         0  
  0         0  
  0         0  
309 35         115 ${$arg}{untaint_var($k)} = untaint_var($v);
  35         63  
310             }
311             } else {
312             # hash keys are never tainted,
313             # although old version of perl had some quirks there
314 0         0 while (my($k, $v) = each %{$arg}) {
  0         0  
315 0         0 ${$arg}{untaint_var($k)} = untaint_var($v);
  0         0  
316             }
317             }
318 1 50       21 return %{$arg} if wantarray;
  0         0  
319             }
320             elsif ($r eq 'SCALAR' || $r eq 'REF') {
321 82         140 my $arg = $_[0];
322 82         133 ${$arg} = untaint_var(${$arg});
  82         194  
  82         322  
323             }
324             else {
325 0         0 warn "util: can't untaint a $r !\n";
326             }
327             }
328 89         230 return $_[0];
329             }
330              
331             ###########################################################################
332              
333             sub taint_var {
334 0     0 0 0 my ($v) = @_;
335 0 0       0 return $v unless defined $v; # can't taint "undef"
336              
337             # $^X is apparently "always tainted".
338             # Concatenating an empty tainted string taints the result.
339 0         0 return $v . substr($^X, 0, 0);
340             }
341              
342             ###########################################################################
343              
344             # map process termination status number to an informative string, and
345             # append optional message (dual-valued errno or a string or a number),
346             # returning the resulting string
347             #
348             sub exit_status_str {
349 0     0 0 0 my($stat,$errno) = @_;
350 0         0 my $str;
351 0 0       0 if (!defined($stat)) {
    0          
    0          
352 0         0 $str = '(no status)';
353             } elsif (WIFEXITED($stat)) {
354 0         0 $str = sprintf("exit %d", WEXITSTATUS($stat));
355             } elsif (WIFSTOPPED($stat)) {
356 0         0 $str = sprintf("stopped, signal %d", WSTOPSIG($stat));
357             } else {
358 0         0 my $sig = WTERMSIG($stat);
359 0 0       0 $str = sprintf("%s, signal %d (%04x)",
    0          
    0          
    0          
    0          
360             $sig == 1 ? 'HANGUP' : $sig == 2 ? 'interrupted' :
361             $sig == 6 ? 'ABORTED' : $sig == 9 ? 'KILLED' :
362             $sig == 15 ? 'TERMINATED' : 'DIED',
363             $sig, $stat);
364             }
365 0 0       0 if (defined $errno) { # deal with dual-valued and plain variables
366 0 0 0     0 $str .= ', '.$errno if (0+$errno) != 0 || ($errno ne '' && $errno ne '0');
      0        
367             }
368 0         0 return $str;
369             }
370              
371             ###########################################################################
372              
373             # check errno to be 0 and a process exit status to be in the list of success
374             # status codes, returning true if both are ok, and false otherwise
375             #
376             sub proc_status_ok {
377 0     0 0 0 my($exit_status,$errno,@success) = @_;
378 0         0 my $ok = 0;
379 0 0 0     0 if ((!defined $errno || $errno == 0) && WIFEXITED($exit_status)) {
      0        
380 0         0 my $j = WEXITSTATUS($exit_status);
381 0 0       0 if (!@success) { $ok = $j==0 } # empty list implies only status 0 is good
  0 0       0  
382 0         0 elsif (grep {$_ == $j} @success) { $ok = 1 }
  0         0  
383             }
384 0         0 return $ok;
385             }
386              
387             ###########################################################################
388              
389             # timezone mappings: in case of conflicts, use RFC 2822, then most
390             # common and least conflicting mapping
391             my %TZ = (
392             # standard
393             'UT' => '+0000',
394             'UTC' => '+0000',
395             # US and Canada
396             'NDT' => '-0230',
397             'AST' => '-0400',
398             'ADT' => '-0300',
399             'NST' => '-0330',
400             'EST' => '-0500',
401             'EDT' => '-0400',
402             'CST' => '-0600',
403             'CDT' => '-0500',
404             'MST' => '-0700',
405             'MDT' => '-0600',
406             'PST' => '-0800',
407             'PDT' => '-0700',
408             'HST' => '-1000',
409             'AKST' => '-0900',
410             'AKDT' => '-0800',
411             'HADT' => '-0900',
412             'HAST' => '-1000',
413             # Europe
414             'GMT' => '+0000',
415             'BST' => '+0100',
416             'IST' => '+0100',
417             'WET' => '+0000',
418             'WEST' => '+0100',
419             'CET' => '+0100',
420             'CEST' => '+0200',
421             'EET' => '+0200',
422             'EEST' => '+0300',
423             'MSK' => '+0300',
424             'MSD' => '+0400',
425             'MET' => '+0100',
426             'MEZ' => '+0100',
427             'MEST' => '+0200',
428             'MESZ' => '+0200',
429             # South America
430             'BRST' => '-0200',
431             'BRT' => '-0300',
432             # Australia
433             'AEST' => '+1000',
434             'AEDT' => '+1100',
435             'ACST' => '+0930',
436             'ACDT' => '+1030',
437             'AWST' => '+0800',
438             # New Zealand
439             'NZST' => '+1200',
440             'NZDT' => '+1300',
441             # Asia
442             'JST' => '+0900',
443             'KST' => '+0900',
444             'HKT' => '+0800',
445             'SGT' => '+0800',
446             'PHT' => '+0800',
447             # Middle East
448             'IDT' => '+0300',
449             );
450              
451             # month mappings
452             my %MONTH = (jan => 1, feb => 2, mar => 3, apr => 4, may => 5, jun => 6,
453             jul => 7, aug => 8, sep => 9, oct => 10, nov => 11, dec => 12);
454              
455             my $LOCALTZ;
456              
457             sub local_tz {
458 2 50   16 0 8 return $LOCALTZ if defined($LOCALTZ);
459              
460             # standard method for determining local timezone
461 2         5 my $time = time;
462 2         14 my @g = gmtime($time);
463 2         31 my @t = localtime($time);
464 16         6126 my $z = $t[1]-$g[1]+($t[2]-$g[2])*60+($t[7]-$g[7])*1440+($t[5]-$g[5])*525600;
465 16         45 $LOCALTZ = sprintf("%+.2d%.2d", $z/60, $z%60);
466 16         80 return $LOCALTZ;
467             }
468              
469             sub parse_rfc822_date {
470 26     12 0 54 my ($date) = @_;
471 26         68 local ($_); local ($1,$2,$3,$4);
  26         169  
472 26         132 my ($yyyy, $mmm, $dd, $hh, $mm, $ss, $mon, $tzoff);
473              
474             # make it a bit easier to match
475 26         143 $_ = " $date "; s/, */ /gs; s/\s+/ /gs;
  26         140  
  26         221  
476              
477             # now match it in parts. Date part first:
478 26 50       215 if (s/ (\d+) (Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) (\d{4}) / /i) {
    0          
    0          
479 12         56 $dd = $1; $mon = lc($2); $yyyy = $3;
  12         35  
  12         35  
480             } elsif (s/ (Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) +(\d+) \d+:\d+:\d+ (\d{4}) / /i) {
481 0         0 $dd = $2; $mon = lc($1); $yyyy = $3;
  0         0  
  0         0  
482             } elsif (s/ (\d+) (Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) (\d{2,3}) / /i) {
483 0         0 $dd = $1; $mon = lc($2); $yyyy = $3;
  0         0  
  14         36  
484             } else {
485 14         50 dbg("util: time cannot be parsed: $date");
486 0         0 return;
487             }
488              
489             # handle two and three digit dates as specified by RFC 2822
490 12 50       39 if (defined $yyyy) {
491 26 50 33     144 if (length($yyyy) == 2 && $yyyy < 50) {
    50          
492 14         34 $yyyy += 2000;
493             }
494             elsif (length($yyyy) != 4) {
495             # three digit years and two digit years with values between 50 and 99
496 14         22 $yyyy += 1900;
497             }
498             }
499              
500             # hh:mm:ss
501 26 50       159 if (s/ (\d?\d):(\d\d)(:(\d\d))? / /) {
502 26   50     96 $hh = $1; $mm = $2; $ss = $4 || 0;
  26         50  
  12         50  
503             }
504              
505             # numeric timezones
506 26 50 0     143 if (s/ ([-+]\d{4}) / /) {
    0          
507 26         70 $tzoff = $1;
508             }
509             # common timezones
510             elsif (s/\b([A-Z]{2,4}(?:-DST)?)\b/ / && exists $TZ{$1}) {
511 14         22 $tzoff = $TZ{$1};
512             }
513             # all other timezones are considered equivalent to "-0000"
514 26   50     60 $tzoff ||= '-0000';
515              
516             # months
517 26 50       72 if (exists $MONTH{$mon}) {
518 26         48 $mmm = $MONTH{$mon};
519             }
520              
521 26   50     50 $hh ||= 0; $mm ||= 0; $ss ||= 0; $dd ||= 0; $mmm ||= 0; $yyyy ||= 0;
  26   50     61  
  26   50     49  
  26   50     65  
  13   50     49  
  13   50     31  
522              
523             # Fudge invalid times so that we get a usable date.
524 26 100       64 if ($ss > 59) { # rfc2822 does recognize leap seconds, not handled here
525 1         6 dbg("util: second after supported range, forcing second to 59: $date");
526 1         2 $ss = 59;
527             }
528              
529 26 100       74 if ($mm > 59) {
530 1         6 dbg("util: minute after supported range, forcing minute to 59: $date");
531 1         2 $mm = 59;
532             }
533              
534 26 100       52 if ($hh > 23) {
535 14         98 dbg("util: hour after supported range, forcing hour to 23: $date");
536 6         10 $hh = 23;
537             }
538              
539 14         31 my $max_dd = 31;
540 26 100 100     237 if ($mmm == 4 || $mmm == 6 || $mmm == 9 || $mmm == 11) {
    100 100        
      66        
541 8         36 $max_dd = 30;
542             }
543             elsif ($mmm == 2) {
544 11 50 33     42 $max_dd = (!($yyyy % 4) && (($yyyy % 100) || !($yyyy % 400))) ? 29 : 28;
545             }
546 17 100       61 if ($dd > $max_dd) {
547 5         10 dbg("util: day is too high, incrementing date to next valid date: $date");
548 1         3 $dd = 1;
549 1         3 $mmm++;
550 14 100       39 if ($mmm > 12) {
551 0         0 $mmm = 1;
552 0         0 $yyyy++;
553             }
554             }
555              
556             # Time::Local (v1.10 at least, also 1.17) throws warnings when dates cause
557             # a signed 32-bit integer overflow. So force a min/max for year.
558 12 50       53 if ($yyyy > 2037) {
    50          
559 0         0 dbg("util: year after supported range, forcing year to 2037: $date");
560 14         18 $yyyy = 2037;
561             }
562             elsif ($yyyy < 1970) {
563 14         17 dbg("util: year before supported range, forcing year to 1970: $date");
564 14         48 $yyyy = 1970;
565             }
566              
567 26         534 my $time;
568             eval { # could croak
569 12         164 $time = timegm($ss, $mm, $hh, $dd, $mmm-1, $yyyy);
570 12         815 1;
571 12 50       27 } or do {
572 0 0       0 my $eval_stat = $@ ne '' ? $@ : "errno=$!"; chomp $eval_stat;
  14         58  
573 14         43 dbg("util: time cannot be parsed: $date, $yyyy-$mmm-$dd $hh:$mm:$ss, $eval_stat");
574 14         36 return;
575             };
576              
577 24 50       101 if ($tzoff =~ /([-+])(\d\d)(\d\d)$/) # convert to seconds difference
578             {
579 12         94 $tzoff = (($2 * 60) + $3) * 60;
580 14 100       71 if ($1 eq '-') {
    50          
581 15         57 $time += $tzoff;
582             } elsif ($time < $tzoff) { # careful with year 1970 and '+' time zones
583 0         0 $time = 0;
584             } else {
585 11         25 $time -= $tzoff;
586             }
587             }
588              
589 12         58 return $time;
590             }
591              
592             sub time_to_rfc822_date {
593 2     2 0 6 my($time) = @_;
594              
595 2         34 my @days = qw/Sun Mon Tue Wed Thu Fri Sat/;
596 2         19 my @months = qw/Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec/;
597 2   33     108 my @localtime = localtime($time || time);
598 2         9 $localtime[5]+=1900;
599              
600 2         16 sprintf("%s, %02d %s %4d %02d:%02d:%02d %s", $days[$localtime[6]], $localtime[3],
601             $months[$localtime[4]], @localtime[5,2,1,0], local_tz());
602             }
603              
604             ###########################################################################
605              
606             # This used to be a wrapper for Text::Wrap. Now we do basically the same
607             # function as Text::Wrap::wrap(). See bug 5056 and 2165 for more information
608             # about why things aren't using that function anymore.
609             #
610             # It accepts values for almost all options which can be set
611             # in Text::Wrap. All parameters are optional (leaving away the first one
612             # probably doesn't make too much sense though), either a missing or a false
613             # value will fall back to the default.
614             #
615             # The parameters are:
616             # 1st: The string to wrap. Only one string is allowed.
617             # (default: "")
618             # 2nd: The prefix to be put in front of all lines except the first one.
619             # (default: "")
620             # 3rd: The prefix for the first line. (default: "")
621             # 4th: The number of columns available (no line will be longer than this
622             # unless overflow is set below). (default: 77)
623             # 5th: Enable or disable overflow mode. (default: 0)
624             # 6th: The sequence/expression to wrap at. (default: '\s');
625             # 7th: The string to join the lines again. (default: "\n")
626              
627             sub wrap {
628 349   50 349 0 3420 my $string = shift || '';
629 349   50     754 my $prefix = shift || '';
630 349   100     941 my $first = shift || '';
631 349   50     727 my $length = shift || 77;
632 349   100     1021 my $overflow = shift || 0;
633 349   66     1186 my $break = shift || qr/\s/;
634 349         614 my $sep = "\n";
635              
636             # go ahead and break apart the string, keeping the break chars
637 349         6309 my @arr = split(/($break)/, $string);
638              
639             # tack the first prefix line at the start
640 349 100       1133 splice @arr, 0, 0, $first if $first;
641              
642             # go ahead and make up the lines in the array
643 349         527 my $pos = 0;
644 349         472 my $pos_mod = 0;
645 349         892 while ($#arr > $pos) {
646 3692         4809 my $tmpline = $arr[$pos] ;
647 3692         5688 $tmpline =~ s/\t/ /g;
648 3692         4306 my $len = length ($tmpline);
649             # if we don't want to have lines > $length (overflow==0), we
650             # need to verify what will happen with the next line. if we don't
651             # care if a single line goes longer, don't care about the next
652             # line.
653             # we also want this to be true for the first entry on the line
654 3692 100 100     8992 if ($pos_mod != 0 && $overflow == 0) {
655 3126         4062 my $tmpnext = $arr[$pos+1] ;
656 3126         3687 $tmpnext =~ s/\t/ /g;
657 3126         3628 $len += length ($tmpnext);
658             }
659              
660 3692 100       5037 if ($len <= $length) {
661             # if the length determined above is within bounds, go ahead and
662             # merge the next line with the current one
663 3572         5416 $arr[$pos] .= splice @arr, $pos+1, 1;
664 3572         6683 $pos_mod = 1;
665             }
666             else {
667             # ok, the current line is the right length, but there's more text!
668             # prep the current line and then go onto the next one
669              
670             # strip any trailing whitespace from the next line that's ready
671 120         699 $arr[$pos] =~ s/\s+$//;
672              
673             # go to the next line and reset pos_mod
674 120         218 $pos++;
675 120         176 $pos_mod = 0;
676              
677             # put the appropriate prefix at the front of the line
678 120         348 splice @arr, $pos, 0, $prefix;
679             }
680             }
681              
682             # go ahead and return the wrapped text, with the separator in between
683 349         1569 return join($sep, @arr);
684             }
685              
686             ###########################################################################
687              
688             # Some base64 decoders will remove intermediate "=" characters, others
689             # will stop decoding on the first "=" character, this one translates "="
690             # characters to null.
691             sub base64_decode {
692 7     7 0 90 local $_ = shift;
693 7         13 my $decoded_length = shift;
694              
695 7         637 s/\s+//g;
696 7 50 33     208 if (HAS_MIME_BASE64 && (length($_) % 4 == 0) &&
697             m|^(?:[A-Za-z0-9+/=]{2,}={0,2})$|s)
698             {
699             # only use MIME::Base64 when the XS and Perl are both correct and quiet
700 7         28 local $1;
701 7         37 s/(=+)(?!=*$)/'A' x length($1)/ge;
  0         0  
702              
703             # If only a certain number of bytes are requested, truncate the encoded
704             # version down to the appropriate size and return the requested bytes
705 7 50       20 if (defined $decoded_length) {
706 0         0 $_ = substr $_, 0, 4 * (int($decoded_length/3) + 1);
707 0         0 my $decoded = MIME::Base64::decode_base64($_);
708 0         0 return substr $decoded, 0, $decoded_length;
709             }
710              
711             # otherwise, just decode the whole thing and return it
712 7         841 return MIME::Base64::decode_base64($_);
713             }
714 0         0 tr{A-Za-z0-9+/=}{}cd; # remove non-base64 characters
715 0         0 s/=+$//; # remove terminating padding
716 0         0 tr{A-Za-z0-9+/=}{ -_}; # translate to uuencode
717 0 0       0 s/.$// if (length($_) % 4 == 1); # unpack cannot cope with extra byte
718              
719 0         0 my $length;
720 0         0 my $out = '';
721 0         0 while ($_) {
722 0 0       0 $length = (length >= 84) ? 84 : length;
723 0         0 $out .= unpack("u", chr(32 + $length * 3/4) . substr($_, 0, $length, ''));
724 0 0 0     0 last if (defined $decoded_length && length $out >= $decoded_length);
725             }
726              
727             # If only a certain number of bytes are requested, truncate the encoded
728             # version down to the appropriate size and return the requested bytes
729 0 0       0 if (defined $decoded_length) {
730 0         0 return substr $out, 0, $decoded_length;
731             }
732              
733 0         0 return $out;
734             }
735              
736             sub qp_decode {
737 14     14 0 48 my $str = $_[0];
738              
739             # RFC 2045: when decoding a Quoted-Printable body, any trailing
740             # white space on a line must be deleted
741 14         475 $str =~ s/[ \t]+(?=\r?\n)//gs;
742              
743 14         134 $str =~ s/=\r?\n//gs; # soft line breaks
744              
745             # RFC 2045 explicitly prohibits lowercase characters a-f in QP encoding
746             # do we really want to allow them???
747 14         44 local $1;
748 14         58 $str =~ s/=([0-9a-fA-F]{2})/chr(hex($1))/ge;
  111         399  
749              
750 14         101 return $str;
751             }
752              
753             sub base64_encode {
754 0     0 0 0 local $_ = shift;
755              
756 0         0 if (HAS_MIME_BASE64) {
757 0         0 return MIME::Base64::encode_base64($_);
758             }
759              
760 0         0 $_ = pack("u57", $_);
761 0         0 s/^.//mg;
762 0         0 tr| -_`|A-Za-z0-9+/A|; # -> #`# <- kluge against vim syntax issues
763 0         0 s/(A+)$/'=' x length $1/e;
  0         0  
764 0         0 return $_;
765             }
766              
767             ###########################################################################
768              
769             sub portable_getpwuid {
770 81 100   81 0 369 if (defined &Mail::SpamAssassin::Util::_getpwuid_wrapper) {
771 55         1806 return Mail::SpamAssassin::Util::_getpwuid_wrapper(@_);
772             }
773              
774 26         55 my $sts;
775 26 50       263 if (!RUNNING_ON_WINDOWS) {
776 26     81   1826 $sts = eval ' sub _getpwuid_wrapper { getpwuid($_[0]); }; 1 ';
  81         25010  
777             } else {
778 0         0 dbg("util: defining getpwuid() wrapper using 'unknown' as username");
779 0         0 $sts = eval ' sub _getpwuid_wrapper { _fake_getpwuid($_[0]); }; 1 ';
780             }
781 26 50       122 if (!$sts) {
782 0 0       0 my $eval_stat = $@ ne '' ? $@ : "errno=$!"; chomp $eval_stat;
  0         0  
783 0         0 warn "util: failed to define getpwuid() wrapper: $eval_stat\n";
784             } else {
785 26         385 return Mail::SpamAssassin::Util::_getpwuid_wrapper(@_);
786             }
787             }
788              
789             sub _fake_getpwuid {
790             return (
791 0     0   0 'unknown', # name,
792             'x', # passwd,
793             $_[0], # uid,
794             0, # gid,
795             '', # quota,
796             '', # comment,
797             '', # gcos,
798             '/', # dir,
799             '', # shell,
800             '', # expire
801             );
802             }
803              
804             ###########################################################################
805              
806             # Given a string, extract an IPv4 address from it. Required, since
807             # we currently have no way to portably unmarshal an IPv4 address from
808             # an IPv6 one without kludging elsewhere.
809             #
810             sub extract_ipv4_addr_from_string {
811 0     0 0 0 my ($str) = @_;
812              
813 0 0       0 return unless defined($str);
814              
815 0 0       0 if ($str =~ /\b(
816             (?:1\d\d|2[0-4]\d|25[0-5]|[1-9]\d|\d)\.
817             (?:1\d\d|2[0-4]\d|25[0-5]|[1-9]\d|\d)\.
818             (?:1\d\d|2[0-4]\d|25[0-5]|[1-9]\d|\d)\.
819             (?:1\d\d|2[0-4]\d|25[0-5]|[1-9]\d|\d)
820             )\b/ix)
821             {
822 0 0       0 if (defined $1) { return $1; }
  0         0  
823             }
824              
825             # ignore native IPv6 addresses;
826             # TODO, eventually, once IPv6 spam starts to appear ;)
827 0         0 return;
828             }
829              
830             ###########################################################################
831              
832             {
833             my($hostname, $fq_hostname);
834              
835             # get the current host's unqalified domain name (better: return whatever
836             # Sys::Hostname thinks our hostname is, might also be a full qualified one)
837             sub hostname {
838 8 50   8 0 35 return $hostname if defined($hostname);
839              
840             # Sys::Hostname isn't taint safe and might fall back to `hostname`. So we've
841             # got to clean PATH before we may call it.
842 8         39 clean_path_in_taint_mode();
843 8         76 $hostname = Sys::Hostname::hostname();
844 8         193 $hostname =~ s/[()]//gs; # bug 5929
845 8         23 return $hostname;
846             }
847              
848             # get the current host's fully-qualified domain name, if possible. If
849             # not possible, return the unqualified hostname.
850             sub fq_hostname {
851 72 100   72 0 529 return $fq_hostname if defined($fq_hostname);
852              
853 8         40 $fq_hostname = hostname();
854 8 50       59 if ($fq_hostname !~ /\./) { # hostname doesn't contain a dot, so it can't be a FQDN
855             my @names = grep(/^\Q${fq_hostname}.\E/o, # grep only FQDNs
856 8         1669 map { split } (gethostbyname($fq_hostname))[0 .. 1] # from all aliases
  16         235  
857             );
858 8 50       48 $fq_hostname = $names[0] if (@names); # take the first FQDN, if any
859 8         36 $fq_hostname =~ s/[()]//gs; # bug 5929
860             }
861              
862 8         36 return $fq_hostname;
863             }
864             }
865              
866             ###########################################################################
867              
868             sub ips_match_in_16_mask {
869 0     0 0 0 my ($ipset1, $ipset2) = @_;
870 0         0 my ($b1, $b2);
871              
872 0         0 foreach my $ip1 (@{$ipset1}) {
  0         0  
873 0         0 foreach my $ip2 (@{$ipset2}) {
  0         0  
874 0 0       0 next unless defined $ip1;
875 0 0       0 next unless defined $ip2;
876 0 0       0 next unless ($ip1 =~ /^(\d+\.\d+\.)/); $b1 = $1;
  0         0  
877 0 0       0 next unless ($ip2 =~ /^(\d+\.\d+\.)/); $b2 = $1;
  0         0  
878 0 0       0 if ($b1 eq $b2) { return 1; }
  0         0  
879             }
880             }
881              
882 0         0 return 0;
883             }
884              
885             sub ips_match_in_24_mask {
886 0     0 0 0 my ($ipset1, $ipset2) = @_;
887 0         0 my ($b1, $b2);
888              
889 0         0 foreach my $ip1 (@{$ipset1}) {
  0         0  
890 0         0 foreach my $ip2 (@{$ipset2}) {
  0         0  
891 0 0       0 next unless defined $ip1;
892 0 0       0 next unless defined $ip2;
893 0 0       0 next unless ($ip1 =~ /^(\d+\.\d+\.\d+\.)/); $b1 = $1;
  0         0  
894 0 0       0 next unless ($ip2 =~ /^(\d+\.\d+\.\d+\.)/); $b2 = $1;
  0         0  
895 0 0       0 if ($b1 eq $b2) { return 1; }
  0         0  
896             }
897             }
898              
899 0         0 return 0;
900             }
901              
902             ###########################################################################
903              
904             # Given a quad-dotted IPv4 address or an IPv6 address, reverses the order
905             # of its bytes (IPv4) or nibbles (IPv6), joins them with dots, producing
906             # a string suitable for reverse DNS lookups. Returns undef in case of a
907             # syntactically invalid IP address.
908             #
909             sub reverse_ip_address {
910 116     116 0 219 my ($ip) = @_;
911              
912 116         166 my $revip;
913 116         425 local($1,$2,$3,$4);
914 116 100 33     689 if ($ip =~ /^(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})\z/) {
    50          
    50          
915 108         416 $revip = "$4.$3.$2.$1";
916             } elsif ($ip !~ /:/ || $ip !~ /^[0-9a-fA-F:.]{2,}\z/) { # triage
917             # obviously unrecognized syntax
918             } elsif (!NetAddr::IP->can('full6')) { # since NetAddr::IP 4.010
919 0         0 info("util: version of NetAddr::IP is too old, IPv6 not supported");
920             } else {
921             # looks like an IPv6 address, let NetAddr::IP check the details
922 8         38 my $ip_obj = NetAddr::IP->new6($ip);
923 8 50       1949 if (defined $ip_obj) { # valid IPv6 address
924             # RFC 5782 section 2.4.
925 8         41 $revip = lc $ip_obj->network->full6; # string in a canonical form
926 8         313 $revip =~ s/://g;
927 8         104 $revip = join('.', reverse split(//,$revip));
928             }
929             }
930 116         434 return $revip;
931             }
932              
933             ###########################################################################
934              
935 152     152 0 827 sub my_inet_aton { unpack("N", pack("C4", split(/\./, $_[0]))) }
936              
937             ###########################################################################
938              
939             sub decode_dns_question_entry {
940             # decodes a Net::DNS::Packet->question entry,
941             # returning a triple: class, type, label
942             #
943 26     26 0 34 my $q = $_[0];
944 26         76 my $qname = $q->qname;
945              
946             # Bug 6959, Net::DNS flags a domain name in a query section as utf8, while
947             # still keeping it "RFC 1035 zone file format"-encoded, silly and harmful
948 26 50       1299 utf8::encode($qname) if utf8::is_utf8($qname); # since Perl 5.8.1
949              
950 26         66 local $1;
951             # Net::DNS provides a query in encoded RFC 1035 zone file format, decode it!
952 26 0       59 $qname =~ s{ \\ ( [0-9]{3} | [^0-9] ) }
  0 0       0  
953 26         80 { length($1)==1 ? $1 : $1 <= 255 ? chr($1) : "\\$1" }xgse;
954             return ($q->qclass, $q->qtype, $qname);
955             }
956              
957             ###########################################################################
958              
959             sub parse_content_type {
960             # This routine is typically called by passing a
961             # get_header("content-type") which passes all content-type headers
962             # (array context). If there are multiple Content-type headers (invalid,
963             # but it happens), MUAs seem to take the last one and so that's what we
964             # should do here.
965 334   100 334 0 1359 #
966             my $ct = $_[-1] || 'text/plain; charset=us-ascii';
967              
968             # This could be made a bit more rigid ...
969             # the actual ABNF, BTW (RFC 1521, section 7.2.1):
970             # boundary := 0*69<bchars> bcharsnospace
971             # bchars := bcharsnospace / " "
972             # bcharsnospace := DIGIT / ALPHA / "'" / "(" / ")" / "+" /"_"
973             # / "," / "-" / "." / "/" / ":" / "=" / "?"
974             #
975             # The boundary may be surrounded by double quotes.
976             # "the boundary parameter, which consists of 1 to 70 characters from
977             # a set of characters known to be very robust through email gateways,
978             # and NOT ending with white space. (If a boundary appears to end with
979             # white space, the white space must be presumed to have been added by
980             # a gateway, and must be deleted.)"
981             #
982             # In practice:
983             # - MUAs accept whitespace before and after the "=" character
984             # - only an opening double quote seems to be needed
985             # - non-quoted boundaries should be followed by space, ";", or end of line
986             # - blank boundaries seem to not work
987 334         1494 #
988             my($boundary) = $ct =~ m!\bboundary\s*=\s*("[^"]+|[^\s";]+(?=[\s;]|$))!i;
989              
990             # remove double-quotes in boundary (should only be at start and end)
991 334 100       854 #
992             $boundary =~ tr/"//d if defined $boundary;
993              
994             # Parse out the charset and name, if they exist.
995 334         1866 #
996 334         1709 my($charset) = $ct =~ /\bcharset\s*=\s*["']?(.*?)["']?(?:;|$)/i;
997             my($name) = $ct =~ /\b(?:file)?name\s*=\s*["']?(.*?)["']?(?:;|$)/i;
998              
999             # RFC 2231 section 3: Parameter Value Continuations
1000             # support continuations for name values
1001 334 50 66     2166 #
1002             if (!$name && $ct =~ /\b(?:file)?name\*0\s*=/i) {
1003 0         0  
1004 0         0 my @name;
1005             $name[$1] = $2
1006             while ($ct =~ /\b(?:file)?name\*(\d+)\s*=\s*["']?(.*?)["']?(?:;|$)/ig);
1007 0         0  
1008             $name = join "", grep defined, @name;
1009             }
1010              
1011             # Get the actual MIME type out ...
1012             # Note: the header content may not be whitespace unfolded, so make sure the
1013             # REs do /s when appropriate.
1014             # correct:
1015             # Content-type: text/plain; charset=us-ascii
1016             # missing a semi-colon, CT shouldn't have whitespace anyway:
1017             # Content-type: text/plain charset=us-ascii
1018 334         832 #
1019 334         1270 $ct =~ s/^\s+//; # strip leading whitespace
1020 334         1856 $ct =~ s/;.*$//s; # strip everything after first ';'
1021 334         745 $ct =~ s@^([^/]+(?:/[^/\s]*)?).*$@$1@s; # only something/something ...
1022             $ct = lc $ct;
1023              
1024             # bug 4298: If at this point we don't have a content-type, assume text/plain;
1025             # also, bug 5399: if the content-type *starts* with "text", and isn't in a
1026 334 100 100     2532 # list of known bad/non-plain formats, do likewise.
      66        
1027             if (!$ct ||
1028             ($ct =~ /^text\b/ && $ct !~ /^text\/(?:x-vcard|calendar|html)$/))
1029 245         486 {
1030             $ct = "text/plain";
1031             }
1032              
1033 334         816 # strip inappropriate chars (bug 5399: after the text/plain fixup)
1034             $ct =~ tr/\000-\040\177-\377\042\050\051\054\072-\077\100\133-\135//d;
1035              
1036             # Now that the header has been parsed, return the requested information.
1037             # In scalar context, just the MIME type, in array context the
1038             # four important data parts (type, boundary, charset, and filename).
1039 334 50       1763 #
1040             return wantarray ? ($ct,$boundary,$charset,$name) : $ct;
1041             }
1042              
1043             ###########################################################################
1044              
1045 8     8 0 18 sub url_encode {
1046 8         89 my ($url) = @_;
1047 8         17 my (@characters) = split(/(\%[0-9a-fA-F]{2})/, $url);
1048             my (@unencoded);
1049             my (@encoded);
1050 8         23  
1051             foreach (@characters) {
1052 141 100       249 # escaped character set ...
1053             if (/\%[0-9a-fA-F]{2}/) {
1054             # IF it is in the range of 0x00-0x20 or 0x7f-0xff
1055             # or it is one of "<", ">", """, "#", "%",
1056             # ";", "/", "?", ":", "@", "=" or "&"
1057 68 50       159 # THEN preserve its encoding
1058 68         165 unless (/(20|7f|[0189a-fA-F][0-9a-fA-F])/i) {
  68         184  
1059 68         130 s/\%([2-7][0-9a-fA-F])/sprintf "%c", hex($1)/e;
1060             push(@unencoded, $_);
1061             }
1062             }
1063             # other stuff
1064             else {
1065             # no re "strict"; # since perl 5.21.8
1066 73         93 # 0x00-0x20, 0x7f-0xff, ", %, <, >
1067 0 0       0 s/([\000-\040\177-\377\042\045\074\076])
1068             /push(@encoded, $1) && sprintf "%%%02x", unpack("C",$1)/egx;
1069             }
1070 8 50       20 }
1071 0         0 if (wantarray) {
1072             return(join("", @characters), join("", @unencoded), join("", @encoded));
1073             }
1074 8         46 else {
1075             return join("", @characters);
1076             }
1077             }
1078              
1079             ###########################################################################
1080              
1081             =item $module = first_available_module (@module_list)
1082              
1083             Return the name of the first module that can be successfully loaded with
1084             C<require> from the list. Returns C<undef> if none are available.
1085              
1086             This is used instead of C<AnyDBM_File> as follows:
1087              
1088             my $module = Mail::SpamAssassin::Util::first_available_module
1089             (qw(DB_File GDBM_File NDBM_File SDBM_File));
1090             tie %hash, $module, $path, [... args];
1091              
1092             Note that C<SDBM_File> is guaranteed to be present, since it comes
1093             with Perl.
1094              
1095             =cut
1096              
1097 6     6 1 24 sub first_available_module {
1098 6         23 my (@packages) = @_;
1099 18 100       899 foreach my $mod (@packages) {
1100 6         47 if (eval 'require '.$mod.'; 1; ') {
1101             return $mod;
1102             }
1103 0         0 }
1104             undef;
1105             }
1106              
1107             ###########################################################################
1108              
1109             =item my ($filepath, $filehandle) = secure_tmpfile();
1110              
1111             Generates a filename for a temporary file, opens it exclusively and
1112             securely, and returns a filehandle to the open file (opened O_RDWR).
1113              
1114             If it cannot open a file after 20 tries, it returns C<undef>.
1115              
1116             =cut
1117              
1118             # thanks to http://www2.picante.com:81/~gtaylor/autobuse/ for this code
1119 61   33 61 1 3640 sub secure_tmpfile {
1120             my $tmpdir = untaint_file_path($ENV{'TMPDIR'} || File::Spec->tmpdir());
1121 61 50 33     620  
1122             defined $tmpdir && $tmpdir ne ''
1123             or die "util: cannot find a temporary directory, set TMP or TMPDIR in environment";
1124 61 50       2825  
1125 61 50       894 opendir(my $dh, $tmpdir) or die "Could not open directory $tmpdir: $!";
1126             closedir $dh or die "Error closing directory $tmpdir: $!";
1127 61         195  
1128 61         209 my ($reportfile, $tmpfh);
1129             for (my $retries = 20; $retries > 0; $retries--) {
1130             # we do not rely on the obscurity of this name for security,
1131 61         838 # we use a average-quality PRG since this is all we need
1132             my $suffix = join('', (0..9,'A'..'Z','a'..'z')[rand 62, rand 62, rand 62,
1133 61         1624 rand 62, rand 62, rand 62]);
1134             $reportfile = File::Spec->catfile($tmpdir,".spamassassin${$}${suffix}tmp");
1135              
1136             # instead, we require O_EXCL|O_CREAT to guarantee us proper
1137 61 50       5499 # ownership of our file, read the open(2) man page
1138 61 50       371 if (sysopen($tmpfh, $reportfile, O_RDWR|O_CREAT|O_EXCL, 0600)) {
1139 61         158 binmode $tmpfh or die "cannot set $reportfile to binmode: $!";
1140             last;
1141 0         0 }
1142             my $errno = $!;
1143              
1144 0 0       0 # ensure the file handle is not semi-open in some way
1145 0 0       0 if ($tmpfh) {
1146 0         0 if (! close $tmpfh) {
1147 0         0 info("error closing $reportfile: $!");
1148             undef $tmpfh;
1149             }
1150             }
1151              
1152 0 0       0 # it is acceptable if $tmpfh already exists, try another
1153             next if $errno == EEXIST;
1154              
1155             # error, maybe "out of quota", "too many open files", "Permission denied"
1156 0         0 # (bug 4017); makes no sense retrying
1157             die "util: failed to create a temporary file '$reportfile': $errno";
1158             }
1159 61 50       206  
1160 0         0 if (!$tmpfh) {
1161 0         0 warn "util: secure_tmpfile failed to create a temporary file, giving up";
1162             return;
1163             }
1164 61         784  
1165 61         733 dbg("util: secure_tmpfile created a temporary file %s", $reportfile);
1166             return ($reportfile, $tmpfh);
1167             }
1168              
1169             =item my ($dirpath) = secure_tmpdir();
1170              
1171             Generates a directory for temporary files. Creates it securely and
1172             returns the path to the directory.
1173              
1174             If it cannot create a directory after 20 tries, it returns C<undef>.
1175              
1176             =cut
1177              
1178             # stolen from secure_tmpfile()
1179 0     0 1 0 sub secure_tmpdir {
1180             my $tmpdir = untaint_file_path(File::Spec->tmpdir());
1181 0 0       0  
1182             if (!$tmpdir) {
1183             # Note: we would prefer to keep this fatal, as not being able to
1184             # find a writable tmpdir is a big deal for the calling code too.
1185 0         0 # That would be quite a psychotic case, also.
1186 0         0 warn "util: cannot find a temporary directory, set TMP or TMPDIR in environment";
1187             return;
1188             }
1189 0         0  
1190 0         0 my ($reportpath, $tmppath);
1191             my $umask = umask 077;
1192 0         0  
1193             for (my $retries = 20; $retries > 0; $retries--) {
1194             # we do not rely on the obscurity of this name for security,
1195 0         0 # we use a average-quality PRG since this is all we need
1196             my $suffix = join('', (0..9,'A'..'Z','a'..'z')[rand 62, rand 62, rand 62,
1197 0         0 rand 62, rand 62, rand 62]);
1198             $reportpath = File::Spec->catfile($tmpdir,".spamassassin${$}${suffix}tmp");
1199              
1200             # instead, we require O_EXCL|O_CREAT to guarantee us proper
1201 0 0       0 # ownership of our file, read the open(2) man page
1202 0         0 if (mkdir $reportpath, 0700) {
1203 0         0 $tmppath = $reportpath;
1204             last;
1205             }
1206 0 0       0  
1207             if ($!{EEXIST}) {
1208 0         0 # it is acceptable if $reportpath already exists, try another
1209             next;
1210             }
1211            
1212 0         0 # error, maybe "out of quota" or "too many open files" (bug 4017)
1213             warn "util: secure_tmpdir failed to create file '$reportpath': $!\n";
1214             }
1215 0         0  
1216             umask $umask;
1217 0 0       0  
1218             warn "util: secure_tmpdir failed to create a directory, giving up" if (!$tmppath);
1219 0         0  
1220             return $tmppath;
1221             }
1222              
1223              
1224             ###########################################################################
1225              
1226             ##
1227             ## DEPRECATED FUNCTION, sub uri_to_domain removed.
1228             ## Replaced with Mail::SpamAssassin::RegistryBoundaries::uri_to_domain.
1229             ##
1230              
1231             *uri_list_canonify = \&uri_list_canonicalize; # compatibility alias
1232 2061     2061 0 8620 sub uri_list_canonicalize {
1233             my($redirector_patterns, @uris) = @_;
1234              
1235 2061         2156 # make sure we catch bad encoding tricks
1236 2061         2738 my @nuris;
1237             for my $uri (@uris) {
1238             # we're interested in http:// and so on, skip mailto: and
1239 2073 100 100     7256 # email addresses with no protocol
1240             next if $uri =~ /^mailto:/i || $uri =~ /^[^:]*\@/;
1241              
1242 2010         2676 # sometimes we catch URLs on multiple lines
1243             $uri =~ s/\n//g;
1244              
1245 2010         2814 # URLs won't have leading/trailing whitespace
1246 2010         2542 $uri =~ s/^\s+//;
1247             $uri =~ s/\s+$//;
1248              
1249 2010         2367 # CRs just confuse things down below, so trash them now
1250             $uri =~ s/\r//g;
1251              
1252 2010         2006 # Make a copy so we don't trash the original in the array
1253             my $nuri = $uri;
1254              
1255             # bug 4390: certain MUAs treat back slashes as front slashes.
1256             # since backslashes are supposed to be encoded in a URI, swap non-encoded
1257 2010         3215 # ones with front slashes.
1258             $nuri =~ tr{\\}{/};
1259              
1260 2010         8183 # http:www.foo.biz -> http://www.foo.biz
1261             $nuri =~ s{^(https?:)/{0,2}}{$1//}i;
1262              
1263             # *always* make a dup with all %-encoding decoded, since
1264             # important parts of the URL may be encoded (such as the
1265 2010 100       4024 # scheme). (bug 4213)
1266 8         27 if ($nuri =~ /%[0-9a-fA-F]{2}/) {
1267             $nuri = Mail::SpamAssassin::Util::url_encode($nuri);
1268             }
1269              
1270             # www.foo.biz -> http://www.foo.biz
1271 2010 100       4402 # unschemed URIs: assume default of "http://" as most MUAs do
1272 378 100       572 if ($nuri !~ /^[-_a-z0-9]+:/i) {
1273 2         9 if ($nuri =~ /^ftp\./) {
1274             $nuri =~ s{^}{ftp://}g;
1275             }
1276 376         1088 else {
1277             $nuri =~ s{^}{http://}g;
1278             }
1279             }
1280              
1281 2010         4554 # http://www.foo.biz?id=3 -> http://www.foo.biz/?id=3
1282             $nuri =~ s{^(https?://[^/?]+)\?}{$1/?}i;
1283              
1284             # deal with encoding of chars, this is just the set of printable
1285 2010         2772 # chars minus ' ' (that is, dec 33-126, hex 21-7e)
  0         0  
1286 2010         2163 $nuri =~ s/\&\#0*(3[3-9]|[4-9]\d|1[01]\d|12[0-6]);/sprintf "%c",$1/ge;
  0         0  
1287             $nuri =~ s/\&\#x0*(2[1-9]|[3-6][a-fA-F0-9]|7[0-9a-eA-E]);/sprintf "%c",hex($1)/ge;
1288              
1289 2010 100       3043 # put the new URI on the new list if it's different
1290 393         542 if ($nuri ne $uri) {
1291             push(@nuris, $nuri);
1292             }
1293              
1294 2010 100       12897 # deal with wierd hostname parts, remove user/pass, etc.
1295 1990         5407 if ($nuri =~ m{^(https?://)([^/]+?)((?::\d*)?\/.*)?$}i) {
1296             my($proto, $host, $rest) = ($1,$2,$3);
1297              
1298 1990   100     5163 # not required
1299             $rest ||= '';
1300              
1301             # Bug 6751:
1302             # RFC 3490 (IDNA): Whenever dots are used as label separators, the
1303             # following characters MUST be recognized as dots: U+002E (full stop),
1304             # U+3002 (ideographic full stop), U+FF0E (fullwidth full stop),
1305             # U+FF61 (halfwidth ideographic full stop).
1306             # RFC 5895: [...] the IDEOGRAPHIC FULL STOP character (U+3002)
1307             # can be mapped to the FULL STOP before label separation occurs.
1308             # [...] Only the IDEOGRAPHIC FULL STOP character (U+3002) is added in
1309             # this mapping because the authors have not fully investigated [...]
1310             # Adding also 'SMALL FULL STOP' (U+FE52) as seen in the wild.
1311 1990 100       5158 # Parhaps also the 'ONE DOT LEADER' (U+2024).
1312             if ($host =~ s{(?: \xE3\x80\x82 | \xEF\xBC\x8E | \xEF\xBD\xA1 |
1313 10         27 \xEF\xB9\x92 | \xE2\x80\xA4 )}{.}xgs) {
1314             push(@nuris, join ('', $proto, $host, $rest));
1315             }
1316              
1317             # bug 4146: deal with non-US ASCII 7-bit chars in the host portion
1318             # of the URI according to RFC 1738 that's invalid, and the tested
1319 1990 50       3601 # browsers (Firefox, IE) remove them before usage...
1320 0         0 if ($host =~ tr/\000-\040\200-\377//d) {
1321             push(@nuris, join ('', $proto, $host, $rest));
1322             }
1323              
1324             # deal with http redirectors. strip off one level of redirector
1325             # and add back to the array. the foreach loop will go over those
1326             # and deal appropriately.
1327              
1328             # Bug 7278: try redirector pattern matching first
1329 1990         2543 # (but see also Bug 4176)
1330 1990         1928 my $found_redirector_match;
  1990         2911  
1331 13816 100       33079 foreach my $re (@{$redirector_patterns}) {
1332 4 50       13 if ("$proto$host$rest" =~ $re) {
1333 4         22 next unless defined $1;
1334 4         16 dbg("uri: parsed uri pattern: $re");
1335 4         9 dbg("uri: parsed uri found: $1 in redirector: $proto$host$rest");
1336 4         6 push (@uris, $1);
1337 4         8 $found_redirector_match = 1;
1338             last;
1339             }
1340 1990 100       3259 }
1341             if (!$found_redirector_match) {
1342             # try generic https? check if redirector pattern matching failed
1343 1986 100       3544 # bug 3308: redirectors like yahoo only need one '/' ... <grrr>
1344 6         16 if ($rest =~ m{(https?:/{0,2}.+)$}i) {
1345 6         23 push(@uris, $1);
1346             dbg("uri: parsed uri found: $1 in hard-coded redirector");
1347             }
1348             }
1349              
1350             ########################
1351             ## TVD: known issue, if host has multiple combinations of the following,
1352             ## all permutations will be put onto @nuris. shouldn't be an issue.
1353              
1354             # Get rid of cruft that could cause confusion for rules...
1355              
1356 1990 100       3326 # remove "www.fakehostname.com@" username part
1357 9         35 if ($host =~ s/^[^\@]+\@//gs) {
1358             push(@nuris, join ('', $proto, $host, $rest));
1359             }
1360              
1361             # bug 3186: If in a sentence, we might pick up odd characters ...
1362             # ie: "visit http://example.biz." or "visit http://example.biz!!!"
1363             # the host portion should end in some form of alpha-numeric, strip off
1364 1990 100       4503 # the rest.
1365 1         4 if ($host =~ s/[^0-9A-Za-z]+$//) {
1366             push(@nuris, join ('', $proto, $host, $rest));
1367             }
1368              
1369             ########################
1370              
1371             # deal with hosts which are IPs
1372             # also handle things like:
1373             # http://89.0x00000000000000000000068.0000000000000000000000160.0x00000000000011
1374             # both hex (0x) and oct (0+) encoded octets, etc.
1375 1990 100       7560  
    100          
    100          
1376             if ($host =~ /^
1377             ((?:0x[0-9a-f]+|\d+)\.)
1378             ((?:0x[0-9a-f]+|\d+)\.)
1379             ((?:0x[0-9a-f]+|\d+)\.)
1380             (0x[0-9a-f]+|\d+)
1381             $/ix)
1382 67         348 {
1383 67         128 my @chunk = ($1,$2,$3,$4);
1384 268         414 foreach my $octet (@chunk) {
  22         84  
1385 268         324 $octet =~ s/^0x([0-9a-f]+)/sprintf "%d",hex($1)/gei;
  3         11  
1386 268         351 $octet =~ s/^0+([1-3][0-7]{0,2}|[4-7][0-7]?)\b/sprintf "%d",oct($1)/ge;
1387             $octet =~ s/^0+//;
1388 67         348 }
1389             push(@nuris, join ('', $proto, @chunk, $rest));
1390             }
1391              
1392             # "http://0x7f000001/"
1393             elsif ($host =~ /^0x[0-9a-f]+$/i) {
1394 6         34 # only take last 4 octets
  6         39  
1395 6         24 $host =~ s/^0x[0-9a-f]*?([0-9a-f]{1,8})$/sprintf "%d",hex($1)/gei;
1396             push(@nuris, join ('', $proto, decode_ulong_to_ip($host), $rest));
1397             }
1398              
1399             # "http://1113343453/"
1400 20         71 elsif ($host =~ /^[0-9]+$/) {
1401             push(@nuris, join ('', $proto, decode_ulong_to_ip($host), $rest));
1402             }
1403              
1404             }
1405             }
1406              
1407 2061         3069 # remove duplicates, merge nuris and uris
  2579         6678  
1408             my %uris = map { $_ => 1 } @uris, @nuris;
1409 2061         7041  
1410             return keys %uris;
1411             }
1412              
1413 26     26 0 374 sub decode_ulong_to_ip {
1414             return join(".", unpack("CCCC",pack("H*", sprintf "%08lx", $_[0])));
1415             }
1416              
1417             ###########################################################################
1418              
1419 10     10 0 32 sub first_date {
1420             my (@strings) = @_;
1421 10         52  
1422 10         71 foreach my $string (@strings) {
1423 10 50 33     80 my $time = parse_rfc822_date($string);
1424             return $time if defined($time) && $time;
1425 0         0 }
1426             return;
1427             }
1428              
1429 10     10 0 40 sub receive_date {
1430             my ($header) = @_;
1431 10   50     30  
1432 10         80 $header ||= '';
1433             $header =~ s/\n[ \t]+/ /gs; # fix continuation lines
1434 10         149  
1435 10         27 my @rcvd = ($header =~ /^Received:(.*)/img);
1436             my @local;
1437             my $time;
1438 10 100       46  
1439 8 50 33     119 if (@rcvd) {
1440             if ($rcvd[0] =~ /qmail \d+ invoked by uid \d+/ ||
1441             $rcvd[0] =~ /\bfrom (?:localhost\s|(?:\S+ ){1,2}\S*\b127\.0\.0\.1\b)/)
1442 0         0 {
1443             push @local, (shift @rcvd);
1444 8 50 33     92 }
    50          
1445 0         0 if (@rcvd && ($rcvd[0] =~ m/\bby localhost with \w+ \(fetchmail-[\d.]+/)) {
1446             push @local, (shift @rcvd);
1447             }
1448 0         0 elsif (@local) {
1449             unshift @rcvd, (shift @local);
1450             }
1451             }
1452 10 100       43  
1453 8         44 if (@rcvd) {
1454 8 50       55 $time = first_date(shift @rcvd);
1455             return $time if defined($time);
1456 2 50       12 }
1457 0         0 if (@local) {
1458 0 0       0 $time = first_date(@local);
1459             return $time if defined($time);
1460 2 50       36 }
1461 0         0 if ($header =~ /^(?:From|X-From-Line:)\s+(.+)$/im) {
1462 0 0       0 my $string = $1;
1463 0         0 $string .= " ".local_tz() unless $string =~ /(?:[-+]\d{4}|\b[A-Z]{2,4}\b)/;
1464 0 0       0 $time = first_date($string);
1465             return $time if defined($time);
1466 2 50       15 }
1467 0         0 if (@rcvd) {
1468 0 0       0 $time = first_date(@rcvd);
1469             return $time if defined($time);
1470 2 50       20 }
1471 0         0 if ($header =~ /^Resent-Date:\s*(.+)$/im) {
1472 0 0       0 $time = first_date($1);
1473             return $time if defined($time);
1474 2 50       18 }
1475 2         12 if ($header =~ /^Date:\s*(.+)$/im) {
1476 2 50       16 $time = first_date($1);
1477             return $time if defined($time);
1478             }
1479 0         0  
1480             return time;
1481             }
1482              
1483             ###########################################################################
1484 0     0 0 0 sub get_user_groups {
1485 0         0 my $suid = shift;
1486 0         0 dbg("get_user_groups: uid is $suid\n");
1487 0         0 my ( $user, $passwd, $uid, $gid, $quota, $comment, $gcos, $dir, $shell, $expire ) = getpwuid($suid);
1488 0         0 my $rgids="$gid ";
1489 0 0       0 while ( my($name,$pw,$gid,$members) = getgrent() ) {
1490 0         0 if ( $members =~ m/\b$user\b/ ) {
1491 0         0 $rgids .= "$gid ";
1492             dbg("get_user_groups: added $gid ($name) to group list which is now: $rgids\n");
1493             }
1494 0         0 }
1495 0         0 endgrent;
1496 0         0 chop $rgids;
1497             return ($rgids);
1498             }
1499              
1500              
1501              
1502 0 0   0 0 0 sub setuid_to_euid {
1503             return if (RUNNING_ON_WINDOWS);
1504              
1505 0         0 # remember the target uid, the first number is the important one
1506 0         0 my $touid = $>;
1507 0         0 my $gids = get_user_groups($touid);
1508 0 0       0 my ( $pgid, $supgs ) = split (' ',$gids,2);
1509 0 0       0 defined $supgs or $supgs=$pgid;
1510             if ($( != $pgid) {
1511 0         0 # Gotta be root for any of this to work
1512 0         0 $> = 0 ;
1513 0         0 dbg("util: changing real primary gid from $( to $pgid and supplemental groups to $supgs to match effective uid $touid");
1514 0         0 POSIX::setgid($pgid);
1515 0         0 dbg("util: POSIX::setgid($pgid) set errno to $!");
1516 0         0 $! = 0;
1517 0         0 $( = $pgid;
1518 0         0 $) = "$pgid $supgs";
1519             dbg("util: assignment \$) = $pgid $supgs set errno to $!");
1520 0 0       0 }
1521 0         0 if ($< != $touid) {
1522             dbg("util: changing real uid from $< to match effective uid $touid");
1523             # bug 3586: kludges needed to work around platform dependent behavior assigning to $<
1524 0         0 # The POSIX functions deal with that so just use it here
1525 0         0 POSIX::setuid($touid);
  0         0  
1526             $< = $touid; $> = $touid; # bug 5574
1527              
1528 0 0       0 # Check that we have now accomplished the setuid: catch bug 3586 if it comes back
1529             if ($< != $touid) {
1530 0         0 # keep this fatal: it's a serious security problem if it fails
1531             die "util: setuid $< to $touid failed!";
1532             }
1533             }
1534             }
1535              
1536             # helper app command-line open
1537 0 0   0 0 0 sub helper_app_pipe_open {
1538 0         0 if (RUNNING_ON_WINDOWS) {
1539             return helper_app_pipe_open_windows (@_);
1540 0         0 } else {
1541             return helper_app_pipe_open_unix (@_);
1542             }
1543             }
1544              
1545 0     0 0 0 sub helper_app_pipe_open_windows {
1546             my ($fh, $stdinfile, $duperr2out, @cmdline) = @_;
1547              
1548 0         0 # use a traditional open(FOO, "cmd |")
1549 0 0       0 my $cmd = join(' ', @cmdline);
  0         0  
1550 0 0       0 if ($stdinfile) { $cmd .= qq/ < "$stdinfile"/; }
  0         0  
1551 0         0 if ($duperr2out) { $cmd .= " 2>&1"; }
1552             return open ($fh, $cmd.'|');
1553             }
1554              
1555 0     0 0 0 sub force_die {
1556             my ($msg) = @_;
1557              
1558             # note use of eval { } scope in logging -- paranoia to ensure that a broken
1559             # $SIG{__WARN__} implementation will not interfere with the flow of control
1560 0         0 # here, where we *have* to die.
  0         0  
1561 0         0 eval { warn $msg }; # hmm, STDERR may no longer be open
  0         0  
1562             eval { dbg("util: force_die: $msg") };
1563 0         0  
1564 0         0 POSIX::_exit(6); # avoid END and destructor processing
1565             kill('KILL',$$); # still kicking? die!
1566             }
1567              
1568 0     0 0 0 sub helper_app_pipe_open_unix {
1569             my ($fh, $stdinfile, $duperr2out, @cmdline) = @_;
1570 0         0  
1571             my $pid;
1572             # do a fork-open, so we can setuid() back
1573 0         0 eval {
  0         0  
1574 0 0       0 $pid = open ($fh, '-|'); 1;
1575 0 0       0 } or do {
  0         0  
1576 0         0 my $eval_stat = $@ ne '' ? $@ : "errno=$!"; chomp $eval_stat;
1577             die "util: cannot fork: $eval_stat";
1578 0 0       0 };
1579             if (!defined $pid) {
1580 0         0 # acceptable to die() here, calling code catches it
1581             die "util: cannot open a pipe to a forked process: $!";
1582             }
1583 0 0       0  
1584 0         0 if ($pid != 0) {
1585             return $pid; # parent process; return the child pid
1586             }
1587              
1588             # else, child process.
1589              
1590             # from now on, we cannot die(), it could create a cloned process
1591 0         0 # use force_die() instead (bug 4370, cmt 2)
1592             eval {
1593 0         0 # go setuid...
1594 0         0 setuid_to_euid();
1595             info("util: setuid: ruid=$< euid=$> rgid=$( egid=$) ");
1596              
1597             # now set up the fds. due to some wierdness, we may have to ensure that
1598             # we *really* close the correct fd number, since some other code may have
1599             # redirected the meaning of STDOUT/STDIN/STDERR it seems... (bug 3649).
1600             # use POSIX::close() for that. it's safe to call close() and POSIX::close()
1601             # on the same fd; the latter is a no-op in that case.
1602 0 0       0  
1603             if (!$stdinfile) { # < $tmpfile
1604 0         0 # ensure we have *some* kind of fd 0.
1605             $stdinfile = "/dev/null";
1606             }
1607 0         0  
1608 0 0       0 my $f = fileno(STDIN);
1609             close STDIN or die "error closing STDIN: $!";
1610              
1611 0 0       0 # sanity: was that the *real* STDIN? if not, close that one too ;)
1612 0         0 if ($f != 0) {
1613             POSIX::close(0);
1614             }
1615 0 0       0  
1616             open (STDIN, "<$stdinfile") or die "cannot open $stdinfile: $!";
1617              
1618             # this should be impossible; if we just closed fd 0, UNIX
1619             # fd behaviour dictates that the next fd opened (the new STDIN)
1620             # will be the lowest unused fd number, which should be 0.
1621 0 0       0 # so die with a useful error if this somehow isn't the case.
1622 0         0 if (fileno(STDIN) != 0) {
1623             die "oops: fileno(STDIN) [".fileno(STDIN)."] != 0";
1624             }
1625              
1626             # Ensure STDOUT is open. As we just created a pipe to ensure this, it has
1627             # to be open to that pipe, and if it isn't, something's seriously screwy.
1628             # Update: actually, this fails! see bug 3649 comment 37. For some reason,
1629             # fileno(STDOUT) can be 0; possibly because open("-|") didn't change the fh
1630             # named STDOUT, instead changing fileno(1) directly. So this is now
1631             # commented.
1632             # if (fileno(STDOUT) != 1) {
1633             # die "setuid: oops: fileno(STDOUT) [".fileno(STDOUT)."] != 1";
1634             # }
1635 0         0  
1636             STDOUT->autoflush(1);
1637 0 0       0  
1638 0         0 if ($duperr2out) { # 2>&1
1639 0 0       0 my $f = fileno(STDERR);
1640             close STDERR or die "error closing STDERR: $!";
1641              
1642 0 0       0 # sanity: was that the *real* STDERR? if not, close that one too ;)
1643 0         0 if ($f != 2) {
1644             POSIX::close(2);
1645             }
1646 0 0       0  
1647 0         0 open (STDERR, ">&STDOUT") or die "dup STDOUT failed: $!";
1648             STDERR->autoflush(1); # make sure not to lose diagnostics if exec fails
1649              
1650 0 0       0 # STDERR must be fd 2 to be useful to subprocesses! (bug 3649)
1651 0         0 if (fileno(STDERR) != 2) {
1652             die "oops: fileno(STDERR) [".fileno(STDERR)."] != 2";
1653             }
1654             }
1655 0         0  
1656 0         0 exec @cmdline;
1657             die "exec failed: $!";
1658 0 0       0 };
  0         0  
1659             my $eval_stat = $@ ne '' ? $@ : "errno=$!"; chomp $eval_stat;
1660              
1661 0         0 # bug 4370: we really have to exit here; break any eval traps
1662             force_die(sprintf('util: failed to spawn a process "%s": %s',
1663 0         0 join(", ",@cmdline), $eval_stat));
1664             die; # must be a die() otherwise -w will complain
1665             }
1666              
1667             ###########################################################################
1668              
1669             # As "perldoc perlvar" notes, in perl 5.8.0, the concept of "safe" signal
1670             # handling was added, which means that signals cannot interrupt a running OP.
1671             # unfortunately, a regexp match is a single OP, so a psychotic m// can
1672             # effectively "hang" the interpreter as a result, and a $SIG{ALRM} handler
1673             # will never get called.
1674             #
1675             # However, by using "unsafe" signals, we can still interrupt that -- and
1676             # POSIX::sigaction can create an unsafe handler on 5.8.x. So this function
1677             # provides a portable way to do that.
1678              
1679 0     0 0 0 sub trap_sigalrm_fully {
1680 0 0 0     0 my ($handler) = @_;
1681             if ($] < 5.008 || am_running_on_windows()) {
1682             # signals are always unsafe on perl older than 5.008, just use %SIG
1683 0         0 # Bug 6359, no POSIX::SIGALRM on Windows, just use %SIG
1684             $SIG{ALRM} = $handler;
1685             } else {
1686 0         0 # may be using "safe" signals with %SIG; use POSIX to avoid it
1687             POSIX::sigaction POSIX::SIGALRM(), new POSIX::SigAction $handler;
1688             }
1689             }
1690              
1691             ###########################################################################
1692              
1693             # Removes any normal perl-style regexp delimiters at
1694             # the start and end, and modifiers at the end (if present).
1695             # If modifiers are found, they are inserted into the pattern using
1696             # the /(?i)/ idiom.
1697              
1698 42     42 0 195 sub regexp_remove_delimiters {
1699             my ($re) = @_;
1700 42         79  
1701 42 50 33     1124 my $delim;
    50          
    50          
    50          
    50          
1702 0         0 if (!defined $re || $re eq '') {
1703 0         0 warn "cannot remove delimiters from null regexp";
1704             return; # invalid
1705             }
1706 0         0 elsif ($re =~ s/^m\{//) { # m{foo/bar}
1707             $delim = '}';
1708             }
1709 0         0 elsif ($re =~ s/^m\(//) { # m(foo/bar)
1710             $delim = ')';
1711             }
1712 0         0 elsif ($re =~ s/^m<//) { # m<foo/bar>
1713             $delim = '>';
1714             }
1715 0         0 elsif ($re =~ s/^m(\W)//) { # m#foo/bar#
1716             $delim = $1;
1717 42         540 } else { # /foo\/bar/ or !foo/bar!
  42         365  
1718             $re =~ s/^(\W)//; $delim = $1;
1719             }
1720 42 50       904  
1721             $re =~ s/\Q${delim}\E([imsx]*)$// or warn "unbalanced re: $re";
1722 42         317  
1723 42 100       158 my $mods = $1;
1724 20         97 if ($mods) {
1725             $re = "(?".$mods.")".$re;
1726             }
1727 42         184  
1728             return $re;
1729             }
1730              
1731             # turn "/foobar/i" into qr/(?i)foobar/
1732              
1733 0     0 0 0 sub make_qr {
1734 0         0 my ($re) = @_;
1735 0         0 $re = regexp_remove_delimiters($re);
1736             return qr/$re/;
1737             }
1738              
1739             ###########################################################################
1740              
1741 0     0 0 0 sub get_my_locales {
1742             my ($ok_locales) = @_;
1743 0         0  
1744 0         0 my @locales = split(' ', $ok_locales);
1745 0   0     0 my $lang = $ENV{'LC_ALL'};
1746 0   0     0 $lang ||= $ENV{'LANGUAGE'};
1747 0   0     0 $lang ||= $ENV{'LC_MESSAGES'};
1748 0 0       0 $lang ||= $ENV{'LANG'};
1749 0         0 push (@locales, $lang) if defined($lang);
1750             return @locales;
1751             }
1752              
1753             ###########################################################################
1754              
1755             # bug 5612: work around for bugs in Berkeley db 4.2
1756             #
1757             # on 4.2 having the __db.[DBNAME] file will cause an loop that will never finish
1758             # on 4.3+ the loop will timeout after 301 open attempts, but we will still
1759             # be unable to open the database. This workaround solves both problems.
1760             #
1761 0     0 0 0 sub avoid_db_file_locking_bug {
1762             my ($path) = @_;
1763 0         0  
1764             my $db_tmpfile = untaint_file_path(File::Spec->catfile(dirname($path),
1765             '__db.'.basename($path)));
1766              
1767 0         0 # delete "__db.[DBNAME]" and "__db.[DBNAME].*"
1768 0         0 foreach my $tfile ($db_tmpfile, glob("$db_tmpfile.*")) {
1769 0 0       0 my $file = untaint_file_path($tfile);
1770 0 0       0 my $stat_errn = stat($file) ? 0 : 0+$!;
1771             next if $stat_errn == ENOENT;
1772 0         0  
1773 0 0       0 dbg("util: Berkeley DB bug work-around: cleaning tmp file $file");
1774             unlink($file) or warn "cannot remove Berkeley DB tmp file $file: $!\n";
1775             }
1776             }
1777              
1778             ###########################################################################
1779              
1780 0     0 0 0 sub fisher_yates_shuffle {
1781 0         0 my ($deck) = @_;
  0         0  
1782 0         0 for (my $i = $#{$deck}; $i > 0; $i--) {
1783 0         0 my $j = int rand($i+1);
1784             @$deck[$i,$j] = @$deck[$j,$i];
1785             }
1786             }
1787              
1788             ###########################################################################
1789              
1790              
1791             ###########################################################################
1792              
1793             # bugs 6419 and 2607 relate to returning a score 1/10th lower than the
1794             # required score if the rounded to the 10th version of the score is equal
1795             # to the required score
1796             #
1797             # moved from PerMessageStatus.pm to here and modified to allow for a
1798             # non-class version of the routine to be called from PerMessageStatus
1799             # and from spamd
1800              
1801 38     38 0 163 sub get_tag_value_for_score {
1802             my ($score, $rscore, $is_spam) = @_;
1803              
1804             #BASED ON _get_tag_value_for_score from PerMsgStatus.pm
1805 38         246  
1806 38         174 $score = sprintf("%2.1f", $score);
1807             $rscore = sprintf("%2.1f", $rscore);
1808              
1809             # if the email is spam, return the accurate score
1810             # if the email is NOT spam and the score is less than the required score,
1811             # then return the accurate score
1812 38 50 66     286  
1813             return $score if $is_spam or $score < $rscore;
1814              
1815             # if the email is NOT spam and $score = $rscore, return the $rscore - 0.1
1816             # effectively flooring the value to the closest tenth
1817 0            
1818             return $rscore - 0.1;
1819             }
1820              
1821             ###########################################################################
1822              
1823              
1824             1;
1825              
1826             =back
1827              
1828             =cut