File Coverage

blib/lib/URBL/Prepare.pm
Criterion Covered Total %
statement 47 75 62.6
branch 9 18 50.0
condition 3 10 30.0
subroutine 7 11 63.6
pod 6 8 75.0
total 72 122 59.0


line stmt bran cond sub pod time code
1              
2             package URBL::Prepare;
3              
4 1     1   590 use strict;
  1         2  
  1         35  
5             #use diagnostics;
6 1     1   788 use AutoLoader 'AUTOLOAD';
  1         1399  
  1         4  
7 1     1   26 use vars qw($VERSION);
  1         1  
  1         931  
8              
9             $VERSION = do { my @r = (q$Revision: 0.09 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
10              
11             sub loadcache;
12 0     0 0 0 sub Destroy {};
13              
14             =head1 NAME
15              
16             URPL::Prepare -- prepare hostname for URBL domain lookup
17              
18             =head1 SYNOPSIS
19              
20             require URBL::Prepare;
21              
22             my $ubp = new URBL::Prepare;
23              
24             $tlds = $blessed->cachetlds($localfilelistptr);
25             $whitelist = $blessed->cachewhite($localfilelistptr);
26             $domain = $blessed->urbldomain($hostname);
27             $response_code = $proto->loadcache($url,$localfile);
28             ($response,$message) = $proto->loadcache($url,$localfile);
29             $rv = $blessed->urblblack($hostname);
30             $rv = $blessed->urblwhite($hostname);
31              
32             =head1 DESCRIPTION
33              
34             =over 2
35              
36             =item * my $urbl = new URBL::Prepare;
37              
38             This method returns a blessed reference to an empty hash.
39              
40             For use with other modules:
41              
42             require URBL::Prepare;
43              
44             @ISA = qw(URBL::Prepare);
45              
46             =back
47              
48             =cut
49              
50             sub new {
51 1     1 1 13 my $proto = shift;
52 1   50     10 my $class = ref $proto || $proto || __PACKAGE__;
53 1         4 bless {}, $class;
54             }
55              
56             =head1 URBL Preparation for lookup methods
57              
58             The following three methods are for facilitating URBL lookups.
59              
60             SEE: http://www.uribl.com/about.shtml
61             and http://www.surbl.org/guidelines
62              
63             =over 2
64              
65             =item * $tldlist = $blessed->cachetlds($localfilelistptr);
66              
67             This method opens local files in "file list" and extracts the tld's found
68             therein.
69              
70             input: ptr to array of local/file/path/names
71             return: array ptr to list of tld's
72              
73             NOTE: place level 3 tld's ahead of level 2 tld's
74              
75             =cut
76              
77             # do level3 tld's first
78             sub cachetlds {
79 3     3 1 99 my($bls,$files) = @_;
80 3         5 my @tldlist;
81 3         7 foreach my $infile (@$files) {
82 2         3 my $tldf;
83 2 50       81 next unless open $tldf, $infile;
84 2         2172 foreach (<$tldf>) {
85 4120         4389 chomp;
86 4120 50       9605 next unless $_ =~ /\S/;
87 4120         8980 $_ =~ s/\./\\./g;
88 4120         8222 push @tldlist, lc $_;
89             }
90             }
91 3         19 $bls->{-urblpreparebad} = \@tldlist;
92             }
93              
94             =item * $whitelist = $blessed->cachewhite($localfilelistptr);
95              
96             This method opens local file(s) in "file list" and extracts the domains
97             found therein.
98              
99             See http://wiki.apache.org/spamassassin/DnsBlocklists and
100             http://spamassasin.googlecode.com/svn-history/r6/trunk/share/spamassassin/
101              
102             Note:: these URL's may change
103              
104             input: ptr to array of local/file/path/names
105             return: array ptr to whitelist domain names
106              
107             =cut
108              
109             sub cachewhite {
110 3     3 1 68 my($bls,$files) = @_;
111 3         4 my @whitelist;
112 3         7 foreach my $infile (@$files) {
113 1         1 my $wfile;
114 1 50       43 next unless open $wfile, $infile;
115 1         109 foreach(<$wfile>) {
116 160 100       430 next unless $_ =~ /uridnsbl_skip_domain\s+(.+)/;
117 51         231 (my $white = $1) =~ s/\./\\./g;
118 51         62 chomp $white;
119 51         193 my @wtmp = split /\s+/, lc $white;
120 51         147 push @whitelist, @wtmp;
121             }
122             }
123 3         23 $bls->{-urblpreparewhite} = \@whitelist;
124             }
125              
126             =item * $blacklist = $blessed->cacheblack($localfilelistptr);
127              
128             This method opens local file(s) in "file list" and extracts the domains found
129             therein. The domains may be space seperated many to a line.
130              
131             input: ptr to array of local/file/path/names
132             return: ptr to blacklist domain names
133              
134             =cut
135              
136             sub cacheblack {
137 0     0 1 0 my($bls,$files) = @_;
138 0         0 my @blacklist;
139 0         0 foreach my $infile (@$files) {
140 0         0 my $bkfile;
141 0 0       0 next unless open $bkfile, $infile;
142 0         0 foreach(<$bkfile>) {
143 0         0 chomp;
144 0         0 (my $black = $_) =~ s/\./\\./g;
145 0         0 my @btmp = split /\s+/, lc $black;
146 0         0 push @blacklist, @btmp;
147             }
148             }
149 0         0 $bls->{-urblprepareblack} = \@blacklist;
150             }
151              
152             =item * $domain = $blessed->urbldomain($hostname)
153              
154             This method extracts a domain name to check against an SURBL. If the
155             hostname is whitelisted, the return value is false, otherwise a domain name
156             is returned.
157              
158             input: hostname
159             return: false if whitelisted
160             else domain to check against SURBL
161              
162             NOTE: optionally white or tld testing will be bypassed if the pointer
163             is undefined or points to an empty array.
164              
165             =cut
166              
167             # Implementation Guidelines
168             #
169             # http://www.surbl.org/guidelines
170             #
171             # Extract base (registered) domains from those URIs. This includes removing
172             # all leading host names, subdomains, www., randomized subdomains, etc. In
173             # order to determine the level of domain to check, use our tables of two level
174             # and three level TLDs. Originally these were CCTLDs, but they now also
175             # include some frequently abused hosting domains. (Note that these files only
176             # rarely update. Please don't retrieve them more often than once per day.) The
177             # usage is:
178             #
179             # For any domain on the three level list, check it at the fourth level.
180             # For any domain on the two level list, check it at the third level.
181             # For any other domain, check it at the second level.
182             #
183             # For example, any domain found in the two level list should have a third
184             # level domain name (such as foo.co.uk) checked against SURBLs. Domains not
185             # specifically on the two level list (such as foo.com or foo.fr) should be
186             # checked at two levels. Please do not check at levels other than these as
187             # doing so will cause unnecessary queries that won't result in matches.
188              
189             sub urbldomain {
190 5     5 1 151 my $bls = shift;
191 5         17 my $host = lc shift;
192 5   50     31 my $white = $bls->{-urblpreparewhite} || [];
193 5   50     21 my $tlds = $bls->{-urblpreparebad} || [];
194            
195 5         14 foreach(@$white) {
196 1008 100       7689 return undef if $host =~ /$_$/; # whitelisted?
197             }
198 4         23 foreach (@$tlds) {
199 11590 100       139879 if ($host =~ /([^\.]+\.$_)$/) {
200 2         11 ($host = $1) =~ s/\\//g;
201 2         15 return $host;
202             }
203             }
204 2         34 $host =~ /([^\.]+\.[^\.]+)$/;
205 2         25 return $1;
206             }
207              
208             =item * $rv = $blessed->urblblack($hostname)
209              
210             This method check if a hostname is found within the local black list(s).
211              
212             input: hostname
213             return: domain found, else false
214              
215             =cut
216              
217             sub urblblack {
218 0     0 1   my $bls = shift;
219 0           my $host = lc shift;
220 0   0       my $tlds = $bls->{-urblprepareblack} || [];
221 0           foreach (@$tlds) {
222 0 0         if ($host =~ /$_$/) {
223 0           ($host = $_) =~ s/\\//g;
224 0           return $host;
225             }
226             }
227 0           return undef;
228             }
229              
230             =item * $rv = $blessed->urbwhite($hostname)
231              
232             This method check if a hostname is found within the local white list.
233              
234             input: hostname
235             return: domain found, else false
236              
237             =cut
238              
239             sub urblwhite {
240 0     0 0   my $bls = shift;
241 0           my $host = lc shift;
242 0   0       my $white = $bls->{-urblpreparewhite} || [];
243 0           foreach (@$white) {
244 0 0         if ($host =~ /$_$/) {
245 0           ($host = $_) =~ s/\\//g;
246 0           return $host;
247             }
248             }
249 0           return undef;
250             }
251              
252             1;
253             __END__