File Coverage

blib/lib/Net/IP/Match/Regexp.pm
Criterion Covered Total %
statement 58 58 100.0
branch 36 36 100.0
condition 17 17 100.0
subroutine 13 13 100.0
pod 3 3 100.0
total 127 127 100.0


line stmt bran cond sub pod time code
1             package Net::IP::Match::Regexp;
2              
3 1     1   30031 use 5.006;
  1         3  
  1         29  
4 1     1   5 use strict;
  1         1  
  1         33  
5 1     1   4 use warnings;
  1         7  
  1         26  
6 1     1   739 use English qw(-no_match_vars);
  1         4486  
  1         5  
7              
8 1     1   499 use base 'Exporter';
  1         2  
  1         580  
9             our @EXPORT_OK = qw( create_iprange_regexp create_iprange_regexp_depthfirst match_ip );
10             our $VERSION = '1.01';
11              
12             =head1 NAME
13              
14             Net::IP::Match::Regexp - Efficiently match IP addresses against ranges
15              
16             =head1 LICENSE
17              
18             Copyright 2005-2006 Clotho Advanced Media, Inc.,
19              
20             Copyright 2007-2008 Chris Dolan,
21              
22             This library is free software; you can redistribute it and/or modify it
23             under the same terms as Perl itself.
24              
25             =head1 SYNOPSIS
26              
27             use Net::IP::Match::Regexp qw( create_iprange_regexp match_ip );
28            
29             my $regexp = create_iprange_regexp(
30             qw( 10.0.0.0/8 87.134.66.128 87.134.87.0/24 145.97.0.0/16 )
31             );
32             if (match_ip('209.249.163.62', $regexp)) {
33             ...
34             }
35              
36             =head1 DESCRIPTION
37              
38             This module allows you to check an IP address against one or more IP
39             ranges. It employs Perl's highly optimized regular expression engine
40             to do the hard work, so it is very fast. It is optimized for speed by
41             doing the match against a regexp which implicitly checks the broadest
42             IP ranges first. An advantage is that the regexp can be computed and
43             stored in advance (in source code, in a database table, etc) and
44             reused, saving much time if the IP ranges don't change too often. The
45             match can optionally report a value (e.g. a network name) instead of
46             just a boolean, which makes module useful for mapping IP ranges to
47             names or codes or anything else.
48              
49             =head1 LIMITATIONS
50              
51             This module does not yet support IPv6 addresses, although that feature
52             should not be hard to implement as long as the regexps start with a 4
53             vs. 6 flag. Patches welcome. :-)
54              
55             This module only accepts IP ranges in C (aka CIDR)
56             notation. To work around that limitation, I recommend
57             Net::CIDR::Lite to conveniently convert collections of IP address
58             ranges into CIDR format.
59              
60             This module makes no effort to validate the IP addresses or ranges
61             passed as arguments. If you pass address ranges like
62             C<1000.0.0.0/300>, you will probably get weird regexps out.
63              
64             =head1 FUNCTIONS
65              
66             =over
67              
68             =cut
69              
70             =item create_iprange_regexp($iprange | $hashref | $arrayref, ...)
71              
72             This function digests IP ranges into a regular expression that can
73             subsequently be used to efficiently test single IP addresses. It
74             returns a regular expression string that can be passed to match_ip().
75              
76             The simple way to use this is to pass a list of IP ranges as
77             C. When used this way, the return value of the
78             match_ip() function will be simply C<1> or C.
79              
80             The more complex way is to pass a hash reference of IP range => return
81             value pairs. When used this way, the return value of the match_ip()
82             function will be the specified return value or C for no match.
83              
84             For example:
85              
86             my $re1 = create_iprange_regexp('209.249.163.0/25', '127.0.0.1/32');
87             print match_ip('209.249.163.62', $re1); # prints '1'
88            
89             my $re2 = create_iprange_regexp({'209.249.163.0/25' => 'clotho.com',
90             '127.0.0.1/32' => 'localhost'});
91             print match_ip('209.249.163.62', $re2); # prints 'clotho.com'
92              
93             Be aware that the value string will be wrapped in single quotes in the
94             regexp. Therefore, you must double-escape any single quotes in that
95             value. For example:
96              
97             create_iprange_regexp({'208.201.239.36/31' => 'O\\'Reilly publishing'});
98              
99             Note that the scalar and hash styles can be mixed (a rarely used
100             feature). These two examples are equivalent:
101              
102             create_iprange_regexp('127.0.0.1/32',
103             {'209.249.163.0/25' => 'clotho.com'},
104             '10.0.0.0/8',
105             {'192.168.0.0/16' => 'LAN'});
106            
107             create_iprange_regexp({'127.0.0.1/32' => 1,
108             '209.249.163.0/25' => 'clotho.com',
109             '10.0.0.0/8' => 1,
110             '192.168.0.0/16' => 'LAN'});
111              
112             If any of the IP ranges are overlapping, the broadest one is used. If
113             they are equivalent, then the first one passed is used. If you have
114             some data that might be ambiguous, you pass an arrayref instead of a
115             hashref, but it's better to clean up your data instead! For example:
116              
117             my $re = create_iprange_regexp(['1.1.1.0/31' => 'zero', '1.1.1.1/31' => 'one']);
118             print match_ip('1.1.1.1', $re)); # prints 'zero', since both match
119              
120             WARNING: This function does no checking for validity of IP ranges. It
121             happily accepts C<1000.0.0.0/-38> and makes a garbage regexp.
122             Hopefully a future version will validate the ranges, perhaps via
123             Net::CIDR or Net::IP.
124              
125             =cut
126              
127             sub create_iprange_regexp { ##no critic (ArgUnpacking)
128 79     79 1 34089 return _build_regexp(0, \@_);
129             }
130              
131             =item create_iprange_regexp_depthfirst($iprange | $hashref | $arrayref, ...)
132              
133             Returns a regexp in matches the most specific IP range instead of the
134             broadest range. Example:
135              
136             my $re = create_iprange_regexp_depthfirst({'192.168.0.0/16' => 'LAN',
137             '192.168.0.1' => 'router'});
138             match_ip('192.168.0.1', $re);
139              
140             returns 'router' instead of 'LAN'.
141              
142             =cut
143              
144             sub create_iprange_regexp_depthfirst { ##no critic (ArgUnpacking)
145 2     2 1 283 return _build_regexp(1, \@_);
146             }
147             sub _build_regexp {
148 81     81   209 my ($depthfirst, $ipranges) = @_;
149              
150             # If an argument is a hash or array ref, flatten it
151             # If an argument is a scalar, make it a key and give it a value of 1
152             my @map
153 3         16 = map { ! ref $_ ? ( $_ => 1 )
  81         202  
154 5         35 : ref $_ eq 'ARRAY' ? @{$_}
155 81 100       298 : %{$_} } @{$ipranges};
  88 100       463  
156              
157             # The tree is a temporary construct. It has three possible
158             # properties: 0, 1, and code. The code is the return value for a
159             # match.
160 81         172 my %tree;
161              
162             IPRANGE:
163 81         281 for ( my $i = 0; $i < @map; $i += 2 ) {
164 104         200 my $range = $map[ $i ];
165 104         186 my $match = $map[ $i + 1 ];
166              
167 104         340 my ( $ip, $mask ) = split m/\//xms, $range;
168 104 100       295 if (! defined $mask) {
169 5         7 $mask = 32; ## no critic(MagicNumbers)
170             }
171              
172 104         161 my $tree = \%tree;
173 104         1851 my @bits = split m//xms, unpack 'B32', pack 'C4', split m/[.]/xms, $ip;
174              
175 104         922 for my $bit ( @bits[ 0 .. $mask - 1 ] ) {
176              
177             # If this case is hit, it means that our IP range is a subset
178             # of some other range, and thus ignorable
179 1913 100 100     6883 next IPRANGE if !$depthfirst && $tree->{code};
180              
181 1911   100     7828 $tree->{$bit} ||= {}; # Turn a leaf into a branch, if needed
182 1911         3191 $tree = $tree->{$bit}; # Follow one branch
183             }
184              
185             # Our $tree is now a leaf node of %tree. Set its value
186             # If the code is already set, it's a non-fatal error (redundant data)
187 102   100     1073 $tree->{code} ||= $match;
188              
189             # Ignore case where $tree->{0} or $tree->{1} are set (i.e. if
190             # the current range encompasses any earlier-processed ranges).
191             # Those branches will be ignored in _tree2re()
192             }
193              
194             # Recurse into the tree making it into a regexp
195 81 100       878 my $re = join q{}, '^4', $depthfirst ? _tree2re_depthfirst( \%tree ) : _tree2re( \%tree );
196              
197             ## Performance optimization:
198              
199             # If we are going to use the pattern repeatedly, it's more
200             # effiecient if it's already a regexp instead of a string.
201             # Otherwise, it needs to be compiled in each invocation of
202             # match_ip(). If the regexp is merely stored and not used then
203             # this is wasted effort.
204              
205 1     1   7 use re 'eval'; # needed because we're interpolating into a regexp
  1         1  
  1         140  
206 81         10736 $re = qr/$re/xms;
207              
208 81         746 return $re;
209             }
210              
211             =item match_ip($ipaddr, $regexp)
212              
213             Given a single IP address as a string of the form C
214             and a regular expression string (typically the output of
215             create_iprange_regexp()), this function returns a specified value
216             (typically C<1>) if the IP is in one of the ranges, or C if no
217             ranges match.
218              
219             See create_ipranges_regexp() for more details about the return value
220             of this function.
221              
222             WARNING: This function does no checking for validity of the IP address.
223              
224             =cut
225              
226             sub match_ip {
227 627     627 1 270680 my ( $ip, $re ) = @_;
228              
229 627 100       4865 return if !$ip;
230 625 100       1433 return if !$re;
231              
232 624         1319 local $LAST_REGEXP_CODE_RESULT = undef;
233 1     1   6 use re 'eval';
  1         2  
  1         499  
234 624         16346 ( '4' . unpack 'B32', pack 'C4', split m/[.]/xms, $ip ) =~ m/$re/xms;
235 624         2815 return $LAST_REGEXP_CODE_RESULT;
236             }
237              
238             # Helper function. This recurses to build the regular expression
239             # string from a tree of IP ranges constructed by
240             # create_iprange_regexp().
241              
242             sub _tree2re {
243 1548     1548   1895 my ( $tree ) = @_;
244              
245             return
246 1548 100 100     16701 defined $tree->{code} ? ( "(?{'$tree->{code}'})" ) # Match
    100          
    100          
    100          
247             : $tree->{0} && $tree->{1} ? ( '(?>0', _tree2re($tree->{0}),
248             '|1', _tree2re($tree->{1}), ')' ) # Choice
249             : $tree->{0} ? ( '0', _tree2re($tree->{0}) ) # Literal, no choice
250             : $tree->{1} ? ( '1', _tree2re($tree->{1}) ) # Literal, no choice
251             : die 'Internal error: failed to create a regexp from the supplied IP ranges'
252             ;
253             }
254              
255             sub _tree2re_depthfirst {
256 113     113   474 my ( $tree ) = @_;
257              
258 113 100       185 if (defined $tree->{code}) {
259 11 100 100     130 return '(?>',
    100          
    100          
260             $tree->{0} && $tree->{1} ? ( '(?>0', _tree2re_depthfirst($tree->{0}),
261             '|1', _tree2re_depthfirst($tree->{1}), ')|' )
262             : $tree->{0} ? ( '0', _tree2re_depthfirst($tree->{0}), q{|} )
263             : $tree->{1} ? ( '1', _tree2re_depthfirst($tree->{1}), q{|} )
264             : (),
265             "(?{'$tree->{code}'}))";
266             } else {
267             return
268 102 100 100     556 $tree->{0} && $tree->{1} ? ( '(?>0', _tree2re_depthfirst($tree->{0}),
    100          
    100          
269             '|1', _tree2re_depthfirst($tree->{1}), ')' ) # Choice
270             : $tree->{0} ? ( '0', _tree2re_depthfirst($tree->{0}) ) # Literal, no choice
271             : $tree->{1} ? ( '1', _tree2re_depthfirst($tree->{1}) ) # Literal, no choice
272             : die 'Internal error: failed to create a regexp from the supplied IP ranges'
273             ;
274             }
275             }
276              
277             1;
278              
279             __END__