File Coverage

blib/lib/Plack/Middleware/Greylist.pm
Criterion Covered Total %
statement 88 90 97.7
branch 23 30 76.6
condition 13 20 65.0
subroutine 15 15 100.0
pod 2 2 100.0
total 141 157 89.8


line stmt bran cond sub pod time code
1             package Plack::Middleware::Greylist;
2:
3: # ABSTRACT: throttle requests with different rates based on net blocks
4:
5: # RECOMMEND PREREQ: Cache::FastMmap
6: # RECOMMEND PREREQ: Ref::Util::XS
7:
8: use v5.12;
9: use warnings;
10:
11: use parent qw( Plack::Middleware );
12:
13: use HTTP::Status qw/ HTTP_FORBIDDEN HTTP_TOO_MANY_REQUESTS /;
14: use List::Util 1.29 qw/ pairs /;
15: use Module::Load qw/ load /;
16: use Net::IP::Match::Trie;
17: use Plack::Util;
18: use Plack::Util::Accessor qw/ default_rate rules cache file _match greylist retry_after /;
19: use Ref::Util qw/ is_plain_arrayref /;
20: use Time::Seconds qw/ ONE_MINUTE /;
21:
22: our $VERSION = 'v0.4.3';
23:
24:
# Validate the configuration and build the state used by call().
#
# This:
#   * validates default_rate and retry_after, filling in their defaults;
#   * creates a Cache::FastMmap-backed hit counter from the "file" attribute
#     when no "cache" code reference was supplied;
#   * loads the "greylist" hash into a Net::IP::Match::Trie matcher (stored
#     as the _match code reference) and into the "rules" table, which maps
#     each netblock to [ $rate, $mask ].
#
# Dies when default_rate or retry_after are invalid, or when neither a cache
# nor a file has been configured.
sub prepare_app {
    my ($self) = @_;

    # An omitted default_rate disables the global limit and is stored as -1
    # (the same code as "whitelist"). Only a user-supplied value is checked,
    # otherwise the internal -1 would be rejected by the validation itself.
    if ( defined( my $default_rate = $self->default_rate ) ) {
        die "default_rate must be a positive integer" unless $default_rate =~ /^[1-9][0-9]*$/;
    }
    else {
        $self->default_rate(-1);
    }

    $self->retry_after( ONE_MINUTE + 1 ) unless defined $self->retry_after;
    die "retry_after must be a positive integer greater than ${ \ONE_MINUTE} seconds"
      unless $self->retry_after =~ /^[1-9][0-9]*$/ && $self->retry_after > ONE_MINUTE;

    unless ( $self->cache ) {

        my $file = $self->file // die "No cache was set";

        # Cache::FastMmap is only a recommended prerequisite, so it is loaded
        # on demand rather than at compile time.
        load Cache::FastMmap;

        my $cache = Cache::FastMmap->new(
            share_file  => "$file",
            init_file   => 1,
            serializer  => '',
            expire_time => ONE_MINUTE,
        );

        # The default counter: increment the hit count stored under the key
        # and return the new count.
        $self->cache(
            sub {
                my ($ip) = @_;
                return $cache->get_and_set(
                    $ip,
                    sub {
                        my ( $key, $count, $opts ) = @_;
                        $count //= 0;

                        # Hand the entry's existing expire_on back to the
                        # cache along with the incremented count.
                        return ( $count + 1, { expire_on => $opts->{expire_on} } );
                    }
                );
            }
        );

    }

    my $match = Net::IP::Match::Trie->new;

    $self->_match( sub { return $match->match_ip(@_) } );

    my @blocks;

    if ( my $greylist = $self->greylist ) {
        push @blocks, ( %{$greylist} );
    }

    $self->rules( my $rules = {} );

    my %codes = ( whitelist => -1, blacklist => 0 );
    my %types = ( ip        => '', netblock  => 1 );

    for my $line ( pairs @blocks ) {

        my ( $block, $rule ) = @{$line};

        # A rule may be given as a single string, e.g. "60 netblock". The
        # special ' ' pattern splits on whitespace and ignores any leading
        # whitespace, so " 60 netblock" does not yield an empty rate.
        $rule = [ split ' ', $rule ] unless is_plain_arrayref($rule);

        my ( $rate, $type ) = @{$rule};

        # A missing rate means "blacklist"; the symbolic names are mapped to
        # their numeric codes.
        $rate //= $codes{blacklist};
        $rate = $codes{$rate} if exists $codes{$rate};

        # The mask is the cache key that hits are counted under: empty for
        # "ip" (call() then falls back to the client address), the netblock
        # itself for "netblock", and otherwise the given group name.
        $type //= "ip";
        my $mask = $types{$type} // $type;
        $mask = $block if $mask eq "1";

        $rules->{$block} = [ $rate, $mask ];
        $match->add( $block => [$block] );
    }

}
99:
# Handle a single PSGI request.
#
# Looks up the rule for the client's REMOTE_ADDR (falling back to the
# default rate when no netblock matches), counts the hit, and either passes
# the request on to the wrapped application or rejects it.
#
# Returns the wrapped application's response, or a PSGI response array
# reference with status 403 (rate 0, i.e. blacklisted) or 429 plus a
# Retry-After header (rate exceeded). Every rejection is logged through
# psgix.logger when available, otherwise through psgi.errors.
sub call {
    my ( $self, $env ) = @_;

    my $ip   = $env->{REMOTE_ADDR};
    my $name = $self->_match->($ip);
    my $rule = $name ? $self->rules->{$name} : [ $self->default_rate ];

    my $rate = $rule->[0];

    # A negative rate means the client is never counted or limited.
    if ( $rate >= 0 ) {

        # Hits are counted under the rule's mask (netblock or group name)
        # when it has one, and under the client address otherwise.
        my ($hits) = $self->cache->( $rule->[1] || $ip );

        if ( $hits > $rate ) {

            my $block = $name || "default";
            my $msg   = "Rate limiting ${ip} after ${hits}/${rate} for ${block}";

            if ( my $log = $env->{'psgix.logger'} ) {
                $log->( { message => $msg, level => 'warn' } );
            }
            else {
                $env->{'psgi.errors'}->print($msg);
            }

            # PSGI requires a Content-Type header on responses that carry a
            # body, so both rejections declare their plain-text body.
            if ( $rate == 0 ) {

                return [
                    HTTP_FORBIDDEN,
                    [
                        "Content-Type" => "text/plain",
                    ],
                    ["Forbidden"]
                ];

            }
            else {

                return [
                    HTTP_TOO_MANY_REQUESTS,
                    [
                        "Content-Type" => "text/plain",
                        "Retry-After"  => $self->retry_after,
                    ],
                    ["Too Many Requests"]
                ];

            }
        }

    }

    return $self->app->($env);
}
149:
150:
151: 1;
152:
153: __END__
154:
155: =pod
156:
157: =encoding UTF-8
158:
159: =head1 NAME
160:
161: Plack::Middleware::Greylist - throttle requests with different rates based on net blocks
162:
163: =head1 VERSION
164:
165: version v0.4.3
166:
167: =head1 SYNOPSIS
168:
169: use Plack::Builder;
170:
171: builder {
172:
173: enable "Greylist",
174: file => sprintf('/run/user/%u/greylist', $>), # cache file
175: default_rate => 250,
176: greylist => {
177: '192.168.0.0/24' => 'whitelist',
178: '172.16.1.0/25' => [ 100, 'netblock' ],
179: };
180:
181: }
182:
183: =head1 DESCRIPTION
184:
185: This middleware will apply rate limiting to requests, depending on the requestor netblock.
186:
187: Hosts that exceed their configured per-minute request limit will be rejected with HTTP 429 errors. Hosts in blacklisted netblocks are always rejected with HTTP 403 errors.
188:
189: =head2 Log Messages
190:
191: Rejections will be logged with a message of the form
192:
193: Rate limiting $ip after $hits/$rate for $netblock
194:
195: for example,
196:
197: Rate limiting 172.16.0.10 after 251/250 for 172.16.0.0/24
198:
199: Note that the C<$netblock> for the default rate is simply "default", e.g.
200:
201: Rate limiting 192.168.0.12 after 101/100 for default
202:
203: This will allow you to use something like L<fail2ban> to block repeat offenders, since bad
204: robots are like houseflies that repeatedly bump against closed windows.
205:
206: =head1 ATTRIBUTES
207:
208: =head2 default_rate
209:
210: This is the default maximum number of hits per minute before requests are rejected, for any request not in the L</greylist>.
211:
212: Omitting it will disable the global rate.
213:
214: =head2 retry_after
215:
216: This sets the C<Retry-After> header value, in seconds. It defaults to 61 seconds, which is the minimum allowed value.
217:
218: Note that this does not enforce that a client has waited that amount of time before making a new request, as long as the
219: number of hits per minute is within the allowed rate.
220:
221: =head2 greylist
222:
223: This is a hash reference to the greylist configuration.
224:
225: The keys are network blocks, and the values are an array reference of rates and the tracking type. (A string of space-
226: separated values can be used instead, to make it easier to directly use the configuration from something like
227: L<Config::General>.)
228:
229: The rates are either the maximum number of requests per minute, or "whitelist" to not limit the network block, or
230: "blacklist" to always forbid a network block.
231:
232: (The rate "-1" corresponds to "whitelist", and the rate "0" corresponds to "blacklist".)
233:
234: The tracking type defaults to "ip", which applies limits to individual ips. You can also use "netblock" to apply the
235: limits to all hosts in that network block, or use a name so that limits are applied to all hosts in network blocks
236: with that name.
237:
238: For example:
239:
240: {
241: '127.0.0.1/32' => 'whitelist',
242:
243: '192.168.1.0/24' => 'blacklist',
244:
245: '192.168.2.0/24' => [ 100, 'ip' ],
246:
247: '192.168.3.0/24' => [ 60, 'netblock' ],
248:
249: # All requests from these blocks will be limited collectively
250:
251: '10.0.0.0/16' => [ 60, 'group1' ],
252: '172.16.0.0/16' => [ 60, 'group1' ],
253: }
254:
255: Note: the network blocks shown above are examples only.
256:
257: The limit may be larger than L</default_rate>, to allow hosts to exceed the default limit.
258:
259: =head2 file
260:
261: This is the path of the throttle count file used by the L</cache>.
262:
263: It is required unless you are defining your own L</cache>.
264:
265: =head2 cache
266:
267: This is a code reference to a function that increments the cache counter for a key (usually the IP address or net
268: block).
269:
270: If you customise this, then you need to ensure that the counter resets or expires counts after a set period of time,
271: e.g. one minute. If you use a different time interval, then you may need to adjust the L</retry_after> time.
272:
273: =head1 KNOWN ISSUES
274:
275: This does not try and enforce any consistency or block overlapping netblocks. It trusts L<Net::IP::Match::Trie> to
276: handle any overlapping or conflicting network ranges, or to specify exceptions for larger blocks.
277:
278: When configuring the L</greylist> netblocks from a configuration file using L<Config::General>, duplicate netblocks may
279: be merged in unexpected ways, for example
280:
281: 10.0.0.0/16 60 group-1
282:
283: ...
284:
285: 10.0.0.0/16 120 group-2
286:
287: may be merged as something like
288:
289: '10.0.0.0/16' => [ '60 group-1', '120 group-2' ],
290:
291: Some search engine robots may not respect HTTP 429 responses, and will treat these as errors. You may want to make an
292: exception for trusted networks that gives them a higher rate than the default.
293:
294: This does not enforce consistent rates for named blocks. For example, if you specified
295:
296: '10.0.0.0/16' => [ 60, 'named-group' ],
297: '172.16.0.0/16' => [ 100, 'named-group' ],
298:
299: Requests from both netblocks would be counted together, but requests from the 10.0.0.0/16 netblock would be rejected after 60
300: requests. This is probably not something that you want.
301:
302: =head1 SUPPORT FOR OLDER PERL VERSIONS
303:
304: Since v0.4.0, this module requires Perl v5.12 or later.
305:
306: If you need this module on Perl v5.10, please use one of the v0.3.x
307: versions of this module. Significant bug or security fixes may be
308: backported to those versions.
309:
310: =head1 SOURCE
311:
312: The development version is on github at L<https://github.com/robrwo/Plack-Middleware-Greylist>
313: and may be cloned from L<git://github.com/robrwo/Plack-Middleware-Greylist.git>
314:
315: =head1 BUGS
316:
317: Please report any bugs or feature requests on the bugtracker website
318: L<https://github.com/robrwo/Plack-Middleware-Greylist/issues>
319:
320: When submitting a bug or request, please include a test-file or a
321: patch to an existing test-file that illustrates the bug or desired
322: feature.
323:
324: =head1 AUTHOR
325:
326: Robert Rothenberg <rrwo@cpan.org>
327:
328: The initial development of this module was sponsored by Science Photo
329: Library L<https://www.sciencephoto.com>.
330:
331: =head1 CONTRIBUTOR
332:
333: =for stopwords Gabor Szabo
334:
335: Gabor Szabo <gabor@szabgab.com>
336:
337: =head1 COPYRIGHT AND LICENSE
338:
339: This software is Copyright (c) 2022-2023 by Robert Rothenberg.
340:
341: This is free software, licensed under:
342:
343: The Artistic License 2.0 (GPL Compatible)
344:
345: =cut
346: