File Coverage

blib/lib/Plack/Middleware/Greylist.pm
Criterion Covered Total %
statement 93 95 97.8
branch 28 36 77.7
condition 19 25 76.0
subroutine 15 15 100.0
pod 2 2 100.0
total 157 173 90.7


line stmt bran cond sub pod time code
1             package Plack::Middleware::Greylist;
2:
3: # ABSTRACT: throttle requests with different rates based on net blocks
4:
5: # RECOMMEND PREREQ: Cache::FastMmap
6: # RECOMMEND PREREQ: Ref::Util::XS
7:
8: use v5.12;
9: use warnings;
10:
11: use parent qw( Plack::Middleware );
12:
13: use HTTP::Status qw/ HTTP_FORBIDDEN HTTP_TOO_MANY_REQUESTS /;
14: use List::Util 1.29 qw/ pairs /;
15: use Module::Load qw/ load /;
16: use Net::IP::Match::Trie;
17: use Plack::Util;
18: use Plack::Util::Accessor qw/ default_rate rules cache file _match greylist retry_after /;
19: use Ref::Util qw/ is_plain_arrayref /;
20: use Time::Seconds qw/ ONE_MINUTE /;
21:
22: our $VERSION = 'v0.5.0';
23:
24:
# Set-up hook (see Plack::Component): validates the configuration and builds
# the structures that call() relies on.
#
#   * default_rate - left undefined by the caller means "no global limit",
#                    stored as -1; otherwise it must be a positive integer.
#   * retry_after  - defaults to ONE_MINUTE + 1 and must be greater than
#                    ONE_MINUTE.
#   * cache        - when not supplied, a Cache::FastMmap backed counter is
#                    created from the "file" attribute.
#   * greylist     - compiled into the "rules" hash and a
#                    Net::IP::Match::Trie matcher stored in "_match".
#
# Dies if default_rate or retry_after are invalid, or if neither a cache nor
# a file has been configured.
sub prepare_app {
    my ($self) = @_;

    # -1 is the sentinel for "not rate limited" (call() skips any rate below
    # zero).  It has to pass validation too, otherwise omitting default_rate
    # would always die here instead of disabling the global rate.
    $self->default_rate(-1) unless defined $self->default_rate;

    die "default_rate must be a positive integer"
      unless $self->default_rate =~ /^(?:-1|[1-9][0-9]*)$/;

    $self->retry_after( ONE_MINUTE + 1 ) unless defined $self->retry_after;
    die "retry_after must be a positive integer greater than ${ \ONE_MINUTE} seconds"
      unless $self->retry_after =~ /^[1-9][0-9]*$/ && $self->retry_after > ONE_MINUTE;

    unless ( $self->cache ) {

        my $file = $self->file // die "No cache was set";

        # Loaded on demand so that Cache::FastMmap is only needed when the
        # caller has not supplied a cache of their own.
        load Cache::FastMmap;

        my $cache = Cache::FastMmap->new(
            share_file  => "$file",
            init_file   => 1,
            serializer  => '',
            expire_time => ONE_MINUTE,
        );

        # The cache attribute is a code reference that takes a key and
        # increments the counter stored under it.
        $self->cache(
            sub {
                my ($ip) = @_;
                return $cache->get_and_set(
                    $ip,
                    sub {
                        my ( $key, $count, $opts ) = @_;
                        $count //= 0;

                        # Hand the entry's current expire_on back, presumably
                        # so that an increment does not restart the expiry
                        # window - confirm against the Cache::FastMmap
                        # get_and_set documentation.
                        return ( $count + 1, { expire_on => $opts->{expire_on} } );
                    }
                );
            }
        );

    }

    my $match = Net::IP::Match::Trie->new;

    $self->_match( sub { return $match->match_ip(@_) } );

    # Flatten the greylist hash into a ( block => rule, ... ) list.
    my @blocks;

    if ( my $greylist = $self->greylist ) {
        push @blocks, ( %{$greylist} );
    }

    $self->rules( my $rules = {} );

    # Named rates and the numeric rate each one stands for.
    my %codes = ( whitelist => -1, allowed => -1, blacklist => 0, rejected => 0, norobots => 0 );

    # Tracking types: '' means "count per IP address", 1 means "count per
    # netblock".  Any other type is used verbatim as a shared counter name.
    my %types = ( ip => '', netblock => 1 );

    for my $line ( pairs @blocks ) {

        my ( $block, $rule ) = @{$line};

        # A rule may be given as a whitespace separated string, "RATE [TYPE]".
        $rule = [ split /\s+/, $rule ] unless is_plain_arrayref($rule);

        my ( $rate, $type ) = @{$rule};

        $type //= "ip";
        my $mask = $types{$type} // $type;
        $mask = $block if $mask eq "1";

        $rate //= "rejected";
        if ( exists $codes{$rate} ) {

            # For per-IP tracking, remember the rate name as the mask.  call()
            # compares it with "norobots" and, because it is a true value,
            # also uses it as the counter key.
            $mask = $rate if $mask eq "";
            $rate = $codes{$rate};
        }

        $rules->{$block} = [ $rate, $mask ];
        $match->add( $block => [$block] );
    }

}
102:
# Request handler: applies the rate limit configured for the client address.
#
# Takes the PSGI environment hash reference.  The rule for REMOTE_ADDR is
# looked up through the matcher built in prepare_app; addresses that match no
# netblock use the default rate.
#
# Returns
#   * a 403 response when the effective rate is 0,
#   * a 429 response with a Retry-After header when the counter exceeds a
#     positive rate,
#   * otherwise whatever the wrapped application returns.
sub call {
    my ( $self, $env ) = @_;

    my $ip   = $env->{REMOTE_ADDR};
    my $name = $self->_match->($ip);
    my $rule = $name ? $self->rules->{$name} : [ $self->default_rate ];

    my $rate = $rule->[0];

    # "norobots" blocks are rejected, except for /robots.txt, which is
    # allowed at a limited rate.
    if ( $rate == 0 && $rule->[1] && $rule->[1] eq "norobots" ) {
        if ( $env->{PATH_INFO} eq "/robots.txt" ) {
            $rate = ONE_MINUTE;    # one request/second
        }
    }

    # A negative rate means the client is not limited at all, so the counter
    # is not touched.
    if ( $rate >= 0 ) {

        # The counter key is the rule's mask (netblock, group or rate name)
        # when it has one, otherwise the client address itself.
        my ($hits) = $self->cache->( $rule->[1] || $ip );

        # The number of hits when over the limit, otherwise 0.
        my $limit = $hits > $rate ? $hits : 0;

        if ($limit) {

            my $block = $name || "default";
            my $msg   = "Rate limiting ${ip} after ${limit}/${rate} for ${block}";

            if ( my $log = $env->{'psgix.logger'} ) {
                $log->( { message => $msg, level => 'warn' } );
            }
            else {
                $env->{'psgi.errors'}->print($msg);
            }

            if ( $rate == 0 ) {

                return [ HTTP_FORBIDDEN, [], ["Forbidden"] ];

            }
            else {

                return [
                    HTTP_TOO_MANY_REQUESTS,
                    [
                        "Retry-After" => $self->retry_after,
                    ],
                    ["Too Many Requests"]
                ];

            }
        }

    }

    return $self->app->($env);
}
159:
160:
161: 1;
162:
163: __END__
164:
165: =pod
166:
167: =encoding UTF-8
168:
169: =head1 NAME
170:
171: Plack::Middleware::Greylist - throttle requests with different rates based on net blocks
172:
173: =head1 VERSION
174:
175: version v0.5.0
176:
177: =head1 SYNOPSIS
178:
179: use Plack::Builder;
180:
181: builder {
182:
183: enable "Greylist",
184: file => sprintf('/run/user/%u/greylist', $>), # cache file
185: default_rate => 250,
186: greylist => {
187: '192.168.0.0/24' => 'whitelist',
188: '172.16.1.0/25' => [ 100, 'netblock' ],
189: };
190:
191: }
192:
193: =head1 DESCRIPTION
194:
195: This middleware will apply rate limiting to requests, depending on the requestor netblock.
196:
197: Hosts that exceed their configured per-minute request limit will be rejected with HTTP 429 errors.
198:
199: =head2 Log Messages
200:
201: Rejections will be logged with a message of the form
202:
203: Rate limiting $ip after $hits/$rate for $netblock
204:
205: for example,
206:
207: Rate limiting 172.16.0.10 after 251/250 for 172.16.0.0/24
208:
209: Note that the C<$netblock> for the default rate is simply "default", e.g.
210:
211: Rate limiting 192.168.0.12 after 101/100 for default
212:
213: This will allow you to use something like L<fail2ban> to block repeat offenders, since bad
214: robots are like houseflies that repeatedly bump against closed windows.
215:
216: =head1 ATTRIBUTES
217:
218: =head2 default_rate
219:
220: This is the default maximum number of hits per minute before requests are rejected, for any request not in the L</greylist>.
221:
222: Omitting it will disable the global rate.
223:
224: =head2 retry_after
225:
226: This sets the C<Retry-After> header value, in seconds. It defaults to 61 seconds, which is the minimum allowed value.
227:
228: Note that this does not enforce that a client has waited that amount of time before making a new request, as long as the
229: number of hits per minute is within the allowed rate.
230:
231: =head2 greylist
232:
233: This is a hash reference to the greylist configuration.
234:
235: The keys are network blocks, and the values are an array reference of rates and the tracking type. (A string of space-
236: separated values can be used instead, to make it easier to directly use the configuration from something like
237: L<Config::General>.)
238:
239: The rates are either the maximum number of requests per minute, or "whitelist" or "allowed" to not limit the network
240: block, or "blacklist" or "rejected" to always forbid a network block.
241:
242: (The rate "-1" corresponds to "allowed", and the rate "0" corresponds to "rejected".)
243:
244: A special rate code of "norobots" will reject all requests except for F</robots.txt>, which is allowed at a rate of 60
245: per minute. This will allow you to block a robot but still allow the robot to access the robot rules that say it is
246: disallowed.
247:
248: The tracking type defaults to "ip", which applies limits to individual ips. You can also use "netblock" to apply the
249: limits to all hosts in that network block, or use a name so that limits are applied to all hosts in network blocks
250: with that name.
251:
252: For example:
253:
254: {
255: '127.0.0.1/32' => 'whitelist',
256:
257: '192.168.1.0/24' => 'blacklist',
258:
259: '192.168.2.0/24' => [ 100, 'ip' ],
260:
261: '192.168.3.0/24' => [ 60, 'netblock' ],
262:
263: # All requests from these blocks will be limited collectively
264:
265: '10.0.0.0/16' => [ 60, 'group1' ],
266: '172.16.0.0/16' => [ 60, 'group1' ],
267: }
268:
269: Note: the network blocks shown above are examples only.
270:
271: The limit may be larger than L</default_rate>, to allow hosts to exceed the default limit.
272:
273: =head2 file
274:
275: This is the path of the throttle count file used by the L</cache>.
276:
277: It is required unless you are defining your own L</cache>.
278:
279: =head2 cache
280:
281: This is a code reference to a function that increments the cache counter for a key (usually the IP address or net
282: block).
283:
284: If you customise this, then you need to ensure that the counter resets or expires counts after a set period of time,
285: e.g. one minute. If you use a different time interval, then you may need to adjust the L</retry_after> time.
286:
287: =head1 KNOWN ISSUES
288:
289: This does not try to enforce any consistency or block overlapping netblocks. It trusts L<Net::IP::Match::Trie> to
290: handle any overlapping or conflicting network ranges, or to specify exceptions for larger blocks.
291:
292: When configuring the L</greylist> netblocks from a configuration file using L<Config::General>, duplicate netblocks may
293: be merged in unexpected ways, for example
294:
295: 10.0.0.0/16 60 group-1
296:
297: ...
298:
299: 10.0.0.0/16 120 group-2
300:
301: may be merged as something like
302:
303: '10.0.0.0/16' => [ '60 group-1', '120 group-2' ],
304:
305: Some search engine robots may not respect HTTP 429 responses, and will treat these as errors. You may want to make an
306: exception for trusted networks that gives them a higher rate than the default.
307:
308: This does not enforce consistent rates for named blocks. For example, if you specified
309:
310: '10.0.0.0/16' => [ 60, 'named-group' ],
311: '172.16.0.0/16' => [ 100, 'named-group' ],
312:
313: Requests from both netblocks would be counted together, but requests from the 10.0.0.0/16 netblock would be rejected after 60
314: requests. This is probably not something that you want.
315:
316: =head1 SUPPORT FOR OLDER PERL VERSIONS
317:
318: This module requires Perl v5.12 or later.
319:
320: Future releases may only support Perl versions released in the last ten years.
321:
322: =head1 SOURCE
323:
324: The development version is on github at L<https://github.com/robrwo/Plack-Middleware-Greylist>
325: and may be cloned from L<git://github.com/robrwo/Plack-Middleware-Greylist.git>
326:
327: =head1 BUGS
328:
329: Please report any bugs or feature requests on the bugtracker website
330: L<https://github.com/robrwo/Plack-Middleware-Greylist/issues>
331:
332: When submitting a bug or request, please include a test-file or a
333: patch to an existing test-file that illustrates the bug or desired
334: feature.
335:
336: =head1 AUTHOR
337:
338: Robert Rothenberg <rrwo@cpan.org>
339:
340: The initial development of this module was sponsored by Science Photo
341: Library L<https://www.sciencephoto.com>.
342:
343: =head1 CONTRIBUTOR
344:
345: =for stopwords Gabor Szabo
346:
347: Gabor Szabo <gabor@szabgab.com>
348:
349: =head1 COPYRIGHT AND LICENSE
350:
351: This software is Copyright (c) 2022-2023 by Robert Rothenberg.
352:
353: This is free software, licensed under:
354:
355: The Artistic License 2.0 (GPL Compatible)
356:
357: =cut
358: