blib/lib/Plack/Middleware/Greylist.pm | |||
---|---|---|---|
Criterion | Covered | Total | % |
statement | 88 | 90 | 97.7 |
branch | 23 | 30 | 76.6 |
condition | 13 | 20 | 65.0 |
subroutine | 15 | 15 | 100.0 |
pod | 2 | 2 | 100.0 |
total | 141 | 157 | 89.8 |
line | stmt | bran | cond | sub | pod | time | code |
---|---|---|---|---|---|---|---|
package Plack::Middleware::Greylist;

# ABSTRACT: throttle requests with different rates based on net blocks

# RECOMMEND PREREQ: Cache::FastMmap
# RECOMMEND PREREQ: Ref::Util::XS

use v5.12;
use warnings;

use parent qw( Plack::Middleware );

use HTTP::Status qw/ HTTP_FORBIDDEN HTTP_TOO_MANY_REQUESTS /;
use List::Util 1.29 qw/ pairs /;
use Module::Load qw/ load /;
use Net::IP::Match::Trie;
use Plack::Util;
use Plack::Util::Accessor qw/ default_rate rules cache file _match greylist retry_after /;
use Ref::Util qw/ is_plain_arrayref /;
use Time::Seconds qw/ ONE_MINUTE /;

our $VERSION = 'v0.4.3';

# prepare_app
#
# Validates the configuration and builds the lookup structures used by call():
#
#   * default_rate - defaults to -1 when omitted, which call() treats as
#                    "no limit"; otherwise it must be a positive integer.
#   * retry_after  - defaults to ONE_MINUTE + 1; must be an integer greater
#                    than ONE_MINUTE.
#   * cache        - if not supplied, a Cache::FastMmap counter backed by
#                    "file" is created; dies with "No cache was set" when
#                    neither is given.
#   * greylist     - each "block => rule" pair is normalised into
#                    rules->{$block} = [ $rate, $mask ] and the block is
#                    registered with the Net::IP::Match::Trie matcher.
#
# Dies on invalid configuration. The return value is not meaningful.
sub prepare_app {
    my ($self) = @_;

    $self->default_rate(-1) unless defined $self->default_rate;

    # -1 is the "disabled" sentinel that was just applied above when the
    # attribute is omitted, so it has to pass validation as well.
    die "default_rate must be a positive integer"
      unless $self->default_rate =~ /\A(?:-1|[1-9][0-9]*)\z/;

    # \z (not $) so that a value with a trailing newline is rejected rather
    # than ending up in the Retry-After header.
    $self->retry_after( ONE_MINUTE + 1 ) unless defined $self->retry_after;
    die "retry_after must be a positive integer greater than ${ \ONE_MINUTE} seconds"
      unless $self->retry_after =~ /\A[1-9][0-9]*\z/ && $self->retry_after > ONE_MINUTE;

    unless ( $self->cache ) {

        my $file = $self->file // die "No cache was set";

        load Cache::FastMmap;

        my $cache = Cache::FastMmap->new(
            share_file  => "$file",
            init_file   => 1,
            serializer  => '',
            expire_time => ONE_MINUTE,
        );

        # The default counter: bump the stored count for the key and hand the
        # new count back to the caller.
        $self->cache(
            sub {
                my ($ip) = @_;
                return $cache->get_and_set(
                    $ip,
                    sub {
                        my ( $key, $count, $opts ) = @_;
                        $count //= 0;

                        # Passes the existing expire_on back unchanged;
                        # presumably so that a hit does not extend the
                        # counting window - confirm against Cache::FastMmap.
                        return ( $count + 1, { expire_on => $opts->{expire_on} } );
                    }
                );
            }
        );

    }

    my $match = Net::IP::Match::Trie->new;

    $self->_match( sub { return $match->match_ip(@_) } );

    my @blocks;

    if ( my $greylist = $self->greylist ) {
        push @blocks, ( %{$greylist} );
    }

    $self->rules( my $rules = {} );

    # Symbolic rate names and tracking types accepted in a rule.
    my %codes = ( whitelist => -1, blacklist => 0 );
    my %types = ( ip        => '', netblock  => 1 );

    for my $line ( pairs @blocks ) {

        my ( $block, $rule ) = @{$line};

        # A string rule is a whitespace-separated "rate type" pair. The
        # awk-mode split ignores leading whitespace, so " 100 ip" does not
        # produce an empty rate.
        $rule = [ split ' ', $rule // '' ] unless is_plain_arrayref($rule);

        my ( $rate, $type ) = @{$rule};

        $rate //= $codes{blacklist};
        $rate = $codes{$rate} if exists $codes{$rate};

        # The mask is the cache key used for counting: '' means "count per
        # client IP" (see call), "netblock" counts per block, and any other
        # value is used as a shared group name.
        $type //= "ip";
        my $mask = $types{$type} // $type;
        $mask = $block if $mask eq "1";

        $rules->{$block} = [ $rate, $mask ];
        $match->add( $block => [$block] );
    }

}

# call
#
# PSGI entry point. Looks up the rule for REMOTE_ADDR (falling back to
# default_rate), increments the hit counter for the rule's cache key, and
# either passes the request to the wrapped app or rejects it:
#
#   * rate < 0            - never counted, always passed through.
#   * rate == 0           - rejected with 403 Forbidden.
#   * hits > rate (> 0)   - rejected with 429 and a Retry-After header.
#
# Every rejection is logged through psgix.logger when present, otherwise it
# is written to psgi.errors.
sub call {
    my ( $self, $env ) = @_;

    my $ip   = $env->{REMOTE_ADDR};
    my $name = $self->_match->($ip);
    my $rule = $name ? $self->rules->{$name} : [ $self->default_rate ];

    my $rate = $rule->[0];

    # A negative rate means "whitelist" or "no default limit".
    return $self->app->($env) if $rate < 0;

    # An empty mask (tracking type "ip") falls back to the client address.
    my ($hits) = $self->cache->( $rule->[1] || $ip );

    return $self->app->($env) unless $hits > $rate;

    my $block = $name || "default";
    my $msg   = "Rate limiting ${ip} after ${hits}/${rate} for ${block}";

    if ( my $log = $env->{'psgix.logger'} ) {
        $log->( { message => $msg, level => 'warn' } );
    }
    else {
        # Terminate the line so successive messages do not run together.
        $env->{'psgi.errors'}->print("${msg}\n");
    }

    # Both responses carry a body, so they need a Content-Type header.
    if ( $rate == 0 ) {
        return [ HTTP_FORBIDDEN, [ "Content-Type" => "text/plain" ], ["Forbidden"] ];
    }

    return [
        HTTP_TOO_MANY_REQUESTS,
        [
            "Content-Type" => "text/plain",
            "Retry-After"  => $self->retry_after,
        ],
        ["Too Many Requests"]
    ];
}


1;

__END__

=pod

=encoding UTF-8

=head1 NAME

Plack::Middleware::Greylist - throttle requests with different rates based on net blocks

=head1 VERSION

version v0.4.3

=head1 SYNOPSIS

  use Plack::Builder;

  builder {

    enable "Greylist",
      file         => sprintf('/run/user/%u/greylist', $>), # cache file
      default_rate => 250,
greylist => { 177: '192.168.0.0/24' => 'whitelist', 178: '172.16.1.0/25' => [ 100, 'netblock' ], 179: }; 180: 181: } 182: 183: =head1 DESCRIPTION 184: 185: This middleware will apply rate limiting to requests, depending on the requestor netblock. 186: 187: Hosts that exceed their configured per-minute request limit will be rejected with HTTP 429 errors. 188: 189: =head2 Log Messages 190: 191: Rejections will be logged with a message of the form 192: 193: Rate limiting $ip after $hits/$rate for $netblock 194: 195: for example, 196: 197: Rate limiting 172.16.0.10 after 225/250 for 172.16.0.0/24 198: 199: Note that the C<$netblock> for the default rate is simply "default", e.g. 200: 201: Rate limiting 192.168.0.12 after 101/100 for default 202: 203: This will allow you to use something like L<fail2ban> to block repeat offenders, since bad 204: robots are like houseflies that repeatedly bump against closed windows. 205: 206: =head1 ATTRIBUTES 207: 208: =head2 default_rate 209: 210: This is the default maximum number of hits per minute before requests are rejected, for any request not in the L</greylist>. 211: 212: Omitting it will disable the global rate. 213: 214: =head2 retry_after 215: 216: This sets the C<Retry-After> header value, in seconds. It defaults to 61 seconds, which is the minimum allowed value. 217: 218: Note that this does not enforce that a client has waited that amount of time before making a new request, as long as the 219: number of hits per minute is within the allowed rate. 220: 221: =head2 greylist 222: 223: This is a hash reference to the greylist configuration. 224: 225: The keys are network blocks, and the values are an array reference of rates and the tracking type. (A string of space- 226: separated values can be used instead, to make it easier to directly use the configuration from something like 227: L<Config::General>.) 
228: 229: The rates are either the maximum number of requests per minute, or "whitelist" to not limit the network block, or 230: "blacklist" to always forbid a network block. 231: 232: (The rate "-1" corresponds to "whitelist", and the rate "0" corresponds to "blacklist".) 233: 234: The tracking type defaults to "ip", which applies limits to individual IPs. You can also use "netblock" to apply the 235: limits to all hosts in that network block, or use a name so that limits are applied to all hosts in network blocks 236: with that name. 237: 238: For example: 239: 240: { 241: '127.0.0.1/32' => 'whitelist', 242: 243: '192.168.1.0/24' => 'blacklist', 244: 245: '192.168.2.0/24' => [ 100, 'ip' ], 246: 247: '192.168.3.0/24' => [ 60, 'netblock' ], 248: 249: # All requests from these blocks will be limited collectively 250: 251: '10.0.0.0/16' => [ 60, 'group1' ], 252: '172.16.0.0/16' => [ 60, 'group1' ], 253: } 254: 255: Note: the network blocks shown above are examples only. 256: 257: The limit may be larger than L</default_rate>, to allow hosts to exceed the default limit. 258: 259: =head2 file 260: 261: This is the path of the throttle count file used by the L</cache>. 262: 263: It is required unless you are defining your own L</cache>. 264: 265: =head2 cache 266: 267: This is a code reference to a function that increments the cache counter for a key (usually the IP address or net 268: block). 269: 270: If you customise this, then you need to ensure that the counter resets or expires after a set period of time, 271: e.g. one minute. If you use a different time interval, then you may need to adjust the L</retry_after> time. 272: 273: =head1 KNOWN ISSUES 274: 275: This does not try to enforce any consistency or block overlapping netblocks. It trusts L<Net::IP::Match::Trie> to 276: handle any overlapping or conflicting network ranges, or to specify exceptions for larger blocks. 
277: 278: When configuring the L</greylist> netblocks from a configuration file using L<Config::General>, duplicate netblocks may 279: be merged in unexpected ways, for example 280: 281: 10.0.0.0/16 60 group-1 282: 283: ... 284: 285: 10.0.0.0/16 120 group-2 286: 287: may be merged as something like 288: 289: '10.0.0.0/16' => [ '60 group-1', '120 group-2' ], 290: 291: Some search engine robots may not respect HTTP 429 responses, and will treat these as errors. You may want to make an 292: exception for trusted networks that gives them a higher rate than the default. 293: 294: This does not enforce consistent rates for named blocks. For example, if you specified 295: 296: '10.0.0.0/16' => [ 60, 'named-group' ], 297: '172.16.0.0/16' => [ 100, 'named-group' ], 298: 299: Requests from both netblocks would be counted together, but requests from the 10.0.0.0/16 netblock would be rejected after 60 300: requests. This is probably not something that you want. 301: 302: =head1 SUPPORT FOR OLDER PERL VERSIONS 303: 304: Since v0.4.0, this module requires Perl v5.12 or later. 305: 306: If you need this module on Perl v5.10, please use one of the v0.3.x 307: versions of this module. Significant bug or security fixes may be 308: backported to those versions. 309: 310: =head1 SOURCE 311: 312: The development version is on github at L<https://github.com/robrwo/Plack-Middleware-Greylist> 313: and may be cloned from L<git://github.com/robrwo/Plack-Middleware-Greylist.git> 314: 315: =head1 BUGS 316: 317: Please report any bugs or feature requests on the bugtracker website 318: L<https://github.com/robrwo/Plack-Middleware-Greylist/issues> 319: 320: When submitting a bug or request, please include a test-file or a 321: patch to an existing test-file that illustrates the bug or desired 322: feature. 323: 324: =head1 AUTHOR 325: 326: Robert Rothenberg <rrwo@cpan.org> 327: 328: The initial development of this module was sponsored by Science Photo 329: Library L<https://www.sciencephoto.com>. 
330: 331: =head1 CONTRIBUTOR 332: 333: =for stopwords Gabor Szabo 334: 335: Gabor Szabo <gabor@szabgab.com> 336: 337: =head1 COPYRIGHT AND LICENSE 338: 339: This software is Copyright (c) 2022-2023 by Robert Rothenberg. 340: 341: This is free software, licensed under: 342: 343: The Artistic License 2.0 (GPL Compatible) 344: 345: =cut 346: |