File Coverage

lib/CGI/Application/Plugin/Throttle.pm
Criterion Covered Total %
statement 97 114 85.0
branch 37 48 77.0
condition 12 23 52.1
subroutine 17 19 89.4
pod 4 5 80.0
total 167 209 79.9


line stmt bran cond sub pod time code
1             package CGI::Application::Plugin::Throttle;
2              
3             =head1 NAME
4              
5             CGI::Application::Plugin::Throttle - Rate-Limiting for CGI::Application
6              
7              
8              
9             =head1 SYNOPSIS
10              
11             use CGI::Application::Plugin::Throttle;
12            
13            
14             # Your application
15             sub setup {
16            
17             ...
18            
19             # Create a redis handle
20             my $redis = Redis->new();
21            
22             # Configure throttling
23             $self->throttle()->configure(
24             redis => $redis,
25             prefix => "REDIS:KEY:PREFIX",
26             limit => 100,
27             period => 60,
28             exceeded => "slow_down_champ"
29             );
30            
31             ...
32            
33             }
34            
35             sub throttle_keys {
36             my $self = shift;
37            
38             # do not throttle at all when returning `undef`
39             return undef if %ENV{DEVELOPMENT};
40            
41             return (
42             remote_addr => $ENV{REMOTE_ADDR},
43            
44             maybe
45             pwd_recover => $self->_is_password_recovery
46             );
47             }
48            
49             sub throttle_spec {
50             { pwd_recover => 1 } =>
51             { limit => 5, period => 300, exceeded => 'stay_out' }
52            
53             { remote_addr => '127.0.0.1' }
54             { limit => 10_000, period => 1, exceeded => 'get_home' }
55             }
56              
57             =cut
58              
59              
60              
61             =head1 VERSION
62              
63             This is version '0.7'
64              
65              
66              
67             =head1 DESCRIPTION
68              
69             This module allows you to enforce a throttle on incoming requests to your
70             application, based upon the remote IP address, or other parameters.
71              
72             This module stores a count of accesses in a Redis key-store, and once hits
73             exceed the specified threshold the user will be redirected to the run-mode
74             you've specified.
75              
76              
77              
78             =head1 POTENTIAL ISSUES / CONCERNS
79              
80             Users who share IP addresses, because they are behind a common-gateway for
81             example, will all suffer if the threshold is too low. We attempt to mitigate
82             this by building the key using a combination of the remote IP address, and the
83             remote user-agent.
84              
85             This module has added great flexibillity to change the parameters being used for
86             generating the redis key. It now also has the posibillity to select different
87             throttle rules specified by filters that need to match the parameters.
88              
89             =cut
90              
91              
92              
93 7     7   739288 use strict;
  7         50  
  7         171  
94 7     7   30 use warnings;
  7         13  
  7         244  
95              
96             our $VERSION = '0.7';
97              
98 7     7   3226 use Digest::SHA qw/sha512_base64/;
  7         18672  
  7         646  
99              
100              
101             =head1 METHODS
102              
103             =cut
104              
105              
106              
107             =head2 C
108              
109             Force the C method into the caller's namespace, and configure the
110             prerun hook which is used by L.
111              
112             =cut
113              
114             sub import
115             {
116 7     7   61 my $pkg = shift;
117 7         16 my $callpkg = caller;
118              
119             {
120             ## no critic
121 7     7   49 no strict qw(refs);
  7         13  
  7         8498  
  7         14  
122             ## use critic
123 7         14 *{ $callpkg . '::throttle' } = \&throttle;
  7         36  
124             }
125              
126 7 100       5319 if ( UNIVERSAL::can( $callpkg, "add_callback" ) )
127             {
128 2         9 $callpkg->add_callback( 'prerun' => \&throttle_callback );
129             }
130              
131             }
132              
133              
134              
135             =head2 C
136              
137             This method is used internally, and not expected to be invoked externally.
138              
139             The defaults are setup here, although they can be overridden in the
140             L method.
141              
142             =cut
143              
144             sub new
145             {
146 34     34 1 129 my ( $proto, %supplied ) = (@_);
147 34   33     135 my $class = ref($proto) || $proto;
148              
149 34         65 my $self = {};
150              
151             #
152             # Configure defaults.
153             #
154 34         77 $self->{ 'limit' } = 100;
155 34         61 $self->{ 'period' } = 60;
156              
157             #
158             # The redis key-prefix.
159             #
160 34   50     90 $self->{ 'prefix' } = $supplied{ 'prefix' } || "THROTTLE";
161              
162             #
163             # Run mode to redirect to on exceed.
164             #
165 34         56 $self->{ 'exceeded' } = "slow_down";
166              
167             #
168             # Set the code reference for getting the throttle keys
169             #
170 34   100     94 $self->{ 'throttle_keys_callback' } = $supplied{ 'throttle_keys_callback' }
171             || \&_get_default_throttle_keys;
172            
173             #
174             # Set the code reference for getting throttle specific rules
175             #
176 34         55 $self->{ 'throttle_spec_callback' } = $supplied{ 'throttle_spec_callback' };
177              
178 34         56 bless( $self, $class );
179 34         111 return $self;
180             }
181              
182              
183              
184             =head2 C
185              
186             Gain access to an instance of this class. This is the method by which you can
187             call methods on this plugin from your L derived-class.
188              
189             =cut
190              
191             sub throttle
192             {
193 64     64 1 197999 my $cgi_app = shift;
194 64 100       181 return $cgi_app->{ __throttle_obj } if $cgi_app->{ __throttle_obj };
195            
196             #
197             # Setup the prefix of the Redis keys to default to the name of
198             # the CGI::Application.
199             #
200             # This avoids collisions if multiple applications are running on
201             # the same host, and the developer won't need to explicitly setup
202             # distinct prefixes.
203             #
204             my $throttle = $cgi_app->{ __throttle_obj } =
205 34         317 __PACKAGE__->new(
206             prefix => ref($cgi_app),
207             throttle_keys_callback => $cgi_app->can('throttle_keys'),
208             throttle_spec_callback => $cgi_app->can('throttle_spec'),
209             )
210             ;
211              
212 34         170 return $throttle;
213             }
214              
215              
216              
217             # sub _get_redis_key>
218             #
219             # Build and return the Redis key to use for this particular remote request.
220             #
221             # The key is built from the C string set in L method,
222             # along with:
223             #
224             # * The remote IP address of the client.
225             # * The remote HTTP Basic-Auth username of the client.
226             # * The remote User-Agent.
227             #
228             sub _get_redis_key
229             {
230 0     0   0 my $self = shift;
231 0         0 my $key = $self->{ 'prefix' };
232              
233             #
234             # Build up the key based on the:
235             #
236             # 1. User using HTTP Basic-Auth, if present.
237             # 2. The remote IP address.
238             # 3. The remote user-agent.
239             #
240 0         0 foreach my $env (qw! REMOTE_USER REMOTE_ADDR HTTP_USER_AGENT !)
241             {
242 0 0       0 if ( $ENV{ $env } )
243             {
244 0         0 $key .= ":";
245 0         0 $key .= $ENV{ $env };
246             }
247             }
248              
249 0         0 return ($key);
250             }
251              
252              
253              
254             =head2 C
255              
256             Returns two values: the number of times the remote client has hit a run mode,
257             along with the maximum allowed visits:
258              
259             =for example begin
260              
261             sub your_run_mode
262             {
263             my ($self) = (@_);
264            
265             my( $count, $max ) = $self->throttle()->count();
266             return( "$count visits seen - maximum is $max." );
267             }
268              
269             =for example end
270              
271             =head3 warning
272              
273             This method must be called in list context, in scalar context, the result will
274             always be '2'.
275              
276             =cut
277              
278             sub count
279             {
280 0     0 1 0 my ($self) = (@_);
281            
282 0         0 my $keys = $self->_get_keys();
283 0         0 my $rule = $self->_get_throttle_rule( $keys );
284              
285 0         0 my $visits = 0;
286 0         0 my $max = $rule->{ 'limit' };
287              
288 0 0       0 if ( $self->{ 'redis' } )
289             {
290 0         0 my $digest_key = $self->_digest_key_in_timeslot($keys, $rule->{period});
291 0         0 $visits = $self->{ 'redis' }->llen($digest_key);
292             }
293 0         0 return ( $visits, $max );
294             }
295              
296              
297              
298             # sub throttle_callback
299             #
300             # This method is invoked by L, as a hook.
301             #
302             # The method is responsible for determining whether the remote client which
303             # triggered the current request has exceeded their request threshold.
304             #
305             # If the client has made too many requests their intended run-mode will be
306             # changed to redirect them.
307             #
308             sub throttle_callback
309             {
310 30     30 0 62639 my $cgi_app = shift;
311 30         59 my $self = $cgi_app->throttle();
312              
313             #
314             # Get the redis handle
315             #
316 30   50     71 my $redis = $self->{ 'redis' } || return;
317              
318             #
319             # The key relating to this user.
320             #
321 30         62 my $keys = $self->_get_keys();
322              
323             #
324             # Get throttle rule
325             #
326 30         63 my $rule = $self->_get_throttle_rule( $keys );
327            
328             #
329             # If too many redirect.
330             #
331 30 100       62 if ( my $exceeded = $self->_is_exceeded($rule, $keys) )
332             {
333 10         32 $cgi_app->prerun_mode( $exceeded );
334 10         116 return;
335             }
336              
337             #
338             # Otherwise if we've been called with a mode merge it in
339             #
340 20 50       50 if ( $cgi_app->query->url_param( $cgi_app->mode_param ) )
341             {
342 0         0 $cgi_app->prerun_mode(
343             $cgi_app->query->url_param( $cgi_app->mode_param ) );
344             }
345              
346             }
347              
348              
349              
350             =head2 C
351              
352             This method is what the user will invoke to configure the throttle-limits.
353              
354             It is expected that within the users L
355             L method there will be code similar to this:
356              
357             =for example begin
358              
359             sub setup {
360             my $self = shift;
361            
362             my $r = Redis->new();
363            
364             $self->throttle()->configure( redis => $r,
365             # .. other options here
366             )
367             }
368              
369             =for example end
370              
371             The arguments hash contains the following known keys:
372              
373             =over
374              
375             =item C
376              
377             A L handle object.
378              
379             =item C
380              
381             The maximum number of requests that the remote client may make, in the given
382             period of time.
383              
384             =item C
385              
386             The period of time which requests are summed for. The period is specified in
387             seconds and if more than C requests are sent then the client will be
388             redirected.
389              
390             =item C
391              
392             This module uses L to store the counts of client requests. Redis is a
393             key-value store, and each key used by this module is given a prefix to avoid
394             collisions. You may specify your prefix here.
395              
396             The prefix will default to the name of your application class if it isn't set
397             explicitly, which should avoid collisions if you're running multiple
398             applications on the same host.
399              
400             =item C
401              
402             The C to redirect the client to, when their request-count has exceeded
403             the specified limit.
404              
405             =back
406              
407             =cut
408              
409             sub configure
410             {
411 33     33 1 784 my ( $self, %args ) = (@_);
412              
413             #
414             # The rate-limiting number of requests per time period
415             #
416 33 100       103 $self->{ 'limit' } = $args{ 'limit' } if ( $args{ 'limit' } );
417 33 100       117 $self->{ 'period' } = $args{ 'period' } if ( $args{ 'period' } );
418              
419             #
420             # Redis key-prefix
421             #
422 33 100       79 $self->{ 'prefix' } = $args{ 'prefix' } if ( $args{ 'prefix' } );
423              
424             #
425             # The handle to Redis for state-tracking
426             #
427 33 100       112 $self->{ 'redis' } = $args{ 'redis' } if ( $args{ 'redis' } );
428              
429             #
430             # The run-mode to redirect to on violation.
431             #
432 33 50       99 $self->{ 'exceeded' } = $args{ 'exceeded' } if ( $args{ 'exceeded' } );
433              
434             }
435              
436             #
437             # This is the original default list of values
438             #
439             sub _get_default_throttle_keys
440             {
441             remote_user => $ENV{ REMOTE_USER },
442             remote_addr => $ENV{ REMOTE_ADDR },
443             http_user_agent => $ENV{ HTTP_USER_AGENT },
444 16     16   54 }
445              
446             # returns a 'key'
447             #
448             # This routine will take the normal key and adds a 'timeslot' to it, so all keys
449             # will now fall in the same group during the time interval of the 'period'
450             # Since the key is becomming uglier, we just base64 encode the sha512 hash
451             #
452             sub _digest_key_in_timeslot
453             {
454 30     30   56 my ($self, $keys, $period ) = @_;
455 30         63 my @throttle_keys = @$keys;
456            
457             # we need to preserve order and can not use random order of a hash
458 30         39 my (@keys, @vals);
459 30         70 for ( my $i =0 ; $i < @throttle_keys; )
460             {
461 90         132 push @keys, $throttle_keys[$i++];
462 90   100     217 push @vals, $throttle_keys[$i++] || '* * *';
463             }
464 30         81 my $key_string = join q{:}, @vals;
465            
466 30         88 $key_string .= q{#} . int(time() / $period );
467              
468 30         452 sha512_base64( $key_string )
469             }
470              
471             # returns the 'keys' relating to the current user / session etc.
472             #
473             sub _get_keys
474             {
475 33     33   61 my $self = shift;
476 33         75 my @throttle_keys = $self->{ throttle_keys_callback }->();
477              
478             # return undef, as an explicit instruction to ignote throttling at all
479 33 100 66     131 return undef if scalar(@throttle_keys) == 1 && !defined($throttle_keys[0]);
480            
481             # prepend the list with the prefix if missing
482             unshift @throttle_keys, (prefix => $self->{ prefix } )
483 32 50       171 unless exists {@throttle_keys}->{ prefix };
484            
485 32         93 return \@throttle_keys;
486             }
487              
488             # return a set of key/value pairs for a specific key
489             #
490             sub _get_throttle_rule
491             {
492 40     40   5438 my $self = shift;
493 40         53 my $keys = shift;
494              
495 40 50       93 return unless defined $keys;
496              
497 40         84 my $default_rule = $self->_get_default_throttle_rule();
498 40         84 my $special_rule = $self->_get_special_throttle_rule( $keys );
499 40         150 my $throttle_rule = { %$default_rule, %$special_rule };
500              
501 40         148 return $throttle_rule
502             }
503              
504             # returns the default set of rules, set by $throttle->configure
505             #
506             sub _get_default_throttle_rule
507             {
508 40     40   54 my $self = shift;
509            
510             my $rule = {
511             limit => $self->{ 'limit' },
512             period => $self->{ 'period' },
513 40         112 exceeded => $self->{ 'exceeded' },
514             };
515 40         92 return $rule;
516             }
517              
518             # returns the first rule whre all the filters are matched against the keys
519             #
520             sub _get_special_throttle_rule
521             {
522 40     40   63 my ( $self, $keys ) = @_;
523 40 100       88 return { } unless $self->{ throttle_spec_callback };
524            
525 23         49 my @spec = $self->{ throttle_spec_callback }->();
526            
527             # set initial rule to an empty set, or the last spec if there is an odd list
528 23 100       147 my $rule = scalar @spec %2 ? pop @spec : {};
529              
530 23         72 while ( my($filter, $rule ) = splice @spec, 0 , 2 )
531             {
532 34 100       67 next unless $self->_match_all( $filter, $keys );
533 20         78 return $rule
534             }
535            
536 3         7 return $rule;
537             }
538              
539             sub _match_all
540             {
541 34     34   62 my ($self, $filter, $keys) = @_;
542            
543 34         89 my $lookup = { @$keys };
544            
545 34         85 foreach ( keys %$filter )
546             #
547             # In natural language, not in Perl, the below test does match:
548             #
549             # "if both are the same"
550             #
551             # that is, under the precondition that both exists,
552             # that both defined strings are the same, or both are undefined
553             #
554             # normally,in string comparision, `undef` is compared as an empty string
555             #
556             # take a class in boolean algebra and learn about The Morgan etc
557             #
558             # we do not match if:
559             #
560             {
561 37 100       93 return unless exists $lookup->{$_};
562            
563             next if
564             ( defined $filter->{$_} && $filter->{$_} )
565             eq
566 27 100 33     166 ( defined $lookup->{$_} && $lookup->{$_} );
      33        
567            
568             return if
569             ( defined $filter->{$_} )
570             ||
571 4 50 33     22 ( defined $lookup->{$_} );
572            
573             }
574             return !undef
575 20         93 }
576              
577             # returns the runmode if the this is true for the given rule and key
578             #
579             sub _is_exceeded
580             {
581 30     30   53 my ($self, $rule, $keys) = @_;
582            
583 30 50       59 return unless defined $keys;
584            
585 30 50       56 my $redis = $self->{ 'redis' } or return;
586              
587             #
588             # Use a timeslot defined digest key instead
589             #
590 30         54 my $digest_key = $self->_digest_key_in_timeslot($keys, $rule->{period});
591              
592             #
593             # Increase the count, and set the expiry.
594             #
595 30         696 my $cur = $redis->lpush($digest_key, 1);
596 30 100       11069 $redis->expire( $digest_key, $rule->{ 'period' } ) if $cur == 1;
597              
598             #
599             # If limit exceeded, redirect.
600             #
601 30 100       1013 return $rule->{ exceeded } if $cur > $rule->{ limit };
602            
603             return
604 20         56 }
605              
606              
607             =head1 CALLBACKS
608              
609             =head2 C
610              
611             This callback will be called to give the developer the option to use alternative
612             keys. It must return a list of key value pairs, and the plugin will preserve the
613             order. Default these are C, C, and C.
614              
615             =for example begin
616              
617             sub throttle_keys {
618             remote_user => $ENV{ REMOTE_USER },
619             remote_addr => $ENV{ REMOTE_ADDR },
620             http_user_agent => $ENV{ HTTP_USER_AGENT },
621             }
622              
623             =for example end
624              
625             This callback can be used to do more fancy things and add a key for run-modes as
626             in:
627              
628             =for example begin
629              
630             sub throttle_keys {
631             my $self = shift;
632            
633             return (
634             runmode_grp => $self->_get_runmode_group(),
635             ... => ...
636             )
637             }
638              
639             =for example end
640              
641             Returning a explicit C means that no throttling will happen, at all; If
642             the call back returns an empty list, all incoming request will be throttled and
643             no difference will be made from where the request comes from.
644              
645             =for example begin
646              
647             sub throttle_keys {
648             return undef if $ENV{REMOTE_USER} eq 'superuser';
649             return ( );
650             }
651              
652             =for example end
653              
654             =head2 C
655              
656             This callback can be used to specify different set of throttle rules based on
657             filters that must match with the throttle keys. This callback must return a list
658             of filter/settings pairs that will be checked against the current throttle keys.
659             It can have a additional last set of throttle rules (it is an odd sized list),
660             which will then be used as a default.
661              
662             The selected rules willbe merged with the settings from the Cconfigure> call, or
663             the defaults from the module itself.
664              
665             Keys mentioned in the filter must be present in the current throttle keys/params
666             in order to match. The value can be C, meaning that the throttle param
667             must exist and be undefined.
668              
669             =for example begin
670              
671             sub throttle_spec {
672             { remote_user => undef } =>
673             {
674             limit => 5,
675             exceeded => 'we_dont_like_strangers'
676             },
677            
678             { runmode_grp => 'pdf_report' } =>
679             {
680             limit => 10,
681             period => 3600,
682             exceeded => 'these_are_very_expensive'
683             }
684            
685             {
686             limit => rnd * 10 # making people go crazy why?
687             }
688             }
689              
690             =for example end
691              
692             =head1 AUTHOR
693              
694             Steve Kemp
695              
696             =head1 CONTRIBUTORS
697              
698             Theo van Hoesel
699              
700             =head1 COPYRIGHT AND LICENSE
701              
702             Copyright (C) 2014..2020 Steve Kemp .
703              
704             This library is free software. You can modify and or distribute it under the
705             same terms as Perl itself.
706              
707             =cut
708              
709              
710              
711             1;