File Coverage

blib/lib/Dancer/Plugin/DetectRobots.pm
Criterion Covered Total %
statement 31 57 54.3
branch 5 30 16.6
condition n/a
subroutine 7 8 87.5
pod n/a
total 43 95 45.2


line stmt bran cond sub pod time code
1             package Dancer::Plugin::DetectRobots;
2             {
3             $Dancer::Plugin::DetectRobots::VERSION = '0.6';
4             }
5              
6 1     1   26055 use strict;
  1         2  
  1         49  
7 1     1   5 use warnings;
  1         2  
  1         35  
8              
9 1     1   27022 use Regexp::Assemble qw();
  1         34620  
  1         37  
10              
11 1     1   14290 use Dancer ':syntax';
  1         503893  
  1         8  
12 1     1   2775 use Dancer::Plugin;
  1         1656  
  1         796  
13              
14             my $conf = plugin_setting;
15             my $key = $conf->{session_key} || 'robot_client';
16             my $type = $conf->{type} || 'BASIC';
17              
18             my $reList = _read_list();
19             my $basic = _assemble( $reList, 'basic' );
20             my $extended = _assemble( $reList, 'extended' );
21             my $generic = _assemble( $reList, 'generic' );
22              
23             register is_robot => sub {
24 0     0   0 my $ua = request->user_agent;
25 0         0 my $value = "";
26 0 0       0 if( session( $key ) ) {
27 0         0 $value = session $key;
28             }
29 0 0       0 if( $value eq "NO") {
    0          
    0          
    0          
30 0         0 return 0;
31             }
32             elsif( $value eq "BASIC") {
33 0         0 return 1;
34             }
35             elsif( $value eq "EXTENDED") {
36 0         0 return 1;
37             }
38             elsif( $value eq "GENERIC") {
39 0         0 return 1;
40             }
41              
42 0         0 my $rv = 0;
43              
44 0 0       0 if ( $type eq "BASIC" ) {
    0          
    0          
45 0 0       0 if ( $ua =~ $basic ) {
46 0         0 session $key => $type;
47 0         0 $rv = 1;
48             }
49             else {
50 0         0 session $key => "NO";
51             }
52             }
53             elsif ( $type eq "EXTENDED" ) {
54 0 0       0 if ( $ua =~ $basic ) {
    0          
55 0         0 session $key => $type;
56 0         0 $rv = 1;
57             }
58             elsif ( $ua =~ $extended ) {
59 0         0 session $key => $type;
60 0         0 $rv = 1;
61             }
62             else {
63 0         0 session $key => "NO";
64             }
65             }
66             elsif ( $type eq "GENERIC" ) {
67 0 0       0 if ( $ua =~ $generic ) {
68 0         0 session $key => $type;
69 0         0 $rv = 1;
70             }
71             else {
72 0         0 session $key => "NO";
73             }
74             }
75 0         0 return $rv;
76             };
77              
78             sub _assemble {
79 3     3   10 my ( $list, $use_type ) = @_;
80              
81 3         28 my $ra = Regexp::Assemble->new( flags => 'i' );
82 3         267 foreach my $r ( @{ $list->{$use_type} } ) {
  3         12  
83 794         114368 $ra->add($r);
84             }
85              
86 3         393 return $ra->re;
87             }
88              
89             sub _read_list {
90 1     1   5 my $bots = { basic => [], extended => [], generic => [], };
91 1         2 my $currentType = 'basic';
92              
93 1         9 while (<DATA>) {
94 794         815 chomp;
95 794 50       5359 next unless $_;
96 794 100       1549 $currentType = 'extended' if /\A##\s+EXTENDED/;
97 794 100       1305 $currentType = 'generic' if /\A##\s+GENERIC/;
98              
99 794         872 push @{ $bots->{$currentType} }, $_;
  794         2843  
100             }
101              
102 1         5 return $bots;
103             }
104              
105             register_plugin;
106              
107             1;
108              
109             =pod
110              
111             =head1 NAME
112              
113             Dancer::Plugin::DetectRobots - Dancer plugin to determine if the user is a robot
114              
115             =head1 VERSION
116              
117             version 0.6
118              
119             =head1 DESCRIPTION
120              
121             A plugin for Dancer applications providing a keyword, is_robot,
122             which tests request->user_agent and returns 1 if the user_agent
123             appears to be a robot.
124              
125             To use, simply call is_robot whenever/wherever you would like to
126             know if the user is a bot or a human. For example, if you would
127             like to skip logging for bots:
128              
129             if( ! is_robot ) {
130             log_message("your log message");
131             }
132              
133             The plugin has been written to be as efficient as possible. The
134             list of Robot UserAgent strings is only matched against request->user_agent
135             once per session.
136              
137             This is done by storing its results in a session variable so a session
138             engine must be enabled. Session::Cookie would be a poor choice since
139             the optimization will be lost when dealing with a search engine or robot.
140              
141             The first call to is_robot in a session checks to see if the session
142             variable has been set, and if it has, it returns 0 or 1 based upon the
143             session variable.
144              
145             By default the session variable key is "robot_client"
146              
147             The check is done against the list of UserAgent strings used
148             by AWStats. There are three levels of testing, BASIC which matches
149             AWStats LevelForRobotsDetection=1, EXTENDED which matches
150             LevelForRobotsDetection=2 and GENERIC which is a very lax test.
151              
152             By default the level is set to BASIC
153              
154             You can change these settings. See L</CONFIGURATION>.
155              
156             =head1 NAME
157              
158             Dancer::Plugin::DetectRobots - A plugin to detect if the HTTP_USER_AGENT
159             matches a known search engine or robot string.
160              
161             =head1 SYNOPSIS
162              
163             In your configuration, make sure you have session configured. Of course you can
164             use any session engine.
165              
166             session: "simple"
167              
168             In your Dancer App
169              
170             use Dancer;
171             use Dancer::Plugin::DetectRobots;
172              
173             if( is_robot ) {
174             ...
175             }
176             else {
177             processing goes here
178             ...
179             }
180              
181             =head1 METHODS
182              
183             =head2 is_robot
184              
185             # returns 1 if the HTTP_USER_AGENT as returned by request->user_agent
186             # matches one of the strings used by AWStats to detect search engines and
187             # bots
188              
189             if ( is_robot ) {
190             ..
191             }
192              
193             =head1 CONFIGURATION
194              
195             With no configuration whatsoever, the plugin will work fine, thus contributing
196             to the I<keep it simple> motto of Dancer.
197              
198             =head2 configuration default values
199              
200             These are the default values. See below for a description of the keys
201              
202             plugins:
203             DetectRobots:
204             session_key: robot_client
205             type: BASIC
206              
207             =head2 configuration description
208              
209             =over
210              
211             =item session_key
212              
213             The name of the session key which is used to store the results of the
214             robot test lookup
215              
216             B<Default> : C<robot_client>
217              
218             =item type
219              
220             This value determines which of 3 lists the search tests against.
221             BASIC - this is the same as AWStats LevelForRobotsDetection=1
222             It tests for major search engines and known bots
223             EXTENDED - this is the same as AWStats LevelForRobotsDetection=2
224             It tests for major search engines and known bots as in BASIC plus
225             about 800 minor bots and search engines.
226             GENERIC - this is a very simple test that only looks for a couple of
227             dozen generic bot strings, e.g. robot, crawl, hunter, spider ...
228              
229             B<Default> : C<BASIC>
230              
231             =back
232              
233             =head1 COPYRIGHT
234              
235             This software is copyright (c) 2014 by Dan Busarow .
236              
237             =head1 LICENCE
238              
239             This is free software; you can redistribute it and/or modify it under
240             the same terms as the Perl 5 programming language system itself.
241              
242             =head1 AUTHORS
243              
244             This module has been written by Dan Busarow
245             based upon Plack::Middleware::DetectRobots by Heiko Jansen
246              
247             =head1 SEE ALSO
248              
249             L
250              
251             =head1 AUTHOR
252              
253             Dan Busarow
254              
255             =head1 COPYRIGHT AND LICENSE
256              
257             This software is copyright (c) 2014 by Dan Busarow
258              
259             This is free software; you can redistribute it and/or modify it under
260             the same terms as the Perl 5 programming language system itself.
261              
262             =cut
263              
264              
265             __DATA__