| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package HTTP::UserAgentString::Parser; |
|
2
|
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
=head1 NAME |
|
4
|
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
HTTP::UserAgentString::Parser - User-Agent string parser |
|
6
|
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
8
|
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
my $p = HTTP::UserAgentString::Parser->new(); |
|
10
|
|
|
|
|
|
|
my $ua = $p->parse("Opera/9.80 (X11; Linux x86_64; U; en) Presto/2.9.168 Version/11.50"); |
|
11
|
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
if ($ua->isRobot) { |
|
13
|
|
|
|
|
|
|
print "It's a robot: ", $ua->name, "\n"; |
|
14
|
|
|
|
|
|
|
} else { |
|
15
|
|
|
|
|
|
|
print "It's a browser: ", $ua->name, " - version: ", $ua->version, "\n"; |
|
16
|
|
|
|
|
|
|
} |
|
17
|
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
=head1 DESCRIPTION |
|
19
|
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
C<HTTP::UserAgentString::Parser> is a Perl API for user-agent-string.info. It |
|
21
|
|
|
|
|
|
|
can be used to parse user agent strings and determine whether the agent is a robot, |
|
22
|
|
|
|
|
|
|
a normal browser, mobile browser, e-mail client. It can also tell browser version, |
|
23
|
|
|
|
|
|
|
company that makes it, home page URL. In most of the cases it can also tell in which |
|
24
|
|
|
|
|
|
|
OS the browser is running. |
|
25
|
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
HTTP::UserAgentString::Parser will download the .ini file provided by user-agent-string.info |
|
27
|
|
|
|
|
|
|
which contains all the information to do the parsing. The file will be cached by default |
|
28
|
|
|
|
|
|
|
for 7 days. After that time, it will check whether a new version was released. The |
|
29
|
|
|
|
|
|
|
default cache time can be modified, as well as the cache path (default is /tmp). A |
|
30
|
|
|
|
|
|
|
cache reload can also be forced. |
|
31
|
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
In order to parse a string, a parse() method is provided which returns an object |
|
33
|
|
|
|
|
|
|
of classes HTTP::UserAgentString::Browser or HTTP::UserAgentString::Robot. Both classes |
|
34
|
|
|
|
|
|
|
have accessors to determine agent capabilities. In case the string does not match any known |
|
35
|
|
|
|
|
|
|
browser or robot, undef() is returned. |
|
36
|
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
=head1 CONSTRUCTOR |
|
38
|
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
$p = HTTP::UserAgentString::Parser->new(%opts) |
|
40
|
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
Valid options are: |
|
42
|
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
cache_max_age: in seconds (default is 7 days) |
|
44
|
|
|
|
|
|
|
cache_dir: path must be writeable - default is /tmp |
|
45
|
|
|
|
|
|
|
parse_cache_size: size of parsing cache in number of elements. Default is 100_000 |
|
46
|
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
=head1 METHODS |
|
48
|
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
=over 4 |
|
50
|
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
=item $agent = $p->parse($string) |
|
52
|
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
Parses a User-Agent string and returns a HTTP::UserAgentString::Browser or |
|
54
|
|
|
|
|
|
|
HTTP::UserAgentString::Robot object, or undef() if no matches were found. |
|
55
|
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
=item $p->updateDB($force) |
|
57
|
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
Updates the cache file from user-agent-string.info. If force is false or undef(), the |
|
59
|
|
|
|
|
|
|
check is only executed if the cache file has expired. If force is true, the method |
|
60
|
|
|
|
|
|
|
checks whether there is a new file and downloads it accordingly. |
|
61
|
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
=item $p->getCurrentVersion() |
|
63
|
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
Retrieves the current database version from user-agent-string.info. Returns the version |
|
65
|
|
|
|
|
|
|
number or undef() if an error occurs. |
|
66
|
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
=item $p->getCachedVersion() |
|
68
|
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
Returns the version of the cached .ini file, or undef() if there is no cached file. |
|
70
|
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
=item $p->cache_file() |
|
72
|
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
Local path to the cached .ini file. |
|
74
|
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
=item $p->version_file() |
|
76
|
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
Local path to file that contains the version of the cached .ini file. |
|
78
|
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
=back |
|
80
|
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
=head1 SEE ALSO |
|
82
|
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
See L<HTTP::UserAgentString::Browser> and L<HTTP::UserAgentString::Robot> for description |
|
84
|
|
|
|
|
|
|
of the objects returned by parse(). |
|
85
|
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
=head1 COPYRIGHT |
|
87
|
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
Copyright (c) 2011 Nicolas Moldavsky (http://www.e-planning.net/) |
|
89
|
|
|
|
|
|
|
This is free software. You can redistribute it or modify it under the terms of the |
|
90
|
|
|
|
|
|
|
Perl license |
|
91
|
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
=cut |
|
93
|
|
|
|
|
|
|
|
|
94
|
4
|
|
|
4
|
|
1066799
|
use strict; |
|
|
4
|
|
|
|
|
9
|
|
|
|
4
|
|
|
|
|
153
|
|
|
95
|
4
|
|
|
4
|
|
22
|
use Carp (); |
|
|
4
|
|
|
|
|
8
|
|
|
|
4
|
|
|
|
|
64
|
|
|
96
|
4
|
|
|
4
|
|
4371
|
use LWP::UserAgent; |
|
|
4
|
|
|
|
|
212307
|
|
|
|
4
|
|
|
|
|
145
|
|
|
97
|
4
|
|
|
4
|
|
42
|
use File::Spec; |
|
|
4
|
|
|
|
|
11
|
|
|
|
4
|
|
|
|
|
117
|
|
|
98
|
4
|
|
|
4
|
|
24
|
use Digest::MD5; |
|
|
4
|
|
|
|
|
6
|
|
|
|
4
|
|
|
|
|
168
|
|
|
99
|
4
|
|
|
4
|
|
2457
|
use HTTP::UserAgentString::Browser; |
|
|
4
|
|
|
|
|
12
|
|
|
|
4
|
|
|
|
|
114
|
|
|
100
|
4
|
|
|
4
|
|
2125
|
use HTTP::UserAgentString::Robot; |
|
|
4
|
|
|
|
|
11
|
|
|
|
4
|
|
|
|
|
162
|
|
|
101
|
4
|
|
|
4
|
|
2375
|
use HTTP::UserAgentString::OS; |
|
|
4
|
|
|
|
|
12
|
|
|
|
4
|
|
|
|
|
11783
|
|
|
102
|
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
# Release number of this module.
our $VERSION = '0.6.1';

# Sections that must be present and non-empty in the cached .ini file.
my @REQUIRED_SECS = qw(
    robots
    os
    browser
    browser_type
    browser_reg
    browser_os
    os_reg
);

# Sections whose entries carry a regular expression that gets compiled
# when the database is loaded.
my $REGEX_SECS = {
    'browser_reg' => 1,
    'os_reg'      => 1,
};

# Remote endpoints: the database itself, its version, and its MD5 digest.
my $INI_URL = 'http://user-agent-string.info/rpc/get_data.php?key=free&format=ini';
my $VER_URL = 'http://user-agent-string.info/rpc/get_data.php?key=free&format=ini&ver=y';
my $MD5_URL = 'http://user-agent-string.info/rpc/get_data.php?format=ini&md5=y';

# Defaults applied when the constructor options are not supplied.
my $DEFAULT_CACHE_DIR        = '/tmp';
my $DEFAULT_CACHE_MAX_AGE    = 7 * 24 * 60 * 60;    # seven days, in seconds
my $DEFAULT_PARSE_CACHE_SIZE = 100_000;             # number of cached parse results

# File names used inside the cache directory.
my $INI_FILE = 'uas.ini';
my $VER_FILE = 'uas.version';
|
120
|
|
|
|
|
|
|
|
|
121
|
8
|
|
|
8
|
0
|
176
|
# Accessor: returns the value stored under the object's 'cache_dir' key.
sub cache_dir($) {
    my ($self) = @_;
    return $self->{cache_dir};
}
|
122
|
0
|
|
|
0
|
0
|
0
|
# Accessor: returns the value stored under the object's 'parse_cache_count' key.
sub parse_cache_count($) {
    my ($self) = @_;
    return $self->{parse_cache_count};
}
|
123
|
4
|
|
|
4
|
0
|
14
|
# Accessor: returns the value stored under the object's 'cache_max_age' key.
sub cache_max_age($) {
    my ($self) = @_;
    return $self->{cache_max_age};
}
|
124
|
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
# Returns the path of the cached .ini file: the object's cache directory
# joined with the $INI_FILE name via File::Spec.
sub cache_file($) {
    my ($self) = @_;
    my $dir = $self->cache_dir;
    return File::Spec->catfile($dir, $INI_FILE);
}
|
129
|
|
|
|
|
|
|
# Returns the path of the file holding the cached database version: the
# object's cache directory joined with the $VER_FILE name via File::Spec.
sub version_file($) {
    my ($self) = @_;
    my $dir = $self->cache_dir;
    return File::Spec->catfile($dir, $VER_FILE);
}
|
133
|
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
# Fetches the current database version from $VER_URL.
#
# Returns the response body on success. On any HTTP failure a warning is
# issued through Carp::carp and undef is returned.
sub getCurrentVersion($) {
    my ($self) = @_;

    my $agent = LWP::UserAgent->new();
    $agent->env_proxy();

    my $response = $agent->get($VER_URL);
    return $response->content if $response->is_success;

    Carp::carp( "Can't get current file version from $VER_URL: " . $response->status_line . "\n");
    return undef();
}
|
146
|
|
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
# Reads the version of the cached database from the version file.
#
# Returns the first line of that file exactly as read, or undef when the
# file does not exist or cannot be opened (the latter also warns).
sub getCachedVersion($) {
    my ($self) = @_;

    my $version_path = $self->version_file;
    return undef() unless -f $version_path;

    my $in;
    unless (open($in, "<", $version_path)) {
        Carp::carp("Can't open $version_path: $!\n");
        return undef();
    }

    my $stored = <$in>;
    close($in);
    return $stored;
}
|
163
|
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
# Writes $content to $filename, replacing any previous contents.
#
# Returns 1 when open, print and close all succeed. The first failing
# step emits a warning through Carp::carp and makes the sub return 0.
sub _writeCacheFile($$$) {
    my ($self, $filename, $content) = @_;

    my $out;
    unless (open($out, ">", $filename)) {
        Carp::carp("Can't open $filename for writing: $!\n");
        return 0;
    }

    unless (print {$out} $content) {
        Carp::carp("Can't write to $filename: $!\n");
        return 0;
    }

    # Buffered write errors only surface at close time, so check it too.
    unless (close($out)) {
        Carp::carp("Can't close $filename: $!\n");
        return 0;
    }

    return 1;
}
|
185
|
|
|
|
|
|
|
|
|
186
|
|
|
|
|
|
|
# Stores a freshly downloaded database: first the .ini data, then its
# version. The version file is only written when the .ini write succeeded;
# the result of the last write attempted is returned.
sub _updateCache($$$) {
    my ($self, $inidata, $version) = @_;

    my $ini_written = $self->_writeCacheFile($self->cache_file, $inidata);
    return $ini_written unless $ini_written;

    return $self->_writeCacheFile($self->version_file, $version);
}
|
191
|
|
|
|
|
|
|
|
|
192
|
|
|
|
|
|
|
# Downloads the .ini database from $INI_URL, checks it against the MD5
# digest served at $MD5_URL, and stores it in the cache together with
# $current_version.
#
# Returns the result of _updateCache() on success. Returns 0 (after a
# warning) when either download fails or the digest does not match.
sub _downloadDB($$) {
    my ($self, $current_version) = @_;

    my $agent = LWP::UserAgent->new();
    $agent->env_proxy();

    my $ini_response = $agent->get($INI_URL);
    unless ($ini_response->is_success) {
        Carp::carp("Can't get .ini from $INI_URL: " . $ini_response->status_line . "\n");
        return 0;
    }
    my $inidata = $ini_response->content;

    my $md5_response = $agent->get($MD5_URL);
    unless ($md5_response->is_success) {
        Carp::carp("Can't get MD5 from $MD5_URL: " . $md5_response->status_line . "\n");
        return 0;
    }

    my $expected_hash = $md5_response->content;
    my $hash          = Digest::MD5::md5_hex($inidata);
    if ($hash ne $expected_hash) {
        Carp::carp("MD5 digest does not match - expected=$expected_hash; calculate=$hash\n");
        return 0;
    }

    # Digest verified: write files to disk.
    return $self->_updateCache($inidata, $current_version);
}
|
221
|
|
|
|
|
|
|
|
|
222
|
|
|
|
|
|
|
# Refreshes the cached database when needed.
#
# A remote version check is performed when the cached .ini file is
# missing, when its modification time is older than cache_max_age seconds,
# or when $force is true. The database is downloaded if the remote version
# is newer than the cached one, or if there is no usable cached copy.
#
# Returns:
#   -1     no download was attempted (cache still fresh, remote version
#          unavailable, or remote version not newer than the cached one)
#   the result of _downloadDB() when a download was attempted
#   undef  when the existing cache file could not be stat()ed
sub updateDB($;$) {
    my ($self, $force) = @_;

    my $cache_file = $self->cache_file;
    my $have_cache = -f $cache_file;

    # Check if cache file needs to be updated according to max_age
    my $do_check;
    if ($have_cache) {
        my @stat = stat($cache_file);
        unless (@stat) {
            Carp::carp("Can't stat() $cache_file: $!\n");
            return undef();
        }
        my $mtime = $stat[9];
        my $limit = time() - $self->cache_max_age;
        $do_check = ($mtime < $limit);
    } else {
        $do_check = 1;
    }

    return -1 unless ($do_check or $force);

    my $current_version = $self->getCurrentVersion();
    return -1 unless defined($current_version);

    # A leftover version file says nothing when the .ini itself is gone:
    # ignore it so that a missing database is always downloaded again.
    my $cache_version = $have_cache ? $self->getCachedVersion() : undef;

    if (!defined($cache_version) or ($current_version gt $cache_version)) {
        return $self->_downloadDB($current_version);
    }

    return -1;
}
|
256
|
|
|
|
|
|
|
|
|
257
|
|
|
|
|
|
|
|
|
258
|
|
|
|
|
|
|
# Compiles, in place, the regex source held in slot 0 of every entry of
# the array referenced by $regexes. After a successful compilation slot 0
# holds the compiled pattern and slot 2 keeps the original source text.
#
# Returns 1 when every entry compiled. On the first failure a warning is
# issued and 0 is returned; entries already processed stay compiled.
sub _compileRegexes($$) {
    my ($self, $regexes) = @_;

    for my $entry (@{$regexes}) {
        my $source = $entry->[0];

        # NOTE(review): string eval of text read from the downloaded
        # database. The source is expected to carry its own delimiters
        # and flags (e.g. "/.../si"), but anything eval accepts after
        # "qr" will run - treat the .ini contents as trusted input only.
        my $compiled = eval "qr" . $source;

        unless (defined($compiled)) {
            Carp::carp("Invalid regex: " . $entry->[0] . "($@)\n");
            return 0;
        }

        $entry->[2] = $source;
        $entry->[0] = $compiled;
    }

    return 1;
}
|
275
|
|
|
|
|
|
|
|
|
276
|
|
|
|
|
|
|
# Loads the cached .ini database into the object.
#
# File format handled here:
#   ;...              comment line, skipped
#   [section]         starts a new section
#   N[] = "value"     appends "value" to entry N of the current section
#
# For the sections listed in $REGEX_SECS, consecutive lines sharing the
# same id are grouped into one array and the groups are stored in file
# order. For every other section the values are stored at index N.
#
# After reading, the sub verifies that all @REQUIRED_SECS are present and
# non-empty, compiles the regex sections, replaces $self->{robots} with
# HTTP::UserAgentString::Robot objects (also indexed in
# $self->{robot_index} by element 0 of each robot record) and resets the
# parse cache.
#
# Returns 1 on success; warns and returns 0 on any failure.
sub _loadDB($) {
    my ($self) = @_;
    my $file = $self->cache_file;

    my $fh;
    unless (open($fh, "<", $file)) {
        Carp::carp("Can't open $file for reading: $!\n");
        return 0;
    }

    my $cursec;
    my $nline = 0;      # 1-based number of the line being processed
    my $lastvalues;
    my $lastid;

    # A lexical line variable keeps the caller's $_ intact.
    while (my $line = <$fh>) {
        $nline++;
        next if ($line =~ /^;/);

        # chomp (not chop): a final line without a newline stays intact.
        chomp($line);

        if ($line =~ /^\[([\w_]+)\]\s*$/) {
            # New section: flush the pending regex group of the old one.
            if (defined($lastvalues)) {
                push(@{$self->{$cursec}}, $lastvalues);
            }
            $cursec = $1;
            $lastid = undef();
            $lastvalues = undef();
        } elsif ($line =~ /^(\d+)\[\] = "(.*)"\s*$/) {
            my ($id, $value) = ($1, $2);
            if ($REGEX_SECS->{$cursec}) {
                if (defined($lastid) and ($id == $lastid)) {
                    push(@$lastvalues, $value);
                } else {
                    push(@{$self->{$cursec}}, $lastvalues) if (defined($lastid));
                    $lastid = $id;
                    $lastvalues = [ $value ];
                }
            } else {
                push(@{$self->{$cursec}[$id]}, $value);
            }
        } else {
            Carp::carp("Invalid format in line $nline: $line\n");
            close($fh);
            return 0;
        }
    }
    if (defined($lastvalues)) {
        push(@{$self->{$cursec}}, $lastvalues);
    }
    close($fh);

    # Check that we have all required sections
    foreach my $sec (@REQUIRED_SECS) {
        my $entries = $self->{$sec};
        if (! defined($entries) or (! @$entries)) {
            Carp::carp("Section $sec is not present in $file");
            return 0;
        }
    }

    # Compile regexes
    foreach my $key (keys %$REGEX_SECS) {
        $self->_compileRegexes($self->{$key}) or return 0;
    }

    # Index for robots
    $self->{robot_index} = {};
    my @r;
    foreach my $robot (grep { defined($_) } @{$self->{robots}}) {
        my $os_id = $robot->[7];
        my $os;
        if ($os_id and defined($self->{os}[$os_id])) {
            $os = HTTP::UserAgentString::OS->new($self->{os}[$os_id]);
        }
        my $bot = HTTP::UserAgentString::Robot->new($robot, $os);
        push(@r, $bot);
        $self->{robot_index}{$robot->[0]} = $bot;
    }
    $self->{robots} = \@r;

    $self->{parse_cache} = {};
    $self->{parse_cache_count} = 0;

    return 1;
}
|
357
|
|
|
|
|
|
|
|
|
358
|
|
|
|
|
|
|
# Constructor.
#
# Recognised options (all optional):
#   cache_max_age     digits only; a false value falls back to
#                     $DEFAULT_CACHE_MAX_AGE
#   cache_dir         must be an existing directory; a false value falls
#                     back to $DEFAULT_CACHE_DIR
#   parse_cache_size  digits only; $DEFAULT_PARSE_CACHE_SIZE when undefined
#
# Returns the new object, or undef when an option is invalid (a warning
# is issued) or when updateDB() / _loadDB() report failure.
sub new($;%) {
    my ($pkg, %opts) = @_;

    for my $numeric_opt ('cache_max_age', 'parse_cache_size') {
        my $given = $opts{$numeric_opt};
        next unless defined($given);
        next if $given =~ /^\d+$/;

        Carp::carp("$numeric_opt must be an integer!\n");
        return undef();
    }

    if ($opts{cache_dir} and ! -d $opts{cache_dir}) {
        Carp::carp($opts{cache_dir} . " is not a valid directory: $!");
        return undef();
    }

    my $parse_cache_size = $opts{parse_cache_size};
    $parse_cache_size = $DEFAULT_PARSE_CACHE_SIZE unless defined($parse_cache_size);

    my %state = (
        cache_dir        => $opts{cache_dir} || $DEFAULT_CACHE_DIR,
        cache_max_age    => $opts{cache_max_age} || $DEFAULT_CACHE_MAX_AGE,
        parse_cache_size => $parse_cache_size,
    );
    my $self = bless(\%state, $pkg);

    # Refresh the cache first, then load it; either step failing aborts.
    return undef() unless $self->updateDB;
    return undef() unless $self->_loadDB();

    return $self;
}
|
388
|
|
|
|
|
|
|
|
|
389
|
2
|
|
|
2
|
0
|
10
|
# Accessor: returns the value stored under the object's 'robots' key.
sub robots($) {
    my ($self) = @_;
    return $self->{robots};
}
|
390
|
2
|
|
|
2
|
0
|
761
|
# Accessor: returns the value stored under the object's 'browser_reg' key.
sub browser_reg($) {
    my ($self) = @_;
    return $self->{browser_reg};
}
|
391
|
2
|
|
|
2
|
0
|
11
|
# Accessor: returns the value stored under the object's 'os_reg' key.
sub os_reg($) {
    my ($self) = @_;
    return $self->{os_reg};
}
|
392
|
|
|
|
|
|
|
|
|
393
|
|
|
|
|
|
|
# Builds a HTTP::UserAgentString::Browser object for the browser record
# stored at index $browser_id. The type description and version are
# passed as empty strings. When the browser_os section has an entry for
# this id, its first element is resolved through getOS() and passed along.
sub getBrowser($$) {
    my ($self, $browser_id) = @_;

    my $os_link = $self->{browser_os}[$browser_id];
    my $os = defined($os_link) ? $self->getOS($os_link->[0]) : undef;

    return HTTP::UserAgentString::Browser->new($self->{browser}[$browser_id], "", "", $os);
}
|
400
|
|
|
|
|
|
|
|
|
401
|
|
|
|
|
|
|
# Builds a HTTP::UserAgentString::OS object from the os record stored at
# index $os_id.
sub getOS($$) {
    my ($self, $os_id) = @_;
    my $os_record = $self->{os}[$os_id];
    return HTTP::UserAgentString::OS->new($os_record);
}
|
405
|
|
|
|
|
|
|
|
|
406
|
|
|
|
|
|
|
# Real parsing with no cache checking |
|
407
|
|
|
|
|
|
|
# Parses $string without consulting the parse cache.
#
# 1. An exact lookup in the robot index; a hit is returned as is.
# 2. Otherwise the browser regexes are tried in order. For the first one
#    that matches, a HTTP::UserAgentString::Browser object is built from
#    the browser record, its type description, the text captured by the
#    regex's first group, and the operating system. The OS comes from the
#    browser_os section when it has an entry for that browser, otherwise
#    from the first matching OS regex.
#
# Returns undef when nothing matches.
sub _parse($$) {
    my ($self, $string) = @_;

    # First we check whether it is a robot
    my $known_robot = $self->{robot_index}{$string};
    return $known_robot if defined($known_robot);

    # Now we check browser regexes
    BROWSER: for my $entry (grep { defined($_) } @{$self->{browser_reg}}) {
        my ($pattern, $browser_id) = @{$entry};
        next BROWSER unless $string =~ $pattern;

        # $1 still belongs to the successful browser match above.
        my $version = $1;
        my $browser = $self->{browser}[$browser_id];

        my $type = $browser->[0];
        my $type_desc;
        if (defined($self->{browser_type}[$type])) {
            $type_desc = $self->{browser_type}[$type][0];
        }

        my $os_link = $self->{browser_os}[$browser_id];
        my $os_id;
        $os_id = $os_link->[0] if defined($os_link);

        unless (defined($os_id)) {
            # Use regexes to look up the OS
            OS: for my $os_entry (grep { defined($_) } @{$self->{os_reg}}) {
                my ($os_pattern, $candidate_id) = @{$os_entry};
                next OS unless $string =~ $os_pattern;
                $os_id = $candidate_id;
                last OS;
            }
        }

        my $os;
        if (defined($os_id) and defined($self->{os}[$os_id])) {
            $os = HTTP::UserAgentString::OS->new($self->{os}[$os_id]);
        }

        return HTTP::UserAgentString::Browser->new($browser, $type_desc, $version, $os);
    }

    return undef();
}
|
454
|
|
|
|
|
|
|
|
|
455
|
|
|
|
|
|
|
# Parses a User-Agent string, using the per-object parse cache.
#
# A string already present in the cache (even with an undef result) is
# answered from it. Otherwise _parse() does the work and the result is
# remembered as long as fewer than parse_cache_size entries are stored.
#
# Returns whatever _parse() produced for the string.
sub parse($$) {
    my ($self, $string) = @_;

    return $self->{parse_cache}{$string}
        if exists $self->{parse_cache}{$string};

    my $result = $self->_parse($string);

    # Cache it if we have enough space
    if ($self->{parse_cache_size} > $self->{parse_cache_count}) {
        $self->{parse_cache}{$string} = $result;
        $self->{parse_cache_count}++;
    }

    return $result;
}
|
472
|
|
|
|
|
|
|
|
|
473
|
|
|
|
|
|
|
1; |
|
474
|
|
|
|
|
|
|
|