File Coverage

blib/lib/Nginx/Log/Statistics.pm
Criterion Covered Total %
statement 12 118 10.1
branch 0 24 0.0
condition 0 11 0.0
subroutine 4 16 25.0
pod 2 2 100.0
total 18 171 10.5


line stmt bran cond sub pod time code
1             package Nginx::Log::Statistics;
2 1     1   24843 use Nginx::Log::Entry;
  1         33891  
  1         30  
3 1     1   13 use Time::Piece;
  1         2  
  1         63  
4 1     1   84 use strict;
  1         6  
  1         40  
5 1     1   7 use warnings;
  1         2  
  1         1163  
6              
7             =head1 NAME
8              
9             Nginx::Log::Statistics - This module parses the Nginx combined access log and provides summary statistics about the log data.
10              
11             =head1 VERSION
12              
13             Version 0.03
14              
15             =cut
16              
17             our $VERSION = '0.03';
18              
19              
20             =head1 SYNOPSIS
21              
22             There are only two methods to understand in order to use this module: new and get_stat. These methods are documented below.
23            
24             use Nginx::Log::Statistics;
25             use Data::Dumper;
26              
27             my $stats = Nginx::Log::Statistics->new({filepath => '/etc/nginx/logs/access.log'});
28             my $browser_stats = $stats->get_stat('browser_count');
29             print Dumper $browser_stats;
30            
31              
32             =head1 SUBROUTINES/METHODS
33              
34             =head2 new
35              
36             Returns a new Nginx::Log::Statistics object. Requires a hashref with the following arguments (most are optional):
37              
38             =over 3
39              
40             =item * filepath => path to the access.log file, e.g. '/etc/nginx/logs/access.log' (required).
41              
42             =item * start_date => a L<Time::Piece> object; entries in the log with a date earlier than this date will be ignored (optional).
43              
44             =item * end_date => a L<Time::Piece> object; entries in the log with a date later than this date will be ignored (optional).
45              
46             =item * ignore_robots => either 1 or 0. If set to 1, all robot (e.g. Googlebot) entries in the log will be ignored, default is on (optional).
47              
48             =back
49              
50             =cut
51              
52             sub new {
53 0     0 1   my $class = shift;
54 0   0       my $self = {
55             ignore_robots => $_[0]->{ignore_robots} || 1,
56             despatch_table => {
57             entry_count => \&_get_entries_count,
58             unique_ip_count => \&_get_unique_ip_count,
59             unique_ip_browser_count => \&_get_unique_ip_browser_count,
60             browser_count => \&_get_browser_count,
61             url_count => \&_get_url_count,
62             os_count => \&_get_os_count,
63             referer_count => \&_get_referer_count,
64             unique_ip_os_count => \&_get_unique_ip_os_count,
65             },
66             };
67 0           my $obj = bless $self, $class;
68 0           my $log_arrayref = $obj->_build_log_array($_[0]->{filepath});
69 0           $obj->{log} = $obj->_build_structure($log_arrayref, $_[0]->{start_date}, $_[0]->{end_date});
70 0           return $obj;
71             }
72              
73             =head2 get_stat
74              
75             This method requires a string or scalar argument for one of the statistics options below. It returns a hashref of the statistics requested, or zero if the argument was not recognised. All successful return values include the statistics calculated per year, by day, week and month. Valid options are:
76              
77             =over 3
78              
79             =item * 'entry_count' - the number of log entries
80              
81             =item * 'unique_ip_count' - the number of unique IP addresses
82              
83             =item * 'browser_count' - a hashref with internet browser software as keys and counts as values.
84              
85             =item * 'unique_ip_browser_count' - same as browser_count except that the counts are unique combinations of IP address and browser. This reduces skew when comparing the popularity of internet browsers: one user may make more page requests than another, so controlling for IP address somewhat mitigates the issue.
86              
87             =item * 'url_count' - a hashref with urls as keys and counts as values.
88              
89             =item * 'os_count' - a hashref with operating systems as keys and counts as values.
90              
91             =item * 'unique_ip_os_count' - same as os_count except that the counts are unique combinations of IP address and operating system. This reduces skew when comparing the popularity of operating systems: one user may make more page requests than another, so controlling for IP address somewhat mitigates the issue.
92              
93             =item * 'referer_count' - a hashref with referer urls as keys and counts as values.
94              
95             =back
96              
97             =cut
98              
99             sub get_stat {
100 0     0 1   my ($self, $stat_key) = @_;
101 0 0         return 0 unless exists $self->{despatch_table}->{$stat_key};
102 0           my $stat_structure = {};
103 0           my $stat_sub = $self->{despatch_table}->{$stat_key};
104 0           foreach my $year (keys %{$self->{log}}){
  0            
105 0           foreach my $month (keys %{$self->{log}->{$year}->{months}}){
  0            
106 0           $stat_structure->{$year}->{months}->{$month} = $stat_sub->($self->{log}->{$year}->{months}->{$month});
107             }
108 0           foreach my $week (keys %{$self->{log}->{$year}->{weeks}}){
  0            
109 0           $stat_structure->{$year}->{weeks}->{$week} = $stat_sub->($self->{log}->{$year}->{weeks}->{$week});
110             }
111 0           foreach my $day (keys %{$self->{log}->{$year}->{days}}){
  0            
112 0           $stat_structure->{$year}->{days}->{$day} = $stat_sub->($self->{log}->{$year}->{days}->{$day});
113             }
114             }
115 0           return $stat_structure;
116             }
117              
118             sub _get_entries_count {
119 0     0     my $arrayref = shift;
120 0           return @{$arrayref};
  0            
121             }
122              
123             sub _get_unique_ip_count {
124 0     0     my $arrayref = shift;
125 0           my %entry_ips;
126 0           foreach my $entry (@{$arrayref}) {
  0            
127 0 0         if ($entry->get_request_url){
128 0           $entry_ips{$entry->get_ip} = 1;
129             }
130             }
131 0           return scalar keys %entry_ips;
132             }
133              
134             sub _get_unique_ip_browser_count {
135 0     0     my $arrayref = shift;
136 0           my %browsers_ips;
137 0           foreach my $entry (@{$arrayref}) {
  0            
138 0 0         if ($entry->get_request_url){
139 0           $browsers_ips{$entry->get_browser}->{$entry->get_ip} = 1;
140             }
141             }
142 0           my %unique_browser_counts;
143 0           foreach my $browser (keys %browsers_ips) {
144 0           $unique_browser_counts{$browser} = scalar keys %{$browsers_ips{$browser}};
  0            
145             }
146 0           return \%unique_browser_counts;
147             }
148              
149             sub _get_browser_count {
150 0     0     my $arrayref = shift;
151 0           my %browsers;
152 0           foreach my $entry (@{$arrayref}) {
  0            
153 0 0         if ($entry->get_request_url){
154 0           $browsers{$entry->get_browser}++;
155             }
156             }
157 0           return \%browsers;
158             }
159              
160             sub _get_os_count {
161 0     0     my $arrayref = shift;
162 0           my %os;
163 0           foreach my $entry (@{$arrayref}) {
  0            
164 0 0         if ($entry->get_request_url){
165 0           $os{$entry->get_os}++;
166             }
167             }
168 0           return \%os;
169             }
170              
171             sub _get_url_count {
172 0     0     my $arrayref = shift;
173 0           my %urls;
174 0           foreach my $entry (@{$arrayref}) {
  0            
175 0 0         if ($entry->get_request_url){
176 0           $urls{$entry->get_request_url}++;
177             }
178             }
179 0           return \%urls;
180             }
181              
182             sub _get_referer_count {
183 0     0     my $arrayref = shift;
184 0           my %referers;
185 0           foreach my $entry (@{$arrayref}) {
  0            
186 0 0         if ($entry->get_request_url){
187 0           $referers{$entry->get_referer}++;
188             }
189             }
190 0           return \%referers;
191             }
192              
193             sub _get_unique_ip_os_count {
194 0     0     my $arrayref = shift;
195 0           my %os_ips;
196 0           foreach my $entry (@{$arrayref}) {
  0            
197 0 0         if ($entry->get_request_url){
198 0           $os_ips{$entry->get_os}->{$entry->get_ip} = 1;
199             }
200             }
201 0           my %unique_os_counts;
202 0           foreach my $os (keys %os_ips) {
203 0           $unique_os_counts{$os} = scalar keys %{$os_ips{$os}};
  0            
204             }
205 0           return \%unique_os_counts;
206             }
207              
208             =head1 INTERNAL METHODS
209              
210             =head2 _build_log_array
211              
212             This internal method returns an arrayref of L<Nginx::Log::Entry> objects. It requires the filepath to the log as an argument.
213              
214             =cut
215              
216             sub _build_log_array {
217 0     0     my ($self, $filepath) = @_;
218 0 0         open (my $fh, '<', $filepath) or die $!;
219 0           my @log = ();
220 0           while (<$fh>) {
221 0           push @log, Nginx::Log::Entry->new($_);
222             }
223 0           return \@log;
224             }
225              
226             =head2 _build_structure
227              
228             This method builds the core hashref structure that is the input to the statistical calculations.
229              
230             =cut
231              
232             sub _build_structure {
233 0     0     my ($self, $log, $start_date, $end_date) = @_;
234 0           my $structure = {};
235 0           foreach my $entry (@{$log}){
  0            
236 0 0 0       if ($entry->was_robot and $self->{ignore_robots}) {
    0 0        
237 0           next;
238             }
239             elsif ($start_date and $end_date) {
240 0 0 0       next if ($entry->get_datetime_obj < $start_date
241             or $entry->get_datetime_obj > $end_date);
242             }
243 0           my $year = $entry->get_datetime_obj->yy;
244 0           my $month= $entry->get_datetime_obj->mon;
245 0           my $week = $entry->get_datetime_obj->week;
246 0           my $day = $entry->get_datetime_obj->day_of_year;
247 0           push @{$structure->{$year}->{months}->{$month}}, $entry;
  0            
248 0           push @{$structure->{$year}->{weeks}->{$week}}, $entry;
  0            
249 0           push @{$structure->{$year}->{days}->{$day}}, $entry;
  0            
250             }
251 0           return $structure;
252             }
253              
254             =head1 AUTHOR
255              
256             David Farrell, C<< >>, L
257              
258             =head1 BUGS
259              
260             Please report any bugs or feature requests to C, or through
261             the web interface at L. I will be notified, and then you'll
262             automatically be notified of progress on your bug as I make changes.
263              
264              
265              
266              
267             =head1 SUPPORT
268              
269             You can find documentation for this module with the perldoc command.
270              
271             perldoc Nginx::Log::Statistics
272              
273              
274             You can also look for information at:
275              
276             =over 4
277              
278             =item * RT: CPAN's request tracker (report bugs here)
279              
280             L
281              
282             =item * AnnoCPAN: Annotated CPAN documentation
283              
284             L
285              
286             =item * CPAN Ratings
287              
288             L
289              
290             =item * Search CPAN
291              
292             L
293              
294             =back
295              
296              
297             =head1 ACKNOWLEDGEMENTS
298              
299              
300             =head1 LICENSE AND COPYRIGHT
301              
302             Copyright 2013 David Farrell.
303              
304             This program is free software; you can redistribute it and/or modify it
305             under the terms of the Artistic License (2.0). You may obtain a
306             copy of the full license at:
307              
308             L
309              
310             Any use, modification, and distribution of the Standard or Modified
311             Versions is governed by this Artistic License. By using, modifying or
312             distributing the Package, you accept this license. Do not use, modify,
313             or distribute the Package, if you do not accept this license.
314              
315             If your Modified Version has been derived from a Modified Version made
316             by someone other than you, you are nevertheless required to ensure that
317             your Modified Version complies with the requirements of this license.
318              
319             This license does not grant you the right to use any trademark, service
320             mark, tradename, or logo of the Copyright Holder.
321              
322             This license includes the non-exclusive, worldwide, free-of-charge
323             patent license to make, have made, use, offer to sell, sell, import and
324             otherwise transfer the Package with respect to any patent claims
325             licensable by the Copyright Holder that are necessarily infringed by the
326             Package. If you institute patent litigation (including a cross-claim or
327             counterclaim) against any party alleging that the Package constitutes
328             direct or contributory patent infringement, then this Artistic License
329             to you shall terminate on the date that such litigation is filed.
330              
331             Disclaimer of Warranty: THE PACKAGE IS PROVIDED BY THE COPYRIGHT HOLDER
332             AND CONTRIBUTORS "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES.
333             THE IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
334             PURPOSE, OR NON-INFRINGEMENT ARE DISCLAIMED TO THE EXTENT PERMITTED BY
335             YOUR LOCAL LAW. UNLESS REQUIRED BY LAW, NO COPYRIGHT HOLDER OR
336             CONTRIBUTOR WILL BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, OR
337             CONSEQUENTIAL DAMAGES ARISING IN ANY WAY OUT OF THE USE OF THE PACKAGE,
338             EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
339              
340              
341             =cut
342              
343             1; # End of Nginx::Log::Statistics