File Coverage

blib/lib/Parse/AccessLog.pm
Criterion Covered Total %
statement 39 39 100.0
branch 6 8 75.0
condition 2 3 66.6
subroutine 6 6 100.0
pod 2 2 100.0
total 55 58 94.8


line stmt bran cond sub pod time code
1             package Parse::AccessLog;
2              
3 7     7   312741 use 5.006;
  7         25  
  7         276  
4 7     7   37 use strict;
  7         12  
  7         237  
5 7     7   32 use warnings;
  7         23  
  7         2988  
6              
7             =head1 NAME
8              
9             Parse::AccessLog - Parse Nginx/Apache access logs in "combined" format.
10              
11             =head1 VERSION
12              
13             Version 0.01
14              
15             =cut
16              
17             our $VERSION = '0.01';
18              
19             =head1 SYNOPSIS
20              
21             Parses web server logs created by Apache/nginx in combined format. Assumes no knowledge of the server which creates the log entries.
22              
23             Following the UNIX philosophy of "write programs that do one thing and do it well", this module does not attempt to validate any of the data/fields (e.g. match the IP address via a regex or some other method). This module assumes that the logs are already written by a web server daemon, and whether the data are "correct" or not is up to the end user. This module just parses it.
24              
25             use Parse::AccessLog;
26              
27             my $p = Parse::AccessLog->new;
28              
29             # returns one hashref
30             my $log_line = q{127.0.0.1 - - [11/Jan/2013:17:31:36 -0600] "GET / HTTP/1.1" 200 612 "-" "HTTP-Tiny/0.022"};
31             my $rec = $p->parse($log_line);
32              
33             ...
34              
35             # returns two hashrefs...
36             my @log_lines = (
37             q{127.0.0.1 - - [11/Jan/2013:17:31:36 -0600] "GET / HTTP/1.1" 200 612 "-" "HTTP-Tiny/0.022"},
38             q{127.0.0.1 - - [11/Jan/2013:17:31:38 -0600] "GET / HTTP/1.1" 200 612 "-" "HTTP-Tiny/0.022"},
39             );
40             my @recs = $p->parse( @log_lines );
41              
42             ...
43              
44             # returns a hashref for each line in 'access.log'...
45             my @recs = $p->parse( '/var/log/nginx/access.log' );
46              
47             =head1 METHODS
48              
49             =head2 new()
50              
51             Constructor, creates a Parse::AccessLog parser object. Use of new() is
52             optional, since the parse() method can be called as a class method also.
53              
54             =cut
55              
# Constructor. Returns a new, empty hash-based object blessed into whatever
# class name the method was invoked on. The object holds no state; it only
# serves as an invocant for parse().
sub new {
    my ($class) = @_;
    return bless {}, $class;
}
61              
62             =head2 parse()
63              
64             This is the only method (other than the constructor new()), and can be called
65             as a class method or as an object method. It Does What You Want (probably).
66              
67             Accepts a scalar or a list. If a scalar, can be either one line of an access
68             log file, or can be the full path (absolute or relative) to an access log (e.g.
69             /var/log/apache2/access.log). If a list, expects each element to be a line from
70             an access log file. Will return either a single hashref or a list of hashrefs
71             (undef in place of a hashref for any line that cannot be parsed) with the following keys:
72              
73             remote_addr
74             remote_user
75             time_local
76             request
77             status
78             bytes_sent
79             referer
80             user_agent
81              
82             =cut
83              
# Parse a single access-log line in "combined" format.
#
# Argument: one string; a trailing newline is removed before matching.
# Returns:  a hashref with the keys remote_addr, remote_user, time_local,
#           request, status, bytes_sent, referer and user_agent, or undef
#           when the argument is undefined or does not match the layout.
#
# This helper never touches the filesystem, so it is safe to call on lines
# that were read from a log file (whose content is not under our control).
sub _parse_line {
    my ($line) = @_;

    my $record;    # stays undef unless the line matches
    return $record unless defined $line;

    chomp $line;

    my @field_names = qw(remote_addr remote_user time_local request
                         status bytes_sent referer user_agent);

    # In list context a successful match returns the eight captures in
    # order; a failed match returns the empty list.
    my @values = $line =~ /^ (\S+)              # remote_addr
                           \ \-\ (\S+)          # remote_user
                           \ \[([^\]]+)\]       # time_local
                           \ "(.*?)"            # request
                           \ (\d+)              # status
                           \ (\-|(?:\d+))       # bytes_sent
                           \ "(.*?)"            # referer
                           \ "(.*?)"            # user_agent
                           $ /x;

    if (@values) {
        my %field;
        @field{@field_names} = @values;
        $record = \%field;
    }

    return $record;
}

# Parse access-log data. Callable as a class method or an object method.
#
# Arguments (after the invocant):
#   * several values - each one is handled as its own scalar input (below)
#                      and the results are returned as one flat list;
#   * one value      - if it names an existing regular file, every line of
#                      that file is parsed; otherwise the value itself is
#                      parsed as a single log line.
#
# Returns nothing in void context. For a single line, returns a hashref or
# undef if the line does not match. For a list or a file, returns one entry
# per line (undef for lines that do not match); in scalar context that
# becomes the number of entries.
#
# Croaks if a file that passed the -f test cannot be opened for reading.
sub parse {
    # don't parse anything in void context
    return unless defined wantarray;

    my $self = shift;

    # More than one argument: treat each element as its own scalar input.
    if ( @_ > 1 ) {
        return map { $self->parse($_) } @_;
    }

    my $input = shift;

    if ( defined $input ) {
        # The file test is made on a chomped copy so that a path carrying a
        # trailing newline is still recognised.
        chomp( my $path = $input );

        if ( -f $path ) {
            require Carp;
            open( my $fh, '<', $path )
                or Carp::croak("Cannot open '$path' for reading: $!");

            # Lines go straight to _parse_line() rather than back through
            # parse(): a log line that happens to name an existing file
            # must not cause that file to be opened.
            my @records;
            while ( defined( my $row = <$fh> ) ) {
                push @records, _parse_line($row);
            }
            close($fh);

            return @records;
        }
    }

    return _parse_line($input);
}
134              
135             =head1 SEE ALSO
136              
137             http://en.wikipedia.org/w/index.php?title=Unix_philosophy&oldid=525612531
138              
139             =head1 AUTHOR
140              
141             Nathan Marley
142              
143             =head1 BUGS
144              
145             Please report any bugs through github at https://github.com/nmarley/Parse-AccessLog.
146              
147              
148             =head1 SUPPORT
149              
150             You can find documentation for this module with the perldoc command.
151              
152             perldoc Parse::AccessLog
153              
154             You can also look for information at:
155              
156             =over 3
157              
158             =item * MetaCPAN
159              
160             L<https://metacpan.org/release/Parse-AccessLog>
161              
162             =item * GitHub
163              
164             L<https://github.com/nmarley/Parse-AccessLog>
165              
166             =item * Search CPAN
167              
168             L<http://search.cpan.org/dist/Parse-AccessLog/>
169              
170             =back
171              
172              
173             =head1 LICENSE AND COPYRIGHT
174              
175             Copyright 2013 Nathan Marley.
176              
177             This program is free software; you can redistribute it and/or modify it
178             under the terms of either: the GNU General Public License as published
179             by the Free Software Foundation; or the Artistic License.
180              
181             See http://dev.perl.org/licenses/ for more information.
182              
183              
184             =cut
185              
186             1;
187