| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package ApacheLog::Compressor; |
|
2
|
|
|
|
|
|
|
# ABSTRACT: Convert Apache/CLF data to binary format |
|
3
|
3
|
|
|
3
|
|
56342
|
use strict; |
|
|
3
|
|
|
|
|
8
|
|
|
|
3
|
|
|
|
|
125
|
|
|
4
|
3
|
|
|
3
|
|
19
|
use warnings; |
|
|
3
|
|
|
|
|
7
|
|
|
|
3
|
|
|
|
|
108
|
|
|
5
|
|
|
|
|
|
|
|
|
6
|
3
|
|
|
3
|
|
3455
|
use Socket qw(inet_aton inet_ntoa); |
|
|
3
|
|
|
|
|
14966
|
|
|
|
3
|
|
|
|
|
779
|
|
|
7
|
3
|
|
|
3
|
|
4705
|
use Date::Parse qw(str2time); |
|
|
3
|
|
|
|
|
136038
|
|
|
|
3
|
|
|
|
|
265
|
|
|
8
|
3
|
|
|
3
|
|
29
|
use List::Util qw(min); |
|
|
3
|
|
|
|
|
6
|
|
|
|
3
|
|
|
|
|
303
|
|
|
9
|
3
|
|
|
3
|
|
326076
|
use URI; |
|
|
3
|
|
|
|
|
34329
|
|
|
|
3
|
|
|
|
|
106
|
|
|
10
|
3
|
|
|
3
|
|
29
|
use URI::Escape qw(uri_unescape); |
|
|
3
|
|
|
|
|
5
|
|
|
|
3
|
|
|
|
|
208
|
|
|
11
|
3
|
|
|
3
|
|
4669
|
use DateTime; |
|
|
3
|
|
|
|
|
914507
|
|
|
|
3
|
|
|
|
|
159
|
|
|
12
|
3
|
|
|
3
|
|
3441
|
use Encode qw(encode_utf8 decode_utf8 FB_DEFAULT is_utf8 FB_CROAK); |
|
|
3
|
|
|
|
|
42828
|
|
|
|
3
|
|
|
|
|
402
|
|
|
13
|
3
|
|
|
3
|
|
56
|
use POSIX qw{strftime}; |
|
|
3
|
|
|
|
|
7
|
|
|
|
3
|
|
|
|
|
31
|
|
|
14
|
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
our $VERSION = '0.005'; |
|
16
|
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
=head1 NAME |
|
18
|
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
ApacheLog::Compressor - convert Apache / CLF log files into a binary format for transfer |
|
20
|
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
=head1 VERSION |
|
22
|
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
version 0.005 |
|
24
|
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
26
|
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
use ApacheLog::Compressor; |
|
28
|
|
|
|
|
|
|
use Sys::Hostname qw(hostname); |
|
29
|
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
# Write all data to bzip2-compressed output file |
|
31
|
|
|
|
|
|
|
open my $out_fh, '>', 'compressed.log.bz2' or die "Failed to create output file: $!"; |
|
32
|
|
|
|
|
|
|
binmode $out_fh; |
|
33
|
|
|
|
|
|
|
my $zip = IO::Compress::Bzip2->new($out_fh, BlockSize100K => 9); |
|
34
|
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
# Provide a callback to send data through to the file |
|
36
|
|
|
|
|
|
|
my $alc = ApacheLog::Compressor->new( |
|
37
|
|
|
|
|
|
|
on_write => sub { |
|
38
|
|
|
|
|
|
|
my ($self, $pkt) = @_; |
|
39
|
|
|
|
|
|
|
$zip->write($pkt); |
|
40
|
|
|
|
|
|
|
} |
|
41
|
|
|
|
|
|
|
); |
|
42
|
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
# Input file - normally use whichever one's just been closed + rotated |
|
44
|
|
|
|
|
|
|
open my $fh, '<', '/var/log/apache2/access.log.1' or die "Failed to open log: $!"; |
|
45
|
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
# Initial packet to identify which server this came from |
|
47
|
|
|
|
|
|
|
$alc->send_packet('server', |
|
48
|
|
|
|
|
|
|
hostname => hostname(), |
|
49
|
|
|
|
|
|
|
); |
|
50
|
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
# Read and compress all the lines in the files |
|
52
|
|
|
|
|
|
|
while(my $line = <$fh>) { |
|
53
|
|
|
|
|
|
|
$alc->compress($line); |
|
54
|
|
|
|
|
|
|
} |
|
55
|
|
|
|
|
|
|
close $fh or die $!; |
|
56
|
|
|
|
|
|
|
$zip->close; |
|
57
|
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
# Dump the stats in case anyone finds them useful |
|
59
|
|
|
|
|
|
|
$alc->stats; |
|
60
|
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
=head1 DESCRIPTION |
|
62
|
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
Converts data from standard Apache log format into a binary stream which is typically 20% - 60% the size of the original file. |
|
64
|
|
|
|
|
|
|
Intended for cases where log data needs transferring from multiple high-volume servers for analysis (potentially in realtime |
|
65
|
|
|
|
|
|
|
via tail -f). |
|
66
|
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
The log format is a simple dictionary replacement algorithm: each field that cannot be represented in a fixed-width datatype |
|
68
|
|
|
|
|
|
|
is replaced with an indexed value, allowing the basic log line packet to be fixed size with additional packets containing the |
|
69
|
|
|
|
|
|
|
first instance of each variable-width data item. |
|
70
|
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
Example: |
|
72
|
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
api.example.com 105327 123.15.16.108 - apiuser@example.com [19/Dec/2009:03:12:07 +0000] "POST /api/status.json HTTP/1.1" 200 80516 "-" "-" "-" |
|
74
|
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
The duration, IP, timestamp, method, HTTP version, response and size can all be stored as 32-bit quantities (or smaller), without losing |
|
76
|
|
|
|
|
|
|
any information. The vhost, user and URL are extracted to separate packets, since we expect to see them at least twice on a typical server. |
|
77
|
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
This would be converted to: |
|
79
|
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
=over 4 |
|
81
|
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
=item * vhost packet - api.example.com assigned index 0 |
|
83
|
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
=item * user packet - apiuser@example.com assigned index 0 |
|
85
|
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
=item * url packet - /api/status.json assigned index 0 |
|
87
|
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
=item * timestamp packet - since a busy server is likely to have several requests a second, there's a tiny saving to be had by sending this only when the value changes, so we push this into a separate packet as well. |
|
89
|
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
=item * log packet - actual data, binary encoded. |
|
91
|
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
=back |
|
93
|
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
The following packet types are available: |
|
95
|
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
=over 4 |
|
97
|
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
=item * 00 - Log entry |
|
99
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
=item * 01 - Change server |
|
101
|
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
=item * 02 - timestamp |
|
103
|
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
=item * 03 - vhost |
|
105
|
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
=item * 04 - user |
|
107
|
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
=item * 05 - useragent |
|
109
|
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
=item * 06 - referer |
|
111
|
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
=item * 07 - url |
|
113
|
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
=item * 80 - reset |
|
115
|
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
=back |
|
117
|
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
The log entry itself normally consists of the following fields: |
|
119
|
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
N vhost |
|
121
|
|
|
|
|
|
|
N time |
|
122
|
|
|
|
|
|
|
N IP |
|
123
|
|
|
|
|
|
|
N user |
|
124
|
|
|
|
|
|
|
N useragent |
|
125
|
|
|
|
|
|
|
N timestamp |
|
126
|
|
|
|
|
|
|
C method |
|
127
|
|
|
|
|
|
|
C version |
|
128
|
|
|
|
|
|
|
n response |
|
129
|
|
|
|
|
|
|
N bytes |
|
130
|
|
|
|
|
|
|
N url |
|
131
|
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
The format of the log file can be customised, see the next section for details. |
|
133
|
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
=head3 FORMAT SPECIFICATION |
|
135
|
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
A custom format can be provided as the C parameter when instantiating |
|
137
|
|
|
|
|
|
|
a new L object via ->L. This format consists of an |
|
138
|
|
|
|
|
|
|
arrayref of key/value pairs, each value holding the following information: |
|
139
|
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
=over 4 |
|
141
|
|
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
=item * id - the ID to use when sending packets |
|
143
|
|
|
|
|
|
|
|
|
144
|
|
|
|
|
|
|
=item * type - L format specifier used when storing and retrieving the data, such as N1 or n1. Without this there will be no entry for the item in the compressed log stream |
|
145
|
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
=item * regex - the regular expression used for matching this part of the log file. The |
|
147
|
|
|
|
|
|
|
final regex will be the concatenation of all regex entries for the format, joined |
|
148
|
|
|
|
|
|
|
using \s+ as the delimiter. |
|
149
|
|
|
|
|
|
|
|
|
150
|
|
|
|
|
|
|
=item * process_in - coderef for converting incoming values from a plain text log source into compressed values, will receive $self (the current L instance) and $data (the current hashref containing the raw data). |
|
151
|
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
=item * process_out - coderef for converting values from a compressed source back to plain text, will receive $self (the current L instance) and $data (the current hashref containing the raw data). |
|
153
|
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
=back |
|
155
|
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
=cut |
|
157
|
|
|
|
|
|
|
|
|
158
|
|
|
|
|
|
|
our %HTTP_METHOD; |
|
159
|
|
|
|
|
|
|
our @HTTP_METHOD_LIST = qw(GET PUT HEAD POST OPTIONS DELETE TRACE CONNECT MKCOL PATCH PROPFIND PROPPATCH FILEPATCH COPY MOVE LOCK UNLOCK SIGNATURE DELTA); |
|
160
|
|
|
|
|
|
|
{ my $idx = 0; %HTTP_METHOD = map { $_ => $idx++ } @HTTP_METHOD_LIST; } |
|
161
|
|
|
|
|
|
|
|
|
162
|
|
|
|
|
|
|
=head1 METHODS |
|
163
|
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
=cut |
|
165
|
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
=head2 new |
|
167
|
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
Instantiate the class. |
|
169
|
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
Takes the following named parameters: |
|
171
|
|
|
|
|
|
|
|
|
172
|
|
|
|
|
|
|
=over 4 |
|
173
|
|
|
|
|
|
|
|
|
174
|
|
|
|
|
|
|
=item * on_write - coderef to call with packet data for each outgoing packet |
|
175
|
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
=back |
|
177
|
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
=cut |
|
179
|
|
|
|
|
|
|
|
|
180
|
|
|
|
|
|
|
|
|
181
|
|
|
|
|
|
|
sub new { |
|
182
|
5
|
|
|
5
|
1
|
4712
|
my $class = shift; |
|
183
|
5
|
|
|
|
|
19
|
my %args = @_; |
|
184
|
5
|
|
|
|
|
12
|
my $format = delete $args{format}; |
|
185
|
5
|
|
|
|
|
42
|
my $self = bless { |
|
186
|
|
|
|
|
|
|
%args, |
|
187
|
|
|
|
|
|
|
entry_index => {}, |
|
188
|
|
|
|
|
|
|
entry_cache => {}, |
|
189
|
|
|
|
|
|
|
log_packet_count => 0, |
|
190
|
|
|
|
|
|
|
timestamp => undef, |
|
191
|
|
|
|
|
|
|
server => undef, |
|
192
|
|
|
|
|
|
|
}, $class; |
|
193
|
5
|
|
33
|
|
|
29
|
$self->{format} = $format || $self->default_format; |
|
194
|
5
|
|
|
|
|
22
|
$self->update_mapping; |
|
195
|
5
|
|
|
|
|
18
|
return $self; |
|
196
|
|
|
|
|
|
|
} |
|
197
|
|
|
|
|
|
|
|
|
198
|
|
|
|
|
|
|
=head2 default_format |
|
199
|
|
|
|
|
|
|
|
|
200
|
|
|
|
|
|
|
Returns the default format used for parsing log lines. |
|
201
|
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
This is an arrayref containing key => value pairs, see L for |
|
203
|
|
|
|
|
|
|
more details. |
|
204
|
|
|
|
|
|
|
|
|
205
|
|
|
|
|
|
|
=cut |
|
206
|
|
|
|
|
|
|
|
|
207
|
|
|
|
|
|
|
sub default_format { |
|
208
|
5
|
|
|
5
|
1
|
9
|
my $self = shift; |
|
209
|
|
|
|
|
|
|
return [ |
|
210
|
|
|
|
|
|
|
type => { type => 'C1' }, |
|
211
|
|
|
|
|
|
|
vhost => { id => 0x03, type => 'n1', regex => qr{([^ ]+)} }, |
|
212
|
|
|
|
|
|
|
duration => { type => 'N1', regex => qr{(\d+)} }, |
|
213
|
|
|
|
|
|
|
ip => { |
|
214
|
|
|
|
|
|
|
type => 'N1', |
|
215
|
|
|
|
|
|
|
regex => qr{(\S+)\s+\S+}, |
|
216
|
|
|
|
|
|
|
process_in => sub { |
|
217
|
2
|
|
|
2
|
|
5
|
my ($self, $data) = @_; |
|
218
|
2
|
|
|
|
|
53
|
$data->{ip} = unpack('N1', inet_aton($data->{ip})); |
|
219
|
|
|
|
|
|
|
}, |
|
220
|
|
|
|
|
|
|
process_out => sub { |
|
221
|
2
|
|
|
2
|
|
5
|
my ($self, $data) = @_; |
|
222
|
2
|
|
|
|
|
31
|
$data->{ip} = inet_ntoa(pack('N1', $data->{ip})); |
|
223
|
|
|
|
|
|
|
} |
|
224
|
|
|
|
|
|
|
}, |
|
225
|
|
|
|
|
|
|
user => { id => 0x04, type => 'n1', regex => qr{(\S+)} }, |
|
226
|
|
|
|
|
|
|
timestamp => { |
|
227
|
|
|
|
|
|
|
id => 0x02, |
|
228
|
|
|
|
|
|
|
regex => qr{\[([^\]]+)\]}, |
|
229
|
|
|
|
|
|
|
process_in => sub { |
|
230
|
2
|
|
|
2
|
|
5
|
my ($self, $data) = @_; |
|
231
|
2
|
|
|
|
|
20
|
$data->{timestamp} = str2time($data->{timestamp}); |
|
232
|
|
|
|
|
|
|
} |
|
233
|
|
|
|
|
|
|
}, |
|
234
|
|
|
|
|
|
|
method => { |
|
235
|
|
|
|
|
|
|
type => 'C1', |
|
236
|
|
|
|
|
|
|
regex => qr{"([^ ]+)}, |
|
237
|
|
|
|
|
|
|
process_in => sub { |
|
238
|
2
|
|
|
2
|
|
922
|
my ($self, $data) = @_; |
|
239
|
2
|
|
|
|
|
12
|
$data->{method} = $HTTP_METHOD{$data->{method}}; |
|
240
|
|
|
|
|
|
|
}, |
|
241
|
|
|
|
|
|
|
process_out => sub { |
|
242
|
2
|
|
|
2
|
|
5
|
my ($self, $data) = @_; |
|
243
|
2
|
|
|
|
|
15
|
$data->{method} = $HTTP_METHOD_LIST[$data->{method}]; |
|
244
|
|
|
|
|
|
|
} |
|
245
|
|
|
|
|
|
|
}, |
|
246
|
|
|
|
|
|
|
url => { |
|
247
|
|
|
|
|
|
|
id => 0x07, |
|
248
|
|
|
|
|
|
|
type => 'N1', |
|
249
|
|
|
|
|
|
|
regex => qr{([^ ]+)}, |
|
250
|
|
|
|
|
|
|
process_in => sub { |
|
251
|
2
|
|
|
2
|
|
5
|
my ($self, $data) = @_; |
|
252
|
2
|
50
|
|
|
|
9
|
return $data->{url} = '' unless defined $data->{url}; |
|
253
|
|
|
|
|
|
|
|
|
254
|
2
|
|
|
|
|
15
|
($data->{url}, $data->{query}) = split /\?/, $data->{url}, 2; |
|
255
|
|
|
|
|
|
|
# Dodgy UTF8 handling, currently disabled - no guarantee that URLs are UTF8 anyway |
|
256
|
|
|
|
|
|
|
# if(length $data->{url}) { |
|
257
|
|
|
|
|
|
|
# URI::Escape's uri_unescape but in byte mode so we can check utf8 decoding manually |
|
258
|
|
|
|
|
|
|
# my $txt = $data->{url}; |
|
259
|
|
|
|
|
|
|
# $txt = encode_utf8($txt); # turn OFF utf8 |
|
260
|
|
|
|
|
|
|
# $txt =~ s/%([0-9A-Fa-f]{2})/pack("C1", hex($1))/ge; # expand |
|
261
|
|
|
|
|
|
|
# $txt = decode_utf8($txt); # turn ON utf8 where applicable |
|
262
|
|
|
|
|
|
|
# $data->{url} = $txt; |
|
263
|
|
|
|
|
|
|
# } |
|
264
|
|
|
|
|
|
|
# if(defined $data->{query} && length $data->{query}) { |
|
265
|
|
|
|
|
|
|
# URI::Escape's uri_unescape but in byte mode so we can check utf8 decoding manually |
|
266
|
|
|
|
|
|
|
# (my $txt = $data->{query}) =~ s/%([0-9A-Fa-f]{2})/pack("C1", hex($1))/eg; |
|
267
|
|
|
|
|
|
|
# $data->{query} = decode_utf8($txt, FB_DEFAULT); |
|
268
|
|
|
|
|
|
|
# } |
|
269
|
|
|
|
|
|
|
} |
|
270
|
|
|
|
|
|
|
}, |
|
271
|
|
|
|
|
|
|
query => { id => 0x0A, type => 'N1', }, |
|
272
|
|
|
|
|
|
|
ver => { |
|
273
|
|
|
|
|
|
|
type => 'C1', |
|
274
|
|
|
|
|
|
|
regex => qr{HTTP/(\d+\.\d+)"}, |
|
275
|
|
|
|
|
|
|
process_in => sub { |
|
276
|
2
|
|
|
2
|
|
4
|
my ($self, $data) = @_; |
|
277
|
2
|
50
|
|
|
|
11
|
$data->{ver} = ($data->{ver} eq '1.0' ? 0 : 1); |
|
278
|
|
|
|
|
|
|
}, process_out => sub { |
|
279
|
2
|
|
|
2
|
|
5
|
my ($self, $data) = @_; |
|
280
|
2
|
50
|
|
|
|
17
|
$data->{ver} = ($data->{ver} ? '1.1' : '1.0'); |
|
281
|
|
|
|
|
|
|
} |
|
282
|
|
|
|
|
|
|
}, |
|
283
|
|
|
|
|
|
|
result => { type => 'n1', regex => qr{(\d+)} }, |
|
284
|
|
|
|
|
|
|
size => { |
|
285
|
|
|
|
|
|
|
type => 'N1', |
|
286
|
|
|
|
|
|
|
regex => qr{(\d+|-)}, |
|
287
|
|
|
|
|
|
|
process_in => sub { |
|
288
|
2
|
|
|
2
|
|
5
|
my ($self, $data) = @_; |
|
289
|
2
|
100
|
|
|
|
12
|
$data->{size} = ($data->{size} eq '-') ? -1 : $data->{size}; |
|
290
|
|
|
|
|
|
|
}, process_out => sub { |
|
291
|
2
|
|
|
2
|
|
7
|
my ($self, $data) = @_; |
|
292
|
2
|
100
|
|
|
|
16
|
$data->{size} = ($data->{size} == 4294967295) ? '-' : $data->{size}; |
|
293
|
|
|
|
|
|
|
} |
|
294
|
|
|
|
|
|
|
}, |
|
295
|
5
|
|
|
|
|
406
|
refer => { id => 0x06, type => 'n1', regex => qr{"([^"]*)"} }, |
|
296
|
|
|
|
|
|
|
useragent => { id => 0x05, type => 'n1', regex => qr{"([^"]*)"} }, |
|
297
|
|
|
|
|
|
|
]; |
|
298
|
|
|
|
|
|
|
} |
|
299
|
|
|
|
|
|
|
|
|
300
|
|
|
|
|
|
|
=head2 update_mapping |
|
301
|
|
|
|
|
|
|
|
|
302
|
|
|
|
|
|
|
Refresh the mapping from format keys and internal definitions. |
|
303
|
|
|
|
|
|
|
|
|
304
|
|
|
|
|
|
|
=cut |
|
305
|
|
|
|
|
|
|
|
|
306
|
|
|
|
|
|
|
sub update_mapping { |
|
307
|
5
|
|
|
5
|
1
|
7
|
my $self = shift; |
|
308
|
5
|
|
|
|
|
10
|
my %fmt = @{ $self->{format} }; |
|
|
5
|
|
|
|
|
47
|
|
|
309
|
5
|
|
|
|
|
14
|
$self->{format_hash} = \%fmt; |
|
310
|
35
|
|
|
|
|
110
|
$self->{packet_handler} = { |
|
311
|
|
|
|
|
|
|
0x00 => 'log', |
|
312
|
|
|
|
|
|
|
0x01 => 'server', |
|
313
|
|
|
|
|
|
|
0x80 => 'reset', |
|
314
|
5
|
|
|
|
|
25
|
map { $fmt{$_}->{id} => $_ } grep { exists $fmt{$_}->{id} } keys %fmt |
|
|
70
|
|
|
|
|
131
|
|
|
315
|
|
|
|
|
|
|
}; |
|
316
|
|
|
|
|
|
|
|
|
317
|
|
|
|
|
|
|
# Extract information from format strings so that we know how big the packets are and where the data goes |
|
318
|
5
|
|
|
|
|
17
|
my @fmt = @{$self->{format}}; |
|
|
5
|
|
|
|
|
33
|
|
|
319
|
5
|
|
|
|
|
10
|
my $pack_str = ''; |
|
320
|
5
|
|
|
|
|
6
|
my $log_len = 0; |
|
321
|
5
|
|
|
|
|
6
|
my @format_keys; |
|
322
|
|
|
|
|
|
|
my @regex; |
|
323
|
|
|
|
|
|
|
ITEM: |
|
324
|
5
|
|
|
|
|
74
|
while(@fmt) { |
|
325
|
70
|
|
|
|
|
97
|
my $k = shift(@fmt); |
|
326
|
70
|
|
|
|
|
80
|
my $v = shift(@fmt); |
|
327
|
70
|
50
|
|
|
|
126
|
$v = { type => $v } unless ref $v; |
|
328
|
70
|
100
|
|
|
|
143
|
if(exists $v->{regex}) { |
|
329
|
60
|
|
|
|
|
89
|
push @regex, $v->{regex}; |
|
330
|
60
|
|
|
|
|
61
|
push @{$self->{log_regex_keys}}, $k; |
|
|
60
|
|
|
|
|
110
|
|
|
331
|
|
|
|
|
|
|
} |
|
332
|
70
|
100
|
|
|
|
143
|
if(exists $v->{process_in}) { |
|
333
|
30
|
|
|
|
|
27
|
push @{$self->{log_process}}, $v->{process_in}; |
|
|
30
|
|
|
|
|
54
|
|
|
334
|
|
|
|
|
|
|
} |
|
335
|
70
|
100
|
|
|
|
132
|
if(exists $v->{process_out}) { |
|
336
|
20
|
|
|
|
|
19
|
push @{$self->{log_process_out}}, $v->{process_out}; |
|
|
20
|
|
|
|
|
35
|
|
|
337
|
|
|
|
|
|
|
} |
|
338
|
|
|
|
|
|
|
|
|
339
|
70
|
|
|
|
|
90
|
my $type = $v->{type}; |
|
340
|
70
|
100
|
|
|
|
124
|
next ITEM unless $type; |
|
341
|
|
|
|
|
|
|
|
|
342
|
65
|
|
|
|
|
88
|
push @format_keys, $k; |
|
343
|
65
|
|
|
|
|
102
|
$pack_str .= $type; |
|
344
|
|
|
|
|
|
|
|
|
345
|
|
|
|
|
|
|
# Obviously these will need updating if we use any other pack() datatypes |
|
346
|
65
|
100
|
|
|
|
244
|
if($type =~ /^C(\d+)/) { |
|
|
|
100
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
347
|
15
|
|
|
|
|
50
|
$log_len += $1; |
|
348
|
|
|
|
|
|
|
} elsif($type =~ /^n(\d+)/) { |
|
349
|
25
|
|
|
|
|
76
|
$log_len += 2 * $1; |
|
350
|
|
|
|
|
|
|
} elsif($type =~ /^N(\d+)/) { |
|
351
|
25
|
|
|
|
|
73
|
$log_len += 4 * $1; |
|
352
|
|
|
|
|
|
|
} else { |
|
353
|
0
|
|
|
|
|
0
|
die "no idea what $type is"; |
|
354
|
|
|
|
|
|
|
} |
|
355
|
|
|
|
|
|
|
} |
|
356
|
5
|
|
|
|
|
25
|
my $regex = join(' ', @regex); |
|
357
|
5
|
|
|
|
|
198
|
$self->{log_regex} = qr{^$regex}; |
|
358
|
5
|
|
|
|
|
11
|
$self->{log_format} = $pack_str; |
|
359
|
5
|
|
|
|
|
27
|
$self->{log_record_length} = $log_len; |
|
360
|
5
|
|
|
|
|
13
|
$self->{format_keys} = \@format_keys; |
|
361
|
5
|
|
|
|
|
23
|
return $self; |
|
362
|
|
|
|
|
|
|
} |
|
363
|
|
|
|
|
|
|
|
|
364
|
|
|
|
|
|
|
=head2 cached |
|
365
|
|
|
|
|
|
|
|
|
366
|
|
|
|
|
|
|
Returns the index for the given type and value, generating a packet if no previous value was found. |
|
367
|
|
|
|
|
|
|
|
|
368
|
|
|
|
|
|
|
=cut |
|
369
|
|
|
|
|
|
|
|
|
370
|
|
|
|
|
|
|
sub cached { |
|
371
|
12
|
|
|
12
|
1
|
14
|
my $self = shift; |
|
372
|
12
|
|
|
|
|
15
|
my ($type, $v) = @_; |
|
373
|
12
|
100
|
|
|
|
27
|
$v = '' unless defined $v; |
|
374
|
12
|
|
|
|
|
30
|
my $id = $self->{entry_cache}->{$type}->{$v}; |
|
375
|
12
|
50
|
|
|
|
30
|
unless(defined $id) { |
|
376
|
12
|
|
|
|
|
14
|
push @{ $self->{entry_index}->{$type} }, $v; |
|
|
12
|
|
|
|
|
34
|
|
|
377
|
12
|
|
|
|
|
26
|
++$self->{entry_count}->{$type}; |
|
378
|
12
|
|
|
|
|
11
|
$id = $self->{entry_cache}->{$type}->{$v} = scalar(@{ $self->{entry_index}->{$type} }) - 1; |
|
|
12
|
|
|
|
|
44
|
|
|
379
|
12
|
|
|
|
|
41
|
$self->send_packet($type, id => $id, data => encode_utf8($v)); |
|
380
|
|
|
|
|
|
|
} |
|
381
|
12
|
|
|
|
|
32
|
return $id; |
|
382
|
|
|
|
|
|
|
} |
|
383
|
|
|
|
|
|
|
|
|
384
|
|
|
|
|
|
|
=head2 from_cache |
|
385
|
|
|
|
|
|
|
|
|
386
|
|
|
|
|
|
|
Read a value from the cache, for expanding compressed log format entries. |
|
387
|
|
|
|
|
|
|
|
|
388
|
|
|
|
|
|
|
=cut |
|
389
|
|
|
|
|
|
|
|
|
390
|
|
|
|
|
|
|
sub from_cache { |
|
391
|
13
|
|
|
13
|
1
|
1281
|
my $self = shift; |
|
392
|
13
|
|
|
|
|
28
|
my ($type, $id) = @_; |
|
393
|
13
|
50
|
|
|
|
49
|
die "ID $id not found for $type\n" unless defined $self->{entry_index}->{$type}->[$id]; |
|
394
|
13
|
|
|
|
|
113
|
return $self->{entry_index}->{$type}->[$id]; |
|
395
|
|
|
|
|
|
|
} |
|
396
|
|
|
|
|
|
|
|
|
397
|
|
|
|
|
|
|
=head2 set_key |
|
398
|
|
|
|
|
|
|
|
|
399
|
|
|
|
|
|
|
Set a cache index key to a value when expanding a packet stream. |
|
400
|
|
|
|
|
|
|
|
|
401
|
|
|
|
|
|
|
=cut |
|
402
|
|
|
|
|
|
|
|
|
403
|
|
|
|
|
|
|
sub set_key { |
|
404
|
12
|
|
|
12
|
1
|
18
|
my $self = shift; |
|
405
|
12
|
|
|
|
|
14
|
my $type = shift; |
|
406
|
12
|
|
|
|
|
41
|
my %args = @_; |
|
407
|
12
|
|
|
|
|
39
|
my $v = decode_utf8($args{data}); |
|
408
|
12
|
|
|
|
|
316
|
$self->{entry_cache}->{$type}->{$v} = $args{id}; |
|
409
|
12
|
|
|
|
|
79
|
$self->{entry_index}->{$type}->[$args{id}] = $v; |
|
410
|
12
|
50
|
|
|
|
117
|
$self->{"on_set_$type"}->($self, $args{id}, $v) if $self->{"on_set_$type"}; |
|
411
|
12
|
50
|
|
|
|
30
|
$self->{"on_set_key"}->($self, $type, $args{id}, $v) if $self->{on_set_key}; |
|
412
|
12
|
|
|
|
|
45
|
return $self; |
|
413
|
|
|
|
|
|
|
} |
|
414
|
|
|
|
|
|
|
|
|
415
|
|
|
|
|
|
|
=head2 compress |
|
416
|
|
|
|
|
|
|
|
|
417
|
|
|
|
|
|
|
General compression function. Given a line of data, sends packets as required to transmit that information. |
|
418
|
|
|
|
|
|
|
|
|
419
|
|
|
|
|
|
|
=cut |
|
420
|
|
|
|
|
|
|
|
|
421
|
|
|
|
|
|
|
sub compress { |
|
422
|
3
|
|
|
3
|
1
|
19
|
my $self = shift; |
|
423
|
3
|
|
|
|
|
5
|
my $txt = shift; |
|
424
|
3
|
|
|
|
|
6
|
my %data; |
|
425
|
3
|
100
|
|
|
|
33
|
@data{@{$self->{log_regex_keys}}} = $txt =~ m!$self->{log_regex}! |
|
|
3
|
|
|
|
|
54
|
|
|
426
|
|
|
|
|
|
|
or return $self->invoke_event(bad_data => $txt); |
|
427
|
2
|
|
|
|
|
8
|
$data{type} = 0; |
|
428
|
2
|
|
|
|
|
4
|
$_->($self, \%data) for @{$self->{log_process}}; |
|
|
2
|
|
|
|
|
20
|
|
|
429
|
2
|
50
|
33
|
|
|
10
|
return if exists($self->{filter}) && !$self->{filter}->($self, \%data); |
|
430
|
|
|
|
|
|
|
|
|
431
|
2
|
50
|
33
|
|
|
11
|
if(!defined($self->{timestamp}) || $data{timestamp} != $self->{timestamp}) { |
|
432
|
2
|
|
|
|
|
6
|
$self->{timestamp} = $data{timestamp}; |
|
433
|
2
|
|
|
|
|
8
|
$self->send_packet('timestamp', timestamp => $self->{timestamp}); |
|
434
|
|
|
|
|
|
|
} |
|
435
|
|
|
|
|
|
|
|
|
436
|
2
|
|
|
|
|
4
|
my @fmt = @{$self->{format}}; |
|
|
2
|
|
|
|
|
13
|
|
|
437
|
2
|
|
|
|
|
4
|
my @data; |
|
438
|
2
|
|
|
|
|
11
|
while(@fmt) { |
|
439
|
28
|
|
|
|
|
33
|
my $k = shift(@fmt); |
|
440
|
28
|
|
|
|
|
33
|
my $v = shift(@fmt); |
|
441
|
28
|
100
|
|
|
|
65
|
if($v->{type}) { |
|
442
|
26
|
100
|
|
|
|
75
|
$data{$k} = $self->cached($k, $data{$k}) if exists $v->{id}; |
|
443
|
26
|
|
|
|
|
90
|
push @data, $data{$k}; |
|
444
|
|
|
|
|
|
|
} |
|
445
|
|
|
|
|
|
|
} |
|
446
|
2
|
|
|
|
|
18
|
$self->write_packet(pack($self->{log_format}, @data)); |
|
447
|
|
|
|
|
|
|
|
|
448
|
|
|
|
|
|
|
# Recycle everything after 5m entries |
|
449
|
2
|
50
|
|
|
|
11
|
if($self->{log_packet_count}++ >= 5000000) { |
|
450
|
0
|
|
|
|
|
0
|
$self->send_packet('reset'); |
|
451
|
0
|
|
|
|
|
0
|
$self->{log_packet_count} = 0; |
|
452
|
|
|
|
|
|
|
} |
|
453
|
2
|
|
|
|
|
17
|
return $self; |
|
454
|
|
|
|
|
|
|
} |
|
455
|
|
|
|
|
|
|
|
|
456
|
|
|
|
|
|
|
=head2 send_packet |
|
457
|
|
|
|
|
|
|
|
|
458
|
|
|
|
|
|
|
Generate and send a packet for the given type. |
|
459
|
|
|
|
|
|
|
|
|
460
|
|
|
|
|
|
|
=cut |
|
461
|
|
|
|
|
|
|
|
|
462
|
|
|
|
|
|
|
sub send_packet { |
|
463
|
16
|
|
|
16
|
1
|
2368
|
my $self = shift; |
|
464
|
16
|
|
|
|
|
21
|
my $type = shift; |
|
465
|
|
|
|
|
|
|
|
|
466
|
|
|
|
|
|
|
# Try the specific method for this packet if we have one |
|
467
|
16
|
|
|
|
|
27
|
my $method = "packet_$type"; |
|
468
|
16
|
100
|
|
|
|
116
|
return $self->write_packet($self->$method(@_)) if $self->can($method); |
|
469
|
|
|
|
|
|
|
|
|
470
|
|
|
|
|
|
|
# Otherwise use the generic format for ASCIIZ mapping |
|
471
|
12
|
|
|
|
|
35
|
my %args = @_; |
|
472
|
12
|
|
|
|
|
85
|
return $self->write_packet(pack('C1N1Z*', $self->{format_hash}->{$type}->{id}, $args{id}, $args{data})); |
|
473
|
|
|
|
|
|
|
} |
|
474
|
|
|
|
|
|
|
|
|
475
|
|
|
|
|
|
|
=head2 packet_reset |
|
476
|
|
|
|
|
|
|
|
|
477
|
|
|
|
|
|
|
Generate a reset packet and clear internal caches in the process. |
|
478
|
|
|
|
|
|
|
|
|
479
|
|
|
|
|
|
|
=cut |
|
480
|
|
|
|
|
|
|
|
|
481
|
|
|
|
|
|
|
sub packet_reset { |
|
482
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
|
483
|
0
|
|
|
|
|
0
|
$self->{entry_cache} = {}; |
|
484
|
0
|
|
|
|
|
0
|
$self->{entry_index} = {}; |
|
485
|
0
|
|
|
|
|
0
|
return pack('C1', 0x80); |
|
486
|
|
|
|
|
|
|
} |
|
487
|
|
|
|
|
|
|
|
|
488
|
|
|
|
|
|
|
=head2 packet_server |
|
489
|
|
|
|
|
|
|
|
|
490
|
|
|
|
|
|
|
Generate a server packet. |
|
491
|
|
|
|
|
|
|
|
|
492
|
|
|
|
|
|
|
=cut |
|
493
|
|
|
|
|
|
|
|
|
494
|
|
|
|
|
|
|
sub packet_server { |
|
495
|
2
|
|
|
2
|
1
|
4
|
my $self = shift; |
|
496
|
2
|
|
|
|
|
8
|
my %args = @_; |
|
497
|
2
|
|
|
|
|
28
|
return pack('C1Z*', 1, $args{hostname}); |
|
498
|
|
|
|
|
|
|
} |
|
499
|
|
|
|
|
|
|
|
|
500
|
|
|
|
|
|
|
=head2 packet_timestamp |
|
501
|
|
|
|
|
|
|
|
|
502
|
|
|
|
|
|
|
Generate the timestamp packet. |
|
503
|
|
|
|
|
|
|
|
|
504
|
|
|
|
|
|
|
=cut |
|
505
|
|
|
|
|
|
|
|
|
506
|
|
|
|
|
|
|
sub packet_timestamp { |
|
507
|
2
|
|
|
2
|
1
|
4
|
my $self = shift; |
|
508
|
2
|
|
|
|
|
7
|
my %args = @_; |
|
509
|
2
|
|
|
|
|
13
|
return pack('C1N1', 2, $args{timestamp}); |
|
510
|
|
|
|
|
|
|
} |
|
511
|
|
|
|
|
|
|
|
|
512
|
|
|
|
|
|
|
=head2 write_packet |
|
513
|
|
|
|
|
|
|
|
|
514
|
|
|
|
|
|
|
Write a packet to the output handler. |
|
515
|
|
|
|
|
|
|
|
|
516
|
|
|
|
|
|
|
=cut |
|
517
|
|
|
|
|
|
|
|
|
518
|
|
|
|
|
|
|
sub write_packet { |
|
519
|
18
|
|
|
18
|
1
|
31
|
my ($self, $pkt) = @_; |
|
520
|
18
|
|
|
|
|
46
|
$self->{on_write}->($self, $pkt); |
|
521
|
18
|
|
|
|
|
4132
|
return $self; |
|
522
|
|
|
|
|
|
|
} |
|
523
|
|
|
|
|
|
|
|
|
524
|
|
|
|
|
|
|
=head2 expand |
|
525
|
|
|
|
|
|
|
|
|
526
|
|
|
|
|
|
|
Expand incoming data. |
|
527
|
|
|
|
|
|
|
|
|
528
|
|
|
|
|
|
|
=cut |
|
529
|
|
|
|
|
|
|
|
|
530
|
|
|
|
|
|
|
sub expand { |
|
531
|
18
|
|
|
18
|
1
|
2187
|
my $self = shift; |
|
532
|
18
|
|
|
|
|
21
|
my $pkt = shift; |
|
533
|
18
|
|
|
|
|
43
|
my $type = unpack('C1', $$pkt); |
|
534
|
18
|
50
|
|
|
|
64
|
unless($self->{packet_handler}->{$type}) { |
|
535
|
0
|
|
|
|
|
0
|
print substr $$pkt, 0, 16; |
|
536
|
0
|
|
|
|
|
0
|
die "what is $type?"; |
|
537
|
|
|
|
|
|
|
} |
|
538
|
18
|
|
|
|
|
49
|
my $method = 'handle_' . $self->{packet_handler}->{$type}; |
|
539
|
18
|
100
|
|
|
|
144
|
return $self->$method($pkt) if $self->can($method); |
|
540
|
|
|
|
|
|
|
|
|
541
|
12
|
50
|
|
|
|
41
|
return unless index($$pkt, "\0", 5) >= 0; |
|
542
|
|
|
|
|
|
|
|
|
543
|
12
|
|
|
|
|
42
|
(undef, my $id, my $data) = unpack('C1N1Z*', $$pkt); |
|
544
|
12
|
|
|
|
|
28
|
substr $$pkt, 0, 6 + length($data), ''; |
|
545
|
12
|
|
|
|
|
39
|
$self->set_key($self->{packet_handler}->{$type}, data => $data, id => $id); |
|
546
|
|
|
|
|
|
|
} |
|
547
|
|
|
|
|
|
|
|
|
548
|
|
|
|
|
|
|
=head2 handle_reset |
|
549
|
|
|
|
|
|
|
|
|
550
|
|
|
|
|
|
|
Handle an incoming reset packet. |
|
551
|
|
|
|
|
|
|
|
|
552
|
|
|
|
|
|
|
=cut |
|
553
|
|
|
|
|
|
|
|
|
554
|
|
|
|
|
|
|
sub handle_reset { |
|
555
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
|
556
|
0
|
|
|
|
|
0
|
my $pkt = shift; |
|
557
|
|
|
|
|
|
|
# Clear cache for all items |
|
558
|
0
|
|
|
|
|
0
|
$self->{entry_cache} = { }; |
|
559
|
0
|
|
|
|
|
0
|
$self->{entry_index} = { }; |
|
560
|
0
|
|
|
|
|
0
|
substr $$pkt, 0, 1, ''; |
|
561
|
|
|
|
|
|
|
} |
|
562
|
|
|
|
|
|
|
|
|
563
|
|
|
|
|
|
|
=head2 handle_log |
|
564
|
|
|
|
|
|
|
|
|
565
|
|
|
|
|
|
|
Handle an incoming log packet. |
|
566
|
|
|
|
|
|
|
|
|
567
|
|
|
|
|
|
|
=cut |
|
568
|
|
|
|
|
|
|
|
|
569
|
|
|
|
|
|
|
sub handle_log { |
|
570
|
2
|
|
|
2
|
1
|
5
|
my $self = shift; |
|
571
|
2
|
|
|
|
|
3
|
my $pkt = shift; |
|
572
|
2
|
50
|
|
|
|
10
|
return unless length $$pkt >= $self->{log_record_length}; |
|
573
|
|
|
|
|
|
|
|
|
574
|
2
|
|
|
|
|
4
|
my %data; |
|
575
|
2
|
|
|
|
|
10
|
@data{@{ $self->{format_keys} }} = unpack($self->{log_format}, $$pkt); |
|
|
2
|
|
|
|
|
26
|
|
|
576
|
2
|
|
|
|
|
7
|
$_->($self, \%data) for @{$self->{log_process_out}}; |
|
|
2
|
|
|
|
|
17
|
|
|
577
|
|
|
|
|
|
|
|
|
578
|
2
|
50
|
|
|
|
11
|
die "No timestamp" unless $self->{timestamp}; |
|
579
|
2
|
50
|
|
|
|
25
|
$self->{on_log_line}->($self, \%data) if exists $self->{on_log_line}; |
|
580
|
2
|
|
|
|
|
986
|
substr $$pkt, 0, $self->{log_record_length}, ''; |
|
581
|
|
|
|
|
|
|
} |
|
582
|
|
|
|
|
|
|
|
|
583
|
|
|
|
|
|
|
=head2 data_hashref |
|
584
|
|
|
|
|
|
|
|
|
585
|
|
|
|
|
|
|
Convert logline data to a hashref. |
|
586
|
|
|
|
|
|
|
|
|
587
|
|
|
|
|
|
|
=cut |
|
588
|
|
|
|
|
|
|
|
|
589
|
|
|
|
|
|
|
sub data_hashref { |
|
590
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
|
591
|
0
|
|
|
|
|
0
|
my $data = shift; |
|
592
|
0
|
|
|
|
|
0
|
my %info = %$data; |
|
593
|
|
|
|
|
|
|
|
|
594
|
0
|
|
|
|
|
0
|
$info{$_} = $self->from_cache($_, $info{$_}) for qw(vhost user url query useragent refer); |
|
595
|
0
|
|
|
|
|
0
|
$info{server} = $self->{server}; |
|
596
|
0
|
|
|
|
|
0
|
undef $info{$_} for grep { $info{$_} eq '-' } qw(user refer size useragent); |
|
|
0
|
|
|
|
|
0
|
|
|
597
|
0
|
0
|
0
|
|
|
0
|
undef $info{query} unless defined $info{query} && length $info{query}; |
|
598
|
|
|
|
|
|
|
#DateTime->from_epoch(epoch => $self->{timestamp})->strftime("%d/%b/%Y:%H:%M:%S %z"); |
|
599
|
0
|
|
|
|
|
0
|
$info{timestamp} = strftime("%d/%b/%Y:%H:%M:%S %z", gmtime($self->{timestamp})); |
|
600
|
0
|
|
|
|
|
0
|
return \%info; |
|
601
|
|
|
|
|
|
|
} |
|
602
|
|
|
|
|
|
|
|
|
603
|
|
|
|
|
|
|
=head2 data_to_text |
|
604
|
|
|
|
|
|
|
|
|
605
|
|
|
|
|
|
|
Internal method for converting the current log entry to a text string in |
|
606
|
|
|
|
|
|
|
something approaching the 'standard' Apache log format (almost, but not quite, |
|
607
|
|
|
|
|
|
|
CLF). |
|
608
|
|
|
|
|
|
|
|
|
609
|
|
|
|
|
|
|
=cut |
|
610
|
|
|
|
|
|
|
|
|
611
|
|
|
|
|
|
|
sub data_to_text { |
|
612
|
2
|
|
|
2
|
1
|
16
|
my $self = shift; |
|
613
|
2
|
|
|
|
|
4
|
my $data = shift; |
|
614
|
2
|
|
|
|
|
17
|
my $q = $self->from_cache('query', $data->{query}); |
|
615
|
2
|
50
|
|
|
|
22
|
$q = '' unless defined $q; |
|
616
|
2
|
50
|
|
|
|
8
|
return join(' ', |
|
|
|
50
|
|
|
|
|
|
|
617
|
|
|
|
|
|
|
$self->from_cache('vhost', $data->{vhost}), |
|
618
|
|
|
|
|
|
|
$data->{duration}, |
|
619
|
|
|
|
|
|
|
$data->{ip}, |
|
620
|
|
|
|
|
|
|
'-', |
|
621
|
|
|
|
|
|
|
$self->from_cache('user', $data->{user}), |
|
622
|
|
|
|
|
|
|
'[' . DateTime->from_epoch(epoch => $self->{timestamp})->strftime("%d/%b/%Y:%H:%M:%S %z") . ']', |
|
623
|
|
|
|
|
|
|
'"' . $data->{method} . ' ' . $self->from_cache('url', $data->{url}) . (length $q ? "?$q" : "") . ' HTTP/' . ($data->{ver} ? '1.1' : '1.0') . '"', |
|
624
|
|
|
|
|
|
|
$data->{result}, |
|
625
|
|
|
|
|
|
|
$data->{size}, |
|
626
|
|
|
|
|
|
|
'"' . $self->from_cache('useragent', $data->{useragent}) . '"', |
|
627
|
|
|
|
|
|
|
'"' . $self->from_cache('refer', $data->{refer}) . '"', |
|
628
|
|
|
|
|
|
|
); |
|
629
|
|
|
|
|
|
|
} |
|
630
|
|
|
|
|
|
|
|
|
631
|
|
|
|
|
|
|
=head2 handle_server |
|
632
|
|
|
|
|
|
|
|
|
633
|
|
|
|
|
|
|
Internal method for processing a server record (used to indicate the server |
|
634
|
|
|
|
|
|
|
name subsequent records apply to). |
|
635
|
|
|
|
|
|
|
|
|
636
|
|
|
|
|
|
|
=cut |
|
637
|
|
|
|
|
|
|
|
|
638
|
|
|
|
|
|
|
sub handle_server { |
|
639
|
2
|
|
|
2
|
1
|
6
|
my $self = shift; |
|
640
|
2
|
|
|
|
|
6
|
my $pkt = shift; |
|
641
|
2
|
50
|
|
|
|
11
|
return unless index($$pkt, "\0", 1) >= 0; |
|
642
|
2
|
|
|
|
|
12
|
(undef, my $server) = unpack('C1Z*', $$pkt); |
|
643
|
2
|
|
|
|
|
10
|
substr $$pkt, 0, 2 + length($server), ''; |
|
644
|
2
|
|
|
|
|
7
|
$self->{server} = $server; |
|
645
|
2
|
|
|
|
|
8
|
$self; |
|
646
|
|
|
|
|
|
|
} |
|
647
|
|
|
|
|
|
|
|
|
648
|
|
|
|
|
|
|
=head2 handle_timestamp |
|
649
|
|
|
|
|
|
|
|
|
650
|
|
|
|
|
|
|
Internal method for processing a timestamp entry. |
|
651
|
|
|
|
|
|
|
|
|
652
|
|
|
|
|
|
|
=cut |
|
653
|
|
|
|
|
|
|
|
|
654
|
|
|
|
|
|
|
sub handle_timestamp { |
|
655
|
2
|
|
|
2
|
1
|
5
|
my $self = shift; |
|
656
|
2
|
|
|
|
|
5
|
my $pkt = shift; |
|
657
|
2
|
50
|
|
|
|
58
|
return unless length $$pkt >= 5; |
|
658
|
2
|
|
|
|
|
16
|
(undef, my $hostname) = unpack('C1N1', $$pkt); |
|
659
|
2
|
|
|
|
|
6
|
substr $$pkt, 0, 5, ''; |
|
660
|
2
|
|
|
|
|
6
|
$self->{timestamp} = $hostname; |
|
661
|
2
|
50
|
|
|
|
9
|
warn "Zero timestamp?" unless $self->{timestamp}; |
|
662
|
2
|
|
|
|
|
9
|
$self; |
|
663
|
|
|
|
|
|
|
} |
|
664
|
|
|
|
|
|
|
|
|
665
|
|
|
|
|
|
|
=head2 invoke_event |
|
666
|
|
|
|
|
|
|
|
|
667
|
|
|
|
|
|
|
Internal method for invoking an event. |
|
668
|
|
|
|
|
|
|
|
|
669
|
|
|
|
|
|
|
=cut |
|
670
|
|
|
|
|
|
|
|
|
671
|
|
|
|
|
|
|
sub invoke_event { |
|
672
|
1
|
|
|
1
|
1
|
3
|
my $self = shift; |
|
673
|
1
|
|
|
|
|
3
|
my $event = shift; |
|
674
|
1
|
50
|
33
|
|
|
6
|
my $code = $self->{"on_" . $event} || $self->can("on_" . $event) or return; |
|
675
|
1
|
|
|
|
|
5
|
return $code->(@_); |
|
676
|
|
|
|
|
|
|
} |
|
677
|
|
|
|
|
|
|
|
|
678
|
|
|
|
|
|
|
=head2 stats |
|
679
|
|
|
|
|
|
|
|
|
680
|
|
|
|
|
|
|
Print current stats - not all that useful since we clear cached values regularly. |
|
681
|
|
|
|
|
|
|
|
|
682
|
|
|
|
|
|
|
=cut |
|
683
|
|
|
|
|
|
|
|
|
684
|
|
|
|
|
|
|
sub stats { |
|
685
|
0
|
|
|
0
|
1
|
|
my $self = shift; |
|
686
|
0
|
|
|
|
|
|
printf("%-64.64s saw total entries: %s\n", $_, $self->{entry_count}->{$_}) for sort keys %{$self->{entry_index}}; |
|
|
0
|
|
|
|
|
|
|
|
687
|
|
|
|
|
|
|
} |
|
688
|
|
|
|
|
|
|
|
|
689
|
|
|
|
|
|
|
1; |
|
690
|
|
|
|
|
|
|
|
|
691
|
|
|
|
|
|
|
=head1 AUTHOR |
|
692
|
|
|
|
|
|
|
|
|
693
|
|
|
|
|
|
|
Tom Molesworth |
|
694
|
|
|
|
|
|
|
|
|
695
|
|
|
|
|
|
|
=head1 LICENSE |
|
696
|
|
|
|
|
|
|
|
|
697
|
|
|
|
|
|
|
Copyright Tom Molesworth 2009-2011. Licensed under the same terms as Perl itself. |