line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package ELFF::Parser; |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
# ELFF-Parser is a perl module for parsing ELFF formatted log files. |
4
|
|
|
|
|
|
|
# |
5
|
|
|
|
|
|
|
# Copyright (C) 2007-2010 Mark Warren |
6
|
|
|
|
|
|
|
# |
7
|
|
|
|
|
|
|
# This library is free software; you can redistribute it and/or |
8
|
|
|
|
|
|
|
# modify it under the terms of the GNU Lesser General Public |
9
|
|
|
|
|
|
|
# License as published by the Free Software Foundation; either |
10
|
|
|
|
|
|
|
# version 2.1 of the License, or (at your option) any later version. |
11
|
|
|
|
|
|
|
# |
12
|
|
|
|
|
|
|
# This library is distributed in the hope that it will be useful, |
13
|
|
|
|
|
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
14
|
|
|
|
|
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
15
|
|
|
|
|
|
|
# Lesser General Public License for more details. |
16
|
|
|
|
|
|
|
# |
17
|
|
|
|
|
|
|
# You should have received a copy of the GNU Lesser General Public |
18
|
|
|
|
|
|
|
# License along with this library; if not, write to the Free Software |
19
|
|
|
|
|
|
|
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
=pod |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
=head1 NAME |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
ELFF::Parser - parse ELFF formatted log files |
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
=head1 SYNOPSIS |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
use ELFF::Parser; |
31
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
$p = new ELFF::Parser(); |
33
|
|
|
|
|
|
|
while(<>) { |
34
|
|
|
|
|
|
|
$res = $p->parse_line($_); |
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
if($res->{directive} && $res->{directive} eq 'Start-Date') { |
37
|
|
|
|
|
|
|
print "Log starts at $res->{value}\n"; |
38
|
|
|
|
|
|
|
} |
39
|
|
|
|
|
|
|
elsif($res->{href}) { |
40
|
|
|
|
|
|
|
print $res->{href}{'rs-bytes'}, "\n"; |
41
|
|
|
|
|
|
|
} |
42
|
|
|
|
|
|
|
elsif($res->{aref}) { |
43
|
|
|
|
|
|
|
print "Detected log format change, or no fields directive\n"; |
44
|
|
|
|
|
|
|
foreach my $field (@{$res->{aref}}) { |
45
|
|
|
|
|
|
|
print " found field: $field\n"; |
46
|
|
|
|
|
|
|
} |
47
|
|
|
|
|
|
|
print "\n"; |
48
|
|
|
|
|
|
|
} |
49
|
|
|
|
|
|
|
else { |
50
|
|
|
|
|
|
|
print STDERR "Failed to parse log line\n"; |
51
|
|
|
|
|
|
|
} |
52
|
|
|
|
|
|
|
} |
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
=head1 DESCRIPTION |
55
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
C<ELFF::Parser> parses ELFF formatted logs. For a description of ELFF |
57
|
|
|
|
|
|
|
(Extended Log File Format), see http://www.w3.org/TR/WD-logfile.html. In |
58
|
|
|
|
|
|
|
brief, ELFF log files consist of directives (meta-data about the logs) |
59
|
|
|
|
|
|
|
and logs. C<ELFF::Parser> parses both, extracting log format information |
60
|
|
|
|
|
|
|
from the directives and using it to build hashes for each log entry. |
61
|
|
|
|
|
|
|
If log format information isn't available or becomes invalidated (see |
62
|
|
|
|
|
|
|
the L</"ELFF PROBLEMS"> section below), C<ELFF::Parser> will return |
63
|
|
|
|
|
|
|
arrays for each log entry instead of hashes. |
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
=head1 CONSTRUCTOR |
66
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
=over 4 |
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
=item $ep = new ELFF::Parser() |
70
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
Creates a new C<ELFF::Parser> object. |
72
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
=back |
74
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
=head1 METHODS |
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
=over 4 |
78
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
=item $res = $ep->parse_line($line) |
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
Parse an ELFF log line. The returned result will be a hash reference that |
82
|
|
|
|
|
|
|
contains different information depending on the state of the object and |
83
|
|
|
|
|
|
|
the type of line parsed (i.e. directive or log entry). |
84
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
If the line is a directive, the returned hash will have the following |
86
|
|
|
|
|
|
|
keys: |
87
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
$res->{directive} the name of the directive |
89
|
|
|
|
|
|
|
$res->{value} the value of the directive |
90
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
If the line is a Fields directive, the result will contain a 'fields' |
92
|
|
|
|
|
|
|
key as well, which is an array reference containing the fields. |
93
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
foreach my $field (@{$res->{fields}}) { |
95
|
|
|
|
|
|
|
print "Found field $field\n"; |
96
|
|
|
|
|
|
|
} |
97
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
Since C<ELFF::Parser> builds hashes for you for each log entry, you |
99
|
|
|
|
|
|
|
generally don't need to worry about the fields. |
100
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
If the line is a log entry, and the C<ELFF::Parser> object has parsed |
102
|
|
|
|
|
|
|
a fields directive already, the result hash will contain a 'href' |
103
|
|
|
|
|
|
|
key whose value is a hash reference containing the log entry data. |
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
print "client to proxy bytes: ", $res->{href}{'cs-bytes'}, "\n"; |
106
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
If no fields directive has been parsed, or C<ELFF::Parser> detects a |
108
|
|
|
|
|
|
|
change in log format (see the L</"ELFF PROBLEMS"> section below), an |
109
|
|
|
|
|
|
|
array reference may be returned instead: |
110
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
foreach my $field (@{$res->{aref}}) { |
112
|
|
|
|
|
|
|
print "data: ", $field, "\n"; |
113
|
|
|
|
|
|
|
} |
114
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
If C<ELFF::Parser> detects a malformed line, it will return undef. |
116
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
=back |
118
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
=head1 ELFF PROBLEMS |
120
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
There is one particularly annoying thing about ELFF log files, which is |
122
|
|
|
|
|
|
|
that the ELFF standard doesn't require that a new Fields directive be |
123
|
|
|
|
|
|
|
inserted into the log file when the log format changes. Because of this, |
124
|
|
|
|
|
|
|
if the log format changes in the middle of a log file, there is very |
125
|
|
|
|
|
|
|
little that a parser can do to detect the change. All reporting software |
126
|
|
|
|
|
|
|
that I have seen simply ignores logs as soon as a change in format |
127
|
|
|
|
|
|
|
is detected (i.e. when errors are encountered extracting statistics |
128
|
|
|
|
|
|
|
from the logs). This is a shortcoming in the ELFF standard, and I'm |
129
|
|
|
|
|
|
|
afraid that C<ELFF::Parser> doesn't handle the problem much better. |
130
|
|
|
|
|
|
|
C<ELFF::Parser> detects log format changes by checking the number of |
131
|
|
|
|
|
|
|
fields in each log entry. If the number of fields in a log entry differs |
132
|
|
|
|
|
|
|
from the number of fields specified in the Fields directive, C<ELFF::Parser> |
133
|
|
|
|
|
|
|
will invalidate the format and start returning arrays of fields for |
134
|
|
|
|
|
|
|
each message instead of hashes. This way, the log data is still |
135
|
|
|
|
|
|
|
available to you, and you can attempt to recover from the problem |
136
|
|
|
|
|
|
|
yourself. However, if the number of fields in the log messages |
137
|
|
|
|
|
|
|
doesn't change when the log format changes (e.g. when fields are |
138
|
|
|
|
|
|
|
re-ordered, or when the same number of fields is added and removed), |
139
|
|
|
|
|
|
|
C<ELFF::Parser> will not detect the format change. |
140
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
Thankfully, log formats usually don't change on their own, so |
142
|
|
|
|
|
|
|
administrators can modify their procedures such that the impact |
143
|
|
|
|
|
|
|
of this shortcoming is minimized (e.g. rotate the log file |
144
|
|
|
|
|
|
|
immediately after changing the log format to force a new fields |
145
|
|
|
|
|
|
|
directive to be logged). |
146
|
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
=head1 HOMEPAGE |
148
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
L |
150
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
=head1 BUGS |
152
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
None that I know of, but please let me know if you find one. Please |
154
|
|
|
|
|
|
|
report bugs via the SourceForge tracker. |
155
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
=head1 AUTHOR |
157
|
|
|
|
|
|
|
|
158
|
|
|
|
|
|
|
Copyright (c) 2007 Mark Warren |
159
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
=head1 LICENSE AND DISCLAIMER |
161
|
|
|
|
|
|
|
|
162
|
|
|
|
|
|
|
This software is distributed under the terms of the GNU Lesser General |
163
|
|
|
|
|
|
|
Public License. |
164
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
166
|
|
|
|
|
|
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
167
|
|
|
|
|
|
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
168
|
|
|
|
|
|
|
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
169
|
|
|
|
|
|
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
170
|
|
|
|
|
|
|
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
171
|
|
|
|
|
|
|
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
172
|
|
|
|
|
|
|
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
173
|
|
|
|
|
|
|
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
174
|
|
|
|
|
|
|
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
175
|
|
|
|
|
|
|
POSSIBILITY OF SUCH DAMAGE. |
176
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
=cut |
178
|
|
|
|
|
|
|
|
179
|
3
|
|
|
3
|
|
24005
|
use 5.00; |
|
3
|
|
|
|
|
12
|
|
|
3
|
|
|
|
|
136
|
|
180
|
3
|
|
|
3
|
|
20
|
use strict; |
|
3
|
|
|
|
|
9
|
|
|
3
|
|
|
|
|
98
|
|
181
|
3
|
|
|
3
|
|
17
|
use Carp; |
|
3
|
|
|
|
|
12
|
|
|
3
|
|
|
|
|
1855
|
|
182
|
|
|
|
|
|
|
|
183
|
|
|
|
|
|
|
our $VERSION = '0.92'; |
184
|
|
|
|
|
|
|
|
185
|
|
|
|
|
|
|
|
186
|
|
|
|
|
|
|
sub new { |
187
|
1
|
|
|
1
|
1
|
265
|
my $class = shift; |
188
|
|
|
|
|
|
|
|
189
|
1
|
|
|
|
|
4
|
my $self = { |
190
|
|
|
|
|
|
|
# we use number of fields to detect log format changes. it's |
191
|
|
|
|
|
|
|
# not perfect, but we don't understand the log content, so this |
192
|
|
|
|
|
|
|
# is the best that we can do |
193
|
|
|
|
|
|
|
'fields' => 0, |
194
|
|
|
|
|
|
|
|
195
|
|
|
|
|
|
|
# revmap is used to figure out the name of each field as we |
196
|
|
|
|
|
|
|
# build the result hash in parse_line |
197
|
|
|
|
|
|
|
'revmap' => {}, |
198
|
|
|
|
|
|
|
}; |
199
|
|
|
|
|
|
|
|
200
|
1
|
|
|
|
|
4
|
return bless $self, $class; |
201
|
|
|
|
|
|
|
} |
202
|
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
sub parse_line { |
204
|
15
|
|
|
15
|
1
|
7179
|
my ($self, $line) = @_; |
205
|
15
|
|
|
|
|
22
|
chomp($line); |
206
|
|
|
|
|
|
|
|
207
|
15
|
|
|
|
|
23
|
my $res = {}; |
208
|
|
|
|
|
|
|
|
209
|
|
|
|
|
|
|
# if the line is a directive, handle it here |
210
|
15
|
100
|
66
|
|
|
80
|
if($line && substr($line, 0, 1) eq '#') { |
211
|
|
|
|
|
|
|
# some vendors put whitespace between # and the directive name, remove it |
212
|
9
|
|
|
|
|
22
|
$line =~ s/^#\s+/#/; |
213
|
|
|
|
|
|
|
|
214
|
9
|
|
|
|
|
37
|
@$res{('directive', 'value')} = split(/\s+/, $line, 2); |
215
|
9
|
|
|
|
|
46
|
$res->{directive} =~ s/(?:^#|:$)//g; |
216
|
|
|
|
|
|
|
|
217
|
9
|
100
|
|
|
|
24
|
if($res->{directive} eq 'Fields') { |
218
|
5
|
|
|
|
|
28
|
$self->{revmap} = tokenize($res->{value}); |
219
|
5
|
|
|
|
|
8
|
$self->{fields} = $#{$self->{revmap}}; |
|
5
|
|
|
|
|
9
|
|
220
|
5
|
|
|
|
|
5
|
@{$res->{fields}} = @{$self->{revmap}}; |
|
5
|
|
|
|
|
17
|
|
|
5
|
|
|
|
|
8
|
|
221
|
|
|
|
|
|
|
} |
222
|
|
|
|
|
|
|
|
223
|
9
|
|
|
|
|
23
|
return $res; |
224
|
|
|
|
|
|
|
} |
225
|
|
|
|
|
|
|
|
226
|
|
|
|
|
|
|
# not a directive, regular log |
227
|
|
|
|
|
|
|
|
228
|
6
|
|
|
|
|
22
|
my $flds = tokenize($line); |
229
|
6
|
50
|
|
|
|
13
|
return undef unless $flds; |
230
|
|
|
|
|
|
|
|
231
|
|
|
|
|
|
|
# no field names - return array |
232
|
6
|
50
|
|
|
|
14
|
unless($self->{revmap}) { |
233
|
0
|
|
|
|
|
0
|
$res->{aref} = $flds; |
234
|
0
|
|
|
|
|
0
|
return $res; |
235
|
|
|
|
|
|
|
} |
236
|
|
|
|
|
|
|
|
237
|
|
|
|
|
|
|
# change in format, invalidate fields and return array |
238
|
6
|
100
|
|
|
|
15
|
if($#$flds != $self->{fields}) { |
239
|
1
|
|
|
|
|
3
|
$self->{revmap} = undef; |
240
|
1
|
|
|
|
|
3
|
$res->{aref} = $flds; |
241
|
1
|
|
|
|
|
3
|
return $res; |
242
|
|
|
|
|
|
|
} |
243
|
|
|
|
|
|
|
|
244
|
|
|
|
|
|
|
# return href |
245
|
5
|
|
|
|
|
6
|
my %href; |
246
|
5
|
|
|
|
|
6
|
@href{@{$self->{revmap}}} = @$flds; |
|
5
|
|
|
|
|
30
|
|
247
|
5
|
|
|
|
|
12
|
$res->{href} = \%href; |
248
|
|
|
|
|
|
|
|
249
|
5
|
|
|
|
|
22
|
return $res; |
250
|
|
|
|
|
|
|
} |
251
|
|
|
|
|
|
|
|
252
|
|
|
|
|
|
|
|
253
|
|
|
|
|
|
|
require XSLoader; |
254
|
|
|
|
|
|
|
XSLoader::load('ELFF::Parser', $VERSION); |
255
|
|
|
|
|
|
|
|
256
|
|
|
|
|
|
|
|
257
|
|
|
|
|
|
|
1; |