line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package ETL::Yertl::Command::ygrok; |
2
|
|
|
|
|
|
|
our $VERSION = '0.035'; |
3
|
|
|
|
|
|
|
# ABSTRACT: Parse lines of text into documents |
4
|
|
|
|
|
|
|
|
5
|
9
|
|
|
9
|
|
2637
|
use ETL::Yertl; |
|
9
|
|
|
|
|
47
|
|
|
9
|
|
|
|
|
61
|
|
6
|
9
|
|
|
9
|
|
2641
|
use ETL::Yertl::Util qw( load_module ); |
|
9
|
|
|
|
|
21
|
|
|
9
|
|
|
|
|
430
|
|
7
|
9
|
|
|
9
|
|
3673
|
use Getopt::Long qw( GetOptionsFromArray ); |
|
9
|
|
|
|
|
68213
|
|
|
9
|
|
|
|
|
33
|
|
8
|
9
|
|
|
9
|
|
3921
|
use Regexp::Common; |
|
9
|
|
|
|
|
17441
|
|
|
9
|
|
|
|
|
33
|
|
9
|
9
|
|
|
9
|
|
1150215
|
use File::HomeDir; |
|
9
|
|
|
|
|
33078
|
|
|
9
|
|
|
|
|
616
|
|
10
|
9
|
|
|
9
|
|
2214
|
use Hash::Merge::Simple qw( merge ); |
|
9
|
|
|
|
|
3059
|
|
|
9
|
|
|
|
|
7483
|
|
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
our %PATTERNS = ( |
13
|
|
|
|
|
|
|
WORD => '\b\w+\b', |
14
|
|
|
|
|
|
|
DATA => '.*?', |
15
|
|
|
|
|
|
|
NUM => $RE{num}{real}."", # stringify to allow YAML serialization |
16
|
|
|
|
|
|
|
INT => $RE{num}{int}."", # stringify to allow YAML serialization |
17
|
|
|
|
|
|
|
VERSION => '\d+(?:[.]\d+)*', |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
DATE => { |
20
|
|
|
|
|
|
|
MONTH => '\b(?:Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|Jun(?:e)?|Jul(?:y)?|Aug(?:ust)?|Sep(?:tember)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?)\b', |
21
|
|
|
|
|
|
|
ISO8601 => '\d{4}-?\d{2}-?\d{2}[T ]\d{2}:?\d{2}:?\d{2}(?:Z|[+-]\d{4})', |
22
|
|
|
|
|
|
|
HTTP => '\d{2}/\w{3}/\d{4}:\d{2}:\d{2}:\d{2} [+-]\d{4}', |
23
|
|
|
|
|
|
|
SYSLOG => '%{DATE.MONTH} +\d{1,2} \d{1,2}:\d{1,2}:\d{1,2}', |
24
|
|
|
|
|
|
|
}, |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
OS => { |
27
|
|
|
|
|
|
|
USER => '[a-zA-Z0-9._-]+', |
28
|
|
|
|
|
|
|
PROCNAME => '[\w._-]+', |
29
|
|
|
|
|
|
|
}, |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
NET => { |
32
|
|
|
|
|
|
|
HOSTNAME => join( "|", $RE{net}{IPv4}, $RE{net}{IPv6}, $RE{net}{domain}{-rfc1101} ), |
33
|
|
|
|
|
|
|
IPV6 => $RE{net}{IPv6}."", |
34
|
|
|
|
|
|
|
IPV4 => $RE{net}{IPv4}."", |
35
|
|
|
|
|
|
|
}, |
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
URL => { |
38
|
|
|
|
|
|
|
PATH => '[^?#]*(?:\?[^#]*)?', |
39
|
|
|
|
|
|
|
# URL regex from URI.pm |
40
|
|
|
|
|
|
|
URL => '(?:[^:/?#]+:)?(?://[^/?#]*)?[^?#]*(?:\?[^#]*)?(?:#.*)?', |
41
|
|
|
|
|
|
|
}, |
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
LOG => { |
44
|
|
|
|
|
|
|
HTTP_COMMON => join( " ", |
45
|
|
|
|
|
|
|
'%{NET.HOSTNAME:remote_addr}', '%{OS.USER:ident}', '%{OS.USER:user}', |
46
|
|
|
|
|
|
|
'\[%{DATE.HTTP:timestamp}]', |
47
|
|
|
|
|
|
|
'"%{WORD:method} %{URL.PATH:path} [^/]+/%{VERSION:http_version}"', |
48
|
|
|
|
|
|
|
'%{INT:status}', '(?\d+|-)', |
49
|
|
|
|
|
|
|
), |
50
|
|
|
|
|
|
|
HTTP_COMBINED => join( " ", |
51
|
|
|
|
|
|
|
'%{LOG.HTTP_COMMON}', |
52
|
|
|
|
|
|
|
'"%{URL:referer}"', '"%{DATA:user_agent}"', |
53
|
|
|
|
|
|
|
), |
54
|
|
|
|
|
|
|
SYSLOG => join( "", |
55
|
|
|
|
|
|
|
'%{DATE.SYSLOG:timestamp} ', |
56
|
|
|
|
|
|
|
'(?:<%{INT:facility}.%{INT:priority}> )?', |
57
|
|
|
|
|
|
|
'%{NET.HOSTNAME:host} ', |
58
|
|
|
|
|
|
|
'%{OS.PROCNAME:program}(?:\[%{INT:pid}\])?: ', |
59
|
|
|
|
|
|
|
'%{DATA:text}', |
60
|
|
|
|
|
|
|
), |
61
|
|
|
|
|
|
|
}, |
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
POSIX => { |
64
|
|
|
|
|
|
|
LS => join( " +", |
65
|
|
|
|
|
|
|
'(?[bcdlsp-][rwxSsTt-]{9})', |
66
|
|
|
|
|
|
|
'%{INT:links}', |
67
|
|
|
|
|
|
|
'%{OS.USER:owner}', |
68
|
|
|
|
|
|
|
'%{OS.USER:group}', |
69
|
|
|
|
|
|
|
'%{INT:bytes}', |
70
|
|
|
|
|
|
|
'(?%{DATE.MONTH} +\d+ +\d+(?::\d+)?)', |
71
|
|
|
|
|
|
|
'%{DATA:name}', |
72
|
|
|
|
|
|
|
), |
73
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
# -- Mac OSX |
75
|
|
|
|
|
|
|
# TTY field starts with "tty" |
76
|
|
|
|
|
|
|
# No STAT field |
77
|
|
|
|
|
|
|
# -- OpenBSD |
78
|
|
|
|
|
|
|
# STAT field |
79
|
|
|
|
|
|
|
# -- RHEL 5 |
80
|
|
|
|
|
|
|
# tty can contain / |
81
|
|
|
|
|
|
|
# Seconds time optional |
82
|
|
|
|
|
|
|
PS => join( " +", |
83
|
|
|
|
|
|
|
' *%{INT:pid}', |
84
|
|
|
|
|
|
|
'(?[\w?/]+)', |
85
|
|
|
|
|
|
|
'(?(?:[\w+]+))?', |
86
|
|
|
|
|
|
|
'(? |
87
|
|
|
|
|
|
|
'%{DATA:command}', |
88
|
|
|
|
|
|
|
), |
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
# Mac OSX and OpenBSD are the same |
91
|
|
|
|
|
|
|
PSU => join ( " +", |
92
|
|
|
|
|
|
|
'%{OS.USER:user}', |
93
|
|
|
|
|
|
|
'%{INT:pid}', |
94
|
|
|
|
|
|
|
'%{NUM:cpu}', |
95
|
|
|
|
|
|
|
'%{NUM:mem}', |
96
|
|
|
|
|
|
|
'%{INT:vsz}', |
97
|
|
|
|
|
|
|
'%{INT:rss}', |
98
|
|
|
|
|
|
|
'(?[\w?/]+)', |
99
|
|
|
|
|
|
|
'(?(?:[\w+]+))?', |
100
|
|
|
|
|
|
|
'(?[\w:]+)', |
101
|
|
|
|
|
|
|
'(? |
102
|
|
|
|
|
|
|
'%{DATA:command}', |
103
|
|
|
|
|
|
|
), |
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
# Max OSX and OpenBSD are the same |
106
|
|
|
|
|
|
|
PSX => join ( " +", |
107
|
|
|
|
|
|
|
' *%{INT:pid}', |
108
|
|
|
|
|
|
|
'(?[\w?/]+)', |
109
|
|
|
|
|
|
|
'(?(?:[\w+]+))', |
110
|
|
|
|
|
|
|
'(? |
111
|
|
|
|
|
|
|
'%{DATA:command}', |
112
|
|
|
|
|
|
|
), |
113
|
|
|
|
|
|
|
}, |
114
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
); |
116
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
sub main { |
118
|
80
|
|
|
80
|
0
|
547350
|
my $class = shift; |
119
|
|
|
|
|
|
|
|
120
|
80
|
|
|
|
|
141
|
my %opt; |
121
|
80
|
100
|
|
|
|
268
|
if ( ref $_[-1] eq 'HASH' ) { |
122
|
5
|
|
|
|
|
6
|
%opt = %{ pop @_ }; |
|
5
|
|
|
|
|
17
|
|
123
|
|
|
|
|
|
|
} |
124
|
|
|
|
|
|
|
|
125
|
80
|
|
|
|
|
178
|
my @args = @_; |
126
|
80
|
|
|
|
|
321
|
GetOptionsFromArray( \@args, \%opt, |
127
|
|
|
|
|
|
|
'pattern', |
128
|
|
|
|
|
|
|
'loose', |
129
|
|
|
|
|
|
|
); |
130
|
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
# Manage patterns |
132
|
80
|
100
|
|
|
|
19089
|
if ( $opt{pattern} ) { |
133
|
11
|
|
|
|
|
25
|
my ( $pattern_name, $pattern ) = @args; |
134
|
|
|
|
|
|
|
|
135
|
11
|
100
|
|
|
|
29
|
if ( $pattern ) { |
136
|
|
|
|
|
|
|
# Edit a pattern |
137
|
7
|
|
|
|
|
19
|
config_pattern( $pattern_name, $pattern ); |
138
|
|
|
|
|
|
|
} |
139
|
|
|
|
|
|
|
else { |
140
|
4
|
|
|
|
|
10
|
my $patterns = $class->_all_patterns; |
141
|
|
|
|
|
|
|
|
142
|
4
|
100
|
|
|
|
191
|
if ( $pattern_name ) { |
143
|
|
|
|
|
|
|
# Show a single pattern |
144
|
3
|
|
|
|
|
4
|
my $pattern = $patterns; |
145
|
3
|
|
|
|
|
9
|
my @parts = split /[.]/, $pattern_name; |
146
|
3
|
|
|
|
|
5
|
for my $part ( @parts ) { |
147
|
4
|
|
50
|
|
|
16
|
$pattern = $pattern->{ $part } ||= {}; |
148
|
|
|
|
|
|
|
} |
149
|
|
|
|
|
|
|
|
150
|
3
|
100
|
|
|
|
7
|
if ( !ref $pattern ) { |
151
|
2
|
|
|
|
|
43
|
say $pattern; |
152
|
|
|
|
|
|
|
} |
153
|
|
|
|
|
|
|
else { |
154
|
1
|
|
|
|
|
4
|
my $out_fmt = load_module( format => 'default' )->new; |
155
|
1
|
|
|
|
|
4
|
say $out_fmt->write( $pattern ); |
156
|
|
|
|
|
|
|
} |
157
|
|
|
|
|
|
|
} |
158
|
|
|
|
|
|
|
else { |
159
|
|
|
|
|
|
|
# Show all patterns we know about |
160
|
1
|
|
|
|
|
3
|
my $out_fmt = load_module( format => 'default' )->new; |
161
|
1
|
|
|
|
|
3
|
say $out_fmt->write( $patterns ); |
162
|
|
|
|
|
|
|
} |
163
|
|
|
|
|
|
|
} |
164
|
|
|
|
|
|
|
|
165
|
11
|
|
|
|
|
57
|
return 0; |
166
|
|
|
|
|
|
|
} |
167
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
# Grok incoming lines |
169
|
69
|
|
|
|
|
171
|
my ( $pattern, @files ) = @args; |
170
|
69
|
100
|
|
|
|
159
|
die "Must give a pattern\n" unless $pattern; |
171
|
|
|
|
|
|
|
|
172
|
68
|
|
|
|
|
197
|
my $re = $class->parse_pattern( $pattern ); |
173
|
68
|
100
|
|
|
|
172
|
if ( !$opt{loose} ) { |
174
|
66
|
|
|
|
|
6752
|
$re = qr{^$re$}; |
175
|
|
|
|
|
|
|
} |
176
|
|
|
|
|
|
|
|
177
|
68
|
|
|
|
|
418
|
my $out_formatter = load_module( format => 'default' )->new; |
178
|
68
|
100
|
|
|
|
192
|
push @files, "-" unless @files; |
179
|
68
|
|
|
|
|
146
|
for my $file ( @files ) { |
180
|
|
|
|
|
|
|
|
181
|
|
|
|
|
|
|
# We're doing a similar behavior to <>, but manually for easier testing. |
182
|
68
|
|
|
|
|
102
|
my $fh; |
183
|
68
|
100
|
|
|
|
199
|
if ( $file eq '-' ) { |
184
|
|
|
|
|
|
|
# Use the existing STDIN so tests can fake it |
185
|
34
|
|
|
|
|
70
|
$fh = \*STDIN; |
186
|
|
|
|
|
|
|
} |
187
|
|
|
|
|
|
|
else { |
188
|
34
|
50
|
|
|
|
353
|
unless ( open $fh, '<', $file ) { |
189
|
0
|
|
|
|
|
0
|
warn "Could not open file '$file' for reading: $!\n"; |
190
|
0
|
|
|
|
|
0
|
next; |
191
|
|
|
|
|
|
|
} |
192
|
|
|
|
|
|
|
} |
193
|
|
|
|
|
|
|
|
194
|
68
|
|
|
|
|
2007
|
while ( my $line = <$fh> ) { |
195
|
|
|
|
|
|
|
#; say STDERR "$line =~ $re"; |
196
|
236
|
100
|
|
|
|
3430
|
if ( $line =~ $re ) { |
197
|
196
|
|
|
8
|
|
3913
|
print $out_formatter->write( { %+ } ); |
|
8
|
|
|
|
|
2005
|
|
|
8
|
|
|
|
|
2705
|
|
|
8
|
|
|
|
|
6008
|
|
198
|
|
|
|
|
|
|
} |
199
|
|
|
|
|
|
|
} |
200
|
|
|
|
|
|
|
} |
201
|
|
|
|
|
|
|
} |
202
|
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
sub _all_patterns { |
204
|
326
|
|
|
326
|
|
503
|
my ( $class ) = @_; |
205
|
326
|
|
|
|
|
620
|
return merge( \%PATTERNS, config() ); |
206
|
|
|
|
|
|
|
} |
207
|
|
|
|
|
|
|
|
208
|
|
|
|
|
|
|
sub _get_pattern { |
209
|
322
|
|
|
322
|
|
1014
|
my ( $class, $pattern_name, $field_name ) = @_; |
210
|
|
|
|
|
|
|
|
211
|
|
|
|
|
|
|
#; say STDERR "_get_pattern( $pattern_name, $field_name )"; |
212
|
|
|
|
|
|
|
|
213
|
|
|
|
|
|
|
# Handle nested patterns |
214
|
322
|
|
|
|
|
834
|
my @parts = split /[.]/, $pattern_name; |
215
|
322
|
|
|
|
|
607
|
my $pattern = $class->_all_patterns->{ shift @parts }; |
216
|
322
|
|
|
|
|
6876
|
for my $part ( @parts ) { |
217
|
120
|
50
|
|
|
|
338
|
if ( !$pattern->{ $part } ) { |
218
|
|
|
|
|
|
|
# warn "Could not find pattern $pattern_name for field $field_name\n"; |
219
|
0
|
0
|
|
|
|
0
|
if ( $field_name ) { |
220
|
0
|
|
|
|
|
0
|
return "%{$pattern_name:$field_name}"; |
221
|
|
|
|
|
|
|
} |
222
|
0
|
|
|
|
|
0
|
return "%{$pattern_name}"; |
223
|
|
|
|
|
|
|
} |
224
|
|
|
|
|
|
|
|
225
|
120
|
|
|
|
|
263
|
$pattern = $pattern->{ $part }; |
226
|
|
|
|
|
|
|
} |
227
|
|
|
|
|
|
|
|
228
|
|
|
|
|
|
|
# Handle the "default" pattern for a pattern group |
229
|
322
|
100
|
|
|
|
710
|
if ( ref $pattern eq 'HASH' ) { |
230
|
4
|
|
33
|
|
|
21
|
$pattern = $pattern->{ $parts[-1] || $pattern_name }; |
231
|
|
|
|
|
|
|
} |
232
|
|
|
|
|
|
|
|
233
|
322
|
100
|
|
|
|
551
|
if ( $field_name ) { |
234
|
284
|
|
|
|
|
817
|
return "(?<$field_name>" . $class->parse_pattern( $pattern ) . ")"; |
235
|
|
|
|
|
|
|
} |
236
|
38
|
|
|
|
|
94
|
return "(?:" . $class->parse_pattern( $pattern ) . ")"; |
237
|
|
|
|
|
|
|
} |
238
|
|
|
|
|
|
|
|
239
|
|
|
|
|
|
|
sub parse_pattern { |
240
|
390
|
|
|
390
|
0
|
681
|
my ( $class, $pattern ) = @_; |
241
|
390
|
|
|
|
|
1433
|
$pattern =~ s/\%\{([^:}]+)(?::([^:}]+))?\}/$class->_get_pattern( $1, $2 )/ge; |
|
322
|
|
|
|
|
753
|
|
242
|
|
|
|
|
|
|
#; say STDERR 'PATTERN: ' . $pattern; |
243
|
390
|
|
|
|
|
2398
|
return $pattern; |
244
|
|
|
|
|
|
|
} |
245
|
|
|
|
|
|
|
|
246
|
|
|
|
|
|
|
sub config { |
247
|
333
|
|
|
333
|
0
|
1081
|
my $conf_file = path( File::HomeDir->my_home, '.yertl', 'ygrok.yml' ); |
248
|
333
|
|
|
|
|
16543
|
my $config = {}; |
249
|
333
|
100
|
|
|
|
800
|
if ( $conf_file->exists ) { |
250
|
31
|
|
|
|
|
302
|
my $yaml = load_module( format => 'yaml' )->new( input => $conf_file->openr ); |
251
|
31
|
|
|
|
|
87
|
( $config ) = $yaml->read; |
252
|
|
|
|
|
|
|
} |
253
|
333
|
|
|
|
|
2508
|
return $config; |
254
|
|
|
|
|
|
|
} |
255
|
|
|
|
|
|
|
|
256
|
|
|
|
|
|
|
sub config_pattern { |
257
|
7
|
|
|
7
|
0
|
15
|
my ( $pattern_name, $pattern ) = @_; |
258
|
7
|
|
|
|
|
16
|
my $all_config = config(); |
259
|
7
|
|
|
|
|
12
|
my $pattern_category = $all_config; |
260
|
7
|
|
|
|
|
21
|
my @parts = split /[.]/, $pattern_name; |
261
|
7
|
|
|
|
|
24
|
for my $part ( @parts[0..$#parts-1] ) { |
262
|
5
|
|
100
|
|
|
25
|
$pattern_category = $pattern_category->{ $part } ||= {}; |
263
|
|
|
|
|
|
|
} |
264
|
|
|
|
|
|
|
|
265
|
7
|
50
|
|
|
|
19
|
if ( $pattern ) { |
266
|
7
|
|
|
|
|
28
|
my $conf_file = path( File::HomeDir->my_home, '.yertl', 'ygrok.yml' ); |
267
|
7
|
100
|
|
|
|
365
|
if ( !$conf_file->exists ) { |
268
|
4
|
|
|
|
|
28
|
$conf_file->touchpath; |
269
|
|
|
|
|
|
|
} |
270
|
7
|
|
|
|
|
1355
|
$pattern_category->{ $parts[-1] } = $pattern; |
271
|
7
|
|
|
|
|
23
|
my $yaml = load_module( format => 'yaml' )->new; |
272
|
7
|
|
|
|
|
20
|
$conf_file->spew( $yaml->write( $all_config ) ); |
273
|
7
|
|
|
|
|
2456
|
return; |
274
|
|
|
|
|
|
|
} |
275
|
0
|
|
0
|
|
|
0
|
return $pattern_category->{ $parts[-1] } || ''; |
276
|
|
|
|
|
|
|
} |
277
|
|
|
|
|
|
|
|
278
|
|
|
|
|
|
|
1; |
279
|
|
|
|
|
|
|
|
280
|
|
|
|
|
|
|
__END__ |