line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Data::Tubes::Plugin::Parser; |
2
|
14
|
|
|
14
|
|
1442
|
use strict; |
|
14
|
|
|
|
|
28
|
|
|
14
|
|
|
|
|
460
|
|
3
|
14
|
|
|
14
|
|
68
|
use warnings; |
|
14
|
|
|
|
|
30
|
|
|
14
|
|
|
|
|
430
|
|
4
|
14
|
|
|
14
|
|
71
|
use English qw< -no_match_vars >; |
|
14
|
|
|
|
|
32
|
|
|
14
|
|
|
|
|
102
|
|
5
|
14
|
|
|
14
|
|
5655
|
use Data::Dumper; |
|
14
|
|
|
|
|
62
|
|
|
14
|
|
|
|
|
1153
|
|
6
|
|
|
|
|
|
|
our $VERSION = '0.740'; |
7
|
|
|
|
|
|
|
|
8
|
14
|
|
|
14
|
|
110
|
use Log::Log4perl::Tiny qw< :easy :dead_if_first >; |
|
14
|
|
|
|
|
29
|
|
|
14
|
|
|
|
|
175
|
|
9
|
|
|
|
|
|
|
|
10
|
14
|
|
|
|
|
1318
|
use Data::Tubes::Util qw< |
11
|
|
|
|
|
|
|
assert_all_different |
12
|
|
|
|
|
|
|
generalized_hashy |
13
|
|
|
|
|
|
|
metadata |
14
|
|
|
|
|
|
|
normalize_args |
15
|
|
|
|
|
|
|
shorter_sub_names |
16
|
|
|
|
|
|
|
test_all_equal |
17
|
|
|
|
|
|
|
trim |
18
|
|
|
|
|
|
|
unzip |
19
|
14
|
|
|
14
|
|
5620
|
>; |
|
14
|
|
|
|
|
39
|
|
20
|
14
|
|
|
14
|
|
4201
|
use Data::Tubes::Plugin::Util qw< identify >; |
|
14
|
|
|
|
|
47
|
|
|
14
|
|
|
|
|
50485
|
|
21
|
|
|
|
|
|
|
my %global_defaults = ( |
22
|
|
|
|
|
|
|
input => 'raw', |
23
|
|
|
|
|
|
|
output => 'structured', |
24
|
|
|
|
|
|
|
); |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
sub parse_by_format { |
27
|
24
|
|
|
24
|
1
|
20433
|
my %args = normalize_args(@_, |
28
|
|
|
|
|
|
|
[{%global_defaults, name => 'parse by format'}, 'format']); |
29
|
24
|
|
|
|
|
126
|
identify(\%args); |
30
|
|
|
|
|
|
|
|
31
|
24
|
|
|
|
|
57
|
my $format = $args{format}; |
32
|
24
|
50
|
|
|
|
64
|
LOGDIE "parser of type 'format' needs a definition" |
33
|
|
|
|
|
|
|
unless defined $format; |
34
|
|
|
|
|
|
|
|
35
|
24
|
|
|
|
|
159
|
my @items = split m{(\W+)}, $format; |
36
|
24
|
50
|
|
|
|
70
|
return parse_single(key => $items[0]) if @items == 1; |
37
|
|
|
|
|
|
|
|
38
|
24
|
|
|
|
|
103
|
my ($keys, $separators) = unzip(\@items); |
39
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
# all keys MUST be different, otherwise some fields are just trumping |
41
|
|
|
|
|
|
|
# on each other |
42
|
24
|
50
|
|
|
|
45
|
eval { assert_all_different($keys); } |
|
24
|
|
|
|
|
76
|
|
43
|
|
|
|
|
|
|
or LOGDIE "'format' parser [$format] " |
44
|
|
|
|
|
|
|
. "has duplicate key $EVAL_ERROR->{message}"; |
45
|
|
|
|
|
|
|
|
46
|
24
|
|
100
|
|
|
105
|
my $value = $args{value} //= ['whatever']; |
47
|
24
|
100
|
|
|
|
60
|
$value = [$value] unless ref $value; |
48
|
24
|
|
100
|
|
|
129
|
my $multiple = |
49
|
|
|
|
|
|
|
(ref($value) ne 'ARRAY') |
50
|
|
|
|
|
|
|
|| (scalar(@$value) > 1) |
51
|
|
|
|
|
|
|
|| ($value->[0] ne 'whatever'); |
52
|
|
|
|
|
|
|
|
53
|
24
|
100
|
100
|
|
|
121
|
return parse_by_separators( |
54
|
|
|
|
|
|
|
%args, |
55
|
|
|
|
|
|
|
keys => $keys, |
56
|
|
|
|
|
|
|
separators => $separators |
57
|
|
|
|
|
|
|
) if $multiple || !test_all_equal(@$separators); |
58
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
# a simple split will do if all separators are the same |
60
|
13
|
|
|
|
|
63
|
return parse_by_split( |
61
|
|
|
|
|
|
|
%args, |
62
|
|
|
|
|
|
|
keys => $keys, |
63
|
|
|
|
|
|
|
separator => $separators->[0] |
64
|
|
|
|
|
|
|
); |
65
|
|
|
|
|
|
|
} ## end sub parse_by_format |
66
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
sub parse_by_regex { |
68
|
4
|
|
|
4
|
1
|
7280
|
my %args = |
69
|
|
|
|
|
|
|
normalize_args(@_, |
70
|
|
|
|
|
|
|
[{%global_defaults, name => 'parse by regex'}, 'regex']); |
71
|
4
|
|
|
|
|
32
|
identify(\%args); |
72
|
|
|
|
|
|
|
|
73
|
4
|
|
|
|
|
14
|
my $name = $args{name}; |
74
|
4
|
|
|
|
|
7
|
my $regex = $args{regex}; |
75
|
4
|
50
|
|
|
|
15
|
LOGDIE "parse_by_regex needs a regex" |
76
|
|
|
|
|
|
|
unless defined $regex; |
77
|
|
|
|
|
|
|
|
78
|
4
|
|
|
|
|
28
|
$regex = qr{$regex}; |
79
|
4
|
|
|
|
|
9
|
my $input = $args{input}; |
80
|
4
|
|
|
|
|
12
|
my $output = $args{output}; |
81
|
|
|
|
|
|
|
return sub { |
82
|
4
|
|
|
4
|
|
25
|
my $record = shift; |
83
|
4
|
50
|
|
|
|
38
|
$record->{$input} =~ m{$regex} |
84
|
|
|
|
|
|
|
or die { |
85
|
|
|
|
|
|
|
message => "'$name': invalid record, regex is $regex", |
86
|
|
|
|
|
|
|
input => $input, |
87
|
|
|
|
|
|
|
record => $record, |
88
|
|
|
|
|
|
|
}; |
89
|
4
|
|
|
|
|
89
|
my $retval = {%+}; |
90
|
4
|
|
|
|
|
20
|
$record->{$output} = $retval; |
91
|
4
|
|
|
|
|
12
|
return $record; |
92
|
4
|
|
|
|
|
34
|
}; |
93
|
|
|
|
|
|
|
} ## end sub parse_by_regex |
94
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
sub _resolve_separator { |
96
|
63
|
|
|
63
|
|
135
|
my ($separator, $args) = @_; |
97
|
63
|
50
|
|
|
|
137
|
return unless defined $separator; |
98
|
63
|
50
|
|
|
|
172
|
$separator = $separator->($args) if ref($separator) eq 'CODE'; |
99
|
63
|
|
|
|
|
95
|
my $ref = ref $separator; |
100
|
63
|
100
|
|
|
|
141
|
return $separator if $ref eq 'Regexp'; |
101
|
47
|
50
|
|
|
|
90
|
LOGCROAK "$args->{name}: unknown separator type $ref" if $ref; |
102
|
47
|
|
|
|
|
86
|
$separator = quotemeta $separator; |
103
|
47
|
|
|
|
|
515
|
return qr{(?-i:$separator)}; |
104
|
|
|
|
|
|
|
} ## end sub _resolve_separator |
105
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
sub _resolve_value { |
107
|
25
|
|
|
25
|
|
53
|
my ($value, $args) = @_; |
108
|
25
|
|
100
|
|
|
81
|
$value //= 'whatever'; |
109
|
25
|
50
|
|
|
|
56
|
$value = $value->($args) if ref($value) eq 'CODE'; |
110
|
25
|
|
|
|
|
45
|
my $ref = ref $value; |
111
|
25
|
100
|
66
|
|
|
94
|
($value, $ref) = ([$value], 'ARRAY') if (!$ref) || ($ref eq 'Regexp'); |
112
|
25
|
50
|
|
|
|
67
|
LOGCROAK "$args->{name}: unknown value type $ref" if $ref ne 'ARRAY'; |
113
|
|
|
|
|
|
|
|
114
|
25
|
|
|
|
|
39
|
my (%flag_for, @regexps); |
115
|
25
|
|
|
|
|
50
|
for my $part (@$value) { |
116
|
31
|
|
|
|
|
49
|
my $ref = ref $part; |
117
|
31
|
50
|
|
|
|
177
|
if ($ref eq 'Regexp') { |
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
118
|
0
|
|
|
|
|
0
|
push @regexps, $part; |
119
|
|
|
|
|
|
|
} |
120
|
|
|
|
|
|
|
elsif ( |
121
|
|
|
|
|
|
|
$part =~ m{\A(?: |
122
|
|
|
|
|
|
|
(?:single|double)[-_]quoted |
123
|
|
|
|
|
|
|
| escaped |
124
|
|
|
|
|
|
|
| whatever |
125
|
|
|
|
|
|
|
)\z}mxs |
126
|
|
|
|
|
|
|
) |
127
|
|
|
|
|
|
|
{ |
128
|
30
|
|
|
|
|
74
|
$part =~ s{-}{_}mxs; |
129
|
30
|
|
|
|
|
81
|
$flag_for{$part} = 1; |
130
|
|
|
|
|
|
|
} ## end elsif ($part =~ m{\A(?: )}) |
131
|
|
|
|
|
|
|
elsif ($part eq 'quoted') { |
132
|
0
|
|
|
|
|
0
|
$flag_for{single_quoted} = 1; |
133
|
0
|
|
|
|
|
0
|
$flag_for{double_quoted} = 1; |
134
|
|
|
|
|
|
|
} |
135
|
|
|
|
|
|
|
elsif ($part eq 'specials') { |
136
|
1
|
|
|
|
|
3
|
$flag_for{single_quoted} = 1; |
137
|
1
|
|
|
|
|
2
|
$flag_for{double_quoted} = 1; |
138
|
1
|
|
|
|
|
3
|
$flag_for{escaped} = 1; |
139
|
|
|
|
|
|
|
} |
140
|
|
|
|
|
|
|
elsif ($ref) { |
141
|
0
|
|
|
|
|
0
|
LOGCROAK "$args->{name}: unknown part of type $ref"; |
142
|
|
|
|
|
|
|
} |
143
|
|
|
|
|
|
|
else { |
144
|
0
|
|
|
|
|
0
|
LOGCROAK "$args->{name}: unknown part $part"; |
145
|
|
|
|
|
|
|
} |
146
|
|
|
|
|
|
|
} ## end for my $part (@$value) |
147
|
|
|
|
|
|
|
|
148
|
25
|
|
|
|
|
69
|
my @escape; |
149
|
25
|
100
|
|
|
|
59
|
if ($flag_for{single_quoted}) { |
150
|
7
|
|
|
|
|
13
|
push @escape, q{'}; |
151
|
7
|
|
|
|
|
24
|
unshift @regexps, q{(?mxs: '[^']*' )}; |
152
|
|
|
|
|
|
|
} |
153
|
25
|
100
|
|
|
|
53
|
if ($flag_for{double_quoted}) { |
154
|
3
|
|
|
|
|
6
|
push @escape, q{"}; |
155
|
3
|
|
|
|
|
5
|
unshift @regexps, q{(?mxs: "(?: [^\\"] | \\\\.)*" )}; |
156
|
|
|
|
|
|
|
} |
157
|
25
|
100
|
|
|
|
48
|
if ($flag_for{escaped}) { |
158
|
7
|
|
|
|
|
12
|
push @escape, '\\'; |
159
|
7
|
|
|
|
|
19
|
my $escape = quotemeta join '', @escape; |
160
|
7
|
|
|
|
|
17
|
push @regexps, qq{(?mxs-i: (?: [^$escape] | \\\\.)*?)}; |
161
|
|
|
|
|
|
|
} |
162
|
25
|
100
|
|
|
|
59
|
if ($flag_for{whatever}) { |
163
|
16
|
|
|
|
|
32
|
push @regexps, qq{(?mxs:.*?)}; |
164
|
|
|
|
|
|
|
} |
165
|
|
|
|
|
|
|
|
166
|
25
|
|
|
|
|
75
|
my $regex = '(' . join('|', @regexps) . ')'; |
167
|
25
|
|
|
|
|
128
|
return ($regex, \%flag_for); |
168
|
|
|
|
|
|
|
} ## end sub _resolve_value |
169
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
sub _resolve_decode { |
171
|
25
|
|
|
25
|
|
44
|
my $args = shift; |
172
|
25
|
|
|
|
|
43
|
my $name = $args->{name}; |
173
|
25
|
|
|
|
|
35
|
my $escape = $args->{escaped}; |
174
|
25
|
|
|
|
|
39
|
my $squote = $args->{single_quoted}; |
175
|
25
|
|
|
|
|
36
|
my $dquote = $args->{double_quoted}; |
176
|
25
|
|
|
|
|
39
|
my $vdecode = $args->{decode}; |
177
|
25
|
|
|
|
|
36
|
my $decode = $args->{decode_values}; |
178
|
25
|
50
|
100
|
|
|
183
|
if ($vdecode) { |
|
|
100
|
100
|
|
|
|
|
179
|
|
|
|
|
|
|
$decode ||= sub { |
180
|
0
|
|
|
0
|
|
0
|
my $values = shift; |
181
|
0
|
|
|
|
|
0
|
for my $value (@$values) { |
182
|
0
|
|
|
|
|
0
|
$value = $vdecode->($value); |
183
|
|
|
|
|
|
|
} |
184
|
0
|
|
|
|
|
0
|
return $values; |
185
|
|
|
|
|
|
|
} |
186
|
0
|
|
0
|
|
|
0
|
} ## end if ($vdecode) |
187
|
|
|
|
|
|
|
elsif ($escape || $squote || $dquote) { |
188
|
|
|
|
|
|
|
$decode ||= sub { |
189
|
12
|
|
|
12
|
|
17
|
my $values = shift; |
190
|
12
|
|
|
|
|
34
|
for my $i (0 .. $#$values) { |
191
|
41
|
|
|
|
|
74
|
my $value = $values->[$i]; |
192
|
41
|
50
|
|
|
|
80
|
my $len = length $value or next; |
193
|
41
|
|
|
|
|
66
|
my $first = substr $value, 0, 1; |
194
|
41
|
100
|
100
|
|
|
159
|
if ($dquote && $first eq q{"}) { |
|
|
100
|
100
|
|
|
|
|
|
|
100
|
|
|
|
|
|
195
|
5
|
50
|
33
|
|
|
17
|
die {message => "'$name': invalid record, " |
196
|
|
|
|
|
|
|
. "unterminated double quote at field $i (0-based)" |
197
|
|
|
|
|
|
|
} |
198
|
|
|
|
|
|
|
unless $len > 1 && substr($value, -1, 1) eq q{"}; |
199
|
5
|
|
|
|
|
12
|
$values->[$i] = substr $value, 1, $len - 2; # unquote |
200
|
5
|
|
|
|
|
19
|
$values->[$i] =~ s{\\(.)}{$1}gmxs; # unescape |
201
|
|
|
|
|
|
|
} ## end if ($dquote && $first ...) |
202
|
|
|
|
|
|
|
elsif ($squote && $first eq q{'}) { |
203
|
11
|
50
|
33
|
|
|
46
|
die {message => "'$name': invalid record, " |
204
|
|
|
|
|
|
|
. "unterminated single quote at field $i (0-based)", |
205
|
|
|
|
|
|
|
} |
206
|
|
|
|
|
|
|
unless $len > 1 && substr($value, -1, 1) eq q{'}; |
207
|
11
|
|
|
|
|
31
|
$values->[$i] = substr $value, 1, $len - 2; # unquote |
208
|
|
|
|
|
|
|
} ## end elsif ($squote && $first ...) |
209
|
|
|
|
|
|
|
elsif ($escape) { |
210
|
21
|
|
|
|
|
69
|
$values->[$i] =~ s{\\(.)}{$1}gmxs; # unescape |
211
|
|
|
|
|
|
|
} |
212
|
|
|
|
|
|
|
} ## end for my $i (0 .. $#$values) |
213
|
12
|
|
|
|
|
44
|
return $values; |
214
|
|
|
|
|
|
|
} |
215
|
11
|
|
50
|
|
|
89
|
} ## end elsif ($escape || $squote...) |
216
|
25
|
|
|
|
|
57
|
return $decode; |
217
|
|
|
|
|
|
|
} ## end sub _resolve_decode |
218
|
|
|
|
|
|
|
|
219
|
|
|
|
|
|
|
sub parse_by_separators { |
220
|
16
|
|
|
16
|
1
|
2996
|
my %args = normalize_args(@_, |
221
|
|
|
|
|
|
|
[{%global_defaults, name => 'parse by separators'}, 'separators']); |
222
|
16
|
|
|
|
|
89
|
identify(\%args); |
223
|
16
|
|
|
|
|
42
|
my $name = $args{name}; |
224
|
|
|
|
|
|
|
|
225
|
16
|
|
|
|
|
30
|
my $separators = $args{separators}; |
226
|
16
|
50
|
|
|
|
43
|
LOGDIE "parse_by_separators needs separators" |
227
|
|
|
|
|
|
|
unless defined $separators; |
228
|
16
|
|
|
|
|
34
|
$separators = [map { _resolve_separator($_, \%args) } @$separators]; |
|
41
|
|
|
|
|
103
|
|
229
|
|
|
|
|
|
|
|
230
|
16
|
|
|
|
|
37
|
my $keys = $args{keys}; |
231
|
16
|
|
|
|
|
29
|
my ($delta, $n_keys); |
232
|
16
|
100
|
|
|
|
39
|
if (defined $keys) { |
233
|
12
|
|
|
|
|
21
|
$n_keys = scalar @$keys; |
234
|
12
|
|
|
|
|
26
|
$delta = $n_keys - scalar(@$separators); |
235
|
12
|
50
|
33
|
|
|
56
|
LOGDIE "parse_by_separators 0 <= #keys - #separators <= 1" |
236
|
|
|
|
|
|
|
if ($delta < 0) || ($delta > 1); |
237
|
|
|
|
|
|
|
} ## end if (defined $keys) |
238
|
|
|
|
|
|
|
else { |
239
|
4
|
|
|
|
|
11
|
$keys = [0 .. scalar(@$separators)]; |
240
|
4
|
|
|
|
|
7
|
$n_keys = 0; # don't bother |
241
|
4
|
|
|
|
|
6
|
$delta = 1; |
242
|
|
|
|
|
|
|
} |
243
|
|
|
|
|
|
|
|
244
|
16
|
|
|
|
|
44
|
my ($value_regex, $flag_for) = _resolve_value($args{value}, \%args); |
245
|
|
|
|
|
|
|
|
246
|
16
|
|
|
|
|
32
|
my @items; |
247
|
16
|
|
|
|
|
44
|
for my $i (0 .. $#$keys) { |
248
|
57
|
|
|
|
|
89
|
push @items, $value_regex; |
249
|
57
|
100
|
|
|
|
133
|
push @items, $separators->[$i] if $i <= $#$separators; |
250
|
|
|
|
|
|
|
} |
251
|
|
|
|
|
|
|
|
252
|
|
|
|
|
|
|
# if not a separator, the last item becomes a catchall |
253
|
16
|
50
|
|
|
|
37
|
$items[-1] = '(.*)' if $delta > 0; |
254
|
|
|
|
|
|
|
|
255
|
|
|
|
|
|
|
# ready to generate the regexp. We bind the end to \z anyway because |
256
|
|
|
|
|
|
|
# the last element might be a separator |
257
|
16
|
|
|
|
|
45
|
my $format = join '', '(?:\\A', @items, '\\z)'; |
258
|
16
|
|
|
|
|
428
|
my $regex = qr{$format}; |
259
|
16
|
|
|
|
|
95
|
DEBUG "$name: regex will be: $regex"; |
260
|
|
|
|
|
|
|
|
261
|
|
|
|
|
|
|
# this sub will use the regexp above, do checking and return captured |
262
|
|
|
|
|
|
|
# values in a hash with @keys |
263
|
16
|
|
|
|
|
253
|
my $input = $args{input}; |
264
|
16
|
|
|
|
|
29
|
my $output = $args{output}; |
265
|
16
|
|
|
|
|
24
|
my $trim = $args{trim}; |
266
|
16
|
|
|
|
|
123
|
my $decode = _resolve_decode({%args, %$flag_for}); |
267
|
|
|
|
|
|
|
return sub { |
268
|
16
|
|
|
16
|
|
114
|
my $record = shift; |
269
|
16
|
50
|
|
|
|
234
|
my @values = $record->{$input} =~ m{$regex} |
270
|
|
|
|
|
|
|
or die { |
271
|
|
|
|
|
|
|
message => 'invalid record', |
272
|
|
|
|
|
|
|
record => $record, |
273
|
|
|
|
|
|
|
regex => $regex |
274
|
|
|
|
|
|
|
}; |
275
|
16
|
100
|
|
|
|
57
|
trim(@values) if $trim; |
276
|
16
|
100
|
|
|
|
36
|
if ($decode) { |
277
|
7
|
50
|
|
|
|
10
|
eval { @values = @{$decode->(\@values)}; 1 } or do { |
|
7
|
|
|
|
|
13
|
|
|
7
|
|
|
|
|
17
|
|
|
7
|
|
|
|
|
19
|
|
278
|
0
|
|
|
|
|
0
|
my $e = $@; |
279
|
0
|
0
|
|
|
|
0
|
$e = {message => $e} unless ref $e; |
280
|
0
|
0
|
|
|
|
0
|
$e = {%$e, record => $record} if ref($e) eq 'HASH'; |
281
|
0
|
|
|
|
|
0
|
die $e; |
282
|
|
|
|
|
|
|
}; |
283
|
|
|
|
|
|
|
} ## end if ($decode) |
284
|
|
|
|
|
|
|
|
285
|
16
|
100
|
|
|
|
42
|
if ($n_keys) { |
286
|
12
|
|
|
|
|
19
|
my $n_values = scalar @values; |
287
|
12
|
50
|
|
|
|
27
|
die { |
288
|
|
|
|
|
|
|
message => "'$name': invalid record, expected $n_keys, " |
289
|
|
|
|
|
|
|
. "got $n_values only", |
290
|
|
|
|
|
|
|
values => \@values, |
291
|
|
|
|
|
|
|
record => $record |
292
|
|
|
|
|
|
|
} |
293
|
|
|
|
|
|
|
if $n_values < $n_keys; |
294
|
|
|
|
|
|
|
|
295
|
12
|
|
|
|
|
28
|
$record->{$output} = \my %retval; |
296
|
12
|
|
|
|
|
56
|
@retval{@$keys} = @values; |
297
|
|
|
|
|
|
|
} ## end if ($n_keys) |
298
|
|
|
|
|
|
|
else { |
299
|
4
|
|
|
|
|
9
|
$record->{$output} = \@values; |
300
|
|
|
|
|
|
|
} |
301
|
16
|
|
|
|
|
48
|
return $record; |
302
|
16
|
|
|
|
|
254
|
}; |
303
|
|
|
|
|
|
|
} ## end sub parse_by_separators |
304
|
|
|
|
|
|
|
|
305
|
|
|
|
|
|
|
sub parse_by_split { |
306
|
13
|
|
|
13
|
1
|
74
|
my %args = |
307
|
|
|
|
|
|
|
normalize_args(@_, |
308
|
|
|
|
|
|
|
[{%global_defaults, name => 'parse by split'}, 'separator']); |
309
|
13
|
|
|
|
|
67
|
identify(\%args); |
310
|
|
|
|
|
|
|
|
311
|
13
|
|
|
|
|
50
|
my $separator = _resolve_separator($args{separator}, \%args); |
312
|
|
|
|
|
|
|
|
313
|
13
|
|
|
|
|
75
|
my $name = $args{name}; |
314
|
13
|
|
|
|
|
25
|
my $keys = $args{keys}; |
315
|
13
|
50
|
|
|
|
38
|
my $n_keys = defined($keys) ? scalar(@$keys) : 0; |
316
|
13
|
|
|
|
|
28
|
my $input = $args{input}; |
317
|
13
|
|
|
|
|
24
|
my $output = $args{output}; |
318
|
13
|
|
100
|
|
|
54
|
my $allow_missing = $args{allow_missing} || 0; |
319
|
13
|
|
|
|
|
21
|
my $trim = $args{trim}; |
320
|
|
|
|
|
|
|
|
321
|
|
|
|
|
|
|
return sub { |
322
|
16
|
|
|
16
|
|
77
|
my $record = shift; |
323
|
|
|
|
|
|
|
|
324
|
16
|
|
|
|
|
78
|
my @values = split(/$separator/, $record->{$input}, $n_keys); |
325
|
16
|
100
|
|
|
|
44
|
trim(@values) if $trim; |
326
|
|
|
|
|
|
|
|
327
|
16
|
|
|
|
|
26
|
my $n_values = @values; |
328
|
16
|
100
|
|
|
|
130
|
die { |
329
|
|
|
|
|
|
|
message => "'$name': invalid record, expected $n_keys items, " |
330
|
|
|
|
|
|
|
. "got $n_values", |
331
|
|
|
|
|
|
|
input => $input, |
332
|
|
|
|
|
|
|
record => $record, |
333
|
|
|
|
|
|
|
} |
334
|
|
|
|
|
|
|
if $n_values + $allow_missing < $n_keys; |
335
|
|
|
|
|
|
|
|
336
|
11
|
|
|
|
|
27
|
$record->{$output} = \my %retval; |
337
|
11
|
|
|
|
|
71
|
@retval{@$keys} = @values; |
338
|
11
|
|
|
|
|
43
|
return $record; |
339
|
|
|
|
|
|
|
} |
340
|
13
|
50
|
|
|
|
160
|
if $n_keys; |
341
|
|
|
|
|
|
|
|
342
|
|
|
|
|
|
|
return sub { |
343
|
0
|
|
|
0
|
|
0
|
my $record = shift; |
344
|
0
|
|
|
|
|
0
|
my @retval = split /$separator/, $record->{$input}; |
345
|
0
|
0
|
|
|
|
0
|
trim(@retval) if $trim; |
346
|
0
|
|
|
|
|
0
|
$record->{$output} = \@retval; |
347
|
0
|
|
|
|
|
0
|
return $record; |
348
|
0
|
|
|
|
|
0
|
}; |
349
|
|
|
|
|
|
|
|
350
|
|
|
|
|
|
|
} ## end sub parse_by_split |
351
|
|
|
|
|
|
|
|
352
|
|
|
|
|
|
|
sub parse_by_value_separator { |
353
|
9
|
|
|
9
|
1
|
6273
|
my %args = normalize_args( |
354
|
|
|
|
|
|
|
@_, |
355
|
|
|
|
|
|
|
[ |
356
|
|
|
|
|
|
|
{%global_defaults, name => 'parse by value and separator'}, |
357
|
|
|
|
|
|
|
'separator' |
358
|
|
|
|
|
|
|
] |
359
|
|
|
|
|
|
|
); |
360
|
9
|
|
|
|
|
49
|
identify(\%args); |
361
|
9
|
|
|
|
|
20
|
my $name = $args{name}; |
362
|
|
|
|
|
|
|
|
363
|
9
|
|
|
|
|
29
|
my $separator = _resolve_separator($args{separator}, \%args); |
364
|
9
|
50
|
|
|
|
25
|
LOGCROAK "$name: argument separator is mandatory" |
365
|
|
|
|
|
|
|
unless defined $separator; |
366
|
|
|
|
|
|
|
|
367
|
9
|
|
|
|
|
25
|
my ($value, $flag_for) = _resolve_value($args{value}, \%args); |
368
|
9
|
|
|
|
|
63
|
my $decode = _resolve_decode({%args, %$flag_for}); |
369
|
|
|
|
|
|
|
|
370
|
9
|
|
|
|
|
26
|
my $keys = $args{keys}; |
371
|
9
|
100
|
|
|
|
19
|
my $n_keys = defined($keys) ? scalar(@$keys) : 0; |
372
|
9
|
|
|
|
|
13
|
my $input = $args{input}; |
373
|
9
|
|
|
|
|
14
|
my $output = $args{output}; |
374
|
9
|
|
50
|
|
|
25
|
my $allow_missing = $args{allow_missing} || 0; |
375
|
9
|
|
50
|
|
|
25
|
my $allow_surplus = $args{allow_surplus} || 0; |
376
|
9
|
|
|
|
|
12
|
my $trim = $args{trim}; |
377
|
9
|
|
|
|
|
95
|
my $go_global = $^V lt v5.18.0; |
378
|
|
|
|
|
|
|
|
379
|
|
|
|
|
|
|
return sub { |
380
|
10
|
|
|
10
|
|
756
|
my $record = shift; |
381
|
|
|
|
|
|
|
|
382
|
10
|
|
|
|
|
13
|
my @values; |
383
|
10
|
50
|
|
|
|
21
|
if ($go_global) { |
384
|
0
|
|
|
|
|
0
|
local our @global_values = (); |
385
|
0
|
|
|
|
|
0
|
my $collector = qr/(?{push @global_values, $^N})/; |
|
0
|
|
|
|
|
0
|
|
386
|
0
|
0
|
|
|
|
0
|
$record->{$input} =~ m/ |
387
|
|
|
|
|
|
|
\A (?: $value $separator $collector )* |
388
|
|
|
|
|
|
|
$value \z $collector |
389
|
|
|
|
|
|
|
/gmxs |
390
|
|
|
|
|
|
|
or die { |
391
|
|
|
|
|
|
|
message => 'invalid record', |
392
|
|
|
|
|
|
|
separator => $separator, |
393
|
|
|
|
|
|
|
value => $value, |
394
|
|
|
|
|
|
|
record => $record, |
395
|
|
|
|
|
|
|
}; |
396
|
0
|
|
|
|
|
0
|
@values = @global_values; |
397
|
|
|
|
|
|
|
} |
398
|
|
|
|
|
|
|
else { |
399
|
10
|
50
|
|
|
|
398
|
$record->{$input} =~ m/ |
400
|
30
|
|
|
|
|
165
|
\A (?: $value $separator (?{push @values, $^N}) )* |
401
|
10
|
|
|
|
|
60
|
$value \z (?{push @values, $^N}) |
402
|
|
|
|
|
|
|
/gmxs |
403
|
|
|
|
|
|
|
or die { |
404
|
|
|
|
|
|
|
message => 'invalid record', |
405
|
|
|
|
|
|
|
separator => $separator, |
406
|
|
|
|
|
|
|
value => $value, |
407
|
|
|
|
|
|
|
record => $record, |
408
|
|
|
|
|
|
|
}; |
409
|
|
|
|
|
|
|
} |
410
|
10
|
100
|
|
|
|
50
|
trim(@values) if $trim; |
411
|
10
|
100
|
|
|
|
24
|
if ($decode) { |
412
|
5
|
50
|
|
|
|
10
|
eval { @values = @{$decode->(\@values)}; 1 } or do { |
|
5
|
|
|
|
|
7
|
|
|
5
|
|
|
|
|
12
|
|
|
5
|
|
|
|
|
12
|
|
413
|
0
|
|
|
|
|
0
|
my $e = $EVAL_ERROR; |
414
|
0
|
0
|
|
|
|
0
|
$e = {message => $e} unless ref $e; |
415
|
0
|
0
|
|
|
|
0
|
$e = {%$e, record => $record} if ref($e) eq 'HASH'; |
416
|
0
|
|
|
|
|
0
|
die $e; |
417
|
|
|
|
|
|
|
}; |
418
|
|
|
|
|
|
|
} ## end if ($decode) |
419
|
|
|
|
|
|
|
|
420
|
10
|
100
|
|
|
|
22
|
if ($n_keys) { |
421
|
6
|
|
|
|
|
10
|
my $n_values = @values; |
422
|
6
|
50
|
33
|
|
|
25
|
die { |
423
|
|
|
|
|
|
|
message => "'$name': invalid record, expected $n_keys items, " |
424
|
|
|
|
|
|
|
. "got $n_values", |
425
|
|
|
|
|
|
|
input => $input, |
426
|
|
|
|
|
|
|
record => $record, |
427
|
|
|
|
|
|
|
} |
428
|
|
|
|
|
|
|
if ($n_values + $allow_missing < $n_keys) |
429
|
|
|
|
|
|
|
|| ($n_values - $allow_surplus > $n_keys); |
430
|
6
|
|
|
|
|
16
|
$record->{$output} = \my %retval; |
431
|
6
|
|
|
|
|
29
|
@retval{@$keys} = @values; |
432
|
|
|
|
|
|
|
} ## end if ($n_keys) |
433
|
|
|
|
|
|
|
else { |
434
|
4
|
|
|
|
|
9
|
$record->{$output} = \@values; |
435
|
|
|
|
|
|
|
} |
436
|
10
|
|
|
|
|
31
|
return $record; |
437
|
9
|
|
|
|
|
87
|
}; |
438
|
|
|
|
|
|
|
} ## end sub parse_by_value_separator |
439
|
|
|
|
|
|
|
|
440
|
|
|
|
|
|
|
sub parse_ghashy { |
441
|
3
|
|
|
3
|
1
|
4047
|
my %args = normalize_args(@_, |
442
|
|
|
|
|
|
|
{%global_defaults, default_key => '', name => 'parse ghashy'}); |
443
|
3
|
|
|
|
|
15
|
identify(\%args); |
444
|
|
|
|
|
|
|
|
445
|
3
|
50
|
|
|
|
6
|
my %defaults = %{$args{defaults} || {}}; |
|
3
|
|
|
|
|
18
|
|
446
|
3
|
|
|
|
|
8
|
my $input = $args{input}; |
447
|
3
|
|
|
|
|
5
|
my $output = $args{output}; |
448
|
|
|
|
|
|
|
|
449
|
|
|
|
|
|
|
# pre-compile capture thing from generalized_hashy |
450
|
3
|
|
|
|
|
13
|
$args{capture} = generalized_hashy(%args, text => undef)->{capture}; |
451
|
|
|
|
|
|
|
|
452
|
|
|
|
|
|
|
return sub { |
453
|
3
|
|
|
3
|
|
64
|
my $record = shift; |
454
|
3
|
|
|
|
|
11
|
my $outcome = generalized_hashy(%args, text => $record->{$input}); |
455
|
|
|
|
|
|
|
die { |
456
|
|
|
|
|
|
|
input => $input, |
457
|
|
|
|
|
|
|
message => $outcome->{failure}, |
458
|
|
|
|
|
|
|
outcome => $outcome, |
459
|
|
|
|
|
|
|
record => $record, |
460
|
|
|
|
|
|
|
} |
461
|
3
|
50
|
|
|
|
10
|
unless exists $outcome->{hash}; |
462
|
3
|
|
|
|
|
7
|
$record->{$output} = {%defaults, %{$outcome->{hash}}}; |
|
3
|
|
|
|
|
13
|
|
463
|
3
|
|
|
|
|
15
|
return $record; |
464
|
3
|
|
|
|
|
19
|
}; |
465
|
|
|
|
|
|
|
} ## end sub parse_ghashy |
466
|
|
|
|
|
|
|
|
467
|
|
|
|
|
|
|
sub parse_hashy { |
468
|
4
|
|
|
4
|
1
|
2263
|
my %args = normalize_args( |
469
|
|
|
|
|
|
|
@_, |
470
|
|
|
|
|
|
|
{ |
471
|
|
|
|
|
|
|
%global_defaults, |
472
|
|
|
|
|
|
|
chunks_separator => ' ', |
473
|
|
|
|
|
|
|
default_key => '', |
474
|
|
|
|
|
|
|
key_value_separator => '=', |
475
|
|
|
|
|
|
|
name => 'parse hashy', |
476
|
|
|
|
|
|
|
} |
477
|
|
|
|
|
|
|
); |
478
|
4
|
|
|
|
|
22
|
identify(\%args); |
479
|
4
|
50
|
|
|
|
8
|
my %defaults = %{$args{defaults} || {}}; |
|
4
|
|
|
|
|
27
|
|
480
|
4
|
|
|
|
|
11
|
my $input = $args{input}; |
481
|
4
|
|
|
|
|
8
|
my $output = $args{output}; |
482
|
|
|
|
|
|
|
return sub { |
483
|
3
|
|
|
3
|
|
550
|
my $record = shift; |
484
|
3
|
|
|
|
|
22
|
my $parsed = metadata($record->{$input}, %args); |
485
|
3
|
|
|
|
|
17
|
$record->{$output} = {%defaults, %$parsed}; |
486
|
3
|
|
|
|
|
19
|
return $record; |
487
|
4
|
|
|
|
|
26
|
}; |
488
|
|
|
|
|
|
|
} ## end sub parse_hashy |
489
|
|
|
|
|
|
|
|
490
|
|
|
|
|
|
|
sub parse_single { |
491
|
2
|
|
|
2
|
1
|
3154
|
my %args = normalize_args( |
492
|
|
|
|
|
|
|
@_, |
493
|
|
|
|
|
|
|
{ |
494
|
|
|
|
|
|
|
key => 'key', |
495
|
|
|
|
|
|
|
%global_defaults, |
496
|
|
|
|
|
|
|
} |
497
|
|
|
|
|
|
|
); |
498
|
2
|
|
|
|
|
16
|
identify(\%args); |
499
|
2
|
|
|
|
|
6
|
my $key = $args{key}; |
500
|
2
|
|
33
|
|
|
17
|
my $has_key = defined($key) && length($key); |
501
|
2
|
|
|
|
|
6
|
my $input = $args{input}; |
502
|
2
|
|
|
|
|
5
|
my $output = $args{output}; |
503
|
|
|
|
|
|
|
return sub { |
504
|
2
|
|
|
2
|
|
12
|
my $record = shift; |
505
|
|
|
|
|
|
|
$record->{$output} = |
506
|
2
|
50
|
|
|
|
12
|
$has_key ? {$key => $record->{$input}} : $record->{$input}; |
507
|
2
|
|
|
|
|
7
|
return $record; |
508
|
|
|
|
|
|
|
} |
509
|
2
|
|
|
|
|
18
|
} ## end sub parse_single |
510
|
|
|
|
|
|
|
|
511
|
|
|
|
|
|
|
shorter_sub_names(__PACKAGE__, 'parse_'); |
512
|
|
|
|
|
|
|
|
513
|
|
|
|
|
|
|
1; |