| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package Text::FixedLengthMultiline; |
|
2
|
|
|
|
|
|
|
|
|
3
|
4
|
|
|
4
|
|
143474
|
use utf8; |
|
|
4
|
|
|
|
|
42
|
|
|
|
4
|
|
|
|
|
24
|
|
|
4
|
4
|
|
|
4
|
|
137
|
use strict; |
|
|
4
|
|
|
|
|
7
|
|
|
|
4
|
|
|
|
|
124
|
|
|
5
|
4
|
|
|
4
|
|
22
|
use warnings; |
|
|
4
|
|
|
|
|
12
|
|
|
|
4
|
|
|
|
|
113
|
|
|
6
|
|
|
|
|
|
|
|
|
7
|
4
|
|
|
4
|
|
21
|
use Carp; |
|
|
4
|
|
|
|
|
6
|
|
|
|
4
|
|
|
|
|
356
|
|
|
8
|
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
BEGIN { |
|
10
|
4
|
|
|
4
|
|
83
|
our $VERSION = '0.071'; |
|
11
|
|
|
|
|
|
|
} |
|
12
|
|
|
|
|
|
|
|
|
13
|
4
|
|
|
4
|
|
21
|
use constant FIRST => 1; |
|
|
4
|
|
|
|
|
10
|
|
|
|
4
|
|
|
|
|
344
|
|
|
14
|
4
|
|
|
4
|
|
29
|
use constant LAST => 2; |
|
|
4
|
|
|
|
|
10
|
|
|
|
4
|
|
|
|
|
166
|
|
|
15
|
4
|
|
|
4
|
|
22
|
use constant ANY => 3; # FIRST | LAST |
|
|
4
|
|
|
|
|
6
|
|
|
|
4
|
|
|
|
|
15600
|
|
|
16
|
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
my %continue_styles = ( |
|
18
|
|
|
|
|
|
|
'first' => FIRST, |
|
19
|
|
|
|
|
|
|
'last' => LAST, |
|
20
|
|
|
|
|
|
|
'any' => ANY |
|
21
|
|
|
|
|
|
|
); |
|
22
|
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
=encoding utf8 |
|
25
|
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
=head1 NAME |
|
27
|
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
Text::FixedLengthMultiline - Parse text data formatted in space separated columns optionnaly on multiple lines |
|
29
|
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
31
|
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
use Text::FixedLengthMultiline; |
|
33
|
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
#234567890 12345678901234567890 12 |
|
35
|
|
|
|
|
|
|
my $text = <
|
|
36
|
|
|
|
|
|
|
Alice Pretty girl! |
|
37
|
|
|
|
|
|
|
Bob Good old uncle Bob, |
|
38
|
|
|
|
|
|
|
very old. 92 |
|
39
|
|
|
|
|
|
|
Charlie Best known as Waldo 14 |
|
40
|
|
|
|
|
|
|
or Wally. Where's |
|
41
|
|
|
|
|
|
|
he? |
|
42
|
|
|
|
|
|
|
EOT |
|
43
|
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
my $fmt = Text::FixedLengthMultiline->new(format => ['!name' => 10, 1, 'comment~' => 20, 1, 'age' => -2 ]); |
|
45
|
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
# Compute the RegExp that matches the first line |
|
47
|
|
|
|
|
|
|
my $first_line_re = $fmt->get_first_line_re(); |
|
48
|
|
|
|
|
|
|
# Compute the RegExp that matches a continuation line |
|
49
|
|
|
|
|
|
|
my $continue_line_re = $fmt->get_continue_line_re(); |
|
50
|
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
my @data; |
|
52
|
|
|
|
|
|
|
my $err; |
|
53
|
|
|
|
|
|
|
while ($text =~ /^([^\n]+)$/gm) { |
|
54
|
|
|
|
|
|
|
my $line = $1; |
|
55
|
|
|
|
|
|
|
push @data, {} if $line =~ $first_line_re; |
|
56
|
|
|
|
|
|
|
if (($err = $fmt->parse_line($line, $data[$#data])) > 0) { |
|
57
|
|
|
|
|
|
|
warn "Parse error at column $err"; |
|
58
|
|
|
|
|
|
|
} |
|
59
|
|
|
|
|
|
|
} |
|
60
|
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
=head1 DESCRIPTION |
|
62
|
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
A row of data can be splitted on multiple lines of text with cell content |
|
64
|
|
|
|
|
|
|
flowing in the same column space. |
|
65
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
=head1 FORMAT SPECIFICATION |
|
67
|
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
The format is given at the contruction time as an array ref. Modifying the |
|
69
|
|
|
|
|
|
|
array content after the construction call is done at your own risks. |
|
70
|
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
The array contains the ordered sequence of columns. Each colmun can either be: |
|
72
|
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
=over |
|
74
|
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
=item * |
|
76
|
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
a positive integer representing the size of a separating column which is |
|
78
|
|
|
|
|
|
|
expected to always be filled with spaces. |
|
79
|
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
=item * |
|
81
|
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
a string that matches this regexp: /^(?#mandatory)!?(?#name)[:alnum:]\w*(?:(?#multi)~(?#cont).?)?$/ |
|
83
|
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
=over |
|
85
|
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
=item * |
|
87
|
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
C means the column is mandatory |
|
89
|
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
=item * |
|
91
|
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
C is the column name. This will be the key for the hash after parsing. |
|
93
|
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
=item * |
|
95
|
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
C<~> means the column data can be on multiple lines. |
|
97
|
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
=back |
|
99
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
=back |
|
101
|
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
=head1 METHODS |
|
103
|
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
=head2 new() |
|
105
|
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
Arguments: |
|
107
|
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
=over |
|
109
|
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
=item * |
|
111
|
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
C: an array reference following the L. |
|
113
|
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
=item * |
|
115
|
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
C |
|
117
|
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
=back |
|
119
|
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
Example: |
|
121
|
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
my $format = Text::FixedLengthMultiline->new(format => [ 2, col1 => 4, 1, '!col2' => 4 ]); |
|
123
|
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
=cut |
|
125
|
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
# TODO add 'continue-style': first/last/any |
|
127
|
|
|
|
|
|
|
sub new |
|
128
|
|
|
|
|
|
|
{ |
|
129
|
41
|
|
|
41
|
1
|
23045
|
my $class = shift; |
|
130
|
41
|
|
|
|
|
133
|
my %params = @_; |
|
131
|
41
|
100
|
66
|
|
|
479
|
(%params && exists $params{'format'}) or croak('['.__PACKAGE__."] Missing format"); |
|
132
|
40
|
100
|
|
|
|
488
|
ref $params{'format'} eq 'ARRAY' or croak('['.__PACKAGE__."] Invalid format: array ref expected"); |
|
133
|
37
|
|
|
|
|
56
|
my $continue_style = ANY; |
|
134
|
37
|
100
|
|
|
|
108
|
if (exists $params{'continue_style'}) { |
|
135
|
23
|
|
|
|
|
33
|
my $style = $params{'continue_style'}; |
|
136
|
23
|
100
|
|
|
|
575
|
croak('['.__PACKAGE__."] Invalid continue_style: first/last/any expected") unless exists $continue_styles{$style}; |
|
137
|
19
|
|
|
|
|
527
|
$continue_style = $continue_styles{$style}; |
|
138
|
|
|
|
|
|
|
} |
|
139
|
|
|
|
|
|
|
# TODO Check the format, and report errors |
|
140
|
33
|
|
33
|
|
|
200
|
my $self = { |
|
141
|
|
|
|
|
|
|
FORMAT => $params{'format'}, |
|
142
|
|
|
|
|
|
|
# Maybe doing a copy would be a good idea... |
|
143
|
|
|
|
|
|
|
# But we trust the user even if we all know |
|
144
|
|
|
|
|
|
|
# he's a crazy programmer |
|
145
|
|
|
|
|
|
|
DEBUG => exists $params{'debug'} && $params{'debug'}, |
|
146
|
|
|
|
|
|
|
CONTINUE_STYLE => $continue_style |
|
147
|
|
|
|
|
|
|
}; |
|
148
|
33
|
|
|
|
|
88
|
bless $self, $class; |
|
149
|
33
|
|
|
|
|
109
|
return $self; |
|
150
|
|
|
|
|
|
|
} |
|
151
|
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
=head2 C |
|
154
|
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
Parse a table. |
|
156
|
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
my @table = $fmt->parse_table($text); |
|
158
|
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
Returns an array of hashes. Each hash is a row of data. |
|
160
|
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
=cut |
|
162
|
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
sub parse_table |
|
164
|
|
|
|
|
|
|
{ |
|
165
|
1
|
|
|
1
|
1
|
10
|
my ($self, $text) = @_; |
|
166
|
1
|
|
|
|
|
3
|
my $first_re = $self->get_first_line_re(); |
|
167
|
1
|
|
|
|
|
2
|
my @table; |
|
168
|
|
|
|
|
|
|
my $err; |
|
169
|
1
|
|
|
|
|
1
|
my $linenum = 1; |
|
170
|
1
|
|
|
|
|
19
|
(pos $text) = 0; |
|
171
|
1
|
|
|
|
|
8
|
while ($text =~ /^([^\n]+)$/gm) { |
|
172
|
6
|
|
|
|
|
11
|
my $line = $1; |
|
173
|
6
|
100
|
|
|
|
62
|
push @table, {} if $line =~ $first_re; |
|
174
|
6
|
50
|
|
|
|
16
|
if (($err = $self->parse_line($line, $table[$#table])) > 0) { |
|
175
|
0
|
|
|
|
|
0
|
croak "Parse error at line $linenum, column $err"; |
|
176
|
|
|
|
|
|
|
} |
|
177
|
|
|
|
|
|
|
} |
|
178
|
1
|
|
|
|
|
4
|
return @table; |
|
179
|
|
|
|
|
|
|
} |
|
180
|
|
|
|
|
|
|
|
|
181
|
|
|
|
|
|
|
|
|
182
|
|
|
|
|
|
|
|
|
183
|
|
|
|
|
|
|
|
|
184
|
|
|
|
|
|
|
=head2 C |
|
185
|
|
|
|
|
|
|
|
|
186
|
|
|
|
|
|
|
Parse a line of text and add parsed data to the hash. |
|
187
|
|
|
|
|
|
|
|
|
188
|
|
|
|
|
|
|
my $error = $fmt->parse_line($line, \%row_data); |
|
189
|
|
|
|
|
|
|
|
|
190
|
|
|
|
|
|
|
Multiple calls to C with the same hashref may be needed to fully |
|
191
|
|
|
|
|
|
|
read a "logical line" in case some columns are multiline. |
|
192
|
|
|
|
|
|
|
|
|
193
|
|
|
|
|
|
|
Returns: |
|
194
|
|
|
|
|
|
|
|
|
195
|
|
|
|
|
|
|
=over |
|
196
|
|
|
|
|
|
|
|
|
197
|
|
|
|
|
|
|
=item * |
|
198
|
|
|
|
|
|
|
|
|
199
|
|
|
|
|
|
|
C<-col>: Parse error. The value is a negative integer indicating the |
|
200
|
|
|
|
|
|
|
character position in the line where the parse error occured. |
|
201
|
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
=item * |
|
203
|
|
|
|
|
|
|
|
|
204
|
|
|
|
|
|
|
C<0>: OK |
|
205
|
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
=item * |
|
207
|
|
|
|
|
|
|
|
|
208
|
|
|
|
|
|
|
C: Missing data: need to feed next line to fill remining columns. |
|
209
|
|
|
|
|
|
|
The value is the character position of the column where data is expected. |
|
210
|
|
|
|
|
|
|
|
|
211
|
|
|
|
|
|
|
=back |
|
212
|
|
|
|
|
|
|
|
|
213
|
|
|
|
|
|
|
=cut |
|
214
|
|
|
|
|
|
|
|
|
215
|
|
|
|
|
|
|
# TODO: return a RE in case of missing data |
|
216
|
|
|
|
|
|
|
sub parse_line |
|
217
|
|
|
|
|
|
|
{ |
|
218
|
40
|
|
|
40
|
1
|
16571
|
my ($self, $line, $data) = @_; |
|
219
|
40
|
|
|
|
|
56
|
my @fmt = @{$self->{FORMAT}}; |
|
|
40
|
|
|
|
|
116
|
|
|
220
|
40
|
|
|
|
|
52
|
my $col = 1; |
|
221
|
40
|
|
|
|
|
45
|
my $ret = 0; |
|
222
|
40
|
100
|
|
|
|
99
|
$line = '' unless defined $line; |
|
223
|
40
|
|
|
|
|
92
|
while ($#fmt >= 0) { |
|
224
|
86
|
|
|
|
|
114
|
my $f = shift @fmt; |
|
225
|
86
|
|
|
|
|
88
|
my $data_len; |
|
226
|
86
|
100
|
|
|
|
390
|
if ($f =~ /^\d+$/) { |
|
|
|
50
|
|
|
|
|
|
|
227
|
|
|
|
|
|
|
# Spaces to skip |
|
228
|
36
|
50
|
|
|
|
69
|
next if $f == 0; |
|
229
|
36
|
|
|
|
|
140
|
$line =~ /^( {0,$f})/; |
|
230
|
36
|
|
|
|
|
54
|
$data_len = length $1; |
|
231
|
36
|
100
|
|
|
|
127
|
return -($col+$data_len) if $data_len < $f; |
|
232
|
|
|
|
|
|
|
} elsif ($f =~ /^(!?)([A-Za-z_]\w*)(?:(~)(.?))?$/) { |
|
233
|
50
|
|
|
|
|
141
|
my ($mandatory, $field, $multi, $cont) = ($1, $2, $3, $4); |
|
234
|
50
|
100
|
|
|
|
110
|
$multi = 0 unless defined $multi; |
|
235
|
50
|
50
|
66
|
|
|
137
|
$cont = ' ' unless defined $cont && $cont ne ''; |
|
236
|
50
|
|
|
|
|
58
|
my $len = shift @fmt; |
|
237
|
50
|
50
|
|
|
|
90
|
next if $len == 0; |
|
238
|
50
|
|
|
|
|
91
|
my $d = substr($line, 0, abs $len); |
|
239
|
50
|
|
|
|
|
49
|
$data_len = length $d; |
|
240
|
50
|
100
|
|
|
|
79
|
if ($len > 0) { |
|
241
|
38
|
|
|
|
|
128
|
$d =~ s/ +$//; |
|
242
|
|
|
|
|
|
|
} else { |
|
243
|
12
|
|
|
|
|
27
|
$d .= ' ' x -($data_len+$len); |
|
244
|
12
|
|
|
|
|
27
|
$d =~ s/^ +//; |
|
245
|
|
|
|
|
|
|
} |
|
246
|
50
|
100
|
|
|
|
104
|
if ($d ne '') { |
|
247
|
37
|
100
|
100
|
|
|
157
|
return -$col if !$multi && exists $data->{$field}; |
|
248
|
34
|
100
|
100
|
|
|
103
|
if ($multi && exists $data->{$field}) { |
|
249
|
|
|
|
|
|
|
# Multilines => concat |
|
250
|
6
|
|
|
|
|
13
|
$data->{$field} .= "\n" . $d; |
|
251
|
6
|
50
|
33
|
|
|
43
|
$ret = $col if $ret == 0 && $d =~ /\Q$cont\E$/; |
|
252
|
|
|
|
|
|
|
} else { |
|
253
|
28
|
|
|
|
|
57
|
$data->{$field} = $d; |
|
254
|
|
|
|
|
|
|
} |
|
255
|
|
|
|
|
|
|
} |
|
256
|
47
|
100
|
100
|
|
|
163
|
$ret = $col if $mandatory && !exists $data->{$field} && $ret == 0; |
|
|
|
|
66
|
|
|
|
|
|
257
|
|
|
|
|
|
|
} else { |
|
258
|
0
|
|
|
|
|
0
|
warn "Bad format!\n"; |
|
259
|
0
|
|
|
|
|
0
|
return -$col; |
|
260
|
|
|
|
|
|
|
} |
|
261
|
69
|
|
|
|
|
78
|
$col += $data_len; |
|
262
|
69
|
|
|
|
|
122
|
$line = substr($line, $data_len); |
|
263
|
69
|
100
|
66
|
|
|
229
|
last if $ret != 0 && $line eq ''; |
|
264
|
|
|
|
|
|
|
} |
|
265
|
23
|
100
|
|
|
|
95
|
return -$col unless $line =~ /^ *$/; |
|
266
|
20
|
|
|
|
|
107
|
return $ret; |
|
267
|
|
|
|
|
|
|
} |
|
268
|
|
|
|
|
|
|
|
|
269
|
|
|
|
|
|
|
|
|
270
|
|
|
|
|
|
|
|
|
271
|
|
|
|
|
|
|
|
|
272
|
|
|
|
|
|
|
sub _dump_line_re() |
|
273
|
|
|
|
|
|
|
{ |
|
274
|
0
|
|
|
0
|
|
0
|
while ($#_ >= 0) { |
|
275
|
0
|
|
|
|
|
0
|
print "> [" . (shift @_) ."]\n"; |
|
276
|
0
|
|
|
|
|
0
|
print ' [' . join('] :: [', @{ (shift @_) }) . "]\n"; |
|
|
0
|
|
|
|
|
0
|
|
|
277
|
|
|
|
|
|
|
} |
|
278
|
|
|
|
|
|
|
} |
|
279
|
|
|
|
|
|
|
|
|
280
|
|
|
|
|
|
|
sub _serialize_line_re() |
|
281
|
|
|
|
|
|
|
{ |
|
282
|
|
|
|
|
|
|
#&_dump_line_re(@_); |
|
283
|
76
|
|
|
76
|
|
117
|
my $re = ''; |
|
284
|
76
|
|
|
|
|
186
|
while ($#_ > -1) { |
|
285
|
|
|
|
|
|
|
# Pop the alternatives for the end of the line |
|
286
|
69
|
|
|
|
|
77
|
my @b = grep(!/^$/, @{ (pop @_) }); |
|
|
69
|
|
|
|
|
250
|
|
|
287
|
|
|
|
|
|
|
# TODO remove duplicates |
|
288
|
69
|
100
|
|
|
|
162
|
push @b, $re if $re ne ''; |
|
289
|
69
|
100
|
|
|
|
155
|
if ($#b > 0) { |
|
|
|
100
|
|
|
|
|
|
|
290
|
23
|
|
|
|
|
73
|
$re = "(?:" . join('|', @b) . ")"; |
|
291
|
|
|
|
|
|
|
} elsif ($#b > -1) { |
|
292
|
42
|
|
|
|
|
63
|
$re = $b[0]; |
|
293
|
|
|
|
|
|
|
} else { |
|
294
|
4
|
|
|
|
|
6
|
$re = ''; |
|
295
|
|
|
|
|
|
|
} |
|
296
|
|
|
|
|
|
|
# Pop |
|
297
|
69
|
|
|
|
|
256
|
$re = (pop @_) . $re; |
|
298
|
|
|
|
|
|
|
#print "$re\n"; |
|
299
|
|
|
|
|
|
|
} |
|
300
|
76
|
|
|
|
|
236
|
return $re; |
|
301
|
|
|
|
|
|
|
} |
|
302
|
|
|
|
|
|
|
|
|
303
|
|
|
|
|
|
|
sub _parse_column_format($;$) |
|
304
|
|
|
|
|
|
|
{ |
|
305
|
104
|
|
|
104
|
|
148
|
my ($format, $width) = @_; |
|
306
|
104
|
50
|
|
|
|
449
|
if ($format =~ /^(!?)([A-Za-z_]\w*)(?:(~)(.?))?$/) { |
|
307
|
104
|
|
|
|
|
683
|
my %def = ( |
|
308
|
|
|
|
|
|
|
mandatory => $1, |
|
309
|
|
|
|
|
|
|
name => $2, |
|
310
|
|
|
|
|
|
|
multi => $3, |
|
311
|
|
|
|
|
|
|
cont => $4, |
|
312
|
|
|
|
|
|
|
width => abs $width |
|
313
|
|
|
|
|
|
|
); |
|
314
|
104
|
100
|
|
|
|
273
|
$def{multi} = '' unless defined $def{multi}; |
|
315
|
104
|
100
|
|
|
|
239
|
$def{align} = $width > 0 ? 'L' : 'R'; |
|
316
|
104
|
|
|
|
|
751
|
return %def; |
|
317
|
|
|
|
|
|
|
} else { |
|
318
|
0
|
|
|
|
|
0
|
return undef; |
|
319
|
|
|
|
|
|
|
} |
|
320
|
|
|
|
|
|
|
} |
|
321
|
|
|
|
|
|
|
|
|
322
|
|
|
|
|
|
|
sub _build_repetition_re($;$;$) |
|
323
|
|
|
|
|
|
|
{ |
|
324
|
101
|
|
|
101
|
|
136
|
my ($c, $min, $max) = @_; |
|
325
|
101
|
50
|
|
|
|
193
|
return '' if $max <= 0; |
|
326
|
101
|
50
|
|
|
|
156
|
if ($max == 1) { |
|
327
|
0
|
0
|
|
|
|
0
|
$c .= '?' if $min <= 0; |
|
328
|
|
|
|
|
|
|
} else { |
|
329
|
101
|
100
|
|
|
|
162
|
if ($min < $max) { |
|
330
|
45
|
|
|
|
|
91
|
$c .= "{$min,$max}"; |
|
331
|
|
|
|
|
|
|
} else { |
|
332
|
56
|
|
|
|
|
222
|
$c .= "{$max}"; |
|
333
|
|
|
|
|
|
|
} |
|
334
|
|
|
|
|
|
|
} |
|
335
|
101
|
|
|
|
|
202
|
return $c; |
|
336
|
|
|
|
|
|
|
} |
|
337
|
|
|
|
|
|
|
|
|
338
|
|
|
|
|
|
|
sub _build_column_re |
|
339
|
|
|
|
|
|
|
{ |
|
340
|
78
|
|
|
78
|
|
95
|
my $self = shift; |
|
341
|
78
|
|
|
|
|
391
|
my %def = @_; |
|
342
|
78
|
|
100
|
|
|
407
|
my $branch_multi = $def{multi} && exists $def{branch_multi} && $def{branch_multi}; |
|
343
|
78
|
50
|
|
|
|
184
|
my $re_label = $self->{DEBUG} ? "(?#_$def{mandatory}$def{name}$def{multi}_)" : ''; |
|
344
|
78
|
100
|
|
|
|
311
|
my $re_spaces = $def{spaces} > 0 ? ' '.($def{spaces} > 1 ? "{$def{spaces}}":'') : ''; |
|
|
|
100
|
|
|
|
|
|
|
345
|
78
|
|
|
|
|
94
|
my $width = $def{width}; |
|
346
|
78
|
|
|
|
|
85
|
my ($re_col_mand, $re_col_end, $re_col); |
|
347
|
78
|
100
|
100
|
|
|
227
|
if ($def{mandatory} || $branch_multi) { |
|
348
|
56
|
|
|
|
|
67
|
$re_col_mand = $re_spaces . $re_label; |
|
349
|
56
|
100
|
|
|
|
129
|
if ($def{align} eq 'L') { # Left aligned |
|
350
|
45
|
|
|
|
|
100
|
$re_col_end = &_build_repetition_re('.', 0, $width-1); |
|
351
|
45
|
100
|
|
|
|
89
|
unless ($branch_multi) { |
|
352
|
40
|
|
|
|
|
164
|
$re_col_mand .= '\S'; |
|
353
|
40
|
|
|
|
|
89
|
$re_col = &_build_repetition_re('.', $width-1, $width-1); |
|
354
|
|
|
|
|
|
|
} else { |
|
355
|
5
|
|
|
|
|
11
|
$re_col = &_build_repetition_re('.', $width, $width); |
|
356
|
5
|
|
|
|
|
10
|
$re_col_end = '\S' . $re_col_end; |
|
357
|
|
|
|
|
|
|
} |
|
358
|
|
|
|
|
|
|
} else { |
|
359
|
11
|
|
|
|
|
28
|
$re_col_mand .= &_build_repetition_re('.', $width-1, $width-1); |
|
360
|
11
|
50
|
|
|
|
25
|
unless ($branch_multi) { |
|
361
|
11
|
|
|
|
|
16
|
$re_col_end = $re_col = ''; |
|
362
|
11
|
|
|
|
|
16
|
$re_col_mand .= '\S'; |
|
363
|
|
|
|
|
|
|
} else { |
|
364
|
0
|
|
|
|
|
0
|
$re_col_end = '\S'; |
|
365
|
0
|
|
|
|
|
0
|
$re_col = '.'; |
|
366
|
|
|
|
|
|
|
} |
|
367
|
|
|
|
|
|
|
} |
|
368
|
|
|
|
|
|
|
} else { |
|
369
|
22
|
|
|
|
|
30
|
$re_col_mand = ''; |
|
370
|
22
|
50
|
|
|
|
75
|
$re_col_end = $re_spaces . $re_label . '.' . ($width > 1 ? "{0,$width}" : '?'); |
|
371
|
22
|
50
|
|
|
|
96
|
$re_col_end = "(?:$re_col_end)?" if $def{spaces}; |
|
372
|
22
|
50
|
|
|
|
57
|
$re_col = $re_spaces . $re_label . '.' . ($width > 1 ? "{$width}" : '' ); |
|
373
|
|
|
|
|
|
|
} |
|
374
|
|
|
|
|
|
|
#print "$def{name} => /$re_col_mand/ /$re_col_end/ /$re_col/ (spaces = $def{spaces})\n"; |
|
375
|
78
|
|
|
|
|
411
|
return ($re_col_mand, $re_col_end, $re_col); |
|
376
|
|
|
|
|
|
|
} |
|
377
|
|
|
|
|
|
|
|
|
378
|
|
|
|
|
|
|
sub _has_multi(@) |
|
379
|
|
|
|
|
|
|
{ |
|
380
|
17
|
|
|
17
|
|
35
|
foreach (@_) { |
|
381
|
43
|
100
|
|
|
|
129
|
return 1 if /!?[_[:alpha:]]\w+~/; |
|
382
|
|
|
|
|
|
|
} |
|
383
|
12
|
|
|
|
|
38
|
return 0; |
|
384
|
|
|
|
|
|
|
} |
|
385
|
|
|
|
|
|
|
|
|
386
|
|
|
|
|
|
|
|
|
387
|
|
|
|
|
|
|
# @_ is the format |
|
388
|
|
|
|
|
|
|
# TODO handle the case where all columns are optionnal |
|
389
|
|
|
|
|
|
|
# The RE is then the union of the cases where one of the colmuns, up to the first multi, is mandatory |
|
390
|
|
|
|
|
|
|
sub _build_first_line_re |
|
391
|
|
|
|
|
|
|
{ |
|
392
|
37
|
|
|
37
|
|
46
|
my $self = shift; |
|
393
|
37
|
|
|
|
|
48
|
my $branch_multi = shift; |
|
394
|
37
|
|
|
|
|
40
|
my $spaces = 0; |
|
395
|
37
|
|
|
|
|
612
|
my @re = (); |
|
396
|
37
|
|
|
|
|
49
|
my $re_acc = ''; # Accumulator |
|
397
|
37
|
|
|
|
|
101
|
my $multi = '~'; # Force the initialisation of @re |
|
398
|
37
|
|
|
|
|
99
|
while ($#_ >= 0) { |
|
399
|
121
|
|
|
|
|
155
|
my $f = shift; |
|
400
|
121
|
100
|
|
|
|
437
|
if ($f =~ /^\d+$/) { |
|
401
|
60
|
|
|
|
|
152
|
$spaces += $f; |
|
402
|
|
|
|
|
|
|
} else { |
|
403
|
61
|
|
|
|
|
114
|
my %def = &_parse_column_format($f, shift); |
|
404
|
61
|
100
|
100
|
|
|
279
|
if ($multi && ($branch_multi || $#re == -1)) { |
|
|
|
|
66
|
|
|
|
|
|
405
|
|
|
|
|
|
|
# The previous column was a multi. The following fields may not be |
|
406
|
|
|
|
|
|
|
# on this line but on one of the next ones. |
|
407
|
|
|
|
|
|
|
# So the end of the line is optionnal. |
|
408
|
|
|
|
|
|
|
# We are starting a new altenative in the RE. |
|
409
|
42
|
|
|
|
|
80
|
push @re, $re_acc, [ ]; |
|
410
|
42
|
|
|
|
|
70
|
$re_acc = ''; |
|
411
|
|
|
|
|
|
|
} |
|
412
|
61
|
|
|
|
|
223
|
my ($re_col_mand, $re_col_end, $re_col) = $self->_build_column_re(%def, spaces => $spaces); |
|
413
|
61
|
100
|
|
|
|
156
|
if ($def{mandatory}) { |
|
414
|
|
|
|
|
|
|
# Flush optional columns and append this column |
|
415
|
40
|
|
|
|
|
91
|
$re[$#re-1] .= $re_acc . $re_col_mand; |
|
416
|
40
|
100
|
|
|
|
83
|
if ($re_col_end eq '') { |
|
417
|
8
|
|
|
|
|
16
|
$re[$#re] = [ ]; |
|
418
|
|
|
|
|
|
|
} else { |
|
419
|
32
|
|
|
|
|
72
|
$re[$#re] = [ $re_col_end ]; |
|
420
|
|
|
|
|
|
|
} |
|
421
|
40
|
|
|
|
|
64
|
$re_acc = $re_col; |
|
422
|
|
|
|
|
|
|
} else { |
|
423
|
|
|
|
|
|
|
# Save column format for later |
|
424
|
21
|
|
|
|
|
28
|
push @{$re[$#re]}, $re_acc . $re_col_mand . $re_col_end; |
|
|
21
|
|
|
|
|
57
|
|
|
425
|
21
|
|
|
|
|
38
|
$re_acc .= $re_col_mand . $re_col; |
|
426
|
|
|
|
|
|
|
} |
|
427
|
61
|
|
|
|
|
67
|
$spaces = 0; |
|
428
|
61
|
|
|
|
|
270
|
$multi = $def{multi}; |
|
429
|
|
|
|
|
|
|
} |
|
430
|
|
|
|
|
|
|
} |
|
431
|
37
|
|
|
|
|
133
|
return @re; |
|
432
|
|
|
|
|
|
|
} |
|
433
|
|
|
|
|
|
|
|
|
434
|
|
|
|
|
|
|
sub _build_continue_line_re |
|
435
|
|
|
|
|
|
|
{ |
|
436
|
39
|
|
|
39
|
|
51
|
my $self = shift; |
|
437
|
39
|
|
|
|
|
46
|
my $spaces = 0; |
|
438
|
39
|
|
|
|
|
45
|
my $multi = '~'; # Force the initialisation of @re |
|
439
|
39
|
|
|
|
|
102
|
while ($#_ >= 0) { |
|
440
|
85
|
|
|
|
|
120
|
my $f = shift; |
|
441
|
85
|
100
|
|
|
|
326
|
if ($f =~ /^\d+$/) { |
|
442
|
42
|
|
|
|
|
105
|
$spaces += $f; |
|
443
|
|
|
|
|
|
|
} else { |
|
444
|
43
|
|
|
|
|
84
|
my %def = &_parse_column_format($f, shift); |
|
445
|
43
|
100
|
|
|
|
131
|
unless ($def{multi}) { |
|
446
|
26
|
|
|
|
|
35
|
$spaces += $def{width}; |
|
447
|
26
|
|
|
|
|
109
|
next; |
|
448
|
|
|
|
|
|
|
} |
|
449
|
17
|
|
|
|
|
23
|
my @re; |
|
450
|
17
|
|
|
|
|
20
|
my ($re_col_end, $re_col); |
|
451
|
17
|
|
|
|
|
63
|
($re[0], $re_col_end, $re_col) = $self->_build_column_re(%def, spaces => $spaces, branch_multi => &_has_multi(@_)); |
|
452
|
17
|
|
|
|
|
86
|
push @re, [ $re_col_end ]; |
|
453
|
17
|
|
|
|
|
21
|
my @re_end; |
|
454
|
17
|
100
|
|
|
|
100
|
push @re_end, &_serialize_line_re($self->_build_continue_line_re(@_)) if $self->{CONTINUE_STYLE} & FIRST; |
|
455
|
17
|
100
|
|
|
|
69
|
push @re_end, &_serialize_line_re($self->_build_first_line_re(1, @_)) if $self->{CONTINUE_STYLE} & LAST; |
|
456
|
17
|
|
|
|
|
79
|
@re_end = grep !/^$/, @re_end; |
|
457
|
|
|
|
|
|
|
#pop @re_end if $#re_end == 1 && $re_end[1] eq $re_end[0]; |
|
458
|
17
|
100
|
|
|
|
50
|
push @re, $re_col, [ @re_end ] if (@re_end); |
|
459
|
17
|
|
|
|
|
97
|
return @re; |
|
460
|
|
|
|
|
|
|
} |
|
461
|
|
|
|
|
|
|
} |
|
462
|
22
|
|
|
|
|
56
|
return (); |
|
463
|
|
|
|
|
|
|
} |
|
464
|
|
|
|
|
|
|
|
|
465
|
|
|
|
|
|
|
=head2 C |
|
466
|
|
|
|
|
|
|
|
|
467
|
|
|
|
|
|
|
Returns a regular expression that matches the first line of a "logical line" |
|
468
|
|
|
|
|
|
|
of data. |
|
469
|
|
|
|
|
|
|
|
|
470
|
|
|
|
|
|
|
my $re = $fmt->get_first_line_re(); |
|
471
|
|
|
|
|
|
|
|
|
472
|
|
|
|
|
|
|
=cut |
|
473
|
|
|
|
|
|
|
|
|
474
|
|
|
|
|
|
|
sub get_first_line_re |
|
475
|
|
|
|
|
|
|
{ |
|
476
|
27
|
|
|
27
|
1
|
291
|
my $self = shift; |
|
477
|
27
|
100
|
|
|
|
87
|
if (!exists $self->{FIRST_LINE_RE}) { |
|
478
|
26
|
|
|
|
|
54
|
my @re; |
|
479
|
26
|
100
|
|
|
|
68
|
if ($self->{CONTINUE_STYLE} == FIRST) { |
|
480
|
6
|
|
|
|
|
10
|
@re = $self->_build_first_line_re(0, @{$self->{FORMAT}}); |
|
|
6
|
|
|
|
|
24
|
|
|
481
|
|
|
|
|
|
|
} else { |
|
482
|
20
|
|
|
|
|
35
|
@re = $self->_build_first_line_re(1, @{$self->{FORMAT}}); |
|
|
20
|
|
|
|
|
73
|
|
|
483
|
|
|
|
|
|
|
} |
|
484
|
26
|
|
|
|
|
63
|
my $re = &_serialize_line_re(@re); |
|
485
|
26
|
100
|
|
|
|
668
|
$self->{FIRST_LINE_RE} = ($re eq '' ? undef : qr/^$re *$/); |
|
486
|
|
|
|
|
|
|
} |
|
487
|
27
|
|
|
|
|
185
|
return $self->{FIRST_LINE_RE}; |
|
488
|
|
|
|
|
|
|
} |
|
489
|
|
|
|
|
|
|
|
|
490
|
|
|
|
|
|
|
=head2 C |
|
491
|
|
|
|
|
|
|
|
|
492
|
|
|
|
|
|
|
Returns a regular expression that matches the 2nd line and the following |
|
493
|
|
|
|
|
|
|
lines of a "logical line". |
|
494
|
|
|
|
|
|
|
|
|
495
|
|
|
|
|
|
|
my $re = $fmt->get_continue_line_re(); |
|
496
|
|
|
|
|
|
|
|
|
497
|
|
|
|
|
|
|
Returns undef if the format specification does not contains any column that |
|
498
|
|
|
|
|
|
|
can be splitted on multiples lines. |
|
499
|
|
|
|
|
|
|
|
|
500
|
|
|
|
|
|
|
=cut |
|
501
|
|
|
|
|
|
|
|
|
502
|
|
|
|
|
|
|
# continue-style: first (only cont columns can appear on a continue line) |
|
503
|
|
|
|
|
|
|
sub get_continue_line_re |
|
504
|
|
|
|
|
|
|
{ |
|
505
|
26
|
|
|
26
|
1
|
54
|
my $self = shift; |
|
506
|
26
|
50
|
|
|
|
127
|
if (!exists $self->{CONTINUE_LINE_RE}) { |
|
507
|
26
|
|
|
|
|
35
|
my @re = $self->_build_continue_line_re(@{$self->{FORMAT}}); |
|
|
26
|
|
|
|
|
93
|
|
|
508
|
|
|
|
|
|
|
#&_dump_line_re(@re); |
|
509
|
26
|
|
|
|
|
57
|
my $re = &_serialize_line_re(@re); |
|
510
|
26
|
100
|
|
|
|
599
|
$self->{CONTINUE_LINE_RE} = ($re eq '' ? undef : qr/^$re *$/); |
|
511
|
|
|
|
|
|
|
} |
|
512
|
26
|
|
|
|
|
169
|
return $self->{CONTINUE_LINE_RE}; |
|
513
|
|
|
|
|
|
|
} |
|
514
|
|
|
|
|
|
|
|
|
515
|
|
|
|
|
|
|
1; # Magic for module end |
|
516
|
|
|
|
|
|
|
|
|
517
|
|
|
|
|
|
|
__END__ |