line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# You may distribute under the terms of either the GNU General Public License |
2
|
|
|
|
|
|
|
# or the Artistic License (the same terms as Perl itself) |
3
|
|
|
|
|
|
|
# |
4
|
|
|
|
|
|
|
# (C) Paul Evans, 2010-2022 -- leonerd@leonerd.org.uk |
5
|
|
|
|
|
|
|
|
6
|
|
|
|
|
|
|
package Parser::MGC 0.20; |
7
|
|
|
|
|
|
|
|
8
|
31
|
|
|
31
|
|
1418337
|
use v5.14; |
|
31
|
|
|
|
|
1556
|
|
9
|
31
|
|
|
31
|
|
141
|
use warnings; |
|
31
|
|
|
|
|
44
|
|
|
31
|
|
|
|
|
741
|
|
10
|
|
|
|
|
|
|
|
11
|
31
|
|
|
31
|
|
127
|
use Carp; |
|
31
|
|
|
|
|
48
|
|
|
31
|
|
|
|
|
1633
|
|
12
|
31
|
|
|
31
|
|
12849
|
use Feature::Compat::Try; |
|
31
|
|
|
|
|
8602
|
|
|
31
|
|
|
|
|
106
|
|
13
|
|
|
|
|
|
|
|
14
|
31
|
|
|
31
|
|
71230
|
use Scalar::Util qw( blessed ); |
|
31
|
|
|
|
|
61
|
|
|
31
|
|
|
|
|
3009
|
|
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
=head1 NAME |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
C<Parser::MGC> - build simple recursive-descent parsers |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
=head1 SYNOPSIS |
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
package My::Grammar::Parser; |
23
|
|
|
|
|
|
|
use base qw( Parser::MGC ); |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
sub parse |
26
|
|
|
|
|
|
|
{ |
27
|
|
|
|
|
|
|
my $self = shift; |
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
$self->sequence_of( sub { |
30
|
|
|
|
|
|
|
$self->any_of( |
31
|
|
|
|
|
|
|
sub { $self->token_int }, |
32
|
|
|
|
|
|
|
sub { $self->token_string }, |
33
|
|
|
|
|
|
|
sub { \$self->token_ident }, |
34
|
|
|
|
|
|
|
sub { $self->scope_of( "(", \&parse, ")" ) } |
35
|
|
|
|
|
|
|
); |
36
|
|
|
|
|
|
|
} ); |
37
|
|
|
|
|
|
|
} |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
my $parser = My::Grammar::Parser->new; |
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
my $tree = $parser->from_file( $ARGV[0] ); |
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
... |
44
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
=head1 DESCRIPTION |
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
This base class provides a low-level framework for building recursive-descent |
48
|
|
|
|
|
|
|
parsers that consume a given input string from left to right, returning a |
49
|
|
|
|
|
|
|
parse structure. It takes its name from the C<m//gc> regexps used to implement |
50
|
|
|
|
|
|
|
the token parsing behaviour. |
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
It provides a number of token-parsing methods, which each extract a |
53
|
|
|
|
|
|
|
grammatical token from the string. It also provides wrapping methods that can |
54
|
|
|
|
|
|
|
be used to build up a possibly-recursive grammar structure, by applying a |
55
|
|
|
|
|
|
|
structure around other parts of parsing code. |
56
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
=head2 Backtracking |
58
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
Each method, both token and structural, atomically either consumes a prefix of |
60
|
|
|
|
|
|
|
the string and returns its result, or fails and consumes nothing. This makes |
61
|
|
|
|
|
|
|
it simple to implement grammars that require backtracking. |
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
Several structure-forming methods have some form of "optional" behaviour; they |
64
|
|
|
|
|
|
|
can optionally consume some amount of input or take some particular choice, |
65
|
|
|
|
|
|
|
but if the code invoked inside that subsequently fails, the structure can |
66
|
|
|
|
|
|
|
backtrack and take some different behaviour. This is usually what is required |
67
|
|
|
|
|
|
|
when testing whether the structure of the input string matches some part of |
68
|
|
|
|
|
|
|
the grammar that is optional, or has multiple choices. |
69
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
However, once the choice of grammar has been made, it is often useful to be |
71
|
|
|
|
|
|
|
able to fix on that one choice, thus making subsequent failures propagate up |
72
|
|
|
|
|
|
|
rather than taking that alternative behaviour. Control of this backtracking |
73
|
|
|
|
|
|
|
is given by the C<commit> method; and careful use of this method is one of the |
74
|
|
|
|
|
|
|
key advantages that C<Parser::MGC> has over more simple parsing using single |
75
|
|
|
|
|
|
|
regexps alone. |
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
=cut |
78
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
=head1 CONSTRUCTOR |
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
=cut |
82
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
=head2 new |
84
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
$parser = Parser::MGC->new( %args ) |
86
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
Returns a new instance of a C<Parser::MGC> object. This must be called on a |
88
|
|
|
|
|
|
|
subclass that provides a method of the name provided as C<toplevel>, by default |
89
|
|
|
|
|
|
|
called C<parse>. |
90
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
Takes the following named arguments |
92
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
=over 8 |
94
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
=item toplevel => STRING |
96
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
Name of the toplevel method to use to start the parse from. If not supplied, |
98
|
|
|
|
|
|
|
will try to use a method called C<parse>. |
99
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
=item patterns => HASH |
101
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
Keys in this hash should map to quoted regexp (C<qr//>) references, to |
103
|
|
|
|
|
|
|
override the default patterns used to match tokens. See C<PATTERNS> below |
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
=item accept_0o_oct => BOOL |
106
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
If true, the C<token_int> method will also accept integers with a C<0o> prefix |
108
|
|
|
|
|
|
|
as octal. |
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
=back |
111
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
=cut |
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
=head1 PATTERNS |
115
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
The following pattern names are recognised. They may be passed to the |
117
|
|
|
|
|
|
|
constructor in the C<patterns> hash, or provided as a class method under the |
118
|
|
|
|
|
|
|
name C<pattern_I<name>>. |
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
=over 4 |
121
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
=item * ws |
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
Pattern used to skip whitespace between tokens. Defaults to C</[\s\n\t]+/> |
125
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
=item * comment |
127
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
Pattern used to skip comments between tokens. Undefined by default. |
129
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
=item * int |
131
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
Pattern used to parse an integer by C<token_int>. Defaults to |
133
|
|
|
|
|
|
|
C</-?(?:0x[[:xdigit:]]+|[[:digit:]]+)/>. If C<accept_0o_oct> is given, then |
134
|
|
|
|
|
|
|
this will be expanded to match C</0o[0-7]+/> as well. |
135
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
=item * float |
137
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
Pattern used to parse a floating-point number by C<token_float>. Defaults to |
139
|
|
|
|
|
|
|
C</-?(?:\d*\.\d+|\d+\.)(?:e-?\d+)?|-?\d+e-?\d+/i>. |
140
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
=item * ident |
142
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
Pattern used to parse an identifier by C<token_ident>. Defaults to |
144
|
|
|
|
|
|
|
C</[[:alpha:]_]\w*/> |
145
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
=item * string_delim |
147
|
|
|
|
|
|
|
|
148
|
|
|
|
|
|
|
Pattern used to delimit a string by C<token_string>. Defaults to C</["']/>. |
149
|
|
|
|
|
|
|
|
150
|
|
|
|
|
|
|
=back |
151
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
=cut |
153
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
# Names of every token pattern resolved by the constructor. For each name the
# constructor takes the value from its 'patterns' argument if one was given,
# otherwise it calls the method pattern_NAME to obtain the default.
my @patterns = (
   "ws",
   "comment",
   "int",
   "float",
   "ident",
   "string_delim",
);

# Default patterns. These are looked up by method call on the parser, so a
# subclass may replace any of them simply by defining its own pattern_NAME.
# pattern_comment is deliberately undef: no comment skipping by default.
use constant {
   pattern_ws           => qr/[\s\n\t]+/,
   pattern_comment      => undef,
   pattern_int          => qr/-?(?:0x[[:xdigit:]]+|[[:digit:]]+)/,
   pattern_float        => qr/-?(?:\d*\.\d+|\d+\.)(?:e-?\d+)?|-?\d+e-?\d+/i,
   pattern_ident        => qr/[[:alpha:]_]\w*/,
   pattern_string_delim => qr/["']/,
};
|
31
|
|
|
|
|
51
|
|
|
31
|
|
|
|
|
113440
|
|
169
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
sub new
{
   my ( $class, %args ) = @_;

   # The toplevel method must exist on the (sub)class being constructed
   my $toplevel = $args{toplevel} || "parse";
   croak "Expected to be a subclass that can ->$toplevel"
      unless $class->can( $toplevel );

   my $self = bless {
      toplevel    => $toplevel,
      patterns    => {},
      scope_level => 0,
   }, $class;

   my $resolved = $self->{patterns};

   # Caller-supplied patterns win; otherwise ask the pattern_NAME method, which
   # subclasses may override
   for ( @patterns ) {
      my $default_method = "pattern_$_";
      $resolved->{$_} = $args{patterns}{$_} || $self->$default_method;
   }

   # Optionally widen the integer pattern to accept 0o-prefixed octal
   $resolved->{int} = qr/0o[0-7]+|$self->{patterns}{int}/
      if $args{accept_0o_oct};

   # _skip is what gets skipped between tokens: whitespace, plus comments when
   # a comment pattern is defined
   $resolved->{_skip} = defined $resolved->{comment}
      ? qr/$self->{patterns}{ws}|$self->{patterns}{comment}/
      : $resolved->{ws};

   return $self;
}
201
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
=head1 METHODS |
203
|
|
|
|
|
|
|
|
204
|
|
|
|
|
|
|
=cut |
205
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
=head2 from_string |
207
|
|
|
|
|
|
|
|
208
|
|
|
|
|
|
|
$result = $parser->from_string( $str ) |
209
|
|
|
|
|
|
|
|
210
|
|
|
|
|
|
|
Parse the given literal string and return the result from the toplevel method. |
211
|
|
|
|
|
|
|
|
212
|
|
|
|
|
|
|
=cut |
213
|
|
|
|
|
|
|
|
214
|
|
|
|
|
|
|
sub from_string
{
   my ( $self, $str ) = @_;

   # Install the input and rewind the match position to its start
   $self->{str} = $str;
   pos( $self->{str} ) = 0;

   # Dispatch to whichever toplevel method was chosen at construction time
   my $method = $self->{toplevel};
   my $result = $self->$method;

   # A successful parse must have consumed all of the input
   $self->fail( "Expected end of input" ) unless $self->at_eos;

   return $result;
}
231
|
|
|
|
|
|
|
|
232
|
|
|
|
|
|
|
=head2 from_file |
233
|
|
|
|
|
|
|
|
234
|
|
|
|
|
|
|
$result = $parser->from_file( $file, %opts ) |
235
|
|
|
|
|
|
|
|
236
|
|
|
|
|
|
|
Parse the given file, which may be a pathname in a string, or an opened IO |
237
|
|
|
|
|
|
|
handle, and return the result from the toplevel method. |
238
|
|
|
|
|
|
|
|
239
|
|
|
|
|
|
|
The following options are recognised: |
240
|
|
|
|
|
|
|
|
241
|
|
|
|
|
|
|
=over 8 |
242
|
|
|
|
|
|
|
|
243
|
|
|
|
|
|
|
=item binmode => STRING |
244
|
|
|
|
|
|
|
|
245
|
|
|
|
|
|
|
If set, applies the given binmode to the filehandle before reading. Typically |
246
|
|
|
|
|
|
|
this can be used to set the encoding of the file. |
247
|
|
|
|
|
|
|
|
248
|
|
|
|
|
|
|
$parser->from_file( $file, binmode => ":encoding(UTF-8)" ) |
249
|
|
|
|
|
|
|
|
250
|
|
|
|
|
|
|
=back |
251
|
|
|
|
|
|
|
|
252
|
|
|
|
|
|
|
=cut |
253
|
|
|
|
|
|
|
|
254
|
|
|
|
|
|
|
sub from_file
{
   my $self = shift;
   my ( $file, %opts ) = @_;

   defined $file or croak "Expected a filename to ->from_file";

   # Remembered so that ->filename can report it
   $self->{filename} = $file;

   # $file may be an already-opened handle (any reference) or a pathname
   my $fh;
   if( ref $file ) {
      $fh = $file;
   }
   else {
      open $fh, "<", $file or die "Cannot open $file for reading - $!";
   }

   binmode $fh, $opts{binmode} if $opts{binmode};

   # Slurp the whole file in *scalar* context. Reading directly inside the
   # argument list would be list context, where an empty file yields an empty
   # list and ->from_string would be handed undef instead of "".
   my $content = do { local $/; <$fh> } // "";

   return $self->from_string( $content );
}
275
|
|
|
|
|
|
|
|
276
|
|
|
|
|
|
|
=head2 filename |
277
|
|
|
|
|
|
|
|
278
|
|
|
|
|
|
|
$filename = $parser->filename |
279
|
|
|
|
|
|
|
|
280
|
|
|
|
|
|
|
I |
281
|
|
|
|
|
|
|
|
282
|
|
|
|
|
|
|
Returns the name of the file currently being parsed, if invoked from within |
283
|
|
|
|
|
|
|
L</from_file>. |
284
|
|
|
|
|
|
|
|
285
|
|
|
|
|
|
|
=cut |
286
|
|
|
|
|
|
|
|
287
|
|
|
|
|
|
|
sub filename
{
   # Simple accessor for the value stored by ->from_file
   my ( $self ) = @_;
   return $self->{filename};
}
292
|
|
|
|
|
|
|
|
293
|
|
|
|
|
|
|
=head2 from_reader |
294
|
|
|
|
|
|
|
|
295
|
|
|
|
|
|
|
$result = $parser->from_reader( \&reader ) |
296
|
|
|
|
|
|
|
|
297
|
|
|
|
|
|
|
I |
298
|
|
|
|
|
|
|
|
299
|
|
|
|
|
|
|
Parse the input which is read by the C<reader> function. This function will be |
300
|
|
|
|
|
|
|
called in scalar context to generate portions of string to parse, being passed |
301
|
|
|
|
|
|
|
the C<$parser> object. The function should return C<undef> when it has no more |
302
|
|
|
|
|
|
|
string to return. |
303
|
|
|
|
|
|
|
|
304
|
|
|
|
|
|
|
$reader->( $parser ) |
305
|
|
|
|
|
|
|
|
306
|
|
|
|
|
|
|
Note that because it is not generally possible to detect exactly when more |
307
|
|
|
|
|
|
|
input may be required due to failed regexp parsing, the reader function is |
308
|
|
|
|
|
|
|
only invoked during searching for skippable whitespace. This makes it suitable |
309
|
|
|
|
|
|
|
for reading lines of a file in the common case where lines are considered as |
310
|
|
|
|
|
|
|
skippable whitespace, or for reading lines of input interactively from a |
311
|
|
|
|
|
|
|
user. It cannot be used in all cases (for example, reading fixed-size buffers |
312
|
|
|
|
|
|
|
from a file) because two successive invocations may split a single token |
313
|
|
|
|
|
|
|
across the buffer boundaries, and cause parse failures. |
314
|
|
|
|
|
|
|
|
315
|
|
|
|
|
|
|
=cut |
316
|
|
|
|
|
|
|
|
317
|
|
|
|
|
|
|
sub from_reader
{
   my $self = shift;
   my ( $reader ) = @_;

   # The reader is only installed for the duration of this parse
   local $self->{reader} = $reader;

   # Start from an empty buffer
   $self->{str} = "";
   pos $self->{str} = 0;

   # Honour the configured toplevel method, as ->from_string does, rather than
   # assuming it is called "parse". Fall back to "parse" for objects that were
   # not built by ->new and so carry no toplevel setting.
   my $toplevel = $self->{toplevel} // "parse";
   my $result = $self->$toplevel;

   # A successful parse must have consumed all of the input
   $self->at_eos or
      $self->fail( "Expected end of input" );

   return $result;
}
334
|
|
|
|
|
|
|
|
335
|
|
|
|
|
|
|
=head2 pos |
336
|
|
|
|
|
|
|
|
337
|
|
|
|
|
|
|
$pos = $parser->pos |
338
|
|
|
|
|
|
|
|
339
|
|
|
|
|
|
|
I |
340
|
|
|
|
|
|
|
|
341
|
|
|
|
|
|
|
Returns the current parse position, as a character offset from the beginning |
342
|
|
|
|
|
|
|
of the file or string. |
343
|
|
|
|
|
|
|
|
344
|
|
|
|
|
|
|
=cut |
345
|
|
|
|
|
|
|
|
346
|
|
|
|
|
|
|
sub pos
{
   # Reports the regexp match position of the input buffer; the inner call is
   # the builtin, not this method
   my ( $self ) = @_;
   return pos $self->{str};
}
351
|
|
|
|
|
|
|
|
352
|
|
|
|
|
|
|
=head2 take |
353
|
|
|
|
|
|
|
|
354
|
|
|
|
|
|
|
$str = $parser->take( $len ) |
355
|
|
|
|
|
|
|
|
356
|
|
|
|
|
|
|
I |
357
|
|
|
|
|
|
|
|
358
|
|
|
|
|
|
|
Returns the next C<$len> characters directly from the input, prior to any |
359
|
|
|
|
|
|
|
whitespace or comment skipping. This does I<not> take account of any |
360
|
|
|
|
|
|
|
end-of-scope marker that may be pending. It is intended for use by parsers of |
361
|
|
|
|
|
|
|
partially-binary protocols, or other situations in which it would be incorrect |
362
|
|
|
|
|
|
|
for the end-of-scope marker to take effect at this time. |
363
|
|
|
|
|
|
|
|
364
|
|
|
|
|
|
|
=cut |
365
|
|
|
|
|
|
|
|
366
|
|
|
|
|
|
|
sub take
{
   my ( $self, $len ) = @_;

   # Note where we are, step over the requested characters, then hand back
   # exactly the span that was stepped over
   my $from = pos( $self->{str} );
   pos( $self->{str} ) = $from + $len;

   return substr( $self->{str}, $from, $len );
}
377
|
|
|
|
|
|
|
|
378
|
|
|
|
|
|
|
=head2 where |
379
|
|
|
|
|
|
|
|
380
|
|
|
|
|
|
|
( $lineno, $col, $text ) = $parser->where |
381
|
|
|
|
|
|
|
|
382
|
|
|
|
|
|
|
Returns the current parse position, as a line and column number, and |
383
|
|
|
|
|
|
|
the entire current line of text. The first line is numbered 1, and the first |
384
|
|
|
|
|
|
|
column is numbered 0. |
385
|
|
|
|
|
|
|
|
386
|
|
|
|
|
|
|
=cut |
387
|
|
|
|
|
|
|
|
388
|
|
|
|
|
|
|
sub where
{
   my ( $self, $pos ) = @_;

   # Default to the current parse position
   $pos = pos $self->{str} unless defined $pos;

   my $text = $self->{str};

   # Find the start of the line containing $pos. If $pos sits on a linefeed
   # character itself, step back off it first so that we report the line that
   # linefeed terminates.
   my $line_start = $pos;
   if( $line_start > 0 and substr( $text, $line_start, 1 ) =~ m/^[\r\n]$/ ) {
      $line_start--;
   }
   while( $line_start > 0 and substr( $text, $line_start-1, 1 ) !~ m/^[\r\n]$/ ) {
      $line_start--;
   }

   # Find the end of the line: the next linefeed character or end of input
   my $line_end = $pos;
   while( $line_end < length($text) and substr( $text, $line_end, 1 ) !~ m/^[\r\n]$/ ) {
      $line_end++;
   }

   # Lines are numbered from 1 by counting the "\n"s before $pos; columns are
   # numbered from 0
   my $before = substr( $text, 0, $pos );
   my $lineno = 1 + ( $before =~ tr/\n// );
   my $col    = $pos - $line_start;

   return ( $lineno, $col, substr( $text, $line_start, $line_end - $line_start ) );
}
411
|
|
|
|
|
|
|
|
412
|
|
|
|
|
|
|
=head2 fail |
413
|
|
|
|
|
|
|
|
414
|
|
|
|
|
|
|
=head2 fail_from |
415
|
|
|
|
|
|
|
|
416
|
|
|
|
|
|
|
$parser->fail( $message ) |
417
|
|
|
|
|
|
|
|
418
|
|
|
|
|
|
|
$parser->fail_from( $pos, $message ) |
419
|
|
|
|
|
|
|
|
420
|
|
|
|
|
|
|
I<C<fail_from> since version 0.09.> |
421
|
|
|
|
|
|
|
|
422
|
|
|
|
|
|
|
Aborts the current parse attempt with the given message string. The failure |
423
|
|
|
|
|
|
|
message will include the line and column position, and the line of input that |
424
|
|
|
|
|
|
|
failed at the current parse position (C<fail>), or a position earlier obtained |
425
|
|
|
|
|
|
|
using the C<pos> method (C<fail_from>). |
426
|
|
|
|
|
|
|
|
427
|
|
|
|
|
|
|
This failure will propagate up to the inner-most structure parsing method that |
428
|
|
|
|
|
|
|
has not been committed; or will cause the entire parser to fail if there are |
429
|
|
|
|
|
|
|
no further options to take. |
430
|
|
|
|
|
|
|
|
431
|
|
|
|
|
|
|
=cut |
432
|
|
|
|
|
|
|
|
433
|
|
|
|
|
|
|
sub fail
{
   # A failure at the current position is just ->fail_from with ->pos
   my ( $self, $message ) = @_;
   return $self->fail_from( $self->pos, $message );
}
439
|
|
|
|
|
|
|
|
440
|
|
|
|
|
|
|
sub fail_from
{
   my ( $self, $pos, $message ) = @_;

   # Throw a Failure object, as opposed to a plain string: this is what the
   # backtracking methods test for with _isa_failure
   my $failure = Parser::MGC::Failure->new( $message, $self, $pos );
   die $failure;
}
446
|
|
|
|
|
|
|
|
447
|
|
|
|
|
|
|
# _isa_failure( $e ) is true if $e is an object of Parser::MGC::Failure or a
# subclass of it.
#
# perl 5.32 onwards has the nicer `isa` infix operator, but source using it
# will not even parse on older perls. So it is compiled from a string only when
# available, and older perls get the long way around via Scalar::Util.
{
   my $predicate;

   if( $^V ge v5.32 ) {
      $predicate = eval( 'use experimental "isa"; sub { $_[0] isa Parser::MGC::Failure }' ) // die $@;
   }
   else {
      require Scalar::Util;
      $predicate = sub { Scalar::Util::blessed($_[0]) and $_[0]->isa( "Parser::MGC::Failure" ) };
   }

   *_isa_failure = $predicate;
}
454
|
|
|
|
|
|
|
|
455
|
|
|
|
|
|
|
=head2 die |
456
|
|
|
|
|
|
|
|
457
|
|
|
|
|
|
|
=head2 die_from |
458
|
|
|
|
|
|
|
|
459
|
|
|
|
|
|
|
$parser->die( $message ) |
460
|
|
|
|
|
|
|
|
461
|
|
|
|
|
|
|
$parser->die_from( $pos, $message ) |
462
|
|
|
|
|
|
|
|
463
|
|
|
|
|
|
|
I |
464
|
|
|
|
|
|
|
|
465
|
|
|
|
|
|
|
Throws an exception that propagates as normal for C<die>, entirely out of the |
466
|
|
|
|
|
|
|
entire parser and to the caller of the toplevel C<from_*> method that invoked |
467
|
|
|
|
|
|
|
it, bypassing all of the back-tracking logic. |
468
|
|
|
|
|
|
|
|
469
|
|
|
|
|
|
|
This is much like using core's C<die> directly, except that the message string |
470
|
|
|
|
|
|
|
will include the line and column position, and the line of input that the |
471
|
|
|
|
|
|
|
parser was working on, as it does in the L</fail> method. |
472
|
|
|
|
|
|
|
|
473
|
|
|
|
|
|
|
This method is intended for reporting fatal errors where the parsed input was |
474
|
|
|
|
|
|
|
correctly recognised at a grammar level, but is requesting something that |
475
|
|
|
|
|
|
|
cannot be fulfilled semantically. |
476
|
|
|
|
|
|
|
|
477
|
|
|
|
|
|
|
=cut |
478
|
|
|
|
|
|
|
|
479
|
|
|
|
|
|
|
sub die :method
{
   # A fatal error at the current position is just ->die_from with ->pos
   my ( $self, $message ) = @_;
   return $self->die_from( $self->pos, $message );
}
485
|
|
|
|
|
|
|
|
486
|
|
|
|
|
|
|
sub die_from
{
   my ( $self, $pos, $message ) = @_;

   # Borrow the Failure class purely to format the message; what gets thrown is
   # the resulting plain string, not the object itself
   my $formatted = Parser::MGC::Failure->new( $message, $self, $pos )->STRING;
   die $formatted;
}
494
|
|
|
|
|
|
|
|
495
|
|
|
|
|
|
|
=head2 at_eos |
496
|
|
|
|
|
|
|
|
497
|
|
|
|
|
|
|
$eos = $parser->at_eos |
498
|
|
|
|
|
|
|
|
499
|
|
|
|
|
|
|
Returns true if the input string is at the end of the string. |
500
|
|
|
|
|
|
|
|
501
|
|
|
|
|
|
|
=cut |
502
|
|
|
|
|
|
|
|
503
|
|
|
|
|
|
|
sub at_eos
{
   my ( $self ) = @_;

   # Skipping whitespace moves pos(); remember it so it can be put back and the
   # substring_before method is not broken by this peek
   my $saved_pos = pos $self->{str};

   $self->skip_ws;

   # End-of-scope counts as end of input too, when a scope is active
   my $at_eos = 0;
   if( pos( $self->{str} ) >= length $self->{str} ) {
      $at_eos = 1;
   }
   elsif( defined $self->{endofscope} ) {
      $at_eos = $self->{str} =~ m/\G$self->{endofscope}/;
   }

   pos( $self->{str} ) = $saved_pos;

   return $at_eos;
}
527
|
|
|
|
|
|
|
|
528
|
|
|
|
|
|
|
=head2 scope_level |
529
|
|
|
|
|
|
|
|
530
|
|
|
|
|
|
|
$level = $parser->scope_level |
531
|
|
|
|
|
|
|
|
532
|
|
|
|
|
|
|
I |
533
|
|
|
|
|
|
|
|
534
|
|
|
|
|
|
|
Returns the number of nested C<scope_of> calls that have been made. |
535
|
|
|
|
|
|
|
|
536
|
|
|
|
|
|
|
=cut |
537
|
|
|
|
|
|
|
|
538
|
|
|
|
|
|
|
sub scope_level
{
   # Simple accessor; the counter starts at 0 and is raised by _scope_of
   my ( $self ) = @_;
   return $self->{scope_level};
}
543
|
|
|
|
|
|
|
|
544
|
|
|
|
|
|
|
=head1 STRUCTURE-FORMING METHODS |
545
|
|
|
|
|
|
|
|
546
|
|
|
|
|
|
|
The following methods may be used to build a grammatical structure out of the |
547
|
|
|
|
|
|
|
defined basic token-parsing methods. Each takes at least one code reference, |
548
|
|
|
|
|
|
|
which will be passed the actual C<$parser> object as its first argument. |
549
|
|
|
|
|
|
|
|
550
|
|
|
|
|
|
|
Anywhere that a code reference is expected also permits a plain string giving |
551
|
|
|
|
|
|
|
the name of a method to invoke. This is sufficient in many simple cases, such |
552
|
|
|
|
|
|
|
as |
553
|
|
|
|
|
|
|
|
554
|
|
|
|
|
|
|
$self->any_of( |
555
|
|
|
|
|
|
|
'token_int', |
556
|
|
|
|
|
|
|
'token_string', |
557
|
|
|
|
|
|
|
... |
558
|
|
|
|
|
|
|
); |
559
|
|
|
|
|
|
|
|
560
|
|
|
|
|
|
|
=cut |
561
|
|
|
|
|
|
|
|
562
|
|
|
|
|
|
|
=head2 maybe |
563
|
|
|
|
|
|
|
|
564
|
|
|
|
|
|
|
$ret = $parser->maybe( $code ) |
565
|
|
|
|
|
|
|
|
566
|
|
|
|
|
|
|
Attempts to execute the given C<$code> in scalar context, and returns what it |
567
|
|
|
|
|
|
|
returned, accepting that it might fail. C<$code> may either be a CODE |
568
|
|
|
|
|
|
|
reference or a method name given as a string. |
569
|
|
|
|
|
|
|
|
570
|
|
|
|
|
|
|
If the code fails (either by calling C<fail> itself, or by propagating a |
571
|
|
|
|
|
|
|
failure from another method it invoked) before it has invoked C<commit>, then |
572
|
|
|
|
|
|
|
none of the input string will be consumed; the current parsing position will |
573
|
|
|
|
|
|
|
be restored. C<undef> will be returned in this case. |
574
|
|
|
|
|
|
|
|
575
|
|
|
|
|
|
|
If it calls C<commit> then any subsequent failure will be propagated to the |
576
|
|
|
|
|
|
|
caller, rather than returning C<undef>. |
577
|
|
|
|
|
|
|
|
578
|
|
|
|
|
|
|
This may be considered to be similar to the C<?> regexp qualifier. |
579
|
|
|
|
|
|
|
|
580
|
|
|
|
|
|
|
sub parse_declaration |
581
|
|
|
|
|
|
|
{ |
582
|
|
|
|
|
|
|
my $self = shift; |
583
|
|
|
|
|
|
|
|
584
|
|
|
|
|
|
|
[ $self->parse_type, |
585
|
|
|
|
|
|
|
$self->token_ident, |
586
|
|
|
|
|
|
|
$self->maybe( sub { |
587
|
|
|
|
|
|
|
$self->expect( "=" ); |
588
|
|
|
|
|
|
|
$self->parse_expression |
589
|
|
|
|
|
|
|
} ), |
590
|
|
|
|
|
|
|
]; |
591
|
|
|
|
|
|
|
} |
592
|
|
|
|
|
|
|
|
593
|
|
|
|
|
|
|
=cut |
594
|
|
|
|
|
|
|
|
595
|
|
|
|
|
|
|
sub maybe
{
   my ( $self, $code ) = @_;

   # Where to rewind to if the attempt does not work out
   my $start = pos $self->{str};

   # ->commit invoked inside $code reaches this closure and flips the flag
   my $committed = 0;
   local $self->{committer} = sub { $committed++ };

   try {
      return $self->$code;
   }
   catch ( $e ) {
      pos( $self->{str} ) = $start;

      # Only an uncommitted parse Failure is quietly turned into undef;
      # anything else continues to propagate
      if( !$committed and _isa_failure( $e ) ) {
         return undef;
      }

      die $e;
   }
}
615
|
|
|
|
|
|
|
|
616
|
|
|
|
|
|
|
=head2 scope_of |
617
|
|
|
|
|
|
|
|
618
|
|
|
|
|
|
|
$ret = $parser->scope_of( $start, $code, $stop ) |
619
|
|
|
|
|
|
|
|
620
|
|
|
|
|
|
|
Expects to find the C<$start> pattern, then attempts to execute the given |
621
|
|
|
|
|
|
|
C<$code>, then expects to find the C<$stop> pattern. Returns whatever the |
622
|
|
|
|
|
|
|
code returned. C<$code> may either be a CODE reference or a method name given |
623
|
|
|
|
|
|
|
as a string. |
624
|
|
|
|
|
|
|
|
625
|
|
|
|
|
|
|
While the code is being executed, the C<$stop> pattern will be used by the |
626
|
|
|
|
|
|
|
token parsing methods as an end-of-scope marker; causing them to raise a |
627
|
|
|
|
|
|
|
failure if called at the end of a scope. |
628
|
|
|
|
|
|
|
|
629
|
|
|
|
|
|
|
sub parse_block |
630
|
|
|
|
|
|
|
{ |
631
|
|
|
|
|
|
|
my $self = shift; |
632
|
|
|
|
|
|
|
|
633
|
|
|
|
|
|
|
$self->scope_of( "{", 'parse_statements', "}" ); |
634
|
|
|
|
|
|
|
} |
635
|
|
|
|
|
|
|
|
636
|
|
|
|
|
|
|
If the C<$start> pattern is undefined, it is presumed the caller has already |
637
|
|
|
|
|
|
|
checked for this. This is useful when the stop pattern needs to be calculated |
638
|
|
|
|
|
|
|
based on the start pattern. |
639
|
|
|
|
|
|
|
|
640
|
|
|
|
|
|
|
sub parse_bracketed |
641
|
|
|
|
|
|
|
{ |
642
|
|
|
|
|
|
|
my $self = shift; |
643
|
|
|
|
|
|
|
|
644
|
|
|
|
|
|
|
my $delim = $self->expect( qr/[\(\[\<\{]/ ); |
645
|
|
|
|
|
|
|
$delim =~ tr/([<{/)]>}/; |
646
|
|
|
|
|
|
|
|
647
|
|
|
|
|
|
|
$self->scope_of( undef, 'parse_body', $delim ); |
648
|
|
|
|
|
|
|
} |
649
|
|
|
|
|
|
|
|
650
|
|
|
|
|
|
|
This method does not have any optional parts to it; any failures are |
651
|
|
|
|
|
|
|
immediately propagated to the caller. |
652
|
|
|
|
|
|
|
|
653
|
|
|
|
|
|
|
=cut |
654
|
|
|
|
|
|
|
|
655
|
|
|
|
|
|
|
sub scope_of
{
   # Same as committed_scope_of but without committing after the start pattern
   my ( $self, @args ) = @_;
   return $self->_scope_of( 0, @args );
}
660
|
|
|
|
|
|
|
|
661
|
|
|
|
|
|
|
sub _scope_of
{
   my ( $self, $commit_if_started, $start, $code, $stop ) = @_;

   # A plain-string stop marker is matched literally
   $stop = qr/\Q$stop/ unless ref $stop;

   # An undefined start means the caller has already consumed it
   defined $start and $self->expect( $start );

   $commit_if_started and $self->commit;

   # For the duration of the body one more scope is open, and the stop pattern
   # acts as its end-of-scope marker; both revert when we leave
   local $self->{scope_level} = $self->{scope_level} + 1;
   local $self->{endofscope}  = $stop;

   my $result = $self->$code;

   $self->expect( $stop );

   return $result;
}
681
|
|
|
|
|
|
|
|
682
|
|
|
|
|
|
|
=head2 committed_scope_of |
683
|
|
|
|
|
|
|
|
684
|
|
|
|
|
|
|
$ret = $parser->committed_scope_of( $start, $code, $stop ) |
685
|
|
|
|
|
|
|
|
686
|
|
|
|
|
|
|
I |
687
|
|
|
|
|
|
|
|
688
|
|
|
|
|
|
|
A variant of L</scope_of> that calls L</commit> after a successful match of |
689
|
|
|
|
|
|
|
the start pattern. This is usually what you want if using C<scope_of> from |
690
|
|
|
|
|
|
|
within an C<any_of> choice, if no other alternative following this one could |
691
|
|
|
|
|
|
|
possibly match if the start pattern has. |
692
|
|
|
|
|
|
|
|
693
|
|
|
|
|
|
|
=cut |
694
|
|
|
|
|
|
|
|
695
|
|
|
|
|
|
|
sub committed_scope_of
{
   # Same as scope_of but commits once the start pattern has matched
   my ( $self, @args ) = @_;
   return $self->_scope_of( 1, @args );
}
700
|
|
|
|
|
|
|
|
701
|
|
|
|
|
|
|
=head2 list_of |
702
|
|
|
|
|
|
|
|
703
|
|
|
|
|
|
|
$ret = $parser->list_of( $sep, $code ) |
704
|
|
|
|
|
|
|
|
705
|
|
|
|
|
|
|
Expects to find a list of instances of something parsed by C<$code>, |
706
|
|
|
|
|
|
|
separated by the C<$sep> pattern. Returns an ARRAY ref containing a list of |
707
|
|
|
|
|
|
|
the return values from the C<$code>. A single trailing delimiter is allowed, |
708
|
|
|
|
|
|
|
and does not affect the return value. C<$code> may either be a CODE reference |
709
|
|
|
|
|
|
|
or a method name given as a string. It is called in list context, and whatever |
710
|
|
|
|
|
|
|
values it returns are appended to the eventual result - similar to perl's |
711
|
|
|
|
|
|
|
C<map>. |
712
|
|
|
|
|
|
|
|
713
|
|
|
|
|
|
|
This method does not consider it an error if the returned list is empty; that |
714
|
|
|
|
|
|
|
is, that the scope ended before any item instances were parsed from it. |
715
|
|
|
|
|
|
|
|
716
|
|
|
|
|
|
|
sub parse_numbers |
717
|
|
|
|
|
|
|
{ |
718
|
|
|
|
|
|
|
my $self = shift; |
719
|
|
|
|
|
|
|
|
720
|
|
|
|
|
|
|
$self->list_of( ",", 'token_int' ); |
721
|
|
|
|
|
|
|
} |
722
|
|
|
|
|
|
|
|
723
|
|
|
|
|
|
|
If the code fails (either by invoking C<fail> itself, or by propagating a |
724
|
|
|
|
|
|
|
failure from another method it invoked) before it has invoked C<commit> on a |
725
|
|
|
|
|
|
|
particular item, then the item is aborted and the parsing position will be |
726
|
|
|
|
|
|
|
restored to the beginning of that failed item. The list of results from |
727
|
|
|
|
|
|
|
previous successful attempts will be returned. |
728
|
|
|
|
|
|
|
|
729
|
|
|
|
|
|
|
If it calls C<commit> within an item then any subsequent failure for that item
730
|
|
|
|
|
|
|
will cause the entire C<list_of> to fail, propagating that to the caller.
731
|
|
|
|
|
|
|
|
732
|
|
|
|
|
|
|
=cut |
733
|
|
|
|
|
|
|
|
734
|
|
|
|
|
|
|
sub list_of
{
   my ( $self, $sep, $code ) = @_;

   # A plain-string separator is matched literally; a regexp is used as given.
   # An undefined separator means no separator is looked for between items.
   if( defined $sep and not ref $sep ) {
      $sep = qr/\Q$sep/;
   }

   # commit() calls this callback. The flag is reset at the start of every
   # item, so a commit only affects the item in which it was made.
   my $committed;
   local $self->{committer} = sub { $committed++ };

   my @items;

   until( $self->at_eos ) {
      $committed = 0;
      my $item_start = pos $self->{str};

      try {
         # $code is called in list context; everything it returns is kept
         push @items, $self->$code;
      }
      catch ( $e ) {
         # Rewind to where the failed item began
         pos( $self->{str} ) = $item_start;

         # Rethrow if the item had committed, or if _isa_failure does not
         # recognise the exception
         die $e if $committed or not _isa_failure( $e );

         # Otherwise the list simply ends before this item
         last;
      }

      # Only reached when the item parsed successfully
      next unless defined $sep;

      # Another item is attempted only if a separator follows this one
      $self->skip_ws;
      $self->{str} =~ m/\G$sep/gc or last;
   }

   return \@items;
}
770
|
|
|
|
|
|
|
|
771
|
|
|
|
|
|
|
=head2 sequence_of |
772
|
|
|
|
|
|
|
|
773
|
|
|
|
|
|
|
$ret = $parser->sequence_of( $code ) |
774
|
|
|
|
|
|
|
|
775
|
|
|
|
|
|
|
A shortcut for calling C<list_of> without a separator pattern; expects
776
|
|
|
|
|
|
|
to find zero or more instances of something parsed by C<$code>, separated only
777
|
|
|
|
|
|
|
by skipped whitespace. |
778
|
|
|
|
|
|
|
|
779
|
|
|
|
|
|
|
This may be considered to be similar to the C<+> or C<*> regexp qualifiers. |
780
|
|
|
|
|
|
|
|
781
|
|
|
|
|
|
|
sub parse_statements |
782
|
|
|
|
|
|
|
{ |
783
|
|
|
|
|
|
|
my $self = shift; |
784
|
|
|
|
|
|
|
|
785
|
|
|
|
|
|
|
$self->sequence_of( 'parse_statement' ); |
786
|
|
|
|
|
|
|
} |
787
|
|
|
|
|
|
|
|
788
|
|
|
|
|
|
|
The interaction of failures in the code and the C<commit> method is identical
789
|
|
|
|
|
|
|
to that of C<list_of>.
790
|
|
|
|
|
|
|
|
791
|
|
|
|
|
|
|
=cut |
792
|
|
|
|
|
|
|
|
793
|
|
|
|
|
|
|
sub sequence_of
{
   my ( $self, $code ) = @_;

   # list_of with an undefined separator: items follow one another with
   # nothing expected between them
   return $self->list_of( undef, $code );
}
800
|
|
|
|
|
|
|
|
801
|
|
|
|
|
|
|
=head2 any_of |
802
|
|
|
|
|
|
|
|
803
|
|
|
|
|
|
|
$ret = $parser->any_of( @codes ) |
804
|
|
|
|
|
|
|
|
805
|
|
|
|
|
|
|
I |
806
|
|
|
|
|
|
|
|
807
|
|
|
|
|
|
|
Expects that one of the given code instances can parse something from the |
808
|
|
|
|
|
|
|
input, returning what it returned. Each code instance may indicate a failure |
809
|
|
|
|
|
|
|
to parse by calling the C<fail> method or otherwise propagating a failure.
810
|
|
|
|
|
|
|
Each code instance may either be a CODE reference or a method name given as a |
811
|
|
|
|
|
|
|
string. |
812
|
|
|
|
|
|
|
|
813
|
|
|
|
|
|
|
This may be considered to be similar to the C<|> regexp operator for forming |
814
|
|
|
|
|
|
|
alternations of possible parse trees. |
815
|
|
|
|
|
|
|
|
816
|
|
|
|
|
|
|
sub parse_statement |
817
|
|
|
|
|
|
|
{ |
818
|
|
|
|
|
|
|
my $self = shift; |
819
|
|
|
|
|
|
|
|
820
|
|
|
|
|
|
|
$self->any_of( |
821
|
|
|
|
|
|
|
sub { $self->parse_declaration; $self->expect(";") }, |
822
|
|
|
|
|
|
|
sub { $self->parse_expression; $self->expect(";") }, |
823
|
|
|
|
|
|
|
sub { $self->parse_block }, |
824
|
|
|
|
|
|
|
); |
825
|
|
|
|
|
|
|
} |
826
|
|
|
|
|
|
|
|
827
|
|
|
|
|
|
|
If the code for a given choice fails (either by invoking C<fail> itself, or by
828
|
|
|
|
|
|
|
propagating a failure from another method it invoked) before it has invoked |
829
|
|
|
|
|
|
|
C<commit> itself, then the parsing position is restored and the next choice will
830
|
|
|
|
|
|
|
be attempted. |
831
|
|
|
|
|
|
|
|
832
|
|
|
|
|
|
|
If it calls C<commit> then any subsequent failure for that choice will cause
833
|
|
|
|
|
|
|
the entire C<any_of> to fail, propagating that to the caller and no further
834
|
|
|
|
|
|
|
choices will be attempted. |
835
|
|
|
|
|
|
|
|
836
|
|
|
|
|
|
|
If none of the choices match then a simple failure message is printed: |
837
|
|
|
|
|
|
|
|
838
|
|
|
|
|
|
|
Found nothing parseable |
839
|
|
|
|
|
|
|
|
840
|
|
|
|
|
|
|
As this is unlikely to be helpful to users, a better message can be provided |
841
|
|
|
|
|
|
|
by the final choice instead. Don't forget to C<commit> before printing the
842
|
|
|
|
|
|
|
failure message, or it won't count. |
843
|
|
|
|
|
|
|
|
844
|
|
|
|
|
|
|
$self->any_of( |
845
|
|
|
|
|
|
|
'token_int', |
846
|
|
|
|
|
|
|
'token_string', |
847
|
|
|
|
|
|
|
..., |
848
|
|
|
|
|
|
|
|
849
|
|
|
|
|
|
|
sub { $self->commit; $self->fail( "Expected an int or string" ) } |
850
|
|
|
|
|
|
|
); |
851
|
|
|
|
|
|
|
|
852
|
|
|
|
|
|
|
=cut |
853
|
|
|
|
|
|
|
|
854
|
|
|
|
|
|
|
sub any_of
{
   my ( $self, @choices ) = @_;

   while( @choices ) {
      my $choice = shift @choices;
      my $start  = pos $self->{str};

      # Each choice gets its own commit flag; commit() reaches it through the
      # temporarily installed committer callback
      my $committed = 0;
      local $self->{committer} = sub { $committed++ };

      try {
         # The first choice that returns normally decides the result
         return $self->$choice;
      }
      catch ( $e ) {
         # Put the input position back before trying anything else
         pos( $self->{str} ) = $start;

         # Rethrow if the choice had committed, or if _isa_failure does not
         # recognise the exception; otherwise move on to the next choice
         die $e if $committed or not _isa_failure( $e );
      }
   }

   # Every choice failed without committing
   $self->fail( "Found nothing parseable" );
}
877
|
|
|
|
|
|
|
|
878
|
|
|
|
|
|
|
sub one_of
{
   # Kept only so that calls to the old name die with a pointer to any_of
   croak "Parser::MGC->one_of is deprecated; use ->any_of instead";
}
881
|
|
|
|
|
|
|
|
882
|
|
|
|
|
|
|
=head2 commit |
883
|
|
|
|
|
|
|
|
884
|
|
|
|
|
|
|
$parser->commit |
885
|
|
|
|
|
|
|
|
886
|
|
|
|
|
|
|
Calling this method will cancel the backtracking behaviour of the innermost |
887
|
|
|
|
|
|
|
C<maybe>, C<list_of>, C<sequence_of>, or C<any_of> structure forming method.
888
|
|
|
|
|
|
|
That is, if later code then calls C<fail>, the exception will be propagated
889
|
|
|
|
|
|
|
out of C<maybe>, no further list items will be attempted by C<list_of> or
890
|
|
|
|
|
|
|
C<sequence_of>, and no further code blocks will be attempted by C<any_of>.
891
|
|
|
|
|
|
|
|
892
|
|
|
|
|
|
|
Typically this will be called once the grammatical structure alternative has been
893
|
|
|
|
|
|
|
determined, ensuring that any further failures are raised as real exceptions, |
894
|
|
|
|
|
|
|
rather than by attempting other alternatives. |
895
|
|
|
|
|
|
|
|
896
|
|
|
|
|
|
|
sub parse_statement |
897
|
|
|
|
|
|
|
{ |
898
|
|
|
|
|
|
|
my $self = shift; |
899
|
|
|
|
|
|
|
|
900
|
|
|
|
|
|
|
$self->any_of( |
901
|
|
|
|
|
|
|
... |
902
|
|
|
|
|
|
|
sub { |
903
|
|
|
|
|
|
|
$self->scope_of( "{", |
904
|
|
|
|
|
|
|
sub { $self->commit; $self->parse_statements; }, |
905
|
|
|
|
|
|
|
"}" ), |
906
|
|
|
|
|
|
|
}, |
907
|
|
|
|
|
|
|
); |
908
|
|
|
|
|
|
|
} |
909
|
|
|
|
|
|
|
|
910
|
|
|
|
|
|
|
Though in this common pattern, L</committed_scope_of> may be used instead.
911
|
|
|
|
|
|
|
|
912
|
|
|
|
|
|
|
=cut |
913
|
|
|
|
|
|
|
|
914
|
|
|
|
|
|
|
sub commit
{
   my ( $self ) = @_;

   # The enclosing structure-forming method installs a committer callback for
   # the duration of its body; without one there is nothing to commit to
   my $committer = $self->{committer}
      or croak "Cannot commit except within a backtrack-able structure";

   return $committer->();
}
924
|
|
|
|
|
|
|
|
925
|
|
|
|
|
|
|
=head1 TOKEN PARSING METHODS |
926
|
|
|
|
|
|
|
|
927
|
|
|
|
|
|
|
The following methods attempt to consume some part of the input string, to be |
928
|
|
|
|
|
|
|
used as part of the parsing process. |
929
|
|
|
|
|
|
|
|
930
|
|
|
|
|
|
|
=cut |
931
|
|
|
|
|
|
|
|
932
|
|
|
|
|
|
|
sub skip_ws
{
   my ( $self ) = @_;

   my $skip_re = $self->{patterns}{_skip};

   while( 1 ) {
      # Step over every consecutive match of the skip pattern
      1 while $self->{str} =~ m/\G$skip_re/gc;

      # Finished unless the whole buffer has now been used up
      return if pos( $self->{str} ) < length $self->{str};

      # The buffer is exhausted; only a reader callback can supply more
      my $reader = $self->{reader} or return;

      my $more = $reader->( $self );
      unless( defined $more ) {
         # undef from the reader means no more input; drop it so that it is
         # not called again
         undef $self->{reader};
         return;
      }

      # Append the new text, saving and restoring the match position around
      # the append, then go round again to skip within the extended buffer
      my $here = pos( $self->{str} );
      $self->{str} .= $more;
      pos( $self->{str} ) = $here;
   }
}
958
|
|
|
|
|
|
|
|
959
|
|
|
|
|
|
|
=head2 expect |
960
|
|
|
|
|
|
|
|
961
|
|
|
|
|
|
|
$str = $parser->expect( $literal ) |
962
|
|
|
|
|
|
|
|
963
|
|
|
|
|
|
|
$str = $parser->expect( qr/pattern/ ) |
964
|
|
|
|
|
|
|
|
965
|
|
|
|
|
|
|
@groups = $parser->expect( qr/pattern/ ) |
966
|
|
|
|
|
|
|
|
967
|
|
|
|
|
|
|
Expects to find a literal string or regexp pattern match, and consumes it. |
968
|
|
|
|
|
|
|
In scalar context, this method returns the string that was captured. In list |
969
|
|
|
|
|
|
|
context it returns the matching substring and the contents of any subgroups |
970
|
|
|
|
|
|
|
contained in the pattern. |
971
|
|
|
|
|
|
|
|
972
|
|
|
|
|
|
|
This method will raise a parse error (by calling C<fail>) if the regexp fails
973
|
|
|
|
|
|
|
to match. Note that if the pattern could match an empty string (such as for |
974
|
|
|
|
|
|
|
example C), the pattern will always match, even if it has to match an |
975
|
|
|
|
|
|
|
empty string. This method will not consider a failure if the regexp matches |
976
|
|
|
|
|
|
|
with zero-width. |
977
|
|
|
|
|
|
|
|
978
|
|
|
|
|
|
|
=head2 maybe_expect |
979
|
|
|
|
|
|
|
|
980
|
|
|
|
|
|
|
$str = $parser->maybe_expect( ... ) |
981
|
|
|
|
|
|
|
|
982
|
|
|
|
|
|
|
@groups = $parser->maybe_expect( ... ) |
983
|
|
|
|
|
|
|
|
984
|
|
|
|
|
|
|
I |
985
|
|
|
|
|
|
|
|
986
|
|
|
|
|
|
|
A convenient shortcut equivalent to calling C<expect> within C<maybe>, but
987
|
|
|
|
|
|
|
implemented more efficiently, avoiding the exception-handling set up by |
988
|
|
|
|
|
|
|
C<maybe>. Returns C<undef> or an empty list if the match fails.
989
|
|
|
|
|
|
|
|
990
|
|
|
|
|
|
|
=cut |
991
|
|
|
|
|
|
|
|
992
|
|
|
|
|
|
|
sub maybe_expect
{
   my ( $self, $expect ) = @_;

   # Plain strings are matched literally
   $expect = qr/\Q$expect/ unless ref $expect;

   $self->skip_ws;

   # A bare return gives undef in scalar context and an empty list in list
   # context; /c leaves the position where it was on failure
   return unless $self->{str} =~ m/\G$expect/gc;

   # @- and @+ give the start and end offsets of the whole match (index 0)
   # and of each capture group; a group that did not take part yields undef
   return wantarray
      ? ( map { defined $-[$_] ? substr( $self->{str}, $-[$_], $+[$_]-$-[$_] ) : undef } 0 .. $#+ )
      : substr( $self->{str}, $-[0], $+[0]-$-[0] );
}
1005
|
|
|
|
|
|
|
|
1006
|
|
|
|
|
|
|
sub expect
{
   my ( $self, $expect ) = @_;

   # Normalise first, so the failure message shows the compiled pattern
   $expect = qr/\Q$expect/ unless ref $expect;

   unless( wantarray ) {
      # Scalar context: the matched text, or fail if nothing matched
      my $ret = $self->maybe_expect( $expect );
      defined $ret or $self->fail( "Expected $expect" );
      return $ret;
   }

   # List context: the matched text and capture groups; an empty list means
   # the match failed
   my @ret = $self->maybe_expect( $expect )
      or $self->fail( "Expected $expect" );
   return @ret;
}
1024
|
|
|
|
|
|
|
|
1025
|
|
|
|
|
|
|
=head2 substring_before |
1026
|
|
|
|
|
|
|
|
1027
|
|
|
|
|
|
|
$str = $parser->substring_before( $literal ) |
1028
|
|
|
|
|
|
|
|
1029
|
|
|
|
|
|
|
$str = $parser->substring_before( qr/pattern/ ) |
1030
|
|
|
|
|
|
|
|
1031
|
|
|
|
|
|
|
I |
1032
|
|
|
|
|
|
|
|
1033
|
|
|
|
|
|
|
Expects to possibly find a literal string or regexp pattern match. If it finds |
1034
|
|
|
|
|
|
|
such, consume all the input text before but excluding this match, and return |
1035
|
|
|
|
|
|
|
it. If it fails to find a match before the end of the current scope, consumes |
1036
|
|
|
|
|
|
|
all the input text until the end of scope and return it. |
1037
|
|
|
|
|
|
|
|
1038
|
|
|
|
|
|
|
This method does not consume the part of input that matches, only the text |
1039
|
|
|
|
|
|
|
before it. It is not considered a failure if the substring before this match |
1040
|
|
|
|
|
|
|
is empty. If a non-empty match is required, use the C<fail> method:
1041
|
|
|
|
|
|
|
|
1042
|
|
|
|
|
|
|
sub token_nonempty_part |
1043
|
|
|
|
|
|
|
{ |
1044
|
|
|
|
|
|
|
my $self = shift; |
1045
|
|
|
|
|
|
|
|
1046
|
|
|
|
|
|
|
my $str = $self->substring_before( "," );
1047
|
|
|
|
|
|
|
length $str or $self->fail( "Expected a string fragment before ," ); |
1048
|
|
|
|
|
|
|
|
1049
|
|
|
|
|
|
|
return $str; |
1050
|
|
|
|
|
|
|
} |
1051
|
|
|
|
|
|
|
|
1052
|
|
|
|
|
|
|
Note that unlike most of the other token parsing methods, this method does not |
1053
|
|
|
|
|
|
|
consume either leading or trailing whitespace around the substring. It is |
1054
|
|
|
|
|
|
|
expected that this method would be used as part of a parser to read quoted
1055
|
|
|
|
|
|
|
strings, or similar cases where whitespace should be preserved. |
1056
|
|
|
|
|
|
|
|
1057
|
|
|
|
|
|
|
=head2 nonempty_substring_before |
1058
|
|
|
|
|
|
|
|
1059
|
|
|
|
|
|
|
$str = $parser->nonempty_substring_before( $literal ) |
1060
|
|
|
|
|
|
|
|
1061
|
|
|
|
|
|
|
$str = $parser->nonempty_substring_before( qr/pattern/ ) |
1062
|
|
|
|
|
|
|
|
1063
|
|
|
|
|
|
|
I |
1064
|
|
|
|
|
|
|
|
1065
|
|
|
|
|
|
|
A variant of L</substring_before> which fails if the matched part is empty.
1066
|
|
|
|
|
|
|
|
1067
|
|
|
|
|
|
|
The example above could have been written: |
1068
|
|
|
|
|
|
|
|
1069
|
|
|
|
|
|
|
sub token_nonempty_part |
1070
|
|
|
|
|
|
|
{ |
1071
|
|
|
|
|
|
|
my $self = shift; |
1072
|
|
|
|
|
|
|
|
1073
|
|
|
|
|
|
|
return $self->nonempty_substring_before( "," );
1074
|
|
|
|
|
|
|
} |
1075
|
|
|
|
|
|
|
|
1076
|
|
|
|
|
|
|
This is often useful for breaking out of repeating loops; e.g. |
1077
|
|
|
|
|
|
|
|
1078
|
|
|
|
|
|
|
sub token_escaped_string |
1079
|
|
|
|
|
|
|
{ |
1080
|
|
|
|
|
|
|
my $self = shift; |
1081
|
|
|
|
|
|
|
$self->expect( '"' ); |
1082
|
|
|
|
|
|
|
|
1083
|
|
|
|
|
|
|
my $ret = ""; |
1084
|
|
|
|
|
|
|
1 while $self->any_of( |
1085
|
|
|
|
|
|
|
sub { $ret .= $self->nonempty_substring_before( qr/%|$/m ); 1 },
1086
|
|
|
|
|
|
|
sub { my $escape = ( $self->expect( qr/%(.)/ ) )[1]; |
1087
|
|
|
|
|
|
|
$ret .= _handle_escape( $escape ); |
1088
|
|
|
|
|
|
|
1 }, |
1089
|
|
|
|
|
|
|
sub { 0 }, |
1090
|
|
|
|
|
|
|
);
1091
|
|
|
|
|
|
|
|
1092
|
|
|
|
|
|
|
return $ret; |
1093
|
|
|
|
|
|
|
} |
1094
|
|
|
|
|
|
|
|
1095
|
|
|
|
|
|
|
=cut |
1096
|
|
|
|
|
|
|
|
1097
|
|
|
|
|
|
|
sub _substring_before
{
   my ( $self, $expect, $fail_if_empty ) = @_;

   # Plain strings are matched literally
   $expect = qr/\Q$expect/ unless ref $expect;

   # When an end-of-scope pattern is set, it also terminates the substring
   my $endofscope = $self->{endofscope};
   my $endre = defined $endofscope ? qr/$expect|$endofscope/ : $expect;

   # NO skip_ws

   my $start = pos $self->{str};

   # The substring ends where the terminator begins, or at the end of the
   # input if no terminator is found. The match has no /g, so it does not
   # move the position itself.
   my $end = ( $self->{str} =~ m/\G(?s:.*?)($endre)/ )
      ? $-[1]
      : length $self->{str};

   if( $fail_if_empty and $end == $start ) {
      $self->fail( "Expected to find a non-empty substring before $expect" );
   }

   return $self->take( $end - $start );
}
1124
|
|
|
|
|
|
|
|
1125
|
|
|
|
|
|
|
sub substring_before
{
   my ( $self, $expect ) = @_;

   # False second argument: an empty substring is an acceptable result
   return $self->_substring_before( $expect, 0 );
}
1130
|
|
|
|
|
|
|
|
1131
|
|
|
|
|
|
|
sub nonempty_substring_before
{
   my ( $self, $expect ) = @_;

   # True second argument: an empty substring counts as a failure
   return $self->_substring_before( $expect, 1 );
}
1136
|
|
|
|
|
|
|
|
1137
|
|
|
|
|
|
|
=head2 generic_token |
1138
|
|
|
|
|
|
|
|
1139
|
|
|
|
|
|
|
$val = $parser->generic_token( $name, $re, $convert ) |
1140
|
|
|
|
|
|
|
|
1141
|
|
|
|
|
|
|
I |
1142
|
|
|
|
|
|
|
|
1143
|
|
|
|
|
|
|
Expects to find a token matching the precompiled regexp C<$re>. If provided, |
1144
|
|
|
|
|
|
|
the C<$convert> CODE reference can be used to convert the string into a more |
1145
|
|
|
|
|
|
|
convenient form. C<$name> is used in the failure message if the pattern fails |
1146
|
|
|
|
|
|
|
to match. |
1147
|
|
|
|
|
|
|
|
1148
|
|
|
|
|
|
|
If provided, the C<$convert> function will be passed the parser and the |
1149
|
|
|
|
|
|
|
matching substring; the value it returns is returned from C<generic_token>.
1150
|
|
|
|
|
|
|
|
1151
|
|
|
|
|
|
|
$convert->( $parser, $substr ) |
1152
|
|
|
|
|
|
|
|
1153
|
|
|
|
|
|
|
If not provided, the substring will be returned as it stands. |
1154
|
|
|
|
|
|
|
|
1155
|
|
|
|
|
|
|
This method is mostly provided for subclasses to define their own token types. |
1156
|
|
|
|
|
|
|
For example: |
1157
|
|
|
|
|
|
|
|
1158
|
|
|
|
|
|
|
sub token_hex |
1159
|
|
|
|
|
|
|
{ |
1160
|
|
|
|
|
|
|
my $self = shift; |
1161
|
|
|
|
|
|
|
$self->generic_token( hex => qr/[0-9A-F]{2}h/, sub { hex $_[1] } ); |
1162
|
|
|
|
|
|
|
} |
1163
|
|
|
|
|
|
|
|
1164
|
|
|
|
|
|
|
=cut |
1165
|
|
|
|
|
|
|
|
1166
|
|
|
|
|
|
|
sub generic_token
{
   my ( $self, $name, $re, $convert ) = @_;

   # Nothing left to parse cannot be a token
   $self->at_eos and $self->fail( "Expected $name" );

   $self->skip_ws;
   $self->{str} =~ m/\G$re/gc
      or $self->fail( "Expected $name" );

   # Take the whole match from the input by its offsets, independent of any
   # capture groups in $re
   my $token = substr( $self->{str}, $-[0], $+[0] - $-[0] );

   # Without a converter the raw text is the result
   return $token unless $convert;
   return $convert->( $self, $token );
}
1181
|
|
|
|
|
|
|
|
1182
|
|
|
|
|
|
|
sub _token_generic
{
   my ( $self, %args ) = @_;

   # "pattern" names an entry in the parser's own pattern table; otherwise
   # the regexp is passed directly as "re"
   my $re = $args{pattern}
      ? $self->{patterns}{ $args{pattern} }
      : $args{re};

   return $self->generic_token( $args{name}, $re, $args{convert} );
}
1193
|
|
|
|
|
|
|
|
1194
|
|
|
|
|
|
|
=head2 token_int |
1195
|
|
|
|
|
|
|
|
1196
|
|
|
|
|
|
|
$int = $parser->token_int |
1197
|
|
|
|
|
|
|
|
1198
|
|
|
|
|
|
|
Expects to find an integer in decimal, octal or hexadecimal notation, and |
1199
|
|
|
|
|
|
|
consumes it. Negative integers, preceded by C<->, are also recognised.
1200
|
|
|
|
|
|
|
|
1201
|
|
|
|
|
|
|
=cut |
1202
|
|
|
|
|
|
|
|
1203
|
|
|
|
|
|
|
sub token_int
{
   my ( $self ) = @_;

   # Turns the matched text into a number
   my $to_number = sub {
      my ( undef, $digits ) = @_;

      # Take off any leading minus sign and remember it
      my $sign = ( $digits =~ s/^-// ) ? -1 : 1;

      # Rewrite a leading "0o" as "0", the spelling oct() is given below
      $digits =~ s/^0o/0/;

      # Anything starting with 0 goes through oct(), which also understands
      # prefixes such as 0x; everything else is plain decimal
      my $magnitude = ( $digits =~ m/^0/ ) ? oct( $digits ) : $digits;

      return $sign * $magnitude;
   };

   return $self->_token_generic(
      name    => "int",
      pattern => "int",
      convert => $to_number,
   );
}
1221
|
|
|
|
|
|
|
|
1222
|
|
|
|
|
|
|
=head2 token_float |
1223
|
|
|
|
|
|
|
|
1224
|
|
|
|
|
|
|
$float = $parser->token_float |
1225
|
|
|
|
|
|
|
|
1226
|
|
|
|
|
|
|
I |
1227
|
|
|
|
|
|
|
|
1228
|
|
|
|
|
|
|
Expects to find a number expressed in floating-point notation; a sequence of |
1229
|
|
|
|
|
|
|
digits possibly prefixed by C<->, possibly containing a decimal point, |
1230
|
|
|
|
|
|
|
possibly followed by an exponent specified by C followed by an integer. The |
1231
|
|
|
|
|
|
|
numerical value is then returned. |
1232
|
|
|
|
|
|
|
|
1233
|
|
|
|
|
|
|
=cut |
1234
|
|
|
|
|
|
|
|
1235
|
|
|
|
|
|
|
sub token_float
{
   my ( $self ) = @_;

   return $self->_token_generic(
      name    => "float",
      pattern => "float",

      # Adding zero turns the matched text into a number
      convert => sub { my ( undef, $text ) = @_; return $text + 0 },
   );
}
1245
|
|
|
|
|
|
|
|
1246
|
|
|
|
|
|
|
=head2 token_number |
1247
|
|
|
|
|
|
|
|
1248
|
|
|
|
|
|
|
$number = $parser->token_number |
1249
|
|
|
|
|
|
|
|
1250
|
|
|
|
|
|
|
I |
1251
|
|
|
|
|
|
|
|
1252
|
|
|
|
|
|
|
Expects to find a number expressed in either of the above forms. |
1253
|
|
|
|
|
|
|
|
1254
|
|
|
|
|
|
|
=cut |
1255
|
|
|
|
|
|
|
|
1256
|
|
|
|
|
|
|
sub token_number
{
   my ( $self ) = @_;

   # The float form is tried before the integer form. Both are passed as
   # direct code references rather than method names, so a subclass override
   # of token_float or token_int is not used here.
   return $self->any_of( \&token_float, \&token_int );
}
1261
|
|
|
|
|
|
|
|
1262
|
|
|
|
|
|
|
=head2 token_string |
1263
|
|
|
|
|
|
|
|
1264
|
|
|
|
|
|
|
$str = $parser->token_string |
1265
|
|
|
|
|
|
|
|
1266
|
|
|
|
|
|
|
Expects to find a quoted string, and consumes it. The string should be quoted |
1267
|
|
|
|
|
|
|
using C<"> or C<'> quote marks. |
1268
|
|
|
|
|
|
|
|
1269
|
|
|
|
|
|
|
The content of the quoted string can contain character escapes similar to |
1270
|
|
|
|
|
|
|
those accepted by C or Perl. Specifically, the following forms are recognised: |
1271
|
|
|
|
|
|
|
|
1272
|
|
|
|
|
|
|
\a Bell ("alert") |
1273
|
|
|
|
|
|
|
\b Backspace |
1274
|
|
|
|
|
|
|
\e Escape |
1275
|
|
|
|
|
|
|
\f Form feed |
1276
|
|
|
|
|
|
|
\n Newline |
1277
|
|
|
|
|
|
|
\r Return |
1278
|
|
|
|
|
|
|
\t Horizontal Tab |
1279
|
|
|
|
|
|
|
\0, \012 Octal character |
1280
|
|
|
|
|
|
|
\x34, \x{5678} Hexadecimal character |
1281
|
|
|
|
|
|
|
|
1282
|
|
|
|
|
|
|
C's C<\v> for vertical tab is not supported as it is rarely used in practice |
1283
|
|
|
|
|
|
|
and it collides with Perl's C<\v> regexp escape. Perl's C<\c> for forming other |
1284
|
|
|
|
|
|
|
control characters is also not supported. |
1285
|
|
|
|
|
|
|
|
1286
|
|
|
|
|
|
|
=cut |
1287
|
|
|
|
|
|
|
|
1288
|
|
|
|
|
|
|
# Single-letter escapes understood by token_string, keyed by the character
# that follows the backslash
my %escapes = (
   a => "\a",   # bell ("alert")
   b => "\b",   # backspace
   e => "\e",   # escape
   f => "\f",   # form feed
   n => "\n",   # newline
   r => "\r",   # return
   t => "\t",   # horizontal tab
);
1297
|
|
|
|
|
|
|
|
1298
|
|
|
|
|
|
|
sub token_string
{
   my ( $self ) = @_;

   $self->at_eos and $self->fail( "Expected string" );

   # Saved before whitespace is skipped, so a string with bad contents can be
   # rewound to here
   my $start = pos $self->{str};

   $self->skip_ws;
   $self->{str} =~ m/\G($self->{patterns}{string_delim})/gc
      or $self->fail( "Expected string delimiter" );

   # The quote that opened the string is also the one that must close it
   my $delim = $1;

   # Capture everything up to the closing delimiter: a run of escapes and
   # plain chunks that contain neither a backslash nor the delimiter
   $self->{str} =~ m/
      \G(
         (?:
            \\[0-7]{1,3}     # octal escape
           |\\x[0-9A-F]{2}   # 2-digit hex escape
           |\\x\{[0-9A-F]+\} # {}-delimited hex escape
           |\\.              # symbolic escape
           |[^\\$delim]+     # plain chunk
         )*?
      )$delim/gcix
      or do {
         pos( $self->{str} ) = $start;
         $self->fail( "Expected contents of string" );
      };

   my $string = $1;

   # Decode the escapes: octal and hex forms become the character with that
   # code, known letters come from %escapes, and any other escaped character
   # stands for itself
   $string =~ s<\\(?:([0-7]{1,3})|x([0-9A-F]{2})|x\{([0-9A-F]+)\}|(.))>
               [defined $1 ? chr oct $1 :
                defined $2 ? chr hex $2 :
                defined $3 ? chr hex $3 :
                             exists $escapes{$4} ? $escapes{$4} : $4]egi;

   return $string;
}
1334
|
|
|
|
|
|
|
|
1335
|
|
|
|
|
|
|
=head2 token_ident |
1336
|
|
|
|
|
|
|
|
1337
|
|
|
|
|
|
|
$ident = $parser->token_ident |
1338
|
|
|
|
|
|
|
|
1339
|
|
|
|
|
|
|
Expects to find an identifier, and consumes it. |
1340
|
|
|
|
|
|
|
|
1341
|
|
|
|
|
|
|
=cut |
1342
|
|
|
|
|
|
|
|
1343
|
|
|
|
|
|
|
sub token_ident
{
   my ( $self ) = @_;

   # No converter is passed, only the name and the pattern-table key
   return $self->_token_generic( name => "ident", pattern => "ident" );
}
1352
|
|
|
|
|
|
|
|
1353
|
|
|
|
|
|
|
=head2 token_kw |
1354
|
|
|
|
|
|
|
|
1355
|
|
|
|
|
|
|
$keyword = $parser->token_kw( @keywords ) |
1356
|
|
|
|
|
|
|
|
1357
|
|
|
|
|
|
|
Expects to find a keyword, and consumes it. A keyword is defined as an |
1358
|
|
|
|
|
|
|
identifier which is exactly one of the literal values passed in. |
1359
|
|
|
|
|
|
|
|
1360
|
|
|
|
|
|
|
=cut |
1361
|
|
|
|
|
|
|
|
1362
|
|
|
|
|
|
|
sub token_kw
{
   my ( $self, @acceptable ) = @_;

   $self->skip_ws;

   # Remember where the word begins, so a rejected identifier can be put back
   my $start = pos $self->{str};

   my $kw = $self->token_ident;
   return undef unless defined $kw;

   # The identifier must be exactly one of the permitted keywords
   unless( grep { $_ eq $kw } @acceptable ) {
      pos( $self->{str} ) = $start;
      $self->fail( "Expected any of ".join( ", ", @acceptable ) );
   }

   return $kw;
}
1379
|
|
|
|
|
|
|
|
1380
|
|
|
|
|
|
|
package # hide from indexer |
1381
|
|
|
|
|
|
|
Parser::MGC::Failure; |
1382
|
|
|
|
|
|
|
|
1383
|
|
|
|
|
|
|
sub new
{
   my ( $class, $message, $parser, $pos ) = @_;

   # All three fields are always present, even if undef; any further
   # arguments are ignored
   return bless {
      message => $message,
      parser  => $parser,
      pos     => $pos,
   }, $class;
}
1390
|
31
|
|
|
31
|
|
25447
|
|
|
31
|
|
|
|
|
19299
|
|
|
31
|
|
|
|
|
200
|
|
1391
|
|
|
|
|
|
|
use overload '""' => "STRING"; |
1392
|
|
|
|
|
|
|
sub STRING
{
   my ( $self ) = @_;

   my $parser = $self->{parser};
   my ( $linenum, $col, $text ) = $parser->where( $self->{pos} );

   # Column number only counts characters, and there may be tabs among them.
   # Rather than working out the visual column, reuse the start of the line
   # as the marker's indentation, with every non-whitespace character
   # replaced by a space.
   my $indent = substr( $text, 0, $col ) =~ s/[^ \t]/ /gr;

   # Only a plain (non-reference) filename is mentioned in the message
   my $filename = $parser->filename;
   my $in_file = "";
   $in_file = "in $filename " if defined $filename and !ref $filename;

   return join "",
      "$self->{message} ${in_file}on line $linenum at:\n",
      "$text\n",
      "$indent^\n";
}
1414
|
|
|
|
|
|
|
|
1415
|
31
|
|
|
31
|
|
6153
|
# Provide fallback operators for cmp, eq, etc... |
|
31
|
|
|
|
|
58
|
|
|
31
|
|
|
|
|
137
|
|
1416
|
|
|
|
|
|
|
use overload fallback => 1; |
1417
|
|
|
|
|
|
|
|
1418
|
|
|
|
|
|
|
=head1 EXAMPLES |
1419
|
|
|
|
|
|
|
|
1420
|
|
|
|
|
|
|
=head2 Accumulating Results Using Variables |
1421
|
|
|
|
|
|
|
|
1422
|
|
|
|
|
|
|
Although the structure-forming methods all return a value, obtained from their |
1423
|
|
|
|
|
|
|
nested parsing code, it can sometimes be more convenient to use a variable to |
1424
|
|
|
|
|
|
|
accumulate a result in instead. For example, consider the following parser |
1425
|
|
|
|
|
|
|
method, designed to parse a set of C<name: "value"> assignments, such as might |
1426
|
|
|
|
|
|
|
be found in a configuration file, or YAML/JSON-style mapping value. |
1427
|
|
|
|
|
|
|
|
1428
|
|
|
|
|
|
|
sub parse_dict |
1429
|
|
|
|
|
|
|
{ |
1430
|
|
|
|
|
|
|
my $self = shift; |
1431
|
|
|
|
|
|
|
|
1432
|
|
|
|
|
|
|
my %ret; |
1433
|
|
|
|
|
|
|
$self->list_of( ",", sub { |
1434
|
|
|
|
|
|
|
my $key = $self->token_ident; |
1435
|
|
|
|
|
|
|
exists $ret{$key} and $self->fail( "Already have a mapping for '$key'" ); |
1436
|
|
|
|
|
|
|
|
1437
|
|
|
|
|
|
|
$self->expect( ":" ); |
1438
|
|
|
|
|
|
|
|
1439
|
|
|
|
|
|
|
$ret{$key} = $self->parse_value; |
1440
|
|
|
|
|
|
|
} ); |
1441
|
|
|
|
|
|
|
|
1442
|
|
|
|
|
|
|
return \%ret |
1443
|
|
|
|
|
|
|
} |
1444
|
|
|
|
|
|
|
|
1445
|
|
|
|
|
|
|
Instead of using the return value from C<list_of>, this method accumulates |
1446
|
|
|
|
|
|
|
values in the C<%ret> hash, eventually returning a reference to it as its |
1447
|
|
|
|
|
|
|
result. Because of this, it can perform some error checking while it parses; |
1448
|
|
|
|
|
|
|
namely, rejecting duplicate keys. |
1449
|
|
|
|
|
|
|
|
1450
|
|
|
|
|
|
|
=head1 TODO |
1451
|
|
|
|
|
|
|
|
1452
|
|
|
|
|
|
|
=over 4 |
1453
|
|
|
|
|
|
|
|
1454
|
|
|
|
|
|
|
=item * |
1455
|
|
|
|
|
|
|
|
1456
|
|
|
|
|
|
|
Make unescaping of string constants more customisable. Possibly consider |
1457
|
|
|
|
|
|
|
instead a C<parse_string_generic> using a loop over C<substring_before>. |
1458
|
|
|
|
|
|
|
|
1459
|
|
|
|
|
|
|
=item * |
1460
|
|
|
|
|
|
|
|
1461
|
|
|
|
|
|
|
Easy ability for subclasses to define more token types as methods. Perhaps |
1462
|
|
|
|
|
|
|
provide a class method such as |
1463
|
|
|
|
|
|
|
|
1464
|
|
|
|
|
|
|
__PACKAGE__->has_token( hex => qr/[0-9A-F]+/i, sub { hex $_[1] } ); |
1465
|
|
|
|
|
|
|
|
1466
|
|
|
|
|
|
|
=item * |
1467
|
|
|
|
|
|
|
|
1468
|
|
|
|
|
|
|
Investigate how well C<from_reader> can cope with buffer splitting across |
1469
|
|
|
|
|
|
|
other tokens than simply skippable whitespace |
1470
|
|
|
|
|
|
|
|
1471
|
|
|
|
|
|
|
=back |
1472
|
|
|
|
|
|
|
|
1473
|
|
|
|
|
|
|
=head1 AUTHOR |
1474
|
|
|
|
|
|
|
|
1475
|
|
|
|
|
|
|
Paul Evans <leonerd@leonerd.org.uk> |
1476
|
|
|
|
|
|
|
|
1477
|
|
|
|
|
|
|
=cut |
1478
|
|
|
|
|
|
|
|
1479
|
|
|
|
|
|
|
0x55AA; |