| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package PPIx::Regexp::Tokenizer; |
|
2
|
|
|
|
|
|
|
|
|
3
|
9
|
|
|
9
|
|
60
|
use strict; |
|
|
9
|
|
|
|
|
17
|
|
|
|
9
|
|
|
|
|
261
|
|
|
4
|
9
|
|
|
9
|
|
54
|
use warnings; |
|
|
9
|
|
|
|
|
17
|
|
|
|
9
|
|
|
|
|
235
|
|
|
5
|
|
|
|
|
|
|
|
|
6
|
9
|
|
|
9
|
|
47
|
use base qw{ PPIx::Regexp::Support }; |
|
|
9
|
|
|
|
|
19
|
|
|
|
9
|
|
|
|
|
722
|
|
|
7
|
|
|
|
|
|
|
|
|
8
|
9
|
|
|
9
|
|
54
|
use Carp qw{ carp croak confess }; |
|
|
9
|
|
|
|
|
25
|
|
|
|
9
|
|
|
|
|
600
|
|
|
9
|
9
|
|
|
|
|
1263
|
use PPIx::Regexp::Constant qw{ |
|
10
|
|
|
|
|
|
|
ARRAY_REF |
|
11
|
|
|
|
|
|
|
CODE_REF |
|
12
|
|
|
|
|
|
|
HASH_REF |
|
13
|
|
|
|
|
|
|
LOCATION_LINE |
|
14
|
|
|
|
|
|
|
LOCATION_CHARACTER |
|
15
|
|
|
|
|
|
|
LOCATION_COLUMN |
|
16
|
|
|
|
|
|
|
LOCATION_LOGICAL_LINE |
|
17
|
|
|
|
|
|
|
MINIMUM_PERL |
|
18
|
|
|
|
|
|
|
REGEXP_REF |
|
19
|
|
|
|
|
|
|
TOKEN_LITERAL |
|
20
|
|
|
|
|
|
|
TOKEN_UNKNOWN |
|
21
|
|
|
|
|
|
|
@CARP_NOT |
|
22
|
9
|
|
|
9
|
|
56
|
}; |
|
|
9
|
|
|
|
|
38
|
|
|
23
|
9
|
|
|
9
|
|
4042
|
use PPIx::Regexp::Token::Assertion (); |
|
|
9
|
|
|
|
|
25
|
|
|
|
9
|
|
|
|
|
208
|
|
|
24
|
9
|
|
|
9
|
|
4151
|
use PPIx::Regexp::Token::Backreference (); |
|
|
9
|
|
|
|
|
26
|
|
|
|
9
|
|
|
|
|
250
|
|
|
25
|
9
|
|
|
9
|
|
4524
|
use PPIx::Regexp::Token::Backtrack (); |
|
|
9
|
|
|
|
|
23
|
|
|
|
9
|
|
|
|
|
195
|
|
|
26
|
9
|
|
|
9
|
|
4156
|
use PPIx::Regexp::Token::CharClass::POSIX (); |
|
|
9
|
|
|
|
|
26
|
|
|
|
9
|
|
|
|
|
195
|
|
|
27
|
9
|
|
|
9
|
|
4083
|
use PPIx::Regexp::Token::CharClass::POSIX::Unknown (); |
|
|
9
|
|
|
|
|
43
|
|
|
|
9
|
|
|
|
|
183
|
|
|
28
|
9
|
|
|
9
|
|
4230
|
use PPIx::Regexp::Token::CharClass::Simple (); |
|
|
9
|
|
|
|
|
26
|
|
|
|
9
|
|
|
|
|
207
|
|
|
29
|
9
|
|
|
9
|
|
4097
|
use PPIx::Regexp::Token::Code (); |
|
|
9
|
|
|
|
|
45
|
|
|
|
9
|
|
|
|
|
263
|
|
|
30
|
9
|
|
|
9
|
|
4288
|
use PPIx::Regexp::Token::Comment (); |
|
|
9
|
|
|
|
|
22
|
|
|
|
9
|
|
|
|
|
180
|
|
|
31
|
9
|
|
|
9
|
|
3996
|
use PPIx::Regexp::Token::Condition (); |
|
|
9
|
|
|
|
|
24
|
|
|
|
9
|
|
|
|
|
270
|
|
|
32
|
9
|
|
|
9
|
|
4219
|
use PPIx::Regexp::Token::Control (); |
|
|
9
|
|
|
|
|
24
|
|
|
|
9
|
|
|
|
|
289
|
|
|
33
|
9
|
|
|
9
|
|
3960
|
use PPIx::Regexp::Token::Delimiter (); |
|
|
9
|
|
|
|
|
41
|
|
|
|
9
|
|
|
|
|
229
|
|
|
34
|
9
|
|
|
9
|
|
3972
|
use PPIx::Regexp::Token::Greediness (); |
|
|
9
|
|
|
|
|
29
|
|
|
|
9
|
|
|
|
|
182
|
|
|
35
|
9
|
|
|
9
|
|
3794
|
use PPIx::Regexp::Token::GroupType::Assertion (); |
|
|
9
|
|
|
|
|
29
|
|
|
|
9
|
|
|
|
|
242
|
|
|
36
|
9
|
|
|
9
|
|
3858
|
use PPIx::Regexp::Token::GroupType::Atomic_Script_Run (); |
|
|
9
|
|
|
|
|
28
|
|
|
|
9
|
|
|
|
|
205
|
|
|
37
|
9
|
|
|
9
|
|
3836
|
use PPIx::Regexp::Token::GroupType::BranchReset (); |
|
|
9
|
|
|
|
|
28
|
|
|
|
9
|
|
|
|
|
189
|
|
|
38
|
9
|
|
|
9
|
|
3753
|
use PPIx::Regexp::Token::GroupType::Code (); |
|
|
9
|
|
|
|
|
28
|
|
|
|
9
|
|
|
|
|
192
|
|
|
39
|
9
|
|
|
9
|
|
3721
|
use PPIx::Regexp::Token::GroupType::Modifier (); |
|
|
9
|
|
|
|
|
26
|
|
|
|
9
|
|
|
|
|
246
|
|
|
40
|
9
|
|
|
9
|
|
3802
|
use PPIx::Regexp::Token::GroupType::NamedCapture (); |
|
|
9
|
|
|
|
|
26
|
|
|
|
9
|
|
|
|
|
184
|
|
|
41
|
9
|
|
|
9
|
|
3677
|
use PPIx::Regexp::Token::GroupType::Script_Run (); |
|
|
9
|
|
|
|
|
24
|
|
|
|
9
|
|
|
|
|
202
|
|
|
42
|
9
|
|
|
9
|
|
3912
|
use PPIx::Regexp::Token::GroupType::Subexpression (); |
|
|
9
|
|
|
|
|
26
|
|
|
|
9
|
|
|
|
|
197
|
|
|
43
|
9
|
|
|
9
|
|
3971
|
use PPIx::Regexp::Token::GroupType::Switch (); |
|
|
9
|
|
|
|
|
48
|
|
|
|
9
|
|
|
|
|
215
|
|
|
44
|
9
|
|
|
9
|
|
4387
|
use PPIx::Regexp::Token::Interpolation (); |
|
|
9
|
|
|
|
|
42
|
|
|
|
9
|
|
|
|
|
215
|
|
|
45
|
9
|
|
|
9
|
|
4639
|
use PPIx::Regexp::Token::Literal (); |
|
|
9
|
|
|
|
|
28
|
|
|
|
9
|
|
|
|
|
238
|
|
|
46
|
9
|
|
|
9
|
|
65
|
use PPIx::Regexp::Token::Modifier (); |
|
|
9
|
|
|
|
|
20
|
|
|
|
9
|
|
|
|
|
135
|
|
|
47
|
9
|
|
|
9
|
|
4197
|
use PPIx::Regexp::Token::Operator (); |
|
|
9
|
|
|
|
|
25
|
|
|
|
9
|
|
|
|
|
185
|
|
|
48
|
9
|
|
|
9
|
|
4114
|
use PPIx::Regexp::Token::Quantifier (); |
|
|
9
|
|
|
|
|
35
|
|
|
|
9
|
|
|
|
|
195
|
|
|
49
|
9
|
|
|
9
|
|
61
|
use PPIx::Regexp::Token::Recursion (); |
|
|
9
|
|
|
|
|
18
|
|
|
|
9
|
|
|
|
|
127
|
|
|
50
|
9
|
|
|
9
|
|
41
|
use PPIx::Regexp::Token::Structure (); |
|
|
9
|
|
|
|
|
36
|
|
|
|
9
|
|
|
|
|
113
|
|
|
51
|
9
|
|
|
9
|
|
4053
|
use PPIx::Regexp::Token::Unknown (); |
|
|
9
|
|
|
|
|
21
|
|
|
|
9
|
|
|
|
|
179
|
|
|
52
|
9
|
|
|
9
|
|
3867
|
use PPIx::Regexp::Token::Whitespace (); |
|
|
9
|
|
|
|
|
30
|
|
|
|
9
|
|
|
|
|
261
|
|
|
53
|
9
|
|
|
|
|
491
|
use PPIx::Regexp::Util qw{ |
|
54
|
|
|
|
|
|
|
is_ppi_regexp_element |
|
55
|
|
|
|
|
|
|
__instance |
|
56
|
9
|
|
|
9
|
|
63
|
}; |
|
|
9
|
|
|
|
|
25
|
|
|
57
|
|
|
|
|
|
|
|
|
58
|
9
|
|
|
9
|
|
75
|
use Scalar::Util qw{ looks_like_number }; |
|
|
9
|
|
|
|
|
20
|
|
|
|
9
|
|
|
|
|
57942
|
|
|
59
|
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
our $VERSION = '0.087_01'; |
|
61
|
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
our $DEFAULT_POSTDEREF; |
|
63
|
|
|
|
|
|
|
defined $DEFAULT_POSTDEREF |
|
64
|
|
|
|
|
|
|
or $DEFAULT_POSTDEREF = 1; |
|
65
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
{ |
|
67
|
|
|
|
|
|
|
# Names of classes containing tokenization machinery. There are few |
|
68
|
|
|
|
|
|
|
# known ordering requirements, since each class recognizes its own, |
|
69
|
|
|
|
|
|
|
# and I have tried to prevent overlap. Absent such constraints, the |
|
70
|
|
|
|
|
|
|
# order is in perceived frequency of acceptance, to keep the search |
|
71
|
|
|
|
|
|
|
# as short as possible. If I were conscientious I would gather |
|
72
|
|
|
|
|
|
|
# statistics on this. |
|
73
|
|
|
|
|
|
|
my @classes = ( # TODO make readonly when acceptable way appears |
|
74
|
|
|
|
|
|
|
'PPIx::Regexp::Token::Literal', |
|
75
|
|
|
|
|
|
|
'PPIx::Regexp::Token::Interpolation', |
|
76
|
|
|
|
|
|
|
'PPIx::Regexp::Token::Control', # Note 1 |
|
77
|
|
|
|
|
|
|
'PPIx::Regexp::Token::CharClass::Simple', # Note 2 |
|
78
|
|
|
|
|
|
|
'PPIx::Regexp::Token::Quantifier', |
|
79
|
|
|
|
|
|
|
'PPIx::Regexp::Token::Greediness', |
|
80
|
|
|
|
|
|
|
'PPIx::Regexp::Token::CharClass::POSIX', # Note 3 |
|
81
|
|
|
|
|
|
|
'PPIx::Regexp::Token::Structure', |
|
82
|
|
|
|
|
|
|
'PPIx::Regexp::Token::Assertion', |
|
83
|
|
|
|
|
|
|
'PPIx::Regexp::Token::Backreference', |
|
84
|
|
|
|
|
|
|
'PPIx::Regexp::Token::Operator', # Note 4 |
|
85
|
|
|
|
|
|
|
); |
|
86
|
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
# Note 1: If we are in quote mode ( \Q ... \E ), Control makes a |
|
88
|
|
|
|
|
|
|
# literal out of anything it sees other than \E. So it |
|
89
|
|
|
|
|
|
|
# needs to come before almost all other tokenizers. Not |
|
90
|
|
|
|
|
|
|
# Literal, which already makes literals, and not |
|
91
|
|
|
|
|
|
|
# Interpolation, which is legal in quote mode, but |
|
92
|
|
|
|
|
|
|
# everything else. |
|
93
|
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
# Note 2: CharClass::Simple must come after Literal, because it |
|
95
|
|
|
|
|
|
|
# relies on Literal to recognize a Unicode named character |
|
96
|
|
|
|
|
|
|
# ( \N{something} ), so any \N that comes through to it |
|
97
|
|
|
|
|
|
|
# must be the \N simple character class (which represents |
|
98
|
|
|
|
|
|
|
# anything but a newline, and was introduced in Perl |
|
99
|
|
|
|
|
|
|
# 5.11.0. |
|
100
|
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
# Note 3: CharClass::POSIX has to come before Structure, since both |
|
102
|
|
|
|
|
|
|
# look for square brackets, and CharClass::POSIX is the |
|
103
|
|
|
|
|
|
|
# more particular. |
|
104
|
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
# Note 4: Operator relies on Literal making the characters literal |
|
106
|
|
|
|
|
|
|
# if they appear in a context where they can not be |
|
107
|
|
|
|
|
|
|
# operators, and Control making them literals if quoting, |
|
108
|
|
|
|
|
|
|
# so it must come after both. |
|
109
|
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
# Return the declared tokenizer classes. |
|
111
|
|
|
|
|
|
|
sub __tokenizer_classes { |
|
112
|
538
|
|
|
538
|
|
2949
|
return @classes; |
|
113
|
|
|
|
|
|
|
} |
|
114
|
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
} |
|
116
|
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
{ |
|
118
|
|
|
|
|
|
|
my $errstr; |
|
119
|
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
sub new { |
|
121
|
739
|
|
|
739
|
1
|
92344
|
my ( $class, $re, %args ) = @_; |
|
122
|
739
|
50
|
|
|
|
2245
|
ref $class and $class = ref $class; |
|
123
|
|
|
|
|
|
|
|
|
124
|
739
|
|
|
|
|
1426
|
$errstr = undef; |
|
125
|
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
exists $args{default_modifiers} |
|
127
|
|
|
|
|
|
|
and ARRAY_REF ne ref $args{default_modifiers} |
|
128
|
739
|
50
|
66
|
|
|
2926
|
and do { |
|
129
|
0
|
|
|
|
|
0
|
$errstr = 'default_modifiers must be an array reference'; |
|
130
|
0
|
|
|
|
|
0
|
return; |
|
131
|
|
|
|
|
|
|
}; |
|
132
|
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
my $self = { |
|
134
|
|
|
|
|
|
|
index_locations => $args{index_locations}, # Index locations |
|
135
|
|
|
|
|
|
|
capture => undef, # Captures from find_regexp. |
|
136
|
|
|
|
|
|
|
content => undef, # The string we are tokenizing. |
|
137
|
|
|
|
|
|
|
cookie => {}, # Cookies |
|
138
|
|
|
|
|
|
|
cursor_curr => 0, # The current position in the string. |
|
139
|
|
|
|
|
|
|
cursor_limit => undef, # The end of the portion of the |
|
140
|
|
|
|
|
|
|
# string being tokenized. |
|
141
|
|
|
|
|
|
|
cursor_orig => undef, # Position of cursor when tokenizer |
|
142
|
|
|
|
|
|
|
# called. Used by get_token to prevent |
|
143
|
|
|
|
|
|
|
# recursion. |
|
144
|
|
|
|
|
|
|
cursor_modifiers => undef, # Position of modifiers. |
|
145
|
|
|
|
|
|
|
default_modifiers => $args{default_modifiers} || [], |
|
146
|
|
|
|
|
|
|
delimiter_finish => undef, # Finishing delimiter of regexp. |
|
147
|
|
|
|
|
|
|
delimiter_start => undef, # Starting delimiter of regexp. |
|
148
|
|
|
|
|
|
|
encoding => $args{encoding}, # Character encoding. |
|
149
|
|
|
|
|
|
|
expect => undef, # Extra classes to expect. |
|
150
|
|
|
|
|
|
|
expect_next => undef, # Extra classes as of next parse cycle |
|
151
|
|
|
|
|
|
|
failures => 0, # Number of parse failures. |
|
152
|
|
|
|
|
|
|
find => undef, # String for find_regexp |
|
153
|
|
|
|
|
|
|
known => {}, # Known tokenizers, by mode. |
|
154
|
|
|
|
|
|
|
location => $args{location}, |
|
155
|
|
|
|
|
|
|
match => undef, # Match from find_regexp. |
|
156
|
|
|
|
|
|
|
mode => 'init', # Initialize |
|
157
|
|
|
|
|
|
|
modifiers => [{}], # Modifier hash. |
|
158
|
|
|
|
|
|
|
pending => [], # Tokens made but not returned. |
|
159
|
|
|
|
|
|
|
prior => TOKEN_UNKNOWN, # Prior significant token. |
|
160
|
|
|
|
|
|
|
source => $re, # The object we were initialized with. |
|
161
|
|
|
|
|
|
|
strict => $args{strict}, # like "use re 'strict';". |
|
162
|
|
|
|
|
|
|
trace => __PACKAGE__->__defined_or( |
|
163
|
739
|
|
100
|
|
|
9538
|
$args{trace}, $ENV{PPIX_REGEXP_TOKENIZER_TRACE}, 0 ), |
|
164
|
|
|
|
|
|
|
}; |
|
165
|
|
|
|
|
|
|
|
|
166
|
739
|
100
|
|
|
|
3510
|
if ( __instance( $re, 'PPI::Element' ) ) { |
|
|
|
100
|
|
|
|
|
|
|
167
|
11
|
50
|
|
|
|
53
|
is_ppi_regexp_element( $re ) |
|
168
|
|
|
|
|
|
|
or return __set_errstr( ref $re, 'not supported by', $class ); |
|
169
|
|
|
|
|
|
|
# TODO conditionalizstion on PPI class does not really |
|
170
|
|
|
|
|
|
|
# belong here, but at the moment I have no other idea of |
|
171
|
|
|
|
|
|
|
# where to put it. |
|
172
|
11
|
50
|
|
|
|
112
|
$self->{content} = $re->isa( 'PPI::Token::HereDoc' ) ? |
|
173
|
|
|
|
|
|
|
join( '', $re->content(), "\n", $re->heredoc(), |
|
174
|
|
|
|
|
|
|
$re->terminator(), "\n" ) : |
|
175
|
|
|
|
|
|
|
$re->content(); |
|
176
|
|
|
|
|
|
|
} elsif ( ref $re ) { |
|
177
|
2
|
|
|
|
|
10
|
return __set_errstr( ref $re, 'not supported' ); |
|
178
|
|
|
|
|
|
|
} else { |
|
179
|
726
|
|
|
|
|
1998
|
$self->{content} = $re; |
|
180
|
|
|
|
|
|
|
} |
|
181
|
|
|
|
|
|
|
|
|
182
|
737
|
|
|
|
|
1656
|
bless $self, $class; |
|
183
|
|
|
|
|
|
|
|
|
184
|
737
|
|
|
|
|
2895
|
$self->{content} = $self->decode( $self->{content} ); |
|
185
|
|
|
|
|
|
|
|
|
186
|
737
|
|
|
|
|
2057
|
$self->{cursor_limit} = length $self->{content}; |
|
187
|
|
|
|
|
|
|
|
|
188
|
|
|
|
|
|
|
$self->{trace} |
|
189
|
737
|
50
|
|
|
|
2454
|
and warn "\ntokenizing '$self->{content}'\n"; |
|
190
|
|
|
|
|
|
|
|
|
191
|
737
|
|
|
|
|
2632
|
return $self; |
|
192
|
|
|
|
|
|
|
} |
|
193
|
|
|
|
|
|
|
|
|
194
|
|
|
|
|
|
|
sub __set_errstr { |
|
195
|
2
|
|
|
2
|
|
7
|
$errstr = join ' ', @_; |
|
196
|
2
|
|
|
|
|
16
|
return; |
|
197
|
|
|
|
|
|
|
} |
|
198
|
|
|
|
|
|
|
|
|
199
|
|
|
|
|
|
|
sub errstr { |
|
200
|
2
|
|
|
2
|
1
|
6
|
return $errstr; |
|
201
|
|
|
|
|
|
|
} |
|
202
|
|
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
} |
|
204
|
|
|
|
|
|
|
|
|
205
|
|
|
|
|
|
|
sub capture { |
|
206
|
712
|
|
|
712
|
1
|
1755
|
my ( $self ) = @_; |
|
207
|
712
|
100
|
|
|
|
2151
|
$self->{capture} or return; |
|
208
|
691
|
50
|
|
|
|
1667
|
defined wantarray or return; |
|
209
|
691
|
50
|
|
|
|
1623
|
return wantarray ? @{ $self->{capture} } : $self->{capture}; |
|
|
691
|
|
|
|
|
3284
|
|
|
210
|
|
|
|
|
|
|
} |
|
211
|
|
|
|
|
|
|
|
|
212
|
|
|
|
|
|
|
sub content { |
|
213
|
1
|
|
|
1
|
1
|
10
|
my ( $self ) = @_; |
|
214
|
1
|
|
|
|
|
4
|
return $self->{content}; |
|
215
|
|
|
|
|
|
|
} |
|
216
|
|
|
|
|
|
|
|
|
217
|
|
|
|
|
|
|
sub cookie { |
|
218
|
10177
|
|
|
10177
|
1
|
18543
|
my ( $self, $name, @args ) = @_; |
|
219
|
10177
|
50
|
|
|
|
18901
|
defined $name |
|
220
|
|
|
|
|
|
|
or confess "Programming error - undefined cookie name"; |
|
221
|
10177
|
50
|
|
|
|
19653
|
if ( $self->{trace} ) { |
|
222
|
0
|
|
|
|
|
0
|
local $" = ', '; |
|
223
|
0
|
|
|
|
|
0
|
warn "cookie( '$name', @args )\n"; |
|
224
|
|
|
|
|
|
|
} |
|
225
|
10177
|
100
|
|
|
|
38457
|
@args or return $self->{cookie}{$name}; |
|
226
|
721
|
|
|
|
|
1804
|
my $cookie = shift @args; |
|
227
|
721
|
100
|
|
|
|
2516
|
if ( CODE_REF eq ref $cookie ) { |
|
|
|
50
|
|
|
|
|
|
|
228
|
593
|
|
|
|
|
2901
|
return ( $self->{cookie}{$name} = $cookie ); |
|
229
|
|
|
|
|
|
|
} elsif ( defined $cookie ) { |
|
230
|
0
|
|
|
|
|
0
|
confess "Programming error - cookie must be CODE ref or undef"; |
|
231
|
|
|
|
|
|
|
} else { |
|
232
|
128
|
|
|
|
|
517
|
return delete $self->{cookie}{$name}; |
|
233
|
|
|
|
|
|
|
} |
|
234
|
|
|
|
|
|
|
} |
|
235
|
|
|
|
|
|
|
|
|
236
|
|
|
|
|
|
|
# NOTE: Currently this is called only against |
|
237
|
|
|
|
|
|
|
# COOKIE_LOOKAROUND_ASSERTION, once in PPIx::Token::GroupType::Assertion |
|
238
|
|
|
|
|
|
|
# to prevent the cookie from being remade if it already exists, and once |
|
239
|
|
|
|
|
|
|
# in PPIx::Regexp::Token::Assertion to determine if \K is inside a |
|
240
|
|
|
|
|
|
|
# lookaround assertion. If it gets used other places, or if there is |
|
241
|
|
|
|
|
|
|
# call for it, I should consider removing the underscores and |
|
242
|
|
|
|
|
|
|
# documenting it as public. |
|
243
|
|
|
|
|
|
|
sub __cookie_exists { |
|
244
|
57
|
|
|
57
|
|
165
|
my ( $self, $name ) = @_; |
|
245
|
57
|
50
|
|
|
|
171
|
defined $name |
|
246
|
|
|
|
|
|
|
or confess "Programming error - undefined cookie name"; |
|
247
|
57
|
|
|
|
|
222
|
return $self->{cookie}{$name}; |
|
248
|
|
|
|
|
|
|
} |
|
249
|
|
|
|
|
|
|
|
|
250
|
|
|
|
|
|
|
sub default_modifiers { |
|
251
|
0
|
|
|
0
|
1
|
0
|
my ( $self ) = @_; |
|
252
|
0
|
|
|
|
|
0
|
return [ @{ $self->{default_modifiers} } ]; |
|
|
0
|
|
|
|
|
0
|
|
|
253
|
|
|
|
|
|
|
} |
|
254
|
|
|
|
|
|
|
|
|
255
|
|
|
|
|
|
|
sub __effective_modifiers { |
|
256
|
332
|
|
|
332
|
|
844
|
my ( $self ) = @_; |
|
257
|
|
|
|
|
|
|
HASH_REF eq ref $self->{effective_modifiers} |
|
258
|
332
|
100
|
|
|
|
1277
|
or return {}; |
|
259
|
324
|
|
|
|
|
664
|
return { %{ $self->{effective_modifiers} } }; |
|
|
324
|
|
|
|
|
1293
|
|
|
260
|
|
|
|
|
|
|
} |
|
261
|
|
|
|
|
|
|
|
|
262
|
|
|
|
|
|
|
sub encoding { |
|
263
|
0
|
|
|
0
|
1
|
0
|
my ( $self ) = @_; |
|
264
|
0
|
|
|
|
|
0
|
return $self->{encoding}; |
|
265
|
|
|
|
|
|
|
} |
|
266
|
|
|
|
|
|
|
|
|
267
|
|
|
|
|
|
|
sub expect { |
|
268
|
330
|
|
|
330
|
1
|
1395
|
my ( $self, @args ) = @_; |
|
269
|
|
|
|
|
|
|
|
|
270
|
|
|
|
|
|
|
@args |
|
271
|
330
|
50
|
|
|
|
842
|
or return; |
|
272
|
|
|
|
|
|
|
|
|
273
|
|
|
|
|
|
|
$self->{expect_next} = [ |
|
274
|
330
|
50
|
|
|
|
787
|
map { m/ \A PPIx::Regexp:: /smx ? $_ : 'PPIx::Regexp::' . $_ } |
|
|
2602
|
|
|
|
|
7939
|
|
|
275
|
|
|
|
|
|
|
@args |
|
276
|
|
|
|
|
|
|
]; |
|
277
|
330
|
|
|
|
|
1089
|
$self->{expect} = undef; |
|
278
|
330
|
|
|
|
|
776
|
return; |
|
279
|
|
|
|
|
|
|
} |
|
280
|
|
|
|
|
|
|
|
|
281
|
|
|
|
|
|
|
sub failures { |
|
282
|
8
|
|
|
8
|
1
|
22
|
my ( $self ) = @_; |
|
283
|
8
|
|
|
|
|
21
|
return $self->{failures}; |
|
284
|
|
|
|
|
|
|
} |
|
285
|
|
|
|
|
|
|
|
|
286
|
|
|
|
|
|
|
sub find_matching_delimiter { |
|
287
|
583
|
|
|
583
|
1
|
1535
|
my ( $self ) = @_; |
|
288
|
583
|
|
100
|
|
|
2555
|
$self->{cursor_curr} ||= 0; |
|
289
|
|
|
|
|
|
|
my $start = substr |
|
290
|
|
|
|
|
|
|
$self->{content}, |
|
291
|
|
|
|
|
|
|
$self->{cursor_curr}, |
|
292
|
583
|
|
|
|
|
1645
|
1; |
|
293
|
|
|
|
|
|
|
|
|
294
|
583
|
|
|
|
|
1021
|
my $inx = $self->{cursor_curr}; |
|
295
|
583
|
|
66
|
|
|
2131
|
my $finish = ( |
|
296
|
|
|
|
|
|
|
my $bracketed = $self->close_bracket( $start ) ) || $start; |
|
297
|
|
|
|
|
|
|
|
|
298
|
|
|
|
|
|
|
=begin comment |
|
299
|
|
|
|
|
|
|
|
|
300
|
|
|
|
|
|
|
$self->{trace} |
|
301
|
|
|
|
|
|
|
and warn "Find matching delimiter: Start with '$start' at $self->{cursor_curr}, end with '$finish' at or before $self->{cursor_limit}\n"; |
|
302
|
|
|
|
|
|
|
|
|
303
|
|
|
|
|
|
|
=end comment |
|
304
|
|
|
|
|
|
|
|
|
305
|
|
|
|
|
|
|
=cut |
|
306
|
|
|
|
|
|
|
|
|
307
|
583
|
|
|
|
|
1199
|
my $nest = 0; |
|
308
|
|
|
|
|
|
|
|
|
309
|
583
|
|
|
|
|
1894
|
while ( ++$inx < $self->{cursor_limit} ) { |
|
310
|
6122
|
|
|
|
|
9818
|
my $char = substr $self->{content}, $inx, 1; |
|
311
|
|
|
|
|
|
|
|
|
312
|
|
|
|
|
|
|
=begin comment |
|
313
|
|
|
|
|
|
|
|
|
314
|
|
|
|
|
|
|
$self->{trace} |
|
315
|
|
|
|
|
|
|
and warn " looking at '$char' at $inx, nest level $nest\n"; |
|
316
|
|
|
|
|
|
|
|
|
317
|
|
|
|
|
|
|
=end comment |
|
318
|
|
|
|
|
|
|
|
|
319
|
|
|
|
|
|
|
=cut |
|
320
|
|
|
|
|
|
|
|
|
321
|
6122
|
100
|
100
|
|
|
22768
|
if ( $char eq '\\' && $finish ne '\\' ) { |
|
|
|
100
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
322
|
317
|
|
|
|
|
746
|
++$inx; |
|
323
|
|
|
|
|
|
|
} elsif ( $bracketed && $char eq $start ) { |
|
324
|
1
|
|
|
|
|
3
|
++$nest; |
|
325
|
|
|
|
|
|
|
} elsif ( $char eq $finish ) { |
|
326
|
|
|
|
|
|
|
--$nest < 0 |
|
327
|
582
|
100
|
|
|
|
3474
|
and return $inx - $self->{cursor_curr}; |
|
328
|
|
|
|
|
|
|
} |
|
329
|
|
|
|
|
|
|
} |
|
330
|
|
|
|
|
|
|
|
|
331
|
2
|
|
|
|
|
7
|
return; |
|
332
|
|
|
|
|
|
|
} |
|
333
|
|
|
|
|
|
|
|
|
334
|
|
|
|
|
|
|
sub find_regexp { |
|
335
|
16631
|
|
|
16631
|
1
|
30844
|
my ( $self, $regexp ) = @_; |
|
336
|
|
|
|
|
|
|
|
|
337
|
16631
|
50
|
0
|
|
|
35975
|
REGEXP_REF eq ref $regexp |
|
338
|
|
|
|
|
|
|
or confess |
|
339
|
|
|
|
|
|
|
'Argument is a ', ( ref $regexp || 'scalar' ), ' not a Regexp'; |
|
340
|
|
|
|
|
|
|
|
|
341
|
16631
|
100
|
|
|
|
36073
|
defined $self->{find} or $self->_remainder(); |
|
342
|
|
|
|
|
|
|
|
|
343
|
16631
|
100
|
|
|
|
99150
|
$self->{find} =~ $regexp |
|
344
|
|
|
|
|
|
|
or return; |
|
345
|
|
|
|
|
|
|
|
|
346
|
1840
|
|
|
|
|
3429
|
my @capture; |
|
347
|
1840
|
|
|
|
|
6642
|
foreach my $inx ( 0 .. $#+ ) { |
|
348
|
4247
|
100
|
66
|
|
|
18641
|
if ( defined $-[$inx] && defined $+[$inx] ) { |
|
349
|
|
|
|
|
|
|
push @capture, $self->{capture} = substr |
|
350
|
|
|
|
|
|
|
$self->{find}, |
|
351
|
3758
|
|
|
|
|
19976
|
$-[$inx], |
|
352
|
|
|
|
|
|
|
$+[$inx] - $-[$inx]; |
|
353
|
|
|
|
|
|
|
} else { |
|
354
|
489
|
|
|
|
|
1531
|
push @capture, undef; |
|
355
|
|
|
|
|
|
|
} |
|
356
|
|
|
|
|
|
|
} |
|
357
|
1840
|
|
|
|
|
4880
|
$self->{match} = shift @capture; |
|
358
|
1840
|
|
|
|
|
3808
|
$self->{capture} = \@capture; |
|
359
|
|
|
|
|
|
|
|
|
360
|
|
|
|
|
|
|
# The following circumlocution seems to be needed under Perl 5.13.0 |
|
361
|
|
|
|
|
|
|
# for reasons I do not fathom -- at least in the case where |
|
362
|
|
|
|
|
|
|
# wantarray is false. RT 56864 details the symptoms, which I was |
|
363
|
|
|
|
|
|
|
# never able to reproduce outside Perl::Critic. But returning $+[0] |
|
364
|
|
|
|
|
|
|
# directly, the value could transmogrify between here and the |
|
365
|
|
|
|
|
|
|
# calling module. |
|
366
|
|
|
|
|
|
|
## my @data = ( $-[0], $+[0] ); |
|
367
|
|
|
|
|
|
|
## return wantarray ? @data : $data[1]; |
|
368
|
1840
|
50
|
|
|
|
9693
|
return wantarray ? ( $-[0] + 0, $+[0] + 0 ) : $+[0] + 0; |
|
369
|
|
|
|
|
|
|
} |
|
370
|
|
|
|
|
|
|
|
|
371
|
|
|
|
|
|
|
sub get_mode { |
|
372
|
46
|
|
|
46
|
1
|
97
|
my ( $self ) = @_; |
|
373
|
46
|
|
|
|
|
187
|
return $self->{mode}; |
|
374
|
|
|
|
|
|
|
} |
|
375
|
|
|
|
|
|
|
|
|
376
|
|
|
|
|
|
|
sub get_start_delimiter { |
|
377
|
1794
|
|
|
1794
|
1
|
2977
|
my ( $self ) = @_; |
|
378
|
1794
|
|
|
|
|
7582
|
return $self->{delimiter_start}; |
|
379
|
|
|
|
|
|
|
} |
|
380
|
|
|
|
|
|
|
|
|
381
|
|
|
|
|
|
|
sub get_token { |
|
382
|
4114
|
|
|
4114
|
1
|
6916
|
my ( $self ) = @_; |
|
383
|
|
|
|
|
|
|
|
|
384
|
|
|
|
|
|
|
caller eq __PACKAGE__ or $self->{cursor_curr} > $self->{cursor_orig} |
|
385
|
4114
|
50
|
66
|
|
|
12416
|
or confess 'Programming error - get_token() called without ', |
|
386
|
|
|
|
|
|
|
'first calling make_token()'; |
|
387
|
|
|
|
|
|
|
|
|
388
|
4114
|
|
|
|
|
10375
|
my $handler = '__PPIX_TOKENIZER__' . $self->{mode}; |
|
389
|
|
|
|
|
|
|
|
|
390
|
|
|
|
|
|
|
my $code = $self->can( $handler ) |
|
391
|
|
|
|
|
|
|
or confess 'Programming error - ', |
|
392
|
|
|
|
|
|
|
"Getting token in mode '$self->{mode}'. ", |
|
393
|
|
|
|
|
|
|
"cursor_curr = $self->{cursor_curr}; ", |
|
394
|
|
|
|
|
|
|
"cursor_limit = $self->{cursor_limit}; ", |
|
395
|
|
|
|
|
|
|
"length( content ) = ", length $self->{content}, |
|
396
|
4114
|
50
|
|
|
|
15927
|
"; content = '$self->{content}'"; |
|
397
|
|
|
|
|
|
|
|
|
398
|
|
|
|
|
|
|
my $character = substr( |
|
399
|
|
|
|
|
|
|
$self->{content}, |
|
400
|
|
|
|
|
|
|
$self->{cursor_curr}, |
|
401
|
4114
|
|
|
|
|
10255
|
1 |
|
402
|
|
|
|
|
|
|
); |
|
403
|
|
|
|
|
|
|
|
|
404
|
|
|
|
|
|
|
$self->{trace} |
|
405
|
4114
|
50
|
|
|
|
9066
|
and warn "get_token() got '$character' from $self->{cursor_curr}\n"; |
|
406
|
|
|
|
|
|
|
|
|
407
|
4114
|
|
|
|
|
10138
|
return ( $code->( $self, $character ) ); |
|
408
|
|
|
|
|
|
|
} |
|
409
|
|
|
|
|
|
|
|
|
410
|
|
|
|
|
|
|
sub interpolates { |
|
411
|
141
|
|
|
141
|
1
|
333
|
my ( $self ) = @_; |
|
412
|
141
|
|
|
|
|
596
|
return $self->{delimiter_start} ne q{'}; |
|
413
|
|
|
|
|
|
|
} |
|
414
|
|
|
|
|
|
|
|
|
415
|
|
|
|
|
|
|
sub make_token { |
|
416
|
5216
|
|
|
5216
|
1
|
12250
|
my ( $self, $length, $class, $arg ) = @_; |
|
417
|
5216
|
100
|
|
|
|
10828
|
defined $class or $class = caller; |
|
418
|
|
|
|
|
|
|
|
|
419
|
5216
|
50
|
|
|
|
12958
|
if ( $length + $self->{cursor_curr} > $self->{cursor_limit} ) { |
|
420
|
|
|
|
|
|
|
$length = $self->{cursor_limit} - $self->{cursor_curr} |
|
421
|
0
|
0
|
|
|
|
0
|
or return; |
|
422
|
|
|
|
|
|
|
} |
|
423
|
|
|
|
|
|
|
|
|
424
|
5216
|
50
|
|
|
|
18487
|
$class =~ m/ \A PPIx::Regexp:: /smx |
|
425
|
|
|
|
|
|
|
or $class = 'PPIx::Regexp::' . $class; |
|
426
|
|
|
|
|
|
|
my $content = substr |
|
427
|
|
|
|
|
|
|
$self->{content}, |
|
428
|
|
|
|
|
|
|
$self->{cursor_curr}, |
|
429
|
5216
|
|
|
|
|
12325
|
$length; |
|
430
|
|
|
|
|
|
|
|
|
431
|
|
|
|
|
|
|
$self->{trace} |
|
432
|
5216
|
50
|
|
|
|
10864
|
and warn "make_token( $length, '$class' ) => '$content'\n"; |
|
433
|
5216
|
50
|
|
|
|
11231
|
$self->{trace} > 1 |
|
434
|
|
|
|
|
|
|
and warn " make_token: cursor_curr = $self->{cursor_curr}; ", |
|
435
|
|
|
|
|
|
|
"cursor_limit = $self->{cursor_limit}\n"; |
|
436
|
|
|
|
|
|
|
my $token = $class->__new( $content, |
|
437
|
|
|
|
|
|
|
tokenizer => $self, |
|
438
|
5216
|
100
|
|
|
|
7938
|
%{ $arg || {} } ) |
|
|
5216
|
50
|
|
|
|
36989
|
|
|
439
|
|
|
|
|
|
|
or return; |
|
440
|
|
|
|
|
|
|
|
|
441
|
|
|
|
|
|
|
$self->{index_locations} |
|
442
|
5216
|
100
|
|
|
|
15341
|
and $self->_update_location( $token ); |
|
443
|
|
|
|
|
|
|
|
|
444
|
|
|
|
|
|
|
$token->significant() |
|
445
|
5216
|
100
|
|
|
|
15343
|
and $self->{expect} = undef; |
|
446
|
|
|
|
|
|
|
|
|
447
|
5216
|
100
|
|
|
|
23884
|
$token->isa( TOKEN_UNKNOWN ) and $self->{failures}++; |
|
448
|
|
|
|
|
|
|
|
|
449
|
5216
|
|
|
|
|
9920
|
$self->{cursor_curr} += $length; |
|
450
|
5216
|
|
|
|
|
8147
|
$self->{find} = undef; |
|
451
|
5216
|
|
|
|
|
8248
|
$self->{match} = undef; |
|
452
|
5216
|
|
|
|
|
8845
|
$self->{capture} = undef; |
|
453
|
|
|
|
|
|
|
|
|
454
|
5216
|
|
|
|
|
8380
|
foreach my $name ( keys %{ $self->{cookie} } ) { |
|
|
5216
|
|
|
|
|
13910
|
|
|
455
|
3615
|
|
|
|
|
6761
|
my $cookie = $self->{cookie}{$name}; |
|
456
|
|
|
|
|
|
|
$cookie->( $self, $token ) |
|
457
|
3615
|
100
|
|
|
|
9554
|
or delete $self->{cookie}{$name}; |
|
458
|
|
|
|
|
|
|
} |
|
459
|
|
|
|
|
|
|
|
|
460
|
|
|
|
|
|
|
# Record this token as the prior token if it is significant. We must |
|
461
|
|
|
|
|
|
|
# do this after processing cookies, so that the cookies have access |
|
462
|
|
|
|
|
|
|
# to the old token if they want. |
|
463
|
|
|
|
|
|
|
$token->significant() |
|
464
|
5216
|
100
|
|
|
|
12771
|
and $self->{prior_significant_token} = $token; |
|
465
|
|
|
|
|
|
|
|
|
466
|
5216
|
|
|
|
|
21698
|
return $token; |
|
467
|
|
|
|
|
|
|
} |
|
468
|
|
|
|
|
|
|
|
|
469
|
|
|
|
|
|
|
sub match { |
|
470
|
86
|
|
|
86
|
1
|
253
|
my ( $self ) = @_; |
|
471
|
86
|
|
|
|
|
264
|
return $self->{match}; |
|
472
|
|
|
|
|
|
|
} |
|
473
|
|
|
|
|
|
|
|
|
474
|
|
|
|
|
|
|
sub modifier { |
|
475
|
5036
|
|
|
5036
|
1
|
9865
|
my ( $self, $modifier ) = @_; |
|
476
|
|
|
|
|
|
|
return PPIx::Regexp::Token::Modifier::__asserts( |
|
477
|
5036
|
|
|
|
|
12774
|
$self->{modifiers}[-1], $modifier ); |
|
478
|
|
|
|
|
|
|
} |
|
479
|
|
|
|
|
|
|
|
|
480
|
|
|
|
|
|
|
sub modifier_duplicate { |
|
481
|
292
|
|
|
292
|
1
|
621
|
my ( $self ) = @_; |
|
482
|
292
|
|
|
|
|
715
|
push @{ $self->{modifiers} }, |
|
483
|
292
|
|
|
|
|
460
|
{ %{ $self->{modifiers}[-1] } }; |
|
|
292
|
|
|
|
|
1101
|
|
|
484
|
292
|
|
|
|
|
700
|
return; |
|
485
|
|
|
|
|
|
|
} |
|
486
|
|
|
|
|
|
|
|
|
487
|
|
|
|
|
|
|
sub modifier_modify { |
|
488
|
592
|
|
|
592
|
1
|
1691
|
my ( $self, %args ) = @_; |
|
489
|
|
|
|
|
|
|
|
|
490
|
|
|
|
|
|
|
# Modifier code is centralized in PPIx::Regexp::Token::Modifier |
|
491
|
|
|
|
|
|
|
$self->{modifiers}[-1] = |
|
492
|
|
|
|
|
|
|
PPIx::Regexp::Token::Modifier::__PPIX_TOKENIZER__modifier_modify( |
|
493
|
592
|
|
|
|
|
2313
|
$self->{modifiers}[-1], \%args ); |
|
494
|
|
|
|
|
|
|
|
|
495
|
592
|
|
|
|
|
1664
|
return; |
|
496
|
|
|
|
|
|
|
|
|
497
|
|
|
|
|
|
|
} |
|
498
|
|
|
|
|
|
|
|
|
499
|
|
|
|
|
|
|
sub modifier_pop { |
|
500
|
288
|
|
|
288
|
1
|
663
|
my ( $self ) = @_; |
|
501
|
288
|
|
|
|
|
980
|
@{ $self->{modifiers} } > 1 |
|
502
|
288
|
100
|
|
|
|
521
|
and pop @{ $self->{modifiers} }; |
|
|
282
|
|
|
|
|
722
|
|
|
503
|
288
|
|
|
|
|
713
|
return; |
|
504
|
|
|
|
|
|
|
} |
|
505
|
|
|
|
|
|
|
|
|
506
|
|
|
|
|
|
|
sub modifier_seen { |
|
507
|
8
|
|
|
8
|
1
|
35
|
my ( $self, $modifier ) = @_; |
|
508
|
8
|
|
|
|
|
58
|
foreach my $mod ( reverse @{ $self->{modifiers} } ) { |
|
|
8
|
|
|
|
|
32
|
|
|
509
|
10
|
100
|
|
|
|
52
|
exists $mod->{$modifier} |
|
510
|
|
|
|
|
|
|
and return 1; |
|
511
|
|
|
|
|
|
|
} |
|
512
|
5
|
|
|
|
|
20
|
return; |
|
513
|
|
|
|
|
|
|
} |
|
514
|
|
|
|
|
|
|
|
|
515
|
|
|
|
|
|
|
sub next_token { |
|
516
|
5750
|
|
|
5750
|
1
|
10093
|
my ( $self ) = @_; |
|
517
|
|
|
|
|
|
|
|
|
518
|
|
|
|
|
|
|
{ |
|
519
|
|
|
|
|
|
|
|
|
520
|
5750
|
100
|
|
|
|
9430
|
if ( @{ $self->{pending} } ) { |
|
|
9847
|
|
|
|
|
13336
|
|
|
|
9847
|
|
|
|
|
21044
|
|
|
521
|
5214
|
|
|
|
|
7768
|
return shift @{ $self->{pending} }; |
|
|
5214
|
|
|
|
|
17270
|
|
|
522
|
|
|
|
|
|
|
} |
|
523
|
|
|
|
|
|
|
|
|
524
|
4633
|
100
|
|
|
|
11518
|
if ( $self->{cursor_curr} >= $self->{cursor_limit} ) { |
|
525
|
|
|
|
|
|
|
$self->{cursor_limit} >= length $self->{content} |
|
526
|
1091
|
100
|
|
|
|
5231
|
and return; |
|
527
|
555
|
50
|
|
|
|
1889
|
$self->{mode} eq 'finish' and return; |
|
528
|
555
|
|
|
|
|
1736
|
$self->_set_mode( 'finish' ); |
|
529
|
555
|
|
|
|
|
1130
|
$self->{cursor_limit} += length $self->{delimiter_finish}; |
|
530
|
|
|
|
|
|
|
} |
|
531
|
|
|
|
|
|
|
|
|
532
|
4097
|
50
|
|
|
|
9599
|
if ( my @tokens = $self->get_token() ) { |
|
533
|
4097
|
|
|
|
|
6314
|
push @{ $self->{pending} }, @tokens; |
|
|
4097
|
|
|
|
|
8954
|
|
|
534
|
4097
|
|
|
|
|
7293
|
redo; |
|
535
|
|
|
|
|
|
|
|
|
536
|
|
|
|
|
|
|
} |
|
537
|
|
|
|
|
|
|
|
|
538
|
|
|
|
|
|
|
} |
|
539
|
|
|
|
|
|
|
|
|
540
|
0
|
|
|
|
|
0
|
return; |
|
541
|
|
|
|
|
|
|
|
|
542
|
|
|
|
|
|
|
} |
|
543
|
|
|
|
|
|
|
|
|
544
|
|
|
|
|
|
|
sub peek { |
|
545
|
379
|
|
|
379
|
1
|
821
|
my ( $self, $offset ) = @_; |
|
546
|
379
|
100
|
|
|
|
875
|
defined $offset or $offset = 0; |
|
547
|
379
|
50
|
|
|
|
904
|
$offset < 0 and return; |
|
548
|
379
|
|
|
|
|
702
|
$offset += $self->{cursor_curr}; |
|
549
|
379
|
50
|
|
|
|
915
|
$offset >= $self->{cursor_limit} and return; |
|
550
|
379
|
|
|
|
|
1697
|
return substr $self->{content}, $offset, 1; |
|
551
|
|
|
|
|
|
|
} |
|
552
|
|
|
|
|
|
|
|
|
553
|
|
|
|
|
|
|
sub ppi_document { |
|
554
|
83
|
|
|
83
|
1
|
221
|
my ( $self ) = @_; |
|
555
|
|
|
|
|
|
|
|
|
556
|
83
|
50
|
|
|
|
242
|
defined $self->{find} or $self->_remainder(); |
|
557
|
|
|
|
|
|
|
|
|
558
|
83
|
|
|
|
|
504
|
return PPI::Document->new( \"$self->{find}" ); |
|
559
|
|
|
|
|
|
|
} |
|
560
|
|
|
|
|
|
|
|
|
561
|
|
|
|
|
|
|
sub prior_significant_token { |
|
562
|
2413
|
|
|
2413
|
1
|
4640
|
my ( $self, $method, @args ) = @_; |
|
563
|
2413
|
100
|
|
|
|
4798
|
defined $method or return $self->{prior_significant_token}; |
|
564
|
|
|
|
|
|
|
$self->{prior_significant_token}->can( $method ) |
|
565
|
|
|
|
|
|
|
or confess 'Programming error - ', |
|
566
|
|
|
|
|
|
|
( ref $self->{prior_significant_token} || |
|
567
|
2394
|
50
|
0
|
|
|
9409
|
$self->{prior_significant_token} ), |
|
568
|
|
|
|
|
|
|
' does not support method ', $method; |
|
569
|
2394
|
|
|
|
|
8787
|
return $self->{prior_significant_token}->$method( @args ); |
|
570
|
|
|
|
|
|
|
} |
|
571
|
|
|
|
|
|
|
|
|
572
|
|
|
|
|
|
|
# my $length = $token->__recognize_postderef( $tokenizer, $iterator ). |
|
573
|
|
|
|
|
|
|
# |
|
574
|
|
|
|
|
|
|
# This method is private to the PPIx-Regexp package, and may be changed |
|
575
|
|
|
|
|
|
|
# or retracted without warning. What it does is to recognize postfix |
|
576
|
|
|
|
|
|
|
# dereferences. It returns the length in characters of the first postfix |
|
577
|
|
|
|
|
|
|
# dereference found, or a false value if none is found. |
|
578
|
|
|
|
|
|
|
# |
|
579
|
|
|
|
|
|
|
# The optional $iterator argument can be one of the following: |
|
580
|
|
|
|
|
|
|
# - A code reference, which will be called to provide PPI::Element |
|
581
|
|
|
|
|
|
|
# objects to be checked to see if they represent a postfix |
|
582
|
|
|
|
|
|
|
# dereference. |
|
583
|
|
|
|
|
|
|
# - A PPI::Element, which is checked to see if it is a postfix |
|
584
|
|
|
|
|
|
|
# dereference. |
|
585
|
|
|
|
|
|
|
# - Undef, or omitted, in which case ppi() is called on the invocant, |
|
586
|
|
|
|
|
|
|
# and everything that follows the '->' operator is checked to see if |
|
587
|
|
|
|
|
|
|
# it is a postfix dereference. |
|
588
|
|
|
|
|
|
|
# - Anything else results in an exception and stack trace. |
|
589
|
|
|
|
|
|
|
|
|
590
|
|
|
|
|
|
|
{ |
|
591
|
|
|
|
|
|
|
sub __recognize_postderef { |
|
592
|
148
|
|
|
148
|
|
469
|
my ( $self, $token, $iterator ) = @_; |
|
593
|
|
|
|
|
|
|
|
|
594
|
|
|
|
|
|
|
# Note that if ppi() gets called I have to hold a reference to |
|
595
|
|
|
|
|
|
|
# the returned object until I am done with all its children. |
|
596
|
148
|
|
|
|
|
244
|
my $ppi; |
|
597
|
148
|
100
|
|
|
|
366
|
if ( ! defined $iterator ) { |
|
|
|
50
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
598
|
|
|
|
|
|
|
|
|
599
|
|
|
|
|
|
|
# This MUST be done before ppi() is called. |
|
600
|
|
|
|
|
|
|
$self->{index_locations} |
|
601
|
144
|
100
|
|
|
|
400
|
and $self->_update_location( $token ); |
|
602
|
|
|
|
|
|
|
|
|
603
|
144
|
|
|
|
|
494
|
$ppi = $token->ppi(); |
|
604
|
29
|
|
|
|
|
8358
|
my @ops = grep { '->' eq $_->content() } @{ |
|
605
|
144
|
100
|
|
|
|
324
|
$ppi->find( 'PPI::Token::Operator' ) || [] }; |
|
|
144
|
|
|
|
|
598
|
|
|
606
|
|
|
|
|
|
|
$iterator = sub { |
|
607
|
150
|
100
|
|
150
|
|
643
|
my $op = shift @ops |
|
608
|
|
|
|
|
|
|
or return; |
|
609
|
15
|
|
|
|
|
89
|
return $op->snext_sibling(); |
|
610
|
144
|
|
|
|
|
39259
|
}; |
|
611
|
|
|
|
|
|
|
} elsif ( $iterator->isa( 'PPI::Element' ) ) { |
|
612
|
4
|
|
|
|
|
10
|
my @eles = ( $iterator ); |
|
613
|
|
|
|
|
|
|
$iterator = sub { |
|
614
|
4
|
|
|
4
|
|
16
|
return shift @eles; |
|
615
|
4
|
|
|
|
|
14
|
}; |
|
616
|
|
|
|
|
|
|
} elsif ( CODE_REF ne ref $iterator ) { |
|
617
|
0
|
|
|
|
|
0
|
confess 'Programming error - Iterator not understood'; |
|
618
|
|
|
|
|
|
|
} |
|
619
|
|
|
|
|
|
|
|
|
620
|
148
|
|
|
|
|
744
|
my $accept = $token->__postderef_accept_cast(); |
|
621
|
|
|
|
|
|
|
|
|
622
|
148
|
|
|
|
|
333
|
while ( my $elem = $iterator->() ) { |
|
623
|
|
|
|
|
|
|
|
|
624
|
19
|
|
|
|
|
443
|
my $content = $elem->content(); |
|
625
|
|
|
|
|
|
|
|
|
626
|
|
|
|
|
|
|
# As of PPI 1.238, all postfix dereferences are parsed as |
|
627
|
|
|
|
|
|
|
# casts. So if we find a cast of the correct content we have |
|
628
|
|
|
|
|
|
|
# a postfix deref. |
|
629
|
19
|
100
|
|
|
|
184
|
$elem->isa( 'PPI::Token::Cast' ) |
|
630
|
|
|
|
|
|
|
or next; |
|
631
|
|
|
|
|
|
|
|
|
632
|
15
|
100
|
|
|
|
92
|
if ( $content =~ m/ ( .* ) \* \z /smx ) { |
|
|
|
50
|
|
|
|
|
|
|
633
|
|
|
|
|
|
|
# If we're an acceptable cast ending in a glob, accept |
|
634
|
|
|
|
|
|
|
# it. |
|
635
|
10
|
100
|
|
|
|
112
|
$accept->{$1} |
|
636
|
|
|
|
|
|
|
and return length $content; |
|
637
|
|
|
|
|
|
|
} elsif ( $accept->{$content} ) { |
|
638
|
|
|
|
|
|
|
# If we're an acceptable cast followed by a subscript, |
|
639
|
|
|
|
|
|
|
# we're a slice -- accept both cast and subscript. |
|
640
|
5
|
50
|
|
|
|
21
|
my $next = $elem->snext_sibling() |
|
641
|
|
|
|
|
|
|
or next; |
|
642
|
5
|
50
|
|
|
|
132
|
$next->isa( 'PPI::Structure::Subscript' ) |
|
643
|
|
|
|
|
|
|
or next; |
|
644
|
5
|
|
|
|
|
23
|
return length( $content ) + length( $next->content() ); |
|
645
|
|
|
|
|
|
|
} |
|
646
|
|
|
|
|
|
|
|
|
647
|
|
|
|
|
|
|
# Otherwise, we're not a postfix dereference; try the next |
|
648
|
|
|
|
|
|
|
# iteration. |
|
649
|
|
|
|
|
|
|
} |
|
650
|
|
|
|
|
|
|
|
|
651
|
|
|
|
|
|
|
# No postfix dereference found. |
|
652
|
135
|
|
|
|
|
782
|
return; |
|
653
|
|
|
|
|
|
|
} |
|
654
|
|
|
|
|
|
|
} |
|
655
|
|
|
|
|
|
|
|
|
656
|
|
|
|
|
|
|
sub significant { |
|
657
|
0
|
|
|
0
|
1
|
0
|
return 1; |
|
658
|
|
|
|
|
|
|
} |
|
659
|
|
|
|
|
|
|
|
|
660
|
|
|
|
|
|
|
sub strict { |
|
661
|
4
|
|
|
4
|
1
|
12
|
my ( $self ) = @_; |
|
662
|
4
|
|
|
|
|
34
|
return $self->{strict}; |
|
663
|
|
|
|
|
|
|
} |
|
664
|
|
|
|
|
|
|
|
|
665
|
|
|
|
|
|
|
sub _known_tokenizers { |
|
666
|
3025
|
|
|
3025
|
|
5409
|
my ( $self ) = @_; |
|
667
|
|
|
|
|
|
|
|
|
668
|
3025
|
|
|
|
|
5262
|
my $mode = $self->{mode}; |
|
669
|
|
|
|
|
|
|
|
|
670
|
3025
|
|
|
|
|
4506
|
my @expect; |
|
671
|
3025
|
100
|
|
|
|
6651
|
if ( $self->{expect_next} ) { |
|
672
|
328
|
|
|
|
|
988
|
$self->{expect} = $self->{expect_next}; |
|
673
|
328
|
|
|
|
|
657
|
$self->{expect_next} = undef; |
|
674
|
|
|
|
|
|
|
} |
|
675
|
3025
|
100
|
|
|
|
6418
|
if ( $self->{expect} ) { |
|
676
|
|
|
|
|
|
|
@expect = $self->_known_tokenizer_check( |
|
677
|
334
|
|
|
|
|
628
|
@{ $self->{expect} } ); |
|
|
334
|
|
|
|
|
951
|
|
|
678
|
|
|
|
|
|
|
} |
|
679
|
|
|
|
|
|
|
|
|
680
|
|
|
|
|
|
|
exists $self->{known}{$mode} and return ( |
|
681
|
3025
|
100
|
|
|
|
7669
|
@expect, @{ $self->{known}{$mode} } ); |
|
|
2487
|
|
|
|
|
9559
|
|
|
682
|
|
|
|
|
|
|
|
|
683
|
538
|
|
|
|
|
1496
|
my @found = $self->_known_tokenizer_check( |
|
684
|
|
|
|
|
|
|
$self->__tokenizer_classes() ); |
|
685
|
|
|
|
|
|
|
|
|
686
|
538
|
|
|
|
|
2256
|
$self->{known}{$mode} = \@found; |
|
687
|
538
|
|
|
|
|
2243
|
return (@expect, @found); |
|
688
|
|
|
|
|
|
|
} |
|
689
|
|
|
|
|
|
|
|
|
690
|
|
|
|
|
|
|
sub _known_tokenizer_check { |
|
691
|
872
|
|
|
872
|
|
3097
|
my ( $self, @args ) = @_; |
|
692
|
|
|
|
|
|
|
|
|
693
|
872
|
|
|
|
|
2015
|
my $handler = '__PPIX_TOKENIZER__' . $self->{mode}; |
|
694
|
872
|
|
|
|
|
1275
|
my @found; |
|
695
|
|
|
|
|
|
|
|
|
696
|
872
|
|
|
|
|
1783
|
foreach my $class ( @args ) { |
|
697
|
|
|
|
|
|
|
|
|
698
|
8556
|
100
|
|
|
|
44666
|
$class->can( $handler ) or next; |
|
699
|
8367
|
|
|
|
|
15496
|
push @found, $class; |
|
700
|
|
|
|
|
|
|
|
|
701
|
|
|
|
|
|
|
} |
|
702
|
|
|
|
|
|
|
|
|
703
|
872
|
|
|
|
|
4640
|
return @found; |
|
704
|
|
|
|
|
|
|
} |
|
705
|
|
|
|
|
|
|
|
|
706
|
|
|
|
|
|
|
sub tokens { |
|
707
|
204
|
|
|
204
|
1
|
583
|
my ( $self ) = @_; |
|
708
|
|
|
|
|
|
|
|
|
709
|
204
|
|
|
|
|
445
|
my @rslt; |
|
710
|
204
|
|
|
|
|
807
|
while ( my $token = $self->next_token() ) { |
|
711
|
1924
|
|
|
|
|
4960
|
push @rslt, $token; |
|
712
|
|
|
|
|
|
|
} |
|
713
|
|
|
|
|
|
|
|
|
714
|
204
|
|
|
|
|
1727
|
return @rslt; |
|
715
|
|
|
|
|
|
|
} |
|
716
|
|
|
|
|
|
|
|
|
717
|
|
|
|
|
|
|
# $self->_deprecation_notice( $type, $name ); |
|
718
|
|
|
|
|
|
|
# |
|
719
|
|
|
|
|
|
|
# This method centralizes deprecation. Type is 'attribute' or |
|
720
|
|
|
|
|
|
|
# 'method'. Deprecation is driven of the %deprecate hash. Values |
|
721
|
|
|
|
|
|
|
# are: |
|
722
|
|
|
|
|
|
|
# false - no warning |
|
723
|
|
|
|
|
|
|
# 1 - warn on first use |
|
724
|
|
|
|
|
|
|
# 2 - warn on each use |
|
725
|
|
|
|
|
|
|
# 3 - die on each use. |
|
726
|
|
|
|
|
|
|
# |
|
727
|
|
|
|
|
|
|
# $self->_deprecation_in_progress( $type, $name ) |
|
728
|
|
|
|
|
|
|
# |
|
729
|
|
|
|
|
|
|
# This method returns true if the deprecation is in progress. In |
|
730
|
|
|
|
|
|
|
# fact it returns the deprecation level. |
|
731
|
|
|
|
|
|
|
|
|
732
|
|
|
|
|
|
|
=begin comment |
|
733
|
|
|
|
|
|
|
|
|
734
|
|
|
|
|
|
|
{ |
|
735
|
|
|
|
|
|
|
|
|
736
|
|
|
|
|
|
|
my %deprecate = ( |
|
737
|
|
|
|
|
|
|
attribute => { |
|
738
|
|
|
|
|
|
|
postderef => 3, |
|
739
|
|
|
|
|
|
|
}, |
|
740
|
|
|
|
|
|
|
); |
|
741
|
|
|
|
|
|
|
|
|
742
|
|
|
|
|
|
|
sub _deprecation_notice { |
|
743
|
|
|
|
|
|
|
my ( undef, $type, $name, $repl ) = @_; # Invocant unused |
|
744
|
|
|
|
|
|
|
$deprecate{$type} or return; |
|
745
|
|
|
|
|
|
|
$deprecate{$type}{$name} or return; |
|
746
|
|
|
|
|
|
|
my $msg = sprintf 'The %s %s is %s', $name, $type, |
|
747
|
|
|
|
|
|
|
$deprecate{$type}{$name} > 2 ? 'removed' : 'deprecated'; |
|
748
|
|
|
|
|
|
|
defined $repl |
|
749
|
|
|
|
|
|
|
and $msg .= "; use $repl instead"; |
|
750
|
|
|
|
|
|
|
$deprecate{$type}{$name} >= 3 |
|
751
|
|
|
|
|
|
|
and croak $msg; |
|
752
|
|
|
|
|
|
|
warnings::enabled( 'deprecated' ) |
|
753
|
|
|
|
|
|
|
and carp $msg; |
|
754
|
|
|
|
|
|
|
$deprecate{$type}{$name} == 1 |
|
755
|
|
|
|
|
|
|
and $deprecate{$type}{$name} = 0; |
|
756
|
|
|
|
|
|
|
return; |
|
757
|
|
|
|
|
|
|
} |
|
758
|
|
|
|
|
|
|
|
|
759
|
|
|
|
|
|
|
sub _deprecation_in_progress { |
|
760
|
|
|
|
|
|
|
my ( $self, $type, $name ) = @_; |
|
761
|
|
|
|
|
|
|
$deprecate{$type} or return; |
|
762
|
|
|
|
|
|
|
return $deprecate{$type}{$name}; |
|
763
|
|
|
|
|
|
|
} |
|
764
|
|
|
|
|
|
|
|
|
765
|
|
|
|
|
|
|
} |
|
766
|
|
|
|
|
|
|
|
|
767
|
|
|
|
|
|
|
=end comment |
|
768
|
|
|
|
|
|
|
|
|
769
|
|
|
|
|
|
|
=cut |
|
770
|
|
|
|
|
|
|
|
|
771
|
|
|
|
|
|
|
sub _remainder { |
|
772
|
3620
|
|
|
3620
|
|
6410
|
my ( $self ) = @_; |
|
773
|
|
|
|
|
|
|
|
|
774
|
|
|
|
|
|
|
$self->{cursor_curr} > $self->{cursor_limit} |
|
775
|
3620
|
50
|
|
|
|
8728
|
and confess "Programming error - Trying to find past end of string"; |
|
776
|
|
|
|
|
|
|
$self->{find} = substr( |
|
777
|
|
|
|
|
|
|
$self->{content}, |
|
778
|
|
|
|
|
|
|
$self->{cursor_curr}, |
|
779
|
|
|
|
|
|
|
$self->{cursor_limit} - $self->{cursor_curr} |
|
780
|
3620
|
|
|
|
|
9904
|
); |
|
781
|
|
|
|
|
|
|
|
|
782
|
3620
|
|
|
|
|
6159
|
return; |
|
783
|
|
|
|
|
|
|
} |
|
784
|
|
|
|
|
|
|
|
|
785
|
|
|
|
|
|
|
sub _make_final_token { |
|
786
|
10
|
|
|
10
|
|
30
|
my ( $self, $len, $class, $arg ) = @_; |
|
787
|
10
|
|
|
|
|
35
|
my $token = $self->make_token( $len, $class, $arg ); |
|
788
|
10
|
|
|
|
|
43
|
$self->_set_mode( 'kaput' ); |
|
789
|
10
|
|
|
|
|
56
|
return $token; |
|
790
|
|
|
|
|
|
|
} |
|
791
|
|
|
|
|
|
|
|
|
792
|
|
|
|
|
|
|
sub _set_mode { |
|
793
|
1644
|
|
|
1644
|
|
3687
|
my ( $self, $mode ) = @_; |
|
794
|
|
|
|
|
|
|
$self->{trace} |
|
795
|
1644
|
50
|
|
|
|
3664
|
and warn "Tokenizer going from mode $self->{mode} to $mode\n"; |
|
796
|
1644
|
|
|
|
|
3034
|
$self->{mode} = $mode; |
|
797
|
1644
|
100
|
|
|
|
3950
|
if ( 'kaput' eq $mode ) { |
|
798
|
|
|
|
|
|
|
$self->{cursor_curr} = $self->{cursor_limit} = |
|
799
|
534
|
|
|
|
|
1815
|
length $self->{content}; |
|
800
|
|
|
|
|
|
|
} |
|
801
|
1644
|
|
|
|
|
2781
|
return; |
|
802
|
|
|
|
|
|
|
} |
|
803
|
|
|
|
|
|
|
|
|
804
|
|
|
|
|
|
|
sub __init_error { |
|
805
|
10
|
|
|
10
|
|
28
|
my ( $self , $err ) = @_; |
|
806
|
10
|
100
|
|
|
|
30
|
defined $err |
|
807
|
|
|
|
|
|
|
or $err = 'Tokenizer found illegal first characters'; |
|
808
|
|
|
|
|
|
|
return $self->_make_final_token( |
|
809
|
10
|
|
|
|
|
69
|
length $self->{content}, TOKEN_UNKNOWN, { |
|
810
|
|
|
|
|
|
|
error => $err, |
|
811
|
|
|
|
|
|
|
}, |
|
812
|
|
|
|
|
|
|
); |
|
813
|
|
|
|
|
|
|
} |
|
814
|
|
|
|
|
|
|
|
|
815
|
|
|
|
|
|
|
sub _update_location { |
|
816
|
107
|
|
|
107
|
|
220
|
my ( $self, $token ) = @_; |
|
817
|
|
|
|
|
|
|
$token->{location} # Idempotent |
|
818
|
107
|
100
|
|
|
|
281
|
and return; |
|
819
|
105
|
|
66
|
|
|
291
|
my $loc = $self->{_location} ||= do { |
|
820
|
|
|
|
|
|
|
my %loc = ( |
|
821
|
|
|
|
|
|
|
location => $self->{location}, |
|
822
|
12
|
|
|
|
|
52
|
); |
|
823
|
12
|
100
|
|
|
|
65
|
if ( __instance( $self->{source}, 'PPI::Element' ) ) { |
|
824
|
11
|
|
33
|
|
|
121
|
$loc{location} ||= $self->{source}->location(); |
|
825
|
11
|
50
|
|
|
|
2065
|
if ( my $doc = $self->{source}->document() ) { |
|
826
|
11
|
|
|
|
|
400
|
$loc{tab_width} = $doc->tab_width(); |
|
827
|
|
|
|
|
|
|
} |
|
828
|
|
|
|
|
|
|
} |
|
829
|
12
|
|
100
|
|
|
112
|
$loc{tab_width} ||= 1; |
|
830
|
12
|
|
|
|
|
49
|
\%loc; |
|
831
|
|
|
|
|
|
|
}; |
|
832
|
|
|
|
|
|
|
$loc->{location} |
|
833
|
105
|
50
|
|
|
|
275
|
or return; |
|
834
|
105
|
|
|
|
|
167
|
$token->{location} = [ @{ $loc->{location} } ]; |
|
|
105
|
|
|
|
|
347
|
|
|
835
|
105
|
50
|
|
|
|
362
|
if ( defined( my $content = $token->content() ) ) { |
|
836
|
|
|
|
|
|
|
|
|
837
|
105
|
|
|
|
|
196
|
my $lines; |
|
838
|
105
|
|
|
|
|
315
|
pos( $content ) = 0; |
|
839
|
105
|
|
|
|
|
410
|
$lines++ while $content =~ m/ \n /smxgc; |
|
840
|
105
|
100
|
|
|
|
251
|
if ( pos $content ) { |
|
841
|
2
|
|
|
|
|
6
|
$loc->{location}[LOCATION_LINE] += $lines; |
|
842
|
2
|
|
|
|
|
5
|
$loc->{location}[LOCATION_LOGICAL_LINE] += $lines; |
|
843
|
|
|
|
|
|
|
$loc->{location}[LOCATION_CHARACTER] = |
|
844
|
2
|
|
|
|
|
4
|
$loc->{location}[LOCATION_COLUMN] = 1; |
|
845
|
|
|
|
|
|
|
} |
|
846
|
|
|
|
|
|
|
|
|
847
|
105
|
100
|
|
|
|
286
|
if ( my $chars = length( $content ) - pos( $content ) ) { |
|
848
|
102
|
|
|
|
|
224
|
$loc->{location}[LOCATION_CHARACTER] += $chars; |
|
849
|
102
|
100
|
100
|
|
|
347
|
if ( $loc->{tab_width} > 1 && $content =~ m/ \t /smx ) { |
|
850
|
5
|
|
|
|
|
14
|
my $pos = $loc->{location}[LOCATION_COLUMN]; |
|
851
|
5
|
|
|
|
|
8
|
my $tab_width = $loc->{tab_width}; |
|
852
|
|
|
|
|
|
|
# Stolen shamelessly from PPI::Document::_visual_length |
|
853
|
5
|
|
|
|
|
8
|
my ( $vis_inc ); |
|
854
|
5
|
|
|
|
|
23
|
foreach my $part ( split /(\t)/, $content ) { |
|
855
|
10
|
100
|
|
|
|
20
|
if ($part eq "\t") { |
|
856
|
5
|
|
|
|
|
10
|
$vis_inc = $tab_width - ($pos-1) % $tab_width; |
|
857
|
|
|
|
|
|
|
} else { |
|
858
|
5
|
|
|
|
|
8
|
$vis_inc = length $part; |
|
859
|
|
|
|
|
|
|
} |
|
860
|
10
|
|
|
|
|
16
|
$pos += $vis_inc; |
|
861
|
|
|
|
|
|
|
} |
|
862
|
5
|
|
|
|
|
13
|
$loc->{location}[LOCATION_COLUMN] = $pos; |
|
863
|
|
|
|
|
|
|
} else { |
|
864
|
97
|
|
|
|
|
182
|
$loc->{location}[LOCATION_COLUMN] += $chars; |
|
865
|
|
|
|
|
|
|
} |
|
866
|
|
|
|
|
|
|
} |
|
867
|
|
|
|
|
|
|
|
|
868
|
|
|
|
|
|
|
} |
|
869
|
105
|
|
|
|
|
228
|
return; |
|
870
|
|
|
|
|
|
|
} |
|
871
|
|
|
|
|
|
|
|
|
872
|
|
|
|
|
|
|
sub __PPIX_TOKENIZER__init { |
|
873
|
534
|
|
|
534
|
|
1326
|
my ( $self ) = @_; |
|
874
|
|
|
|
|
|
|
|
|
875
|
534
|
50
|
|
|
|
3178
|
$self->find_regexp( |
|
876
|
|
|
|
|
|
|
qr{ \A ( \s* ) ( qr | m | s )? ( \s* ) ( . ) }smx ) |
|
877
|
|
|
|
|
|
|
or return $self->__init_error(); |
|
878
|
|
|
|
|
|
|
|
|
879
|
534
|
|
|
|
|
2790
|
my ( $leading_white, $type, $next_white, $delim_start ) = $self->capture(); |
|
880
|
|
|
|
|
|
|
|
|
881
|
534
|
100
|
|
|
|
2067
|
defined $type |
|
882
|
|
|
|
|
|
|
or $type = ''; |
|
883
|
|
|
|
|
|
|
|
|
884
|
534
|
100
|
100
|
|
|
2743
|
$type |
|
885
|
|
|
|
|
|
|
or $delim_start =~ m< \A [/?] \z >smx |
|
886
|
|
|
|
|
|
|
or return $self->__init_error(); |
|
887
|
528
|
100
|
100
|
|
|
2889
|
$type |
|
|
|
|
100
|
|
|
|
|
|
888
|
|
|
|
|
|
|
and not $next_white |
|
889
|
|
|
|
|
|
|
and $delim_start =~ m< \A \w \z >smx |
|
890
|
|
|
|
|
|
|
and return $self->__init_error(); |
|
891
|
|
|
|
|
|
|
|
|
892
|
526
|
|
|
|
|
1522
|
$self->{type} = $type; |
|
893
|
|
|
|
|
|
|
|
|
894
|
526
|
|
|
|
|
1023
|
my @tokens; |
|
895
|
|
|
|
|
|
|
|
|
896
|
526
|
100
|
|
|
|
2036
|
'' ne $leading_white |
|
897
|
|
|
|
|
|
|
and push @tokens, $self->make_token( length $leading_white, |
|
898
|
|
|
|
|
|
|
'PPIx::Regexp::Token::Whitespace' ); |
|
899
|
526
|
|
|
|
|
2221
|
push @tokens, $self->make_token( length $type, |
|
900
|
|
|
|
|
|
|
'PPIx::Regexp::Token::Structure' ); |
|
901
|
526
|
100
|
|
|
|
1726
|
'' ne $next_white |
|
902
|
|
|
|
|
|
|
and push @tokens, $self->make_token( length $next_white, |
|
903
|
|
|
|
|
|
|
'PPIx::Regexp::Token::Whitespace' ); |
|
904
|
|
|
|
|
|
|
|
|
905
|
526
|
|
|
|
|
1386
|
$self->{delimiter_start} = $delim_start; |
|
906
|
|
|
|
|
|
|
|
|
907
|
|
|
|
|
|
|
$self->{trace} |
|
908
|
526
|
50
|
|
|
|
1512
|
and warn "Tokenizer found regexp start delimiter '$delim_start' at $self->{cursor_curr}\n"; |
|
909
|
|
|
|
|
|
|
|
|
910
|
526
|
50
|
|
|
|
1848
|
if ( my $offset = $self->find_matching_delimiter() ) { |
|
911
|
526
|
|
|
|
|
1317
|
my $cursor_limit = $self->{cursor_curr} + $offset; |
|
912
|
|
|
|
|
|
|
$self->{trace} |
|
913
|
526
|
50
|
|
|
|
1649
|
and warn "Tokenizer found regexp end delimiter at $cursor_limit\n"; |
|
914
|
526
|
100
|
|
|
|
1909
|
if ( $self->__number_of_extra_parts() ) { |
|
915
|
|
|
|
|
|
|
### my $found_embedded_comments; |
|
916
|
43
|
100
|
|
|
|
208
|
if ( $self->close_bracket( |
|
917
|
|
|
|
|
|
|
$self->{delimiter_start} ) ) { |
|
918
|
|
|
|
|
|
|
pos $self->{content} = $self->{cursor_curr} + |
|
919
|
7
|
|
|
|
|
52
|
$offset + 1; |
|
920
|
|
|
|
|
|
|
# If we're bracketed, there may be Perl comments between |
|
921
|
|
|
|
|
|
|
# the regex and the replacement. PPI gets the parse |
|
922
|
|
|
|
|
|
|
# wrong as of 1.220, but if we get the handling of the |
|
923
|
|
|
|
|
|
|
# underlying string right, we will Just Work when PPI |
|
924
|
|
|
|
|
|
|
# gets it right. |
|
925
|
7
|
|
|
|
|
64
|
while ( $self->{content} =~ |
|
926
|
|
|
|
|
|
|
m/ \G \s* \n \s* \# [^\n]* /smxgc ) { |
|
927
|
|
|
|
|
|
|
## $found_embedded_comments = 1; |
|
928
|
|
|
|
|
|
|
} |
|
929
|
7
|
|
|
|
|
42
|
$self->{content} =~ m/ \s* /smxgc; |
|
930
|
|
|
|
|
|
|
} else { |
|
931
|
|
|
|
|
|
|
pos $self->{content} = $self->{cursor_curr} + |
|
932
|
36
|
|
|
|
|
225
|
$offset; |
|
933
|
|
|
|
|
|
|
} |
|
934
|
|
|
|
|
|
|
# Localizing cursor_curr and delimiter_start would be |
|
935
|
|
|
|
|
|
|
# cleaner, but I don't want the old values restored if a |
|
936
|
|
|
|
|
|
|
# parse error occurs. |
|
937
|
43
|
|
|
|
|
132
|
my $cursor_curr = $self->{cursor_curr}; |
|
938
|
43
|
|
|
|
|
109
|
my $delimiter_start = $self->{delimiter_start}; |
|
939
|
43
|
|
|
|
|
121
|
$self->{cursor_curr} = pos $self->{content}; |
|
940
|
|
|
|
|
|
|
$self->{delimiter_start} = substr |
|
941
|
|
|
|
|
|
|
$self->{content}, |
|
942
|
|
|
|
|
|
|
$self->{cursor_curr}, |
|
943
|
43
|
|
|
|
|
138
|
1; |
|
944
|
|
|
|
|
|
|
$self->{trace} |
|
945
|
43
|
50
|
|
|
|
131
|
and warn "Tokenizer found replacement start delimiter '$self->{delimiter_start}' at $self->{cursor_curr}\n"; |
|
946
|
43
|
100
|
|
|
|
124
|
if ( my $s_off = $self->find_matching_delimiter() ) { |
|
947
|
|
|
|
|
|
|
$self->{cursor_modifiers} = |
|
948
|
41
|
|
|
|
|
195
|
$self->{cursor_curr} + $s_off + 1; |
|
949
|
|
|
|
|
|
|
$self->{trace} |
|
950
|
41
|
50
|
|
|
|
159
|
and warn "Tokenizer found replacement end delimiter at @{[ |
|
951
|
0
|
|
|
|
|
0
|
$self->{cursor_curr} + $s_off ]}\n"; |
|
952
|
41
|
|
|
|
|
112
|
$self->{cursor_curr} = $cursor_curr; |
|
953
|
41
|
|
|
|
|
117
|
$self->{delimiter_start} = $delimiter_start; |
|
954
|
|
|
|
|
|
|
} else { |
|
955
|
|
|
|
|
|
|
$self->{trace} |
|
956
|
2
|
50
|
|
|
|
6
|
and warn 'Tokenizer failed to find replacement', |
|
957
|
|
|
|
|
|
|
"end delimiter starting at $self->{cursor_curr}\n"; |
|
958
|
2
|
|
|
|
|
5
|
$self->{cursor_curr} = 0; |
|
959
|
|
|
|
|
|
|
# TODO If I were smart enough here I could check for |
|
960
|
|
|
|
|
|
|
# PPI mis-parses like s{foo} |
|
961
|
|
|
|
|
|
|
# #{bar} |
|
962
|
|
|
|
|
|
|
# {baz} |
|
963
|
|
|
|
|
|
|
# here, doing so if $found_embedded_comments (commented |
|
964
|
|
|
|
|
|
|
# out above) is true. The problem is that there seem to |
|
965
|
|
|
|
|
|
|
# as many mis-parses as there are possible delimiters. |
|
966
|
2
|
|
|
|
|
8
|
return $self->__init_error( |
|
967
|
|
|
|
|
|
|
'Tokenizer found mismatched replacement delimiters', |
|
968
|
|
|
|
|
|
|
); |
|
969
|
|
|
|
|
|
|
} |
|
970
|
|
|
|
|
|
|
} else { |
|
971
|
483
|
|
|
|
|
1377
|
$self->{cursor_modifiers} = $cursor_limit + 1; |
|
972
|
|
|
|
|
|
|
} |
|
973
|
524
|
|
|
|
|
1223
|
$self->{cursor_limit} = $cursor_limit; |
|
974
|
|
|
|
|
|
|
} else { |
|
975
|
0
|
|
|
|
|
0
|
$self->{cursor_curr} = 0; |
|
976
|
|
|
|
|
|
|
return $self->_make_final_token( |
|
977
|
0
|
|
|
|
|
0
|
length( $self->{content} ), TOKEN_UNKNOWN, { |
|
978
|
|
|
|
|
|
|
error => 'Tokenizer found mismatched regexp delimiters', |
|
979
|
|
|
|
|
|
|
}, |
|
980
|
|
|
|
|
|
|
); |
|
981
|
|
|
|
|
|
|
} |
|
982
|
|
|
|
|
|
|
|
|
983
|
|
|
|
|
|
|
{ |
|
984
|
|
|
|
|
|
|
# We have to instantiate the trailing tokens now so we can |
|
985
|
|
|
|
|
|
|
# figure out what modifiers are in effect. But we can't |
|
986
|
|
|
|
|
|
|
# index their locations (if desired) because they are being |
|
987
|
|
|
|
|
|
|
# instantiated out of order |
|
988
|
|
|
|
|
|
|
|
|
989
|
524
|
|
|
|
|
926
|
local $self->{index_locations} = 0; |
|
|
524
|
|
|
|
|
1573
|
|
|
990
|
|
|
|
|
|
|
|
|
991
|
524
|
|
|
|
|
833
|
my @mods = @{ $self->{default_modifiers} }; |
|
|
524
|
|
|
|
|
1475
|
|
|
992
|
524
|
|
|
|
|
2024
|
pos $self->{content} = $self->{cursor_modifiers}; |
|
993
|
524
|
|
|
|
|
1547
|
local $self->{cursor_curr} = $self->{cursor_modifiers}; |
|
994
|
524
|
|
|
|
|
1451
|
local $self->{cursor_limit} = length $self->{content}; |
|
995
|
524
|
|
|
|
|
1002
|
my @trailing; |
|
996
|
|
|
|
|
|
|
{ |
|
997
|
524
|
|
|
|
|
788
|
my $len = $self->find_regexp( qr{ \A [[:lower:]]* }smx ); |
|
|
524
|
|
|
|
|
2514
|
|
|
998
|
524
|
|
|
|
|
2881
|
push @trailing, $self->make_token( $len, |
|
999
|
|
|
|
|
|
|
'PPIx::Regexp::Token::Modifier' ); |
|
1000
|
|
|
|
|
|
|
} |
|
1001
|
524
|
100
|
|
|
|
2921
|
if ( my $len = $self->find_regexp( qr{ \A \s+ }smx ) ) { |
|
1002
|
1
|
|
|
|
|
14
|
push @trailing, $self->make_token( $len, |
|
1003
|
|
|
|
|
|
|
'PPIx::Regexp::Token::Whitespace' ); |
|
1004
|
|
|
|
|
|
|
} |
|
1005
|
524
|
100
|
|
|
|
2627
|
if ( my $len = $self->find_regexp( qr{ \A .+ }smx ) ) { |
|
1006
|
1
|
|
|
|
|
20
|
push @trailing, $self->make_token( $len, TOKEN_UNKNOWN, { |
|
1007
|
|
|
|
|
|
|
error => 'Trailing characters after expression', |
|
1008
|
|
|
|
|
|
|
} ); |
|
1009
|
|
|
|
|
|
|
} |
|
1010
|
524
|
|
|
|
|
1778
|
$self->{trailing_tokens} = \@trailing; |
|
1011
|
524
|
|
|
|
|
2111
|
push @mods, $trailing[0]->content(); |
|
1012
|
|
|
|
|
|
|
$self->{effective_modifiers} = |
|
1013
|
524
|
|
|
|
|
1631
|
PPIx::Regexp::Token::Modifier::__aggregate_modifiers ( |
|
1014
|
|
|
|
|
|
|
@mods ); |
|
1015
|
|
|
|
|
|
|
$self->{modifiers} = [ |
|
1016
|
524
|
|
|
|
|
1284
|
{ %{ $self->{effective_modifiers} } }, |
|
|
524
|
|
|
|
|
3331
|
|
|
1017
|
|
|
|
|
|
|
]; |
|
1018
|
|
|
|
|
|
|
} |
|
1019
|
|
|
|
|
|
|
|
|
1020
|
|
|
|
|
|
|
$self->{delimiter_finish} = substr |
|
1021
|
|
|
|
|
|
|
$self->{content}, |
|
1022
|
|
|
|
|
|
|
$self->{cursor_limit}, |
|
1023
|
524
|
|
|
|
|
2139
|
1; |
|
1024
|
|
|
|
|
|
|
|
|
1025
|
524
|
|
|
|
|
1579
|
push @tokens, $self->make_token( 1, |
|
1026
|
|
|
|
|
|
|
'PPIx::Regexp::Token::Delimiter' ); |
|
1027
|
|
|
|
|
|
|
|
|
1028
|
524
|
|
|
|
|
2904
|
$self->_set_mode( 'regexp' ); |
|
1029
|
|
|
|
|
|
|
|
|
1030
|
524
|
|
|
|
|
993
|
$self->{find} = undef; |
|
1031
|
|
|
|
|
|
|
|
|
1032
|
524
|
|
|
|
|
2723
|
return @tokens; |
|
1033
|
|
|
|
|
|
|
} |
|
1034
|
|
|
|
|
|
|
|
|
1035
|
|
|
|
|
|
|
# Match the initial part of the regexp including any leading white |
|
1036
|
|
|
|
|
|
|
# space. The initial delimiter is the first thing not consumed, though |
|
1037
|
|
|
|
|
|
|
# we check it for sanity. |
|
1038
|
|
|
|
|
|
|
sub __initial_match { |
|
1039
|
0
|
|
|
0
|
|
0
|
my ( $self ) = @_; |
|
1040
|
|
|
|
|
|
|
|
|
1041
|
0
|
0
|
|
|
|
0
|
$self->find_regexp( |
|
1042
|
|
|
|
|
|
|
qr{ \A ( \s* ) ( qr | m | s )? ( \s* ) (?: [^\w\s] ) }smx ) |
|
1043
|
|
|
|
|
|
|
or return; |
|
1044
|
|
|
|
|
|
|
|
|
1045
|
0
|
|
|
|
|
0
|
my ( $leading_white, $type, $next_white ) = $self->capture(); |
|
1046
|
|
|
|
|
|
|
|
|
1047
|
0
|
0
|
|
|
|
0
|
defined $type |
|
1048
|
|
|
|
|
|
|
or $type = ''; |
|
1049
|
|
|
|
|
|
|
|
|
1050
|
0
|
|
|
|
|
0
|
$self->{type} = $type; |
|
1051
|
|
|
|
|
|
|
|
|
1052
|
0
|
|
|
|
|
0
|
my @tokens; |
|
1053
|
|
|
|
|
|
|
|
|
1054
|
0
|
0
|
|
|
|
0
|
'' ne $leading_white |
|
1055
|
|
|
|
|
|
|
and push @tokens, $self->make_token( length $leading_white, |
|
1056
|
|
|
|
|
|
|
'PPIx::Regexp::Token::Whitespace' ); |
|
1057
|
0
|
|
|
|
|
0
|
push @tokens, $self->make_token( length $type, |
|
1058
|
|
|
|
|
|
|
'PPIx::Regexp::Token::Structure' ); |
|
1059
|
0
|
0
|
|
|
|
0
|
'' ne $next_white |
|
1060
|
|
|
|
|
|
|
and push @tokens, $self->make_token( length $next_white, |
|
1061
|
|
|
|
|
|
|
'PPIx::Regexp::Token::Whitespace' ); |
|
1062
|
|
|
|
|
|
|
|
|
1063
|
0
|
|
|
|
|
0
|
return @tokens; |
|
1064
|
|
|
|
|
|
|
} |
|
1065
|
|
|
|
|
|
|
|
|
1066
|
|
|
|
|
|
|
{ |
|
1067
|
|
|
|
|
|
|
my %extra_parts = ( |
|
1068
|
|
|
|
|
|
|
s => 1, |
|
1069
|
|
|
|
|
|
|
); |
|
1070
|
|
|
|
|
|
|
|
|
1071
|
|
|
|
|
|
|
# Return the number of extra delimited parts. This will be 0 except |
|
1072
|
|
|
|
|
|
|
# for s///, which will be 1. |
|
1073
|
|
|
|
|
|
|
sub __number_of_extra_parts { |
|
1074
|
850
|
|
|
850
|
|
1767
|
my ( $self ) = @_; |
|
1075
|
850
|
|
100
|
|
|
4110
|
return $extra_parts{$self->{type}} || 0; |
|
1076
|
|
|
|
|
|
|
} |
|
1077
|
|
|
|
|
|
|
} |
|
1078
|
|
|
|
|
|
|
|
|
1079
|
|
|
|
|
|
|
{ |
|
1080
|
|
|
|
|
|
|
my @part_class = qw{ |
|
1081
|
|
|
|
|
|
|
PPIx::Regexp::Structure::Regexp |
|
1082
|
|
|
|
|
|
|
PPIx::Regexp::Structure::Replacement |
|
1083
|
|
|
|
|
|
|
}; |
|
1084
|
|
|
|
|
|
|
|
|
1085
|
|
|
|
|
|
|
# Return the classes for the parts of the expression. |
|
1086
|
|
|
|
|
|
|
sub __part_classes { |
|
1087
|
324
|
|
|
324
|
|
912
|
my ( $self ) = @_; |
|
1088
|
324
|
|
|
|
|
906
|
my $max = $self->__number_of_extra_parts(); |
|
1089
|
324
|
|
|
|
|
2298
|
return @part_class[ 0 .. $max ]; |
|
1090
|
|
|
|
|
|
|
} |
|
1091
|
|
|
|
|
|
|
} |
|
1092
|
|
|
|
|
|
|
|
|
1093
|
|
|
|
|
|
|
sub __PPIX_TOKENIZER__regexp { |
|
1094
|
3025
|
|
|
3025
|
|
6757
|
my ( $self, $character ) = @_; |
|
1095
|
|
|
|
|
|
|
|
|
1096
|
3025
|
|
|
|
|
5303
|
my $mode = $self->{mode}; |
|
1097
|
3025
|
|
|
|
|
5913
|
my $handler = '__PPIX_TOKENIZER__' . $mode; |
|
1098
|
|
|
|
|
|
|
|
|
1099
|
3025
|
|
|
|
|
5277
|
$self->{cursor_orig} = $self->{cursor_curr}; |
|
1100
|
3025
|
|
|
|
|
6954
|
foreach my $class ( $self->_known_tokenizers() ) { |
|
1101
|
13467
|
|
|
|
|
51955
|
my @tokens = grep { $_ } $class->$handler( $self, $character ); |
|
|
3856
|
|
|
|
|
9960
|
|
|
1102
|
|
|
|
|
|
|
$self->{trace} |
|
1103
|
13467
|
50
|
|
|
|
27435
|
and warn $class, "->$handler( \$self, '$character' )", |
|
1104
|
|
|
|
|
|
|
" => (@tokens)\n"; |
|
1105
|
|
|
|
|
|
|
@tokens |
|
1106
|
|
|
|
|
|
|
and return ( map { |
|
1107
|
13467
|
100
|
|
|
|
30020
|
ref $_ ? $_ : $self->make_token( $_, |
|
|
3022
|
100
|
|
|
|
11348
|
|
|
1108
|
|
|
|
|
|
|
$class ) } @tokens ); |
|
1109
|
|
|
|
|
|
|
} |
|
1110
|
|
|
|
|
|
|
|
|
1111
|
|
|
|
|
|
|
# Find a fallback processor for the character. |
|
1112
|
27
|
|
33
|
|
|
293
|
my $fallback = __PACKAGE__->can( '__PPIX_TOKEN_FALLBACK__' . $mode ) |
|
1113
|
|
|
|
|
|
|
|| __PACKAGE__->can( '__PPIX_TOKEN_FALLBACK__regexp' ) |
|
1114
|
|
|
|
|
|
|
|| confess "Programming error - unable to find fallback for $mode"; |
|
1115
|
27
|
|
|
|
|
144
|
return $fallback->( $self, $character ); |
|
1116
|
|
|
|
|
|
|
} |
|
1117
|
|
|
|
|
|
|
|
|
1118
|
|
|
|
|
|
|
*__PPIX_TOKENIZER__repl = \&__PPIX_TOKENIZER__regexp; |
|
1119
|
|
|
|
|
|
|
|
|
1120
|
|
|
|
|
|
|
sub __PPIX_TOKEN_FALLBACK__regexp { |
|
1121
|
18
|
|
|
18
|
|
71
|
my ( $self, $character ) = @_; |
|
1122
|
|
|
|
|
|
|
|
|
1123
|
|
|
|
|
|
|
# As a fallback in regexp mode, any escaped character is a literal. |
|
1124
|
18
|
100
|
66
|
|
|
70
|
if ( $character eq '\\' |
|
1125
|
|
|
|
|
|
|
&& $self->{cursor_limit} - $self->{cursor_curr} > 1 |
|
1126
|
|
|
|
|
|
|
) { |
|
1127
|
2
|
|
|
|
|
7
|
return $self->make_token( 2, TOKEN_LITERAL ); |
|
1128
|
|
|
|
|
|
|
} |
|
1129
|
|
|
|
|
|
|
|
|
1130
|
|
|
|
|
|
|
# Any normal character is unknown. |
|
1131
|
16
|
|
|
|
|
95
|
return $self->make_token( 1, TOKEN_UNKNOWN, { |
|
1132
|
|
|
|
|
|
|
error => 'Tokenizer found unexpected literal', |
|
1133
|
|
|
|
|
|
|
}, |
|
1134
|
|
|
|
|
|
|
); |
|
1135
|
|
|
|
|
|
|
} |
|
1136
|
|
|
|
|
|
|
|
|
1137
|
|
|
|
|
|
|
sub __PPIX_TOKEN_FALLBACK__repl { |
|
1138
|
9
|
|
|
9
|
|
33
|
my ( $self, $character ) = @_; |
|
1139
|
|
|
|
|
|
|
|
|
1140
|
|
|
|
|
|
|
# As a fallback in replacement mode, any escaped character is a literal. |
|
1141
|
9
|
100
|
66
|
|
|
40
|
if ( $character eq '\\' |
|
1142
|
|
|
|
|
|
|
&& defined ( my $next = $self->peek( 1 ) ) ) { |
|
1143
|
|
|
|
|
|
|
|
|
1144
|
5
|
0
|
33
|
|
|
19
|
if ( $self->interpolates() || $next eq q<'> || $next eq '\\' ) { |
|
|
|
|
33
|
|
|
|
|
|
1145
|
5
|
|
|
|
|
20
|
return $self->make_token( 2, TOKEN_LITERAL ); |
|
1146
|
|
|
|
|
|
|
} |
|
1147
|
0
|
|
|
|
|
0
|
return $self->make_token( 1, TOKEN_LITERAL ); |
|
1148
|
|
|
|
|
|
|
} |
|
1149
|
|
|
|
|
|
|
|
|
1150
|
|
|
|
|
|
|
# So is any normal character. |
|
1151
|
4
|
|
|
|
|
33
|
return $self->make_token( 1, TOKEN_LITERAL ); |
|
1152
|
|
|
|
|
|
|
} |
|
1153
|
|
|
|
|
|
|
|
|
1154
|
|
|
|
|
|
|
sub __PPIX_TOKENIZER__finish { |
|
1155
|
555
|
|
|
555
|
|
1827
|
my ( $self ) = @_; # $character unused |
|
1156
|
|
|
|
|
|
|
|
|
1157
|
|
|
|
|
|
|
$self->{cursor_limit} > length $self->{content} |
|
1158
|
555
|
50
|
|
|
|
2480
|
and confess "Programming error - ran off string"; |
|
1159
|
|
|
|
|
|
|
|
|
1160
|
|
|
|
|
|
|
my @tokens = $self->make_token( length $self->{delimiter_finish}, |
|
1161
|
555
|
|
|
|
|
2123
|
'PPIx::Regexp::Token::Delimiter' ); |
|
1162
|
|
|
|
|
|
|
|
|
1163
|
555
|
100
|
|
|
|
2860
|
if ( $self->{cursor_curr} == $self->{cursor_modifiers} ) { |
|
1164
|
|
|
|
|
|
|
|
|
1165
|
|
|
|
|
|
|
# We are out of string. Add the trailing tokens (created when we |
|
1166
|
|
|
|
|
|
|
# did the initial bracket scan) and close up shop. |
|
1167
|
|
|
|
|
|
|
|
|
1168
|
514
|
|
|
|
|
1796
|
push @tokens, $self->_get_trailing_tokens(); |
|
1169
|
|
|
|
|
|
|
|
|
1170
|
514
|
|
|
|
|
1448
|
$self->_set_mode( 'kaput' ); |
|
1171
|
|
|
|
|
|
|
|
|
1172
|
|
|
|
|
|
|
} else { |
|
1173
|
|
|
|
|
|
|
|
|
1174
|
|
|
|
|
|
|
# Clear the cookies, because we are going around again. |
|
1175
|
41
|
|
|
|
|
219
|
$self->{cookie} = {}; |
|
1176
|
|
|
|
|
|
|
|
|
1177
|
|
|
|
|
|
|
# Move the cursor limit to just before the modifiers. |
|
1178
|
41
|
|
|
|
|
144
|
$self->{cursor_limit} = $self->{cursor_modifiers} - 1; |
|
1179
|
|
|
|
|
|
|
|
|
1180
|
|
|
|
|
|
|
# If the preceding regular expression was bracketed, we need to |
|
1181
|
|
|
|
|
|
|
# consume possible whitespace and find another delimiter. |
|
1182
|
|
|
|
|
|
|
|
|
1183
|
41
|
100
|
|
|
|
220
|
if ( $self->close_bracket( $self->{delimiter_start} ) ) { |
|
1184
|
7
|
|
|
|
|
37
|
my $accept; |
|
1185
|
|
|
|
|
|
|
# If we are bracketed, there can be honest-to-God Perl |
|
1186
|
|
|
|
|
|
|
# comments between the regexp and the replacement, not just |
|
1187
|
|
|
|
|
|
|
# regexp comments. As of version 1.220, PPI does not get |
|
1188
|
|
|
|
|
|
|
# this parse right, but if we can handle this is a string, |
|
1189
|
|
|
|
|
|
|
# then we will Just Work when PPI gets itself straight. |
|
1190
|
7
|
|
|
|
|
63
|
while ( $self->find_regexp( |
|
1191
|
|
|
|
|
|
|
qr{ \A ( \s* \n \s* ) ( \# [^\n]* \n ) }smx ) ) { |
|
1192
|
2
|
|
|
|
|
9
|
my ( $white_space, $comment ) = $self->capture(); |
|
1193
|
2
|
|
|
|
|
11
|
push @tokens, $self->make_token( |
|
1194
|
|
|
|
|
|
|
length $white_space, |
|
1195
|
|
|
|
|
|
|
'PPIx::Regexp::Token::Whitespace', |
|
1196
|
|
|
|
|
|
|
), $self->make_token( |
|
1197
|
|
|
|
|
|
|
length $comment, |
|
1198
|
|
|
|
|
|
|
'PPIx::Regexp::Token::Comment', |
|
1199
|
|
|
|
|
|
|
); |
|
1200
|
|
|
|
|
|
|
} |
|
1201
|
7
|
100
|
|
|
|
71
|
$accept = $self->find_regexp( qr{ \A \s+ }smx ) |
|
1202
|
|
|
|
|
|
|
and push @tokens, $self->make_token( |
|
1203
|
|
|
|
|
|
|
$accept, 'PPIx::Regexp::Token::Whitespace' ); |
|
1204
|
7
|
|
|
|
|
56
|
my $character = $self->peek(); |
|
1205
|
7
|
|
|
|
|
34
|
$self->{delimiter_start} = $character; |
|
1206
|
7
|
|
|
|
|
43
|
push @tokens, $self->make_token( |
|
1207
|
|
|
|
|
|
|
1, 'PPIx::Regexp::Token::Delimiter' ); |
|
1208
|
|
|
|
|
|
|
$self->{delimiter_finish} = substr |
|
1209
|
|
|
|
|
|
|
$self->{content}, |
|
1210
|
7
|
|
|
|
|
72
|
$self->{cursor_limit} - 1, |
|
1211
|
|
|
|
|
|
|
1; |
|
1212
|
|
|
|
|
|
|
} |
|
1213
|
|
|
|
|
|
|
|
|
1214
|
41
|
100
|
|
|
|
172
|
if ( $self->modifier( 'e*' ) ) { |
|
1215
|
|
|
|
|
|
|
# With /e or /ee, the replacement portion is code. We make |
|
1216
|
|
|
|
|
|
|
# it all into one big PPIx::Regexp::Token::Code, slap on the |
|
1217
|
|
|
|
|
|
|
# trailing delimiter and modifiers, and return it all. |
|
1218
|
|
|
|
|
|
|
push @tokens, $self->make_token( |
|
1219
|
|
|
|
|
|
|
$self->{cursor_limit} - $self->{cursor_curr}, |
|
1220
|
10
|
|
|
|
|
97
|
'PPIx::Regexp::Token::Code', |
|
1221
|
|
|
|
|
|
|
{ perl_version_introduced => MINIMUM_PERL }, |
|
1222
|
|
|
|
|
|
|
); |
|
1223
|
10
|
|
|
|
|
57
|
$self->{cursor_limit} = length $self->{content}; |
|
1224
|
10
|
|
|
|
|
41
|
push @tokens, $self->make_token( 1, |
|
1225
|
|
|
|
|
|
|
'PPIx::Regexp::Token::Delimiter' ), |
|
1226
|
|
|
|
|
|
|
$self->_get_trailing_tokens(); |
|
1227
|
10
|
|
|
|
|
50
|
$self->_set_mode( 'kaput' ); |
|
1228
|
|
|
|
|
|
|
} else { |
|
1229
|
|
|
|
|
|
|
# Put our mode to replacement. |
|
1230
|
31
|
|
|
|
|
216
|
$self->_set_mode( 'repl' ); |
|
1231
|
|
|
|
|
|
|
} |
|
1232
|
|
|
|
|
|
|
|
|
1233
|
|
|
|
|
|
|
} |
|
1234
|
|
|
|
|
|
|
|
|
1235
|
555
|
|
|
|
|
2021
|
return @tokens; |
|
1236
|
|
|
|
|
|
|
|
|
1237
|
|
|
|
|
|
|
} |
|
1238
|
|
|
|
|
|
|
|
|
1239
|
|
|
|
|
|
|
# To common processing on trailing tokens. |
|
1240
|
|
|
|
|
|
|
sub _get_trailing_tokens { |
|
1241
|
524
|
|
|
524
|
|
1436
|
my ( $self ) = @_; |
|
1242
|
524
|
100
|
|
|
|
1482
|
if ( $self->{index_locations} ) { |
|
1243
|
|
|
|
|
|
|
# We turned off index_locations when these were created, because |
|
1244
|
|
|
|
|
|
|
# they were done out of order. Fix that now. |
|
1245
|
11
|
|
|
|
|
28
|
foreach my $token ( @{ $self->{trailing_tokens} } ) { |
|
|
11
|
|
|
|
|
43
|
|
|
1246
|
11
|
|
|
|
|
31
|
$self->_update_location( $token ); |
|
1247
|
|
|
|
|
|
|
} |
|
1248
|
|
|
|
|
|
|
} |
|
1249
|
524
|
|
|
|
|
919
|
return @{ delete $self->{trailing_tokens} }; |
|
|
524
|
|
|
|
|
1892
|
|
|
1250
|
|
|
|
|
|
|
} |
|
1251
|
|
|
|
|
|
|
|
|
1252
|
|
|
|
|
|
|
1; |
|
1253
|
|
|
|
|
|
|
|
|
1254
|
|
|
|
|
|
|
__END__ |