| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package Perl::Tokenizer; |
|
2
|
|
|
|
|
|
|
|
|
3
|
5
|
|
|
5
|
|
268353
|
use utf8; |
|
|
5
|
|
|
|
|
90
|
|
|
|
5
|
|
|
|
|
22
|
|
|
4
|
5
|
|
|
5
|
|
171
|
use 5.018; |
|
|
5
|
|
|
|
|
12
|
|
|
5
|
5
|
|
|
5
|
|
20
|
use strict; |
|
|
5
|
|
|
|
|
8
|
|
|
|
5
|
|
|
|
|
77
|
|
|
6
|
5
|
|
|
5
|
|
19
|
use warnings; |
|
|
5
|
|
|
|
|
5
|
|
|
|
5
|
|
|
|
|
22073
|
|
|
7
|
|
|
|
|
|
|
|
|
8
|
|
|
|
|
|
|
require Exporter; |
|
9
|
|
|
|
|
|
|
our @ISA = qw(Exporter); |
|
10
|
|
|
|
|
|
|
our @EXPORT = qw(perl_tokens); |
|
11
|
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
our $VERSION = '0.09'; |
|
13
|
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
=encoding utf8 |
|
15
|
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
=head1 NAME |
|
17
|
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
Perl::Tokenizer - A tiny Perl code tokenizer. |
|
19
|
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
=head1 VERSION |
|
21
|
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
Version 0.09 |
|
23
|
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
=cut |
|
25
|
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
my $make_esc_delim = sub { |
|
27
|
|
|
|
|
|
|
if ($_[0] eq '\\') { |
|
28
|
|
|
|
|
|
|
return qr{\\(.*?)\\}s; |
|
29
|
|
|
|
|
|
|
} |
|
30
|
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
my $delim = quotemeta shift; |
|
32
|
|
|
|
|
|
|
qr{$delim([^$delim\\]*+(?>\\.|[^$delim\\]+)*+)$delim}s; |
|
33
|
|
|
|
|
|
|
}; |
|
34
|
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
my $make_end_delim = sub { |
|
36
|
|
|
|
|
|
|
if ($_[0] eq '\\') { |
|
37
|
|
|
|
|
|
|
return qr{.*?\\}s; |
|
38
|
|
|
|
|
|
|
} |
|
39
|
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
my $delim = quotemeta shift; |
|
41
|
|
|
|
|
|
|
qr{[^$delim\\]*+(?>\\.|[^$delim\\]+)*+$delim}s; |
|
42
|
|
|
|
|
|
|
}; |
|
43
|
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
my %bdelims; |
|
45
|
|
|
|
|
|
|
foreach my $d ([qw~< >~], [qw~( )~], [qw~{ }~], [qw~[ ]~]) { |
|
46
|
|
|
|
|
|
|
my @ed = map { quotemeta } @{$d}; |
|
47
|
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
$bdelims{$d->[0]} = qr{ |
|
49
|
|
|
|
|
|
|
$ed[0] |
|
50
|
|
|
|
|
|
|
(?> |
|
51
|
|
|
|
|
|
|
[^$ed[0]$ed[1]\\]+ |
|
52
|
|
|
|
|
|
|
| |
|
53
|
|
|
|
|
|
|
\\. |
|
54
|
|
|
|
|
|
|
| |
|
55
|
|
|
|
|
|
|
(??{$bdelims{$d->[0]}}) |
|
56
|
|
|
|
|
|
|
)* |
|
57
|
|
|
|
|
|
|
$ed[1] |
|
58
|
|
|
|
|
|
|
}xs; |
|
59
|
|
|
|
|
|
|
} |
|
60
|
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
# string - single quote |
|
62
|
|
|
|
|
|
|
my $str_sq = $make_esc_delim->(q{'}); |
|
63
|
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
# string - double quote |
|
65
|
|
|
|
|
|
|
my $str_dq = $make_esc_delim->(q{"}); |
|
66
|
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
# backtick - backquote |
|
68
|
|
|
|
|
|
|
my $str_bq = $make_esc_delim->(q{`}); |
|
69
|
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
# regex - // |
|
71
|
|
|
|
|
|
|
my $match_re = $make_esc_delim->(q{/}); |
|
72
|
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
# glob/readline |
|
74
|
|
|
|
|
|
|
my $glob = $bdelims{'<'}; |
|
75
|
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
# Cache regular expressions that are generated dynamically |
|
77
|
|
|
|
|
|
|
my %cache_esc; |
|
78
|
|
|
|
|
|
|
my %cache_end; |
|
79
|
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
# Double pairs |
|
81
|
|
|
|
|
|
|
my $dpairs = qr{ |
|
82
|
|
|
|
|
|
|
(?= |
|
83
|
|
|
|
|
|
|
(?(?<=\s) |
|
84
|
|
|
|
|
|
|
(.) |
|
85
|
|
|
|
|
|
|
| |
|
86
|
|
|
|
|
|
|
(\W) |
|
87
|
|
|
|
|
|
|
) |
|
88
|
|
|
|
|
|
|
) |
|
89
|
|
|
|
|
|
|
(??{$bdelims{$+} // ($cache_esc{$+} //= $make_esc_delim->($+))}) |
|
90
|
|
|
|
|
|
|
}x; |
|
91
|
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
# Double pairs -- comments |
|
93
|
|
|
|
|
|
|
my $dcomm = qr{ |
|
94
|
|
|
|
|
|
|
\s* (?>(?<=\s)\# (?-s:.*) \s*)* |
|
95
|
|
|
|
|
|
|
}x; |
|
96
|
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
# Quote-like balanced (q{}, m//) |
|
98
|
|
|
|
|
|
|
my $make_single_q_balanced = sub { |
|
99
|
|
|
|
|
|
|
my $name = shift; |
|
100
|
|
|
|
|
|
|
qr{ |
|
101
|
|
|
|
|
|
|
$name |
|
102
|
|
|
|
|
|
|
$dcomm |
|
103
|
|
|
|
|
|
|
$dpairs |
|
104
|
|
|
|
|
|
|
}x; |
|
105
|
|
|
|
|
|
|
}; |
|
106
|
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
# Quote-like balanced (q{}, m//) |
|
108
|
|
|
|
|
|
|
my %single_q; |
|
109
|
|
|
|
|
|
|
foreach my $name (qw(q qq qr qw qx m)) { |
|
110
|
|
|
|
|
|
|
$single_q{$name} = $make_single_q_balanced->($name); |
|
111
|
|
|
|
|
|
|
} |
|
112
|
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
# First of balanced pairs |
|
114
|
|
|
|
|
|
|
my $bbpair = qr~[<\[\{\(]~; |
|
115
|
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
my $make_double_q_balanced = sub { |
|
117
|
|
|
|
|
|
|
my $name = shift; |
|
118
|
|
|
|
|
|
|
qr{ |
|
119
|
|
|
|
|
|
|
$name |
|
120
|
|
|
|
|
|
|
$dcomm |
|
121
|
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
(?(?=$bbpair) # balanced pairs (e.g.: s{}//) |
|
123
|
|
|
|
|
|
|
$dpairs |
|
124
|
|
|
|
|
|
|
$dcomm |
|
125
|
|
|
|
|
|
|
$dpairs |
|
126
|
|
|
|
|
|
|
| # or: single delims (e.g.: s///) |
|
127
|
|
|
|
|
|
|
$dpairs |
|
128
|
|
|
|
|
|
|
(??{$cache_end{$+} //= $make_end_delim->($+)}) |
|
129
|
|
|
|
|
|
|
) |
|
130
|
|
|
|
|
|
|
}x; |
|
131
|
|
|
|
|
|
|
}; |
|
132
|
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
# Double quote-like balanced (s{}{}, s///) |
|
134
|
|
|
|
|
|
|
my %double_q; |
|
135
|
|
|
|
|
|
|
foreach my $name (qw(tr s y)) { |
|
136
|
|
|
|
|
|
|
$double_q{$name} = $make_double_q_balanced->($name); |
|
137
|
|
|
|
|
|
|
} |
|
138
|
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
my $number = qr{(?=\.?[0-9])[0-9_]*(?:\.(?!\.)[0-9_]*)?(?:[Ee](?:[+-]?[0-9_]+))?}; |
|
140
|
|
|
|
|
|
|
my $hex_num = qr{0x[_0-9A-Fa-f]*}; |
|
141
|
|
|
|
|
|
|
my $binary_num = qr{0b[_01]*}; |
|
142
|
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
my $var_name = qr{(?>\w+|(?>::)+|'(?=\w))++}; |
|
144
|
|
|
|
|
|
|
my $vstring = qr{\b(?:v[0-9]+(?>\.[0-9][0-9_]*+)*+ | [0-9][0-9_]*(?>\.[0-9][0-9_]*){2,})\b}x; |
|
145
|
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
# HERE-DOC beginning |
|
147
|
|
|
|
|
|
|
my $bhdoc = qr{ |
|
148
|
|
|
|
|
|
|
<<(?>\h*(?>$str_sq|$str_dq)|\\?+(\w+)) |
|
149
|
|
|
|
|
|
|
}x; |
|
150
|
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
my $tr_flags = qr{[rcds]*}; |
|
152
|
|
|
|
|
|
|
my $match_flags = qr{[mnsixpogcdual]*}; |
|
153
|
|
|
|
|
|
|
my $substitution_flags = qr{[mnsixpogcerdual]*}; |
|
154
|
|
|
|
|
|
|
my $compiled_regex_flags = qr{[mnsixpodual]*}; |
|
155
|
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
my @postfix_operators = qw( ++ -- ); |
|
157
|
|
|
|
|
|
|
my @prec_operators = qw ( ... .. -> ++ -- =~ <=> \\ ? ~~ ~. ~ : >> >= > << <= < == != ! ); |
|
158
|
|
|
|
|
|
|
my @assignment_operators = qw( && || // ** % ^. ^ &. & |. | * + - = / . << >> ); |
|
159
|
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
my $operators = do { |
|
161
|
|
|
|
|
|
|
local $" = '|'; |
|
162
|
|
|
|
|
|
|
qr{@{[map{quotemeta} @prec_operators, @assignment_operators]}}; |
|
163
|
|
|
|
|
|
|
}; |
|
164
|
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
my $postfix_operators = do { |
|
166
|
|
|
|
|
|
|
local $" = '|'; |
|
167
|
|
|
|
|
|
|
qr{@{[map{quotemeta} @postfix_operators]}}; |
|
168
|
|
|
|
|
|
|
}; |
|
169
|
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
my $assignment_operators = do { |
|
171
|
|
|
|
|
|
|
local $" = '|'; |
|
172
|
|
|
|
|
|
|
qr{@{[map{($_ eq '=') ? '=(?!=)' : "\Q$_=\E"} @assignment_operators]}}; |
|
173
|
|
|
|
|
|
|
}; |
|
174
|
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
my @special_var_names = (qw( \\ | + / ~ ! @ $ % ^ & * ( ) } < > : ; " ` ' ? = - [ ] . ), '#', ','); |
|
176
|
|
|
|
|
|
|
my $special_var_names = do { |
|
177
|
|
|
|
|
|
|
local $" = '|'; |
|
178
|
|
|
|
|
|
|
qr{@{[map {quotemeta} @special_var_names]}}; |
|
179
|
|
|
|
|
|
|
}; |
|
180
|
|
|
|
|
|
|
|
|
181
|
|
|
|
|
|
|
my $bracket_var = qr~(?=\s*\{)(?!\s*\{\s*(?:\^?$var_name|$special_var_names|\{)\s*\})~; |
|
182
|
|
|
|
|
|
|
|
|
183
|
|
|
|
|
|
|
#<<< |
|
184
|
|
|
|
|
|
|
my $perl_keywords = |
|
185
|
|
|
|
|
|
|
qr/(?:CORE::)?(?>(a(?:(?:ccept|larm|tan2|bs|nd))|b(?:in(?:(?:mode|d))|less|reak)|c(?:aller|h(?:dir|mod |
|
186
|
|
|
|
|
|
|
|o(?:(?:m?p|wn))|r(?:oot)?)|lose(?:dir)?|mp|o(?:n(?:(?:tinue|nect))|s)|rypt)|d(?:bm(?:(?:close|open))| |
|
187
|
|
|
|
|
|
|
e(?:f(?:(?:ault|ined))|lete)|ie|ump|o)|e(?:ach|ls(?:(?:if|e))|nd(?:grent|hostent|netent|p(?:(?:roto|w) |
|
188
|
|
|
|
|
|
|
ent)|servent)|of|val|x(?:ec|i(?:(?:sts|t))|p)|q)|f(?:c(?:ntl)?|ileno|lock|or(?:(?:each|m(?:(?:line|at) |
|
189
|
|
|
|
|
|
|
)|k))?)|g(?:e(?:t(?:gr(?:(?:ent|gid|nam))|host(?:by(?:(?:addr|name))|ent)|login|net(?:by(?:(?:addr|nam |
|
190
|
|
|
|
|
|
|
e))|ent)|p(?:eername|grp|pid|r(?:iority|oto(?:byn(?:(?:umber|ame))|ent))|w(?:(?:ent|nam|uid)))|s(?:erv |
|
191
|
|
|
|
|
|
|
(?:by(?:(?:name|port))|ent)|ock(?:(?:name|opt)))|c))?|iven|lob|mtime|oto|rep|t)|hex|i(?:mport|n(?:(?:d |
|
192
|
|
|
|
|
|
|
ex|t))|octl|sa|f)|join|k(?:(?:eys|ill))|l(?:ast|c(?:first)?|e(?:ngth)?|i(?:(?:sten|nk))|o(?:c(?:al(?:t |
|
193
|
|
|
|
|
|
|
ime)?|k)|g)|stat|t)|m(?:ap|kdir|sg(?:(?:ctl|get|rcv|snd))|y)|n(?:e(?:xt)?|ot?)|o(?:ct|pen(?:dir)?|rd?| |
|
194
|
|
|
|
|
|
|
ur)|p(?:ack(?:age)?|ipe|o[ps]|r(?:(?:ototype|intf?))|ush)|quotemeta|r(?:and|e(?:ad(?:(?:(?:lin[ek]|pip |
|
195
|
|
|
|
|
|
|
e|dir)))?|cv|do|name|quire|set|turn|verse|winddir|f)|index|mdir)|s(?:ay|calar|e(?:ek(?:dir)?|lect|m(?: |
|
196
|
|
|
|
|
|
|
(?:ctl|get|op))|nd|t(?:grent|hostent|netent|p(?:grp|r(?:(?:iority|otoent))|went)|s(?:(?:erven|ockop)t) |
|
197
|
|
|
|
|
|
|
))|h(?:ift|m(?:(?:write|read|ctl|get))|utdown)|in|leep|o(?:cket(?:pair)?|rt)|p(?:li(?:(?:ce|t))|rintf) |
|
198
|
|
|
|
|
|
|
|qrt|rand|t(?:(?:ate?|udy))|ub(?:str)?|y(?:mlink|s(?:(?:write|call|open|read|seek|tem))))|t(?:ell(?:di |
|
199
|
|
|
|
|
|
|
r)?|i(?:(?:mes?|ed?))|runcate)|u(?:c(?:first)?|mask|n(?:def|l(?:(?:ess|ink))|pack|shift|ti[el])|se|tim |
|
200
|
|
|
|
|
|
|
e)|v(?:(?:alues|ec))|w(?:a(?:it(?:pid)?|ntarray|rn)|h(?:ile|e(?:n|re(?:so|is)))|rite)|xor|BEGIN|END|IN |
|
201
|
|
|
|
|
|
|
IT|CHECK))\b |
|
202
|
|
|
|
|
|
|
/x; |
|
203
|
|
|
|
|
|
|
#>>> |
|
204
|
|
|
|
|
|
|
|
|
205
|
|
|
|
|
|
|
my $perl_filetests = qr/\-[ABCMORSTWXbcdefgkloprstuwxz]/; |
|
206
|
|
|
|
|
|
|
|
|
207
|
|
|
|
|
|
|
sub perl_tokens(&$) { |
|
208
|
66
|
|
|
66
|
1
|
19073
|
my ($callback, $code) = @_; |
|
209
|
|
|
|
|
|
|
|
|
210
|
66
|
50
|
|
|
|
179
|
ref($callback) eq 'CODE' |
|
211
|
|
|
|
|
|
|
or die "usage: perl_tokens {...} \$code;"; |
|
212
|
|
|
|
|
|
|
|
|
213
|
66
|
|
|
|
|
90
|
my $variable = 0; |
|
214
|
66
|
|
|
|
|
80
|
my $flat = 0; |
|
215
|
66
|
|
|
|
|
78
|
my $regex = 1; |
|
216
|
66
|
|
|
|
|
78
|
my $canpod = 1; |
|
217
|
66
|
|
|
|
|
76
|
my $proto = 0; |
|
218
|
66
|
|
|
|
|
78
|
my $format = 0; |
|
219
|
66
|
|
|
|
|
76
|
my $expect_format = 0; |
|
220
|
66
|
|
|
|
|
81
|
my $postfix_op = 0; |
|
221
|
66
|
|
|
|
|
84
|
my @heredoc_eofs; |
|
222
|
|
|
|
|
|
|
|
|
223
|
66
|
|
|
|
|
117
|
$code = "$code"; |
|
224
|
|
|
|
|
|
|
|
|
225
|
|
|
|
|
|
|
{ |
|
226
|
66
|
100
|
66
|
|
|
86
|
if ($expect_format == 1 and $code =~ /\G(?=\R)/) { |
|
|
351
|
|
|
|
|
889
|
|
|
227
|
2
|
50
|
|
|
|
12
|
if ($code =~ /.*?\R\.\h*(?=\R|\z)/cgs) { |
|
228
|
2
|
|
|
|
|
10
|
$callback->('vertical_space', $-[0], $-[0] + 1); |
|
229
|
2
|
|
|
|
|
854
|
$callback->('format', $-[0] + 1, $+[0]); |
|
230
|
2
|
|
|
|
|
859
|
$expect_format = 0; |
|
231
|
2
|
|
|
|
|
4
|
$canpod = 1; |
|
232
|
2
|
|
|
|
|
2
|
$regex = 1; |
|
233
|
2
|
|
|
|
|
3
|
$postfix_op = 0; |
|
234
|
|
|
|
|
|
|
} |
|
235
|
|
|
|
|
|
|
else { |
|
236
|
0
|
0
|
|
|
|
0
|
if ($code =~ /\G(.)/cgs) { |
|
237
|
0
|
|
|
|
|
0
|
$callback->('unknown_char', $-[0], $+[0]); |
|
238
|
0
|
|
|
|
|
0
|
redo; |
|
239
|
|
|
|
|
|
|
} |
|
240
|
|
|
|
|
|
|
} |
|
241
|
2
|
|
|
|
|
4
|
redo; |
|
242
|
|
|
|
|
|
|
} |
|
243
|
|
|
|
|
|
|
|
|
244
|
349
|
100
|
100
|
|
|
843
|
if ($#heredoc_eofs >= 0 and $code =~ /\G(?=\R)/) { |
|
245
|
3
|
|
|
|
|
10
|
my $token = shift @heredoc_eofs; |
|
246
|
3
|
50
|
|
|
|
90
|
if ($code =~ m{\G.*?\R\Q$token\E(?=\R|\z)}sgc) { |
|
247
|
3
|
|
|
|
|
48
|
$callback->('vertical_space', $-[0], $-[0] + 1); |
|
248
|
3
|
|
|
|
|
2123
|
$callback->('heredoc', $-[0] + 1, $+[0]); |
|
249
|
|
|
|
|
|
|
} |
|
250
|
3
|
|
|
|
|
2136
|
redo; |
|
251
|
|
|
|
|
|
|
} |
|
252
|
|
|
|
|
|
|
|
|
253
|
346
|
100
|
100
|
|
|
3031
|
if (($regex == 1 or $code =~ /\G(?!<<[0-9])/) and $code =~ m{\G$bhdoc}gc) { |
|
|
|
|
100
|
|
|
|
|
|
254
|
3
|
|
|
|
|
19
|
$callback->('heredoc_beg', $-[0], $+[0]); |
|
255
|
3
|
|
|
|
|
2056
|
push @heredoc_eofs, $+; |
|
256
|
3
|
|
|
|
|
7
|
$regex = 0; |
|
257
|
3
|
|
|
|
|
8
|
$canpod = 0; |
|
258
|
3
|
|
|
|
|
9
|
redo; |
|
259
|
|
|
|
|
|
|
} |
|
260
|
|
|
|
|
|
|
|
|
261
|
343
|
50
|
66
|
|
|
888
|
if ($canpod == 1 and $code =~ /\G^=[a-zA-Z]/cgm) { |
|
262
|
0
|
0
|
|
|
|
0
|
$code =~ /\G.*?\R=cut\h*(?=\R|\z)/cgs |
|
263
|
|
|
|
|
|
|
or $code =~ /\G.*\z/cgs; |
|
264
|
0
|
|
|
|
|
0
|
$callback->('pod', $-[0] - 2, $+[0]); |
|
265
|
0
|
|
|
|
|
0
|
redo; |
|
266
|
|
|
|
|
|
|
} |
|
267
|
|
|
|
|
|
|
|
|
268
|
343
|
100
|
|
|
|
920
|
if ($code =~ /\G(?=\s)/) { |
|
269
|
40
|
100
|
|
|
|
111
|
if ($code =~ /\G\h+/cg) { |
|
270
|
37
|
|
|
|
|
154
|
$callback->('horizontal_space', $-[0], $+[0]); |
|
271
|
37
|
|
|
|
|
17985
|
redo; |
|
272
|
|
|
|
|
|
|
} |
|
273
|
|
|
|
|
|
|
|
|
274
|
3
|
50
|
|
|
|
12
|
if ($code =~ /\G\v+/cg) { |
|
275
|
3
|
|
|
|
|
15
|
$callback->('vertical_space', $-[0], $+[0]); |
|
276
|
3
|
|
|
|
|
1478
|
redo; |
|
277
|
|
|
|
|
|
|
} |
|
278
|
|
|
|
|
|
|
|
|
279
|
0
|
0
|
|
|
|
0
|
if ($code =~ /\G\s+/cg) { |
|
280
|
0
|
|
|
|
|
0
|
$callback->({other_space => [$-[0], $+[0]]}); |
|
281
|
0
|
|
|
|
|
0
|
redo; |
|
282
|
|
|
|
|
|
|
} |
|
283
|
|
|
|
|
|
|
} |
|
284
|
|
|
|
|
|
|
|
|
285
|
303
|
100
|
|
|
|
522
|
if ($variable > 0) { |
|
286
|
22
|
50
|
33
|
|
|
183
|
if ($code =~ m{\G$var_name}gco or $code =~ m{\G(?<=\$)\#$var_name}gco) { |
|
287
|
22
|
|
|
|
|
91
|
$callback->('var_name', $-[0], $+[0]); |
|
288
|
22
|
|
|
|
|
10049
|
$regex = 0; |
|
289
|
22
|
|
|
|
|
32
|
$variable = 0; |
|
290
|
22
|
|
|
|
|
29
|
$canpod = 0; |
|
291
|
22
|
100
|
|
|
|
100
|
$flat = ($code =~ /\G(?=\s*\{)/) ? 1 : 0; |
|
292
|
22
|
|
|
|
|
48
|
redo; |
|
293
|
|
|
|
|
|
|
} |
|
294
|
|
|
|
|
|
|
|
|
295
|
0
|
0
|
0
|
|
|
0
|
if ( |
|
|
|
|
0
|
|
|
|
|
|
296
|
|
|
|
|
|
|
$code =~ m{\G(?!\$+$var_name)}o |
|
297
|
|
|
|
|
|
|
and ( $code =~ m~\G(?:\s+|#?)\{\s*(?:$var_name|$special_var_names|[#{])\s*\}~goc |
|
298
|
|
|
|
|
|
|
or $code =~ m{\G(?:\^\w+|#(?!\{)|$special_var_names)}gco |
|
299
|
|
|
|
|
|
|
or $code =~ /\G#/cg) |
|
300
|
|
|
|
|
|
|
) { |
|
301
|
0
|
|
|
|
|
0
|
$callback->('special_var_name', $-[0], $+[0]); |
|
302
|
0
|
|
|
|
|
0
|
$regex = 0; |
|
303
|
0
|
|
|
|
|
0
|
$canpod = 0; |
|
304
|
0
|
|
|
|
|
0
|
$variable = 0; |
|
305
|
0
|
0
|
|
|
|
0
|
$flat = ($code =~ /\G(?
|
|
306
|
0
|
|
|
|
|
0
|
redo; |
|
307
|
|
|
|
|
|
|
} |
|
308
|
|
|
|
|
|
|
|
|
309
|
|
|
|
|
|
|
# continue |
|
310
|
|
|
|
|
|
|
} |
|
311
|
|
|
|
|
|
|
|
|
312
|
281
|
100
|
|
|
|
480
|
if ($code =~ /\G#.*/cg) { |
|
313
|
2
|
|
|
|
|
13
|
$callback->('comment', $-[0], $+[0]); |
|
314
|
2
|
|
|
|
|
1236
|
redo; |
|
315
|
|
|
|
|
|
|
} |
|
316
|
|
|
|
|
|
|
|
|
317
|
279
|
100
|
100
|
|
|
935
|
if (($regex == 1 and not($postfix_op)) or $code =~ /\G(?=[\@\$])/) { |
|
|
|
|
100
|
|
|
|
|
|
318
|
148
|
100
|
|
|
|
354
|
if ($code =~ /\G\$/cg) { |
|
319
|
21
|
|
|
|
|
74
|
$callback->('scalar_sigil', $-[0], $+[0]); |
|
320
|
21
|
50
|
|
|
|
9648
|
$code =~ /\G$bracket_var/o or ++$variable; |
|
321
|
21
|
|
|
|
|
38
|
$regex = 0; |
|
322
|
21
|
|
|
|
|
24
|
$canpod = 0; |
|
323
|
21
|
|
|
|
|
26
|
$flat = 1; |
|
324
|
21
|
|
|
|
|
48
|
redo; |
|
325
|
|
|
|
|
|
|
} |
|
326
|
|
|
|
|
|
|
|
|
327
|
127
|
100
|
|
|
|
218
|
if ($code =~ /\G\@/cg) { |
|
328
|
1
|
|
|
|
|
7
|
$callback->('array_sigil', $-[0], $+[0]); |
|
329
|
1
|
50
|
|
|
|
813
|
$code =~ /\G$bracket_var/o or ++$variable; |
|
330
|
1
|
|
|
|
|
7
|
$regex = 0; |
|
331
|
1
|
|
|
|
|
2
|
$canpod = 0; |
|
332
|
1
|
|
|
|
|
3
|
$flat = 1; |
|
333
|
1
|
|
|
|
|
3
|
redo; |
|
334
|
|
|
|
|
|
|
} |
|
335
|
|
|
|
|
|
|
|
|
336
|
126
|
50
|
|
|
|
235
|
if ($code =~ /\G\%/cg) { |
|
337
|
0
|
|
|
|
|
0
|
$callback->('hash_sigil', $-[0], $+[0]); |
|
338
|
0
|
0
|
|
|
|
0
|
$code =~ /\G$bracket_var/o or ++$variable; |
|
339
|
0
|
|
|
|
|
0
|
$regex = 0; |
|
340
|
0
|
|
|
|
|
0
|
$canpod = 0; |
|
341
|
0
|
|
|
|
|
0
|
$flat = 1; |
|
342
|
0
|
|
|
|
|
0
|
redo; |
|
343
|
|
|
|
|
|
|
} |
|
344
|
|
|
|
|
|
|
|
|
345
|
126
|
50
|
|
|
|
221
|
if ($code =~ /\G\*/cg) { |
|
346
|
0
|
|
|
|
|
0
|
$callback->('glob_sigil', $-[0], $+[0]); |
|
347
|
0
|
0
|
|
|
|
0
|
$code =~ /\G$bracket_var/o or ++$variable; |
|
348
|
0
|
|
|
|
|
0
|
$regex = 0; |
|
349
|
0
|
|
|
|
|
0
|
$canpod = 0; |
|
350
|
0
|
|
|
|
|
0
|
$flat = 1; |
|
351
|
0
|
|
|
|
|
0
|
redo; |
|
352
|
|
|
|
|
|
|
} |
|
353
|
|
|
|
|
|
|
|
|
354
|
126
|
50
|
|
|
|
223
|
if ($code =~ /\G&/cg) { |
|
355
|
0
|
|
|
|
|
0
|
$callback->('ampersand_sigil', $-[0], $+[0]); |
|
356
|
0
|
0
|
|
|
|
0
|
$code =~ /\G$bracket_var/o or ++$variable; |
|
357
|
0
|
|
|
|
|
0
|
$regex = 0; |
|
358
|
0
|
|
|
|
|
0
|
$canpod = 0; |
|
359
|
0
|
|
|
|
|
0
|
$flat = 1; |
|
360
|
0
|
|
|
|
|
0
|
redo; |
|
361
|
|
|
|
|
|
|
} |
|
362
|
|
|
|
|
|
|
|
|
363
|
|
|
|
|
|
|
# continue |
|
364
|
|
|
|
|
|
|
} |
|
365
|
|
|
|
|
|
|
|
|
366
|
257
|
50
|
33
|
|
|
464
|
if ($proto == 1 and $code =~ /\G\(.*?\)/cgs) { |
|
367
|
0
|
|
|
|
|
0
|
$callback->('sub_proto', $-[0], $+[0]); |
|
368
|
0
|
|
|
|
|
0
|
$proto = 0; |
|
369
|
0
|
|
|
|
|
0
|
$canpod = 0; |
|
370
|
0
|
|
|
|
|
0
|
$regex = 0; |
|
371
|
0
|
|
|
|
|
0
|
redo; |
|
372
|
|
|
|
|
|
|
} |
|
373
|
|
|
|
|
|
|
|
|
374
|
257
|
100
|
|
|
|
466
|
if ($code =~ /\G\(/cg) { |
|
375
|
2
|
|
|
|
|
12
|
$callback->('parenthesis_open', $-[0], $+[0]); |
|
376
|
2
|
|
|
|
|
1293
|
$regex = 1; |
|
377
|
2
|
|
|
|
|
6
|
$flat = 0; |
|
378
|
2
|
|
|
|
|
5
|
$canpod = 0; |
|
379
|
2
|
|
|
|
|
7
|
redo; |
|
380
|
|
|
|
|
|
|
} |
|
381
|
|
|
|
|
|
|
|
|
382
|
255
|
100
|
|
|
|
428
|
if ($code =~ /\G\)/cg) { |
|
383
|
2
|
|
|
|
|
15
|
$callback->('parenthesis_close', $-[0], $+[0]); |
|
384
|
2
|
|
|
|
|
1410
|
$regex = 0; |
|
385
|
2
|
|
|
|
|
6
|
$canpod = 0; |
|
386
|
2
|
|
|
|
|
4
|
$flat = 0; |
|
387
|
2
|
|
|
|
|
36
|
redo; |
|
388
|
|
|
|
|
|
|
} |
|
389
|
|
|
|
|
|
|
|
|
390
|
253
|
100
|
|
|
|
426
|
if ($code =~ /\G\{/cg) { |
|
391
|
4
|
|
|
|
|
20
|
$callback->('curly_bracket_open', $-[0], $+[0]); |
|
392
|
4
|
|
|
|
|
2291
|
$regex = 1; |
|
393
|
4
|
|
|
|
|
7
|
$proto = 0; |
|
394
|
4
|
|
|
|
|
8
|
redo; |
|
395
|
|
|
|
|
|
|
} |
|
396
|
|
|
|
|
|
|
|
|
397
|
249
|
100
|
|
|
|
399
|
if ($code =~ /\G\}/cg) { |
|
398
|
4
|
|
|
|
|
16
|
$callback->('curly_bracket_close', $-[0], $+[0]); |
|
399
|
4
|
|
|
|
|
2009
|
$flat = 0; |
|
400
|
4
|
|
|
|
|
6
|
$canpod = 1; |
|
401
|
4
|
|
|
|
|
9
|
redo; |
|
402
|
|
|
|
|
|
|
} |
|
403
|
|
|
|
|
|
|
|
|
404
|
245
|
100
|
|
|
|
415
|
if ($code =~ /\G\[/cg) { |
|
405
|
2
|
|
|
|
|
10
|
$callback->('right_bracket_open', $-[0], $+[0]); |
|
406
|
2
|
|
|
|
|
855
|
$regex = 1; |
|
407
|
2
|
|
|
|
|
4
|
$postfix_op = 0; |
|
408
|
2
|
|
|
|
|
3
|
$flat = 0; |
|
409
|
2
|
|
|
|
|
3
|
$canpod = 0; |
|
410
|
2
|
|
|
|
|
5
|
redo; |
|
411
|
|
|
|
|
|
|
} |
|
412
|
|
|
|
|
|
|
|
|
413
|
243
|
100
|
|
|
|
400
|
if ($code =~ /\G\]/cg) { |
|
414
|
2
|
|
|
|
|
10
|
$callback->('right_bracket_close', $-[0], $+[0]); |
|
415
|
2
|
|
|
|
|
852
|
$regex = 0; |
|
416
|
2
|
|
|
|
|
2
|
$canpod = 0; |
|
417
|
2
|
|
|
|
|
3
|
$flat = 0; |
|
418
|
2
|
|
|
|
|
5
|
redo; |
|
419
|
|
|
|
|
|
|
} |
|
420
|
|
|
|
|
|
|
|
|
421
|
241
|
50
|
|
|
|
442
|
if ($proto == 0) { |
|
422
|
241
|
100
|
100
|
|
|
479
|
if ($canpod == 1 and $code =~ /\Gformat\b/cg) { |
|
423
|
2
|
|
|
|
|
20
|
$callback->('keyword', $-[0], $+[0]); |
|
424
|
2
|
|
|
|
|
1007
|
$regex = 0; |
|
425
|
2
|
|
|
|
|
4
|
$canpod = 0; |
|
426
|
2
|
|
|
|
|
2
|
$format = 1; |
|
427
|
2
|
|
|
|
|
4
|
redo; |
|
428
|
|
|
|
|
|
|
} |
|
429
|
|
|
|
|
|
|
|
|
430
|
239
|
100
|
66
|
|
|
3545
|
if ( |
|
|
|
|
100
|
|
|
|
|
|
431
|
|
|
|
|
|
|
( |
|
432
|
|
|
|
|
|
|
$flat == 0 or ( $flat == 1 |
|
433
|
|
|
|
|
|
|
and $code =~ /\G(?!\w+\h*\})/) |
|
434
|
|
|
|
|
|
|
) |
|
435
|
|
|
|
|
|
|
and $code =~ m{\G(?)$perl_keywords}gco |
|
436
|
|
|
|
|
|
|
) { |
|
437
|
5
|
|
|
|
|
20
|
my $name = $1; |
|
438
|
5
|
|
|
|
|
23
|
my @pos = ($-[0], $+[0]); |
|
439
|
5
|
|
|
|
|
22
|
my $is_bare_word = ($code =~ /\G(?=\h*=>)/); |
|
440
|
5
|
50
|
|
|
|
25
|
$callback->(($is_bare_word ? 'bare_word' : 'keyword'), @pos); |
|
441
|
|
|
|
|
|
|
|
|
442
|
5
|
50
|
33
|
|
|
3115
|
if ($name eq 'sub' and not $is_bare_word) { |
|
443
|
0
|
|
|
|
|
0
|
$proto = 1; |
|
444
|
0
|
|
|
|
|
0
|
$regex = 0; |
|
445
|
|
|
|
|
|
|
} |
|
446
|
|
|
|
|
|
|
else { |
|
447
|
5
|
|
|
|
|
10
|
$regex = 1; |
|
448
|
5
|
|
|
|
|
11
|
$postfix_op = 0; |
|
449
|
|
|
|
|
|
|
} |
|
450
|
5
|
|
|
|
|
8
|
$canpod = 0; |
|
451
|
5
|
|
|
|
|
18
|
redo; |
|
452
|
|
|
|
|
|
|
} |
|
453
|
|
|
|
|
|
|
|
|
454
|
|
|
|
|
|
|
# continue |
|
455
|
|
|
|
|
|
|
} |
|
456
|
|
|
|
|
|
|
|
|
457
|
234
|
50
|
33
|
|
|
988
|
if ($code =~ /\G(?!(?>tr|[ysm]|q[rwxq]?)\h*=>)/ and $code =~ /\G(?)/) { |
|
458
|
|
|
|
|
|
|
|
|
459
|
234
|
100
|
66
|
|
|
924
|
if (($flat == 1 and $code =~ /\G(?=[a-z]+\h*\})/) or $code =~ /\G((?<=\{)|(?<=\{\h))(?=[a-z]+\h*\})/) { |
|
|
|
|
100
|
|
|
|
|
|
460
|
|
|
|
|
|
|
## ok |
|
461
|
|
|
|
|
|
|
} |
|
462
|
|
|
|
|
|
|
else { |
|
463
|
|
|
|
|
|
|
|
|
464
|
231
|
100
|
|
|
|
663
|
if ($code =~ m{\G $double_q{s} $substitution_flags }gcxo) { |
|
465
|
4
|
|
|
|
|
16
|
$callback->('substitution', $-[0], $+[0]); |
|
466
|
4
|
|
|
|
|
1741
|
$regex = 0; |
|
467
|
4
|
|
|
|
|
5
|
$canpod = 0; |
|
468
|
4
|
|
|
|
|
7
|
redo; |
|
469
|
|
|
|
|
|
|
} |
|
470
|
|
|
|
|
|
|
|
|
471
|
227
|
100
|
|
|
|
1080
|
if ($code =~ m{\G (?> $double_q{tr} | $double_q{y} ) $tr_flags }gxco) { |
|
472
|
2
|
|
|
|
|
16
|
$callback->('transliteration', $-[0], $+[0]); |
|
473
|
2
|
|
|
|
|
1129
|
$regex = 0; |
|
474
|
2
|
|
|
|
|
3
|
$canpod = 0; |
|
475
|
2
|
|
|
|
|
6
|
redo; |
|
476
|
|
|
|
|
|
|
} |
|
477
|
|
|
|
|
|
|
|
|
478
|
225
|
100
|
100
|
|
|
1019
|
if ($code =~ m{\G $single_q{m} $match_flags }gcxo |
|
|
|
|
100
|
|
|
|
|
|
479
|
|
|
|
|
|
|
or ($regex == 1 and $code =~ m{\G $match_re $match_flags }gcxo)) { |
|
480
|
4
|
|
|
|
|
15
|
$callback->('match_regex', $-[0], $+[0]); |
|
481
|
4
|
|
|
|
|
1644
|
$regex = 0; |
|
482
|
4
|
|
|
|
|
6
|
$canpod = 0; |
|
483
|
4
|
|
|
|
|
21
|
redo; |
|
484
|
|
|
|
|
|
|
} |
|
485
|
|
|
|
|
|
|
|
|
486
|
221
|
100
|
|
|
|
477
|
if ($code =~ m{\G $single_q{qr} $compiled_regex_flags }gcxo) { |
|
487
|
1
|
|
|
|
|
6
|
$callback->('compiled_regex', $-[0], $+[0]); |
|
488
|
1
|
|
|
|
|
484
|
$regex = 0; |
|
489
|
1
|
|
|
|
|
2
|
$canpod = 0; |
|
490
|
1
|
|
|
|
|
4
|
redo; |
|
491
|
|
|
|
|
|
|
} |
|
492
|
|
|
|
|
|
|
|
|
493
|
220
|
50
|
|
|
|
528
|
if ($code =~ m{\G$single_q{q}}gco) { |
|
494
|
0
|
|
|
|
|
0
|
$callback->('q_string', $-[0], $+[0]); |
|
495
|
0
|
|
|
|
|
0
|
$regex = 0; |
|
496
|
0
|
|
|
|
|
0
|
$canpod = 0; |
|
497
|
0
|
|
|
|
|
0
|
redo; |
|
498
|
|
|
|
|
|
|
} |
|
499
|
|
|
|
|
|
|
|
|
500
|
220
|
50
|
|
|
|
393
|
if ($code =~ m{\G$single_q{qq}}gco) { |
|
501
|
0
|
|
|
|
|
0
|
$callback->('qq_string', $-[0], $+[0]); |
|
502
|
0
|
|
|
|
|
0
|
$regex = 0; |
|
503
|
0
|
|
|
|
|
0
|
$canpod = 0; |
|
504
|
0
|
|
|
|
|
0
|
redo; |
|
505
|
|
|
|
|
|
|
} |
|
506
|
|
|
|
|
|
|
|
|
507
|
220
|
50
|
|
|
|
395
|
if ($code =~ m{\G$single_q{qw}}gco) { |
|
508
|
0
|
|
|
|
|
0
|
$callback->('qw_string', $-[0], $+[0]); |
|
509
|
0
|
|
|
|
|
0
|
$regex = 0; |
|
510
|
0
|
|
|
|
|
0
|
$canpod = 0; |
|
511
|
0
|
|
|
|
|
0
|
redo; |
|
512
|
|
|
|
|
|
|
} |
|
513
|
|
|
|
|
|
|
|
|
514
|
220
|
50
|
|
|
|
422
|
if ($code =~ m{\G$single_q{qx}}gco) { |
|
515
|
0
|
|
|
|
|
0
|
$callback->('qx_string', $-[0], $+[0]); |
|
516
|
0
|
|
|
|
|
0
|
$regex = 0; |
|
517
|
0
|
|
|
|
|
0
|
$canpod = 0; |
|
518
|
0
|
|
|
|
|
0
|
redo; |
|
519
|
|
|
|
|
|
|
} |
|
520
|
|
|
|
|
|
|
} |
|
521
|
|
|
|
|
|
|
|
|
522
|
|
|
|
|
|
|
# continue |
|
523
|
|
|
|
|
|
|
} |
|
524
|
|
|
|
|
|
|
|
|
525
|
223
|
100
|
|
|
|
511
|
if ($code =~ m{\G$str_dq}gco) { |
|
526
|
10
|
|
|
|
|
44
|
$callback->('double_quoted_string', $-[0], $+[0]); |
|
527
|
10
|
|
|
|
|
4938
|
$regex = 0; |
|
528
|
10
|
|
|
|
|
16
|
$canpod = 0; |
|
529
|
10
|
|
|
|
|
13
|
$flat = 0; |
|
530
|
10
|
|
|
|
|
20
|
redo; |
|
531
|
|
|
|
|
|
|
} |
|
532
|
|
|
|
|
|
|
|
|
533
|
213
|
50
|
|
|
|
391
|
if ($code =~ m{\G$str_sq}gco) { |
|
534
|
0
|
|
|
|
|
0
|
$callback->('single_quoted_string', $-[0], $+[0]); |
|
535
|
0
|
|
|
|
|
0
|
$regex = 0; |
|
536
|
0
|
|
|
|
|
0
|
$canpod = 0; |
|
537
|
0
|
|
|
|
|
0
|
$flat = 0; |
|
538
|
0
|
|
|
|
|
0
|
redo; |
|
539
|
|
|
|
|
|
|
} |
|
540
|
|
|
|
|
|
|
|
|
541
|
213
|
50
|
|
|
|
426
|
if ($code =~ m{\G$str_bq}gco) { |
|
542
|
0
|
|
|
|
|
0
|
$callback->('backtick', $-[0], $+[0]); |
|
543
|
0
|
|
|
|
|
0
|
$regex = 0; |
|
544
|
0
|
|
|
|
|
0
|
$canpod = 0; |
|
545
|
0
|
|
|
|
|
0
|
$flat = 0; |
|
546
|
0
|
|
|
|
|
0
|
redo; |
|
547
|
|
|
|
|
|
|
} |
|
548
|
|
|
|
|
|
|
|
|
549
|
213
|
100
|
|
|
|
358
|
if ($code =~ /\G;/cg) { |
|
550
|
4
|
|
|
|
|
22
|
$callback->('semicolon', $-[0], $+[0]); |
|
551
|
4
|
|
|
|
|
2261
|
$canpod = 1; |
|
552
|
4
|
|
|
|
|
9
|
$regex = 1; |
|
553
|
4
|
|
|
|
|
7
|
$postfix_op = 0; |
|
554
|
4
|
|
|
|
|
8
|
$proto = 0; |
|
555
|
4
|
|
|
|
|
7
|
$flat = 0; |
|
556
|
4
|
|
|
|
|
9
|
redo; |
|
557
|
|
|
|
|
|
|
} |
|
558
|
|
|
|
|
|
|
|
|
559
|
209
|
50
|
|
|
|
344
|
if ($code =~ /\G=>/cg) { |
|
560
|
0
|
|
|
|
|
0
|
$callback->('fat_comma', $-[0], $+[0]); |
|
561
|
0
|
|
|
|
|
0
|
$regex = 1; |
|
562
|
0
|
|
|
|
|
0
|
$postfix_op = 0; |
|
563
|
0
|
|
|
|
|
0
|
$canpod = 0; |
|
564
|
0
|
|
|
|
|
0
|
$flat = 0; |
|
565
|
0
|
|
|
|
|
0
|
redo; |
|
566
|
|
|
|
|
|
|
} |
|
567
|
|
|
|
|
|
|
|
|
568
|
209
|
100
|
|
|
|
326
|
if ($code =~ /\G,/cg) { |
|
569
|
3
|
|
|
|
|
21
|
$callback->('comma', $-[0], $+[0]); |
|
570
|
3
|
|
|
|
|
2099
|
$regex = 1; |
|
571
|
3
|
|
|
|
|
8
|
$postfix_op = 0; |
|
572
|
3
|
|
|
|
|
7
|
$canpod = 0; |
|
573
|
3
|
|
|
|
|
7
|
$flat = 0; |
|
574
|
3
|
|
|
|
|
10
|
redo; |
|
575
|
|
|
|
|
|
|
} |
|
576
|
|
|
|
|
|
|
|
|
577
|
206
|
50
|
|
|
|
536
|
if ($code =~ m{\G$vstring}gco) { |
|
578
|
0
|
|
|
|
|
0
|
$callback->('v_string', $-[0], $+[0]); |
|
579
|
0
|
|
|
|
|
0
|
$regex = 0; |
|
580
|
0
|
|
|
|
|
0
|
$canpod = 0; |
|
581
|
0
|
|
|
|
|
0
|
redo; |
|
582
|
|
|
|
|
|
|
} |
|
583
|
|
|
|
|
|
|
|
|
584
|
206
|
100
|
|
|
|
420
|
if ($code =~ m{\G$perl_filetests\b}gco) { |
|
585
|
1
|
|
|
|
|
8
|
my @pos = ($-[0], $+[0]); |
|
586
|
1
|
|
|
|
|
7
|
my $is_bare_word = ($code =~ /\G(?=\h*=>)/); |
|
587
|
|
|
|
|
|
|
|
|
588
|
1
|
50
|
|
|
|
8
|
$callback->(($is_bare_word ? 'bare_word' : 'file_test'), @pos); |
|
589
|
|
|
|
|
|
|
|
|
590
|
1
|
50
|
|
|
|
670
|
if ($is_bare_word) { |
|
591
|
0
|
|
|
|
|
0
|
$canpod = 0; |
|
592
|
0
|
|
|
|
|
0
|
$regex = 0; |
|
593
|
|
|
|
|
|
|
} |
|
594
|
|
|
|
|
|
|
else { |
|
595
|
1
|
|
|
|
|
3
|
$regex = 1; # ambiguous, but possible |
|
596
|
1
|
|
|
|
|
3
|
$postfix_op = 0; |
|
597
|
1
|
|
|
|
|
4
|
$canpod = 0; |
|
598
|
|
|
|
|
|
|
} |
|
599
|
1
|
|
|
|
|
4
|
redo; |
|
600
|
|
|
|
|
|
|
} |
|
601
|
|
|
|
|
|
|
|
|
602
|
205
|
50
|
|
|
|
389
|
if ($code =~ /\G(?=__)/) { |
|
603
|
0
|
0
|
|
|
|
0
|
if ($code =~ m{\G__(?>DATA|END)__\b\h*+(?!=>).*\z}gcs) { |
|
604
|
0
|
|
|
|
|
0
|
$callback->('data', $-[0], $+[0]); |
|
605
|
0
|
|
|
|
|
0
|
redo; |
|
606
|
|
|
|
|
|
|
} |
|
607
|
|
|
|
|
|
|
|
|
608
|
0
|
0
|
|
|
|
0
|
if ($code =~ m{\G__(?>SUB|FILE|PACKAGE|LINE)__\b(?!\h*+=>)}gc) { |
|
609
|
0
|
|
|
|
|
0
|
$callback->('special_keyword', $-[0], $+[0]); |
|
610
|
0
|
|
|
|
|
0
|
$canpod = 0; |
|
611
|
0
|
|
|
|
|
0
|
$regex = 0; |
|
612
|
0
|
|
|
|
|
0
|
redo; |
|
613
|
|
|
|
|
|
|
} |
|
614
|
|
|
|
|
|
|
|
|
615
|
|
|
|
|
|
|
# continue |
|
616
|
|
|
|
|
|
|
} |
|
617
|
|
|
|
|
|
|
|
|
618
|
205
|
100
|
100
|
|
|
693
|
if ($regex == 1 and $code =~ /\G(?
|
|
|
|
|
100
|
|
|
|
|
|
619
|
1
|
|
|
|
|
8
|
$callback->('glob_readline', $-[0], $+[0]); |
|
620
|
1
|
|
|
|
|
685
|
$regex = 0; |
|
621
|
1
|
|
|
|
|
3
|
$canpod = 0; |
|
622
|
1
|
|
|
|
|
3
|
redo; |
|
623
|
|
|
|
|
|
|
} |
|
624
|
|
|
|
|
|
|
|
|
625
|
204
|
100
|
|
|
|
615
|
if ($code =~ m{\G$assignment_operators}gco) { |
|
626
|
16
|
|
|
|
|
81
|
$callback->('assignment_operator', $-[0], $+[0]); |
|
627
|
16
|
100
|
|
|
|
7503
|
if ($format) { |
|
628
|
2
|
50
|
|
|
|
18
|
if (substr($code, $-[0], $+[0] - $-[0]) eq '=') { |
|
629
|
2
|
|
|
|
|
3
|
$format = 0; |
|
630
|
2
|
|
|
|
|
3
|
$expect_format = 1; |
|
631
|
|
|
|
|
|
|
} |
|
632
|
|
|
|
|
|
|
} |
|
633
|
16
|
|
|
|
|
27
|
$regex = 1; |
|
634
|
16
|
|
|
|
|
27
|
$canpod = 0; |
|
635
|
16
|
|
|
|
|
20
|
$flat = 0; |
|
636
|
16
|
|
|
|
|
31
|
redo; |
|
637
|
|
|
|
|
|
|
} |
|
638
|
|
|
|
|
|
|
|
|
639
|
188
|
50
|
|
|
|
317
|
if ($code =~ /\G->/cg) { |
|
640
|
0
|
|
|
|
|
0
|
$callback->('dereference_operator', $-[0], $+[0]); |
|
641
|
0
|
|
|
|
|
0
|
$regex = 0; |
|
642
|
0
|
|
|
|
|
0
|
$canpod = 0; |
|
643
|
0
|
|
|
|
|
0
|
$flat = 1; |
|
644
|
0
|
|
|
|
|
0
|
redo; |
|
645
|
|
|
|
|
|
|
} |
|
646
|
|
|
|
|
|
|
|
|
647
|
188
|
100
|
66
|
|
|
765
|
if ($code =~ m{\G$operators}gco or $code =~ /\Gx(?=[0-9\W])/cg) { |
|
648
|
50
|
|
|
|
|
184
|
$callback->('operator', $-[0], $+[0]); |
|
649
|
50
|
100
|
|
|
|
23013
|
if (substr($code, $-[0], ($+[0] - $-[0])) =~ /^$postfix_operators\z/o) { |
|
650
|
5
|
|
|
|
|
11
|
$postfix_op = 1; |
|
651
|
|
|
|
|
|
|
} |
|
652
|
|
|
|
|
|
|
else { |
|
653
|
45
|
|
|
|
|
91
|
$postfix_op = 0; |
|
654
|
|
|
|
|
|
|
} |
|
655
|
50
|
|
|
|
|
75
|
$canpod = 0; |
|
656
|
50
|
|
|
|
|
67
|
$regex = 1; |
|
657
|
50
|
|
|
|
|
53
|
$flat = 0; |
|
658
|
50
|
|
|
|
|
81
|
redo; |
|
659
|
|
|
|
|
|
|
} |
|
660
|
|
|
|
|
|
|
|
|
661
|
138
|
50
|
|
|
|
263
|
if ($code =~ m{\G$hex_num}gco) { |
|
662
|
0
|
|
|
|
|
0
|
$callback->('hex_number', $-[0], $+[0]); |
|
663
|
0
|
|
|
|
|
0
|
$regex = 0; |
|
664
|
0
|
|
|
|
|
0
|
$canpod = 0; |
|
665
|
0
|
|
|
|
|
0
|
redo; |
|
666
|
|
|
|
|
|
|
} |
|
667
|
|
|
|
|
|
|
|
|
668
|
138
|
50
|
|
|
|
250
|
if ($code =~ m{\G$binary_num}gco) { |
|
669
|
0
|
|
|
|
|
0
|
$callback->('binary_number', $-[0], $+[0]); |
|
670
|
0
|
|
|
|
|
0
|
$regex = 0; |
|
671
|
0
|
|
|
|
|
0
|
$canpod = 0; |
|
672
|
0
|
|
|
|
|
0
|
redo; |
|
673
|
|
|
|
|
|
|
} |
|
674
|
|
|
|
|
|
|
|
|
675
|
138
|
100
|
|
|
|
479
|
if ($code =~ m{\G$number}gco) { |
|
676
|
66
|
|
|
|
|
258
|
$callback->('number', $-[0], $+[0]); |
|
677
|
66
|
|
|
|
|
30199
|
$regex = 0; |
|
678
|
66
|
|
|
|
|
116
|
$canpod = 0; |
|
679
|
66
|
|
|
|
|
102
|
redo; |
|
680
|
|
|
|
|
|
|
} |
|
681
|
|
|
|
|
|
|
|
|
682
|
72
|
100
|
|
|
|
130
|
if ($code =~ m{\GSTD(?>OUT|ERR|IN)\b}gc) { |
|
683
|
2
|
|
|
|
|
16
|
$callback->('special_fh', $-[0], $+[0]); |
|
684
|
2
|
|
|
|
|
1147
|
$regex = 1; |
|
685
|
2
|
|
|
|
|
5
|
$postfix_op = 0; |
|
686
|
2
|
|
|
|
|
6
|
$canpod = 0; |
|
687
|
2
|
|
|
|
|
6
|
redo; |
|
688
|
|
|
|
|
|
|
} |
|
689
|
|
|
|
|
|
|
|
|
690
|
70
|
100
|
|
|
|
302
|
if ($code =~ m{\G$var_name}gco) { |
|
691
|
4
|
50
|
|
|
|
23
|
$callback->(($proto == 1 ? 'sub_name' : 'bare_word'), $-[0], $+[0]); |
|
692
|
4
|
|
|
|
|
1744
|
$regex = 0; |
|
693
|
4
|
|
|
|
|
8
|
$canpod = 0; |
|
694
|
4
|
|
|
|
|
7
|
$flat = 0; |
|
695
|
4
|
|
|
|
|
8
|
redo; |
|
696
|
|
|
|
|
|
|
} |
|
697
|
|
|
|
|
|
|
|
|
698
|
66
|
50
|
|
|
|
158
|
if ($code =~ /\G(.)/cgs) { |
|
699
|
0
|
|
|
|
|
0
|
$callback->('unknown_char', $-[0], $+[0]); |
|
700
|
0
|
|
|
|
|
0
|
redo; |
|
701
|
|
|
|
|
|
|
} |
|
702
|
|
|
|
|
|
|
|
|
703
|
|
|
|
|
|
|
# all done |
|
704
|
|
|
|
|
|
|
} |
|
705
|
|
|
|
|
|
|
|
|
706
|
66
|
|
|
|
|
1123
|
return pos($code); |
|
707
|
|
|
|
|
|
|
} |
|
708
|
|
|
|
|
|
|
|
|
709
|
|
|
|
|
|
|
1; |
|
710
|
|
|
|
|
|
|
|
|
711
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
712
|
|
|
|
|
|
|
|
|
713
|
|
|
|
|
|
|
use Perl::Tokenizer; |
|
714
|
|
|
|
|
|
|
my $code = 'my $num = 42;'; |
|
715
|
|
|
|
|
|
|
perl_tokens { print "@_\n" } $code; |
|
716
|
|
|
|
|
|
|
|
|
717
|
|
|
|
|
|
|
=head1 DESCRIPTION |
|
718
|
|
|
|
|
|
|
|
|
719
|
|
|
|
|
|
|
Perl::Tokenizer is a tiny tokenizer which splits a given Perl code into a list of tokens, using the power of regular expressions. |
|
720
|
|
|
|
|
|
|
|
|
721
|
|
|
|
|
|
|
=head1 SUBROUTINES |
|
722
|
|
|
|
|
|
|
|
|
723
|
|
|
|
|
|
|
=over 4 |
|
724
|
|
|
|
|
|
|
|
|
725
|
|
|
|
|
|
|
=item perl_tokens(&$) |
|
726
|
|
|
|
|
|
|
|
|
727
|
|
|
|
|
|
|
This function takes a callback subroutine and a string. The subroutine is called for each token in real-time. |
|
728
|
|
|
|
|
|
|
|
|
729
|
|
|
|
|
|
|
perl_tokens { |
|
730
|
|
|
|
|
|
|
my ($token, $pos_beg, $pos_end) = @_; |
|
731
|
|
|
|
|
|
|
... |
|
732
|
|
|
|
|
|
|
} $code; |
|
733
|
|
|
|
|
|
|
|
|
734
|
|
|
|
|
|
|
The positions are absolute to the string. |
|
735
|
|
|
|
|
|
|
|
|
736
|
|
|
|
|
|
|
=back |
|
737
|
|
|
|
|
|
|
|
|
738
|
|
|
|
|
|
|
=head2 EXPORT |
|
739
|
|
|
|
|
|
|
|
|
740
|
|
|
|
|
|
|
The function B is exported by default. This is the only function provided by this module. |
|
741
|
|
|
|
|
|
|
|
|
742
|
|
|
|
|
|
|
=head1 TOKENS |
|
743
|
|
|
|
|
|
|
|
|
744
|
|
|
|
|
|
|
The standard token names that are available are: |
|
745
|
|
|
|
|
|
|
|
|
746
|
|
|
|
|
|
|
format .................. Format text |
|
747
|
|
|
|
|
|
|
heredoc_beg ............. The beginning of a here-document ('<<"EOT"') |
|
748
|
|
|
|
|
|
|
heredoc ................. The content of a here-document |
|
749
|
|
|
|
|
|
|
pod ..................... An inline POD document, until '=cut' or end of the file |
|
750
|
|
|
|
|
|
|
horizontal_space ........ Horizontal whitespace (matched by /\h/) |
|
751
|
|
|
|
|
|
|
vertical_space .......... Vertical whitespace (matched by /\v/) |
|
752
|
|
|
|
|
|
|
other_space ............. Whitespace that is neither vertical nor horizontal (matched by /\s/) |
|
753
|
|
|
|
|
|
|
var_name ................ Alphanumeric name of a variable (excluding the sigil) |
|
754
|
|
|
|
|
|
|
special_var_name ........ Non-alphanumeric name of a variable, such as $/ or $^H (excluding the sigil) |
|
755
|
|
|
|
|
|
|
sub_name ................ Subroutine name |
|
756
|
|
|
|
|
|
|
sub_proto ............... Subroutine prototype |
|
757
|
|
|
|
|
|
|
comment ................. A #-to-newline comment (excluding the newline) |
|
758
|
|
|
|
|
|
|
scalar_sigil ............ The sigil of a scalar variable: '$' |
|
759
|
|
|
|
|
|
|
array_sigil ............. The sigil of an array variable: '@' |
|
760
|
|
|
|
|
|
|
hash_sigil .............. The sigil of a hash variable: '%' |
|
761
|
|
|
|
|
|
|
glob_sigil .............. The sigil of a glob symbol: '*' |
|
762
|
|
|
|
|
|
|
ampersand_sigil ......... The sigil of a subroutine call: '&' |
|
763
|
|
|
|
|
|
|
parenthesis_open ........ Open parenthesis: '(' |
|
764
|
|
|
|
|
|
|
parenthesis_close ....... Closed parenthesis: ')' |
|
765
|
|
|
|
|
|
|
right_bracket_open ...... Open right bracket: '[' |
|
766
|
|
|
|
|
|
|
right_bracket_close ..... Closed right bracket: ']' |
|
767
|
|
|
|
|
|
|
curly_bracket_open ...... Open curly bracket: '{' |
|
768
|
|
|
|
|
|
|
curly_bracket_close ..... Closed curly bracket: '}' |
|
769
|
|
|
|
|
|
|
substitution ............ Regex substitution: s/.../.../ |
|
770
|
|
|
|
|
|
|
transliteration.......... Transliteration: tr/.../.../ or y/.../.../ |
|
771
|
|
|
|
|
|
|
match_regex ............. Regex in matching context: m/.../ |
|
772
|
|
|
|
|
|
|
compiled_regex .......... Quoted compiled regex: qr/.../ |
|
773
|
|
|
|
|
|
|
q_string ................ Single quoted string: q/.../ |
|
774
|
|
|
|
|
|
|
qq_string ............... Double quoted string: qq/.../ |
|
775
|
|
|
|
|
|
|
qw_string ............... List of quoted words: qw/.../ |
|
776
|
|
|
|
|
|
|
qx_string ............... System command quoted string: qx/.../ |
|
777
|
|
|
|
|
|
|
backtick ................ Backtick system command quoted string: `...` |
|
778
|
|
|
|
|
|
|
single_quoted_string .... Single quoted string, as: '...' |
|
779
|
|
|
|
|
|
|
double_quoted_string .... Double quoted string, as: "..." |
|
780
|
|
|
|
|
|
|
bare_word ............... Unquoted string |
|
781
|
|
|
|
|
|
|
glob_readline ........... or |
|
782
|
|
|
|
|
|
|
v_string ................ Version string: "vX" or "X.X.X" |
|
783
|
|
|
|
|
|
|
file_test ............... File test operator (-X), such as: "-d", "-e", etc... |
|
784
|
|
|
|
|
|
|
data .................... The content of `__DATA__` or `__END__` sections |
|
785
|
|
|
|
|
|
|
keyword ................. Regular Perl keyword, such as: `if`, `else`, etc... |
|
786
|
|
|
|
|
|
|
special_keyword ......... Special Perl keyword, such as: `__PACKAGE__`, `__FILE__`, etc... |
|
787
|
|
|
|
|
|
|
comma ................... Comma: ',' |
|
788
|
|
|
|
|
|
|
fat_comma ............... Fat comma: '=>' |
|
789
|
|
|
|
|
|
|
operator ................ Primitive operator, such as: '+', '||', etc... |
|
790
|
|
|
|
|
|
|
assignment_operator ..... '=' or any assignment operator: '+=', '||=', etc... |
|
791
|
|
|
|
|
|
|
dereference_operator .... Arrow dereference operator: '->' |
|
792
|
|
|
|
|
|
|
hex_number .............. Hexadecimal literal number: 0x... |
|
793
|
|
|
|
|
|
|
binary_number ........... Binary literal number: 0b... |
|
794
|
|
|
|
|
|
|
number .................. Decimal literal number, such as 42, 3.1e4, etc... |
|
795
|
|
|
|
|
|
|
special_fh .............. Special file-handle name, such as 'STDIN', 'STDOUT', etc... |
|
796
|
|
|
|
|
|
|
unknown_char ............ Unknown or unexpected character |
|
797
|
|
|
|
|
|
|
|
|
798
|
|
|
|
|
|
|
=head1 EXAMPLE |
|
799
|
|
|
|
|
|
|
|
|
800
|
|
|
|
|
|
|
For this code: |
|
801
|
|
|
|
|
|
|
|
|
802
|
|
|
|
|
|
|
my $num = 42; |
|
803
|
|
|
|
|
|
|
|
|
804
|
|
|
|
|
|
|
it generates the following tokens: |
|
805
|
|
|
|
|
|
|
|
|
806
|
|
|
|
|
|
|
# TOKEN POS |
|
807
|
|
|
|
|
|
|
( keyword => ( 0, 2) ) |
|
808
|
|
|
|
|
|
|
( horizontal_space => ( 2, 3) ) |
|
809
|
|
|
|
|
|
|
( scalar_sigil => ( 3, 4) ) |
|
810
|
|
|
|
|
|
|
( var_name => ( 4, 7) ) |
|
811
|
|
|
|
|
|
|
( horizontal_space => ( 7, 8) ) |
|
812
|
|
|
|
|
|
|
( assignment_operator => ( 8, 9) ) |
|
813
|
|
|
|
|
|
|
( horizontal_space => ( 9, 10) ) |
|
814
|
|
|
|
|
|
|
( number => (10, 12) ) |
|
815
|
|
|
|
|
|
|
( semicolon => (12, 13) ) |
|
816
|
|
|
|
|
|
|
|
|
817
|
|
|
|
|
|
|
=head1 REPOSITORY |
|
818
|
|
|
|
|
|
|
|
|
819
|
|
|
|
|
|
|
L |
|
820
|
|
|
|
|
|
|
|
|
821
|
|
|
|
|
|
|
=head1 AUTHOR |
|
822
|
|
|
|
|
|
|
|
|
823
|
|
|
|
|
|
|
Daniel "Trizen" Șuteu, Etrizen@protonmail.comE |
|
824
|
|
|
|
|
|
|
|
|
825
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
|
826
|
|
|
|
|
|
|
|
|
827
|
|
|
|
|
|
|
Copyright (C) 2013-2017 |
|
828
|
|
|
|
|
|
|
|
|
829
|
|
|
|
|
|
|
This library is free software; you can redistribute it and/or modify |
|
830
|
|
|
|
|
|
|
it under the same terms as Perl itself, either Perl version 5.22.0 or, |
|
831
|
|
|
|
|
|
|
at your option, any later version of Perl 5 you may have available. |
|
832
|
|
|
|
|
|
|
|
|
833
|
|
|
|
|
|
|
=cut |