line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Perl::Tokenizer; |
2
|
|
|
|
|
|
|
|
3
|
5
|
|
|
5
|
|
347076
|
use utf8; |
|
5
|
|
|
|
|
123
|
|
|
5
|
|
|
|
|
28
|
|
4
|
5
|
|
|
5
|
|
244
|
use 5.018; |
|
5
|
|
|
|
|
18
|
|
5
|
5
|
|
|
5
|
|
30
|
use strict; |
|
5
|
|
|
|
|
10
|
|
|
5
|
|
|
|
|
103
|
|
6
|
5
|
|
|
5
|
|
23
|
use warnings; |
|
5
|
|
|
|
|
11
|
|
|
5
|
|
|
|
|
27903
|
|
7
|
|
|
|
|
|
|
|
8
|
|
|
|
|
|
|
require Exporter; |
9
|
|
|
|
|
|
|
our @ISA = qw(Exporter); |
10
|
|
|
|
|
|
|
our @EXPORT = qw(perl_tokens); |
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
our $VERSION = '0.10'; |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
=encoding utf8 |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
=head1 NAME |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
Perl::Tokenizer - A tiny Perl code tokenizer. |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
=head1 VERSION |
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
Version 0.10 |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
=cut |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
my $make_esc_delim = sub { |
27
|
|
|
|
|
|
|
if ($_[0] eq '\\') { |
28
|
|
|
|
|
|
|
return qr{\\(.*?)\\}s; |
29
|
|
|
|
|
|
|
} |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
my $delim = quotemeta shift; |
32
|
|
|
|
|
|
|
qr{$delim([^$delim\\]*+(?>\\.|[^$delim\\]+)*+)$delim}s; |
33
|
|
|
|
|
|
|
}; |
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
my $make_end_delim = sub { |
36
|
|
|
|
|
|
|
if ($_[0] eq '\\') { |
37
|
|
|
|
|
|
|
return qr{.*?\\}s; |
38
|
|
|
|
|
|
|
} |
39
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
my $delim = quotemeta shift; |
41
|
|
|
|
|
|
|
qr{[^$delim\\]*+(?>\\.|[^$delim\\]+)*+$delim}s; |
42
|
|
|
|
|
|
|
}; |
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
my %bdelims; |
45
|
|
|
|
|
|
|
foreach my $d ([qw~< >~], [qw~( )~], [qw~{ }~], [qw~[ ]~]) { |
46
|
|
|
|
|
|
|
my @ed = map { quotemeta } @{$d}; |
47
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
$bdelims{$d->[0]} = qr{ |
49
|
|
|
|
|
|
|
$ed[0] |
50
|
|
|
|
|
|
|
(?> |
51
|
|
|
|
|
|
|
[^$ed[0]$ed[1]\\]+ |
52
|
|
|
|
|
|
|
| |
53
|
|
|
|
|
|
|
\\. |
54
|
|
|
|
|
|
|
| |
55
|
|
|
|
|
|
|
(??{$bdelims{$d->[0]}}) |
56
|
|
|
|
|
|
|
)* |
57
|
|
|
|
|
|
|
$ed[1] |
58
|
|
|
|
|
|
|
}xs; |
59
|
|
|
|
|
|
|
} |
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
# string - single quote |
62
|
|
|
|
|
|
|
my $str_sq = $make_esc_delim->(q{'}); |
63
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
# string - double quote |
65
|
|
|
|
|
|
|
my $str_dq = $make_esc_delim->(q{"}); |
66
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
# backtick - backquote |
68
|
|
|
|
|
|
|
my $str_bq = $make_esc_delim->(q{`}); |
69
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
# regex - // |
71
|
|
|
|
|
|
|
my $match_re = $make_esc_delim->(q{/}); |
72
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
# glob/readline |
74
|
|
|
|
|
|
|
my $glob = $bdelims{'<'}; |
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
# Cache regular expressions that are generated dynamically |
77
|
|
|
|
|
|
|
my %cache_esc; |
78
|
|
|
|
|
|
|
my %cache_end; |
79
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
# Double pairs |
81
|
|
|
|
|
|
|
my $dpairs = qr{ |
82
|
|
|
|
|
|
|
(?= |
83
|
|
|
|
|
|
|
(?(?<=\s) |
84
|
|
|
|
|
|
|
(.) |
85
|
|
|
|
|
|
|
| |
86
|
|
|
|
|
|
|
(\W) |
87
|
|
|
|
|
|
|
) |
88
|
|
|
|
|
|
|
) |
89
|
|
|
|
|
|
|
(??{$bdelims{$+} // ($cache_esc{$+} //= $make_esc_delim->($+))}) |
90
|
|
|
|
|
|
|
}x; |
91
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
# Double pairs -- comments |
93
|
|
|
|
|
|
|
my $dcomm = qr{ |
94
|
|
|
|
|
|
|
\s* (?>(?<=\s)\# (?-s:.*) \s*)* |
95
|
|
|
|
|
|
|
}x; |
96
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
# Quote-like balanced (q{}, m//) |
98
|
|
|
|
|
|
|
my $make_single_q_balanced = sub { |
99
|
|
|
|
|
|
|
my $name = shift; |
100
|
|
|
|
|
|
|
qr{ |
101
|
|
|
|
|
|
|
$name |
102
|
|
|
|
|
|
|
$dcomm |
103
|
|
|
|
|
|
|
$dpairs |
104
|
|
|
|
|
|
|
}x; |
105
|
|
|
|
|
|
|
}; |
106
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
# Quote-like balanced (q{}, m//) |
108
|
|
|
|
|
|
|
my %single_q; |
109
|
|
|
|
|
|
|
foreach my $name (qw(q qq qr qw qx m)) { |
110
|
|
|
|
|
|
|
$single_q{$name} = $make_single_q_balanced->($name); |
111
|
|
|
|
|
|
|
} |
112
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
# First of balanced pairs |
114
|
|
|
|
|
|
|
my $bbpair = qr~[<\[\{\(]~; |
115
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
my $make_double_q_balanced = sub { |
117
|
|
|
|
|
|
|
my $name = shift; |
118
|
|
|
|
|
|
|
qr{ |
119
|
|
|
|
|
|
|
$name |
120
|
|
|
|
|
|
|
$dcomm |
121
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
(?(?=$bbpair) # balanced pairs (e.g.: s{}//) |
123
|
|
|
|
|
|
|
$dpairs |
124
|
|
|
|
|
|
|
$dcomm |
125
|
|
|
|
|
|
|
$dpairs |
126
|
|
|
|
|
|
|
| # or: single delims (e.g.: s///) |
127
|
|
|
|
|
|
|
$dpairs |
128
|
|
|
|
|
|
|
(??{$cache_end{$+} //= $make_end_delim->($+)}) |
129
|
|
|
|
|
|
|
) |
130
|
|
|
|
|
|
|
}x; |
131
|
|
|
|
|
|
|
}; |
132
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
# Double quote-like balanced (s{}{}, s///) |
134
|
|
|
|
|
|
|
my %double_q; |
135
|
|
|
|
|
|
|
foreach my $name (qw(tr s y)) { |
136
|
|
|
|
|
|
|
$double_q{$name} = $make_double_q_balanced->($name); |
137
|
|
|
|
|
|
|
} |
138
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
my $number = qr{(?=\.?[0-9])[0-9_]*(?:\.(?!\.)[0-9_]*)?(?:[Ee](?:[+-]?[0-9_]+))?}; |
140
|
|
|
|
|
|
|
my $hex_num = qr{0x[_0-9A-Fa-f]*}; |
141
|
|
|
|
|
|
|
my $binary_num = qr{0b[_01]*}; |
142
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
my $var_name = qr{(?>\w+|(?>::)+|'(?=\w))++}; |
144
|
|
|
|
|
|
|
my $vstring = qr{\b(?:v[0-9]+(?>\.[0-9][0-9_]*+)*+ | [0-9][0-9_]*(?>\.[0-9][0-9_]*){2,})\b}x; |
145
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
# HERE-DOC beginning |
147
|
|
|
|
|
|
|
my $bhdoc = qr{ |
148
|
|
|
|
|
|
|
<<(?>\h*(?>$str_sq|$str_dq)|\\?+(\w+)) |
149
|
|
|
|
|
|
|
}x; |
150
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
my $tr_flags = qr{[rcds]*}; |
152
|
|
|
|
|
|
|
my $match_flags = qr{[mnsixpogcdual]*}; |
153
|
|
|
|
|
|
|
my $substitution_flags = qr{[mnsixpogcerdual]*}; |
154
|
|
|
|
|
|
|
my $compiled_regex_flags = qr{[mnsixpodual]*}; |
155
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
my @postfix_operators = qw( ++ -- ); |
157
|
|
|
|
|
|
|
my @prec_operators = qw ( ... .. -> ++ -- =~ <=> \\ ? ~~ ~. ~ : >> >= > << <= < == != ! ); |
158
|
|
|
|
|
|
|
my @assignment_operators = qw( && || // ** % ^. ^ &. & |. | * + - = / . << >> ); |
159
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
my $operators = do { |
161
|
|
|
|
|
|
|
local $" = '|'; |
162
|
|
|
|
|
|
|
qr{@{[map{quotemeta} @prec_operators, @assignment_operators]}}; |
163
|
|
|
|
|
|
|
}; |
164
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
my $postfix_operators = do { |
166
|
|
|
|
|
|
|
local $" = '|'; |
167
|
|
|
|
|
|
|
qr{@{[map{quotemeta} @postfix_operators]}}; |
168
|
|
|
|
|
|
|
}; |
169
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
my $assignment_operators = do { |
171
|
|
|
|
|
|
|
local $" = '|'; |
172
|
|
|
|
|
|
|
qr{@{[map{($_ eq '=') ? '=(?!=)' : "\Q$_=\E"} @assignment_operators]}}; |
173
|
|
|
|
|
|
|
}; |
174
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
my @special_var_names = (qw( \\ | + / ~ ! @ $ % ^ & * ( ) } < > : ; " ` ' ? = - [ ] . ), '#', ','); |
176
|
|
|
|
|
|
|
my $special_var_names = do { |
177
|
|
|
|
|
|
|
local $" = '|'; |
178
|
|
|
|
|
|
|
qr{@{[map {quotemeta} @special_var_names]}}; |
179
|
|
|
|
|
|
|
}; |
180
|
|
|
|
|
|
|
|
181
|
|
|
|
|
|
|
my $bracket_var = qr~(?=\s*\{)(?!\s*\{\s*(?:\^?$var_name|$special_var_names|\{)\s*\})~; |
182
|
|
|
|
|
|
|
|
183
|
|
|
|
|
|
|
#<<< |
184
|
|
|
|
|
|
|
my $perl_keywords = |
185
|
|
|
|
|
|
|
qr/(?:CORE::)?(?>(a(?:(?:ccept|larm|tan2|bs|nd))|b(?:in(?:(?:mode|d))|less|reak)|c(?:aller|h(?:dir|mod |
186
|
|
|
|
|
|
|
|o(?:(?:m?p|wn))|r(?:oot)?)|lose(?:dir)?|mp|o(?:n(?:(?:tinue|nect))|s)|rypt)|d(?:bm(?:(?:close|open))| |
187
|
|
|
|
|
|
|
e(?:f(?:(?:ault|ined))|lete)|ie|ump|o)|e(?:ach|ls(?:(?:if|e))|nd(?:grent|hostent|netent|p(?:(?:roto|w) |
188
|
|
|
|
|
|
|
ent)|servent)|of|val|x(?:ec|i(?:(?:sts|t))|p)|q)|f(?:c(?:ntl)?|ileno|lock|or(?:(?:each|m(?:(?:line|at) |
189
|
|
|
|
|
|
|
)|k))?)|g(?:e(?:t(?:gr(?:(?:ent|gid|nam))|host(?:by(?:(?:addr|name))|ent)|login|net(?:by(?:(?:addr|nam |
190
|
|
|
|
|
|
|
e))|ent)|p(?:eername|grp|pid|r(?:iority|oto(?:byn(?:(?:umber|ame))|ent))|w(?:(?:ent|nam|uid)))|s(?:erv |
191
|
|
|
|
|
|
|
(?:by(?:(?:name|port))|ent)|ock(?:(?:name|opt)))|c))?|iven|lob|mtime|oto|rep|t)|hex|i(?:mport|n(?:(?:d |
192
|
|
|
|
|
|
|
ex|t))|octl|sa|f)|join|k(?:(?:eys|ill))|l(?:ast|c(?:first)?|e(?:ngth)?|i(?:(?:sten|nk))|o(?:c(?:al(?:t |
193
|
|
|
|
|
|
|
ime)?|k)|g)|stat|t)|m(?:ap|kdir|sg(?:(?:ctl|get|rcv|snd))|y)|n(?:e(?:xt)?|ot?)|o(?:ct|pen(?:dir)?|rd?| |
194
|
|
|
|
|
|
|
ur)|p(?:ack(?:age)?|ipe|o[ps]|r(?:(?:ototype|intf?))|ush)|quotemeta|r(?:and|e(?:ad(?:(?:(?:lin[ek]|pip |
195
|
|
|
|
|
|
|
e|dir)))?|cv|do|name|quire|set|turn|verse|winddir|f)|index|mdir)|s(?:ay|calar|e(?:ek(?:dir)?|lect|m(?: |
196
|
|
|
|
|
|
|
(?:ctl|get|op))|nd|t(?:grent|hostent|netent|p(?:grp|r(?:(?:iority|otoent))|went)|s(?:(?:erven|ockop)t) |
197
|
|
|
|
|
|
|
))|h(?:ift|m(?:(?:write|read|ctl|get))|utdown)|in|leep|o(?:cket(?:pair)?|rt)|p(?:li(?:(?:ce|t))|rintf) |
198
|
|
|
|
|
|
|
|qrt|rand|t(?:(?:ate?|udy))|ub(?:str)?|y(?:mlink|s(?:(?:write|call|open|read|seek|tem))))|t(?:ell(?:di |
199
|
|
|
|
|
|
|
r)?|i(?:(?:mes?|ed?))|runcate)|u(?:c(?:first)?|mask|n(?:def|l(?:(?:ess|ink))|pack|shift|ti[el])|se|tim |
200
|
|
|
|
|
|
|
e)|v(?:(?:alues|ec))|w(?:a(?:it(?:pid)?|ntarray|rn)|h(?:(?:ile|en))|rite)|xor|BEGIN|END|INIT|CHECK))\b |
201
|
|
|
|
|
|
|
/x; |
202
|
|
|
|
|
|
|
#>>> |
203
|
|
|
|
|
|
|
|
204
|
|
|
|
|
|
|
my $perl_filetests = qr/\-[ABCMORSTWXbcdefgkloprstuwxz]/; |
205
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
sub perl_tokens(&$) { |
207
|
66
|
|
|
66
|
1
|
23534
|
my ($callback, $code) = @_; |
208
|
|
|
|
|
|
|
|
209
|
66
|
50
|
|
|
|
229
|
ref($callback) eq 'CODE' |
210
|
|
|
|
|
|
|
or die "usage: perl_tokens {...} \$code;"; |
211
|
|
|
|
|
|
|
|
212
|
66
|
|
|
|
|
104
|
my $variable = 0; |
213
|
66
|
|
|
|
|
101
|
my $flat = 0; |
214
|
66
|
|
|
|
|
93
|
my $regex = 1; |
215
|
66
|
|
|
|
|
98
|
my $canpod = 1; |
216
|
66
|
|
|
|
|
82
|
my $proto = 0; |
217
|
66
|
|
|
|
|
85
|
my $format = 0; |
218
|
66
|
|
|
|
|
95
|
my $expect_format = 0; |
219
|
66
|
|
|
|
|
85
|
my $postfix_op = 0; |
220
|
66
|
|
|
|
|
93
|
my @heredoc_eofs; |
221
|
|
|
|
|
|
|
|
222
|
66
|
|
|
|
|
138
|
$code = "$code"; |
223
|
|
|
|
|
|
|
|
224
|
|
|
|
|
|
|
{ |
225
|
66
|
100
|
66
|
|
|
93
|
if ($expect_format == 1 and $code =~ /\G(?=\R)/) { |
|
351
|
|
|
|
|
1066
|
|
226
|
2
|
50
|
|
|
|
16
|
if ($code =~ /.*?\R\.\h*(?=\R|\z)/cgs) { |
227
|
2
|
|
|
|
|
10
|
$callback->('vertical_space', $-[0], $-[0] + 1); |
228
|
2
|
|
|
|
|
1058
|
$callback->('format', $-[0] + 1, $+[0]); |
229
|
2
|
|
|
|
|
1122
|
$expect_format = 0; |
230
|
2
|
|
|
|
|
5
|
$canpod = 1; |
231
|
2
|
|
|
|
|
4
|
$regex = 1; |
232
|
2
|
|
|
|
|
4
|
$postfix_op = 0; |
233
|
|
|
|
|
|
|
} |
234
|
|
|
|
|
|
|
else { |
235
|
0
|
0
|
|
|
|
0
|
if ($code =~ /\G(.)/cgs) { |
236
|
0
|
|
|
|
|
0
|
$callback->('unknown_char', $-[0], $+[0]); |
237
|
0
|
|
|
|
|
0
|
redo; |
238
|
|
|
|
|
|
|
} |
239
|
|
|
|
|
|
|
} |
240
|
2
|
|
|
|
|
4
|
redo; |
241
|
|
|
|
|
|
|
} |
242
|
|
|
|
|
|
|
|
243
|
349
|
100
|
100
|
|
|
914
|
if ($#heredoc_eofs >= 0 and $code =~ /\G(?=\R)/) { |
244
|
3
|
|
|
|
|
8
|
my $token = shift @heredoc_eofs; |
245
|
3
|
50
|
|
|
|
76
|
if ($code =~ m{\G.*?\R\Q$token\E(?=\R|\z)}sgc) { |
246
|
3
|
|
|
|
|
21
|
$callback->('vertical_space', $-[0], $-[0] + 1); |
247
|
3
|
|
|
|
|
1664
|
$callback->('heredoc', $-[0] + 1, $+[0]); |
248
|
|
|
|
|
|
|
} |
249
|
3
|
|
|
|
|
1655
|
redo; |
250
|
|
|
|
|
|
|
} |
251
|
|
|
|
|
|
|
|
252
|
346
|
100
|
100
|
|
|
3616
|
if (($regex == 1 or $code =~ /\G(?!<<[0-9])/) and $code =~ m{\G$bhdoc}gc) { |
|
|
|
100
|
|
|
|
|
253
|
3
|
|
|
|
|
15
|
$callback->('heredoc_beg', $-[0], $+[0]); |
254
|
3
|
|
|
|
|
1733
|
push @heredoc_eofs, $+; |
255
|
3
|
|
|
|
|
8
|
$regex = 0; |
256
|
3
|
|
|
|
|
6
|
$canpod = 0; |
257
|
3
|
|
|
|
|
7
|
redo; |
258
|
|
|
|
|
|
|
} |
259
|
|
|
|
|
|
|
|
260
|
343
|
50
|
66
|
|
|
1057
|
if ($canpod == 1 and $code =~ /\G^=[a-zA-Z]/cgm) { |
261
|
0
|
0
|
|
|
|
0
|
$code =~ /\G.*?\R=cut\h*(?=\R|\z)/cgs |
262
|
|
|
|
|
|
|
or $code =~ /\G.*\z/cgs; |
263
|
0
|
|
|
|
|
0
|
$callback->('pod', $-[0] - 2, $+[0]); |
264
|
0
|
|
|
|
|
0
|
redo; |
265
|
|
|
|
|
|
|
} |
266
|
|
|
|
|
|
|
|
267
|
343
|
100
|
|
|
|
1055
|
if ($code =~ /\G(?=\s)/) { |
268
|
40
|
100
|
|
|
|
123
|
if ($code =~ /\G\h+/cg) { |
269
|
37
|
|
|
|
|
169
|
$callback->('horizontal_space', $-[0], $+[0]); |
270
|
37
|
|
|
|
|
20706
|
redo; |
271
|
|
|
|
|
|
|
} |
272
|
|
|
|
|
|
|
|
273
|
3
|
50
|
|
|
|
13
|
if ($code =~ /\G\v+/cg) { |
274
|
3
|
|
|
|
|
17
|
$callback->('vertical_space', $-[0], $+[0]); |
275
|
3
|
|
|
|
|
1643
|
redo; |
276
|
|
|
|
|
|
|
} |
277
|
|
|
|
|
|
|
|
278
|
0
|
0
|
|
|
|
0
|
if ($code =~ /\G\s+/cg) { |
279
|
0
|
|
|
|
|
0
|
$callback->('other_space', $-[0], $+[0]); |
280
|
0
|
|
|
|
|
0
|
redo; |
281
|
|
|
|
|
|
|
} |
282
|
|
|
|
|
|
|
} |
283
|
|
|
|
|
|
|
|
284
|
303
|
100
|
|
|
|
659
|
if ($variable > 0) { |
285
|
22
|
50
|
33
|
|
|
230
|
if ($code =~ m{\G$var_name}gco or $code =~ m{\G(?<=\$)\#$var_name}gco) { |
286
|
22
|
|
|
|
|
134
|
$callback->('var_name', $-[0], $+[0]); |
287
|
22
|
|
|
|
|
12314
|
$regex = 0; |
288
|
22
|
|
|
|
|
37
|
$variable = 0; |
289
|
22
|
|
|
|
|
28
|
$canpod = 0; |
290
|
22
|
100
|
|
|
|
134
|
$flat = ($code =~ /\G(?=\s*\{)/) ? 1 : 0; |
291
|
22
|
|
|
|
|
52
|
redo; |
292
|
|
|
|
|
|
|
} |
293
|
|
|
|
|
|
|
|
294
|
0
|
0
|
0
|
|
|
0
|
if ( |
|
|
|
0
|
|
|
|
|
295
|
|
|
|
|
|
|
$code =~ m{\G(?!\$+$var_name)}o |
296
|
|
|
|
|
|
|
and ( $code =~ m~\G(?:\s+|#?)\{\s*(?:$var_name|$special_var_names|[#{])\s*\}~goc |
297
|
|
|
|
|
|
|
or $code =~ m{\G(?:\^\w+|#(?!\{)|$special_var_names)}gco |
298
|
|
|
|
|
|
|
or $code =~ /\G#/cg) |
299
|
|
|
|
|
|
|
) { |
300
|
0
|
|
|
|
|
0
|
$callback->('special_var_name', $-[0], $+[0]); |
301
|
0
|
|
|
|
|
0
|
$regex = 0; |
302
|
0
|
|
|
|
|
0
|
$canpod = 0; |
303
|
0
|
|
|
|
|
0
|
$variable = 0; |
304
|
0
|
0
|
|
|
|
0
|
$flat = ($code =~ /\G(?
|
305
|
0
|
|
|
|
|
0
|
redo; |
306
|
|
|
|
|
|
|
} |
307
|
|
|
|
|
|
|
|
308
|
|
|
|
|
|
|
# continue |
309
|
|
|
|
|
|
|
} |
310
|
|
|
|
|
|
|
|
311
|
281
|
100
|
|
|
|
604
|
if ($code =~ /\G#.*/cg) { |
312
|
2
|
|
|
|
|
11
|
$callback->('comment', $-[0], $+[0]); |
313
|
2
|
|
|
|
|
1210
|
redo; |
314
|
|
|
|
|
|
|
} |
315
|
|
|
|
|
|
|
|
316
|
279
|
100
|
100
|
|
|
1112
|
if (($regex == 1 and not($postfix_op)) or $code =~ /\G(?=[\@\$])/) { |
|
|
|
100
|
|
|
|
|
317
|
148
|
100
|
|
|
|
383
|
if ($code =~ /\G\$/cg) { |
318
|
21
|
|
|
|
|
107
|
$callback->('scalar_sigil', $-[0], $+[0]); |
319
|
21
|
50
|
|
|
|
12341
|
$code =~ /\G$bracket_var/o or ++$variable; |
320
|
21
|
|
|
|
|
44
|
$regex = 0; |
321
|
21
|
|
|
|
|
33
|
$canpod = 0; |
322
|
21
|
|
|
|
|
28
|
$flat = 1; |
323
|
21
|
|
|
|
|
45
|
redo; |
324
|
|
|
|
|
|
|
} |
325
|
|
|
|
|
|
|
|
326
|
127
|
100
|
|
|
|
278
|
if ($code =~ /\G\@/cg) { |
327
|
1
|
|
|
|
|
6
|
$callback->('array_sigil', $-[0], $+[0]); |
328
|
1
|
50
|
|
|
|
670
|
$code =~ /\G$bracket_var/o or ++$variable; |
329
|
1
|
|
|
|
|
5
|
$regex = 0; |
330
|
1
|
|
|
|
|
3
|
$canpod = 0; |
331
|
1
|
|
|
|
|
2
|
$flat = 1; |
332
|
1
|
|
|
|
|
3
|
redo; |
333
|
|
|
|
|
|
|
} |
334
|
|
|
|
|
|
|
|
335
|
126
|
50
|
|
|
|
263
|
if ($code =~ /\G\%/cg) { |
336
|
0
|
|
|
|
|
0
|
$callback->('hash_sigil', $-[0], $+[0]); |
337
|
0
|
0
|
|
|
|
0
|
$code =~ /\G$bracket_var/o or ++$variable; |
338
|
0
|
|
|
|
|
0
|
$regex = 0; |
339
|
0
|
|
|
|
|
0
|
$canpod = 0; |
340
|
0
|
|
|
|
|
0
|
$flat = 1; |
341
|
0
|
|
|
|
|
0
|
redo; |
342
|
|
|
|
|
|
|
} |
343
|
|
|
|
|
|
|
|
344
|
126
|
50
|
|
|
|
244
|
if ($code =~ /\G\*/cg) { |
345
|
0
|
|
|
|
|
0
|
$callback->('glob_sigil', $-[0], $+[0]); |
346
|
0
|
0
|
|
|
|
0
|
$code =~ /\G$bracket_var/o or ++$variable; |
347
|
0
|
|
|
|
|
0
|
$regex = 0; |
348
|
0
|
|
|
|
|
0
|
$canpod = 0; |
349
|
0
|
|
|
|
|
0
|
$flat = 1; |
350
|
0
|
|
|
|
|
0
|
redo; |
351
|
|
|
|
|
|
|
} |
352
|
|
|
|
|
|
|
|
353
|
126
|
50
|
|
|
|
253
|
if ($code =~ /\G&/cg) { |
354
|
0
|
|
|
|
|
0
|
$callback->('ampersand_sigil', $-[0], $+[0]); |
355
|
0
|
0
|
|
|
|
0
|
$code =~ /\G$bracket_var/o or ++$variable; |
356
|
0
|
|
|
|
|
0
|
$regex = 0; |
357
|
0
|
|
|
|
|
0
|
$canpod = 0; |
358
|
0
|
|
|
|
|
0
|
$flat = 1; |
359
|
0
|
|
|
|
|
0
|
redo; |
360
|
|
|
|
|
|
|
} |
361
|
|
|
|
|
|
|
|
362
|
|
|
|
|
|
|
# continue |
363
|
|
|
|
|
|
|
} |
364
|
|
|
|
|
|
|
|
365
|
257
|
50
|
33
|
|
|
581
|
if ($proto == 1 and $code =~ /\G\(.*?\)/cgs) { |
366
|
0
|
|
|
|
|
0
|
$callback->('sub_proto', $-[0], $+[0]); |
367
|
0
|
|
|
|
|
0
|
$proto = 0; |
368
|
0
|
|
|
|
|
0
|
$canpod = 0; |
369
|
0
|
|
|
|
|
0
|
$regex = 0; |
370
|
0
|
|
|
|
|
0
|
redo; |
371
|
|
|
|
|
|
|
} |
372
|
|
|
|
|
|
|
|
373
|
257
|
100
|
|
|
|
467
|
if ($code =~ /\G\(/cg) { |
374
|
2
|
|
|
|
|
12
|
$callback->('parenthesis_open', $-[0], $+[0]); |
375
|
2
|
|
|
|
|
1163
|
$regex = 1; |
376
|
2
|
|
|
|
|
5
|
$flat = 0; |
377
|
2
|
|
|
|
|
4
|
$canpod = 0; |
378
|
2
|
|
|
|
|
5
|
redo; |
379
|
|
|
|
|
|
|
} |
380
|
|
|
|
|
|
|
|
381
|
255
|
100
|
|
|
|
487
|
if ($code =~ /\G\)/cg) { |
382
|
2
|
|
|
|
|
12
|
$callback->('parenthesis_close', $-[0], $+[0]); |
383
|
2
|
|
|
|
|
1080
|
$regex = 0; |
384
|
2
|
|
|
|
|
5
|
$canpod = 0; |
385
|
2
|
|
|
|
|
5
|
$flat = 0; |
386
|
2
|
|
|
|
|
4
|
redo; |
387
|
|
|
|
|
|
|
} |
388
|
|
|
|
|
|
|
|
389
|
253
|
100
|
|
|
|
490
|
if ($code =~ /\G\{/cg) { |
390
|
4
|
|
|
|
|
37
|
$callback->('curly_bracket_open', $-[0], $+[0]); |
391
|
4
|
|
|
|
|
2211
|
$regex = 1; |
392
|
4
|
|
|
|
|
9
|
$proto = 0; |
393
|
4
|
|
|
|
|
9
|
redo; |
394
|
|
|
|
|
|
|
} |
395
|
|
|
|
|
|
|
|
396
|
249
|
100
|
|
|
|
473
|
if ($code =~ /\G\}/cg) { |
397
|
4
|
|
|
|
|
18
|
$callback->('curly_bracket_close', $-[0], $+[0]); |
398
|
4
|
|
|
|
|
2195
|
$flat = 0; |
399
|
4
|
|
|
|
|
6
|
$canpod = 1; |
400
|
4
|
|
|
|
|
10
|
redo; |
401
|
|
|
|
|
|
|
} |
402
|
|
|
|
|
|
|
|
403
|
245
|
100
|
|
|
|
478
|
if ($code =~ /\G\[/cg) { |
404
|
2
|
|
|
|
|
11
|
$callback->('right_bracket_open', $-[0], $+[0]); |
405
|
2
|
|
|
|
|
1121
|
$regex = 1; |
406
|
2
|
|
|
|
|
6
|
$postfix_op = 0; |
407
|
2
|
|
|
|
|
4
|
$flat = 0; |
408
|
2
|
|
|
|
|
4
|
$canpod = 0; |
409
|
2
|
|
|
|
|
4
|
redo; |
410
|
|
|
|
|
|
|
} |
411
|
|
|
|
|
|
|
|
412
|
243
|
100
|
|
|
|
450
|
if ($code =~ /\G\]/cg) { |
413
|
2
|
|
|
|
|
10
|
$callback->('right_bracket_close', $-[0], $+[0]); |
414
|
2
|
|
|
|
|
1095
|
$regex = 0; |
415
|
2
|
|
|
|
|
5
|
$canpod = 0; |
416
|
2
|
|
|
|
|
3
|
$flat = 0; |
417
|
2
|
|
|
|
|
4
|
redo; |
418
|
|
|
|
|
|
|
} |
419
|
|
|
|
|
|
|
|
420
|
241
|
50
|
|
|
|
506
|
if ($proto == 0) { |
421
|
241
|
100
|
100
|
|
|
590
|
if ($canpod == 1 and $code =~ /\Gformat\b/cg) { |
422
|
2
|
|
|
|
|
14
|
$callback->('keyword', $-[0], $+[0]); |
423
|
2
|
|
|
|
|
1157
|
$regex = 0; |
424
|
2
|
|
|
|
|
4
|
$canpod = 0; |
425
|
2
|
|
|
|
|
4
|
$format = 1; |
426
|
2
|
|
|
|
|
5
|
redo; |
427
|
|
|
|
|
|
|
} |
428
|
|
|
|
|
|
|
|
429
|
239
|
100
|
66
|
|
|
4338
|
if ( |
|
|
|
100
|
|
|
|
|
430
|
|
|
|
|
|
|
( |
431
|
|
|
|
|
|
|
$flat == 0 or ( $flat == 1 |
432
|
|
|
|
|
|
|
and $code =~ /\G(?!\w+\h*\})/) |
433
|
|
|
|
|
|
|
) |
434
|
|
|
|
|
|
|
and $code =~ m{\G(?)$perl_keywords}gco |
435
|
|
|
|
|
|
|
) { |
436
|
5
|
|
|
|
|
21
|
my $name = $1; |
437
|
5
|
|
|
|
|
22
|
my @pos = ($-[0], $+[0]); |
438
|
5
|
|
|
|
|
22
|
my $is_bare_word = ($code =~ /\G(?=\h*=>)/); |
439
|
5
|
50
|
|
|
|
26
|
$callback->(($is_bare_word ? 'bare_word' : 'keyword'), @pos); |
440
|
|
|
|
|
|
|
|
441
|
5
|
50
|
33
|
|
|
2913
|
if ($name eq 'sub' and not $is_bare_word) { |
442
|
0
|
|
|
|
|
0
|
$proto = 1; |
443
|
0
|
|
|
|
|
0
|
$regex = 0; |
444
|
|
|
|
|
|
|
} |
445
|
|
|
|
|
|
|
else { |
446
|
5
|
|
|
|
|
11
|
$regex = 1; |
447
|
5
|
|
|
|
|
8
|
$postfix_op = 0; |
448
|
|
|
|
|
|
|
} |
449
|
5
|
|
|
|
|
9
|
$canpod = 0; |
450
|
5
|
|
|
|
|
43
|
redo; |
451
|
|
|
|
|
|
|
} |
452
|
|
|
|
|
|
|
|
453
|
|
|
|
|
|
|
# continue |
454
|
|
|
|
|
|
|
} |
455
|
|
|
|
|
|
|
|
456
|
234
|
50
|
33
|
|
|
1148
|
if ($code =~ /\G(?!(?>tr|[ysm]|q[rwxq]?)\h*=>)/ and $code =~ /\G(?)/) { |
457
|
|
|
|
|
|
|
|
458
|
234
|
100
|
66
|
|
|
1021
|
if (($flat == 1 and $code =~ /\G(?=[a-z]+\h*\})/) or $code =~ /\G((?<=\{)|(?<=\{\h))(?=[a-z]+\h*\})/) { |
|
|
|
100
|
|
|
|
|
459
|
|
|
|
|
|
|
## ok |
460
|
|
|
|
|
|
|
} |
461
|
|
|
|
|
|
|
else { |
462
|
|
|
|
|
|
|
|
463
|
231
|
100
|
|
|
|
787
|
if ($code =~ m{\G $double_q{s} $substitution_flags }gcxo) { |
464
|
4
|
|
|
|
|
20
|
$callback->('substitution', $-[0], $+[0]); |
465
|
4
|
|
|
|
|
2114
|
$regex = 0; |
466
|
4
|
|
|
|
|
6
|
$canpod = 0; |
467
|
4
|
|
|
|
|
9
|
redo; |
468
|
|
|
|
|
|
|
} |
469
|
|
|
|
|
|
|
|
470
|
227
|
100
|
|
|
|
1195
|
if ($code =~ m{\G (?> $double_q{tr} | $double_q{y} ) $tr_flags }gxco) { |
471
|
2
|
|
|
|
|
12
|
$callback->('transliteration', $-[0], $+[0]); |
472
|
2
|
|
|
|
|
1280
|
$regex = 0; |
473
|
2
|
|
|
|
|
4
|
$canpod = 0; |
474
|
2
|
|
|
|
|
7
|
redo; |
475
|
|
|
|
|
|
|
} |
476
|
|
|
|
|
|
|
|
477
|
225
|
100
|
100
|
|
|
1337
|
if ($code =~ m{\G $single_q{m} $match_flags }gcxo |
|
|
|
100
|
|
|
|
|
478
|
|
|
|
|
|
|
or ($regex == 1 and $code =~ m{\G $match_re $match_flags }gcxo)) { |
479
|
4
|
|
|
|
|
20
|
$callback->('match_regex', $-[0], $+[0]); |
480
|
4
|
|
|
|
|
2155
|
$regex = 0; |
481
|
4
|
|
|
|
|
7
|
$canpod = 0; |
482
|
4
|
|
|
|
|
11
|
redo; |
483
|
|
|
|
|
|
|
} |
484
|
|
|
|
|
|
|
|
485
|
221
|
100
|
|
|
|
594
|
if ($code =~ m{\G $single_q{qr} $compiled_regex_flags }gcxo) { |
486
|
1
|
|
|
|
|
7
|
$callback->('compiled_regex', $-[0], $+[0]); |
487
|
1
|
|
|
|
|
619
|
$regex = 0; |
488
|
1
|
|
|
|
|
2
|
$canpod = 0; |
489
|
1
|
|
|
|
|
5
|
redo; |
490
|
|
|
|
|
|
|
} |
491
|
|
|
|
|
|
|
|
492
|
220
|
50
|
|
|
|
505
|
if ($code =~ m{\G$single_q{q}}gco) { |
493
|
0
|
|
|
|
|
0
|
$callback->('q_string', $-[0], $+[0]); |
494
|
0
|
|
|
|
|
0
|
$regex = 0; |
495
|
0
|
|
|
|
|
0
|
$canpod = 0; |
496
|
0
|
|
|
|
|
0
|
redo; |
497
|
|
|
|
|
|
|
} |
498
|
|
|
|
|
|
|
|
499
|
220
|
50
|
|
|
|
494
|
if ($code =~ m{\G$single_q{qq}}gco) { |
500
|
0
|
|
|
|
|
0
|
$callback->('qq_string', $-[0], $+[0]); |
501
|
0
|
|
|
|
|
0
|
$regex = 0; |
502
|
0
|
|
|
|
|
0
|
$canpod = 0; |
503
|
0
|
|
|
|
|
0
|
redo; |
504
|
|
|
|
|
|
|
} |
505
|
|
|
|
|
|
|
|
506
|
220
|
50
|
|
|
|
510
|
if ($code =~ m{\G$single_q{qw}}gco) { |
507
|
0
|
|
|
|
|
0
|
$callback->('qw_string', $-[0], $+[0]); |
508
|
0
|
|
|
|
|
0
|
$regex = 0; |
509
|
0
|
|
|
|
|
0
|
$canpod = 0; |
510
|
0
|
|
|
|
|
0
|
redo; |
511
|
|
|
|
|
|
|
} |
512
|
|
|
|
|
|
|
|
513
|
220
|
50
|
|
|
|
523
|
if ($code =~ m{\G$single_q{qx}}gco) { |
514
|
0
|
|
|
|
|
0
|
$callback->('qx_string', $-[0], $+[0]); |
515
|
0
|
|
|
|
|
0
|
$regex = 0; |
516
|
0
|
|
|
|
|
0
|
$canpod = 0; |
517
|
0
|
|
|
|
|
0
|
redo; |
518
|
|
|
|
|
|
|
} |
519
|
|
|
|
|
|
|
} |
520
|
|
|
|
|
|
|
|
521
|
|
|
|
|
|
|
# continue |
522
|
|
|
|
|
|
|
} |
523
|
|
|
|
|
|
|
|
524
|
223
|
100
|
|
|
|
592
|
if ($code =~ m{\G$str_dq}gco) { |
525
|
10
|
|
|
|
|
52
|
$callback->('double_quoted_string', $-[0], $+[0]); |
526
|
10
|
|
|
|
|
5830
|
$regex = 0; |
527
|
10
|
|
|
|
|
19
|
$canpod = 0; |
528
|
10
|
|
|
|
|
16
|
$flat = 0; |
529
|
10
|
|
|
|
|
22
|
redo; |
530
|
|
|
|
|
|
|
} |
531
|
|
|
|
|
|
|
|
532
|
213
|
50
|
|
|
|
522
|
if ($code =~ m{\G$str_sq}gco) { |
533
|
0
|
|
|
|
|
0
|
$callback->('single_quoted_string', $-[0], $+[0]); |
534
|
0
|
|
|
|
|
0
|
$regex = 0; |
535
|
0
|
|
|
|
|
0
|
$canpod = 0; |
536
|
0
|
|
|
|
|
0
|
$flat = 0; |
537
|
0
|
|
|
|
|
0
|
redo; |
538
|
|
|
|
|
|
|
} |
539
|
|
|
|
|
|
|
|
540
|
213
|
50
|
|
|
|
492
|
if ($code =~ m{\G$str_bq}gco) { |
541
|
0
|
|
|
|
|
0
|
$callback->('backtick', $-[0], $+[0]); |
542
|
0
|
|
|
|
|
0
|
$regex = 0; |
543
|
0
|
|
|
|
|
0
|
$canpod = 0; |
544
|
0
|
|
|
|
|
0
|
$flat = 0; |
545
|
0
|
|
|
|
|
0
|
redo; |
546
|
|
|
|
|
|
|
} |
547
|
|
|
|
|
|
|
|
548
|
213
|
100
|
|
|
|
408
|
if ($code =~ /\G;/cg) { |
549
|
4
|
|
|
|
|
25
|
$callback->('semicolon', $-[0], $+[0]); |
550
|
4
|
|
|
|
|
2266
|
$canpod = 1; |
551
|
4
|
|
|
|
|
22
|
$regex = 1; |
552
|
4
|
|
|
|
|
11
|
$postfix_op = 0; |
553
|
4
|
|
|
|
|
9
|
$proto = 0; |
554
|
4
|
|
|
|
|
7
|
$flat = 0; |
555
|
4
|
|
|
|
|
10
|
redo; |
556
|
|
|
|
|
|
|
} |
557
|
|
|
|
|
|
|
|
558
|
209
|
50
|
|
|
|
415
|
if ($code =~ /\G=>/cg) { |
559
|
0
|
|
|
|
|
0
|
$callback->('fat_comma', $-[0], $+[0]); |
560
|
0
|
|
|
|
|
0
|
$regex = 1; |
561
|
0
|
|
|
|
|
0
|
$postfix_op = 0; |
562
|
0
|
|
|
|
|
0
|
$canpod = 0; |
563
|
0
|
|
|
|
|
0
|
$flat = 0; |
564
|
0
|
|
|
|
|
0
|
redo; |
565
|
|
|
|
|
|
|
} |
566
|
|
|
|
|
|
|
|
567
|
209
|
100
|
|
|
|
391
|
if ($code =~ /\G,/cg) { |
568
|
3
|
|
|
|
|
17
|
$callback->('comma', $-[0], $+[0]); |
569
|
3
|
|
|
|
|
1680
|
$regex = 1; |
570
|
3
|
|
|
|
|
5
|
$postfix_op = 0; |
571
|
3
|
|
|
|
|
6
|
$canpod = 0; |
572
|
3
|
|
|
|
|
6
|
$flat = 0; |
573
|
3
|
|
|
|
|
6
|
redo; |
574
|
|
|
|
|
|
|
} |
575
|
|
|
|
|
|
|
|
576
|
206
|
50
|
|
|
|
745
|
if ($code =~ m{\G$vstring}gco) { |
577
|
0
|
|
|
|
|
0
|
$callback->('v_string', $-[0], $+[0]); |
578
|
0
|
|
|
|
|
0
|
$regex = 0; |
579
|
0
|
|
|
|
|
0
|
$canpod = 0; |
580
|
0
|
|
|
|
|
0
|
redo; |
581
|
|
|
|
|
|
|
} |
582
|
|
|
|
|
|
|
|
583
|
206
|
100
|
|
|
|
451
|
if ($code =~ m{\G$perl_filetests\b}gco) { |
584
|
1
|
|
|
|
|
7
|
my @pos = ($-[0], $+[0]); |
585
|
1
|
|
|
|
|
6
|
my $is_bare_word = ($code =~ /\G(?=\h*=>)/); |
586
|
|
|
|
|
|
|
|
587
|
1
|
50
|
|
|
|
8
|
$callback->(($is_bare_word ? 'bare_word' : 'file_test'), @pos); |
588
|
|
|
|
|
|
|
|
589
|
1
|
50
|
|
|
|
602
|
if ($is_bare_word) { |
590
|
0
|
|
|
|
|
0
|
$canpod = 0; |
591
|
0
|
|
|
|
|
0
|
$regex = 0; |
592
|
|
|
|
|
|
|
} |
593
|
|
|
|
|
|
|
else { |
594
|
1
|
|
|
|
|
3
|
$regex = 1; # ambiguous, but possible |
595
|
1
|
|
|
|
|
2
|
$postfix_op = 0; |
596
|
1
|
|
|
|
|
3
|
$canpod = 0; |
597
|
|
|
|
|
|
|
} |
598
|
1
|
|
|
|
|
2
|
redo; |
599
|
|
|
|
|
|
|
} |
600
|
|
|
|
|
|
|
|
601
|
205
|
50
|
|
|
|
484
|
if ($code =~ /\G(?=__)/) { |
602
|
0
|
0
|
|
|
|
0
|
if ($code =~ m{\G__(?>DATA|END)__\b\h*+(?!=>).*\z}gcs) { |
603
|
0
|
|
|
|
|
0
|
$callback->('data', $-[0], $+[0]); |
604
|
0
|
|
|
|
|
0
|
redo; |
605
|
|
|
|
|
|
|
} |
606
|
|
|
|
|
|
|
|
607
|
0
|
0
|
|
|
|
0
|
if ($code =~ m{\G__(?>SUB|FILE|PACKAGE|LINE)__\b(?!\h*+=>)}gc) { |
608
|
0
|
|
|
|
|
0
|
$callback->('special_keyword', $-[0], $+[0]); |
609
|
0
|
|
|
|
|
0
|
$canpod = 0; |
610
|
0
|
|
|
|
|
0
|
$regex = 0; |
611
|
0
|
|
|
|
|
0
|
redo; |
612
|
|
|
|
|
|
|
} |
613
|
|
|
|
|
|
|
|
614
|
|
|
|
|
|
|
# continue |
615
|
|
|
|
|
|
|
} |
616
|
|
|
|
|
|
|
|
617
|
205
|
100
|
100
|
|
|
835
|
if ($regex == 1 and $code =~ /\G(?
|
|
|
|
100
|
|
|
|
|
618
|
1
|
|
|
|
|
7
|
$callback->('glob_readline', $-[0], $+[0]); |
619
|
1
|
|
|
|
|
573
|
$regex = 0; |
620
|
1
|
|
|
|
|
3
|
$canpod = 0; |
621
|
1
|
|
|
|
|
2
|
redo; |
622
|
|
|
|
|
|
|
} |
623
|
|
|
|
|
|
|
|
624
|
204
|
100
|
|
|
|
762
|
if ($code =~ m{\G$assignment_operators}gco) { |
625
|
16
|
|
|
|
|
80
|
$callback->('assignment_operator', $-[0], $+[0]); |
626
|
16
|
100
|
|
|
|
9008
|
if ($format) { |
627
|
2
|
50
|
|
|
|
24
|
if (substr($code, $-[0], $+[0] - $-[0]) eq '=') { |
628
|
2
|
|
|
|
|
4
|
$format = 0; |
629
|
2
|
|
|
|
|
4
|
$expect_format = 1; |
630
|
|
|
|
|
|
|
} |
631
|
|
|
|
|
|
|
} |
632
|
16
|
|
|
|
|
30
|
$regex = 1; |
633
|
16
|
|
|
|
|
25
|
$canpod = 0; |
634
|
16
|
|
|
|
|
24
|
$flat = 0; |
635
|
16
|
|
|
|
|
73
|
redo; |
636
|
|
|
|
|
|
|
} |
637
|
|
|
|
|
|
|
|
638
|
188
|
50
|
|
|
|
349
|
if ($code =~ /\G->/cg) { |
639
|
0
|
|
|
|
|
0
|
$callback->('dereference_operator', $-[0], $+[0]); |
640
|
0
|
|
|
|
|
0
|
$regex = 0; |
641
|
0
|
|
|
|
|
0
|
$canpod = 0; |
642
|
0
|
|
|
|
|
0
|
$flat = 1; |
643
|
0
|
|
|
|
|
0
|
redo; |
644
|
|
|
|
|
|
|
} |
645
|
|
|
|
|
|
|
|
646
|
188
|
100
|
66
|
|
|
915
|
if ($code =~ m{\G$operators}gco or $code =~ /\Gx(?=[0-9\W])/cg) { |
647
|
50
|
|
|
|
|
234
|
$callback->('operator', $-[0], $+[0]); |
648
|
50
|
100
|
|
|
|
28941
|
if (substr($code, $-[0], ($+[0] - $-[0])) =~ /^$postfix_operators\z/o) { |
649
|
5
|
|
|
|
|
14
|
$postfix_op = 1; |
650
|
|
|
|
|
|
|
} |
651
|
|
|
|
|
|
|
else { |
652
|
45
|
|
|
|
|
133
|
$postfix_op = 0; |
653
|
|
|
|
|
|
|
} |
654
|
50
|
|
|
|
|
89
|
$canpod = 0; |
655
|
50
|
|
|
|
|
88
|
$regex = 1; |
656
|
50
|
|
|
|
|
62
|
$flat = 0; |
657
|
50
|
|
|
|
|
95
|
redo; |
658
|
|
|
|
|
|
|
} |
659
|
|
|
|
|
|
|
|
660
|
138
|
50
|
|
|
|
339
|
if ($code =~ m{\G$hex_num}gco) { |
661
|
0
|
|
|
|
|
0
|
$callback->('hex_number', $-[0], $+[0]); |
662
|
0
|
|
|
|
|
0
|
$regex = 0; |
663
|
0
|
|
|
|
|
0
|
$canpod = 0; |
664
|
0
|
|
|
|
|
0
|
redo; |
665
|
|
|
|
|
|
|
} |
666
|
|
|
|
|
|
|
|
667
|
138
|
50
|
|
|
|
337
|
if ($code =~ m{\G$binary_num}gco) { |
668
|
0
|
|
|
|
|
0
|
$callback->('binary_number', $-[0], $+[0]); |
669
|
0
|
|
|
|
|
0
|
$regex = 0; |
670
|
0
|
|
|
|
|
0
|
$canpod = 0; |
671
|
0
|
|
|
|
|
0
|
redo; |
672
|
|
|
|
|
|
|
} |
673
|
|
|
|
|
|
|
|
674
|
138
|
100
|
|
|
|
638
|
if ($code =~ m{\G$number}gco) { |
675
|
66
|
|
|
|
|
318
|
$callback->('number', $-[0], $+[0]); |
676
|
66
|
|
|
|
|
37448
|
$regex = 0; |
677
|
66
|
|
|
|
|
120
|
$canpod = 0; |
678
|
66
|
|
|
|
|
132
|
redo; |
679
|
|
|
|
|
|
|
} |
680
|
|
|
|
|
|
|
|
681
|
72
|
100
|
|
|
|
174
|
if ($code =~ m{\GSTD(?>OUT|ERR|IN)\b}gc) { |
682
|
2
|
|
|
|
|
14
|
$callback->('special_fh', $-[0], $+[0]); |
683
|
2
|
|
|
|
|
1068
|
$regex = 1; |
684
|
2
|
|
|
|
|
5
|
$postfix_op = 0; |
685
|
2
|
|
|
|
|
5
|
$canpod = 0; |
686
|
2
|
|
|
|
|
6
|
redo; |
687
|
|
|
|
|
|
|
} |
688
|
|
|
|
|
|
|
|
689
|
70
|
100
|
|
|
|
359
|
if ($code =~ m{\G$var_name}gco) { |
690
|
4
|
50
|
|
|
|
30
|
$callback->(($proto == 1 ? 'sub_name' : 'bare_word'), $-[0], $+[0]); |
691
|
4
|
|
|
|
|
2200
|
$regex = 0; |
692
|
4
|
|
|
|
|
10
|
$canpod = 0; |
693
|
4
|
|
|
|
|
7
|
$flat = 0; |
694
|
4
|
|
|
|
|
10
|
redo; |
695
|
|
|
|
|
|
|
} |
696
|
|
|
|
|
|
|
|
697
|
66
|
50
|
|
|
|
198
|
if ($code =~ /\G(.)/cgs) { |
698
|
0
|
|
|
|
|
0
|
$callback->('unknown_char', $-[0], $+[0]); |
699
|
0
|
|
|
|
|
0
|
redo; |
700
|
|
|
|
|
|
|
} |
701
|
|
|
|
|
|
|
|
702
|
|
|
|
|
|
|
# all done |
703
|
|
|
|
|
|
|
} |
704
|
|
|
|
|
|
|
|
705
|
66
|
|
|
|
|
1632
|
return pos($code); |
706
|
|
|
|
|
|
|
} |
707
|
|
|
|
|
|
|
|
708
|
|
|
|
|
|
|
1; |
709
|
|
|
|
|
|
|
|
710
|
|
|
|
|
|
|
=head1 SYNOPSIS |
711
|
|
|
|
|
|
|
|
712
|
|
|
|
|
|
|
use Perl::Tokenizer; |
713
|
|
|
|
|
|
|
my $code = 'my $num = 42;'; |
714
|
|
|
|
|
|
|
perl_tokens { print "@_\n" } $code; |
715
|
|
|
|
|
|
|
|
716
|
|
|
|
|
|
|
=head1 DESCRIPTION |
717
|
|
|
|
|
|
|
|
718
|
|
|
|
|
|
|
Perl::Tokenizer is a tiny tokenizer which splits a given Perl code into a list of tokens, using the power of regular expressions. |
719
|
|
|
|
|
|
|
|
720
|
|
|
|
|
|
|
=head1 SUBROUTINES |
721
|
|
|
|
|
|
|
|
722
|
|
|
|
|
|
|
=over 4 |
723
|
|
|
|
|
|
|
|
724
|
|
|
|
|
|
|
=item perl_tokens(&$) |
725
|
|
|
|
|
|
|
|
726
|
|
|
|
|
|
|
This function takes a callback subroutine and a string. The subroutine is called for each token in real-time. |
727
|
|
|
|
|
|
|
|
728
|
|
|
|
|
|
|
perl_tokens { |
729
|
|
|
|
|
|
|
my ($token, $pos_beg, $pos_end) = @_; |
730
|
|
|
|
|
|
|
... |
731
|
|
|
|
|
|
|
} $code; |
732
|
|
|
|
|
|
|
|
733
|
|
|
|
|
|
|
The positions are absolute to the string. |
734
|
|
|
|
|
|
|
|
735
|
|
|
|
|
|
|
=back |
736
|
|
|
|
|
|
|
|
737
|
|
|
|
|
|
|
=head2 EXPORT |
738
|
|
|
|
|
|
|
|
739
|
|
|
|
|
|
|
The function B is exported by default. This is the only function provided by this module. |
740
|
|
|
|
|
|
|
|
741
|
|
|
|
|
|
|
=head1 TOKENS |
742
|
|
|
|
|
|
|
|
743
|
|
|
|
|
|
|
The standard token names that are available are: |
744
|
|
|
|
|
|
|
|
745
|
|
|
|
|
|
|
format .................. Format text |
746
|
|
|
|
|
|
|
heredoc_beg ............. The beginning of a here-document ('<<"EOT"') |
747
|
|
|
|
|
|
|
heredoc ................. The content of a here-document |
748
|
|
|
|
|
|
|
pod ..................... An inline POD document, until '=cut' or end of the file |
749
|
|
|
|
|
|
|
horizontal_space ........ Horizontal whitespace (matched by /\h/) |
750
|
|
|
|
|
|
|
vertical_space .......... Vertical whitespace (matched by /\v/) |
751
|
|
|
|
|
|
|
other_space ............. Whitespace that is neither vertical nor horizontal (matched by /\s/) |
752
|
|
|
|
|
|
|
var_name ................ Alphanumeric name of a variable (excluding the sigil) |
753
|
|
|
|
|
|
|
special_var_name ........ Non-alphanumeric name of a variable, such as $/ or $^H (excluding the sigil) |
754
|
|
|
|
|
|
|
sub_name ................ Subroutine name |
755
|
|
|
|
|
|
|
sub_proto ............... Subroutine prototype |
756
|
|
|
|
|
|
|
comment ................. A #-to-newline comment (excluding the newline) |
757
|
|
|
|
|
|
|
scalar_sigil ............ The sigil of a scalar variable: '$' |
758
|
|
|
|
|
|
|
array_sigil ............. The sigil of an array variable: '@' |
759
|
|
|
|
|
|
|
hash_sigil .............. The sigil of a hash variable: '%' |
760
|
|
|
|
|
|
|
glob_sigil .............. The sigil of a glob symbol: '*' |
761
|
|
|
|
|
|
|
ampersand_sigil ......... The sigil of a subroutine call: '&' |
762
|
|
|
|
|
|
|
parenthesis_open ........ Open parenthesis: '(' |
763
|
|
|
|
|
|
|
parenthesis_close ....... Closed parenthesis: ')' |
764
|
|
|
|
|
|
|
right_bracket_open ...... Open right bracket: '[' |
765
|
|
|
|
|
|
|
right_bracket_close ..... Closed right bracket: ']' |
766
|
|
|
|
|
|
|
curly_bracket_open ...... Open curly bracket: '{' |
767
|
|
|
|
|
|
|
curly_bracket_close ..... Closed curly bracket: '}' |
768
|
|
|
|
|
|
|
substitution ............ Regex substitution: s/.../.../ |
769
|
|
|
|
|
|
|
transliteration.......... Transliteration: tr/.../.../ or y/.../.../ |
770
|
|
|
|
|
|
|
match_regex ............. Regex in matching context: m/.../ |
771
|
|
|
|
|
|
|
compiled_regex .......... Quoted compiled regex: qr/.../ |
772
|
|
|
|
|
|
|
q_string ................ Single quoted string: q/.../ |
773
|
|
|
|
|
|
|
qq_string ............... Double quoted string: qq/.../ |
774
|
|
|
|
|
|
|
qw_string ............... List of quoted words: qw/.../ |
775
|
|
|
|
|
|
|
qx_string ............... System command quoted string: qx/.../ |
776
|
|
|
|
|
|
|
backtick ................ Backtick system command quoted string: `...` |
777
|
|
|
|
|
|
|
single_quoted_string .... Single quoted string, as: '...' |
778
|
|
|
|
|
|
|
double_quoted_string .... Double quoted string, as: "..." |
779
|
|
|
|
|
|
|
bare_word ............... Unquoted string |
780
|
|
|
|
|
|
|
glob_readline ........... or |
781
|
|
|
|
|
|
|
v_string ................ Version string: "vX" or "X.X.X" |
782
|
|
|
|
|
|
|
file_test ............... File test operator (-X), such as: "-d", "-e", etc... |
783
|
|
|
|
|
|
|
data .................... The content of `__DATA__` or `__END__` sections |
784
|
|
|
|
|
|
|
keyword ................. Regular Perl keyword, such as: `if`, `else`, etc... |
785
|
|
|
|
|
|
|
special_keyword ......... Special Perl keyword, such as: `__PACKAGE__`, `__FILE__`, etc... |
786
|
|
|
|
|
|
|
comma ................... Comma: ',' |
787
|
|
|
|
|
|
|
fat_comma ............... Fat comma: '=>' |
788
|
|
|
|
|
|
|
operator ................ Primitive operator, such as: '+', '||', etc... |
789
|
|
|
|
|
|
|
assignment_operator ..... '=' or any assignment operator: '+=', '||=', etc... |
790
|
|
|
|
|
|
|
dereference_operator .... Arrow dereference operator: '->' |
791
|
|
|
|
|
|
|
hex_number .............. Hexadecimal literal number: 0x... |
792
|
|
|
|
|
|
|
binary_number ........... Binary literal number: 0b... |
793
|
|
|
|
|
|
|
number .................. Decimal literal number, such as 42, 3.1e4, etc... |
794
|
|
|
|
|
|
|
special_fh .............. Special file-handle name, such as 'STDIN', 'STDOUT', etc... |
795
|
|
|
|
|
|
|
unknown_char ............ Unknown or unexpected character |
796
|
|
|
|
|
|
|
|
797
|
|
|
|
|
|
|
=head1 EXAMPLE |
798
|
|
|
|
|
|
|
|
799
|
|
|
|
|
|
|
For this code: |
800
|
|
|
|
|
|
|
|
801
|
|
|
|
|
|
|
my $num = 42; |
802
|
|
|
|
|
|
|
|
803
|
|
|
|
|
|
|
it generates the following tokens: |
804
|
|
|
|
|
|
|
|
805
|
|
|
|
|
|
|
# TOKEN POS |
806
|
|
|
|
|
|
|
( keyword => ( 0, 2) ) |
807
|
|
|
|
|
|
|
( horizontal_space => ( 2, 3) ) |
808
|
|
|
|
|
|
|
( scalar_sigil => ( 3, 4) ) |
809
|
|
|
|
|
|
|
( var_name => ( 4, 7) ) |
810
|
|
|
|
|
|
|
( horizontal_space => ( 7, 8) ) |
811
|
|
|
|
|
|
|
( assignment_operator => ( 8, 9) ) |
812
|
|
|
|
|
|
|
( horizontal_space => ( 9, 10) ) |
813
|
|
|
|
|
|
|
( number => (10, 12) ) |
814
|
|
|
|
|
|
|
( semicolon => (12, 13) ) |
815
|
|
|
|
|
|
|
|
816
|
|
|
|
|
|
|
=head1 REPOSITORY |
817
|
|
|
|
|
|
|
|
818
|
|
|
|
|
|
|
L |
819
|
|
|
|
|
|
|
|
820
|
|
|
|
|
|
|
=head1 AUTHOR |
821
|
|
|
|
|
|
|
|
822
|
|
|
|
|
|
|
Daniel "Trizen" Șuteu, Etrizen@protonmail.comE |
823
|
|
|
|
|
|
|
|
824
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
825
|
|
|
|
|
|
|
|
826
|
|
|
|
|
|
|
Copyright (C) 2013-2017 |
827
|
|
|
|
|
|
|
|
828
|
|
|
|
|
|
|
This library is free software; you can redistribute it and/or modify |
829
|
|
|
|
|
|
|
it under the same terms as Perl itself, either Perl version 5.22.0 or, |
830
|
|
|
|
|
|
|
at your option, any later version of Perl 5 you may have available. |
831
|
|
|
|
|
|
|
|
832
|
|
|
|
|
|
|
=cut |