line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
##################################################################### |
2
|
|
|
|
|
|
|
# |
3
|
|
|
|
|
|
|
# The Perl::Tidy::Tokenizer package is essentially a filter which |
4
|
|
|
|
|
|
|
# reads lines of perl source code from a source object and provides |
5
|
|
|
|
|
|
|
# corresponding tokenized lines through its get_line() method. Lines |
6
|
|
|
|
|
|
|
# flow from the source_object to the caller like this: |
7
|
|
|
|
|
|
|
# |
8
|
|
|
|
|
|
|
# source_object --> LineBuffer_object --> Tokenizer --> calling routine |
9
|
|
|
|
|
|
|
# get_line() get_line() get_line() line_of_tokens |
10
|
|
|
|
|
|
|
# |
11
|
|
|
|
|
|
|
# The source object can be any object with a get_line() method which |
12
|
|
|
|
|
|
|
# supplies one line (a character string) perl call. |
13
|
|
|
|
|
|
|
# The LineBuffer object is created by the Tokenizer. |
14
|
|
|
|
|
|
|
# The Tokenizer returns a reference to a data structure 'line_of_tokens' |
15
|
|
|
|
|
|
|
# containing one tokenized line for each call to its get_line() method. |
16
|
|
|
|
|
|
|
# |
17
|
|
|
|
|
|
|
# WARNING: This is not a real class. Only one tokenizer my be used. |
18
|
|
|
|
|
|
|
# |
19
|
|
|
|
|
|
|
######################################################################## |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
package Perl::Tidy::Tokenizer; |
22
|
38
|
|
|
38
|
|
289
|
use strict; |
|
38
|
|
|
|
|
108
|
|
|
38
|
|
|
|
|
1231
|
|
23
|
38
|
|
|
38
|
|
204
|
use warnings; |
|
38
|
|
|
|
|
90
|
|
|
38
|
|
|
|
|
1379
|
|
24
|
38
|
|
|
38
|
|
238
|
use English qw( -no_match_vars ); |
|
38
|
|
|
|
|
119
|
|
|
38
|
|
|
|
|
234
|
|
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
our $VERSION = '20230701'; |
27
|
|
|
|
|
|
|
|
28
|
38
|
|
|
38
|
|
30891
|
use Perl::Tidy::LineBuffer; |
|
38
|
|
|
|
|
124
|
|
|
38
|
|
|
|
|
1176
|
|
29
|
38
|
|
|
38
|
|
295
|
use Carp; |
|
38
|
|
|
|
|
112
|
|
|
38
|
|
|
|
|
2242
|
|
30
|
|
|
|
|
|
|
|
31
|
38
|
|
|
38
|
|
257
|
use constant DEVEL_MODE => 0; |
|
38
|
|
|
|
|
114
|
|
|
38
|
|
|
|
|
2142
|
|
32
|
38
|
|
|
38
|
|
254
|
use constant EMPTY_STRING => q{}; |
|
38
|
|
|
|
|
104
|
|
|
38
|
|
|
|
|
1796
|
|
33
|
38
|
|
|
38
|
|
325
|
use constant SPACE => q{ }; |
|
38
|
|
|
|
|
120
|
|
|
38
|
|
|
|
|
2281
|
|
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
# Decimal values of some ascii characters for quick checks |
36
|
38
|
|
|
38
|
|
284
|
use constant ORD_TAB => 9; |
|
38
|
|
|
|
|
84
|
|
|
38
|
|
|
|
|
2092
|
|
37
|
38
|
|
|
38
|
|
270
|
use constant ORD_SPACE => 32; |
|
38
|
|
|
|
|
97
|
|
|
38
|
|
|
|
|
1957
|
|
38
|
38
|
|
|
38
|
|
268
|
use constant ORD_PRINTABLE_MIN => 33; |
|
38
|
|
|
|
|
103
|
|
|
38
|
|
|
|
|
2012
|
|
39
|
38
|
|
|
38
|
|
291
|
use constant ORD_PRINTABLE_MAX => 126; |
|
38
|
|
|
|
|
98
|
|
|
38
|
|
|
|
|
7241
|
|
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
# GLOBAL VARIABLES which change during tokenization: |
42
|
|
|
|
|
|
|
# These could also be stored in $self but it is more convenient and |
43
|
|
|
|
|
|
|
# efficient to make them global lexical variables. |
44
|
|
|
|
|
|
|
# INITIALIZER: sub prepare_for_a_new_file |
45
|
|
|
|
|
|
|
my ( |
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
$brace_depth, |
48
|
|
|
|
|
|
|
$context, |
49
|
|
|
|
|
|
|
$current_package, |
50
|
|
|
|
|
|
|
$last_nonblank_block_type, |
51
|
|
|
|
|
|
|
$last_nonblank_token, |
52
|
|
|
|
|
|
|
$last_nonblank_type, |
53
|
|
|
|
|
|
|
$next_sequence_number, |
54
|
|
|
|
|
|
|
$paren_depth, |
55
|
|
|
|
|
|
|
$rbrace_context, |
56
|
|
|
|
|
|
|
$rbrace_package, |
57
|
|
|
|
|
|
|
$rbrace_structural_type, |
58
|
|
|
|
|
|
|
$rbrace_type, |
59
|
|
|
|
|
|
|
$rcurrent_depth, |
60
|
|
|
|
|
|
|
$rcurrent_sequence_number, |
61
|
|
|
|
|
|
|
$rdepth_array, |
62
|
|
|
|
|
|
|
$ris_block_function, |
63
|
|
|
|
|
|
|
$ris_block_list_function, |
64
|
|
|
|
|
|
|
$ris_constant, |
65
|
|
|
|
|
|
|
$ris_user_function, |
66
|
|
|
|
|
|
|
$rnested_statement_type, |
67
|
|
|
|
|
|
|
$rnested_ternary_flag, |
68
|
|
|
|
|
|
|
$rparen_semicolon_count, |
69
|
|
|
|
|
|
|
$rparen_structural_type, |
70
|
|
|
|
|
|
|
$rparen_type, |
71
|
|
|
|
|
|
|
$rsaw_function_definition, |
72
|
|
|
|
|
|
|
$rsaw_use_module, |
73
|
|
|
|
|
|
|
$rsquare_bracket_structural_type, |
74
|
|
|
|
|
|
|
$rsquare_bracket_type, |
75
|
|
|
|
|
|
|
$rstarting_line_of_current_depth, |
76
|
|
|
|
|
|
|
$rtotal_depth, |
77
|
|
|
|
|
|
|
$ruser_function_prototype, |
78
|
|
|
|
|
|
|
$square_bracket_depth, |
79
|
|
|
|
|
|
|
$statement_type, |
80
|
|
|
|
|
|
|
$total_depth, |
81
|
|
|
|
|
|
|
); |
82
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
my ( |
84
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
# GLOBAL CONSTANTS for routines in this package, |
86
|
|
|
|
|
|
|
# INITIALIZER: BEGIN block. |
87
|
|
|
|
|
|
|
%can_start_digraph, |
88
|
|
|
|
|
|
|
%expecting_operator_token, |
89
|
|
|
|
|
|
|
%expecting_operator_types, |
90
|
|
|
|
|
|
|
%expecting_term_token, |
91
|
|
|
|
|
|
|
%expecting_term_types, |
92
|
|
|
|
|
|
|
%is_block_operator, |
93
|
|
|
|
|
|
|
%is_comma_question_colon, |
94
|
|
|
|
|
|
|
%is_digraph, |
95
|
|
|
|
|
|
|
%is_file_test_operator, |
96
|
|
|
|
|
|
|
%is_if_elsif_unless, |
97
|
|
|
|
|
|
|
%is_if_elsif_unless_case_when, |
98
|
|
|
|
|
|
|
%is_indirect_object_taker, |
99
|
|
|
|
|
|
|
%is_keyword_rejecting_question_as_pattern_delimiter, |
100
|
|
|
|
|
|
|
%is_keyword_rejecting_slash_as_pattern_delimiter, |
101
|
|
|
|
|
|
|
%is_keyword_taking_list, |
102
|
|
|
|
|
|
|
%is_keyword_taking_optional_arg, |
103
|
|
|
|
|
|
|
%is_q_qq_qw_qx_qr_s_y_tr_m, |
104
|
|
|
|
|
|
|
%is_q_qq_qx_qr_s_y_tr_m, |
105
|
|
|
|
|
|
|
%is_semicolon_or_t, |
106
|
|
|
|
|
|
|
%is_sort_map_grep, |
107
|
|
|
|
|
|
|
%is_sort_map_grep_eval_do, |
108
|
|
|
|
|
|
|
%is_tetragraph, |
109
|
|
|
|
|
|
|
%is_trigraph, |
110
|
|
|
|
|
|
|
%is_valid_token_type, |
111
|
|
|
|
|
|
|
%other_line_endings, |
112
|
|
|
|
|
|
|
%really_want_term, |
113
|
|
|
|
|
|
|
@closing_brace_names, |
114
|
|
|
|
|
|
|
@opening_brace_names, |
115
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
# GLOBAL VARIABLES which are constant after being configured. |
117
|
|
|
|
|
|
|
# INITIALIZER: BEGIN block and modified by sub check_options |
118
|
|
|
|
|
|
|
%is_code_block_token, |
119
|
|
|
|
|
|
|
%is_keyword, |
120
|
|
|
|
|
|
|
%is_my_our_state, |
121
|
|
|
|
|
|
|
%is_package, |
122
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
# INITIALIZER: sub check_options |
124
|
|
|
|
|
|
|
$code_skipping_pattern_begin, |
125
|
|
|
|
|
|
|
$code_skipping_pattern_end, |
126
|
|
|
|
|
|
|
$rOpts_code_skipping, |
127
|
|
|
|
|
|
|
%is_END_DATA_format_sub, |
128
|
|
|
|
|
|
|
%is_grep_alias, |
129
|
|
|
|
|
|
|
%is_sub, |
130
|
|
|
|
|
|
|
); |
131
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
# possible values of operator_expected() |
133
|
38
|
|
|
38
|
|
321
|
use constant TERM => -1; |
|
38
|
|
|
|
|
145
|
|
|
38
|
|
|
|
|
2163
|
|
134
|
38
|
|
|
38
|
|
274
|
use constant UNKNOWN => 0; |
|
38
|
|
|
|
|
104
|
|
|
38
|
|
|
|
|
1986
|
|
135
|
38
|
|
|
38
|
|
316
|
use constant OPERATOR => 1; |
|
38
|
|
|
|
|
84
|
|
|
38
|
|
|
|
|
2046
|
|
136
|
|
|
|
|
|
|
|
137
|
|
|
|
|
|
|
# possible values of context |
138
|
38
|
|
|
38
|
|
246
|
use constant SCALAR_CONTEXT => -1; |
|
38
|
|
|
|
|
127
|
|
|
38
|
|
|
|
|
5617
|
|
139
|
38
|
|
|
38
|
|
274
|
use constant UNKNOWN_CONTEXT => 0; |
|
38
|
|
|
|
|
82
|
|
|
38
|
|
|
|
|
2882
|
|
140
|
38
|
|
|
38
|
|
278
|
use constant LIST_CONTEXT => 1; |
|
38
|
|
|
|
|
112
|
|
|
38
|
|
|
|
|
1896
|
|
141
|
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
# Maximum number of little messages; probably need not be changed. |
143
|
38
|
|
|
38
|
|
266
|
use constant MAX_NAG_MESSAGES => 6; |
|
38
|
|
|
|
|
115
|
|
|
38
|
|
|
|
|
8286
|
|
144
|
|
|
|
|
|
|
|
145
|
0
|
|
|
|
|
0
|
BEGIN { |
146
|
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
# Array index names for $self. |
148
|
|
|
|
|
|
|
# Do not combine with other BEGIN blocks (c101). |
149
|
38
|
|
|
38
|
|
249989
|
my $i = 0; |
150
|
|
|
|
|
|
|
use constant { |
151
|
38
|
|
|
|
|
23422
|
_rhere_target_list_ => $i++, |
152
|
|
|
|
|
|
|
_in_here_doc_ => $i++, |
153
|
|
|
|
|
|
|
_here_doc_target_ => $i++, |
154
|
|
|
|
|
|
|
_here_quote_character_ => $i++, |
155
|
|
|
|
|
|
|
_in_data_ => $i++, |
156
|
|
|
|
|
|
|
_in_end_ => $i++, |
157
|
|
|
|
|
|
|
_in_format_ => $i++, |
158
|
|
|
|
|
|
|
_in_error_ => $i++, |
159
|
|
|
|
|
|
|
_in_trouble_ => $i++, |
160
|
|
|
|
|
|
|
_warning_count_ => $i++, |
161
|
|
|
|
|
|
|
_html_tag_count_ => $i++, |
162
|
|
|
|
|
|
|
_in_pod_ => $i++, |
163
|
|
|
|
|
|
|
_in_skipped_ => $i++, |
164
|
|
|
|
|
|
|
_in_attribute_list_ => $i++, |
165
|
|
|
|
|
|
|
_in_quote_ => $i++, |
166
|
|
|
|
|
|
|
_quote_target_ => $i++, |
167
|
|
|
|
|
|
|
_line_start_quote_ => $i++, |
168
|
|
|
|
|
|
|
_starting_level_ => $i++, |
169
|
|
|
|
|
|
|
_know_starting_level_ => $i++, |
170
|
|
|
|
|
|
|
_tabsize_ => $i++, |
171
|
|
|
|
|
|
|
_indent_columns_ => $i++, |
172
|
|
|
|
|
|
|
_look_for_hash_bang_ => $i++, |
173
|
|
|
|
|
|
|
_trim_qw_ => $i++, |
174
|
|
|
|
|
|
|
_continuation_indentation_ => $i++, |
175
|
|
|
|
|
|
|
_outdent_labels_ => $i++, |
176
|
|
|
|
|
|
|
_last_line_number_ => $i++, |
177
|
|
|
|
|
|
|
_saw_perl_dash_P_ => $i++, |
178
|
|
|
|
|
|
|
_saw_perl_dash_w_ => $i++, |
179
|
|
|
|
|
|
|
_saw_use_strict_ => $i++, |
180
|
|
|
|
|
|
|
_saw_v_string_ => $i++, |
181
|
|
|
|
|
|
|
_saw_brace_error_ => $i++, |
182
|
|
|
|
|
|
|
_hit_bug_ => $i++, |
183
|
|
|
|
|
|
|
_look_for_autoloader_ => $i++, |
184
|
|
|
|
|
|
|
_look_for_selfloader_ => $i++, |
185
|
|
|
|
|
|
|
_saw_autoloader_ => $i++, |
186
|
|
|
|
|
|
|
_saw_selfloader_ => $i++, |
187
|
|
|
|
|
|
|
_saw_hash_bang_ => $i++, |
188
|
|
|
|
|
|
|
_saw_end_ => $i++, |
189
|
|
|
|
|
|
|
_saw_data_ => $i++, |
190
|
|
|
|
|
|
|
_saw_negative_indentation_ => $i++, |
191
|
|
|
|
|
|
|
_started_tokenizing_ => $i++, |
192
|
|
|
|
|
|
|
_line_buffer_object_ => $i++, |
193
|
|
|
|
|
|
|
_debugger_object_ => $i++, |
194
|
|
|
|
|
|
|
_diagnostics_object_ => $i++, |
195
|
|
|
|
|
|
|
_logger_object_ => $i++, |
196
|
|
|
|
|
|
|
_unexpected_error_count_ => $i++, |
197
|
|
|
|
|
|
|
_started_looking_for_here_target_at_ => $i++, |
198
|
|
|
|
|
|
|
_nearly_matched_here_target_at_ => $i++, |
199
|
|
|
|
|
|
|
_line_of_text_ => $i++, |
200
|
|
|
|
|
|
|
_rlower_case_labels_at_ => $i++, |
201
|
|
|
|
|
|
|
_extended_syntax_ => $i++, |
202
|
|
|
|
|
|
|
_maximum_level_ => $i++, |
203
|
|
|
|
|
|
|
_true_brace_error_count_ => $i++, |
204
|
|
|
|
|
|
|
_rOpts_maximum_level_errors_ => $i++, |
205
|
|
|
|
|
|
|
_rOpts_maximum_unexpected_errors_ => $i++, |
206
|
|
|
|
|
|
|
_rOpts_logfile_ => $i++, |
207
|
|
|
|
|
|
|
_rOpts_ => $i++, |
208
|
|
|
|
|
|
|
_calculate_ci_ => $i++, |
209
|
38
|
|
|
38
|
|
335
|
}; |
|
38
|
|
|
|
|
113
|
|
210
|
|
|
|
|
|
|
} ## end BEGIN |
211
|
|
|
|
|
|
|
|
212
|
|
|
|
|
|
|
{ ## closure for subs to count instances |
213
|
|
|
|
|
|
|
|
214
|
|
|
|
|
|
|
# methods to count instances |
215
|
|
|
|
|
|
|
my $_count = 0; |
216
|
0
|
|
|
0
|
0
|
0
|
sub get_count { return $_count; } |
217
|
556
|
|
|
556
|
|
2540
|
sub _increment_count { return ++$_count } |
218
|
556
|
|
|
556
|
|
1311
|
sub _decrement_count { return --$_count } |
219
|
|
|
|
|
|
|
} |
220
|
|
|
|
|
|
|
|
221
|
|
|
|
|
|
|
sub DESTROY { |
222
|
556
|
|
|
556
|
|
1412
|
my $self = shift; |
223
|
556
|
|
|
|
|
2961
|
$self->_decrement_count(); |
224
|
556
|
|
|
|
|
7795
|
return; |
225
|
|
|
|
|
|
|
} |
226
|
|
|
|
|
|
|
|
227
|
|
|
|
|
|
|
sub AUTOLOAD { |
228
|
|
|
|
|
|
|
|
229
|
|
|
|
|
|
|
# Catch any undefined sub calls so that we are sure to get |
230
|
|
|
|
|
|
|
# some diagnostic information. This sub should never be called |
231
|
|
|
|
|
|
|
# except for a programming error. |
232
|
0
|
|
|
0
|
|
0
|
our $AUTOLOAD; |
233
|
0
|
0
|
|
|
|
0
|
return if ( $AUTOLOAD =~ /\bDESTROY$/ ); |
234
|
0
|
|
|
|
|
0
|
my ( $pkg, $fname, $lno ) = caller(); |
235
|
0
|
|
|
|
|
0
|
my $my_package = __PACKAGE__; |
236
|
0
|
|
|
|
|
0
|
print STDERR <<EOM; |
237
|
|
|
|
|
|
|
====================================================================== |
238
|
|
|
|
|
|
|
Error detected in package '$my_package', version $VERSION |
239
|
|
|
|
|
|
|
Received unexpected AUTOLOAD call for sub '$AUTOLOAD' |
240
|
|
|
|
|
|
|
Called from package: '$pkg' |
241
|
|
|
|
|
|
|
Called from File '$fname' at line '$lno' |
242
|
|
|
|
|
|
|
This error is probably due to a recent programming change |
243
|
|
|
|
|
|
|
====================================================================== |
244
|
|
|
|
|
|
|
EOM |
245
|
0
|
|
|
|
|
0
|
exit 1; |
246
|
|
|
|
|
|
|
} ## end sub AUTOLOAD |
247
|
|
|
|
|
|
|
|
248
|
|
|
|
|
|
|
sub Die { |
249
|
0
|
|
|
0
|
0
|
0
|
my ($msg) = @_; |
250
|
0
|
|
|
|
|
0
|
Perl::Tidy::Die($msg); |
251
|
0
|
|
|
|
|
0
|
croak "unexpected return from Perl::Tidy::Die"; |
252
|
|
|
|
|
|
|
} |
253
|
|
|
|
|
|
|
|
254
|
|
|
|
|
|
|
sub Fault { |
255
|
0
|
|
|
0
|
0
|
0
|
my ( $self, $msg ) = @_; |
256
|
|
|
|
|
|
|
|
257
|
|
|
|
|
|
|
# This routine is called for errors that really should not occur |
258
|
|
|
|
|
|
|
# except if there has been a bug introduced by a recent program change. |
259
|
|
|
|
|
|
|
# Please add comments at calls to Fault to explain why the call |
260
|
|
|
|
|
|
|
# should not occur, and where to look to fix it. |
261
|
0
|
|
|
|
|
0
|
my ( $package0, $filename0, $line0, $subroutine0 ) = caller(0); |
262
|
0
|
|
|
|
|
0
|
my ( $package1, $filename1, $line1, $subroutine1 ) = caller(1); |
263
|
0
|
|
|
|
|
0
|
my ( $package2, $filename2, $line2, $subroutine2 ) = caller(2); |
264
|
0
|
|
|
|
|
0
|
my $pkg = __PACKAGE__; |
265
|
|
|
|
|
|
|
|
266
|
|
|
|
|
|
|
# Catch potential error of Fault not called as a method |
267
|
0
|
|
|
|
|
0
|
my $input_stream_name; |
268
|
0
|
0
|
|
|
|
0
|
if ( !ref($self) ) { |
269
|
0
|
|
|
|
|
0
|
$msg = "Fault not called as a method - please fix\n"; |
270
|
0
|
0
|
0
|
|
|
0
|
if ( $self && length($self) < 200 ) { $msg .= $self } |
|
0
|
|
|
|
|
0
|
|
271
|
0
|
|
|
|
|
0
|
$self = undef; |
272
|
0
|
|
|
|
|
0
|
$input_stream_name = "(UNKNOWN)"; |
273
|
|
|
|
|
|
|
} |
274
|
|
|
|
|
|
|
else { |
275
|
0
|
|
|
|
|
0
|
$input_stream_name = $self->get_input_stream_name(); |
276
|
|
|
|
|
|
|
} |
277
|
|
|
|
|
|
|
|
278
|
0
|
|
|
|
|
0
|
Die(<<EOM); |
279
|
|
|
|
|
|
|
============================================================================== |
280
|
|
|
|
|
|
|
While operating on input stream with name: '$input_stream_name' |
281
|
|
|
|
|
|
|
A fault was detected at line $line0 of sub '$subroutine1' |
282
|
|
|
|
|
|
|
in file '$filename1' |
283
|
|
|
|
|
|
|
which was called from line $line1 of sub '$subroutine2' |
284
|
|
|
|
|
|
|
Message: '$msg' |
285
|
|
|
|
|
|
|
This is probably an error introduced by a recent programming change. |
286
|
|
|
|
|
|
|
$pkg reports VERSION='$VERSION'. |
287
|
|
|
|
|
|
|
============================================================================== |
288
|
|
|
|
|
|
|
EOM |
289
|
|
|
|
|
|
|
|
290
|
|
|
|
|
|
|
# We shouldn't get here, but this return is to keep Perl-Critic from |
291
|
|
|
|
|
|
|
# complaining. |
292
|
0
|
|
|
|
|
0
|
return; |
293
|
|
|
|
|
|
|
} ## end sub Fault |
294
|
|
|
|
|
|
|
|
295
|
|
|
|
|
|
|
sub bad_pattern { |
296
|
|
|
|
|
|
|
|
297
|
|
|
|
|
|
|
# See if a pattern will compile. We have to use a string eval here, |
298
|
|
|
|
|
|
|
# but it should be safe because the pattern has been constructed |
299
|
|
|
|
|
|
|
# by this program. |
300
|
1108
|
|
|
1108
|
0
|
2598
|
my ($pattern) = @_; |
301
|
1108
|
|
|
|
|
82436
|
my $ok = eval "'##'=~/$pattern/"; |
302
|
1108
|
|
33
|
|
|
10116
|
return !defined($ok) || $EVAL_ERROR; |
303
|
|
|
|
|
|
|
} ## end sub bad_pattern |
304
|
|
|
|
|
|
|
|
305
|
|
|
|
|
|
|
sub make_code_skipping_pattern { |
306
|
1108
|
|
|
1108
|
0
|
3389
|
my ( $rOpts, $opt_name, $default ) = @_; |
307
|
1108
|
|
|
|
|
2756
|
my $param = $rOpts->{$opt_name}; |
308
|
1108
|
100
|
|
|
|
2973
|
unless ($param) { $param = $default } |
|
1106
|
|
|
|
|
2203
|
|
309
|
1108
|
|
|
|
|
4601
|
$param =~ s/^\s*//; # allow leading spaces to be like format-skipping |
310
|
1108
|
50
|
|
|
|
4777
|
if ( $param !~ /^#/ ) { |
311
|
0
|
|
|
|
|
0
|
Die("ERROR: the $opt_name parameter '$param' must begin with '#'\n"); |
312
|
|
|
|
|
|
|
} |
313
|
1108
|
|
|
|
|
3863
|
my $pattern = '^\s*' . $param . '\b'; |
314
|
1108
|
50
|
|
|
|
3357
|
if ( bad_pattern($pattern) ) { |
315
|
0
|
|
|
|
|
0
|
Die( |
316
|
|
|
|
|
|
|
"ERROR: the $opt_name parameter '$param' causes the invalid regex '$pattern'\n" |
317
|
|
|
|
|
|
|
); |
318
|
|
|
|
|
|
|
} |
319
|
1108
|
|
|
|
|
3945
|
return $pattern; |
320
|
|
|
|
|
|
|
} ## end sub make_code_skipping_pattern |
321
|
|
|
|
|
|
|
|
322
|
|
|
|
|
|
|
sub check_options { |
323
|
|
|
|
|
|
|
|
324
|
|
|
|
|
|
|
# Check Tokenizer parameters |
325
|
554
|
|
|
554
|
0
|
1543
|
my $rOpts = shift; |
326
|
|
|
|
|
|
|
|
327
|
554
|
|
|
|
|
2188
|
%is_sub = (); |
328
|
554
|
|
|
|
|
1834
|
$is_sub{'sub'} = 1; |
329
|
|
|
|
|
|
|
|
330
|
554
|
|
|
|
|
4129
|
%is_END_DATA_format_sub = ( |
331
|
|
|
|
|
|
|
'__END__' => 1, |
332
|
|
|
|
|
|
|
'__DATA__' => 1, |
333
|
|
|
|
|
|
|
'format' => 1, |
334
|
|
|
|
|
|
|
'sub' => 1, |
335
|
|
|
|
|
|
|
); |
336
|
|
|
|
|
|
|
|
337
|
|
|
|
|
|
|
# Install any aliases to 'sub' |
338
|
554
|
50
|
|
|
|
2205
|
if ( $rOpts->{'sub-alias-list'} ) { |
339
|
|
|
|
|
|
|
|
340
|
|
|
|
|
|
|
# Note that any 'sub-alias-list' has been preprocessed to |
341
|
|
|
|
|
|
|
# be a trimmed, space-separated list which includes 'sub' |
342
|
|
|
|
|
|
|
# for example, it might be 'sub method fun' |
343
|
554
|
|
|
|
|
4373
|
my @sub_alias_list = split /\s+/, $rOpts->{'sub-alias-list'}; |
344
|
554
|
|
|
|
|
2355
|
foreach my $word (@sub_alias_list) { |
345
|
1114
|
|
|
|
|
2421
|
$is_sub{$word} = 1; |
346
|
1114
|
|
|
|
|
2684
|
$is_END_DATA_format_sub{$word} = 1; |
347
|
|
|
|
|
|
|
} |
348
|
|
|
|
|
|
|
} |
349
|
|
|
|
|
|
|
|
350
|
|
|
|
|
|
|
#------------------------------------------------ |
351
|
|
|
|
|
|
|
# Update hash values for any -use-feature options |
352
|
|
|
|
|
|
|
#------------------------------------------------ |
353
|
554
|
|
|
|
|
4419
|
my $use_feature_class = $rOpts->{'use-feature'} =~ /\bclass\b/; |
354
|
|
|
|
|
|
|
|
355
|
|
|
|
|
|
|
# These are the main updates for this option. There are additional |
356
|
|
|
|
|
|
|
# changes elsewhere, usually indicated with a comment 'rt145706' |
357
|
|
|
|
|
|
|
|
358
|
|
|
|
|
|
|
# Update hash values for use_feature=class, added for rt145706 |
359
|
|
|
|
|
|
|
# see 'perlclass.pod' |
360
|
|
|
|
|
|
|
|
361
|
|
|
|
|
|
|
# IMPORTANT: We are changing global hash values initially set in a BEGIN |
362
|
|
|
|
|
|
|
# block. Values must be defined (true or false) for each of these new |
363
|
|
|
|
|
|
|
# words whether true or false. Otherwise, programs using the module which |
364
|
|
|
|
|
|
|
# change options between runs (such as test code) will have |
365
|
|
|
|
|
|
|
# incorrect settings and fail. |
366
|
|
|
|
|
|
|
|
367
|
|
|
|
|
|
|
# There are 4 new keywords: |
368
|
|
|
|
|
|
|
|
369
|
|
|
|
|
|
|
# 'class' - treated specially as generalization of 'package' |
370
|
|
|
|
|
|
|
# Note: we must not set 'class' to be a keyword to avoid problems |
371
|
|
|
|
|
|
|
# with older uses. |
372
|
554
|
|
|
|
|
2475
|
$is_package{'class'} = $use_feature_class; |
373
|
|
|
|
|
|
|
|
374
|
|
|
|
|
|
|
# 'method' - treated like sub using the sub-alias-list option |
375
|
|
|
|
|
|
|
# Note: we must not set 'method' to be a keyword to avoid problems |
376
|
|
|
|
|
|
|
# with older uses. |
377
|
|
|
|
|
|
|
|
378
|
|
|
|
|
|
|
# 'field' - added as a keyword, and works like 'my' |
379
|
554
|
|
|
|
|
1764
|
$is_keyword{'field'} = $use_feature_class; |
380
|
554
|
|
|
|
|
1511
|
$is_my_our_state{'field'} = $use_feature_class; |
381
|
|
|
|
|
|
|
|
382
|
|
|
|
|
|
|
# 'ADJUST' - added as a keyword and works like 'BEGIN' |
383
|
|
|
|
|
|
|
# TODO: if ADJUST gets a paren list, this will need to be updated |
384
|
554
|
|
|
|
|
1587
|
$is_keyword{'ADJUST'} = $use_feature_class; |
385
|
554
|
|
|
|
|
1587
|
$is_code_block_token{'ADJUST'} = $use_feature_class; |
386
|
|
|
|
|
|
|
|
387
|
554
|
|
|
|
|
2142
|
%is_grep_alias = (); |
388
|
554
|
50
|
|
|
|
1964
|
if ( $rOpts->{'grep-alias-list'} ) { |
389
|
|
|
|
|
|
|
|
390
|
|
|
|
|
|
|
# Note that 'grep-alias-list' has been preprocessed to be a trimmed, |
391
|
|
|
|
|
|
|
# space-separated list |
392
|
554
|
|
|
|
|
3797
|
my @q = split /\s+/, $rOpts->{'grep-alias-list'}; |
393
|
554
|
|
|
|
|
4306
|
@{is_grep_alias}{@q} = (1) x scalar(@q); |
394
|
|
|
|
|
|
|
} |
395
|
|
|
|
|
|
|
|
396
|
554
|
|
|
|
|
1678
|
$rOpts_code_skipping = $rOpts->{'code-skipping'}; |
397
|
554
|
|
|
|
|
2823
|
$code_skipping_pattern_begin = |
398
|
|
|
|
|
|
|
make_code_skipping_pattern( $rOpts, 'code-skipping-begin', '#<<V' ); |
399
|
554
|
|
|
|
|
2105
|
$code_skipping_pattern_end = |
400
|
|
|
|
|
|
|
make_code_skipping_pattern( $rOpts, 'code-skipping-end', '#>>V' ); |
401
|
|
|
|
|
|
|
|
402
|
554
|
|
|
|
|
2752
|
return; |
403
|
|
|
|
|
|
|
} ## end sub check_options |
404
|
|
|
|
|
|
|
|
405
|
|
|
|
|
|
|
sub new { |
406
|
|
|
|
|
|
|
|
407
|
556
|
|
|
556
|
0
|
5789
|
my ( $class, @args ) = @_; |
408
|
|
|
|
|
|
|
|
409
|
|
|
|
|
|
|
# Note: 'tabs' and 'indent_columns' are temporary and should be |
410
|
|
|
|
|
|
|
# removed asap |
411
|
556
|
|
|
|
|
8851
|
my %defaults = ( |
412
|
|
|
|
|
|
|
source_object => undef, |
413
|
|
|
|
|
|
|
debugger_object => undef, |
414
|
|
|
|
|
|
|
diagnostics_object => undef, |
415
|
|
|
|
|
|
|
logger_object => undef, |
416
|
|
|
|
|
|
|
starting_level => undef, |
417
|
|
|
|
|
|
|
indent_columns => 4, |
418
|
|
|
|
|
|
|
tabsize => 8, |
419
|
|
|
|
|
|
|
look_for_hash_bang => 0, |
420
|
|
|
|
|
|
|
trim_qw => 1, |
421
|
|
|
|
|
|
|
look_for_autoloader => 1, |
422
|
|
|
|
|
|
|
look_for_selfloader => 1, |
423
|
|
|
|
|
|
|
starting_line_number => 1, |
424
|
|
|
|
|
|
|
extended_syntax => 0, |
425
|
|
|
|
|
|
|
rOpts => {}, |
426
|
|
|
|
|
|
|
); |
427
|
556
|
|
|
|
|
7890
|
my %args = ( %defaults, @args ); |
428
|
|
|
|
|
|
|
|
429
|
|
|
|
|
|
|
# we are given an object with a get_line() method to supply source lines |
430
|
556
|
|
|
|
|
2259
|
my $source_object = $args{source_object}; |
431
|
556
|
|
|
|
|
1627
|
my $rOpts = $args{rOpts}; |
432
|
|
|
|
|
|
|
|
433
|
|
|
|
|
|
|
# we create another object with a get_line() and peek_ahead() method |
434
|
556
|
|
|
|
|
4097
|
my $line_buffer_object = Perl::Tidy::LineBuffer->new($source_object); |
435
|
|
|
|
|
|
|
|
436
|
|
|
|
|
|
|
# Tokenizer state data is as follows: |
437
|
|
|
|
|
|
|
# _rhere_target_list_ reference to list of here-doc targets |
438
|
|
|
|
|
|
|
# _here_doc_target_ the target string for a here document |
439
|
|
|
|
|
|
|
# _here_quote_character_ the type of here-doc quoting (" ' ` or none) |
440
|
|
|
|
|
|
|
# to determine if interpolation is done |
441
|
|
|
|
|
|
|
# _quote_target_ character we seek if chasing a quote |
442
|
|
|
|
|
|
|
# _line_start_quote_ line where we started looking for a long quote |
443
|
|
|
|
|
|
|
# _in_here_doc_ flag indicating if we are in a here-doc |
444
|
|
|
|
|
|
|
# _in_pod_ flag set if we are in pod documentation |
445
|
|
|
|
|
|
|
# _in_skipped_ flag set if we are in a skipped section |
446
|
|
|
|
|
|
|
# _in_error_ flag set if we saw severe error (binary in script) |
447
|
|
|
|
|
|
|
# _in_trouble_ set if we saw a troublesome lexical like 'my sub s' |
448
|
|
|
|
|
|
|
# _warning_count_ number of calls to logger sub warning |
449
|
|
|
|
|
|
|
# _html_tag_count_ number of apparent html tags seen (indicates html) |
450
|
|
|
|
|
|
|
# _in_data_ flag set if we are in __DATA__ section |
451
|
|
|
|
|
|
|
# _in_end_ flag set if we are in __END__ section |
452
|
|
|
|
|
|
|
# _in_format_ flag set if we are in a format description |
453
|
|
|
|
|
|
|
# _in_attribute_list_ flag telling if we are looking for attributes |
454
|
|
|
|
|
|
|
# _in_quote_ flag telling if we are chasing a quote |
455
|
|
|
|
|
|
|
# _starting_level_ indentation level of first line |
456
|
|
|
|
|
|
|
# _line_buffer_object_ object with get_line() method to supply source code |
457
|
|
|
|
|
|
|
# _diagnostics_object_ place to write debugging information |
458
|
|
|
|
|
|
|
# _unexpected_error_count_ error count used to limit output |
459
|
|
|
|
|
|
|
# _lower_case_labels_at_ line numbers where lower case labels seen |
460
|
|
|
|
|
|
|
# _hit_bug_ program bug detected |
461
|
|
|
|
|
|
|
|
462
|
556
|
|
|
|
|
1621
|
my $self = []; |
463
|
556
|
|
|
|
|
1726
|
$self->[_rhere_target_list_] = []; |
464
|
556
|
|
|
|
|
1298
|
$self->[_in_here_doc_] = 0; |
465
|
556
|
|
|
|
|
1545
|
$self->[_here_doc_target_] = EMPTY_STRING; |
466
|
556
|
|
|
|
|
1449
|
$self->[_here_quote_character_] = EMPTY_STRING; |
467
|
556
|
|
|
|
|
1718
|
$self->[_in_data_] = 0; |
468
|
556
|
|
|
|
|
1645
|
$self->[_in_end_] = 0; |
469
|
556
|
|
|
|
|
1413
|
$self->[_in_format_] = 0; |
470
|
556
|
|
|
|
|
1334
|
$self->[_in_error_] = 0; |
471
|
556
|
|
|
|
|
1334
|
$self->[_in_trouble_] = 0; |
472
|
556
|
|
|
|
|
1345
|
$self->[_warning_count_] = 0; |
473
|
556
|
|
|
|
|
1363
|
$self->[_html_tag_count_] = 0; |
474
|
556
|
|
|
|
|
1346
|
$self->[_in_pod_] = 0; |
475
|
556
|
|
|
|
|
1336
|
$self->[_in_skipped_] = 0; |
476
|
556
|
|
|
|
|
1215
|
$self->[_in_attribute_list_] = 0; |
477
|
556
|
|
|
|
|
1313
|
$self->[_in_quote_] = 0; |
478
|
556
|
|
|
|
|
1446
|
$self->[_quote_target_] = EMPTY_STRING; |
479
|
556
|
|
|
|
|
1249
|
$self->[_line_start_quote_] = -1; |
480
|
556
|
|
|
|
|
1302
|
$self->[_starting_level_] = $args{starting_level}; |
481
|
556
|
|
|
|
|
1658
|
$self->[_know_starting_level_] = defined( $args{starting_level} ); |
482
|
556
|
|
|
|
|
1484
|
$self->[_tabsize_] = $args{tabsize}; |
483
|
556
|
|
|
|
|
1547
|
$self->[_indent_columns_] = $args{indent_columns}; |
484
|
556
|
|
|
|
|
1506
|
$self->[_look_for_hash_bang_] = $args{look_for_hash_bang}; |
485
|
556
|
|
|
|
|
1387
|
$self->[_trim_qw_] = $args{trim_qw}; |
486
|
556
|
|
|
|
|
1423
|
$self->[_continuation_indentation_] = $args{continuation_indentation}; |
487
|
556
|
|
|
|
|
1385
|
$self->[_outdent_labels_] = $args{outdent_labels}; |
488
|
556
|
|
|
|
|
1622
|
$self->[_last_line_number_] = $args{starting_line_number} - 1; |
489
|
556
|
|
|
|
|
1163
|
$self->[_saw_perl_dash_P_] = 0; |
490
|
556
|
|
|
|
|
1372
|
$self->[_saw_perl_dash_w_] = 0; |
491
|
556
|
|
|
|
|
1205
|
$self->[_saw_use_strict_] = 0; |
492
|
556
|
|
|
|
|
1280
|
$self->[_saw_v_string_] = 0; |
493
|
556
|
|
|
|
|
1311
|
$self->[_saw_brace_error_] = 0; |
494
|
556
|
|
|
|
|
1179
|
$self->[_hit_bug_] = 0; |
495
|
556
|
|
|
|
|
1254
|
$self->[_look_for_autoloader_] = $args{look_for_autoloader}; |
496
|
556
|
|
|
|
|
1559
|
$self->[_look_for_selfloader_] = $args{look_for_selfloader}; |
497
|
556
|
|
|
|
|
1322
|
$self->[_saw_autoloader_] = 0; |
498
|
556
|
|
|
|
|
1196
|
$self->[_saw_selfloader_] = 0; |
499
|
556
|
|
|
|
|
1163
|
$self->[_saw_hash_bang_] = 0; |
500
|
556
|
|
|
|
|
1200
|
$self->[_saw_end_] = 0; |
501
|
556
|
|
|
|
|
1238
|
$self->[_saw_data_] = 0; |
502
|
556
|
|
|
|
|
1250
|
$self->[_saw_negative_indentation_] = 0; |
503
|
556
|
|
|
|
|
1408
|
$self->[_started_tokenizing_] = 0; |
504
|
556
|
|
|
|
|
1270
|
$self->[_line_buffer_object_] = $line_buffer_object; |
505
|
556
|
|
|
|
|
1283
|
$self->[_debugger_object_] = $args{debugger_object}; |
506
|
556
|
|
|
|
|
1477
|
$self->[_diagnostics_object_] = $args{diagnostics_object}; |
507
|
556
|
|
|
|
|
1297
|
$self->[_logger_object_] = $args{logger_object}; |
508
|
556
|
|
|
|
|
1275
|
$self->[_unexpected_error_count_] = 0; |
509
|
556
|
|
|
|
|
1220
|
$self->[_started_looking_for_here_target_at_] = 0; |
510
|
556
|
|
|
|
|
1286
|
$self->[_nearly_matched_here_target_at_] = undef; |
511
|
556
|
|
|
|
|
1600
|
$self->[_line_of_text_] = EMPTY_STRING; |
512
|
556
|
|
|
|
|
1351
|
$self->[_rlower_case_labels_at_] = undef; |
513
|
556
|
|
|
|
|
1284
|
$self->[_extended_syntax_] = $args{extended_syntax}; |
514
|
556
|
|
|
|
|
1414
|
$self->[_maximum_level_] = 0; |
515
|
556
|
|
|
|
|
1335
|
$self->[_true_brace_error_count_] = 0; |
516
|
556
|
|
|
|
|
1364
|
$self->[_rOpts_maximum_level_errors_] = $rOpts->{'maximum-level-errors'}; |
517
|
|
|
|
|
|
|
$self->[_rOpts_maximum_unexpected_errors_] = |
518
|
556
|
|
|
|
|
1500
|
$rOpts->{'maximum-unexpected-errors'}; |
519
|
556
|
|
|
|
|
1449
|
$self->[_rOpts_logfile_] = $rOpts->{'logfile'}; |
520
|
556
|
|
|
|
|
1302
|
$self->[_rOpts_] = $rOpts; |
521
|
|
|
|
|
|
|
|
522
|
|
|
|
|
|
|
# -exp=ci0 and -exp=ci1 turn on the tokenizer ci calculation for testing. |
523
|
|
|
|
|
|
|
# See comments in sub Perl::Tidy::Formatter::set_ci. |
524
|
556
|
|
|
|
|
1305
|
my $calculate_ci = 0; # current default |
525
|
556
|
50
|
33
|
|
|
2581
|
if ( $rOpts->{'experimental'} && $rOpts->{'experimental'} =~ /\bci(\d+)\b/ ) |
526
|
|
|
|
|
|
|
{ |
527
|
0
|
|
0
|
|
|
0
|
$calculate_ci = ( $1 == 0 || $1 == 1 ); |
528
|
|
|
|
|
|
|
} |
529
|
556
|
|
|
|
|
1320
|
$self->[_calculate_ci_] = $calculate_ci; |
530
|
|
|
|
|
|
|
|
531
|
|
|
|
|
|
|
# These vars are used for guessing indentation and must be positive |
532
|
556
|
50
|
|
|
|
2202
|
$self->[_tabsize_] = 8 if ( !$self->[_tabsize_] ); |
533
|
556
|
100
|
|
|
|
1933
|
$self->[_indent_columns_] = 4 if ( !$self->[_indent_columns_] ); |
534
|
|
|
|
|
|
|
|
535
|
556
|
|
|
|
|
1375
|
bless $self, $class; |
536
|
|
|
|
|
|
|
|
537
|
556
|
|
|
|
|
3387
|
$self->prepare_for_a_new_file(); |
538
|
556
|
|
|
|
|
3241
|
$self->find_starting_indentation_level(); |
539
|
|
|
|
|
|
|
|
540
|
|
|
|
|
|
|
# This is not a full class yet, so die if an attempt is made to |
541
|
|
|
|
|
|
|
# create more than one object. |
542
|
|
|
|
|
|
|
|
543
|
556
|
50
|
|
|
|
2876
|
if ( _increment_count() > 1 ) { |
544
|
0
|
|
|
|
|
0
|
confess |
545
|
|
|
|
|
|
|
"Attempt to create more than 1 object in $class, which is not a true class yet\n"; |
546
|
|
|
|
|
|
|
} |
547
|
|
|
|
|
|
|
|
548
|
556
|
|
|
|
|
6064
|
return $self; |
549
|
|
|
|
|
|
|
|
550
|
|
|
|
|
|
|
} ## end sub new |
551
|
|
|
|
|
|
|
|
552
|
|
|
|
|
|
|
# Called externally |
553
|
|
|
|
|
|
|
sub get_unexpected_error_count { |
554
|
4
|
|
|
4
|
0
|
16
|
my ($self) = @_; |
555
|
4
|
|
|
|
|
17
|
return $self->[_unexpected_error_count_]; |
556
|
|
|
|
|
|
|
} |
557
|
|
|
|
|
|
|
|
558
|
|
|
|
|
|
|
# Called externally |
559
|
|
|
|
|
|
|
sub is_keyword { |
560
|
2766
|
|
|
2766
|
0
|
4959
|
my ($str) = @_; |
561
|
2766
|
|
|
|
|
9828
|
return $is_keyword{$str}; |
562
|
|
|
|
|
|
|
} |
563
|
|
|
|
|
|
|
|
564
|
|
|
|
|
|
|
#----------------------------------------- |
565
|
|
|
|
|
|
|
# interface to Perl::Tidy::Logger routines |
566
|
|
|
|
|
|
|
#----------------------------------------- |
567
|
|
|
|
|
|
|
sub warning { |
568
|
|
|
|
|
|
|
|
569
|
0
|
|
|
0
|
0
|
0
|
my ( $self, $msg ) = @_; |
570
|
|
|
|
|
|
|
|
571
|
0
|
|
|
|
|
0
|
my $logger_object = $self->[_logger_object_]; |
572
|
0
|
|
|
|
|
0
|
$self->[_warning_count_]++; |
573
|
0
|
0
|
|
|
|
0
|
if ($logger_object) { |
574
|
0
|
|
|
|
|
0
|
my $msg_line_number = $self->[_last_line_number_]; |
575
|
0
|
|
|
|
|
0
|
$logger_object->warning( $msg, $msg_line_number ); |
576
|
|
|
|
|
|
|
} |
577
|
0
|
|
|
|
|
0
|
return; |
578
|
|
|
|
|
|
|
} ## end sub warning |
579
|
|
|
|
|
|
|
|
580
|
|
|
|
|
|
|
sub get_input_stream_name { |
581
|
|
|
|
|
|
|
|
582
|
0
|
|
|
0
|
0
|
0
|
my $self = shift; |
583
|
|
|
|
|
|
|
|
584
|
0
|
|
|
|
|
0
|
my $input_stream_name = EMPTY_STRING; |
585
|
0
|
|
|
|
|
0
|
my $logger_object = $self->[_logger_object_]; |
586
|
0
|
0
|
|
|
|
0
|
if ($logger_object) { |
587
|
0
|
|
|
|
|
0
|
$input_stream_name = $logger_object->get_input_stream_name(); |
588
|
|
|
|
|
|
|
} |
589
|
0
|
|
|
|
|
0
|
return $input_stream_name; |
590
|
|
|
|
|
|
|
} ## end sub get_input_stream_name |
591
|
|
|
|
|
|
|
|
592
|
|
|
|
|
|
|
sub complain { |
593
|
|
|
|
|
|
|
|
594
|
32
|
|
|
32
|
0
|
99
|
my ( $self, $msg ) = @_; |
595
|
|
|
|
|
|
|
|
596
|
32
|
|
|
|
|
72
|
my $logger_object = $self->[_logger_object_]; |
597
|
32
|
50
|
|
|
|
101
|
if ($logger_object) { |
598
|
32
|
|
|
|
|
85
|
my $input_line_number = $self->[_last_line_number_]; |
599
|
32
|
|
|
|
|
166
|
$logger_object->complain( $msg, $input_line_number ); |
600
|
|
|
|
|
|
|
} |
601
|
32
|
|
|
|
|
81
|
return; |
602
|
|
|
|
|
|
|
} ## end sub complain |
603
|
|
|
|
|
|
|
|
604
|
|
|
|
|
|
|
sub write_logfile_entry { |
605
|
|
|
|
|
|
|
|
606
|
1840
|
|
|
1840
|
0
|
4254
|
my ( $self, $msg ) = @_; |
607
|
|
|
|
|
|
|
|
608
|
1840
|
|
|
|
|
3502
|
my $logger_object = $self->[_logger_object_]; |
609
|
1840
|
100
|
|
|
|
4597
|
if ($logger_object) { |
610
|
1834
|
|
|
|
|
5296
|
$logger_object->write_logfile_entry($msg); |
611
|
|
|
|
|
|
|
} |
612
|
1840
|
|
|
|
|
5323
|
return; |
613
|
|
|
|
|
|
|
} ## end sub write_logfile_entry |
614
|
|
|
|
|
|
|
|
615
|
|
|
|
|
|
|
sub interrupt_logfile { |
616
|
|
|
|
|
|
|
|
617
|
0
|
|
|
0
|
0
|
0
|
my $self = shift; |
618
|
|
|
|
|
|
|
|
619
|
0
|
|
|
|
|
0
|
my $logger_object = $self->[_logger_object_]; |
620
|
0
|
0
|
|
|
|
0
|
if ($logger_object) { |
621
|
0
|
|
|
|
|
0
|
$logger_object->interrupt_logfile(); |
622
|
|
|
|
|
|
|
} |
623
|
0
|
|
|
|
|
0
|
return; |
624
|
|
|
|
|
|
|
} ## end sub interrupt_logfile |
625
|
|
|
|
|
|
|
|
626
|
|
|
|
|
|
|
sub resume_logfile { |
627
|
|
|
|
|
|
|
|
628
|
0
|
|
|
0
|
0
|
0
|
my $self = shift; |
629
|
|
|
|
|
|
|
|
630
|
0
|
|
|
|
|
0
|
my $logger_object = $self->[_logger_object_]; |
631
|
0
|
0
|
|
|
|
0
|
if ($logger_object) { |
632
|
0
|
|
|
|
|
0
|
$logger_object->resume_logfile(); |
633
|
|
|
|
|
|
|
} |
634
|
0
|
|
|
|
|
0
|
return; |
635
|
|
|
|
|
|
|
} ## end sub resume_logfile |
636
|
|
|
|
|
|
|
|
637
|
|
|
|
|
|
|
sub brace_warning { |
638
|
0
|
|
|
0
|
0
|
0
|
my ( $self, $msg ) = @_; |
639
|
0
|
|
|
|
|
0
|
$self->[_saw_brace_error_]++; |
640
|
|
|
|
|
|
|
|
641
|
0
|
|
|
|
|
0
|
my $logger_object = $self->[_logger_object_]; |
642
|
0
|
0
|
|
|
|
0
|
if ($logger_object) { |
643
|
0
|
|
|
|
|
0
|
my $msg_line_number = $self->[_last_line_number_]; |
644
|
0
|
|
|
|
|
0
|
$logger_object->brace_warning( $msg, $msg_line_number ); |
645
|
|
|
|
|
|
|
} |
646
|
0
|
|
|
|
|
0
|
return; |
647
|
|
|
|
|
|
|
} ## end sub brace_warning |
648
|
|
|
|
|
|
|
|
649
|
|
|
|
|
|
|
sub increment_brace_error { |
650
|
|
|
|
|
|
|
|
651
|
|
|
|
|
|
|
# This is same as sub brace_warning but without a message |
652
|
0
|
|
|
0
|
0
|
0
|
my $self = shift; |
653
|
0
|
|
|
|
|
0
|
$self->[_saw_brace_error_]++; |
654
|
|
|
|
|
|
|
|
655
|
0
|
|
|
|
|
0
|
my $logger_object = $self->[_logger_object_]; |
656
|
0
|
0
|
|
|
|
0
|
if ($logger_object) { |
657
|
0
|
|
|
|
|
0
|
$logger_object->increment_brace_error(); |
658
|
|
|
|
|
|
|
} |
659
|
0
|
|
|
|
|
0
|
return; |
660
|
|
|
|
|
|
|
} ## end sub increment_brace_error |
661
|
|
|
|
|
|
|
|
662
|
|
|
|
|
|
|
sub get_saw_brace_error { |
663
|
0
|
|
|
0
|
0
|
0
|
my $self = shift; |
664
|
0
|
|
|
|
|
0
|
return $self->[_saw_brace_error_]; |
665
|
|
|
|
|
|
|
} ## end sub get_saw_brace_error |
666
|
|
|
|
|
|
|
|
667
|
|
|
|
|
|
|
sub report_definite_bug { |
668
|
0
|
|
|
0
|
0
|
0
|
my $self = shift; |
669
|
0
|
|
|
|
|
0
|
$self->[_hit_bug_] = 1; |
670
|
0
|
|
|
|
|
0
|
my $logger_object = $self->[_logger_object_]; |
671
|
0
|
0
|
|
|
|
0
|
if ($logger_object) { |
672
|
0
|
|
|
|
|
0
|
$logger_object->report_definite_bug(); |
673
|
|
|
|
|
|
|
} |
674
|
0
|
|
|
|
|
0
|
return; |
675
|
|
|
|
|
|
|
} ## end sub report_definite_bug |
676
|
|
|
|
|
|
|
|
677
|
|
|
|
|
|
|
#------------------------------------- |
678
|
|
|
|
|
|
|
# Interface to Perl::Tidy::Diagnostics |
679
|
|
|
|
|
|
|
#------------------------------------- |
680
|
|
|
|
|
|
|
sub write_diagnostics { |
681
|
0
|
|
|
0
|
0
|
0
|
my ( $self, $msg ) = @_; |
682
|
0
|
|
|
|
|
0
|
my $input_line_number = $self->[_last_line_number_]; |
683
|
0
|
|
|
|
|
0
|
my $diagnostics_object = $self->[_diagnostics_object_]; |
684
|
0
|
0
|
|
|
|
0
|
if ($diagnostics_object) { |
685
|
0
|
|
|
|
|
0
|
$diagnostics_object->write_diagnostics( $msg, $input_line_number ); |
686
|
|
|
|
|
|
|
} |
687
|
0
|
|
|
|
|
0
|
return; |
688
|
|
|
|
|
|
|
} ## end sub write_diagnostics |
689
|
|
|
|
|
|
|
|
690
|
|
|
|
|
|
|
sub report_tokenization_errors { |
691
|
|
|
|
|
|
|
|
692
|
556
|
|
|
556
|
0
|
1891
|
my ($self) = @_; |
693
|
|
|
|
|
|
|
|
694
|
|
|
|
|
|
|
# Report any tokenization errors and return a flag '$severe_error'. |
695
|
|
|
|
|
|
|
# Set $severe_error = 1 if the tokenization errors are so severe that |
696
|
|
|
|
|
|
|
# the formatter should not attempt to format the file. Instead, it will |
697
|
|
|
|
|
|
|
# just output the file verbatim. |
698
|
|
|
|
|
|
|
|
699
|
|
|
|
|
|
|
# set severe error flag if tokenizer has encountered file reading problems |
700
|
|
|
|
|
|
|
# (i.e. unexpected binary characters) |
701
|
|
|
|
|
|
|
# or code which may not be formatted correctly (such as 'my sub q') |
702
|
|
|
|
|
|
|
# The difference between _in_error_ and _in_trouble_ is that |
703
|
|
|
|
|
|
|
# _in_error_ stops the tokenizer immediately whereas |
704
|
|
|
|
|
|
|
# _in_trouble_ lets the tokenizer finish so that all errors are seen |
705
|
|
|
|
|
|
|
# Both block formatting and cause the input stream to be output verbatim. |
706
|
556
|
|
33
|
|
|
3617
|
my $severe_error = $self->[_in_error_] || $self->[_in_trouble_]; |
707
|
|
|
|
|
|
|
|
708
|
|
|
|
|
|
|
# And do not format if it looks like an html file (c209) |
709
|
556
|
|
33
|
|
|
4142
|
$severe_error ||= $self->[_html_tag_count_] && $self->[_warning_count_]; |
|
|
|
33
|
|
|
|
|
710
|
|
|
|
|
|
|
|
711
|
|
|
|
|
|
|
# Inform the logger object on length of input stream |
712
|
556
|
|
|
|
|
1467
|
my $logger_object = $self->[_logger_object_]; |
713
|
556
|
100
|
|
|
|
2044
|
if ($logger_object) { |
714
|
554
|
|
|
|
|
1469
|
my $last_line_number = $self->[_last_line_number_]; |
715
|
554
|
|
|
|
|
2873
|
$logger_object->set_last_input_line_number($last_line_number); |
716
|
|
|
|
|
|
|
} |
717
|
|
|
|
|
|
|
|
718
|
556
|
|
|
|
|
1507
|
my $maxle = $self->[_rOpts_maximum_level_errors_]; |
719
|
556
|
|
|
|
|
1569
|
my $maxue = $self->[_rOpts_maximum_unexpected_errors_]; |
720
|
556
|
50
|
|
|
|
1783
|
$maxle = 1 unless defined($maxle); |
721
|
556
|
50
|
|
|
|
1761
|
$maxue = 0 unless defined($maxue); |
722
|
|
|
|
|
|
|
|
723
|
556
|
|
|
|
|
2386
|
my $level = get_indentation_level(); |
724
|
556
|
50
|
|
|
|
2174
|
if ( $level != $self->[_starting_level_] ) { |
725
|
0
|
|
|
|
|
0
|
$self->warning("final indentation level: $level\n"); |
726
|
0
|
|
|
|
|
0
|
my $level_diff = $self->[_starting_level_] - $level; |
727
|
0
|
0
|
|
|
|
0
|
if ( $level_diff < 0 ) { $level_diff = -$level_diff } |
|
0
|
|
|
|
|
0
|
|
728
|
|
|
|
|
|
|
|
729
|
|
|
|
|
|
|
# Set severe error flag if the level error is greater than 1. |
730
|
|
|
|
|
|
|
# The formatter can function for any level error but it is probably |
731
|
|
|
|
|
|
|
# best not to attempt formatting for a high level error. |
732
|
0
|
0
|
0
|
|
|
0
|
if ( $maxle >= 0 && $level_diff > $maxle ) { |
733
|
0
|
|
|
|
|
0
|
$severe_error = 1; |
734
|
0
|
|
|
|
|
0
|
$self->warning(<<EOM); |
735
|
|
|
|
|
|
|
Formatting will be skipped because level error '$level_diff' exceeds -maxle=$maxle; use -maxle=-1 to force formatting |
736
|
|
|
|
|
|
|
EOM |
737
|
|
|
|
|
|
|
} |
738
|
|
|
|
|
|
|
} |
739
|
|
|
|
|
|
|
|
740
|
556
|
|
|
|
|
3163
|
$self->check_final_nesting_depths(); |
741
|
|
|
|
|
|
|
|
742
|
|
|
|
|
|
|
# Likewise, large numbers of brace errors usually indicate non-perl |
743
|
|
|
|
|
|
|
# scripts, so set the severe error flag at a low number. This is similar |
744
|
|
|
|
|
|
|
# to the level check, but different because braces may balance but be |
745
|
|
|
|
|
|
|
# incorrectly interlaced. |
746
|
556
|
50
|
|
|
|
2824
|
if ( $self->[_true_brace_error_count_] > 2 ) { |
747
|
0
|
|
|
|
|
0
|
$severe_error = 1; |
748
|
|
|
|
|
|
|
} |
749
|
|
|
|
|
|
|
|
750
|
556
|
50
|
66
|
|
|
2634
|
if ( $self->[_look_for_hash_bang_] |
751
|
|
|
|
|
|
|
&& !$self->[_saw_hash_bang_] ) |
752
|
|
|
|
|
|
|
{ |
753
|
0
|
|
|
|
|
0
|
$self->warning( |
754
|
|
|
|
|
|
|
"hit EOF without seeing hash-bang line; maybe don't need -x?\n"); |
755
|
|
|
|
|
|
|
} |
756
|
|
|
|
|
|
|
|
757
|
556
|
50
|
|
|
|
1904
|
if ( $self->[_in_format_] ) { |
758
|
0
|
|
|
|
|
0
|
$self->warning("hit EOF while in format description\n"); |
759
|
|
|
|
|
|
|
} |
760
|
|
|
|
|
|
|
|
761
|
556
|
50
|
|
|
|
1997
|
if ( $self->[_in_skipped_] ) { |
762
|
0
|
|
|
|
|
0
|
$self->write_logfile_entry( |
763
|
|
|
|
|
|
|
"hit EOF while in lines skipped with --code-skipping\n"); |
764
|
|
|
|
|
|
|
} |
765
|
|
|
|
|
|
|
|
766
|
556
|
50
|
|
|
|
1995
|
if ( $self->[_in_pod_] ) { |
767
|
|
|
|
|
|
|
|
768
|
|
|
|
|
|
|
# Just write log entry if this is after __END__ or __DATA__ |
769
|
|
|
|
|
|
|
# because this happens to often, and it is not likely to be |
770
|
|
|
|
|
|
|
# a parsing error. |
771
|
0
|
0
|
0
|
|
|
0
|
if ( $self->[_saw_data_] || $self->[_saw_end_] ) { |
772
|
0
|
|
|
|
|
0
|
$self->write_logfile_entry( |
773
|
|
|
|
|
|
|
"hit eof while in pod documentation (no =cut seen)\n\tthis can cause trouble with some pod utilities\n" |
774
|
|
|
|
|
|
|
); |
775
|
|
|
|
|
|
|
} |
776
|
|
|
|
|
|
|
|
777
|
|
|
|
|
|
|
else { |
778
|
0
|
|
|
|
|
0
|
$self->complain( |
779
|
|
|
|
|
|
|
"hit eof while in pod documentation (no =cut seen)\n\tthis can cause trouble with some pod utilities\n" |
780
|
|
|
|
|
|
|
); |
781
|
|
|
|
|
|
|
} |
782
|
|
|
|
|
|
|
|
783
|
|
|
|
|
|
|
} |
784
|
|
|
|
|
|
|
|
785
|
556
|
50
|
|
|
|
2027
|
if ( $self->[_in_here_doc_] ) { |
786
|
0
|
|
|
|
|
0
|
$severe_error = 1; |
787
|
0
|
|
|
|
|
0
|
my $here_doc_target = $self->[_here_doc_target_]; |
788
|
0
|
|
|
|
|
0
|
my $started_looking_for_here_target_at = |
789
|
|
|
|
|
|
|
$self->[_started_looking_for_here_target_at_]; |
790
|
0
|
0
|
|
|
|
0
|
if ($here_doc_target) { |
791
|
0
|
|
|
|
|
0
|
$self->warning( |
792
|
|
|
|
|
|
|
"hit EOF in here document starting at line $started_looking_for_here_target_at with target: $here_doc_target\n" |
793
|
|
|
|
|
|
|
); |
794
|
|
|
|
|
|
|
} |
795
|
|
|
|
|
|
|
else { |
796
|
0
|
|
|
|
|
0
|
$self->warning(<<EOM); |
797
|
|
|
|
|
|
|
Hit EOF in here document starting at line $started_looking_for_here_target_at with empty target string. |
798
|
|
|
|
|
|
|
(Perl will match to the end of file but this may not be intended). |
799
|
|
|
|
|
|
|
EOM |
800
|
|
|
|
|
|
|
} |
801
|
0
|
|
|
|
|
0
|
my $nearly_matched_here_target_at = |
802
|
|
|
|
|
|
|
$self->[_nearly_matched_here_target_at_]; |
803
|
0
|
0
|
|
|
|
0
|
if ($nearly_matched_here_target_at) { |
804
|
0
|
|
|
|
|
0
|
$self->warning( |
805
|
|
|
|
|
|
|
"NOTE: almost matched at input line $nearly_matched_here_target_at except for whitespace\n" |
806
|
|
|
|
|
|
|
); |
807
|
|
|
|
|
|
|
} |
808
|
|
|
|
|
|
|
} |
809
|
|
|
|
|
|
|
|
810
|
|
|
|
|
|
|
# Something is seriously wrong if we ended inside a quote |
811
|
556
|
50
|
|
|
|
2020
|
if ( $self->[_in_quote_] ) { |
812
|
0
|
|
|
|
|
0
|
$severe_error = 1; |
813
|
0
|
|
|
|
|
0
|
my $line_start_quote = $self->[_line_start_quote_]; |
814
|
0
|
|
|
|
|
0
|
my $quote_target = $self->[_quote_target_]; |
815
|
0
|
0
|
|
|
|
0
|
my $what = |
816
|
|
|
|
|
|
|
( $self->[_in_attribute_list_] ) |
817
|
|
|
|
|
|
|
? "attribute list" |
818
|
|
|
|
|
|
|
: "quote/pattern"; |
819
|
0
|
|
|
|
|
0
|
$self->warning( |
820
|
|
|
|
|
|
|
"hit EOF seeking end of $what starting at line $line_start_quote ending in $quote_target\n" |
821
|
|
|
|
|
|
|
); |
822
|
|
|
|
|
|
|
} |
823
|
|
|
|
|
|
|
|
824
|
556
|
50
|
|
|
|
2289
|
if ( $self->[_hit_bug_] ) { |
825
|
0
|
|
|
|
|
0
|
$severe_error = 1; |
826
|
|
|
|
|
|
|
} |
827
|
|
|
|
|
|
|
|
828
|
|
|
|
|
|
|
# Multiple "unexpected" type tokenization errors usually indicate parsing |
829
|
|
|
|
|
|
|
# non-perl scripts, or that something is seriously wrong, so we should |
830
|
|
|
|
|
|
|
# avoid formatting them. This can happen for example if we run perltidy on |
831
|
|
|
|
|
|
|
# a shell script or an html file. But unfortunately this check can |
832
|
|
|
|
|
|
|
# interfere with some extended syntaxes, such as RPerl, so it has to be off |
833
|
|
|
|
|
|
|
# by default. |
834
|
556
|
|
|
|
|
1420
|
my $ue_count = $self->[_unexpected_error_count_]; |
835
|
556
|
50
|
33
|
|
|
2447
|
if ( $maxue > 0 && $ue_count > $maxue ) { |
836
|
0
|
|
|
|
|
0
|
$self->warning(<<EOM); |
837
|
|
|
|
|
|
|
Formatting will be skipped since unexpected token count = $ue_count > -maxue=$maxue; use -maxue=0 to force formatting |
838
|
|
|
|
|
|
|
EOM |
839
|
0
|
|
|
|
|
0
|
$severe_error = 1; |
840
|
|
|
|
|
|
|
} |
841
|
|
|
|
|
|
|
|
842
|
556
|
100
|
|
|
|
1997
|
unless ( $self->[_saw_perl_dash_w_] ) { |
843
|
540
|
50
|
|
|
|
2199
|
if ( $] < 5.006 ) { |
844
|
0
|
|
|
|
|
0
|
$self->write_logfile_entry("Suggest including '-w parameter'\n"); |
845
|
|
|
|
|
|
|
} |
846
|
|
|
|
|
|
|
else { |
847
|
540
|
|
|
|
|
1905
|
$self->write_logfile_entry("Suggest including 'use warnings;'\n"); |
848
|
|
|
|
|
|
|
} |
849
|
|
|
|
|
|
|
} |
850
|
|
|
|
|
|
|
|
851
|
556
|
50
|
|
|
|
3412
|
if ( $self->[_saw_perl_dash_P_] ) { |
852
|
0
|
|
|
|
|
0
|
$self->write_logfile_entry( |
853
|
|
|
|
|
|
|
"Use of -P parameter for defines is discouraged\n"); |
854
|
|
|
|
|
|
|
} |
855
|
|
|
|
|
|
|
|
856
|
556
|
100
|
|
|
|
2241
|
unless ( $self->[_saw_use_strict_] ) { |
857
|
542
|
|
|
|
|
1592
|
$self->write_logfile_entry("Suggest including 'use strict;'\n"); |
858
|
|
|
|
|
|
|
} |
859
|
|
|
|
|
|
|
|
860
|
|
|
|
|
|
|
# it is suggested that labels have at least one upper case character |
861
|
|
|
|
|
|
|
# for legibility and to avoid code breakage as new keywords are introduced |
862
|
556
|
100
|
|
|
|
3352
|
if ( $self->[_rlower_case_labels_at_] ) { |
863
|
12
|
|
|
|
|
33
|
my @lower_case_labels_at = @{ $self->[_rlower_case_labels_at_] }; |
|
12
|
|
|
|
|
36
|
|
864
|
12
|
|
|
|
|
51
|
$self->write_logfile_entry( |
865
|
|
|
|
|
|
|
"Suggest using upper case characters in label(s)\n"); |
866
|
12
|
|
|
|
|
94
|
local $LIST_SEPARATOR = ')('; |
867
|
12
|
|
|
|
|
81
|
$self->write_logfile_entry( |
868
|
|
|
|
|
|
|
" defined at line(s): (@lower_case_labels_at)\n"); |
869
|
|
|
|
|
|
|
} |
870
|
556
|
|
|
|
|
2154
|
return $severe_error; |
871
|
|
|
|
|
|
|
} ## end sub report_tokenization_errors |
872
|
|
|
|
|
|
|
|
873
|
|
|
|
|
|
|
sub report_v_string { |
874
|
|
|
|
|
|
|
|
875
|
|
|
|
|
|
|
# warn if this version can't handle v-strings |
876
|
2
|
|
|
2
|
0
|
10
|
my ( $self, $tok ) = @_; |
877
|
2
|
50
|
|
|
|
10
|
unless ( $self->[_saw_v_string_] ) { |
878
|
2
|
|
|
|
|
6
|
$self->[_saw_v_string_] = $self->[_last_line_number_]; |
879
|
|
|
|
|
|
|
} |
880
|
2
|
50
|
|
|
|
8
|
if ( $] < 5.006 ) { |
881
|
0
|
|
|
|
|
0
|
$self->warning( |
882
|
|
|
|
|
|
|
"Found v-string '$tok' but v-strings are not implemented in your version of perl; see Camel 3 book ch 2\n" |
883
|
|
|
|
|
|
|
); |
884
|
|
|
|
|
|
|
} |
885
|
2
|
|
|
|
|
6
|
return; |
886
|
|
|
|
|
|
|
} ## end sub report_v_string |
887
|
|
|
|
|
|
|
|
888
|
|
|
|
|
|
|
sub is_valid_token_type { |
889
|
3
|
|
|
3
|
0
|
7
|
my ($type) = @_; |
890
|
3
|
|
|
|
|
13
|
return $is_valid_token_type{$type}; |
891
|
|
|
|
|
|
|
} |
892
|
|
|
|
|
|
|
|
893
|
|
|
|
|
|
|
sub log_numbered_msg { |
894
|
165
|
|
|
165
|
0
|
817
|
my ( $self, $msg ) = @_; |
895
|
|
|
|
|
|
|
|
896
|
|
|
|
|
|
|
# write input line number + message to logfile |
897
|
165
|
|
|
|
|
364
|
my $input_line_number = $self->[_last_line_number_]; |
898
|
165
|
|
|
|
|
780
|
$self->write_logfile_entry("Line $input_line_number: $msg"); |
899
|
165
|
|
|
|
|
407
|
return; |
900
|
|
|
|
|
|
|
} ## end sub log_numbered_msg |
901
|
|
|
|
|
|
|
|
902
|
|
|
|
|
|
|
# returns the next tokenized line |
903
|
|
|
|
|
|
|
sub get_line { |
904
|
|
|
|
|
|
|
|
905
|
8205
|
|
|
8205
|
0
|
15740
|
my $self = shift; |
906
|
|
|
|
|
|
|
|
907
|
|
|
|
|
|
|
# USES GLOBAL VARIABLES: |
908
|
|
|
|
|
|
|
# $brace_depth, $square_bracket_depth, $paren_depth |
909
|
|
|
|
|
|
|
|
910
|
8205
|
|
|
|
|
25794
|
my $input_line = $self->[_line_buffer_object_]->get_line(); |
911
|
8205
|
|
|
|
|
16028
|
$self->[_line_of_text_] = $input_line; |
912
|
|
|
|
|
|
|
|
913
|
8205
|
100
|
|
|
|
19855
|
return unless ($input_line); |
914
|
|
|
|
|
|
|
|
915
|
7649
|
|
|
|
|
12591
|
my $input_line_number = ++$self->[_last_line_number_]; |
916
|
|
|
|
|
|
|
|
917
|
|
|
|
|
|
|
# Find and remove what characters terminate this line, including any |
918
|
|
|
|
|
|
|
# control r |
919
|
7649
|
|
|
|
|
11649
|
my $input_line_separator = EMPTY_STRING; |
920
|
7649
|
50
|
|
|
|
21229
|
if ( chomp($input_line) ) { |
921
|
7649
|
|
|
|
|
17473
|
$input_line_separator = $INPUT_RECORD_SEPARATOR; |
922
|
|
|
|
|
|
|
} |
923
|
|
|
|
|
|
|
|
924
|
|
|
|
|
|
|
# The first test here very significantly speeds things up, but be sure to |
925
|
|
|
|
|
|
|
# keep the regex and hash %other_line_endings the same. |
926
|
7649
|
100
|
|
|
|
23927
|
if ( $other_line_endings{ substr( $input_line, -1 ) } ) { |
927
|
24
|
50
|
|
|
|
357
|
if ( $input_line =~ s/((\r|\035|\032)+)$// ) { |
928
|
24
|
|
|
|
|
82
|
$input_line_separator = $2 . $input_line_separator; |
929
|
|
|
|
|
|
|
} |
930
|
|
|
|
|
|
|
} |
931
|
|
|
|
|
|
|
|
932
|
|
|
|
|
|
|
# for backwards compatibility we keep the line text terminated with |
933
|
|
|
|
|
|
|
# a newline character |
934
|
7649
|
|
|
|
|
15528
|
$input_line .= "\n"; |
935
|
7649
|
|
|
|
|
13520
|
$self->[_line_of_text_] = $input_line; |
936
|
|
|
|
|
|
|
|
937
|
|
|
|
|
|
|
# create a data structure describing this line which will be |
938
|
|
|
|
|
|
|
# returned to the caller. |
939
|
|
|
|
|
|
|
|
940
|
|
|
|
|
|
|
# _line_type codes are: |
941
|
|
|
|
|
|
|
# SYSTEM - system-specific code before hash-bang line |
942
|
|
|
|
|
|
|
# CODE - line of perl code (including comments) |
943
|
|
|
|
|
|
|
# POD_START - line starting pod, such as '=head' |
944
|
|
|
|
|
|
|
# POD - pod documentation text |
945
|
|
|
|
|
|
|
# POD_END - last line of pod section, '=cut' |
946
|
|
|
|
|
|
|
# HERE - text of here-document |
947
|
|
|
|
|
|
|
# HERE_END - last line of here-doc (target word) |
948
|
|
|
|
|
|
|
# FORMAT - format section |
949
|
|
|
|
|
|
|
# FORMAT_END - last line of format section, '.' |
950
|
|
|
|
|
|
|
# SKIP - code skipping section |
951
|
|
|
|
|
|
|
# SKIP_END - last line of code skipping section, '#>>V' |
952
|
|
|
|
|
|
|
# DATA_START - __DATA__ line |
953
|
|
|
|
|
|
|
# DATA - unidentified text following __DATA__ |
954
|
|
|
|
|
|
|
# END_START - __END__ line |
955
|
|
|
|
|
|
|
# END - unidentified text following __END__ |
956
|
|
|
|
|
|
|
# ERROR - we are in big trouble, probably not a perl script |
957
|
|
|
|
|
|
|
|
958
|
|
|
|
|
|
|
# Other variables: |
959
|
|
|
|
|
|
|
# _curly_brace_depth - depth of curly braces at start of line |
960
|
|
|
|
|
|
|
# _square_bracket_depth - depth of square brackets at start of line |
961
|
|
|
|
|
|
|
# _paren_depth - depth of parens at start of line |
962
|
|
|
|
|
|
|
# _starting_in_quote - this line continues a multi-line quote |
963
|
|
|
|
|
|
|
# (so don't trim leading blanks!) |
964
|
|
|
|
|
|
|
# _ending_in_quote - this line ends in a multi-line quote |
965
|
|
|
|
|
|
|
# (so don't trim trailing blanks!) |
966
|
7649
|
|
|
|
|
42598
|
my $line_of_tokens = { |
967
|
|
|
|
|
|
|
_line_type => 'EOF', |
968
|
|
|
|
|
|
|
_line_text => $input_line, |
969
|
|
|
|
|
|
|
_line_number => $input_line_number, |
970
|
|
|
|
|
|
|
_guessed_indentation_level => 0, |
971
|
|
|
|
|
|
|
_curly_brace_depth => $brace_depth, |
972
|
|
|
|
|
|
|
_square_bracket_depth => $square_bracket_depth, |
973
|
|
|
|
|
|
|
_paren_depth => $paren_depth, |
974
|
|
|
|
|
|
|
_quote_character => EMPTY_STRING, |
975
|
|
|
|
|
|
|
## Skip these needless initializations for efficiency: |
976
|
|
|
|
|
|
|
## _rtoken_type => undef, |
977
|
|
|
|
|
|
|
## _rtokens => undef, |
978
|
|
|
|
|
|
|
## _rlevels => undef, |
979
|
|
|
|
|
|
|
## _rblock_type => undef, |
980
|
|
|
|
|
|
|
## _rtype_sequence => undef, |
981
|
|
|
|
|
|
|
## _rci_levels => undef, |
982
|
|
|
|
|
|
|
## _starting_in_quote => 0, |
983
|
|
|
|
|
|
|
## _ending_in_quote => 0, |
984
|
|
|
|
|
|
|
}; |
985
|
|
|
|
|
|
|
|
986
|
|
|
|
|
|
|
# must print line unchanged if we are in a here document |
987
|
7649
|
100
|
|
|
|
38786
|
if ( $self->[_in_here_doc_] ) { |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
988
|
|
|
|
|
|
|
|
989
|
24
|
|
|
|
|
82
|
$line_of_tokens->{_line_type} = 'HERE'; |
990
|
24
|
|
|
|
|
56
|
my $here_doc_target = $self->[_here_doc_target_]; |
991
|
24
|
|
|
|
|
63
|
my $here_quote_character = $self->[_here_quote_character_]; |
992
|
24
|
|
|
|
|
62
|
my $candidate_target = $input_line; |
993
|
24
|
|
|
|
|
45
|
chomp $candidate_target; |
994
|
|
|
|
|
|
|
|
995
|
|
|
|
|
|
|
# Handle <<~ targets, which are indicated here by a leading space on |
996
|
|
|
|
|
|
|
# the here quote character |
997
|
24
|
100
|
|
|
|
111
|
if ( $here_quote_character =~ /^\s/ ) { |
998
|
4
|
|
|
|
|
16
|
$candidate_target =~ s/^\s*//; |
999
|
|
|
|
|
|
|
} |
1000
|
24
|
100
|
|
|
|
87
|
if ( $candidate_target eq $here_doc_target ) { |
1001
|
9
|
|
|
|
|
52
|
$self->[_nearly_matched_here_target_at_] = undef; |
1002
|
9
|
|
|
|
|
43
|
$line_of_tokens->{_line_type} = 'HERE_END'; |
1003
|
9
|
|
|
|
|
65
|
$self->log_numbered_msg("Exiting HERE document $here_doc_target\n"); |
1004
|
|
|
|
|
|
|
|
1005
|
9
|
|
|
|
|
45
|
my $rhere_target_list = $self->[_rhere_target_list_]; |
1006
|
9
|
50
|
|
|
|
42
|
if ( @{$rhere_target_list} ) { # there can be multiple here targets |
|
9
|
|
|
|
|
41
|
|
1007
|
|
|
|
|
|
|
( $here_doc_target, $here_quote_character ) = |
1008
|
0
|
|
|
|
|
0
|
@{ shift @{$rhere_target_list} }; |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
1009
|
0
|
|
|
|
|
0
|
$self->[_here_doc_target_] = $here_doc_target; |
1010
|
0
|
|
|
|
|
0
|
$self->[_here_quote_character_] = $here_quote_character; |
1011
|
0
|
|
|
|
|
0
|
$self->log_numbered_msg( |
1012
|
|
|
|
|
|
|
"Entering HERE document $here_doc_target\n"); |
1013
|
0
|
|
|
|
|
0
|
$self->[_nearly_matched_here_target_at_] = undef; |
1014
|
0
|
|
|
|
|
0
|
$self->[_started_looking_for_here_target_at_] = |
1015
|
|
|
|
|
|
|
$input_line_number; |
1016
|
|
|
|
|
|
|
} |
1017
|
|
|
|
|
|
|
else { |
1018
|
9
|
|
|
|
|
32
|
$self->[_in_here_doc_] = 0; |
1019
|
9
|
|
|
|
|
31
|
$self->[_here_doc_target_] = EMPTY_STRING; |
1020
|
9
|
|
|
|
|
31
|
$self->[_here_quote_character_] = EMPTY_STRING; |
1021
|
|
|
|
|
|
|
} |
1022
|
|
|
|
|
|
|
} |
1023
|
|
|
|
|
|
|
|
1024
|
|
|
|
|
|
|
# check for error of extra whitespace |
1025
|
|
|
|
|
|
|
# note for PERL6: leading whitespace is allowed |
1026
|
|
|
|
|
|
|
else { |
1027
|
15
|
|
|
|
|
147
|
$candidate_target =~ s/\s*$//; |
1028
|
15
|
|
|
|
|
72
|
$candidate_target =~ s/^\s*//; |
1029
|
15
|
50
|
|
|
|
64
|
if ( $candidate_target eq $here_doc_target ) { |
1030
|
0
|
|
|
|
|
0
|
$self->[_nearly_matched_here_target_at_] = $input_line_number; |
1031
|
|
|
|
|
|
|
} |
1032
|
|
|
|
|
|
|
} |
1033
|
24
|
|
|
|
|
103
|
return $line_of_tokens; |
1034
|
|
|
|
|
|
|
} |
1035
|
|
|
|
|
|
|
|
1036
|
|
|
|
|
|
|
# Print line unchanged if we are in a format section |
1037
|
|
|
|
|
|
|
elsif ( $self->[_in_format_] ) { |
1038
|
|
|
|
|
|
|
|
1039
|
3
|
100
|
|
|
|
15
|
if ( $input_line =~ /^\.[\s#]*$/ ) { |
1040
|
|
|
|
|
|
|
|
1041
|
|
|
|
|
|
|
# Decrement format depth count at a '.' after a 'format' |
1042
|
1
|
|
|
|
|
5
|
$self->[_in_format_]--; |
1043
|
|
|
|
|
|
|
|
1044
|
|
|
|
|
|
|
# This is the end when count reaches 0 |
1045
|
1
|
50
|
|
|
|
5
|
if ( !$self->[_in_format_] ) { |
1046
|
1
|
|
|
|
|
4
|
$self->log_numbered_msg("Exiting format section\n"); |
1047
|
1
|
|
|
|
|
3
|
$line_of_tokens->{_line_type} = 'FORMAT_END'; |
1048
|
|
|
|
|
|
|
|
1049
|
|
|
|
|
|
|
# Make the tokenizer mark an opening brace which follows |
1050
|
|
|
|
|
|
|
# as a code block. Fixes issue c202/t032. |
1051
|
1
|
|
|
|
|
3
|
$last_nonblank_token = ';'; |
1052
|
1
|
|
|
|
|
3
|
$last_nonblank_type = ';'; |
1053
|
|
|
|
|
|
|
} |
1054
|
|
|
|
|
|
|
} |
1055
|
|
|
|
|
|
|
else { |
1056
|
2
|
|
|
|
|
4
|
$line_of_tokens->{_line_type} = 'FORMAT'; |
1057
|
2
|
50
|
|
|
|
9
|
if ( $input_line =~ /^\s*format\s+\w+/ ) { |
1058
|
|
|
|
|
|
|
|
1059
|
|
|
|
|
|
|
# Increment format depth count at a 'format' within a 'format' |
1060
|
|
|
|
|
|
|
# This is a simple way to handle nested formats (issue c019). |
1061
|
0
|
|
|
|
|
0
|
$self->[_in_format_]++; |
1062
|
|
|
|
|
|
|
} |
1063
|
|
|
|
|
|
|
} |
1064
|
3
|
|
|
|
|
11
|
return $line_of_tokens; |
1065
|
|
|
|
|
|
|
} |
1066
|
|
|
|
|
|
|
|
1067
|
|
|
|
|
|
|
# must print line unchanged if we are in pod documentation |
1068
|
|
|
|
|
|
|
elsif ( $self->[_in_pod_] ) { |
1069
|
|
|
|
|
|
|
|
1070
|
44
|
|
|
|
|
117
|
$line_of_tokens->{_line_type} = 'POD'; |
1071
|
44
|
100
|
|
|
|
267
|
if ( $input_line =~ /^=cut/ ) { |
1072
|
19
|
|
|
|
|
61
|
$line_of_tokens->{_line_type} = 'POD_END'; |
1073
|
19
|
|
|
|
|
68
|
$self->log_numbered_msg("Exiting POD section\n"); |
1074
|
19
|
|
|
|
|
51
|
$self->[_in_pod_] = 0; |
1075
|
|
|
|
|
|
|
} |
1076
|
44
|
50
|
33
|
|
|
144
|
if ( $input_line =~ /^\#\!.*perl\b/ && !$self->[_in_end_] ) { |
1077
|
0
|
|
|
|
|
0
|
$self->warning( |
1078
|
|
|
|
|
|
|
"Hash-bang in pod can cause older versions of perl to fail! \n" |
1079
|
|
|
|
|
|
|
); |
1080
|
|
|
|
|
|
|
} |
1081
|
|
|
|
|
|
|
|
1082
|
44
|
|
|
|
|
152
|
return $line_of_tokens; |
1083
|
|
|
|
|
|
|
} |
1084
|
|
|
|
|
|
|
|
1085
|
|
|
|
|
|
|
# print line unchanged if in skipped section |
1086
|
|
|
|
|
|
|
elsif ( $self->[_in_skipped_] ) { |
1087
|
|
|
|
|
|
|
|
1088
|
8
|
|
|
|
|
27
|
$line_of_tokens->{_line_type} = 'SKIP'; |
1089
|
8
|
100
|
|
|
|
102
|
if ( $input_line =~ /$code_skipping_pattern_end/ ) { |
|
|
50
|
|
|
|
|
|
1090
|
2
|
|
|
|
|
10
|
$line_of_tokens->{_line_type} = 'SKIP_END'; |
1091
|
2
|
|
|
|
|
10
|
$self->log_numbered_msg("Exiting code-skipping section\n"); |
1092
|
2
|
|
|
|
|
10
|
$self->[_in_skipped_] = 0; |
1093
|
|
|
|
|
|
|
} |
1094
|
|
|
|
|
|
|
elsif ( $input_line =~ /$code_skipping_pattern_begin/ ) { |
1095
|
|
|
|
|
|
|
|
1096
|
|
|
|
|
|
|
# warn of duplicate starting comment lines, git #118 |
1097
|
0
|
|
|
|
|
0
|
my $lno = $self->[_in_skipped_]; |
1098
|
0
|
|
|
|
|
0
|
$self->warning( |
1099
|
|
|
|
|
|
|
"Already in code-skipping section which started at line $lno\n" |
1100
|
|
|
|
|
|
|
); |
1101
|
|
|
|
|
|
|
} |
1102
|
8
|
|
|
|
|
32
|
return $line_of_tokens; |
1103
|
|
|
|
|
|
|
} |
1104
|
|
|
|
|
|
|
|
1105
|
|
|
|
|
|
|
# must print line unchanged if we have seen a severe error (i.e., we |
1106
|
|
|
|
|
|
|
# are seeing illegal tokens and cannot continue. Syntax errors do |
1107
|
|
|
|
|
|
|
# not pass this route). Calling routine can decide what to do, but |
1108
|
|
|
|
|
|
|
# the default can be to just pass all lines as if they were after __END__ |
1109
|
|
|
|
|
|
|
elsif ( $self->[_in_error_] ) { |
1110
|
0
|
|
|
|
|
0
|
$line_of_tokens->{_line_type} = 'ERROR'; |
1111
|
0
|
|
|
|
|
0
|
return $line_of_tokens; |
1112
|
|
|
|
|
|
|
} |
1113
|
|
|
|
|
|
|
|
1114
|
|
|
|
|
|
|
# print line unchanged if we are __DATA__ section |
1115
|
|
|
|
|
|
|
elsif ( $self->[_in_data_] ) { |
1116
|
|
|
|
|
|
|
|
1117
|
|
|
|
|
|
|
# ...but look for POD |
1118
|
|
|
|
|
|
|
# Note that the _in_data and _in_end flags remain set |
1119
|
|
|
|
|
|
|
# so that we return to that state after seeing the |
1120
|
|
|
|
|
|
|
# end of a pod section |
1121
|
1
|
50
|
33
|
|
|
10
|
if ( $input_line =~ /^=(\w+)\b/ && $1 ne 'cut' ) { |
1122
|
0
|
|
|
|
|
0
|
$line_of_tokens->{_line_type} = 'POD_START'; |
1123
|
0
|
|
|
|
|
0
|
$self->log_numbered_msg("Entering POD section\n"); |
1124
|
0
|
|
|
|
|
0
|
$self->[_in_pod_] = 1; |
1125
|
0
|
|
|
|
|
0
|
return $line_of_tokens; |
1126
|
|
|
|
|
|
|
} |
1127
|
|
|
|
|
|
|
else { |
1128
|
1
|
|
|
|
|
3
|
$line_of_tokens->{_line_type} = 'DATA'; |
1129
|
1
|
|
|
|
|
5
|
return $line_of_tokens; |
1130
|
|
|
|
|
|
|
} |
1131
|
|
|
|
|
|
|
} |
1132
|
|
|
|
|
|
|
|
1133
|
|
|
|
|
|
|
# print line unchanged if we are in __END__ section |
1134
|
|
|
|
|
|
|
elsif ( $self->[_in_end_] ) { |
1135
|
|
|
|
|
|
|
|
1136
|
|
|
|
|
|
|
# ...but look for POD |
1137
|
|
|
|
|
|
|
# Note that the _in_data and _in_end flags remain set |
1138
|
|
|
|
|
|
|
# so that we return to that state after seeing the |
1139
|
|
|
|
|
|
|
# end of a pod section |
1140
|
48
|
100
|
66
|
|
|
273
|
if ( $input_line =~ /^=(\w+)\b/ && $1 ne 'cut' ) { |
1141
|
6
|
|
|
|
|
29
|
$line_of_tokens->{_line_type} = 'POD_START'; |
1142
|
6
|
|
|
|
|
23
|
$self->log_numbered_msg("Entering POD section\n"); |
1143
|
6
|
|
|
|
|
42
|
$self->[_in_pod_] = 1; |
1144
|
6
|
|
|
|
|
30
|
return $line_of_tokens; |
1145
|
|
|
|
|
|
|
} |
1146
|
|
|
|
|
|
|
else { |
1147
|
42
|
|
|
|
|
85
|
$line_of_tokens->{_line_type} = 'END'; |
1148
|
42
|
|
|
|
|
132
|
return $line_of_tokens; |
1149
|
|
|
|
|
|
|
} |
1150
|
|
|
|
|
|
|
} |
1151
|
|
|
|
|
|
|
|
1152
|
|
|
|
|
|
|
# check for a hash-bang line if we haven't seen one |
1153
|
7521
|
100
|
|
|
|
16727
|
if ( !$self->[_saw_hash_bang_] ) { |
1154
|
6973
|
100
|
|
|
|
20727
|
if ( $input_line =~ /^\#\!.*perl\b/ ) { |
1155
|
15
|
|
|
|
|
55
|
$self->[_saw_hash_bang_] = $input_line_number; |
1156
|
|
|
|
|
|
|
|
1157
|
|
|
|
|
|
|
# check for -w and -P flags |
1158
|
15
|
50
|
|
|
|
86
|
if ( $input_line =~ /^\#\!.*perl\s.*-.*P/ ) { |
1159
|
0
|
|
|
|
|
0
|
$self->[_saw_perl_dash_P_] = 1; |
1160
|
|
|
|
|
|
|
} |
1161
|
|
|
|
|
|
|
|
1162
|
15
|
100
|
|
|
|
94
|
if ( $input_line =~ /^\#\!.*perl\s.*-.*w/ ) { |
1163
|
8
|
|
|
|
|
27
|
$self->[_saw_perl_dash_w_] = 1; |
1164
|
|
|
|
|
|
|
} |
1165
|
|
|
|
|
|
|
|
1166
|
15
|
100
|
33
|
|
|
123
|
if ( |
|
|
|
66
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
66
|
|
|
|
|
1167
|
|
|
|
|
|
|
$input_line_number > 1 |
1168
|
|
|
|
|
|
|
|
1169
|
|
|
|
|
|
|
# leave any hash bang in a BEGIN block alone |
1170
|
|
|
|
|
|
|
# i.e. see 'debugger-duck_type.t' |
1171
|
|
|
|
|
|
|
&& !( |
1172
|
|
|
|
|
|
|
$last_nonblank_block_type |
1173
|
|
|
|
|
|
|
&& $last_nonblank_block_type eq 'BEGIN' |
1174
|
|
|
|
|
|
|
) |
1175
|
|
|
|
|
|
|
&& !$self->[_look_for_hash_bang_] |
1176
|
|
|
|
|
|
|
|
1177
|
|
|
|
|
|
|
# Try to avoid giving a false alarm at a simple comment. |
1178
|
|
|
|
|
|
|
# These look like valid hash-bang lines: |
1179
|
|
|
|
|
|
|
|
1180
|
|
|
|
|
|
|
#!/usr/bin/perl -w |
1181
|
|
|
|
|
|
|
#! /usr/bin/perl -w |
1182
|
|
|
|
|
|
|
#!c:\perl\bin\perl.exe |
1183
|
|
|
|
|
|
|
|
1184
|
|
|
|
|
|
|
# These are comments: |
1185
|
|
|
|
|
|
|
#! I love perl |
1186
|
|
|
|
|
|
|
#! sunos does not yet provide a /usr/bin/perl |
1187
|
|
|
|
|
|
|
|
1188
|
|
|
|
|
|
|
# Comments typically have multiple spaces, which suggests |
1189
|
|
|
|
|
|
|
# the filter |
1190
|
|
|
|
|
|
|
&& $input_line =~ /^\#\!(\s+)?(\S+)?perl/ |
1191
|
|
|
|
|
|
|
) |
1192
|
|
|
|
|
|
|
{ |
1193
|
|
|
|
|
|
|
|
1194
|
|
|
|
|
|
|
# this is helpful for VMS systems; we may have accidentally |
1195
|
|
|
|
|
|
|
# tokenized some DCL commands |
1196
|
1
|
50
|
|
|
|
4
|
if ( $self->[_started_tokenizing_] ) { |
1197
|
0
|
|
|
|
|
0
|
$self->warning( |
1198
|
|
|
|
|
|
|
"There seems to be a hash-bang after line 1; do you need to run with -x ?\n" |
1199
|
|
|
|
|
|
|
); |
1200
|
|
|
|
|
|
|
} |
1201
|
|
|
|
|
|
|
else { |
1202
|
1
|
|
|
|
|
6
|
$self->complain("Useless hash-bang after line 1\n"); |
1203
|
|
|
|
|
|
|
} |
1204
|
|
|
|
|
|
|
} |
1205
|
|
|
|
|
|
|
|
1206
|
|
|
|
|
|
|
# Report the leading hash-bang as a system line |
1207
|
|
|
|
|
|
|
# This will prevent -dac from deleting it |
1208
|
|
|
|
|
|
|
else { |
1209
|
14
|
|
|
|
|
50
|
$line_of_tokens->{_line_type} = 'SYSTEM'; |
1210
|
14
|
|
|
|
|
131
|
return $line_of_tokens; |
1211
|
|
|
|
|
|
|
} |
1212
|
|
|
|
|
|
|
} |
1213
|
|
|
|
|
|
|
} |
1214
|
|
|
|
|
|
|
|
1215
|
|
|
|
|
|
|
# wait for a hash-bang before parsing if the user invoked us with -x |
1216
|
7507
|
100
|
100
|
|
|
18409
|
if ( $self->[_look_for_hash_bang_] |
1217
|
|
|
|
|
|
|
&& !$self->[_saw_hash_bang_] ) |
1218
|
|
|
|
|
|
|
{ |
1219
|
5
|
|
|
|
|
9
|
$line_of_tokens->{_line_type} = 'SYSTEM'; |
1220
|
5
|
|
|
|
|
17
|
return $line_of_tokens; |
1221
|
|
|
|
|
|
|
} |
1222
|
|
|
|
|
|
|
|
1223
|
|
|
|
|
|
|
# a first line of the form ': #' will be marked as SYSTEM |
1224
|
|
|
|
|
|
|
# since lines of this form may be used by tcsh |
1225
|
7502
|
50
|
66
|
|
|
19912
|
if ( $input_line_number == 1 && $input_line =~ /^\s*\:\s*\#/ ) { |
1226
|
0
|
|
|
|
|
0
|
$line_of_tokens->{_line_type} = 'SYSTEM'; |
1227
|
0
|
|
|
|
|
0
|
return $line_of_tokens; |
1228
|
|
|
|
|
|
|
} |
1229
|
|
|
|
|
|
|
|
1230
|
|
|
|
|
|
|
# now we know that it is ok to tokenize the line... |
1231
|
|
|
|
|
|
|
# the line tokenizer will modify any of these private variables: |
1232
|
|
|
|
|
|
|
# _rhere_target_list_ |
1233
|
|
|
|
|
|
|
# _in_data_ |
1234
|
|
|
|
|
|
|
# _in_end_ |
1235
|
|
|
|
|
|
|
# _in_format_ |
1236
|
|
|
|
|
|
|
# _in_error_ |
1237
|
|
|
|
|
|
|
# _in_skipped_ |
1238
|
|
|
|
|
|
|
# _in_pod_ |
1239
|
|
|
|
|
|
|
# _in_quote_ |
1240
|
7502
|
|
|
|
|
24455
|
$self->tokenize_this_line($line_of_tokens); |
1241
|
|
|
|
|
|
|
|
1242
|
|
|
|
|
|
|
# Now finish defining the return structure and return it |
1243
|
7502
|
|
|
|
|
15635
|
$line_of_tokens->{_ending_in_quote} = $self->[_in_quote_]; |
1244
|
|
|
|
|
|
|
|
1245
|
|
|
|
|
|
|
# handle severe error (binary data in script) |
1246
|
7502
|
50
|
|
|
|
17287
|
if ( $self->[_in_error_] ) { |
1247
|
0
|
|
|
|
|
0
|
$self->[_in_quote_] = 0; # to avoid any more messages |
1248
|
0
|
|
|
|
|
0
|
$self->warning("Giving up after error\n"); |
1249
|
0
|
|
|
|
|
0
|
$line_of_tokens->{_line_type} = 'ERROR'; |
1250
|
0
|
|
|
|
|
0
|
reset_indentation_level(0); # avoid error messages |
1251
|
0
|
|
|
|
|
0
|
return $line_of_tokens; |
1252
|
|
|
|
|
|
|
} |
1253
|
|
|
|
|
|
|
|
1254
|
|
|
|
|
|
|
# handle start of pod documentation |
1255
|
7502
|
100
|
|
|
|
15698
|
if ( $self->[_in_pod_] ) { |
1256
|
|
|
|
|
|
|
|
1257
|
|
|
|
|
|
|
# This gets tricky..above a __DATA__ or __END__ section, perl |
1258
|
|
|
|
|
|
|
# accepts '=cut' as the start of pod section. But afterwards, |
1259
|
|
|
|
|
|
|
# only pod utilities see it and they may ignore an =cut without |
1260
|
|
|
|
|
|
|
# leading =head. In any case, this isn't good. |
1261
|
13
|
50
|
|
|
|
58
|
if ( $input_line =~ /^=cut\b/ ) { |
1262
|
0
|
0
|
0
|
|
|
0
|
if ( $self->[_saw_data_] || $self->[_saw_end_] ) { |
1263
|
0
|
|
|
|
|
0
|
$self->complain("=cut while not in pod ignored\n"); |
1264
|
0
|
|
|
|
|
0
|
$self->[_in_pod_] = 0; |
1265
|
0
|
|
|
|
|
0
|
$line_of_tokens->{_line_type} = 'POD_END'; |
1266
|
|
|
|
|
|
|
} |
1267
|
|
|
|
|
|
|
else { |
1268
|
0
|
|
|
|
|
0
|
$line_of_tokens->{_line_type} = 'POD_START'; |
1269
|
0
|
|
|
|
|
0
|
$self->warning( |
1270
|
|
|
|
|
|
|
"=cut starts a pod section .. this can fool pod utilities.\n" |
1271
|
|
|
|
|
|
|
) unless (DEVEL_MODE); |
1272
|
0
|
|
|
|
|
0
|
$self->log_numbered_msg("Entering POD section\n"); |
1273
|
|
|
|
|
|
|
} |
1274
|
|
|
|
|
|
|
} |
1275
|
|
|
|
|
|
|
|
1276
|
|
|
|
|
|
|
else { |
1277
|
13
|
|
|
|
|
38
|
$line_of_tokens->{_line_type} = 'POD_START'; |
1278
|
13
|
|
|
|
|
58
|
$self->log_numbered_msg("Entering POD section\n"); |
1279
|
|
|
|
|
|
|
} |
1280
|
|
|
|
|
|
|
|
1281
|
13
|
|
|
|
|
62
|
return $line_of_tokens; |
1282
|
|
|
|
|
|
|
} |
1283
|
|
|
|
|
|
|
|
1284
|
|
|
|
|
|
|
# handle start of skipped section |
1285
|
7489
|
100
|
|
|
|
15155
|
if ( $self->[_in_skipped_] ) { |
1286
|
|
|
|
|
|
|
|
1287
|
2
|
|
|
|
|
6
|
$line_of_tokens->{_line_type} = 'SKIP'; |
1288
|
2
|
|
|
|
|
12
|
$self->log_numbered_msg("Entering code-skipping section\n"); |
1289
|
2
|
|
|
|
|
8
|
return $line_of_tokens; |
1290
|
|
|
|
|
|
|
} |
1291
|
|
|
|
|
|
|
|
1292
|
|
|
|
|
|
|
# see if this line contains here doc targets |
1293
|
7487
|
|
|
|
|
11942
|
my $rhere_target_list = $self->[_rhere_target_list_]; |
1294
|
7487
|
100
|
|
|
|
10474
|
if ( @{$rhere_target_list} ) { |
|
7487
|
|
|
|
|
17069
|
|
1295
|
|
|
|
|
|
|
|
1296
|
|
|
|
|
|
|
my ( $here_doc_target, $here_quote_character ) = |
1297
|
9
|
|
|
|
|
39
|
@{ shift @{$rhere_target_list} }; |
|
9
|
|
|
|
|
24
|
|
|
9
|
|
|
|
|
43
|
|
1298
|
9
|
|
|
|
|
38
|
$self->[_in_here_doc_] = 1; |
1299
|
9
|
|
|
|
|
36
|
$self->[_here_doc_target_] = $here_doc_target; |
1300
|
9
|
|
|
|
|
30
|
$self->[_here_quote_character_] = $here_quote_character; |
1301
|
9
|
|
|
|
|
70
|
$self->log_numbered_msg("Entering HERE document $here_doc_target\n"); |
1302
|
9
|
|
|
|
|
36
|
$self->[_started_looking_for_here_target_at_] = $input_line_number; |
1303
|
|
|
|
|
|
|
} |
1304
|
|
|
|
|
|
|
|
1305
|
|
|
|
|
|
|
# NOTE: __END__ and __DATA__ statements are written unformatted |
1306
|
|
|
|
|
|
|
# because they can theoretically contain additional characters |
1307
|
|
|
|
|
|
|
# which are not tokenized (and cannot be read with <DATA> either!). |
1308
|
7487
|
100
|
|
|
|
20623
|
if ( $self->[_in_data_] ) { |
|
|
100
|
|
|
|
|
|
1309
|
1
|
|
|
|
|
3
|
$line_of_tokens->{_line_type} = 'DATA_START'; |
1310
|
1
|
|
|
|
|
6
|
$self->log_numbered_msg("Starting __DATA__ section\n"); |
1311
|
1
|
|
|
|
|
3
|
$self->[_saw_data_] = 1; |
1312
|
|
|
|
|
|
|
|
1313
|
|
|
|
|
|
|
# keep parsing after __DATA__ if use SelfLoader was seen |
1314
|
1
|
50
|
|
|
|
3
|
if ( $self->[_saw_selfloader_] ) { |
1315
|
0
|
|
|
|
|
0
|
$self->[_in_data_] = 0; |
1316
|
0
|
|
|
|
|
0
|
$self->log_numbered_msg( |
1317
|
|
|
|
|
|
|
"SelfLoader seen, continuing; -nlsl deactivates\n"); |
1318
|
|
|
|
|
|
|
} |
1319
|
|
|
|
|
|
|
|
1320
|
1
|
|
|
|
|
6
|
return $line_of_tokens; |
1321
|
|
|
|
|
|
|
} |
1322
|
|
|
|
|
|
|
|
1323
|
|
|
|
|
|
|
elsif ( $self->[_in_end_] ) { |
1324
|
6
|
|
|
|
|
31
|
$line_of_tokens->{_line_type} = 'END_START'; |
1325
|
6
|
|
|
|
|
32
|
$self->log_numbered_msg("Starting __END__ section\n"); |
1326
|
6
|
|
|
|
|
15
|
$self->[_saw_end_] = 1; |
1327
|
|
|
|
|
|
|
|
1328
|
|
|
|
|
|
|
# keep parsing after __END__ if use AutoLoader was seen |
1329
|
6
|
50
|
|
|
|
23
|
if ( $self->[_saw_autoloader_] ) { |
1330
|
0
|
|
|
|
|
0
|
$self->[_in_end_] = 0; |
1331
|
0
|
|
|
|
|
0
|
$self->log_numbered_msg( |
1332
|
|
|
|
|
|
|
"AutoLoader seen, continuing; -nlal deactivates\n"); |
1333
|
|
|
|
|
|
|
} |
1334
|
6
|
|
|
|
|
29
|
return $line_of_tokens; |
1335
|
|
|
|
|
|
|
} |
1336
|
|
|
|
|
|
|
|
1337
|
|
|
|
|
|
|
# now, finally, we know that this line is type 'CODE' |
1338
|
7480
|
|
|
|
|
13980
|
$line_of_tokens->{_line_type} = 'CODE'; |
1339
|
|
|
|
|
|
|
|
1340
|
|
|
|
|
|
|
# remember if we have seen any real code |
1341
|
7480
|
100
|
100
|
|
|
23649
|
if ( !$self->[_started_tokenizing_] |
|
|
|
100
|
|
|
|
|
1342
|
|
|
|
|
|
|
&& $input_line !~ /^\s*$/ |
1343
|
|
|
|
|
|
|
&& $input_line !~ /^\s*#/ ) |
1344
|
|
|
|
|
|
|
{ |
1345
|
553
|
|
|
|
|
2078
|
$self->[_started_tokenizing_] = 1; |
1346
|
|
|
|
|
|
|
} |
1347
|
|
|
|
|
|
|
|
1348
|
7480
|
100
|
|
|
|
16071
|
if ( $self->[_debugger_object_] ) { |
1349
|
7
|
|
|
|
|
33
|
$self->[_debugger_object_]->write_debug_entry($line_of_tokens); |
1350
|
|
|
|
|
|
|
} |
1351
|
|
|
|
|
|
|
|
1352
|
|
|
|
|
|
|
# Note: if keyword 'format' occurs in this line code, it is still CODE |
1353
|
|
|
|
|
|
|
# (keyword 'format' need not start a line) |
1354
|
7480
|
100
|
|
|
|
15466
|
if ( $self->[_in_format_] ) { |
1355
|
1
|
|
|
|
|
7
|
$self->log_numbered_msg("Entering format section\n"); |
1356
|
|
|
|
|
|
|
} |
1357
|
|
|
|
|
|
|
|
1358
|
7480
|
100
|
100
|
|
|
29097
|
if ( $self->[_in_quote_] |
|
|
100
|
100
|
|
|
|
|
1359
|
|
|
|
|
|
|
and ( $self->[_line_start_quote_] < 0 ) ) |
1360
|
|
|
|
|
|
|
{ |
1361
|
|
|
|
|
|
|
|
1362
|
|
|
|
|
|
|
#if ( ( my $quote_target = get_quote_target() ) !~ /^\s*$/ ) { |
1363
|
49
|
100
|
|
|
|
413
|
if ( ( my $quote_target = $self->[_quote_target_] ) !~ /^\s*$/ ) { |
1364
|
48
|
|
|
|
|
132
|
$self->[_line_start_quote_] = $input_line_number; |
1365
|
48
|
|
|
|
|
328
|
$self->log_numbered_msg( |
1366
|
|
|
|
|
|
|
"Start multi-line quote or pattern ending in $quote_target\n"); |
1367
|
|
|
|
|
|
|
} |
1368
|
|
|
|
|
|
|
} |
1369
|
|
|
|
|
|
|
elsif ( ( $self->[_line_start_quote_] >= 0 ) |
1370
|
|
|
|
|
|
|
&& !$self->[_in_quote_] ) |
1371
|
|
|
|
|
|
|
{ |
1372
|
48
|
|
|
|
|
209
|
$self->[_line_start_quote_] = -1; |
1373
|
48
|
|
|
|
|
252
|
$self->log_numbered_msg("End of multi-line quote or pattern\n"); |
1374
|
|
|
|
|
|
|
} |
1375
|
|
|
|
|
|
|
|
1376
|
|
|
|
|
|
|
# we are returning a line of CODE |
1377
|
7480
|
|
|
|
|
29692
|
return $line_of_tokens; |
1378
|
|
|
|
|
|
|
} ## end sub get_line |
1379
|
|
|
|
|
|
|
|
1380
|
|
|
|
|
|
|
sub find_starting_indentation_level { |
1381
|
|
|
|
|
|
|
|
1382
|
|
|
|
|
|
|
# We need to find the indentation level of the first line of the |
1383
|
|
|
|
|
|
|
# script being formatted. Often it will be zero for an entire file, |
1384
|
|
|
|
|
|
|
# but if we are formatting a local block of code (within an editor for |
1385
|
|
|
|
|
|
|
# example) it may not be zero. The user may specify this with the |
1386
|
|
|
|
|
|
|
# -sil=n parameter but normally doesn't so we have to guess. |
1387
|
|
|
|
|
|
|
# |
1388
|
556
|
|
|
556
|
0
|
1740
|
my ($self) = @_; |
1389
|
556
|
|
|
|
|
1355
|
my $starting_level = 0; |
1390
|
|
|
|
|
|
|
|
1391
|
|
|
|
|
|
|
# use value if given as parameter |
1392
|
556
|
100
|
|
|
|
2861
|
if ( $self->[_know_starting_level_] ) { |
|
|
100
|
|
|
|
|
|
1393
|
1
|
|
|
|
|
2
|
$starting_level = $self->[_starting_level_]; |
1394
|
|
|
|
|
|
|
} |
1395
|
|
|
|
|
|
|
|
1396
|
|
|
|
|
|
|
# if we know there is a hash_bang line, the level must be zero |
1397
|
|
|
|
|
|
|
elsif ( $self->[_look_for_hash_bang_] ) { |
1398
|
1
|
|
|
|
|
3
|
$self->[_know_starting_level_] = 1; |
1399
|
|
|
|
|
|
|
} |
1400
|
|
|
|
|
|
|
|
1401
|
|
|
|
|
|
|
# otherwise figure it out from the input file |
1402
|
|
|
|
|
|
|
else { |
1403
|
554
|
|
|
|
|
1189
|
my $line; |
1404
|
554
|
|
|
|
|
1171
|
my $i = 0; |
1405
|
|
|
|
|
|
|
|
1406
|
|
|
|
|
|
|
# keep looking at lines until we find a hash bang or piece of code |
1407
|
554
|
|
|
|
|
1258
|
my $msg = EMPTY_STRING; |
1408
|
554
|
|
|
|
|
3560
|
while ( $line = $self->[_line_buffer_object_]->peek_ahead( $i++ ) ) { |
1409
|
|
|
|
|
|
|
|
1410
|
|
|
|
|
|
|
# if first line is #! then assume starting level is zero |
1411
|
866
|
100
|
100
|
|
|
4973
|
if ( $i == 1 && $line =~ /^\#\!/ ) { |
1412
|
13
|
|
|
|
|
51
|
$starting_level = 0; |
1413
|
13
|
|
|
|
|
33
|
last; |
1414
|
|
|
|
|
|
|
} |
1415
|
853
|
100
|
|
|
|
4654
|
next if ( $line =~ /^\s*#/ ); # skip past comments |
1416
|
557
|
100
|
|
|
|
3847
|
next if ( $line =~ /^\s*$/ ); # skip past blank lines |
1417
|
539
|
|
|
|
|
2658
|
$starting_level = $self->guess_old_indentation_level($line); |
1418
|
539
|
|
|
|
|
1369
|
last; |
1419
|
|
|
|
|
|
|
} |
1420
|
554
|
|
|
|
|
2620
|
$msg = "Line $i implies starting-indentation-level = $starting_level\n"; |
1421
|
554
|
|
|
|
|
2980
|
$self->write_logfile_entry("$msg"); |
1422
|
|
|
|
|
|
|
} |
1423
|
556
|
|
|
|
|
2236
|
$self->[_starting_level_] = $starting_level; |
1424
|
556
|
|
|
|
|
3524
|
reset_indentation_level($starting_level); |
1425
|
556
|
|
|
|
|
1186
|
return; |
1426
|
|
|
|
|
|
|
} ## end sub find_starting_indentation_level |
1427
|
|
|
|
|
|
|
|
1428
|
|
|
|
|
|
|
sub guess_old_indentation_level { |
1429
|
539
|
|
|
539
|
0
|
1870
|
my ( $self, $line ) = @_; |
1430
|
|
|
|
|
|
|
|
1431
|
|
|
|
|
|
|
# Guess the indentation level of an input line. |
1432
|
|
|
|
|
|
|
# |
1433
|
|
|
|
|
|
|
# For the first line of code this result will define the starting |
1434
|
|
|
|
|
|
|
# indentation level. It will mainly be non-zero when perltidy is applied |
1435
|
|
|
|
|
|
|
# within an editor to a local block of code. |
1436
|
|
|
|
|
|
|
# |
1437
|
|
|
|
|
|
|
# This is an impossible task in general because we can't know what tabs |
1438
|
|
|
|
|
|
|
# meant for the old script and how many spaces were used for one |
1439
|
|
|
|
|
|
|
# indentation level in the given input script. For example it may have |
1440
|
|
|
|
|
|
|
# been previously formatted with -i=7 -et=3. But we can at least try to |
1441
|
|
|
|
|
|
|
# make sure that perltidy guesses correctly if it is applied repeatedly to |
1442
|
|
|
|
|
|
|
# a block of code within an editor, so that the block stays at the same |
1443
|
|
|
|
|
|
|
# level when perltidy is applied repeatedly. |
1444
|
|
|
|
|
|
|
# |
1445
|
|
|
|
|
|
|
# USES GLOBAL VARIABLES: (none) |
1446
|
539
|
|
|
|
|
1257
|
my $level = 0; |
1447
|
|
|
|
|
|
|
|
1448
|
|
|
|
|
|
|
# find leading tabs, spaces, and any statement label |
1449
|
539
|
|
|
|
|
1146
|
my $spaces = 0; |
1450
|
539
|
50
|
|
|
|
4304
|
if ( $line =~ /^(\t+)?(\s+)?(\w+:[^:])?/ ) { |
1451
|
|
|
|
|
|
|
|
1452
|
|
|
|
|
|
|
# If there are leading tabs, we use the tab scheme for this run, if |
1453
|
|
|
|
|
|
|
# any, so that the code will remain stable when editing. |
1454
|
539
|
100
|
|
|
|
2602
|
if ($1) { $spaces += length($1) * $self->[_tabsize_] } |
|
2
|
|
|
|
|
10
|
|
1455
|
|
|
|
|
|
|
|
1456
|
539
|
100
|
|
|
|
2040
|
if ($2) { $spaces += length($2) } |
|
77
|
|
|
|
|
292
|
|
1457
|
|
|
|
|
|
|
|
1458
|
|
|
|
|
|
|
# correct for outdented labels |
1459
|
539
|
50
|
66
|
|
|
2419
|
if ( $3 && $self->[_outdent_labels_] ) { |
1460
|
1
|
|
|
|
|
6
|
$spaces += $self->[_continuation_indentation_]; |
1461
|
|
|
|
|
|
|
} |
1462
|
|
|
|
|
|
|
} |
1463
|
|
|
|
|
|
|
|
1464
|
|
|
|
|
|
|
# compute indentation using the value of -i for this run. |
1465
|
|
|
|
|
|
|
# If -i=0 is used for this run (which is possible) it doesn't matter |
1466
|
|
|
|
|
|
|
# what we do here but we'll guess that the old run used 4 spaces per level. |
1467
|
539
|
|
|
|
|
1651
|
my $indent_columns = $self->[_indent_columns_]; |
1468
|
539
|
50
|
|
|
|
1730
|
$indent_columns = 4 if ( !$indent_columns ); |
1469
|
539
|
|
|
|
|
2206
|
$level = int( $spaces / $indent_columns ); |
1470
|
539
|
|
|
|
|
1449
|
return ($level); |
1471
|
|
|
|
|
|
|
} ## end sub guess_old_indentation_level |
1472
|
|
|
|
|
|
|
|
1473
|
|
|
|
|
|
|
# This is a currently unused debug routine |
1474
|
|
|
|
|
|
|
sub dump_functions { |
1475
|
|
|
|
|
|
|
|
1476
|
0
|
|
|
0
|
0
|
0
|
my $fh = *STDOUT; |
1477
|
0
|
|
|
|
|
0
|
foreach my $pkg ( keys %{$ris_user_function} ) { |
|
0
|
|
|
|
|
0
|
|
1478
|
0
|
|
|
|
|
0
|
$fh->print("\nnon-constant subs in package $pkg\n"); |
1479
|
|
|
|
|
|
|
|
1480
|
0
|
|
|
|
|
0
|
foreach my $sub ( keys %{ $ris_user_function->{$pkg} } ) { |
|
0
|
|
|
|
|
0
|
|
1481
|
0
|
|
|
|
|
0
|
my $msg = EMPTY_STRING; |
1482
|
0
|
0
|
|
|
|
0
|
if ( $ris_block_list_function->{$pkg}{$sub} ) { |
1483
|
0
|
|
|
|
|
0
|
$msg = 'block_list'; |
1484
|
|
|
|
|
|
|
} |
1485
|
|
|
|
|
|
|
|
1486
|
0
|
0
|
|
|
|
0
|
if ( $ris_block_function->{$pkg}{$sub} ) { |
1487
|
0
|
|
|
|
|
0
|
$msg = 'block'; |
1488
|
|
|
|
|
|
|
} |
1489
|
0
|
|
|
|
|
0
|
$fh->print("$sub $msg\n"); |
1490
|
|
|
|
|
|
|
} |
1491
|
|
|
|
|
|
|
} |
1492
|
|
|
|
|
|
|
|
1493
|
0
|
|
|
|
|
0
|
foreach my $pkg ( keys %{$ris_constant} ) { |
|
0
|
|
|
|
|
0
|
|
1494
|
0
|
|
|
|
|
0
|
$fh->print("\nconstants and constant subs in package $pkg\n"); |
1495
|
|
|
|
|
|
|
|
1496
|
0
|
|
|
|
|
0
|
foreach my $sub ( keys %{ $ris_constant->{$pkg} } ) { |
|
0
|
|
|
|
|
0
|
|
1497
|
0
|
|
|
|
|
0
|
$fh->print("$sub\n"); |
1498
|
|
|
|
|
|
|
} |
1499
|
|
|
|
|
|
|
} |
1500
|
0
|
|
|
|
|
0
|
return; |
1501
|
|
|
|
|
|
|
} ## end sub dump_functions |
1502
|
|
|
|
|
|
|
|
1503
|
|
|
|
|
|
|
sub prepare_for_a_new_file { |
1504
|
|
|
|
|
|
|
|
1505
|
556
|
|
|
556
|
0
|
1359
|
my $self = shift; |
1506
|
|
|
|
|
|
|
|
1507
|
|
|
|
|
|
|
# previous tokens needed to determine what to expect next |
1508
|
556
|
|
|
|
|
1662
|
$last_nonblank_token = ';'; # the only possible starting state which |
1509
|
556
|
|
|
|
|
1438
|
$last_nonblank_type = ';'; # will make a leading brace a code block |
1510
|
556
|
|
|
|
|
1353
|
$last_nonblank_block_type = EMPTY_STRING; |
1511
|
|
|
|
|
|
|
|
1512
|
|
|
|
|
|
|
# scalars for remembering statement types across multiple lines |
1513
|
556
|
|
|
|
|
1409
|
$statement_type = EMPTY_STRING; # '' or 'use' or 'sub..' or 'case..' |
1514
|
|
|
|
|
|
|
|
1515
|
|
|
|
|
|
|
# scalars for remembering where we are in the file |
1516
|
556
|
|
|
|
|
1343
|
$current_package = "main"; |
1517
|
556
|
|
|
|
|
1212
|
$context = UNKNOWN_CONTEXT; |
1518
|
|
|
|
|
|
|
|
1519
|
|
|
|
|
|
|
# hashes used to remember function information |
1520
|
556
|
|
|
|
|
1915
|
$ris_constant = {}; # user-defined constants |
1521
|
556
|
|
|
|
|
1613
|
$ris_user_function = {}; # user-defined functions |
1522
|
556
|
|
|
|
|
1550
|
$ruser_function_prototype = {}; # their prototypes |
1523
|
556
|
|
|
|
|
1706
|
$ris_block_function = {}; |
1524
|
556
|
|
|
|
|
1687
|
$ris_block_list_function = {}; |
1525
|
556
|
|
|
|
|
1405
|
$rsaw_function_definition = {}; |
1526
|
556
|
|
|
|
|
1418
|
$rsaw_use_module = {}; |
1527
|
|
|
|
|
|
|
|
1528
|
|
|
|
|
|
|
# variables used to track depths of various containers |
1529
|
|
|
|
|
|
|
# and report nesting errors |
1530
|
556
|
|
|
|
|
1283
|
$paren_depth = 0; |
1531
|
556
|
|
|
|
|
1124
|
$brace_depth = 0; |
1532
|
556
|
|
|
|
|
1033
|
$square_bracket_depth = 0; |
1533
|
556
|
|
|
|
|
2434
|
$rcurrent_depth = [ (0) x scalar @closing_brace_names ]; |
1534
|
556
|
|
|
|
|
1179
|
$total_depth = 0; |
1535
|
556
|
|
|
|
|
2172
|
$rtotal_depth = []; |
1536
|
556
|
|
|
|
|
1942
|
$rcurrent_sequence_number = []; |
1537
|
556
|
|
|
|
|
1347
|
$next_sequence_number = 2; # The value 1 is reserved for SEQ_ROOT |
1538
|
|
|
|
|
|
|
|
1539
|
556
|
|
|
|
|
1973
|
$rparen_type = []; |
1540
|
556
|
|
|
|
|
1635
|
$rparen_semicolon_count = []; |
1541
|
556
|
|
|
|
|
1847
|
$rparen_structural_type = []; |
1542
|
556
|
|
|
|
|
1744
|
$rbrace_type = []; |
1543
|
556
|
|
|
|
|
1706
|
$rbrace_structural_type = []; |
1544
|
556
|
|
|
|
|
1462
|
$rbrace_context = []; |
1545
|
556
|
|
|
|
|
1623
|
$rbrace_package = []; |
1546
|
556
|
|
|
|
|
1491
|
$rsquare_bracket_type = []; |
1547
|
556
|
|
|
|
|
1430
|
$rsquare_bracket_structural_type = []; |
1548
|
556
|
|
|
|
|
3467
|
$rdepth_array = []; |
1549
|
556
|
|
|
|
|
1200
|
$rnested_ternary_flag = []; |
1550
|
556
|
|
|
|
|
3879
|
$rnested_statement_type = []; |
1551
|
556
|
|
|
|
|
3130
|
$rstarting_line_of_current_depth = []; |
1552
|
|
|
|
|
|
|
|
1553
|
556
|
|
|
|
|
1744
|
$rparen_type->[$paren_depth] = EMPTY_STRING; |
1554
|
556
|
|
|
|
|
1388
|
$rparen_semicolon_count->[$paren_depth] = 0; |
1555
|
556
|
|
|
|
|
1379
|
$rparen_structural_type->[$brace_depth] = EMPTY_STRING; |
1556
|
556
|
|
|
|
|
1400
|
$rbrace_type->[$brace_depth] = ';'; # identify opening brace as code block |
1557
|
556
|
|
|
|
|
1360
|
$rbrace_structural_type->[$brace_depth] = EMPTY_STRING; |
1558
|
556
|
|
|
|
|
1270
|
$rbrace_context->[$brace_depth] = UNKNOWN_CONTEXT; |
1559
|
556
|
|
|
|
|
1413
|
$rbrace_package->[$paren_depth] = $current_package; |
1560
|
556
|
|
|
|
|
1426
|
$rsquare_bracket_type->[$square_bracket_depth] = EMPTY_STRING; |
1561
|
556
|
|
|
|
|
1296
|
$rsquare_bracket_structural_type->[$square_bracket_depth] = EMPTY_STRING; |
1562
|
|
|
|
|
|
|
|
1563
|
556
|
|
|
|
|
2790
|
initialize_tokenizer_state(); |
1564
|
556
|
|
|
|
|
1158
|
return; |
1565
|
|
|
|
|
|
|
} ## end sub prepare_for_a_new_file |
1566
|
|
|
|
|
|
|
|
1567
|
|
|
|
|
|
|
{ ## closure for sub tokenize_this_line |
1568
|
|
|
|
|
|
|
|
1569
|
38
|
|
|
38
|
|
375
|
use constant BRACE => 0; |
|
38
|
|
|
|
|
88
|
|
|
38
|
|
|
|
|
2682
|
|
1570
|
38
|
|
|
38
|
|
293
|
use constant SQUARE_BRACKET => 1; |
|
38
|
|
|
|
|
81
|
|
|
38
|
|
|
|
|
2187
|
|
1571
|
38
|
|
|
38
|
|
352
|
use constant PAREN => 2; |
|
38
|
|
|
|
|
132
|
|
|
38
|
|
|
|
|
2409
|
|
1572
|
38
|
|
|
38
|
|
307
|
use constant QUESTION_COLON => 3; |
|
38
|
|
|
|
|
127
|
|
|
38
|
|
|
|
|
87603
|
|
1573
|
|
|
|
|
|
|
|
1574
|
|
|
|
|
|
|
# TV1: scalars for processing one LINE. |
1575
|
|
|
|
|
|
|
# Re-initialized on each entry to sub tokenize_this_line. |
1576
|
|
|
|
|
|
|
my ( |
1577
|
|
|
|
|
|
|
$block_type, $container_type, $expecting, |
1578
|
|
|
|
|
|
|
$i, $i_tok, $input_line, |
1579
|
|
|
|
|
|
|
$input_line_number, $last_nonblank_i, $max_token_index, |
1580
|
|
|
|
|
|
|
$next_tok, $next_type, $peeked_ahead, |
1581
|
|
|
|
|
|
|
$prototype, $rhere_target_list, $rtoken_map, |
1582
|
|
|
|
|
|
|
$rtoken_type, $rtokens, $tok, |
1583
|
|
|
|
|
|
|
$type, $type_sequence, $indent_flag, |
1584
|
|
|
|
|
|
|
); |
1585
|
|
|
|
|
|
|
|
1586
|
|
|
|
|
|
|
# TV2: refs to ARRAYS for processing one LINE |
1587
|
|
|
|
|
|
|
# Re-initialized on each call. |
1588
|
|
|
|
|
|
|
my $routput_token_list = []; # stack of output token indexes |
1589
|
|
|
|
|
|
|
my $routput_token_type = []; # token types |
1590
|
|
|
|
|
|
|
my $routput_block_type = []; # types of code block |
1591
|
|
|
|
|
|
|
my $routput_container_type = []; # paren types, such as if, elsif, .. |
1592
|
|
|
|
|
|
|
my $routput_type_sequence = []; # nesting sequential number |
1593
|
|
|
|
|
|
|
my $routput_indent_flag = []; # |
1594
|
|
|
|
|
|
|
|
1595
|
|
|
|
|
|
|
# TV3: SCALARS for quote variables. These are initialized with a |
1596
|
|
|
|
|
|
|
# subroutine call and continually updated as lines are processed. |
1597
|
|
|
|
|
|
|
my ( $in_quote, $quote_type, $quote_character, $quote_pos, $quote_depth, |
1598
|
|
|
|
|
|
|
$quoted_string_1, $quoted_string_2, $allowed_quote_modifiers, ); |
1599
|
|
|
|
|
|
|
|
1600
|
|
|
|
|
|
|
# TV4: SCALARS for multi-line identifiers and |
1601
|
|
|
|
|
|
|
# statements. These are initialized with a subroutine call |
1602
|
|
|
|
|
|
|
# and continually updated as lines are processed. |
1603
|
|
|
|
|
|
|
my ( $id_scan_state, $identifier, $want_paren ); |
1604
|
|
|
|
|
|
|
|
1605
|
|
|
|
|
|
|
# TV5: SCALARS for tracking indentation level. |
1606
|
|
|
|
|
|
|
# Initialized once and continually updated as lines are |
1607
|
|
|
|
|
|
|
# processed. |
1608
|
|
|
|
|
|
|
my ( |
1609
|
|
|
|
|
|
|
$nesting_token_string, $nesting_type_string, |
1610
|
|
|
|
|
|
|
$nesting_block_string, $nesting_block_flag, |
1611
|
|
|
|
|
|
|
$nesting_list_string, $nesting_list_flag, |
1612
|
|
|
|
|
|
|
$ci_string_in_tokenizer, $continuation_string_in_tokenizer, |
1613
|
|
|
|
|
|
|
$in_statement_continuation, $level_in_tokenizer, |
1614
|
|
|
|
|
|
|
$slevel_in_tokenizer, $rslevel_stack, |
1615
|
|
|
|
|
|
|
); |
1616
|
|
|
|
|
|
|
|
1617
|
|
|
|
|
|
|
# TV6: SCALARS for remembering several previous |
1618
|
|
|
|
|
|
|
# tokens. Initialized once and continually updated as |
1619
|
|
|
|
|
|
|
# lines are processed. |
1620
|
|
|
|
|
|
|
my ( |
1621
|
|
|
|
|
|
|
$last_nonblank_container_type, $last_nonblank_type_sequence, |
1622
|
|
|
|
|
|
|
$last_last_nonblank_token, $last_last_nonblank_type, |
1623
|
|
|
|
|
|
|
$last_last_nonblank_block_type, $last_last_nonblank_container_type, |
1624
|
|
|
|
|
|
|
$last_last_nonblank_type_sequence, $last_nonblank_prototype, |
1625
|
|
|
|
|
|
|
); |
1626
|
|
|
|
|
|
|
|
1627
|
|
|
|
|
|
|
# ---------------------------------------------------------------- |
1628
|
|
|
|
|
|
|
# beginning of tokenizer variable access and manipulation routines |
1629
|
|
|
|
|
|
|
# ---------------------------------------------------------------- |
1630
|
|
|
|
|
|
|
|
1631
|
|
|
|
|
|
|
sub initialize_tokenizer_state { |
1632
|
|
|
|
|
|
|
|
1633
|
|
|
|
|
|
|
# GV1: initialized once |
1634
|
|
|
|
|
|
|
# TV1: initialized on each call |
1635
|
|
|
|
|
|
|
# TV2: initialized on each call |
1636
|
|
|
|
|
|
|
# TV3: |
1637
|
556
|
|
|
556
|
0
|
1264
|
$in_quote = 0; |
1638
|
556
|
|
|
|
|
1335
|
$quote_type = 'Q'; |
1639
|
556
|
|
|
|
|
1140
|
$quote_character = EMPTY_STRING; |
1640
|
556
|
|
|
|
|
1101
|
$quote_pos = 0; |
1641
|
556
|
|
|
|
|
1072
|
$quote_depth = 0; |
1642
|
556
|
|
|
|
|
1163
|
$quoted_string_1 = EMPTY_STRING; |
1643
|
556
|
|
|
|
|
1144
|
$quoted_string_2 = EMPTY_STRING; |
1644
|
556
|
|
|
|
|
1171
|
$allowed_quote_modifiers = EMPTY_STRING; |
1645
|
|
|
|
|
|
|
|
1646
|
|
|
|
|
|
|
# TV4: |
1647
|
556
|
|
|
|
|
1065
|
$id_scan_state = EMPTY_STRING; |
1648
|
556
|
|
|
|
|
1196
|
$identifier = EMPTY_STRING; |
1649
|
556
|
|
|
|
|
1299
|
$want_paren = EMPTY_STRING; |
1650
|
|
|
|
|
|
|
|
1651
|
|
|
|
|
|
|
# TV5: |
1652
|
556
|
|
|
|
|
1303
|
$nesting_token_string = EMPTY_STRING; |
1653
|
556
|
|
|
|
|
1199
|
$nesting_type_string = EMPTY_STRING; |
1654
|
556
|
|
|
|
|
1219
|
$nesting_block_string = '1'; # initially in a block |
1655
|
556
|
|
|
|
|
1008
|
$nesting_block_flag = 1; |
1656
|
556
|
|
|
|
|
1094
|
$nesting_list_string = '0'; # initially not in a list |
1657
|
556
|
|
|
|
|
1058
|
$nesting_list_flag = 0; # initially not in a list |
1658
|
556
|
|
|
|
|
1098
|
$ci_string_in_tokenizer = EMPTY_STRING; |
1659
|
556
|
|
|
|
|
1164
|
$continuation_string_in_tokenizer = "0"; |
1660
|
556
|
|
|
|
|
1039
|
$in_statement_continuation = 0; |
1661
|
556
|
|
|
|
|
1087
|
$level_in_tokenizer = 0; |
1662
|
556
|
|
|
|
|
1082
|
$slevel_in_tokenizer = 0; |
1663
|
556
|
|
|
|
|
1676
|
$rslevel_stack = []; |
1664
|
|
|
|
|
|
|
|
1665
|
|
|
|
|
|
|
# TV6: |
1666
|
556
|
|
|
|
|
1285
|
$last_nonblank_container_type = EMPTY_STRING; |
1667
|
556
|
|
|
|
|
1281
|
$last_nonblank_type_sequence = EMPTY_STRING; |
1668
|
556
|
|
|
|
|
1342
|
$last_last_nonblank_token = ';'; |
1669
|
556
|
|
|
|
|
1194
|
$last_last_nonblank_type = ';'; |
1670
|
556
|
|
|
|
|
1154
|
$last_last_nonblank_block_type = EMPTY_STRING; |
1671
|
556
|
|
|
|
|
1087
|
$last_last_nonblank_container_type = EMPTY_STRING; |
1672
|
556
|
|
|
|
|
1093
|
$last_last_nonblank_type_sequence = EMPTY_STRING; |
1673
|
556
|
|
|
|
|
1045
|
$last_nonblank_prototype = EMPTY_STRING; |
1674
|
556
|
|
|
|
|
1106
|
return; |
1675
|
|
|
|
|
|
|
} ## end sub initialize_tokenizer_state |
1676
|
|
|
|
|
|
|
|
1677
|
|
|
|
|
|
|
sub save_tokenizer_state { |
1678
|
|
|
|
|
|
|
|
1679
|
|
|
|
|
|
|
# Global variables: |
1680
|
0
|
|
|
0
|
0
|
0
|
my $rGV1 = [ |
1681
|
|
|
|
|
|
|
$brace_depth, |
1682
|
|
|
|
|
|
|
$context, |
1683
|
|
|
|
|
|
|
$current_package, |
1684
|
|
|
|
|
|
|
$last_nonblank_block_type, |
1685
|
|
|
|
|
|
|
$last_nonblank_token, |
1686
|
|
|
|
|
|
|
$last_nonblank_type, |
1687
|
|
|
|
|
|
|
$next_sequence_number, |
1688
|
|
|
|
|
|
|
$paren_depth, |
1689
|
|
|
|
|
|
|
$rbrace_context, |
1690
|
|
|
|
|
|
|
$rbrace_package, |
1691
|
|
|
|
|
|
|
$rbrace_structural_type, |
1692
|
|
|
|
|
|
|
$rbrace_type, |
1693
|
|
|
|
|
|
|
$rcurrent_depth, |
1694
|
|
|
|
|
|
|
$rcurrent_sequence_number, |
1695
|
|
|
|
|
|
|
$rdepth_array, |
1696
|
|
|
|
|
|
|
$ris_block_function, |
1697
|
|
|
|
|
|
|
$ris_block_list_function, |
1698
|
|
|
|
|
|
|
$ris_constant, |
1699
|
|
|
|
|
|
|
$ris_user_function, |
1700
|
|
|
|
|
|
|
$rnested_statement_type, |
1701
|
|
|
|
|
|
|
$rnested_ternary_flag, |
1702
|
|
|
|
|
|
|
$rparen_semicolon_count, |
1703
|
|
|
|
|
|
|
$rparen_structural_type, |
1704
|
|
|
|
|
|
|
$rparen_type, |
1705
|
|
|
|
|
|
|
$rsaw_function_definition, |
1706
|
|
|
|
|
|
|
$rsaw_use_module, |
1707
|
|
|
|
|
|
|
$rsquare_bracket_structural_type, |
1708
|
|
|
|
|
|
|
$rsquare_bracket_type, |
1709
|
|
|
|
|
|
|
$rstarting_line_of_current_depth, |
1710
|
|
|
|
|
|
|
$rtotal_depth, |
1711
|
|
|
|
|
|
|
$ruser_function_prototype, |
1712
|
|
|
|
|
|
|
$square_bracket_depth, |
1713
|
|
|
|
|
|
|
$statement_type, |
1714
|
|
|
|
|
|
|
$total_depth, |
1715
|
|
|
|
|
|
|
|
1716
|
|
|
|
|
|
|
]; |
1717
|
|
|
|
|
|
|
|
1718
|
|
|
|
|
|
|
# Tokenizer closure variables: |
1719
|
0
|
|
|
|
|
0
|
my $rTV1 = [ |
1720
|
|
|
|
|
|
|
$block_type, $container_type, $expecting, |
1721
|
|
|
|
|
|
|
$i, $i_tok, $input_line, |
1722
|
|
|
|
|
|
|
$input_line_number, $last_nonblank_i, $max_token_index, |
1723
|
|
|
|
|
|
|
$next_tok, $next_type, $peeked_ahead, |
1724
|
|
|
|
|
|
|
$prototype, $rhere_target_list, $rtoken_map, |
1725
|
|
|
|
|
|
|
$rtoken_type, $rtokens, $tok, |
1726
|
|
|
|
|
|
|
$type, $type_sequence, $indent_flag, |
1727
|
|
|
|
|
|
|
]; |
1728
|
|
|
|
|
|
|
|
1729
|
0
|
|
|
|
|
0
|
my $rTV2 = [ |
1730
|
|
|
|
|
|
|
$routput_token_list, $routput_token_type, |
1731
|
|
|
|
|
|
|
$routput_block_type, $routput_container_type, |
1732
|
|
|
|
|
|
|
$routput_type_sequence, $routput_indent_flag, |
1733
|
|
|
|
|
|
|
]; |
1734
|
|
|
|
|
|
|
|
1735
|
0
|
|
|
|
|
0
|
my $rTV3 = [ |
1736
|
|
|
|
|
|
|
$in_quote, $quote_type, |
1737
|
|
|
|
|
|
|
$quote_character, $quote_pos, |
1738
|
|
|
|
|
|
|
$quote_depth, $quoted_string_1, |
1739
|
|
|
|
|
|
|
$quoted_string_2, $allowed_quote_modifiers, |
1740
|
|
|
|
|
|
|
]; |
1741
|
|
|
|
|
|
|
|
1742
|
0
|
|
|
|
|
0
|
my $rTV4 = [ $id_scan_state, $identifier, $want_paren ]; |
1743
|
|
|
|
|
|
|
|
1744
|
0
|
|
|
|
|
0
|
my $rTV5 = [ |
1745
|
|
|
|
|
|
|
$nesting_token_string, $nesting_type_string, |
1746
|
|
|
|
|
|
|
$nesting_block_string, $nesting_block_flag, |
1747
|
|
|
|
|
|
|
$nesting_list_string, $nesting_list_flag, |
1748
|
|
|
|
|
|
|
$ci_string_in_tokenizer, $continuation_string_in_tokenizer, |
1749
|
|
|
|
|
|
|
$in_statement_continuation, $level_in_tokenizer, |
1750
|
|
|
|
|
|
|
$slevel_in_tokenizer, $rslevel_stack, |
1751
|
|
|
|
|
|
|
]; |
1752
|
|
|
|
|
|
|
|
1753
|
0
|
|
|
|
|
0
|
my $rTV6 = [ |
1754
|
|
|
|
|
|
|
$last_nonblank_container_type, |
1755
|
|
|
|
|
|
|
$last_nonblank_type_sequence, |
1756
|
|
|
|
|
|
|
$last_last_nonblank_token, |
1757
|
|
|
|
|
|
|
$last_last_nonblank_type, |
1758
|
|
|
|
|
|
|
$last_last_nonblank_block_type, |
1759
|
|
|
|
|
|
|
$last_last_nonblank_container_type, |
1760
|
|
|
|
|
|
|
$last_last_nonblank_type_sequence, |
1761
|
|
|
|
|
|
|
$last_nonblank_prototype, |
1762
|
|
|
|
|
|
|
]; |
1763
|
0
|
|
|
|
|
0
|
return [ $rGV1, $rTV1, $rTV2, $rTV3, $rTV4, $rTV5, $rTV6 ]; |
1764
|
|
|
|
|
|
|
} ## end sub save_tokenizer_state |
1765
|
|
|
|
|
|
|
|
1766
|
|
|
|
|
|
|
sub restore_tokenizer_state { |
1767
|
0
|
|
|
0
|
0
|
0
|
my ($rstate) = @_; |
1768
|
0
|
|
|
|
|
0
|
my ( $rGV1, $rTV1, $rTV2, $rTV3, $rTV4, $rTV5, $rTV6 ) = @{$rstate}; |
|
0
|
|
|
|
|
0
|
|
1769
|
|
|
|
|
|
|
|
1770
|
|
|
|
|
|
|
( |
1771
|
|
|
|
|
|
|
$brace_depth, |
1772
|
|
|
|
|
|
|
$context, |
1773
|
|
|
|
|
|
|
$current_package, |
1774
|
|
|
|
|
|
|
$last_nonblank_block_type, |
1775
|
|
|
|
|
|
|
$last_nonblank_token, |
1776
|
|
|
|
|
|
|
$last_nonblank_type, |
1777
|
|
|
|
|
|
|
$next_sequence_number, |
1778
|
|
|
|
|
|
|
$paren_depth, |
1779
|
|
|
|
|
|
|
$rbrace_context, |
1780
|
|
|
|
|
|
|
$rbrace_package, |
1781
|
|
|
|
|
|
|
$rbrace_structural_type, |
1782
|
|
|
|
|
|
|
$rbrace_type, |
1783
|
|
|
|
|
|
|
$rcurrent_depth, |
1784
|
|
|
|
|
|
|
$rcurrent_sequence_number, |
1785
|
|
|
|
|
|
|
$rdepth_array, |
1786
|
|
|
|
|
|
|
$ris_block_function, |
1787
|
|
|
|
|
|
|
$ris_block_list_function, |
1788
|
|
|
|
|
|
|
$ris_constant, |
1789
|
|
|
|
|
|
|
$ris_user_function, |
1790
|
|
|
|
|
|
|
$rnested_statement_type, |
1791
|
|
|
|
|
|
|
$rnested_ternary_flag, |
1792
|
|
|
|
|
|
|
$rparen_semicolon_count, |
1793
|
|
|
|
|
|
|
$rparen_structural_type, |
1794
|
|
|
|
|
|
|
$rparen_type, |
1795
|
|
|
|
|
|
|
$rsaw_function_definition, |
1796
|
|
|
|
|
|
|
$rsaw_use_module, |
1797
|
|
|
|
|
|
|
$rsquare_bracket_structural_type, |
1798
|
|
|
|
|
|
|
$rsquare_bracket_type, |
1799
|
|
|
|
|
|
|
$rstarting_line_of_current_depth, |
1800
|
|
|
|
|
|
|
$rtotal_depth, |
1801
|
|
|
|
|
|
|
$ruser_function_prototype, |
1802
|
|
|
|
|
|
|
$square_bracket_depth, |
1803
|
|
|
|
|
|
|
$statement_type, |
1804
|
|
|
|
|
|
|
$total_depth, |
1805
|
|
|
|
|
|
|
|
1806
|
0
|
|
|
|
|
0
|
) = @{$rGV1}; |
|
0
|
|
|
|
|
0
|
|
1807
|
|
|
|
|
|
|
|
1808
|
|
|
|
|
|
|
( |
1809
|
|
|
|
|
|
|
$block_type, $container_type, $expecting, |
1810
|
|
|
|
|
|
|
$i, $i_tok, $input_line, |
1811
|
|
|
|
|
|
|
$input_line_number, $last_nonblank_i, $max_token_index, |
1812
|
|
|
|
|
|
|
$next_tok, $next_type, $peeked_ahead, |
1813
|
|
|
|
|
|
|
$prototype, $rhere_target_list, $rtoken_map, |
1814
|
|
|
|
|
|
|
$rtoken_type, $rtokens, $tok, |
1815
|
|
|
|
|
|
|
$type, $type_sequence, $indent_flag, |
1816
|
0
|
|
|
|
|
0
|
) = @{$rTV1}; |
|
0
|
|
|
|
|
0
|
|
1817
|
|
|
|
|
|
|
|
1818
|
|
|
|
|
|
|
( |
1819
|
|
|
|
|
|
|
$routput_token_list, $routput_token_type, |
1820
|
|
|
|
|
|
|
$routput_block_type, $routput_container_type, |
1821
|
|
|
|
|
|
|
$routput_type_sequence, $routput_indent_flag, |
1822
|
0
|
|
|
|
|
0
|
) = @{$rTV2}; |
|
0
|
|
|
|
|
0
|
|
1823
|
|
|
|
|
|
|
|
1824
|
|
|
|
|
|
|
( |
1825
|
|
|
|
|
|
|
$in_quote, $quote_type, $quote_character, $quote_pos, $quote_depth, |
1826
|
|
|
|
|
|
|
$quoted_string_1, $quoted_string_2, $allowed_quote_modifiers, |
1827
|
0
|
|
|
|
|
0
|
) = @{$rTV3}; |
|
0
|
|
|
|
|
0
|
|
1828
|
|
|
|
|
|
|
|
1829
|
0
|
|
|
|
|
0
|
( $id_scan_state, $identifier, $want_paren ) = @{$rTV4}; |
|
0
|
|
|
|
|
0
|
|
1830
|
|
|
|
|
|
|
|
1831
|
|
|
|
|
|
|
( |
1832
|
|
|
|
|
|
|
$nesting_token_string, $nesting_type_string, |
1833
|
|
|
|
|
|
|
$nesting_block_string, $nesting_block_flag, |
1834
|
|
|
|
|
|
|
$nesting_list_string, $nesting_list_flag, |
1835
|
|
|
|
|
|
|
$ci_string_in_tokenizer, $continuation_string_in_tokenizer, |
1836
|
|
|
|
|
|
|
$in_statement_continuation, $level_in_tokenizer, |
1837
|
|
|
|
|
|
|
$slevel_in_tokenizer, $rslevel_stack, |
1838
|
0
|
|
|
|
|
0
|
) = @{$rTV5}; |
|
0
|
|
|
|
|
0
|
|
1839
|
|
|
|
|
|
|
|
1840
|
|
|
|
|
|
|
( |
1841
|
|
|
|
|
|
|
$last_nonblank_container_type, |
1842
|
|
|
|
|
|
|
$last_nonblank_type_sequence, |
1843
|
|
|
|
|
|
|
$last_last_nonblank_token, |
1844
|
|
|
|
|
|
|
$last_last_nonblank_type, |
1845
|
|
|
|
|
|
|
$last_last_nonblank_block_type, |
1846
|
|
|
|
|
|
|
$last_last_nonblank_container_type, |
1847
|
|
|
|
|
|
|
$last_last_nonblank_type_sequence, |
1848
|
|
|
|
|
|
|
$last_nonblank_prototype, |
1849
|
0
|
|
|
|
|
0
|
) = @{$rTV6}; |
|
0
|
|
|
|
|
0
|
|
1850
|
0
|
|
|
|
|
0
|
return; |
1851
|
|
|
|
|
|
|
} ## end sub restore_tokenizer_state |
1852
|
|
|
|
|
|
|
|
1853
|
|
|
|
|
|
|
sub split_pretoken { |
1854
|
|
|
|
|
|
|
|
1855
|
8
|
|
|
8
|
0
|
18
|
my ( $self, $numc ) = @_; |
1856
|
|
|
|
|
|
|
|
1857
|
|
|
|
|
|
|
# Split the leading $numc characters from the current token (at index=$i) |
1858
|
|
|
|
|
|
|
# which is pre-type 'w' and insert the remainder back into the pretoken |
1859
|
|
|
|
|
|
|
# stream with appropriate settings. Since we are splitting a pre-type 'w', |
1860
|
|
|
|
|
|
|
# there are three cases, depending on if the remainder starts with a digit: |
1861
|
|
|
|
|
|
|
# Case 1: remainder is type 'd', all digits |
1862
|
|
|
|
|
|
|
# Case 2: remainder is type 'd' and type 'w': digits and other characters |
1863
|
|
|
|
|
|
|
# Case 3: remainder is type 'w' |
1864
|
|
|
|
|
|
|
|
1865
|
|
|
|
|
|
|
# Examples, for $numc=1: |
1866
|
|
|
|
|
|
|
# $tok => $tok_0 $tok_1 $tok_2 |
1867
|
|
|
|
|
|
|
# 'x10' => 'x' '10' # case 1 |
1868
|
|
|
|
|
|
|
# 'x10if' => 'x' '10' 'if' # case 2 |
1869
|
|
|
|
|
|
|
# '0ne => 'O' 'ne' # case 3 |
1870
|
|
|
|
|
|
|
|
1871
|
|
|
|
|
|
|
# where: |
1872
|
|
|
|
|
|
|
# $tok_1 is a possible string of digits (pre-type 'd') |
1873
|
|
|
|
|
|
|
# $tok_2 is a possible word (pre-type 'w') |
1874
|
|
|
|
|
|
|
|
1875
|
|
|
|
|
|
|
# return 1 if successful |
1876
|
|
|
|
|
|
|
# return undef if error (shouldn't happen) |
1877
|
|
|
|
|
|
|
|
1878
|
|
|
|
|
|
|
# Calling routine should update '$type' and '$tok' if successful. |
1879
|
|
|
|
|
|
|
|
1880
|
8
|
|
|
|
|
21
|
my $pretoken = $rtokens->[$i]; |
1881
|
8
|
50
|
33
|
|
|
75
|
if ( $pretoken |
|
|
|
33
|
|
|
|
|
1882
|
|
|
|
|
|
|
&& length($pretoken) > $numc |
1883
|
|
|
|
|
|
|
&& substr( $pretoken, $numc ) =~ /^(\d*)(.*)$/ ) |
1884
|
|
|
|
|
|
|
{ |
1885
|
|
|
|
|
|
|
|
1886
|
|
|
|
|
|
|
# Split $tok into up to 3 tokens: |
1887
|
8
|
|
|
|
|
22
|
my $tok_0 = substr( $pretoken, 0, $numc ); |
1888
|
8
|
50
|
|
|
|
29
|
my $tok_1 = defined($1) ? $1 : EMPTY_STRING; |
1889
|
8
|
50
|
|
|
|
27
|
my $tok_2 = defined($2) ? $2 : EMPTY_STRING; |
1890
|
|
|
|
|
|
|
|
1891
|
8
|
|
|
|
|
17
|
my $len_0 = length($tok_0); |
1892
|
8
|
|
|
|
|
14
|
my $len_1 = length($tok_1); |
1893
|
8
|
|
|
|
|
13
|
my $len_2 = length($tok_2); |
1894
|
|
|
|
|
|
|
|
1895
|
8
|
|
|
|
|
14
|
my $pre_type_0 = 'w'; |
1896
|
8
|
|
|
|
|
14
|
my $pre_type_1 = 'd'; |
1897
|
8
|
|
|
|
|
12
|
my $pre_type_2 = 'w'; |
1898
|
|
|
|
|
|
|
|
1899
|
8
|
|
|
|
|
16
|
my $pos_0 = $rtoken_map->[$i]; |
1900
|
8
|
|
|
|
|
25
|
my $pos_1 = $pos_0 + $len_0; |
1901
|
8
|
|
|
|
|
19
|
my $pos_2 = $pos_1 + $len_1; |
1902
|
|
|
|
|
|
|
|
1903
|
8
|
|
|
|
|
15
|
my $isplice = $i + 1; |
1904
|
|
|
|
|
|
|
|
1905
|
|
|
|
|
|
|
# Splice in any digits |
1906
|
8
|
100
|
|
|
|
20
|
if ($len_1) { |
1907
|
5
|
|
|
|
|
10
|
splice @{$rtoken_map}, $isplice, 0, $pos_1; |
|
5
|
|
|
|
|
14
|
|
1908
|
5
|
|
|
|
|
10
|
splice @{$rtokens}, $isplice, 0, $tok_1; |
|
5
|
|
|
|
|
13
|
|
1909
|
5
|
|
|
|
|
11
|
splice @{$rtoken_type}, $isplice, 0, $pre_type_1; |
|
5
|
|
|
|
|
10
|
|
1910
|
5
|
|
|
|
|
10
|
$max_token_index++; |
1911
|
5
|
|
|
|
|
8
|
$isplice++; |
1912
|
|
|
|
|
|
|
} |
1913
|
|
|
|
|
|
|
|
1914
|
|
|
|
|
|
|
# Splice in any trailing word |
1915
|
8
|
100
|
|
|
|
22
|
if ($len_2) { |
1916
|
4
|
|
|
|
|
7
|
splice @{$rtoken_map}, $isplice, 0, $pos_2; |
|
4
|
|
|
|
|
11
|
|
1917
|
4
|
|
|
|
|
21
|
splice @{$rtokens}, $isplice, 0, $tok_2; |
|
4
|
|
|
|
|
9
|
|
1918
|
4
|
|
|
|
|
8
|
splice @{$rtoken_type}, $isplice, 0, $pre_type_2; |
|
4
|
|
|
|
|
8
|
|
1919
|
4
|
|
|
|
|
6
|
$max_token_index++; |
1920
|
|
|
|
|
|
|
} |
1921
|
|
|
|
|
|
|
|
1922
|
8
|
|
|
|
|
18
|
$rtokens->[$i] = $tok_0; |
1923
|
8
|
|
|
|
|
31
|
return 1; |
1924
|
|
|
|
|
|
|
} |
1925
|
|
|
|
|
|
|
else { |
1926
|
|
|
|
|
|
|
|
1927
|
|
|
|
|
|
|
# Shouldn't get here |
1928
|
0
|
|
|
|
|
0
|
if (DEVEL_MODE) { |
1929
|
|
|
|
|
|
|
$self->Fault(<<EOM); |
1930
|
|
|
|
|
|
|
While working near line number $input_line_number, bad arg '$tok' passed to sub split_pretoken() |
1931
|
|
|
|
|
|
|
EOM |
1932
|
|
|
|
|
|
|
} |
1933
|
|
|
|
|
|
|
} |
1934
|
0
|
|
|
|
|
0
|
return; |
1935
|
|
|
|
|
|
|
} ## end sub split_pretoken |
1936
|
|
|
|
|
|
|
|
1937
|
|
|
|
|
|
|
sub get_indentation_level { |
1938
|
556
|
|
|
556
|
0
|
1528
|
return $level_in_tokenizer; |
1939
|
|
|
|
|
|
|
} |
1940
|
|
|
|
|
|
|
|
1941
|
|
|
|
|
|
|
sub reset_indentation_level { |
1942
|
556
|
|
|
556
|
0
|
1553
|
$level_in_tokenizer = $slevel_in_tokenizer = shift; |
1943
|
556
|
|
|
|
|
1062
|
push @{$rslevel_stack}, $slevel_in_tokenizer; |
|
556
|
|
|
|
|
1478
|
|
1944
|
556
|
|
|
|
|
1056
|
return; |
1945
|
|
|
|
|
|
|
} |
1946
|
|
|
|
|
|
|
|
1947
|
|
|
|
|
|
|
sub peeked_ahead { |
1948
|
232
|
|
|
232
|
0
|
429
|
my $flag = shift; |
1949
|
232
|
100
|
|
|
|
588
|
$peeked_ahead = defined($flag) ? $flag : $peeked_ahead; |
1950
|
232
|
|
|
|
|
534
|
return $peeked_ahead; |
1951
|
|
|
|
|
|
|
} |
1952
|
|
|
|
|
|
|
|
1953
|
|
|
|
|
|
|
# ------------------------------------------------------------ |
1954
|
|
|
|
|
|
|
# end of tokenizer variable access and manipulation routines |
1955
|
|
|
|
|
|
|
# ------------------------------------------------------------ |
1956
|
|
|
|
|
|
|
|
1957
|
|
|
|
|
|
|
#------------------------------ |
1958
|
|
|
|
|
|
|
# beginning of tokenizer hashes |
1959
|
|
|
|
|
|
|
#------------------------------ |
1960
|
|
|
|
|
|
|
|
1961
|
|
|
|
|
|
|
my %matching_start_token = ( '}' => '{', ']' => '[', ')' => '(' ); |
1962
|
|
|
|
|
|
|
|
1963
|
|
|
|
|
|
|
# These block types terminate statements and do not need a trailing |
1964
|
|
|
|
|
|
|
# semicolon |
1965
|
|
|
|
|
|
|
# patched for SWITCH/CASE/ |
1966
|
|
|
|
|
|
|
my %is_zero_continuation_block_type; |
1967
|
|
|
|
|
|
|
my @q; |
1968
|
|
|
|
|
|
|
@q = qw( } { BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue ; |
1969
|
|
|
|
|
|
|
if elsif else unless while until for foreach switch case given when); |
1970
|
|
|
|
|
|
|
@is_zero_continuation_block_type{@q} = (1) x scalar(@q); |
1971
|
|
|
|
|
|
|
|
1972
|
|
|
|
|
|
|
my %is_logical_container; |
1973
|
|
|
|
|
|
|
@q = qw(if elsif unless while and or err not && ! || for foreach); |
1974
|
|
|
|
|
|
|
@is_logical_container{@q} = (1) x scalar(@q); |
1975
|
|
|
|
|
|
|
|
1976
|
|
|
|
|
|
|
my %is_binary_type; |
1977
|
|
|
|
|
|
|
@q = qw(|| &&); |
1978
|
|
|
|
|
|
|
@is_binary_type{@q} = (1) x scalar(@q); |
1979
|
|
|
|
|
|
|
|
1980
|
|
|
|
|
|
|
my %is_binary_keyword; |
1981
|
|
|
|
|
|
|
@q = qw(and or err eq ne cmp); |
1982
|
|
|
|
|
|
|
@is_binary_keyword{@q} = (1) x scalar(@q); |
1983
|
|
|
|
|
|
|
|
1984
|
|
|
|
|
|
|
# 'L' is token for opening { at hash key |
1985
|
|
|
|
|
|
|
my %is_opening_type; |
1986
|
|
|
|
|
|
|
@q = qw< L { ( [ >; |
1987
|
|
|
|
|
|
|
@is_opening_type{@q} = (1) x scalar(@q); |
1988
|
|
|
|
|
|
|
|
1989
|
|
|
|
|
|
|
my %is_opening_or_ternary_type; |
1990
|
|
|
|
|
|
|
push @q, '?'; |
1991
|
|
|
|
|
|
|
@is_opening_or_ternary_type{@q} = (1) x scalar(@q); |
1992
|
|
|
|
|
|
|
|
1993
|
|
|
|
|
|
|
# 'R' is token for closing } at hash key |
1994
|
|
|
|
|
|
|
my %is_closing_type; |
1995
|
|
|
|
|
|
|
@q = qw< R } ) ] >; |
1996
|
|
|
|
|
|
|
@is_closing_type{@q} = (1) x scalar(@q); |
1997
|
|
|
|
|
|
|
|
1998
|
|
|
|
|
|
|
my %is_closing_or_ternary_type; |
1999
|
|
|
|
|
|
|
push @q, ':'; |
2000
|
|
|
|
|
|
|
@is_closing_or_ternary_type{@q} = (1) x scalar(@q); |
2001
|
|
|
|
|
|
|
|
2002
|
|
|
|
|
|
|
my %is_redo_last_next_goto; |
2003
|
|
|
|
|
|
|
@q = qw(redo last next goto); |
2004
|
|
|
|
|
|
|
@is_redo_last_next_goto{@q} = (1) x scalar(@q); |
2005
|
|
|
|
|
|
|
|
2006
|
|
|
|
|
|
|
my %is_use_require; |
2007
|
|
|
|
|
|
|
@q = qw(use require); |
2008
|
|
|
|
|
|
|
@is_use_require{@q} = (1) x scalar(@q); |
2009
|
|
|
|
|
|
|
|
2010
|
|
|
|
|
|
|
# This hash holds the array index in $self for these keywords: |
2011
|
|
|
|
|
|
|
# Fix for issue c035: removed 'format' from this hash |
2012
|
|
|
|
|
|
|
my %is_END_DATA = ( |
2013
|
|
|
|
|
|
|
'__END__' => _in_end_, |
2014
|
|
|
|
|
|
|
'__DATA__' => _in_data_, |
2015
|
|
|
|
|
|
|
); |
2016
|
|
|
|
|
|
|
|
2017
|
|
|
|
|
|
|
my %is_list_end_type; |
2018
|
|
|
|
|
|
|
@q = qw( ; { } ); |
2019
|
|
|
|
|
|
|
push @q, ','; |
2020
|
|
|
|
|
|
|
@is_list_end_type{@q} = (1) x scalar(@q); |
2021
|
|
|
|
|
|
|
|
2022
|
|
|
|
|
|
|
# original ref: camel 3 p 147, |
2023
|
|
|
|
|
|
|
# but perl may accept undocumented flags |
2024
|
|
|
|
|
|
|
# perl 5.10 adds 'p' (preserve) |
2025
|
|
|
|
|
|
|
# Perl version 5.22 added 'n' |
2026
|
|
|
|
|
|
|
# From http://perldoc.perl.org/perlop.html we have |
2027
|
|
|
|
|
|
|
# /PATTERN/msixpodualngc or m?PATTERN?msixpodualngc |
2028
|
|
|
|
|
|
|
# s/PATTERN/REPLACEMENT/msixpodualngcer |
2029
|
|
|
|
|
|
|
# y/SEARCHLIST/REPLACEMENTLIST/cdsr |
2030
|
|
|
|
|
|
|
# tr/SEARCHLIST/REPLACEMENTLIST/cdsr |
2031
|
|
|
|
|
|
|
# qr/STRING/msixpodualn |
2032
|
|
|
|
|
|
|
my %quote_modifiers = ( |
2033
|
|
|
|
|
|
|
's' => '[msixpodualngcer]', |
2034
|
|
|
|
|
|
|
'y' => '[cdsr]', |
2035
|
|
|
|
|
|
|
'tr' => '[cdsr]', |
2036
|
|
|
|
|
|
|
'm' => '[msixpodualngc]', |
2037
|
|
|
|
|
|
|
'qr' => '[msixpodualn]', |
2038
|
|
|
|
|
|
|
'q' => EMPTY_STRING, |
2039
|
|
|
|
|
|
|
'qq' => EMPTY_STRING, |
2040
|
|
|
|
|
|
|
'qw' => EMPTY_STRING, |
2041
|
|
|
|
|
|
|
'qx' => EMPTY_STRING, |
2042
|
|
|
|
|
|
|
); |
2043
|
|
|
|
|
|
|
|
2044
|
|
|
|
|
|
|
# table showing how many quoted things to look for after quote operator.. |
2045
|
|
|
|
|
|
|
# s, y, tr have 2 (pattern and replacement) |
2046
|
|
|
|
|
|
|
# others have 1 (pattern only) |
2047
|
|
|
|
|
|
|
my %quote_items = ( |
2048
|
|
|
|
|
|
|
's' => 2, |
2049
|
|
|
|
|
|
|
'y' => 2, |
2050
|
|
|
|
|
|
|
'tr' => 2, |
2051
|
|
|
|
|
|
|
'm' => 1, |
2052
|
|
|
|
|
|
|
'qr' => 1, |
2053
|
|
|
|
|
|
|
'q' => 1, |
2054
|
|
|
|
|
|
|
'qq' => 1, |
2055
|
|
|
|
|
|
|
'qw' => 1, |
2056
|
|
|
|
|
|
|
'qx' => 1, |
2057
|
|
|
|
|
|
|
); |
2058
|
|
|
|
|
|
|
|
2059
|
|
|
|
|
|
|
my %is_for_foreach; |
2060
|
|
|
|
|
|
|
@q = qw(for foreach); |
2061
|
|
|
|
|
|
|
@is_for_foreach{@q} = (1) x scalar(@q); |
2062
|
|
|
|
|
|
|
|
2063
|
|
|
|
|
|
|
# These keywords may introduce blocks after parenthesized expressions, |
2064
|
|
|
|
|
|
|
# in the form: |
2065
|
|
|
|
|
|
|
# keyword ( .... ) { BLOCK } |
2066
|
|
|
|
|
|
|
# patch for SWITCH/CASE: added 'switch' 'case' 'given' 'when' |
2067
|
|
|
|
|
|
|
# NOTE for --use-feature=class: if ADJUST blocks eventually take a |
2068
|
|
|
|
|
|
|
# parameter list, then ADJUST might need to be added to this list (see |
2069
|
|
|
|
|
|
|
# perlclass.pod) |
2070
|
|
|
|
|
|
|
my %is_blocktype_with_paren; |
2071
|
|
|
|
|
|
|
@q = |
2072
|
|
|
|
|
|
|
qw(if elsif unless while until for foreach switch case given when catch); |
2073
|
|
|
|
|
|
|
@is_blocktype_with_paren{@q} = (1) x scalar(@q); |
2074
|
|
|
|
|
|
|
|
2075
|
|
|
|
|
|
|
my %is_case_default; |
2076
|
|
|
|
|
|
|
@q = qw(case default); |
2077
|
|
|
|
|
|
|
@is_case_default{@q} = (1) x scalar(@q); |
2078
|
|
|
|
|
|
|
|
2079
|
|
|
|
|
|
|
#------------------------ |
2080
|
|
|
|
|
|
|
# end of tokenizer hashes |
2081
|
|
|
|
|
|
|
#------------------------ |
2082
|
|
|
|
|
|
|
|
2083
|
|
|
|
|
|
|
# ------------------------------------------------------------ |
2084
|
|
|
|
|
|
|
# beginning of various scanner interface routines |
2085
|
|
|
|
|
|
|
# ------------------------------------------------------------ |
2086
|
|
|
|
|
|
|
sub scan_replacement_text { |
2087
|
|
|
|
|
|
|
|
2088
|
|
|
|
|
|
|
# check for here-docs in replacement text invoked by |
2089
|
|
|
|
|
|
|
# a substitution operator with executable modifier 'e'. |
2090
|
|
|
|
|
|
|
# |
2091
|
|
|
|
|
|
|
# given: |
2092
|
|
|
|
|
|
|
# $replacement_text |
2093
|
|
|
|
|
|
|
# return: |
2094
|
|
|
|
|
|
|
# $rht = reference to any here-doc targets |
2095
|
0
|
|
|
0
|
0
|
0
|
my ( $self, $replacement_text ) = @_; |
2096
|
|
|
|
|
|
|
|
2097
|
|
|
|
|
|
|
# quick check |
2098
|
0
|
0
|
|
|
|
0
|
return unless ( $replacement_text =~ /<</ ); |
2099
|
|
|
|
|
|
|
|
2100
|
0
|
|
|
|
|
0
|
$self->write_logfile_entry( |
2101
|
|
|
|
|
|
|
"scanning replacement text for here-doc targets\n"); |
2102
|
|
|
|
|
|
|
|
2103
|
|
|
|
|
|
|
# save the logger object for error messages |
2104
|
0
|
|
|
|
|
0
|
my $logger_object = $self->[_logger_object_]; |
2105
|
|
|
|
|
|
|
|
2106
|
|
|
|
|
|
|
# save all lexical variables |
2107
|
0
|
|
|
|
|
0
|
my $rstate = save_tokenizer_state(); |
2108
|
0
|
|
|
|
|
0
|
_decrement_count(); # avoid error check for multiple tokenizers |
2109
|
|
|
|
|
|
|
|
2110
|
|
|
|
|
|
|
# make a new tokenizer |
2111
|
0
|
|
|
|
|
0
|
my $rOpts = {}; |
2112
|
0
|
|
|
|
|
0
|
my $source_object = Perl::Tidy::LineSource->new( |
2113
|
|
|
|
|
|
|
input_file => \$replacement_text, |
2114
|
|
|
|
|
|
|
rOpts => $rOpts, |
2115
|
|
|
|
|
|
|
); |
2116
|
0
|
|
|
|
|
0
|
my $tokenizer = Perl::Tidy::Tokenizer->new( |
2117
|
|
|
|
|
|
|
source_object => $source_object, |
2118
|
|
|
|
|
|
|
logger_object => $logger_object, |
2119
|
|
|
|
|
|
|
starting_line_number => $input_line_number, |
2120
|
|
|
|
|
|
|
); |
2121
|
|
|
|
|
|
|
|
2122
|
|
|
|
|
|
|
# scan the replacement text |
2123
|
0
|
|
|
|
|
0
|
1 while ( $tokenizer->get_line() ); |
2124
|
|
|
|
|
|
|
|
2125
|
|
|
|
|
|
|
# remove any here doc targets |
2126
|
0
|
|
|
|
|
0
|
my $rht = undef; |
2127
|
0
|
0
|
|
|
|
0
|
if ( $tokenizer->[_in_here_doc_] ) { |
2128
|
0
|
|
|
|
|
0
|
$rht = []; |
2129
|
0
|
|
|
|
|
0
|
push @{$rht}, |
|
0
|
|
|
|
|
0
|
|
2130
|
|
|
|
|
|
|
[ |
2131
|
|
|
|
|
|
|
$tokenizer->[_here_doc_target_], |
2132
|
|
|
|
|
|
|
$tokenizer->[_here_quote_character_] |
2133
|
|
|
|
|
|
|
]; |
2134
|
0
|
0
|
|
|
|
0
|
if ( $tokenizer->[_rhere_target_list_] ) { |
2135
|
0
|
|
|
|
|
0
|
push @{$rht}, @{ $tokenizer->[_rhere_target_list_] }; |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
2136
|
0
|
|
|
|
|
0
|
$tokenizer->[_rhere_target_list_] = undef; |
2137
|
|
|
|
|
|
|
} |
2138
|
0
|
|
|
|
|
0
|
$tokenizer->[_in_here_doc_] = undef; |
2139
|
|
|
|
|
|
|
} |
2140
|
|
|
|
|
|
|
|
2141
|
|
|
|
|
|
|
# now its safe to report errors |
2142
|
0
|
|
|
|
|
0
|
my $severe_error = $tokenizer->report_tokenization_errors(); |
2143
|
|
|
|
|
|
|
|
2144
|
|
|
|
|
|
|
# TODO: Could propagate a severe error up |
2145
|
|
|
|
|
|
|
|
2146
|
|
|
|
|
|
|
# restore all tokenizer lexical variables |
2147
|
0
|
|
|
|
|
0
|
restore_tokenizer_state($rstate); |
2148
|
|
|
|
|
|
|
|
2149
|
|
|
|
|
|
|
# return the here doc targets |
2150
|
0
|
|
|
|
|
0
|
return $rht; |
2151
|
|
|
|
|
|
|
} ## end sub scan_replacement_text |
2152
|
|
|
|
|
|
|
|
2153
|
|
|
|
|
|
|
sub scan_bare_identifier { |
2154
|
1672
|
|
|
1672
|
0
|
3074
|
my $self = shift; |
2155
|
1672
|
|
|
|
|
5135
|
( $i, $tok, $type, $prototype ) = |
2156
|
|
|
|
|
|
|
$self->scan_bare_identifier_do( $input_line, $i, $tok, $type, |
2157
|
|
|
|
|
|
|
$prototype, $rtoken_map, $max_token_index ); |
2158
|
1672
|
|
|
|
|
3496
|
return; |
2159
|
|
|
|
|
|
|
} ## end sub scan_bare_identifier |
2160
|
|
|
|
|
|
|
|
2161
|
|
|
|
|
|
|
sub scan_identifier { |
2162
|
|
|
|
|
|
|
|
2163
|
486
|
|
|
486
|
0
|
923
|
my $self = shift; |
2164
|
|
|
|
|
|
|
|
2165
|
|
|
|
|
|
|
( |
2166
|
486
|
|
|
|
|
2936
|
$i, $tok, $type, $id_scan_state, $identifier, |
2167
|
|
|
|
|
|
|
my $split_pretoken_flag |
2168
|
|
|
|
|
|
|
) |
2169
|
|
|
|
|
|
|
= $self->scan_complex_identifier( $i, $id_scan_state, $identifier, |
2170
|
|
|
|
|
|
|
$rtokens, $max_token_index, $expecting, |
2171
|
|
|
|
|
|
|
$rparen_type->[$paren_depth] ); |
2172
|
|
|
|
|
|
|
|
2173
|
|
|
|
|
|
|
# Check for signal to fix a special variable adjacent to a keyword, |
2174
|
|
|
|
|
|
|
# such as '$^One$0'. |
2175
|
486
|
100
|
|
|
|
1587
|
if ($split_pretoken_flag) { |
2176
|
|
|
|
|
|
|
|
2177
|
|
|
|
|
|
|
# Try to fix it by splitting the pretoken |
2178
|
3
|
50
|
33
|
|
|
34
|
if ( $i > 0 |
|
|
|
33
|
|
|
|
|
2179
|
|
|
|
|
|
|
&& $rtokens->[ $i - 1 ] eq '^' |
2180
|
|
|
|
|
|
|
&& $self->split_pretoken(1) ) |
2181
|
|
|
|
|
|
|
{ |
2182
|
3
|
|
|
|
|
8
|
$identifier = substr( $identifier, 0, 3 ); |
2183
|
3
|
|
|
|
|
6
|
$tok = $identifier; |
2184
|
|
|
|
|
|
|
} |
2185
|
|
|
|
|
|
|
else { |
2186
|
|
|
|
|
|
|
|
2187
|
|
|
|
|
|
|
# This shouldn't happen ... |
2188
|
0
|
|
|
|
|
0
|
my $var = substr( $tok, 0, 3 ); |
2189
|
0
|
|
|
|
|
0
|
my $excess = substr( $tok, 3 ); |
2190
|
0
|
|
|
|
|
0
|
$self->interrupt_logfile(); |
2191
|
0
|
|
|
|
|
0
|
$self->warning(<<EOM); |
2192
|
|
|
|
|
|
|
$input_line_number: Trouble parsing at characters '$excess' after special variable '$var'. |
2193
|
|
|
|
|
|
|
A space may be needed after '$var'. |
2194
|
|
|
|
|
|
|
EOM |
2195
|
0
|
|
|
|
|
0
|
$self->resume_logfile(); |
2196
|
|
|
|
|
|
|
} |
2197
|
|
|
|
|
|
|
} |
2198
|
486
|
|
|
|
|
908
|
return; |
2199
|
|
|
|
|
|
|
} ## end sub scan_identifier |
2200
|
|
|
|
|
|
|
|
2201
|
38
|
|
|
38
|
|
484
|
use constant VERIFY_FASTSCAN => 0; |
|
38
|
|
|
|
|
95
|
|
|
38
|
|
|
|
|
3780
|
|
2202
|
|
|
|
|
|
|
my %fast_scan_context; |
2203
|
|
|
|
|
|
|
|
2204
|
|
|
|
|
|
|
BEGIN { |
2205
|
38
|
|
|
38
|
|
45804
|
%fast_scan_context = ( |
2206
|
|
|
|
|
|
|
'$' => SCALAR_CONTEXT, |
2207
|
|
|
|
|
|
|
'*' => SCALAR_CONTEXT, |
2208
|
|
|
|
|
|
|
'@' => LIST_CONTEXT, |
2209
|
|
|
|
|
|
|
'%' => LIST_CONTEXT, |
2210
|
|
|
|
|
|
|
'&' => UNKNOWN_CONTEXT, |
2211
|
|
|
|
|
|
|
); |
2212
|
|
|
|
|
|
|
} ## end BEGIN |
2213
|
|
|
|
|
|
|
|
2214
|
|
|
|
|
|
|
sub scan_simple_identifier { |
2215
|
|
|
|
|
|
|
|
2216
|
|
|
|
|
|
|
# This is a wrapper for sub scan_identifier. It does a fast preliminary |
2217
|
|
|
|
|
|
|
# scan for certain common identifiers: |
2218
|
|
|
|
|
|
|
# '$var', '@var', %var, *var, &var, '@{...}', '%{...}' |
2219
|
|
|
|
|
|
|
# If it does not find one of these, or this is a restart, it calls the |
2220
|
|
|
|
|
|
|
# original scanner directly. |
2221
|
|
|
|
|
|
|
|
2222
|
|
|
|
|
|
|
# This gives the same results as the full scanner in about 1/4 the |
2223
|
|
|
|
|
|
|
# total runtime for a typical input stream. |
2224
|
|
|
|
|
|
|
|
2225
|
|
|
|
|
|
|
# Notation: |
2226
|
|
|
|
|
|
|
# $var * 2 |
2227
|
|
|
|
|
|
|
# ^^ ^ |
2228
|
|
|
|
|
|
|
# || | |
2229
|
|
|
|
|
|
|
# || ---- $i_next [= next nonblank pretoken ] |
2230
|
|
|
|
|
|
|
# |----$i_plus_1 [= a bareword ] |
2231
|
|
|
|
|
|
|
# ---$i_begin [= a sigil] |
2232
|
|
|
|
|
|
|
|
2233
|
4779
|
|
|
4779
|
0
|
7198
|
my $self = shift; |
2234
|
|
|
|
|
|
|
|
2235
|
4779
|
|
|
|
|
6976
|
my $i_begin = $i; |
2236
|
4779
|
|
|
|
|
7512
|
my $tok_begin = $tok; |
2237
|
4779
|
|
|
|
|
7187
|
my $i_plus_1 = $i + 1; |
2238
|
4779
|
|
|
|
|
6847
|
my $fast_scan_type; |
2239
|
|
|
|
|
|
|
|
2240
|
|
|
|
|
|
|
#------------------------------------------------------- |
2241
|
|
|
|
|
|
|
# Do full scan for anything following a pointer, such as |
2242
|
|
|
|
|
|
|
# $cref->&*; # a postderef |
2243
|
|
|
|
|
|
|
#------------------------------------------------------- |
2244
|
4779
|
100
|
66
|
|
|
27116
|
if ( $last_nonblank_token eq '->' ) { |
|
|
100
|
66
|
|
|
|
|
|
|
50
|
33
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
33
|
|
|
|
|
2245
|
|
|
|
|
|
|
|
2246
|
|
|
|
|
|
|
} |
2247
|
|
|
|
|
|
|
|
2248
|
|
|
|
|
|
|
#------------------------------ |
2249
|
|
|
|
|
|
|
# quick scan with leading sigil |
2250
|
|
|
|
|
|
|
#------------------------------ |
2251
|
|
|
|
|
|
|
elsif ( !$id_scan_state |
2252
|
|
|
|
|
|
|
&& $i_plus_1 <= $max_token_index |
2253
|
|
|
|
|
|
|
&& $fast_scan_context{$tok} ) |
2254
|
|
|
|
|
|
|
{ |
2255
|
4666
|
|
|
|
|
8780
|
$context = $fast_scan_context{$tok}; |
2256
|
|
|
|
|
|
|
|
2257
|
|
|
|
|
|
|
# look for $var, @var, ... |
2258
|
4666
|
100
|
100
|
|
|
11470
|
if ( $rtoken_type->[$i_plus_1] eq 'w' ) { |
|
|
100
|
66
|
|
|
|
|
2259
|
4378
|
|
|
|
|
7355
|
my $pretype_next = EMPTY_STRING; |
2260
|
4378
|
100
|
|
|
|
9429
|
if ( $i_plus_1 < $max_token_index ) { |
2261
|
4262
|
|
|
|
|
7113
|
my $i_next = $i_plus_1 + 1; |
2262
|
4262
|
100
|
100
|
|
|
13989
|
if ( $rtoken_type->[$i_next] eq 'b' |
2263
|
|
|
|
|
|
|
&& $i_next < $max_token_index ) |
2264
|
|
|
|
|
|
|
{ |
2265
|
1699
|
|
|
|
|
2902
|
$i_next += 1; |
2266
|
|
|
|
|
|
|
} |
2267
|
4262
|
|
|
|
|
7283
|
$pretype_next = $rtoken_type->[$i_next]; |
2268
|
|
|
|
|
|
|
} |
2269
|
4378
|
100
|
100
|
|
|
15813
|
if ( $pretype_next ne ':' && $pretype_next ne "'" ) { |
2270
|
|
|
|
|
|
|
|
2271
|
|
|
|
|
|
|
# Found type 'i' like '$var', '@var', or '%var' |
2272
|
4270
|
|
|
|
|
8548
|
$identifier = $tok . $rtokens->[$i_plus_1]; |
2273
|
4270
|
|
|
|
|
6820
|
$tok = $identifier; |
2274
|
4270
|
|
|
|
|
6984
|
$type = 'i'; |
2275
|
4270
|
|
|
|
|
6107
|
$i = $i_plus_1; |
2276
|
4270
|
|
|
|
|
7201
|
$fast_scan_type = $type; |
2277
|
|
|
|
|
|
|
} |
2278
|
|
|
|
|
|
|
} |
2279
|
|
|
|
|
|
|
|
2280
|
|
|
|
|
|
|
# Look for @{ or %{ . |
2281
|
|
|
|
|
|
|
# But we must let the full scanner handle things ${ because it may |
2282
|
|
|
|
|
|
|
# keep going to get a complete identifier like '${#}' . |
2283
|
|
|
|
|
|
|
elsif ( |
2284
|
|
|
|
|
|
|
$rtoken_type->[$i_plus_1] eq '{' |
2285
|
|
|
|
|
|
|
&& ( $tok_begin eq '@' |
2286
|
|
|
|
|
|
|
|| $tok_begin eq '%' ) |
2287
|
|
|
|
|
|
|
) |
2288
|
|
|
|
|
|
|
{ |
2289
|
|
|
|
|
|
|
|
2290
|
30
|
|
|
|
|
81
|
$identifier = $tok; |
2291
|
30
|
|
|
|
|
63
|
$type = 't'; |
2292
|
30
|
|
|
|
|
62
|
$fast_scan_type = $type; |
2293
|
|
|
|
|
|
|
} |
2294
|
|
|
|
|
|
|
} |
2295
|
|
|
|
|
|
|
|
2296
|
|
|
|
|
|
|
#--------------------------- |
2297
|
|
|
|
|
|
|
# Quick scan with leading -> |
2298
|
|
|
|
|
|
|
# Look for ->[ and ->{ |
2299
|
|
|
|
|
|
|
#--------------------------- |
2300
|
|
|
|
|
|
|
elsif ( |
2301
|
|
|
|
|
|
|
$tok eq '->' |
2302
|
|
|
|
|
|
|
&& $i < $max_token_index |
2303
|
|
|
|
|
|
|
&& ( $rtokens->[$i_plus_1] eq '{' |
2304
|
|
|
|
|
|
|
|| $rtokens->[$i_plus_1] eq '[' ) |
2305
|
|
|
|
|
|
|
) |
2306
|
|
|
|
|
|
|
{ |
2307
|
0
|
|
|
|
|
0
|
$type = $tok; |
2308
|
0
|
|
|
|
|
0
|
$fast_scan_type = $type; |
2309
|
0
|
|
|
|
|
0
|
$identifier = $tok; |
2310
|
0
|
|
|
|
|
0
|
$context = UNKNOWN_CONTEXT; |
2311
|
|
|
|
|
|
|
} |
2312
|
|
|
|
|
|
|
|
2313
|
|
|
|
|
|
|
#-------------------------------------- |
2314
|
|
|
|
|
|
|
# Verify correctness during development |
2315
|
|
|
|
|
|
|
#-------------------------------------- |
2316
|
4779
|
|
|
|
|
6730
|
if ( VERIFY_FASTSCAN && $fast_scan_type ) { |
2317
|
|
|
|
|
|
|
|
2318
|
|
|
|
|
|
|
# We will call the full method |
2319
|
|
|
|
|
|
|
my $identifier_simple = $identifier; |
2320
|
|
|
|
|
|
|
my $tok_simple = $tok; |
2321
|
|
|
|
|
|
|
my $i_simple = $i; |
2322
|
|
|
|
|
|
|
my $context_simple = $context; |
2323
|
|
|
|
|
|
|
|
2324
|
|
|
|
|
|
|
$tok = $tok_begin; |
2325
|
|
|
|
|
|
|
$i = $i_begin; |
2326
|
|
|
|
|
|
|
$self->scan_identifier(); |
2327
|
|
|
|
|
|
|
|
2328
|
|
|
|
|
|
|
if ( $tok ne $tok_simple |
2329
|
|
|
|
|
|
|
|| $type ne $fast_scan_type |
2330
|
|
|
|
|
|
|
|| $i != $i_simple |
2331
|
|
|
|
|
|
|
|| $identifier ne $identifier_simple |
2332
|
|
|
|
|
|
|
|| $id_scan_state |
2333
|
|
|
|
|
|
|
|| $context ne $context_simple ) |
2334
|
|
|
|
|
|
|
{ |
2335
|
|
|
|
|
|
|
print STDERR <<EOM; |
2336
|
|
|
|
|
|
|
scan_simple_identifier differs from scan_identifier: |
2337
|
|
|
|
|
|
|
simple: i=$i_simple, tok=$tok_simple, type=$fast_scan_type, ident=$identifier_simple, context='$context_simple |
2338
|
|
|
|
|
|
|
full: i=$i, tok=$tok, type=$type, ident=$identifier, context='$context state=$id_scan_state |
2339
|
|
|
|
|
|
|
EOM |
2340
|
|
|
|
|
|
|
} |
2341
|
|
|
|
|
|
|
} |
2342
|
|
|
|
|
|
|
|
2343
|
|
|
|
|
|
|
#------------------------------------------------- |
2344
|
|
|
|
|
|
|
# call full scanner if fast method did not succeed |
2345
|
|
|
|
|
|
|
#------------------------------------------------- |
2346
|
4779
|
100
|
|
|
|
10150
|
if ( !$fast_scan_type ) { |
2347
|
479
|
|
|
|
|
1798
|
$self->scan_identifier(); |
2348
|
|
|
|
|
|
|
} |
2349
|
4779
|
|
|
|
|
7961
|
return; |
2350
|
|
|
|
|
|
|
} ## end sub scan_simple_identifier |
2351
|
|
|
|
|
|
|
|
2352
|
|
|
|
|
|
|
sub method_ok_here { |
2353
|
|
|
|
|
|
|
|
2354
|
|
|
|
|
|
|
# Return: |
2355
|
|
|
|
|
|
|
# false if this is definitely an invalid method declaration |
2356
|
|
|
|
|
|
|
# true otherwise (even if not sure) |
2357
|
|
|
|
|
|
|
|
2358
|
|
|
|
|
|
|
# We are trying to avoid problems with old uses of 'method' |
2359
|
|
|
|
|
|
|
# when --use-feature=class is set (rt145706). |
2360
|
|
|
|
|
|
|
# For example, this should cause a return of 'false': |
2361
|
|
|
|
|
|
|
|
2362
|
|
|
|
|
|
|
# method paint => sub { |
2363
|
|
|
|
|
|
|
# return; |
2364
|
|
|
|
|
|
|
# }; |
2365
|
|
|
|
|
|
|
|
2366
|
8
|
|
|
8
|
0
|
21
|
my $self = shift; |
2367
|
|
|
|
|
|
|
|
2368
|
|
|
|
|
|
|
# from do_scan_sub: |
2369
|
8
|
|
|
|
|
18
|
my $i_beg = $i + 1; |
2370
|
8
|
|
|
|
|
22
|
my $pos_beg = $rtoken_map->[$i_beg]; |
2371
|
8
|
|
|
|
|
24
|
pos($input_line) = $pos_beg; |
2372
|
|
|
|
|
|
|
|
2373
|
|
|
|
|
|
|
# TEST 1: look a valid sub NAME |
2374
|
8
|
50
|
|
|
|
54
|
if ( |
2375
|
|
|
|
|
|
|
$input_line =~ m/\G\s* |
2376
|
|
|
|
|
|
|
((?:\w*(?:'|::))*) # package - something that ends in :: or ' |
2377
|
|
|
|
|
|
|
(\w+) # NAME - required |
2378
|
|
|
|
|
|
|
/gcx |
2379
|
|
|
|
|
|
|
) |
2380
|
|
|
|
|
|
|
{ |
2381
|
|
|
|
|
|
|
# For possible future use.. |
2382
|
8
|
|
|
|
|
25
|
my $subname = $2; |
2383
|
8
|
50
|
|
|
|
33
|
my $package = $1 ? $1 : EMPTY_STRING; |
2384
|
|
|
|
|
|
|
} |
2385
|
|
|
|
|
|
|
else { |
2386
|
0
|
|
|
|
|
0
|
return; |
2387
|
|
|
|
|
|
|
} |
2388
|
|
|
|
|
|
|
|
2389
|
|
|
|
|
|
|
# TEST 2: look for invalid characters after name, such as here: |
2390
|
|
|
|
|
|
|
# method paint => sub { |
2391
|
|
|
|
|
|
|
# ... |
2392
|
|
|
|
|
|
|
# } |
2393
|
8
|
|
|
|
|
19
|
my $next_char = EMPTY_STRING; |
2394
|
8
|
100
|
|
|
|
38
|
if ( $input_line =~ m/\s*(\S)/gcx ) { $next_char = $1 } |
|
7
|
|
|
|
|
21
|
|
2395
|
8
|
100
|
66
|
|
|
48
|
if ( !$next_char || $next_char eq '#' ) { |
2396
|
1
|
|
|
|
|
14
|
( $next_char, my $i_next ) = |
2397
|
|
|
|
|
|
|
$self->find_next_nonblank_token( $max_token_index, |
2398
|
|
|
|
|
|
|
$rtokens, $max_token_index ); |
2399
|
|
|
|
|
|
|
} |
2400
|
|
|
|
|
|
|
|
2401
|
8
|
50
|
|
|
|
27
|
if ( !$next_char ) { |
2402
|
|
|
|
|
|
|
|
2403
|
|
|
|
|
|
|
# out of characters - give up |
2404
|
0
|
|
|
|
|
0
|
return; |
2405
|
|
|
|
|
|
|
} |
2406
|
|
|
|
|
|
|
|
2407
|
|
|
|
|
|
|
# Possibly valid next token types: |
2408
|
|
|
|
|
|
|
# '(' could start prototype or signature |
2409
|
|
|
|
|
|
|
# ':' could start ATTRIBUTE |
2410
|
|
|
|
|
|
|
# '{' cold start BLOCK |
2411
|
|
|
|
|
|
|
# ';' or '}' could end a statement |
2412
|
8
|
100
|
|
|
|
30
|
if ( $next_char !~ /^[\(\:\{\;\}]/ ) { |
2413
|
|
|
|
|
|
|
|
2414
|
|
|
|
|
|
|
# This does not match use feature 'class' syntax |
2415
|
3
|
|
|
|
|
13
|
return; |
2416
|
|
|
|
|
|
|
} |
2417
|
|
|
|
|
|
|
|
2418
|
|
|
|
|
|
|
# We will stop here and assume that this is valid syntax for |
2419
|
|
|
|
|
|
|
# use feature 'class'. |
2420
|
5
|
|
|
|
|
25
|
return 1; |
2421
|
|
|
|
|
|
|
} ## end sub method_ok_here |
2422
|
|
|
|
|
|
|
|
2423
|
|
|
|
|
|
|
sub class_ok_here { |
2424
|
|
|
|
|
|
|
|
2425
|
|
|
|
|
|
|
# Return: |
2426
|
|
|
|
|
|
|
# false if this is definitely an invalid class declaration |
2427
|
|
|
|
|
|
|
# true otherwise (even if not sure) |
2428
|
|
|
|
|
|
|
|
2429
|
|
|
|
|
|
|
# We are trying to avoid problems with old uses of 'class' |
2430
|
|
|
|
|
|
|
# when --use-feature=class is set (rt145706). We look ahead |
2431
|
|
|
|
|
|
|
# see if this use of 'class' is obviously inconsistent with |
2432
|
|
|
|
|
|
|
# the syntax of use feature 'class'. This allows the default |
2433
|
|
|
|
|
|
|
# setting --use-feature=class to work for old syntax too. |
2434
|
|
|
|
|
|
|
|
2435
|
|
|
|
|
|
|
# Valid class declarations look like |
2436
|
|
|
|
|
|
|
# class NAME ?ATTRS ?VERSION ?BLOCK |
2437
|
|
|
|
|
|
|
# where ATTRS VERSION and BLOCK are optional |
2438
|
|
|
|
|
|
|
|
2439
|
|
|
|
|
|
|
# For example, this should produce a return of 'false': |
2440
|
|
|
|
|
|
|
# |
2441
|
|
|
|
|
|
|
# class ExtendsBasicAttributes is BasicAttributes{ |
2442
|
|
|
|
|
|
|
|
2443
|
6
|
|
|
6
|
0
|
12
|
my $self = shift; |
2444
|
|
|
|
|
|
|
|
2445
|
|
|
|
|
|
|
# TEST 1: class stmt can only go where a new statment can start |
2446
|
6
|
50
|
|
|
|
18
|
if ( !new_statement_ok() ) { return } |
|
0
|
|
|
|
|
0
|
|
2447
|
|
|
|
|
|
|
|
2448
|
6
|
|
|
|
|
12
|
my $i_beg = $i + 1; |
2449
|
6
|
|
|
|
|
15
|
my $pos_beg = $rtoken_map->[$i_beg]; |
2450
|
6
|
|
|
|
|
19
|
pos($input_line) = $pos_beg; |
2451
|
|
|
|
|
|
|
|
2452
|
|
|
|
|
|
|
# TEST 2: look for a valid NAME |
2453
|
6
|
50
|
|
|
|
35
|
if ( |
2454
|
|
|
|
|
|
|
$input_line =~ m/\G\s* |
2455
|
|
|
|
|
|
|
((?:\w*(?:'|::))*) # package - something that ends in :: or ' |
2456
|
|
|
|
|
|
|
(\w+) # NAME - required |
2457
|
|
|
|
|
|
|
/gcx |
2458
|
|
|
|
|
|
|
) |
2459
|
|
|
|
|
|
|
{ |
2460
|
|
|
|
|
|
|
# For possible future use.. |
2461
|
6
|
|
|
|
|
20
|
my $subname = $2; |
2462
|
6
|
100
|
|
|
|
20
|
my $package = $1 ? $1 : EMPTY_STRING; |
2463
|
|
|
|
|
|
|
} |
2464
|
|
|
|
|
|
|
else { |
2465
|
0
|
|
|
|
|
0
|
return; |
2466
|
|
|
|
|
|
|
} |
2467
|
|
|
|
|
|
|
|
2468
|
|
|
|
|
|
|
# TEST 3: look for valid characters after NAME |
2469
|
6
|
|
|
|
|
10
|
my $next_char = EMPTY_STRING; |
2470
|
6
|
100
|
|
|
|
20
|
if ( $input_line =~ m/\s*(\S)/gcx ) { $next_char = $1 } |
|
5
|
|
|
|
|
18
|
|
2471
|
6
|
100
|
66
|
|
|
27
|
if ( !$next_char || $next_char eq '#' ) { |
2472
|
1
|
|
|
|
|
4
|
( $next_char, my $i_next ) = |
2473
|
|
|
|
|
|
|
$self->find_next_nonblank_token( $max_token_index, |
2474
|
|
|
|
|
|
|
$rtokens, $max_token_index ); |
2475
|
|
|
|
|
|
|
} |
2476
|
6
|
50
|
|
|
|
17
|
if ( !$next_char ) { |
2477
|
|
|
|
|
|
|
|
2478
|
|
|
|
|
|
|
# out of characters - give up |
2479
|
0
|
|
|
|
|
0
|
return; |
2480
|
|
|
|
|
|
|
} |
2481
|
|
|
|
|
|
|
|
2482
|
|
|
|
|
|
|
# Must see one of: ATTRIBUTE, VERSION, BLOCK, or end stmt |
2483
|
|
|
|
|
|
|
|
2484
|
|
|
|
|
|
|
# Possibly valid next token types: |
2485
|
|
|
|
|
|
|
# ':' could start ATTRIBUTE |
2486
|
|
|
|
|
|
|
# '\d' could start VERSION |
2487
|
|
|
|
|
|
|
# '{' cold start BLOCK |
2488
|
|
|
|
|
|
|
# ';' could end a statement |
2489
|
|
|
|
|
|
|
# '}' could end statement but would be strange |
2490
|
|
|
|
|
|
|
|
2491
|
6
|
100
|
|
|
|
22
|
if ( $next_char !~ /^[\:\d\{\;\}]/ ) { |
2492
|
|
|
|
|
|
|
|
2493
|
|
|
|
|
|
|
# This does not match use feature 'class' syntax |
2494
|
2
|
|
|
|
|
9
|
return; |
2495
|
|
|
|
|
|
|
} |
2496
|
|
|
|
|
|
|
|
2497
|
|
|
|
|
|
|
# We will stop here and assume that this is valid syntax for |
2498
|
|
|
|
|
|
|
# use feature 'class'. |
2499
|
4
|
|
|
|
|
17
|
return 1; |
2500
|
|
|
|
|
|
|
} ## end sub class_ok_here |
2501
|
|
|
|
|
|
|
|
2502
|
|
|
|
|
|
|
sub scan_id { |
2503
|
330
|
|
|
330
|
0
|
706
|
my $self = shift; |
2504
|
330
|
|
|
|
|
1299
|
( $i, $tok, $type, $id_scan_state ) = |
2505
|
|
|
|
|
|
|
$self->scan_id_do( $input_line, $i, $tok, $rtokens, $rtoken_map, |
2506
|
|
|
|
|
|
|
$id_scan_state, $max_token_index ); |
2507
|
330
|
|
|
|
|
818
|
return; |
2508
|
|
|
|
|
|
|
} ## end sub scan_id |
2509
|
|
|
|
|
|
|
|
2510
|
|
|
|
|
|
|
sub scan_number { |
2511
|
629
|
|
|
629
|
0
|
1134
|
my $self = shift; |
2512
|
629
|
|
|
|
|
1021
|
my $number; |
2513
|
629
|
|
|
|
|
1848
|
( $i, $type, $number ) = |
2514
|
|
|
|
|
|
|
$self->scan_number_do( $input_line, $i, $rtoken_map, $type, |
2515
|
|
|
|
|
|
|
$max_token_index ); |
2516
|
629
|
|
|
|
|
1530
|
return $number; |
2517
|
|
|
|
|
|
|
} ## end sub scan_number |
2518
|
|
|
|
|
|
|
|
2519
|
38
|
|
|
38
|
|
1630
|
use constant VERIFY_FASTNUM => 0; |
|
38
|
|
|
|
|
98
|
|
|
38
|
|
|
|
|
351150
|
|
2520
|
|
|
|
|
|
|
|
2521
|
|
|
|
|
|
|
sub scan_number_fast { |
2522
|
|
|
|
|
|
|
|
2523
|
|
|
|
|
|
|
# This is a wrapper for sub scan_number. It does a fast preliminary |
2524
|
|
|
|
|
|
|
# scan for a simple integer. It calls the original scan_number if it |
2525
|
|
|
|
|
|
|
# does not find one. |
2526
|
|
|
|
|
|
|
|
2527
|
2272
|
|
|
2272
|
0
|
3565
|
my $self = shift; |
2528
|
2272
|
|
|
|
|
3440
|
my $i_begin = $i; |
2529
|
2272
|
|
|
|
|
3499
|
my $tok_begin = $tok; |
2530
|
2272
|
|
|
|
|
3248
|
my $number; |
2531
|
|
|
|
|
|
|
|
2532
|
|
|
|
|
|
|
#--------------------------------- |
2533
|
|
|
|
|
|
|
# Quick check for (signed) integer |
2534
|
|
|
|
|
|
|
#--------------------------------- |
2535
|
|
|
|
|
|
|
|
2536
|
|
|
|
|
|
|
# This will be the string of digits: |
2537
|
2272
|
|
|
|
|
3355
|
my $i_d = $i; |
2538
|
2272
|
|
|
|
|
3453
|
my $tok_d = $tok; |
2539
|
2272
|
|
|
|
|
4254
|
my $typ_d = $rtoken_type->[$i_d]; |
2540
|
|
|
|
|
|
|
|
2541
|
|
|
|
|
|
|
# check for signed integer |
2542
|
2272
|
|
|
|
|
3666
|
my $sign = EMPTY_STRING; |
2543
|
2272
|
50
|
66
|
|
|
6962
|
if ( $typ_d ne 'd' |
|
|
|
66
|
|
|
|
|
|
|
|
33
|
|
|
|
|
2544
|
|
|
|
|
|
|
&& ( $typ_d eq '+' || $typ_d eq '-' ) |
2545
|
|
|
|
|
|
|
&& $i_d < $max_token_index ) |
2546
|
|
|
|
|
|
|
{ |
2547
|
343
|
|
|
|
|
595
|
$sign = $tok_d; |
2548
|
343
|
|
|
|
|
532
|
$i_d++; |
2549
|
343
|
|
|
|
|
592
|
$tok_d = $rtokens->[$i_d]; |
2550
|
343
|
|
|
|
|
614
|
$typ_d = $rtoken_type->[$i_d]; |
2551
|
|
|
|
|
|
|
} |
2552
|
|
|
|
|
|
|
|
2553
|
|
|
|
|
|
|
# Handle integers |
2554
|
2272
|
100
|
100
|
|
|
16419
|
if ( |
|
|
|
100
|
|
|
|
|
2555
|
|
|
|
|
|
|
$typ_d eq 'd' |
2556
|
|
|
|
|
|
|
&& ( |
2557
|
|
|
|
|
|
|
$i_d == $max_token_index |
2558
|
|
|
|
|
|
|
|| ( $i_d < $max_token_index |
2559
|
|
|
|
|
|
|
&& $rtoken_type->[ $i_d + 1 ] ne '.' |
2560
|
|
|
|
|
|
|
&& $rtoken_type->[ $i_d + 1 ] ne 'w' ) |
2561
|
|
|
|
|
|
|
) |
2562
|
|
|
|
|
|
|
) |
2563
|
|
|
|
|
|
|
{ |
2564
|
|
|
|
|
|
|
# Let let full scanner handle multi-digit integers beginning with |
2565
|
|
|
|
|
|
|
# '0' because there could be error messages. For example, '009' is |
2566
|
|
|
|
|
|
|
# not a valid number. |
2567
|
|
|
|
|
|
|
|
2568
|
1710
|
100
|
100
|
|
|
7451
|
if ( $tok_d eq '0' || substr( $tok_d, 0, 1 ) ne '0' ) { |
2569
|
1653
|
|
|
|
|
3136
|
$number = $sign . $tok_d; |
2570
|
1653
|
|
|
|
|
2652
|
$type = 'n'; |
2571
|
1653
|
|
|
|
|
2682
|
$i = $i_d; |
2572
|
|
|
|
|
|
|
} |
2573
|
|
|
|
|
|
|
} |
2574
|
|
|
|
|
|
|
|
2575
|
|
|
|
|
|
|
#-------------------------------------- |
2576
|
|
|
|
|
|
|
# Verify correctness during development |
2577
|
|
|
|
|
|
|
#-------------------------------------- |
2578
|
2272
|
|
|
|
|
3278
|
if ( VERIFY_FASTNUM && defined($number) ) { |
2579
|
|
|
|
|
|
|
|
2580
|
|
|
|
|
|
|
# We will call the full method |
2581
|
|
|
|
|
|
|
my $type_simple = $type; |
2582
|
|
|
|
|
|
|
my $i_simple = $i; |
2583
|
|
|
|
|
|
|
my $number_simple = $number; |
2584
|
|
|
|
|
|
|
|
2585
|
|
|
|
|
|
|
$tok = $tok_begin; |
2586
|
|
|
|
|
|
|
$i = $i_begin; |
2587
|
|
|
|
|
|
|
$number = $self->scan_number(); |
2588
|
|
|
|
|
|
|
|
2589
|
|
|
|
|
|
|
if ( $type ne $type_simple |
2590
|
|
|
|
|
|
|
|| ( $i != $i_simple && $i <= $max_token_index ) |
2591
|
|
|
|
|
|
|
|| $number ne $number_simple ) |
2592
|
|
|
|
|
|
|
{ |
2593
|
|
|
|
|
|
|
print STDERR <<EOM; |
2594
|
|
|
|
|
|
|
scan_number_fast differs from scan_number: |
2595
|
|
|
|
|
|
|
simple: i=$i_simple, type=$type_simple, number=$number_simple |
2596
|
|
|
|
|
|
|
full: i=$i, type=$type, number=$number |
2597
|
|
|
|
|
|
|
EOM |
2598
|
|
|
|
|
|
|
} |
2599
|
|
|
|
|
|
|
} |
2600
|
|
|
|
|
|
|
|
2601
|
|
|
|
|
|
|
#---------------------------------------- |
2602
|
|
|
|
|
|
|
# call full scanner if may not be integer |
2603
|
|
|
|
|
|
|
#---------------------------------------- |
2604
|
2272
|
100
|
|
|
|
4978
|
if ( !defined($number) ) { |
2605
|
619
|
|
|
|
|
1585
|
$number = $self->scan_number(); |
2606
|
|
|
|
|
|
|
} |
2607
|
2272
|
|
|
|
|
5263
|
return $number; |
2608
|
|
|
|
|
|
|
} ## end sub scan_number_fast |
2609
|
|
|
|
|
|
|
|
2610
|
|
|
|
|
|
|
# a sub to warn if token found where term expected |
2611
|
|
|
|
|
|
|
sub error_if_expecting_TERM { |
2612
|
9
|
|
|
9
|
0
|
19
|
my $self = shift; |
2613
|
9
|
50
|
|
|
|
29
|
if ( $expecting == TERM ) { |
2614
|
9
|
50
|
|
|
|
30
|
if ( $really_want_term{$last_nonblank_type} ) { |
2615
|
0
|
|
|
|
|
0
|
$self->report_unexpected( $tok, "term", $i_tok, |
2616
|
|
|
|
|
|
|
$last_nonblank_i, $rtoken_map, $rtoken_type, $input_line ); |
2617
|
0
|
|
|
|
|
0
|
return 1; |
2618
|
|
|
|
|
|
|
} |
2619
|
|
|
|
|
|
|
} |
2620
|
9
|
|
|
|
|
18
|
return; |
2621
|
|
|
|
|
|
|
} ## end sub error_if_expecting_TERM |
2622
|
|
|
|
|
|
|
|
2623
|
|
|
|
|
|
|
# a sub to warn if token found where operator expected |
2624
|
|
|
|
|
|
|
sub error_if_expecting_OPERATOR { |
2625
|
769
|
|
|
769
|
0
|
1666
|
my ( $self, $thing ) = @_; |
2626
|
769
|
50
|
|
|
|
1833
|
if ( $expecting == OPERATOR ) { |
2627
|
0
|
0
|
|
|
|
0
|
if ( !defined($thing) ) { $thing = $tok } |
|
0
|
|
|
|
|
0
|
|
2628
|
0
|
|
|
|
|
0
|
$self->report_unexpected( $thing, "operator", $i_tok, |
2629
|
|
|
|
|
|
|
$last_nonblank_i, $rtoken_map, $rtoken_type, $input_line ); |
2630
|
0
|
0
|
|
|
|
0
|
if ( $i_tok == 0 ) { |
2631
|
0
|
|
|
|
|
0
|
$self->interrupt_logfile(); |
2632
|
0
|
|
|
|
|
0
|
$self->warning("Missing ';' or ',' above?\n"); |
2633
|
0
|
|
|
|
|
0
|
$self->resume_logfile(); |
2634
|
|
|
|
|
|
|
} |
2635
|
0
|
|
|
|
|
0
|
return 1; |
2636
|
|
|
|
|
|
|
} |
2637
|
769
|
|
|
|
|
1517
|
return; |
2638
|
|
|
|
|
|
|
} ## end sub error_if_expecting_OPERATOR |
2639
|
|
|
|
|
|
|
|
2640
|
|
|
|
|
|
|
# ------------------------------------------------------------ |
2641
|
|
|
|
|
|
|
# end scanner interfaces |
2642
|
|
|
|
|
|
|
# ------------------------------------------------------------ |
2643
|
|
|
|
|
|
|
|
2644
|
|
|
|
|
|
|
#------------------ |
2645
|
|
|
|
|
|
|
# Tokenization subs |
2646
|
|
|
|
|
|
|
#------------------ |
2647
|
|
|
|
|
|
|
sub do_GREATER_THAN_SIGN { |
2648
|
|
|
|
|
|
|
|
2649
|
31
|
|
|
31
|
0
|
84
|
my $self = shift; |
2650
|
|
|
|
|
|
|
|
2651
|
|
|
|
|
|
|
# '>' |
2652
|
31
|
50
|
|
|
|
118
|
$self->error_if_expecting_TERM() |
2653
|
|
|
|
|
|
|
if ( $expecting == TERM ); |
2654
|
31
|
|
|
|
|
71
|
return; |
2655
|
|
|
|
|
|
|
} ## end sub do_GREATER_THAN_SIGN |
2656
|
|
|
|
|
|
|
|
2657
|
|
|
|
|
|
|
sub do_VERTICAL_LINE { |
2658
|
|
|
|
|
|
|
|
2659
|
4
|
|
|
4
|
0
|
10
|
my $self = shift; |
2660
|
|
|
|
|
|
|
|
2661
|
|
|
|
|
|
|
# '|' |
2662
|
4
|
50
|
|
|
|
12
|
$self->error_if_expecting_TERM() |
2663
|
|
|
|
|
|
|
if ( $expecting == TERM ); |
2664
|
4
|
|
|
|
|
7
|
return; |
2665
|
|
|
|
|
|
|
} ## end sub do_VERTICAL_LINE |
2666
|
|
|
|
|
|
|
|
2667
|
|
|
|
|
|
|
sub do_DOLLAR_SIGN { |
2668
|
|
|
|
|
|
|
|
2669
|
4024
|
|
|
4024
|
0
|
7182
|
my $self = shift; |
2670
|
|
|
|
|
|
|
|
2671
|
|
|
|
|
|
|
# '$' |
2672
|
|
|
|
|
|
|
# start looking for a scalar |
2673
|
4024
|
50
|
|
|
|
9035
|
$self->error_if_expecting_OPERATOR("Scalar") |
2674
|
|
|
|
|
|
|
if ( $expecting == OPERATOR ); |
2675
|
4024
|
|
|
|
|
12174
|
$self->scan_simple_identifier(); |
2676
|
|
|
|
|
|
|
|
2677
|
4024
|
100
|
|
|
|
9213
|
if ( $identifier eq '$^W' ) { |
2678
|
1
|
|
|
|
|
4
|
$self->[_saw_perl_dash_w_] = 1; |
2679
|
|
|
|
|
|
|
} |
2680
|
|
|
|
|
|
|
|
2681
|
|
|
|
|
|
|
# Check for identifier in indirect object slot |
2682
|
|
|
|
|
|
|
# (vorboard.pl, sort.t). Something like: |
2683
|
|
|
|
|
|
|
# /^(print|printf|sort|exec|system)$/ |
2684
|
4024
|
100
|
66
|
|
|
31930
|
if ( |
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
|
66
|
|
|
|
|
2685
|
|
|
|
|
|
|
$is_indirect_object_taker{$last_nonblank_token} |
2686
|
|
|
|
|
|
|
&& $last_nonblank_type eq 'k' |
2687
|
|
|
|
|
|
|
|| ( ( $last_nonblank_token eq '(' ) |
2688
|
|
|
|
|
|
|
&& $is_indirect_object_taker{ $rparen_type->[$paren_depth] } ) |
2689
|
|
|
|
|
|
|
|| ( $last_nonblank_type eq 'w' |
2690
|
|
|
|
|
|
|
|| $last_nonblank_type eq 'U' ) # possible object |
2691
|
|
|
|
|
|
|
) |
2692
|
|
|
|
|
|
|
{ |
2693
|
|
|
|
|
|
|
|
2694
|
|
|
|
|
|
|
# An identifier followed by '->' is not indirect object; |
2695
|
|
|
|
|
|
|
# fixes b1175, b1176 |
2696
|
98
|
|
|
|
|
647
|
my ( $next_nonblank_type, $i_next ) = |
2697
|
|
|
|
|
|
|
$self->find_next_noncomment_type( $i, $rtokens, |
2698
|
|
|
|
|
|
|
$max_token_index ); |
2699
|
98
|
100
|
|
|
|
379
|
$type = 'Z' if ( $next_nonblank_type ne '->' ); |
2700
|
|
|
|
|
|
|
} |
2701
|
4024
|
|
|
|
|
6532
|
return; |
2702
|
|
|
|
|
|
|
} ## end sub do_DOLLAR_SIGN |
2703
|
|
|
|
|
|
|
|
2704
|
|
|
|
|
|
|
sub do_LEFT_PARENTHESIS { |
2705
|
|
|
|
|
|
|
|
2706
|
2121
|
|
|
2121
|
0
|
4408
|
my $self = shift; |
2707
|
|
|
|
|
|
|
|
2708
|
|
|
|
|
|
|
# '(' |
2709
|
2121
|
|
|
|
|
3533
|
++$paren_depth; |
2710
|
2121
|
|
|
|
|
4350
|
$rparen_semicolon_count->[$paren_depth] = 0; |
2711
|
2121
|
100
|
|
|
|
6387
|
if ($want_paren) { |
|
|
100
|
|
|
|
|
|
2712
|
68
|
|
|
|
|
193
|
$container_type = $want_paren; |
2713
|
68
|
|
|
|
|
164
|
$want_paren = EMPTY_STRING; |
2714
|
|
|
|
|
|
|
} |
2715
|
|
|
|
|
|
|
elsif ( $statement_type =~ /^sub\b/ ) { |
2716
|
14
|
|
|
|
|
42
|
$container_type = $statement_type; |
2717
|
|
|
|
|
|
|
} |
2718
|
|
|
|
|
|
|
else { |
2719
|
2039
|
|
|
|
|
3570
|
$container_type = $last_nonblank_token; |
2720
|
|
|
|
|
|
|
|
2721
|
|
|
|
|
|
|
# We can check for a syntax error here of unexpected '(', |
2722
|
|
|
|
|
|
|
# but this is going to get messy... |
2723
|
2039
|
100
|
100
|
|
|
7969
|
if ( |
2724
|
|
|
|
|
|
|
$expecting == OPERATOR |
2725
|
|
|
|
|
|
|
|
2726
|
|
|
|
|
|
|
# Be sure this is not a method call of the form |
2727
|
|
|
|
|
|
|
# &method(...), $method->(..), &{method}(...), |
2728
|
|
|
|
|
|
|
# $ref[2](list) is ok & short for $ref[2]->(list) |
2729
|
|
|
|
|
|
|
# NOTE: at present, braces in something like &{ xxx } |
2730
|
|
|
|
|
|
|
# are not marked as a block, we might have a method call. |
2731
|
|
|
|
|
|
|
# Added ')' to fix case c017, something like ()()() |
2732
|
|
|
|
|
|
|
&& $last_nonblank_token !~ /^([\]\}\)\&]|\-\>)/ |
2733
|
|
|
|
|
|
|
) |
2734
|
|
|
|
|
|
|
{ |
2735
|
|
|
|
|
|
|
|
2736
|
|
|
|
|
|
|
# ref: camel 3 p 703. |
2737
|
3
|
50
|
|
|
|
11
|
if ( $last_last_nonblank_token eq 'do' ) { |
2738
|
0
|
|
|
|
|
0
|
$self->complain( |
2739
|
|
|
|
|
|
|
"do SUBROUTINE is deprecated; consider & or -> notation\n" |
2740
|
|
|
|
|
|
|
); |
2741
|
|
|
|
|
|
|
} |
2742
|
|
|
|
|
|
|
else { |
2743
|
|
|
|
|
|
|
|
2744
|
|
|
|
|
|
|
# if this is an empty list, (), then it is not an |
2745
|
|
|
|
|
|
|
# error; for example, we might have a constant pi and |
2746
|
|
|
|
|
|
|
# invoke it with pi() or just pi; |
2747
|
3
|
|
|
|
|
10
|
my ( $next_nonblank_token, $i_next ) = |
2748
|
|
|
|
|
|
|
$self->find_next_nonblank_token( $i, $rtokens, |
2749
|
|
|
|
|
|
|
$max_token_index ); |
2750
|
|
|
|
|
|
|
|
2751
|
|
|
|
|
|
|
# Patch for c029: give up error check if |
2752
|
|
|
|
|
|
|
# a side comment follows |
2753
|
3
|
50
|
33
|
|
|
20
|
if ( $next_nonblank_token ne ')' |
2754
|
|
|
|
|
|
|
&& $next_nonblank_token ne '#' ) |
2755
|
|
|
|
|
|
|
{ |
2756
|
0
|
|
|
|
|
0
|
my $hint; |
2757
|
|
|
|
|
|
|
|
2758
|
0
|
|
|
|
|
0
|
$self->error_if_expecting_OPERATOR('('); |
2759
|
|
|
|
|
|
|
|
2760
|
0
|
0
|
|
|
|
0
|
if ( $last_nonblank_type eq 'C' ) { |
|
|
0
|
|
|
|
|
|
2761
|
0
|
|
|
|
|
0
|
$hint = |
2762
|
|
|
|
|
|
|
"$last_nonblank_token has a void prototype\n"; |
2763
|
|
|
|
|
|
|
} |
2764
|
|
|
|
|
|
|
elsif ( $last_nonblank_type eq 'i' ) { |
2765
|
0
|
0
|
0
|
|
|
0
|
if ( $i_tok > 0 |
2766
|
|
|
|
|
|
|
&& $last_nonblank_token =~ /^\$/ ) |
2767
|
|
|
|
|
|
|
{ |
2768
|
0
|
|
|
|
|
0
|
$hint = |
2769
|
|
|
|
|
|
|
"Do you mean '$last_nonblank_token->(' ?\n"; |
2770
|
|
|
|
|
|
|
} |
2771
|
|
|
|
|
|
|
} |
2772
|
0
|
0
|
|
|
|
0
|
if ($hint) { |
2773
|
0
|
|
|
|
|
0
|
$self->interrupt_logfile(); |
2774
|
0
|
|
|
|
|
0
|
$self->warning($hint); |
2775
|
0
|
|
|
|
|
0
|
$self->resume_logfile(); |
2776
|
|
|
|
|
|
|
} |
2777
|
|
|
|
|
|
|
} ## end if ( $next_nonblank_token... |
2778
|
|
|
|
|
|
|
} ## end else [ if ( $last_last_nonblank_token... |
2779
|
|
|
|
|
|
|
} ## end if ( $expecting == OPERATOR... |
2780
|
|
|
|
|
|
|
} |
2781
|
|
|
|
|
|
|
|
2782
|
|
|
|
|
|
|
# Do not update container type at ') ('; fix for git #105. This will |
2783
|
|
|
|
|
|
|
# propagate the container type onward so that any subsequent brace gets |
2784
|
|
|
|
|
|
|
# correctly marked. I have implemented this as a general rule, which |
2785
|
|
|
|
|
|
|
# should be safe, but if necessary it could be restricted to certain |
2786
|
|
|
|
|
|
|
# container statement types such as 'for'. |
2787
|
2121
|
100
|
|
|
|
6272
|
$rparen_type->[$paren_depth] = $container_type |
2788
|
|
|
|
|
|
|
if ( $last_nonblank_token ne ')' ); |
2789
|
|
|
|
|
|
|
|
2790
|
2121
|
|
|
|
|
6422
|
( $type_sequence, $indent_flag ) = |
2791
|
|
|
|
|
|
|
$self->increase_nesting_depth( PAREN, $rtoken_map->[$i_tok] ); |
2792
|
|
|
|
|
|
|
|
2793
|
|
|
|
|
|
|
# propagate types down through nested parens |
2794
|
|
|
|
|
|
|
# for example: the second paren in 'if ((' would be structural |
2795
|
|
|
|
|
|
|
# since the first is. |
2796
|
|
|
|
|
|
|
|
2797
|
2121
|
100
|
|
|
|
5635
|
if ( $last_nonblank_token eq '(' ) { |
2798
|
61
|
|
|
|
|
233
|
$type = $last_nonblank_type; |
2799
|
|
|
|
|
|
|
} |
2800
|
|
|
|
|
|
|
|
2801
|
|
|
|
|
|
|
# We exclude parens as structural after a ',' because it |
2802
|
|
|
|
|
|
|
# causes subtle problems with continuation indentation for |
2803
|
|
|
|
|
|
|
# something like this, where the first 'or' will not get |
2804
|
|
|
|
|
|
|
# indented. |
2805
|
|
|
|
|
|
|
# |
2806
|
|
|
|
|
|
|
# assert( |
2807
|
|
|
|
|
|
|
# __LINE__, |
2808
|
|
|
|
|
|
|
# ( not defined $check ) |
2809
|
|
|
|
|
|
|
# or ref $check |
2810
|
|
|
|
|
|
|
# or $check eq "new" |
2811
|
|
|
|
|
|
|
# or $check eq "old", |
2812
|
|
|
|
|
|
|
# ); |
2813
|
|
|
|
|
|
|
# |
2814
|
|
|
|
|
|
|
# Likewise, we exclude parens where a statement can start |
2815
|
|
|
|
|
|
|
# because of problems with continuation indentation, like |
2816
|
|
|
|
|
|
|
# these: |
2817
|
|
|
|
|
|
|
# |
2818
|
|
|
|
|
|
|
# ($firstline =~ /^#\!.*perl/) |
2819
|
|
|
|
|
|
|
# and (print $File::Find::name, "\n") |
2820
|
|
|
|
|
|
|
# and (return 1); |
2821
|
|
|
|
|
|
|
# |
2822
|
|
|
|
|
|
|
# (ref($usage_fref) =~ /CODE/) |
2823
|
|
|
|
|
|
|
# ? &$usage_fref |
2824
|
|
|
|
|
|
|
# : (&blast_usage, &blast_params, &blast_general_params); |
2825
|
|
|
|
|
|
|
|
2826
|
|
|
|
|
|
|
else { |
2827
|
2060
|
|
|
|
|
3641
|
$type = '{'; |
2828
|
|
|
|
|
|
|
} |
2829
|
|
|
|
|
|
|
|
2830
|
2121
|
50
|
|
|
|
5061
|
if ( $last_nonblank_type eq ')' ) { |
2831
|
0
|
|
|
|
|
0
|
$self->warning( |
2832
|
|
|
|
|
|
|
"Syntax error? found token '$last_nonblank_type' then '('\n"); |
2833
|
|
|
|
|
|
|
} |
2834
|
2121
|
|
|
|
|
4115
|
$rparen_structural_type->[$paren_depth] = $type; |
2835
|
2121
|
|
|
|
|
3477
|
return; |
2836
|
|
|
|
|
|
|
|
2837
|
|
|
|
|
|
|
} ## end sub do_LEFT_PARENTHESIS |
2838
|
|
|
|
|
|
|
|
2839
|
|
|
|
|
|
|
sub do_RIGHT_PARENTHESIS { |
2840
|
|
|
|
|
|
|
|
2841
|
2121
|
|
|
2121
|
0
|
4395
|
my $self = shift; |
2842
|
|
|
|
|
|
|
|
2843
|
|
|
|
|
|
|
# ')' |
2844
|
2121
|
|
|
|
|
6662
|
( $type_sequence, $indent_flag ) = |
2845
|
|
|
|
|
|
|
$self->decrease_nesting_depth( PAREN, $rtoken_map->[$i_tok] ); |
2846
|
|
|
|
|
|
|
|
2847
|
2121
|
50
|
|
|
|
6339
|
if ( $rparen_structural_type->[$paren_depth] eq '{' ) { |
2848
|
2121
|
|
|
|
|
3736
|
$type = '}'; |
2849
|
|
|
|
|
|
|
} |
2850
|
|
|
|
|
|
|
|
2851
|
2121
|
|
|
|
|
4074
|
$container_type = $rparen_type->[$paren_depth]; |
2852
|
|
|
|
|
|
|
|
2853
|
|
|
|
|
|
|
# restore statement type as 'sub' at closing paren of a signature |
2854
|
|
|
|
|
|
|
# so that a subsequent ':' is identified as an attribute |
2855
|
2121
|
100
|
|
|
|
6167
|
if ( $container_type =~ /^sub\b/ ) { |
2856
|
24
|
|
|
|
|
70
|
$statement_type = $container_type; |
2857
|
|
|
|
|
|
|
} |
2858
|
|
|
|
|
|
|
|
2859
|
|
|
|
|
|
|
# /^(for|foreach)$/ |
2860
|
2121
|
100
|
|
|
|
5771
|
if ( $is_for_foreach{ $rparen_type->[$paren_depth] } ) { |
2861
|
69
|
|
|
|
|
238
|
my $num_sc = $rparen_semicolon_count->[$paren_depth]; |
2862
|
69
|
50
|
66
|
|
|
486
|
if ( $num_sc > 0 && $num_sc != 2 ) { |
2863
|
0
|
|
|
|
|
0
|
$self->warning("Expected 2 ';' in 'for(;;)' but saw $num_sc\n"); |
2864
|
|
|
|
|
|
|
} |
2865
|
|
|
|
|
|
|
} |
2866
|
|
|
|
|
|
|
|
2867
|
2121
|
50
|
|
|
|
4920
|
if ( $paren_depth > 0 ) { $paren_depth-- } |
|
2121
|
|
|
|
|
3241
|
|
2868
|
2121
|
|
|
|
|
3548
|
return; |
2869
|
|
|
|
|
|
|
} ## end sub do_RIGHT_PARENTHESIS |
2870
|
|
|
|
|
|
|
|
2871
|
|
|
|
|
|
|
sub do_COMMA { |
2872
|
|
|
|
|
|
|
|
2873
|
3075
|
|
|
3075
|
0
|
5166
|
my $self = shift; |
2874
|
|
|
|
|
|
|
|
2875
|
|
|
|
|
|
|
# ',' |
2876
|
3075
|
100
|
33
|
|
|
11169
|
if ( $last_nonblank_type eq ',' ) { |
|
|
50
|
|
|
|
|
|
2877
|
10
|
|
|
|
|
62
|
$self->complain("Repeated ','s \n"); |
2878
|
|
|
|
|
|
|
} |
2879
|
|
|
|
|
|
|
|
2880
|
|
|
|
|
|
|
# Note that we have to check both token and type here because a |
2881
|
|
|
|
|
|
|
# comma following a qw list can have last token='(' but type = 'q' |
2882
|
|
|
|
|
|
|
elsif ( $last_nonblank_token eq '(' && $last_nonblank_type eq '{' ) { |
2883
|
0
|
|
|
|
|
0
|
$self->warning("Unexpected leading ',' after a '('\n"); |
2884
|
|
|
|
|
|
|
} |
2885
|
|
|
|
|
|
|
|
2886
|
|
|
|
|
|
|
# patch for operator_expected: note if we are in the list (use.t) |
2887
|
3075
|
100
|
|
|
|
6430
|
if ( $statement_type eq 'use' ) { $statement_type = '_use' } |
|
6
|
|
|
|
|
15
|
|
2888
|
3075
|
|
|
|
|
4684
|
return; |
2889
|
|
|
|
|
|
|
|
2890
|
|
|
|
|
|
|
} ## end sub do_COMMA |
2891
|
|
|
|
|
|
|
|
2892
|
|
|
|
|
|
|
sub do_SEMICOLON { |
2893
|
|
|
|
|
|
|
|
2894
|
2448
|
|
|
2448
|
0
|
4848
|
my $self = shift; |
2895
|
|
|
|
|
|
|
|
2896
|
|
|
|
|
|
|
# ';' |
2897
|
2448
|
|
|
|
|
3965
|
$context = UNKNOWN_CONTEXT; |
2898
|
2448
|
|
|
|
|
4033
|
$statement_type = EMPTY_STRING; |
2899
|
2448
|
|
|
|
|
4028
|
$want_paren = EMPTY_STRING; |
2900
|
|
|
|
|
|
|
|
2901
|
|
|
|
|
|
|
# /^(for|foreach)$/ |
2902
|
2448
|
100
|
|
|
|
7086
|
if ( $is_for_foreach{ $rparen_type->[$paren_depth] } ) |
2903
|
|
|
|
|
|
|
{ # mark ; in for loop |
2904
|
|
|
|
|
|
|
|
2905
|
|
|
|
|
|
|
# Be careful: we do not want a semicolon such as the |
2906
|
|
|
|
|
|
|
# following to be included: |
2907
|
|
|
|
|
|
|
# |
2908
|
|
|
|
|
|
|
# for (sort {strcoll($a,$b);} keys %investments) { |
2909
|
|
|
|
|
|
|
|
2910
|
35
|
100
|
66
|
|
|
240
|
if ( $brace_depth == $rdepth_array->[PAREN][BRACE][$paren_depth] |
2911
|
|
|
|
|
|
|
&& $square_bracket_depth == |
2912
|
|
|
|
|
|
|
$rdepth_array->[PAREN][SQUARE_BRACKET][$paren_depth] ) |
2913
|
|
|
|
|
|
|
{ |
2914
|
|
|
|
|
|
|
|
2915
|
34
|
|
|
|
|
68
|
$type = 'f'; |
2916
|
34
|
|
|
|
|
76
|
$rparen_semicolon_count->[$paren_depth]++; |
2917
|
|
|
|
|
|
|
} |
2918
|
|
|
|
|
|
|
} |
2919
|
2448
|
|
|
|
|
4654
|
return; |
2920
|
|
|
|
|
|
|
} ## end sub do_SEMICOLON |
2921
|
|
|
|
|
|
|
|
2922
|
|
|
|
|
|
|
sub do_QUOTATION_MARK { |
2923
|
|
|
|
|
|
|
|
2924
|
1124
|
|
|
1124
|
0
|
2276
|
my $self = shift; |
2925
|
|
|
|
|
|
|
|
2926
|
|
|
|
|
|
|
# '"' |
2927
|
1124
|
50
|
|
|
|
2799
|
$self->error_if_expecting_OPERATOR("String") |
2928
|
|
|
|
|
|
|
if ( $expecting == OPERATOR ); |
2929
|
1124
|
|
|
|
|
1838
|
$in_quote = 1; |
2930
|
1124
|
|
|
|
|
1934
|
$type = 'Q'; |
2931
|
1124
|
|
|
|
|
1877
|
$allowed_quote_modifiers = EMPTY_STRING; |
2932
|
1124
|
|
|
|
|
1824
|
return; |
2933
|
|
|
|
|
|
|
} ## end sub do_QUOTATION_MARK |
2934
|
|
|
|
|
|
|
|
2935
|
|
|
|
|
|
|
sub do_APOSTROPHE { |
2936
|
|
|
|
|
|
|
|
2937
|
1160
|
|
|
1160
|
0
|
2243
|
my $self = shift; |
2938
|
|
|
|
|
|
|
|
2939
|
|
|
|
|
|
|
# "'" |
2940
|
1160
|
50
|
|
|
|
2861
|
$self->error_if_expecting_OPERATOR("String") |
2941
|
|
|
|
|
|
|
if ( $expecting == OPERATOR ); |
2942
|
1160
|
|
|
|
|
1944
|
$in_quote = 1; |
2943
|
1160
|
|
|
|
|
1968
|
$type = 'Q'; |
2944
|
1160
|
|
|
|
|
1881
|
$allowed_quote_modifiers = EMPTY_STRING; |
2945
|
1160
|
|
|
|
|
1879
|
return; |
2946
|
|
|
|
|
|
|
} ## end sub do_APOSTROPHE |
2947
|
|
|
|
|
|
|
|
2948
|
|
|
|
|
|
|
sub do_BACKTICK { |
2949
|
|
|
|
|
|
|
|
2950
|
0
|
|
|
0
|
0
|
0
|
my $self = shift; |
2951
|
|
|
|
|
|
|
|
2952
|
|
|
|
|
|
|
# '`' |
2953
|
0
|
0
|
|
|
|
0
|
$self->error_if_expecting_OPERATOR("String") |
2954
|
|
|
|
|
|
|
if ( $expecting == OPERATOR ); |
2955
|
0
|
|
|
|
|
0
|
$in_quote = 1; |
2956
|
0
|
|
|
|
|
0
|
$type = 'Q'; |
2957
|
0
|
|
|
|
|
0
|
$allowed_quote_modifiers = EMPTY_STRING; |
2958
|
0
|
|
|
|
|
0
|
return; |
2959
|
|
|
|
|
|
|
} ## end sub do_BACKTICK |
2960
|
|
|
|
|
|
|
|
2961
|
|
|
|
|
|
|
sub do_SLASH { |
2962
|
|
|
|
|
|
|
|
2963
|
207
|
|
|
207
|
0
|
477
|
my $self = shift; |
2964
|
|
|
|
|
|
|
|
2965
|
|
|
|
|
|
|
# '/' |
2966
|
207
|
|
|
|
|
384
|
my $is_pattern; |
2967
|
|
|
|
|
|
|
|
2968
|
|
|
|
|
|
|
# a pattern cannot follow certain keywords which take optional |
2969
|
|
|
|
|
|
|
# arguments, like 'shift' and 'pop'. See also '?'. |
2970
|
207
|
50
|
66
|
|
|
1127
|
if ( |
|
|
50
|
|
|
|
|
|
2971
|
|
|
|
|
|
|
$last_nonblank_type eq 'k' |
2972
|
|
|
|
|
|
|
&& $is_keyword_rejecting_slash_as_pattern_delimiter{ |
2973
|
|
|
|
|
|
|
$last_nonblank_token} |
2974
|
|
|
|
|
|
|
) |
2975
|
|
|
|
|
|
|
{ |
2976
|
0
|
|
|
|
|
0
|
$is_pattern = 0; |
2977
|
|
|
|
|
|
|
} |
2978
|
|
|
|
|
|
|
elsif ( $expecting == UNKNOWN ) { # indeterminate, must guess.. |
2979
|
0
|
|
|
|
|
0
|
my $msg; |
2980
|
0
|
|
|
|
|
0
|
( $is_pattern, $msg ) = |
2981
|
|
|
|
|
|
|
$self->guess_if_pattern_or_division( $i, $rtokens, $rtoken_map, |
2982
|
|
|
|
|
|
|
$max_token_index ); |
2983
|
|
|
|
|
|
|
|
2984
|
0
|
0
|
|
|
|
0
|
if ($msg) { |
2985
|
0
|
|
|
|
|
0
|
$self->write_diagnostics("DIVIDE:$msg\n"); |
2986
|
0
|
|
|
|
|
0
|
$self->write_logfile_entry($msg); |
2987
|
|
|
|
|
|
|
} |
2988
|
|
|
|
|
|
|
} |
2989
|
207
|
|
|
|
|
711
|
else { $is_pattern = ( $expecting == TERM ) } |
2990
|
|
|
|
|
|
|
|
2991
|
207
|
100
|
|
|
|
498
|
if ($is_pattern) { |
2992
|
78
|
|
|
|
|
152
|
$in_quote = 1; |
2993
|
78
|
|
|
|
|
151
|
$type = 'Q'; |
2994
|
78
|
|
|
|
|
156
|
$allowed_quote_modifiers = '[msixpodualngc]'; |
2995
|
|
|
|
|
|
|
} |
2996
|
|
|
|
|
|
|
else { # not a pattern; check for a /= token |
2997
|
|
|
|
|
|
|
|
2998
|
129
|
50
|
|
|
|
409
|
if ( $rtokens->[ $i + 1 ] eq '=' ) { # form token /= |
2999
|
0
|
|
|
|
|
0
|
$i++; |
3000
|
0
|
|
|
|
|
0
|
$tok = '/='; |
3001
|
0
|
|
|
|
|
0
|
$type = $tok; |
3002
|
|
|
|
|
|
|
} |
3003
|
|
|
|
|
|
|
|
3004
|
|
|
|
|
|
|
#DEBUG - collecting info on what tokens follow a divide |
3005
|
|
|
|
|
|
|
# for development of guessing algorithm |
3006
|
|
|
|
|
|
|
## if ( |
3007
|
|
|
|
|
|
|
## $self->is_possible_numerator( $i, $rtokens, |
3008
|
|
|
|
|
|
|
## $max_token_index ) < 0 |
3009
|
|
|
|
|
|
|
## ) |
3010
|
|
|
|
|
|
|
## { |
3011
|
|
|
|
|
|
|
## $self->write_diagnostics("DIVIDE? $input_line\n"); |
3012
|
|
|
|
|
|
|
## } |
3013
|
|
|
|
|
|
|
} |
3014
|
207
|
|
|
|
|
429
|
return; |
3015
|
|
|
|
|
|
|
} ## end sub do_SLASH |
3016
|
|
|
|
|
|
|
|
3017
|
|
|
|
|
|
|
sub do_LEFT_CURLY_BRACKET { |
3018
|
|
|
|
|
|
|
|
3019
|
1659
|
|
|
1659
|
0
|
3236
|
my $self = shift; |
3020
|
|
|
|
|
|
|
|
3021
|
|
|
|
|
|
|
# '{' |
3022
|
|
|
|
|
|
|
# if we just saw a ')', we will label this block with |
3023
|
|
|
|
|
|
|
# its type. We need to do this to allow sub |
3024
|
|
|
|
|
|
|
# code_block_type to determine if this brace starts a |
3025
|
|
|
|
|
|
|
# code block or anonymous hash. (The type of a paren |
3026
|
|
|
|
|
|
|
# pair is the preceding token, such as 'if', 'else', |
3027
|
|
|
|
|
|
|
# etc). |
3028
|
1659
|
|
|
|
|
3006
|
$container_type = EMPTY_STRING; |
3029
|
|
|
|
|
|
|
|
3030
|
|
|
|
|
|
|
# ATTRS: for a '{' following an attribute list, reset |
3031
|
|
|
|
|
|
|
# things to look like we just saw the sub name |
3032
|
|
|
|
|
|
|
# Added 'package' (can be 'class') for --use-feature=class (rt145706) |
3033
|
1659
|
100
|
100
|
|
|
13756
|
if ( $statement_type =~ /^(sub|package)\b/ ) { |
|
|
50
|
66
|
|
|
|
|
|
|
100
|
33
|
|
|
|
|
|
|
50
|
|
|
|
|
|
3034
|
38
|
|
|
|
|
94
|
$last_nonblank_token = $statement_type; |
3035
|
38
|
|
|
|
|
81
|
$last_nonblank_type = 'i'; |
3036
|
38
|
|
|
|
|
70
|
$statement_type = EMPTY_STRING; |
3037
|
|
|
|
|
|
|
} |
3038
|
|
|
|
|
|
|
|
3039
|
|
|
|
|
|
|
# patch for SWITCH/CASE: hide these keywords from an immediately |
3040
|
|
|
|
|
|
|
# following opening brace |
3041
|
|
|
|
|
|
|
elsif ( ( $statement_type eq 'case' || $statement_type eq 'when' ) |
3042
|
|
|
|
|
|
|
&& $statement_type eq $last_nonblank_token ) |
3043
|
|
|
|
|
|
|
{ |
3044
|
0
|
|
|
|
|
0
|
$last_nonblank_token = ";"; |
3045
|
|
|
|
|
|
|
} |
3046
|
|
|
|
|
|
|
|
3047
|
|
|
|
|
|
|
elsif ( $last_nonblank_token eq ')' ) { |
3048
|
237
|
|
|
|
|
763
|
$last_nonblank_token = $rparen_type->[ $paren_depth + 1 ]; |
3049
|
|
|
|
|
|
|
|
3050
|
|
|
|
|
|
|
# defensive move in case of a nesting error (pbug.t) |
3051
|
|
|
|
|
|
|
# in which this ')' had no previous '(' |
3052
|
|
|
|
|
|
|
# this nesting error will have been caught |
3053
|
237
|
50
|
|
|
|
770
|
if ( !defined($last_nonblank_token) ) { |
3054
|
0
|
|
|
|
|
0
|
$last_nonblank_token = 'if'; |
3055
|
|
|
|
|
|
|
} |
3056
|
|
|
|
|
|
|
|
3057
|
|
|
|
|
|
|
# check for syntax error here; |
3058
|
237
|
100
|
|
|
|
1041
|
unless ( $is_blocktype_with_paren{$last_nonblank_token} ) { |
3059
|
14
|
50
|
|
|
|
76
|
if ( $self->[_extended_syntax_] ) { |
3060
|
|
|
|
|
|
|
|
3061
|
|
|
|
|
|
|
# we append a trailing () to mark this as an unknown |
3062
|
|
|
|
|
|
|
# block type. This allows perltidy to format some |
3063
|
|
|
|
|
|
|
# common extensions of perl syntax. |
3064
|
|
|
|
|
|
|
# This is used by sub code_block_type |
3065
|
14
|
|
|
|
|
48
|
$last_nonblank_token .= '()'; |
3066
|
|
|
|
|
|
|
} |
3067
|
|
|
|
|
|
|
else { |
3068
|
0
|
|
|
|
|
0
|
my $list = |
3069
|
|
|
|
|
|
|
join( SPACE, sort keys %is_blocktype_with_paren ); |
3070
|
0
|
|
|
|
|
0
|
$self->warning( |
3071
|
|
|
|
|
|
|
"syntax error at ') {', didn't see one of: <<$list>>; If this code is okay try using the -xs flag\n" |
3072
|
|
|
|
|
|
|
); |
3073
|
|
|
|
|
|
|
} |
3074
|
|
|
|
|
|
|
} |
3075
|
|
|
|
|
|
|
} |
3076
|
|
|
|
|
|
|
|
3077
|
|
|
|
|
|
|
# patch for paren-less for/foreach glitch, part 2. |
3078
|
|
|
|
|
|
|
# see note below under 'qw' |
3079
|
|
|
|
|
|
|
elsif ($last_nonblank_token eq 'qw' |
3080
|
|
|
|
|
|
|
&& $is_for_foreach{$want_paren} ) |
3081
|
|
|
|
|
|
|
{ |
3082
|
0
|
|
|
|
|
0
|
$last_nonblank_token = $want_paren; |
3083
|
0
|
0
|
|
|
|
0
|
if ( $last_last_nonblank_token eq $want_paren ) { |
3084
|
0
|
|
|
|
|
0
|
$self->warning( |
3085
|
|
|
|
|
|
|
"syntax error at '$want_paren .. {' -- missing \$ loop variable\n" |
3086
|
|
|
|
|
|
|
); |
3087
|
|
|
|
|
|
|
|
3088
|
|
|
|
|
|
|
} |
3089
|
0
|
|
|
|
|
0
|
$want_paren = EMPTY_STRING; |
3090
|
|
|
|
|
|
|
} |
3091
|
|
|
|
|
|
|
|
3092
|
|
|
|
|
|
|
# now identify which of the three possible types of |
3093
|
|
|
|
|
|
|
# curly braces we have: hash index container, anonymous |
3094
|
|
|
|
|
|
|
# hash reference, or code block. |
3095
|
|
|
|
|
|
|
|
3096
|
|
|
|
|
|
|
# non-structural (hash index) curly brace pair |
3097
|
|
|
|
|
|
|
# get marked 'L' and 'R' |
3098
|
1659
|
100
|
|
|
|
4549
|
if ( is_non_structural_brace() ) { |
3099
|
363
|
|
|
|
|
865
|
$type = 'L'; |
3100
|
|
|
|
|
|
|
|
3101
|
|
|
|
|
|
|
# patch for SWITCH/CASE: |
3102
|
|
|
|
|
|
|
# allow paren-less identifier after 'when' |
3103
|
|
|
|
|
|
|
# if the brace is preceded by a space |
3104
|
363
|
0
|
33
|
|
|
1444
|
if ( $statement_type eq 'when' |
|
|
|
33
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
3105
|
|
|
|
|
|
|
&& $last_nonblank_type eq 'i' |
3106
|
|
|
|
|
|
|
&& $last_last_nonblank_type eq 'k' |
3107
|
|
|
|
|
|
|
&& ( $i_tok == 0 || $rtoken_type->[ $i_tok - 1 ] eq 'b' ) ) |
3108
|
|
|
|
|
|
|
{ |
3109
|
0
|
|
|
|
|
0
|
$type = '{'; |
3110
|
0
|
|
|
|
|
0
|
$block_type = $statement_type; |
3111
|
|
|
|
|
|
|
} |
3112
|
|
|
|
|
|
|
} |
3113
|
|
|
|
|
|
|
|
3114
|
|
|
|
|
|
|
# code and anonymous hash have the same type, '{', but are |
3115
|
|
|
|
|
|
|
# distinguished by 'block_type', |
3116
|
|
|
|
|
|
|
# which will be blank for an anonymous hash |
3117
|
|
|
|
|
|
|
else { |
3118
|
|
|
|
|
|
|
|
3119
|
1296
|
|
|
|
|
4418
|
$block_type = |
3120
|
|
|
|
|
|
|
$self->code_block_type( $i_tok, $rtokens, $rtoken_type, |
3121
|
|
|
|
|
|
|
$max_token_index ); |
3122
|
|
|
|
|
|
|
|
3123
|
|
|
|
|
|
|
# patch to promote bareword type to function taking block |
3124
|
1296
|
100
|
100
|
|
|
6063
|
if ( $block_type |
|
|
|
66
|
|
|
|
|
3125
|
|
|
|
|
|
|
&& $last_nonblank_type eq 'w' |
3126
|
|
|
|
|
|
|
&& $last_nonblank_i >= 0 ) |
3127
|
|
|
|
|
|
|
{ |
3128
|
34
|
50
|
|
|
|
181
|
if ( $routput_token_type->[$last_nonblank_i] eq 'w' ) { |
3129
|
|
|
|
|
|
|
$routput_token_type->[$last_nonblank_i] = |
3130
|
34
|
100
|
|
|
|
220
|
$is_grep_alias{$block_type} ? 'k' : 'G'; |
3131
|
|
|
|
|
|
|
} |
3132
|
|
|
|
|
|
|
} |
3133
|
|
|
|
|
|
|
|
3134
|
|
|
|
|
|
|
# patch for SWITCH/CASE: if we find a stray opening block brace |
3135
|
|
|
|
|
|
|
# where we might accept a 'case' or 'when' block, then take it |
3136
|
1296
|
100
|
100
|
|
|
5549
|
if ( $statement_type eq 'case' |
3137
|
|
|
|
|
|
|
|| $statement_type eq 'when' ) |
3138
|
|
|
|
|
|
|
{ |
3139
|
38
|
100
|
66
|
|
|
219
|
if ( !$block_type || $block_type eq '}' ) { |
3140
|
4
|
|
|
|
|
6
|
$block_type = $statement_type; |
3141
|
|
|
|
|
|
|
} |
3142
|
|
|
|
|
|
|
} |
3143
|
|
|
|
|
|
|
} |
3144
|
|
|
|
|
|
|
|
3145
|
1659
|
|
|
|
|
3781
|
$rbrace_type->[ ++$brace_depth ] = $block_type; |
3146
|
|
|
|
|
|
|
|
3147
|
|
|
|
|
|
|
# Patch for CLASS BLOCK definitions: do not update the package for the |
3148
|
|
|
|
|
|
|
# current depth if this is a BLOCK type definition. |
3149
|
|
|
|
|
|
|
# TODO: should make 'class' separate from 'package' and only do |
3150
|
|
|
|
|
|
|
# this for 'class' |
3151
|
1659
|
100
|
|
|
|
5504
|
$rbrace_package->[$brace_depth] = $current_package |
3152
|
|
|
|
|
|
|
if ( substr( $block_type, 0, 8 ) ne 'package ' ); |
3153
|
|
|
|
|
|
|
|
3154
|
1659
|
|
|
|
|
3422
|
$rbrace_structural_type->[$brace_depth] = $type; |
3155
|
1659
|
|
|
|
|
3263
|
$rbrace_context->[$brace_depth] = $context; |
3156
|
1659
|
|
|
|
|
4768
|
( $type_sequence, $indent_flag ) = |
3157
|
|
|
|
|
|
|
$self->increase_nesting_depth( BRACE, $rtoken_map->[$i_tok] ); |
3158
|
1659
|
|
|
|
|
3342
|
return; |
3159
|
|
|
|
|
|
|
} ## end sub do_LEFT_CURLY_BRACKET |
3160
|
|
|
|
|
|
|
|
3161
|
|
|
|
|
|
|
sub do_RIGHT_CURLY_BRACKET { |
3162
|
|
|
|
|
|
|
|
3163
|
1659
|
|
|
1659
|
0
|
3697
|
my $self = shift; |
3164
|
|
|
|
|
|
|
|
3165
|
|
|
|
|
|
|
# '}' |
3166
|
1659
|
|
|
|
|
3560
|
$block_type = $rbrace_type->[$brace_depth]; |
3167
|
1659
|
100
|
|
|
|
4267
|
if ($block_type) { $statement_type = EMPTY_STRING } |
|
967
|
|
|
|
|
1999
|
|
3168
|
1659
|
100
|
|
|
|
3848
|
if ( defined( $rbrace_package->[$brace_depth] ) ) { |
3169
|
1655
|
|
|
|
|
3187
|
$current_package = $rbrace_package->[$brace_depth]; |
3170
|
|
|
|
|
|
|
} |
3171
|
|
|
|
|
|
|
|
3172
|
|
|
|
|
|
|
# can happen on brace error (caught elsewhere) |
3173
|
|
|
|
|
|
|
else { |
3174
|
|
|
|
|
|
|
} |
3175
|
1659
|
|
|
|
|
5050
|
( $type_sequence, $indent_flag ) = |
3176
|
|
|
|
|
|
|
$self->decrease_nesting_depth( BRACE, $rtoken_map->[$i_tok] ); |
3177
|
|
|
|
|
|
|
|
3178
|
1659
|
100
|
|
|
|
5435
|
if ( $rbrace_structural_type->[$brace_depth] eq 'L' ) { |
3179
|
363
|
|
|
|
|
776
|
$type = 'R'; |
3180
|
|
|
|
|
|
|
} |
3181
|
|
|
|
|
|
|
|
3182
|
|
|
|
|
|
|
# propagate type information for 'do' and 'eval' blocks, and also |
3183
|
|
|
|
|
|
|
# for smartmatch operator. This is necessary to enable us to know |
3184
|
|
|
|
|
|
|
# if an operator or term is expected next. |
3185
|
1659
|
100
|
|
|
|
4701
|
if ( $is_block_operator{$block_type} ) { |
3186
|
83
|
|
|
|
|
224
|
$tok = $block_type; |
3187
|
|
|
|
|
|
|
} |
3188
|
|
|
|
|
|
|
|
3189
|
1659
|
|
|
|
|
2914
|
$context = $rbrace_context->[$brace_depth]; |
3190
|
1659
|
50
|
|
|
|
3927
|
if ( $brace_depth > 0 ) { $brace_depth--; } |
|
1659
|
|
|
|
|
3304
|
|
3191
|
1659
|
|
|
|
|
2764
|
return; |
3192
|
|
|
|
|
|
|
} ## end sub do_RIGHT_CURLY_BRACKET |
3193
|
|
|
|
|
|
|
|
3194
|
|
|
|
|
|
|
sub do_AMPERSAND { |
3195
|
|
|
|
|
|
|
|
3196
|
126
|
|
|
126
|
0
|
326
|
my $self = shift; |
3197
|
|
|
|
|
|
|
|
3198
|
|
|
|
|
|
|
# '&' = maybe sub call? start looking |
3199
|
|
|
|
|
|
|
# We have to check for sub call unless we are sure we |
3200
|
|
|
|
|
|
|
# are expecting an operator. This example from s2p |
3201
|
|
|
|
|
|
|
# got mistaken as a q operator in an early version: |
3202
|
|
|
|
|
|
|
# print BODY &q(<<'EOT'); |
3203
|
126
|
100
|
|
|
|
380
|
if ( $expecting != OPERATOR ) { |
3204
|
|
|
|
|
|
|
|
3205
|
|
|
|
|
|
|
# But only look for a sub call if we are expecting a term or |
3206
|
|
|
|
|
|
|
# if there is no existing space after the &. |
3207
|
|
|
|
|
|
|
# For example we probably don't want & as sub call here: |
3208
|
|
|
|
|
|
|
# Fcntl::S_IRUSR & $mode; |
3209
|
107
|
100
|
66
|
|
|
487
|
if ( $expecting == TERM || $next_type ne 'b' ) { |
3210
|
104
|
|
|
|
|
321
|
$self->scan_simple_identifier(); |
3211
|
|
|
|
|
|
|
} |
3212
|
|
|
|
|
|
|
} |
3213
|
|
|
|
|
|
|
else { |
3214
|
|
|
|
|
|
|
} |
3215
|
126
|
|
|
|
|
289
|
return; |
3216
|
|
|
|
|
|
|
} ## end sub do_AMPERSAND |
3217
|
|
|
|
|
|
|
|
3218
|
|
|
|
|
|
|
sub do_LESS_THAN_SIGN { |
3219
|
|
|
|
|
|
|
|
3220
|
29
|
|
|
29
|
0
|
94
|
my $self = shift; |
3221
|
|
|
|
|
|
|
|
3222
|
|
|
|
|
|
|
# '<' - angle operator or less than? |
3223
|
29
|
100
|
|
|
|
121
|
if ( $expecting != OPERATOR ) { |
3224
|
8
|
|
|
|
|
55
|
( $i, $type ) = |
3225
|
|
|
|
|
|
|
$self->find_angle_operator_termination( $input_line, $i, |
3226
|
|
|
|
|
|
|
$rtoken_map, $expecting, $max_token_index ); |
3227
|
|
|
|
|
|
|
|
3228
|
|
|
|
|
|
|
## This message is not very helpful and quite confusing if the above |
3229
|
|
|
|
|
|
|
## routine decided not to write a message with the line number. |
3230
|
|
|
|
|
|
|
## if ( $type eq '<' && $expecting == TERM ) { |
3231
|
|
|
|
|
|
|
## $self->error_if_expecting_TERM(); |
3232
|
|
|
|
|
|
|
## $self->interrupt_logfile(); |
3233
|
|
|
|
|
|
|
## $self->warning("Unterminated <> operator?\n"); |
3234
|
|
|
|
|
|
|
## $self->resume_logfile(); |
3235
|
|
|
|
|
|
|
## } |
3236
|
|
|
|
|
|
|
|
3237
|
|
|
|
|
|
|
} |
3238
|
|
|
|
|
|
|
else { |
3239
|
|
|
|
|
|
|
} |
3240
|
29
|
|
|
|
|
340
|
return; |
3241
|
|
|
|
|
|
|
} ## end sub do_LESS_THAN_SIGN |
3242
|
|
|
|
|
|
|
|
3243
|
|
|
|
|
|
|
sub do_QUESTION_MARK { |
3244
|
|
|
|
|
|
|
|
3245
|
187
|
|
|
187
|
0
|
553
|
my $self = shift; |
3246
|
|
|
|
|
|
|
|
3247
|
|
|
|
|
|
|
# '?' = conditional or starting pattern? |
3248
|
187
|
|
|
|
|
380
|
my $is_pattern; |
3249
|
|
|
|
|
|
|
|
3250
|
|
|
|
|
|
|
# Patch for rt #126965 |
3251
|
|
|
|
|
|
|
# a pattern cannot follow certain keywords which take optional |
3252
|
|
|
|
|
|
|
# arguments, like 'shift' and 'pop'. See also '/'. |
3253
|
187
|
100
|
66
|
|
|
1522
|
if ( |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
3254
|
|
|
|
|
|
|
$last_nonblank_type eq 'k' |
3255
|
|
|
|
|
|
|
&& $is_keyword_rejecting_question_as_pattern_delimiter{ |
3256
|
|
|
|
|
|
|
$last_nonblank_token} |
3257
|
|
|
|
|
|
|
) |
3258
|
|
|
|
|
|
|
{ |
3259
|
1
|
|
|
|
|
3
|
$is_pattern = 0; |
3260
|
|
|
|
|
|
|
} |
3261
|
|
|
|
|
|
|
|
3262
|
|
|
|
|
|
|
# patch for RT#131288, user constant function without prototype |
3263
|
|
|
|
|
|
|
# last type is 'U' followed by ?. |
3264
|
|
|
|
|
|
|
elsif ( $last_nonblank_type =~ /^[FUY]$/ ) { |
3265
|
1
|
|
|
|
|
3
|
$is_pattern = 0; |
3266
|
|
|
|
|
|
|
} |
3267
|
|
|
|
|
|
|
elsif ( $expecting == UNKNOWN ) { |
3268
|
|
|
|
|
|
|
|
3269
|
|
|
|
|
|
|
# In older versions of Perl, a bare ? can be a pattern |
3270
|
|
|
|
|
|
|
# delimiter. In perl version 5.22 this was |
3271
|
|
|
|
|
|
|
# dropped, but we have to support it in order to format |
3272
|
|
|
|
|
|
|
# older programs. See: |
3273
|
|
|
|
|
|
|
## https://perl.developpez.com/documentations/en/5.22.0/perl5211delta.html |
3274
|
|
|
|
|
|
|
# For example, the following line worked |
3275
|
|
|
|
|
|
|
# at one time: |
3276
|
|
|
|
|
|
|
# ?(.*)? && (print $1,"\n"); |
3277
|
|
|
|
|
|
|
# In current versions it would have to be written with slashes: |
3278
|
|
|
|
|
|
|
# /(.*)/ && (print $1,"\n"); |
3279
|
11
|
|
|
|
|
32
|
my $msg; |
3280
|
11
|
|
|
|
|
69
|
( $is_pattern, $msg ) = |
3281
|
|
|
|
|
|
|
$self->guess_if_pattern_or_conditional( $i, $rtokens, $rtoken_map, |
3282
|
|
|
|
|
|
|
$max_token_index ); |
3283
|
|
|
|
|
|
|
|
3284
|
11
|
50
|
|
|
|
51
|
if ($msg) { $self->write_logfile_entry($msg) } |
|
11
|
|
|
|
|
48
|
|
3285
|
|
|
|
|
|
|
} |
3286
|
174
|
|
|
|
|
442
|
else { $is_pattern = ( $expecting == TERM ) } |
3287
|
|
|
|
|
|
|
|
3288
|
187
|
50
|
|
|
|
548
|
if ($is_pattern) { |
3289
|
0
|
|
|
|
|
0
|
$in_quote = 1; |
3290
|
0
|
|
|
|
|
0
|
$type = 'Q'; |
3291
|
0
|
|
|
|
|
0
|
$allowed_quote_modifiers = '[msixpodualngc]'; |
3292
|
|
|
|
|
|
|
} |
3293
|
|
|
|
|
|
|
else { |
3294
|
187
|
|
|
|
|
729
|
( $type_sequence, $indent_flag ) = |
3295
|
|
|
|
|
|
|
$self->increase_nesting_depth( QUESTION_COLON, |
3296
|
|
|
|
|
|
|
$rtoken_map->[$i_tok] ); |
3297
|
|
|
|
|
|
|
} |
3298
|
187
|
|
|
|
|
443
|
return; |
3299
|
|
|
|
|
|
|
} ## end sub do_QUESTION_MARK |
3300
|
|
|
|
|
|
|
|
3301
|
|
|
|
|
|
|
sub do_STAR { |
3302
|
|
|
|
|
|
|
|
3303
|
238
|
|
|
238
|
0
|
484
|
my $self = shift; |
3304
|
|
|
|
|
|
|
|
3305
|
|
|
|
|
|
|
# '*' = typeglob, or multiply? |
3306
|
238
|
50
|
66
|
|
|
841
|
if ( $expecting == UNKNOWN && $last_nonblank_type eq 'Z' ) { |
3307
|
0
|
0
|
0
|
|
|
0
|
if ( $next_type ne 'b' |
|
|
|
0
|
|
|
|
|
3308
|
|
|
|
|
|
|
&& $next_type ne '(' |
3309
|
|
|
|
|
|
|
&& $next_type ne '#' ) # Fix c036 |
3310
|
|
|
|
|
|
|
{ |
3311
|
0
|
|
|
|
|
0
|
$expecting = TERM; |
3312
|
|
|
|
|
|
|
} |
3313
|
|
|
|
|
|
|
} |
3314
|
238
|
100
|
|
|
|
622
|
if ( $expecting == TERM ) { |
3315
|
21
|
|
|
|
|
89
|
$self->scan_simple_identifier(); |
3316
|
|
|
|
|
|
|
} |
3317
|
|
|
|
|
|
|
else { |
3318
|
|
|
|
|
|
|
|
3319
|
217
|
50
|
|
|
|
877
|
if ( $rtokens->[ $i + 1 ] eq '=' ) { |
|
|
100
|
|
|
|
|
|
3320
|
0
|
|
|
|
|
0
|
$tok = '*='; |
3321
|
0
|
|
|
|
|
0
|
$type = $tok; |
3322
|
0
|
|
|
|
|
0
|
$i++; |
3323
|
|
|
|
|
|
|
} |
3324
|
|
|
|
|
|
|
elsif ( $rtokens->[ $i + 1 ] eq '*' ) { |
3325
|
36
|
|
|
|
|
102
|
$tok = '**'; |
3326
|
36
|
|
|
|
|
67
|
$type = $tok; |
3327
|
36
|
|
|
|
|
61
|
$i++; |
3328
|
36
|
50
|
|
|
|
123
|
if ( $rtokens->[ $i + 1 ] eq '=' ) { |
3329
|
0
|
|
|
|
|
0
|
$tok = '**='; |
3330
|
0
|
|
|
|
|
0
|
$type = $tok; |
3331
|
0
|
|
|
|
|
0
|
$i++; |
3332
|
|
|
|
|
|
|
} |
3333
|
|
|
|
|
|
|
} |
3334
|
|
|
|
|
|
|
} |
3335
|
238
|
|
|
|
|
420
|
return; |
3336
|
|
|
|
|
|
|
} ## end sub do_STAR |
3337
|
|
|
|
|
|
|
|
3338
|
|
|
|
|
|
|
sub do_DOT { |
3339
|
|
|
|
|
|
|
|
3340
|
150
|
|
|
150
|
0
|
329
|
my $self = shift; |
3341
|
|
|
|
|
|
|
|
3342
|
|
|
|
|
|
|
# '.' = what kind of . ? |
3343
|
150
|
100
|
|
|
|
448
|
if ( $expecting != OPERATOR ) { |
3344
|
10
|
|
|
|
|
39
|
$self->scan_number(); |
3345
|
10
|
100
|
|
|
|
29
|
if ( $type eq '.' ) { |
3346
|
2
|
50
|
|
|
|
7
|
$self->error_if_expecting_TERM() |
3347
|
|
|
|
|
|
|
if ( $expecting == TERM ); |
3348
|
|
|
|
|
|
|
} |
3349
|
|
|
|
|
|
|
} |
3350
|
|
|
|
|
|
|
else { |
3351
|
|
|
|
|
|
|
} |
3352
|
150
|
|
|
|
|
299
|
return; |
3353
|
|
|
|
|
|
|
} ## end sub do_DOT |
3354
|
|
|
|
|
|
|
|
3355
|
|
|
|
|
|
|
sub do_COLON { |
3356
|
|
|
|
|
|
|
|
3357
|
271
|
|
|
271
|
0
|
718
|
my $self = shift; |
3358
|
|
|
|
|
|
|
|
3359
|
|
|
|
|
|
|
# ':' = label, ternary, attribute, ? |
3360
|
|
|
|
|
|
|
|
3361
|
|
|
|
|
|
|
# if this is the first nonblank character, call it a label |
3362
|
|
|
|
|
|
|
# since perl seems to just swallow it |
3363
|
271
|
50
|
66
|
|
|
3765
|
if ( $input_line_number == 1 && $last_nonblank_i == -1 ) { |
|
|
100
|
66
|
|
|
|
|
|
|
100
|
66
|
|
|
|
|
|
|
100
|
66
|
|
|
|
|
|
|
100
|
|
|
|
|
|
3364
|
0
|
|
|
|
|
0
|
$type = 'J'; |
3365
|
|
|
|
|
|
|
} |
3366
|
|
|
|
|
|
|
|
3367
|
|
|
|
|
|
|
# ATTRS: check for a ':' which introduces an attribute list |
3368
|
|
|
|
|
|
|
# either after a 'sub' keyword or within a paren list |
3369
|
|
|
|
|
|
|
# Added 'package' (can be 'class') for --use-feature=class (rt145706) |
3370
|
|
|
|
|
|
|
elsif ( $statement_type =~ /^(sub|package)\b/ ) { |
3371
|
22
|
|
|
|
|
60
|
$type = 'A'; |
3372
|
22
|
|
|
|
|
62
|
$self->[_in_attribute_list_] = 1; |
3373
|
|
|
|
|
|
|
} |
3374
|
|
|
|
|
|
|
|
3375
|
|
|
|
|
|
|
# Within a signature, unless we are in a ternary. For example, |
3376
|
|
|
|
|
|
|
# from 't/filter_example.t': |
3377
|
|
|
|
|
|
|
# method foo4 ( $class: $bar ) { $class->bar($bar) } |
3378
|
|
|
|
|
|
|
elsif ( $rparen_type->[$paren_depth] =~ /^sub\b/ |
3379
|
|
|
|
|
|
|
&& !is_balanced_closing_container(QUESTION_COLON) ) |
3380
|
|
|
|
|
|
|
{ |
3381
|
1
|
|
|
|
|
3
|
$type = 'A'; |
3382
|
1
|
|
|
|
|
2
|
$self->[_in_attribute_list_] = 1; |
3383
|
|
|
|
|
|
|
} |
3384
|
|
|
|
|
|
|
|
3385
|
|
|
|
|
|
|
# check for scalar attribute, such as |
3386
|
|
|
|
|
|
|
# my $foo : shared = 1; |
3387
|
|
|
|
|
|
|
elsif ($is_my_our_state{$statement_type} |
3388
|
|
|
|
|
|
|
&& $rcurrent_depth->[QUESTION_COLON] == 0 ) |
3389
|
|
|
|
|
|
|
{ |
3390
|
15
|
|
|
|
|
31
|
$type = 'A'; |
3391
|
15
|
|
|
|
|
30
|
$self->[_in_attribute_list_] = 1; |
3392
|
|
|
|
|
|
|
} |
3393
|
|
|
|
|
|
|
|
3394
|
|
|
|
|
|
|
# Look for Switch::Plain syntax if an error would otherwise occur |
3395
|
|
|
|
|
|
|
# here. Note that we do not need to check if the extended syntax |
3396
|
|
|
|
|
|
|
# flag is set because otherwise an error would occur, and we would |
3397
|
|
|
|
|
|
|
# then have to output a message telling the user to set the |
3398
|
|
|
|
|
|
|
# extended syntax flag to avoid the error. |
3399
|
|
|
|
|
|
|
# case 1: { |
3400
|
|
|
|
|
|
|
# default: { |
3401
|
|
|
|
|
|
|
# default: |
3402
|
|
|
|
|
|
|
# Note that the line 'default:' will be parsed as a label elsewhere. |
3403
|
|
|
|
|
|
|
elsif ( $is_case_default{$statement_type} |
3404
|
|
|
|
|
|
|
&& !is_balanced_closing_container(QUESTION_COLON) ) |
3405
|
|
|
|
|
|
|
{ |
3406
|
|
|
|
|
|
|
# mark it as a perltidy label type |
3407
|
46
|
|
|
|
|
106
|
$type = 'J'; |
3408
|
|
|
|
|
|
|
} |
3409
|
|
|
|
|
|
|
|
3410
|
|
|
|
|
|
|
# otherwise, it should be part of a ?/: operator |
3411
|
|
|
|
|
|
|
else { |
3412
|
187
|
|
|
|
|
764
|
( $type_sequence, $indent_flag ) = |
3413
|
|
|
|
|
|
|
$self->decrease_nesting_depth( QUESTION_COLON, |
3414
|
|
|
|
|
|
|
$rtoken_map->[$i_tok] ); |
3415
|
187
|
50
|
|
|
|
962
|
if ( $last_nonblank_token eq '?' ) { |
3416
|
0
|
|
|
|
|
0
|
$self->warning("Syntax error near ? :\n"); |
3417
|
|
|
|
|
|
|
} |
3418
|
|
|
|
|
|
|
} |
3419
|
271
|
|
|
|
|
519
|
return; |
3420
|
|
|
|
|
|
|
} ## end sub do_COLON |
3421
|
|
|
|
|
|
|
|
3422
|
|
|
|
|
|
|
sub do_PLUS_SIGN { |
3423
|
|
|
|
|
|
|
|
3424
|
227
|
|
|
227
|
0
|
499
|
my $self = shift; |
3425
|
|
|
|
|
|
|
|
3426
|
|
|
|
|
|
|
# '+' = what kind of plus? |
3427
|
227
|
100
|
|
|
|
943
|
if ( $expecting == TERM ) { |
|
|
100
|
|
|
|
|
|
3428
|
13
|
|
|
|
|
54
|
my $number = $self->scan_number_fast(); |
3429
|
|
|
|
|
|
|
|
3430
|
|
|
|
|
|
|
# unary plus is safest assumption if not a number |
3431
|
13
|
50
|
|
|
|
54
|
if ( !defined($number) ) { $type = 'p'; } |
|
13
|
|
|
|
|
26
|
|
3432
|
|
|
|
|
|
|
} |
3433
|
|
|
|
|
|
|
elsif ( $expecting == OPERATOR ) { |
3434
|
|
|
|
|
|
|
} |
3435
|
|
|
|
|
|
|
else { |
3436
|
3
|
100
|
|
|
|
12
|
if ( $next_type eq 'w' ) { $type = 'p' } |
|
2
|
|
|
|
|
6
|
|
3437
|
|
|
|
|
|
|
} |
3438
|
227
|
|
|
|
|
429
|
return; |
3439
|
|
|
|
|
|
|
} ## end sub do_PLUS_SIGN |
3440
|
|
|
|
|
|
|
|
3441
|
|
|
|
|
|
|
sub do_AT_SIGN { |
3442
|
|
|
|
|
|
|
|
3443
|
438
|
|
|
438
|
0
|
1211
|
my $self = shift; |
3444
|
|
|
|
|
|
|
|
3445
|
|
|
|
|
|
|
# '@' = sigil for array? |
3446
|
438
|
50
|
|
|
|
1436
|
$self->error_if_expecting_OPERATOR("Array") |
3447
|
|
|
|
|
|
|
if ( $expecting == OPERATOR ); |
3448
|
438
|
|
|
|
|
1616
|
$self->scan_simple_identifier(); |
3449
|
438
|
|
|
|
|
835
|
return; |
3450
|
|
|
|
|
|
|
} ## end sub do_AT_SIGN |
3451
|
|
|
|
|
|
|
|
3452
|
|
|
|
|
|
|
sub do_PERCENT_SIGN { |
3453
|
|
|
|
|
|
|
|
3454
|
202
|
|
|
202
|
0
|
670
|
my $self = shift; |
3455
|
|
|
|
|
|
|
|
3456
|
|
|
|
|
|
|
# '%' = hash or modulo? |
3457
|
|
|
|
|
|
|
# first guess is hash if no following blank or paren |
3458
|
202
|
50
|
|
|
|
724
|
if ( $expecting == UNKNOWN ) { |
3459
|
0
|
0
|
0
|
|
|
0
|
if ( $next_type ne 'b' && $next_type ne '(' ) { |
3460
|
0
|
|
|
|
|
0
|
$expecting = TERM; |
3461
|
|
|
|
|
|
|
} |
3462
|
|
|
|
|
|
|
} |
3463
|
202
|
100
|
|
|
|
660
|
if ( $expecting == TERM ) { |
3464
|
192
|
|
|
|
|
710
|
$self->scan_simple_identifier(); |
3465
|
|
|
|
|
|
|
} |
3466
|
202
|
|
|
|
|
494
|
return; |
3467
|
|
|
|
|
|
|
} ## end sub do_PERCENT_SIGN |
3468
|
|
|
|
|
|
|
|
3469
|
|
|
|
|
|
|
sub do_LEFT_SQUARE_BRACKET { |
3470
|
|
|
|
|
|
|
|
3471
|
594
|
|
|
594
|
0
|
1257
|
my $self = shift; |
3472
|
|
|
|
|
|
|
|
3473
|
|
|
|
|
|
|
# '[' |
3474
|
594
|
|
|
|
|
1392
|
$rsquare_bracket_type->[ ++$square_bracket_depth ] = |
3475
|
|
|
|
|
|
|
$last_nonblank_token; |
3476
|
594
|
|
|
|
|
2025
|
( $type_sequence, $indent_flag ) = |
3477
|
|
|
|
|
|
|
$self->increase_nesting_depth( SQUARE_BRACKET, |
3478
|
|
|
|
|
|
|
$rtoken_map->[$i_tok] ); |
3479
|
|
|
|
|
|
|
|
3480
|
|
|
|
|
|
|
# It may seem odd, but structural square brackets have |
3481
|
|
|
|
|
|
|
# type '{' and '}'. This simplifies the indentation logic. |
3482
|
594
|
100
|
|
|
|
1900
|
if ( !is_non_structural_brace() ) { |
3483
|
287
|
|
|
|
|
673
|
$type = '{'; |
3484
|
|
|
|
|
|
|
} |
3485
|
594
|
|
|
|
|
1315
|
$rsquare_bracket_structural_type->[$square_bracket_depth] = $type; |
3486
|
594
|
|
|
|
|
1076
|
return; |
3487
|
|
|
|
|
|
|
} ## end sub do_LEFT_SQUARE_BRACKET |
3488
|
|
|
|
|
|
|
|
3489
|
|
|
|
|
|
|
sub do_RIGHT_SQUARE_BRACKET { |
3490
|
|
|
|
|
|
|
|
3491
|
594
|
|
|
594
|
0
|
1311
|
my $self = shift; |
3492
|
|
|
|
|
|
|
|
3493
|
|
|
|
|
|
|
# ']' |
3494
|
594
|
|
|
|
|
2052
|
( $type_sequence, $indent_flag ) = |
3495
|
|
|
|
|
|
|
$self->decrease_nesting_depth( SQUARE_BRACKET, |
3496
|
|
|
|
|
|
|
$rtoken_map->[$i_tok] ); |
3497
|
|
|
|
|
|
|
|
3498
|
594
|
100
|
|
|
|
2119
|
if ( $rsquare_bracket_structural_type->[$square_bracket_depth] eq '{' ) |
3499
|
|
|
|
|
|
|
{ |
3500
|
287
|
|
|
|
|
642
|
$type = '}'; |
3501
|
|
|
|
|
|
|
} |
3502
|
|
|
|
|
|
|
|
3503
|
|
|
|
|
|
|
# propagate type information for smartmatch operator. This is |
3504
|
|
|
|
|
|
|
# necessary to enable us to know if an operator or term is expected |
3505
|
|
|
|
|
|
|
# next. |
3506
|
594
|
100
|
|
|
|
1693
|
if ( $rsquare_bracket_type->[$square_bracket_depth] eq '~~' ) { |
3507
|
20
|
|
|
|
|
43
|
$tok = $rsquare_bracket_type->[$square_bracket_depth]; |
3508
|
|
|
|
|
|
|
} |
3509
|
|
|
|
|
|
|
|
3510
|
594
|
50
|
|
|
|
1573
|
if ( $square_bracket_depth > 0 ) { $square_bracket_depth--; } |
|
594
|
|
|
|
|
963
|
|
3511
|
594
|
|
|
|
|
1024
|
return; |
3512
|
|
|
|
|
|
|
} ## end sub do_RIGHT_SQUARE_BRACKET |
3513
|
|
|
|
|
|
|
|
3514
|
|
|
|
|
|
|
sub do_MINUS_SIGN { |
3515
|
|
|
|
|
|
|
|
3516
|
441
|
|
|
441
|
0
|
916
|
my $self = shift; |
3517
|
|
|
|
|
|
|
|
3518
|
|
|
|
|
|
|
# '-' = what kind of minus? |
3519
|
441
|
100
|
100
|
|
|
2991
|
if ( ( $expecting != OPERATOR ) |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
3520
|
|
|
|
|
|
|
&& $is_file_test_operator{$next_tok} ) |
3521
|
|
|
|
|
|
|
{ |
3522
|
10
|
|
|
|
|
52
|
my ( $next_nonblank_token, $i_next ) = |
3523
|
|
|
|
|
|
|
$self->find_next_nonblank_token( $i + 1, $rtokens, |
3524
|
|
|
|
|
|
|
$max_token_index ); |
3525
|
|
|
|
|
|
|
|
3526
|
|
|
|
|
|
|
# check for a quoted word like "-w=>xx"; |
3527
|
|
|
|
|
|
|
# it is sufficient to just check for a following '=' |
3528
|
10
|
50
|
|
|
|
81
|
if ( $next_nonblank_token eq '=' ) { |
3529
|
0
|
|
|
|
|
0
|
$type = 'm'; |
3530
|
|
|
|
|
|
|
} |
3531
|
|
|
|
|
|
|
else { |
3532
|
10
|
|
|
|
|
28
|
$i++; |
3533
|
10
|
|
|
|
|
26
|
$tok .= $next_tok; |
3534
|
10
|
|
|
|
|
37
|
$type = 'F'; |
3535
|
|
|
|
|
|
|
} |
3536
|
|
|
|
|
|
|
} |
3537
|
|
|
|
|
|
|
elsif ( $expecting == TERM ) { |
3538
|
330
|
|
|
|
|
908
|
my $number = $self->scan_number_fast(); |
3539
|
|
|
|
|
|
|
|
3540
|
|
|
|
|
|
|
# maybe part of bareword token? unary is safest |
3541
|
330
|
100
|
|
|
|
907
|
if ( !defined($number) ) { $type = 'm'; } |
|
288
|
|
|
|
|
542
|
|
3542
|
|
|
|
|
|
|
|
3543
|
|
|
|
|
|
|
} |
3544
|
|
|
|
|
|
|
elsif ( $expecting == OPERATOR ) { |
3545
|
|
|
|
|
|
|
} |
3546
|
|
|
|
|
|
|
else { |
3547
|
|
|
|
|
|
|
|
3548
|
4
|
50
|
|
|
|
17
|
if ( $next_type eq 'w' ) { |
3549
|
4
|
|
|
|
|
10
|
$type = 'm'; |
3550
|
|
|
|
|
|
|
} |
3551
|
|
|
|
|
|
|
} |
3552
|
441
|
|
|
|
|
791
|
return; |
3553
|
|
|
|
|
|
|
} ## end sub do_MINUS_SIGN |
3554
|
|
|
|
|
|
|
|
3555
|
|
|
|
|
|
|
sub do_CARAT_SIGN { |
3556
|
|
|
|
|
|
|
|
3557
|
12
|
|
|
12
|
0
|
25
|
my $self = shift; |
3558
|
|
|
|
|
|
|
|
3559
|
|
|
|
|
|
|
# '^' |
3560
|
|
|
|
|
|
|
# check for special variables like ${^WARNING_BITS} |
3561
|
12
|
100
|
|
|
|
35
|
if ( $expecting == TERM ) { |
3562
|
|
|
|
|
|
|
|
3563
|
5
|
50
|
33
|
|
|
52
|
if ( $last_nonblank_token eq '{' |
|
|
|
33
|
|
|
|
|
3564
|
|
|
|
|
|
|
&& ( $next_tok !~ /^\d/ ) |
3565
|
|
|
|
|
|
|
&& ( $next_tok =~ /^\w/ ) ) |
3566
|
|
|
|
|
|
|
{ |
3567
|
|
|
|
|
|
|
|
3568
|
5
|
100
|
|
|
|
25
|
if ( $next_tok eq 'W' ) { |
3569
|
1
|
|
|
|
|
3
|
$self->[_saw_perl_dash_w_] = 1; |
3570
|
|
|
|
|
|
|
} |
3571
|
5
|
|
|
|
|
13
|
$tok = $tok . $next_tok; |
3572
|
5
|
|
|
|
|
9
|
$i = $i + 1; |
3573
|
5
|
|
|
|
|
12
|
$type = 'w'; |
3574
|
|
|
|
|
|
|
|
3575
|
|
|
|
|
|
|
# Optional coding to try to catch syntax errors. This can |
3576
|
|
|
|
|
|
|
# be removed if it ever causes incorrect warning messages. |
3577
|
|
|
|
|
|
|
# The '{^' should be preceded by either by a type or '$#' |
3578
|
|
|
|
|
|
|
# Examples: |
3579
|
|
|
|
|
|
|
# $#{^CAPTURE} ok |
3580
|
|
|
|
|
|
|
# *${^LAST_FH}{NAME} ok |
3581
|
|
|
|
|
|
|
# @{^HOWDY} ok |
3582
|
|
|
|
|
|
|
# $hash{^HOWDY} error |
3583
|
|
|
|
|
|
|
|
3584
|
|
|
|
|
|
|
# Note that a type sigil '$' may be tokenized as 'Z' |
3585
|
|
|
|
|
|
|
# after something like 'print', so allow type 'Z' |
3586
|
5
|
0
|
33
|
|
|
20
|
if ( $last_last_nonblank_type ne 't' |
|
|
|
33
|
|
|
|
|
3587
|
|
|
|
|
|
|
&& $last_last_nonblank_type ne 'Z' |
3588
|
|
|
|
|
|
|
&& $last_last_nonblank_token ne '$#' ) |
3589
|
|
|
|
|
|
|
{ |
3590
|
0
|
|
|
|
|
0
|
$self->warning("Possible syntax error near '{^'\n"); |
3591
|
|
|
|
|
|
|
} |
3592
|
|
|
|
|
|
|
} |
3593
|
|
|
|
|
|
|
|
3594
|
|
|
|
|
|
|
else { |
3595
|
0
|
0
|
|
|
|
0
|
unless ( $self->error_if_expecting_TERM() ) { |
3596
|
|
|
|
|
|
|
|
3597
|
|
|
|
|
|
|
# Something like this is valid but strange: |
3598
|
|
|
|
|
|
|
# undef ^I; |
3599
|
0
|
|
|
|
|
0
|
$self->complain("The '^' seems unusual here\n"); |
3600
|
|
|
|
|
|
|
} |
3601
|
|
|
|
|
|
|
} |
3602
|
|
|
|
|
|
|
} |
3603
|
12
|
|
|
|
|
24
|
return; |
3604
|
|
|
|
|
|
|
} ## end sub do_CARAT_SIGN |
3605
|
|
|
|
|
|
|
|
3606
|
|
|
|
|
|
|
sub do_DOUBLE_COLON { |
3607
|
|
|
|
|
|
|
|
3608
|
9
|
|
|
9
|
0
|
16
|
my $self = shift; |
3609
|
|
|
|
|
|
|
|
3610
|
|
|
|
|
|
|
# '::' = probably a sub call |
3611
|
9
|
|
|
|
|
28
|
$self->scan_bare_identifier(); |
3612
|
9
|
|
|
|
|
17
|
return; |
3613
|
|
|
|
|
|
|
} ## end sub do_DOUBLE_COLON |
3614
|
|
|
|
|
|
|
|
3615
|
|
|
|
|
|
|
sub do_LEFT_SHIFT { |
3616
|
|
|
|
|
|
|
|
3617
|
7
|
|
|
7
|
0
|
27
|
my $self = shift; |
3618
|
|
|
|
|
|
|
|
3619
|
|
|
|
|
|
|
# '<<' = maybe a here-doc? |
3620
|
|
|
|
|
|
|
|
3621
|
|
|
|
|
|
|
## This check removed because it could be a deprecated here-doc with |
3622
|
|
|
|
|
|
|
## no specified target. See example in log 16 Sep 2020. |
3623
|
|
|
|
|
|
|
## return |
3624
|
|
|
|
|
|
|
## unless ( $i < $max_token_index ) |
3625
|
|
|
|
|
|
|
## ; # here-doc not possible if end of line |
3626
|
|
|
|
|
|
|
|
3627
|
7
|
50
|
|
|
|
38
|
if ( $expecting != OPERATOR ) { |
3628
|
7
|
|
|
|
|
23
|
my ( $found_target, $here_doc_target, $here_quote_character, |
3629
|
|
|
|
|
|
|
$saw_error ); |
3630
|
|
|
|
|
|
|
( |
3631
|
7
|
|
|
|
|
55
|
$found_target, $here_doc_target, $here_quote_character, $i, |
3632
|
|
|
|
|
|
|
$saw_error |
3633
|
|
|
|
|
|
|
) |
3634
|
|
|
|
|
|
|
= $self->find_here_doc( $expecting, $i, $rtokens, $rtoken_map, |
3635
|
|
|
|
|
|
|
$max_token_index ); |
3636
|
|
|
|
|
|
|
|
3637
|
7
|
50
|
|
|
|
35
|
if ($found_target) { |
|
|
0
|
|
|
|
|
|
3638
|
7
|
|
|
|
|
17
|
push @{$rhere_target_list}, |
|
7
|
|
|
|
|
27
|
|
3639
|
|
|
|
|
|
|
[ $here_doc_target, $here_quote_character ]; |
3640
|
7
|
|
|
|
|
53
|
$type = 'h'; |
3641
|
7
|
50
|
|
|
|
87
|
if ( length($here_doc_target) > 80 ) { |
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
3642
|
0
|
|
|
|
|
0
|
my $truncated = substr( $here_doc_target, 0, 80 ); |
3643
|
0
|
|
|
|
|
0
|
$self->complain("Long here-target: '$truncated' ...\n"); |
3644
|
|
|
|
|
|
|
} |
3645
|
|
|
|
|
|
|
elsif ( !$here_doc_target ) { |
3646
|
0
|
0
|
|
|
|
0
|
$self->warning( |
3647
|
|
|
|
|
|
|
'Use of bare << to mean <<"" is deprecated' . "\n" ) |
3648
|
|
|
|
|
|
|
unless ($here_quote_character); |
3649
|
|
|
|
|
|
|
} |
3650
|
|
|
|
|
|
|
elsif ( $here_doc_target !~ /^[A-Z_]\w+$/ ) { |
3651
|
2
|
|
|
|
|
12
|
$self->complain( |
3652
|
|
|
|
|
|
|
"Unconventional here-target: '$here_doc_target'\n"); |
3653
|
|
|
|
|
|
|
} |
3654
|
|
|
|
|
|
|
} |
3655
|
|
|
|
|
|
|
elsif ( $expecting == TERM ) { |
3656
|
0
|
0
|
|
|
|
0
|
unless ($saw_error) { |
3657
|
|
|
|
|
|
|
|
3658
|
|
|
|
|
|
|
# shouldn't happen..arriving here implies an error in |
3659
|
|
|
|
|
|
|
# the logic in sub 'find_here_doc' |
3660
|
0
|
|
|
|
|
0
|
if (DEVEL_MODE) { |
3661
|
|
|
|
|
|
|
$self->Fault(<<EOM); |
3662
|
|
|
|
|
|
|
Program bug; didn't find here doc target |
3663
|
|
|
|
|
|
|
EOM |
3664
|
|
|
|
|
|
|
} |
3665
|
|
|
|
|
|
|
$self->warning( |
3666
|
0
|
|
|
|
|
0
|
"Possible program error: didn't find here doc target\n" |
3667
|
|
|
|
|
|
|
); |
3668
|
0
|
|
|
|
|
0
|
$self->report_definite_bug(); |
3669
|
|
|
|
|
|
|
} |
3670
|
|
|
|
|
|
|
} |
3671
|
|
|
|
|
|
|
} |
3672
|
|
|
|
|
|
|
else { |
3673
|
|
|
|
|
|
|
} |
3674
|
7
|
|
|
|
|
24
|
return; |
3675
|
|
|
|
|
|
|
} ## end sub do_LEFT_SHIFT |
3676
|
|
|
|
|
|
|
|
3677
|
|
|
|
|
|
|
sub do_NEW_HERE_DOC { |
3678
|
|
|
|
|
|
|
|
3679
|
|
|
|
|
|
|
# '<<~' = a here-doc, new type added in v26 |
3680
|
|
|
|
|
|
|
|
3681
|
2
|
|
|
2
|
0
|
9
|
my $self = shift; |
3682
|
|
|
|
|
|
|
|
3683
|
|
|
|
|
|
|
return |
3684
|
2
|
50
|
|
|
|
12
|
unless ( $i < $max_token_index ) |
3685
|
|
|
|
|
|
|
; # here-doc not possible if end of line |
3686
|
2
|
50
|
|
|
|
11
|
if ( $expecting != OPERATOR ) { |
3687
|
2
|
|
|
|
|
14
|
my ( $found_target, $here_doc_target, $here_quote_character, |
3688
|
|
|
|
|
|
|
$saw_error ); |
3689
|
|
|
|
|
|
|
( |
3690
|
2
|
|
|
|
|
12
|
$found_target, $here_doc_target, $here_quote_character, $i, |
3691
|
|
|
|
|
|
|
$saw_error |
3692
|
|
|
|
|
|
|
) |
3693
|
|
|
|
|
|
|
= $self->find_here_doc( $expecting, $i, $rtokens, $rtoken_map, |
3694
|
|
|
|
|
|
|
$max_token_index ); |
3695
|
|
|
|
|
|
|
|
3696
|
2
|
50
|
|
|
|
8
|
if ($found_target) { |
|
|
0
|
|
|
|
|
|
3697
|
|
|
|
|
|
|
|
3698
|
2
|
50
|
|
|
|
19
|
if ( length($here_doc_target) > 80 ) { |
|
|
50
|
|
|
|
|
|
3699
|
0
|
|
|
|
|
0
|
my $truncated = substr( $here_doc_target, 0, 80 ); |
3700
|
0
|
|
|
|
|
0
|
$self->complain("Long here-target: '$truncated' ...\n"); |
3701
|
|
|
|
|
|
|
} |
3702
|
|
|
|
|
|
|
elsif ( $here_doc_target !~ /^[A-Z_]\w+$/ ) { |
3703
|
0
|
|
|
|
|
0
|
$self->complain( |
3704
|
|
|
|
|
|
|
"Unconventional here-target: '$here_doc_target'\n"); |
3705
|
|
|
|
|
|
|
} |
3706
|
|
|
|
|
|
|
|
3707
|
|
|
|
|
|
|
# Note that we put a leading space on the here quote |
3708
|
|
|
|
|
|
|
# character indicate that it may be preceded by spaces |
3709
|
2
|
|
|
|
|
6
|
$here_quote_character = SPACE . $here_quote_character; |
3710
|
2
|
|
|
|
|
6
|
push @{$rhere_target_list}, |
|
2
|
|
|
|
|
7
|
|
3711
|
|
|
|
|
|
|
[ $here_doc_target, $here_quote_character ]; |
3712
|
2
|
|
|
|
|
7
|
$type = 'h'; |
3713
|
|
|
|
|
|
|
} |
3714
|
|
|
|
|
|
|
elsif ( $expecting == TERM ) { |
3715
|
0
|
0
|
|
|
|
0
|
unless ($saw_error) { |
3716
|
|
|
|
|
|
|
|
3717
|
|
|
|
|
|
|
# shouldn't happen..arriving here implies an error in |
3718
|
|
|
|
|
|
|
# the logic in sub 'find_here_doc' |
3719
|
0
|
|
|
|
|
0
|
if (DEVEL_MODE) { |
3720
|
|
|
|
|
|
|
$self->Fault(<<EOM); |
3721
|
|
|
|
|
|
|
Program bug; didn't find here doc target |
3722
|
|
|
|
|
|
|
EOM |
3723
|
|
|
|
|
|
|
} |
3724
|
|
|
|
|
|
|
$self->warning( |
3725
|
0
|
|
|
|
|
0
|
"Possible program error: didn't find here doc target\n" |
3726
|
|
|
|
|
|
|
); |
3727
|
0
|
|
|
|
|
0
|
$self->report_definite_bug(); |
3728
|
|
|
|
|
|
|
} |
3729
|
|
|
|
|
|
|
} |
3730
|
|
|
|
|
|
|
} |
3731
|
|
|
|
|
|
|
else { |
3732
|
0
|
|
|
|
|
0
|
$self->error_if_expecting_OPERATOR(); |
3733
|
|
|
|
|
|
|
} |
3734
|
2
|
|
|
|
|
6
|
return; |
3735
|
|
|
|
|
|
|
} ## end sub do_NEW_HERE_DOC |
3736
|
|
|
|
|
|
|
|
3737
|
|
|
|
|
|
|
sub do_POINTER { |
3738
|
|
|
|
|
|
|
|
3739
|
|
|
|
|
|
|
# '->' |
3740
|
886
|
|
|
886
|
0
|
1594
|
return; |
3741
|
|
|
|
|
|
|
} |
3742
|
|
|
|
|
|
|
|
3743
|
|
|
|
|
|
|
sub do_PLUS_PLUS { |
3744
|
|
|
|
|
|
|
|
3745
|
46
|
|
|
46
|
0
|
156
|
my $self = shift; |
3746
|
|
|
|
|
|
|
|
3747
|
|
|
|
|
|
|
# '++' |
3748
|
|
|
|
|
|
|
# type = 'pp' for pre-increment, '++' for post-increment |
3749
|
46
|
100
|
|
|
|
260
|
if ( $expecting == TERM ) { $type = 'pp' } |
|
7
|
100
|
|
|
|
18
|
|
3750
|
|
|
|
|
|
|
elsif ( $expecting == UNKNOWN ) { |
3751
|
|
|
|
|
|
|
|
3752
|
2
|
|
|
|
|
9
|
my ( $next_nonblank_token, $i_next ) = |
3753
|
|
|
|
|
|
|
$self->find_next_nonblank_token( $i, $rtokens, $max_token_index ); |
3754
|
|
|
|
|
|
|
|
3755
|
|
|
|
|
|
|
# Fix for c042: look past a side comment |
3756
|
2
|
50
|
|
|
|
11
|
if ( $next_nonblank_token eq '#' ) { |
3757
|
0
|
|
|
|
|
0
|
( $next_nonblank_token, $i_next ) = |
3758
|
|
|
|
|
|
|
$self->find_next_nonblank_token( $max_token_index, |
3759
|
|
|
|
|
|
|
$rtokens, $max_token_index ); |
3760
|
|
|
|
|
|
|
} |
3761
|
|
|
|
|
|
|
|
3762
|
2
|
50
|
|
|
|
8
|
if ( $next_nonblank_token eq '$' ) { $type = 'pp' } |
|
0
|
|
|
|
|
0
|
|
3763
|
|
|
|
|
|
|
} |
3764
|
46
|
|
|
|
|
137
|
return; |
3765
|
|
|
|
|
|
|
} ## end sub do_PLUS_PLUS |
3766
|
|
|
|
|
|
|
|
3767
|
|
|
|
|
|
|
sub do_FAT_COMMA { |
3768
|
|
|
|
|
|
|
|
3769
|
1025
|
|
|
1025
|
0
|
1857
|
my $self = shift; |
3770
|
|
|
|
|
|
|
|
3771
|
|
|
|
|
|
|
# '=>' |
3772
|
1025
|
50
|
|
|
|
2555
|
if ( $last_nonblank_type eq $tok ) { |
3773
|
0
|
|
|
|
|
0
|
$self->complain("Repeated '=>'s \n"); |
3774
|
|
|
|
|
|
|
} |
3775
|
|
|
|
|
|
|
|
3776
|
|
|
|
|
|
|
# patch for operator_expected: note if we are in the list (use.t) |
3777
|
|
|
|
|
|
|
# TODO: make version numbers a new token type |
3778
|
1025
|
100
|
|
|
|
2448
|
if ( $statement_type eq 'use' ) { $statement_type = '_use' } |
|
18
|
|
|
|
|
45
|
|
3779
|
1025
|
|
|
|
|
1655
|
return; |
3780
|
|
|
|
|
|
|
} ## end sub do_FAT_COMMA |
3781
|
|
|
|
|
|
|
|
3782
|
|
|
|
|
|
|
sub do_MINUS_MINUS { |
3783
|
|
|
|
|
|
|
|
3784
|
2
|
|
|
2
|
0
|
8
|
my $self = shift; |
3785
|
|
|
|
|
|
|
|
3786
|
|
|
|
|
|
|
# '--' |
3787
|
|
|
|
|
|
|
# type = 'mm' for pre-decrement, '--' for post-decrement |
3788
|
|
|
|
|
|
|
|
3789
|
2
|
50
|
|
|
|
8
|
if ( $expecting == TERM ) { $type = 'mm' } |
|
2
|
0
|
|
|
|
9
|
|
3790
|
|
|
|
|
|
|
elsif ( $expecting == UNKNOWN ) { |
3791
|
0
|
|
|
|
|
0
|
my ( $next_nonblank_token, $i_next ) = |
3792
|
|
|
|
|
|
|
$self->find_next_nonblank_token( $i, $rtokens, $max_token_index ); |
3793
|
|
|
|
|
|
|
|
3794
|
|
|
|
|
|
|
# Fix for c042: look past a side comment |
3795
|
0
|
0
|
|
|
|
0
|
if ( $next_nonblank_token eq '#' ) { |
3796
|
0
|
|
|
|
|
0
|
( $next_nonblank_token, $i_next ) = |
3797
|
|
|
|
|
|
|
$self->find_next_nonblank_token( $max_token_index, |
3798
|
|
|
|
|
|
|
$rtokens, $max_token_index ); |
3799
|
|
|
|
|
|
|
} |
3800
|
|
|
|
|
|
|
|
3801
|
0
|
0
|
|
|
|
0
|
if ( $next_nonblank_token eq '$' ) { $type = 'mm' } |
|
0
|
|
|
|
|
0
|
|
3802
|
|
|
|
|
|
|
} |
3803
|
2
|
|
|
|
|
7
|
return; |
3804
|
|
|
|
|
|
|
} ## end sub do_MINUS_MINUS |
3805
|
|
|
|
|
|
|
|
3806
|
|
|
|
|
|
|
sub do_LOGICAL_AND { |
3807
|
|
|
|
|
|
|
|
3808
|
58
|
|
|
58
|
0
|
137
|
my $self = shift; |
3809
|
|
|
|
|
|
|
|
3810
|
|
|
|
|
|
|
# '&&' |
3811
|
58
|
50
|
33
|
|
|
236
|
$self->error_if_expecting_TERM() |
3812
|
|
|
|
|
|
|
if ( $expecting == TERM && $last_nonblank_token ne ',' ); #c015 |
3813
|
58
|
|
|
|
|
113
|
return; |
3814
|
|
|
|
|
|
|
} ## end sub do_LOGICAL_AND |
3815
|
|
|
|
|
|
|
|
3816
|
|
|
|
|
|
|
sub do_LOGICAL_OR { |
3817
|
|
|
|
|
|
|
|
3818
|
74
|
|
|
74
|
0
|
198
|
my $self = shift; |
3819
|
|
|
|
|
|
|
|
3820
|
|
|
|
|
|
|
# '||' |
3821
|
74
|
100
|
66
|
|
|
339
|
$self->error_if_expecting_TERM() |
3822
|
|
|
|
|
|
|
if ( $expecting == TERM && $last_nonblank_token ne ',' ); #c015 |
3823
|
74
|
|
|
|
|
149
|
return; |
3824
|
|
|
|
|
|
|
} ## end sub do_LOGICAL_OR |
3825
|
|
|
|
|
|
|
|
3826
|
|
|
|
|
|
|
sub do_SLASH_SLASH { |
3827
|
|
|
|
|
|
|
|
3828
|
10
|
|
|
10
|
0
|
22
|
my $self = shift; |
3829
|
|
|
|
|
|
|
|
3830
|
|
|
|
|
|
|
# '//' |
3831
|
10
|
100
|
|
|
|
35
|
$self->error_if_expecting_TERM() |
3832
|
|
|
|
|
|
|
if ( $expecting == TERM ); |
3833
|
10
|
|
|
|
|
17
|
return; |
3834
|
|
|
|
|
|
|
} ## end sub do_SLASH_SLASH |
3835
|
|
|
|
|
|
|
|
3836
|
|
|
|
|
|
|
sub do_DIGITS { |
3837
|
|
|
|
|
|
|
|
3838
|
1929
|
|
|
1929
|
0
|
3241
|
my $self = shift; |
3839
|
|
|
|
|
|
|
|
3840
|
|
|
|
|
|
|
# 'd' = string of digits |
3841
|
1929
|
50
|
|
|
|
4362
|
$self->error_if_expecting_OPERATOR("Number") |
3842
|
|
|
|
|
|
|
if ( $expecting == OPERATOR ); |
3843
|
|
|
|
|
|
|
|
3844
|
1929
|
|
|
|
|
4889
|
my $number = $self->scan_number_fast(); |
3845
|
1929
|
50
|
|
|
|
4792
|
if ( !defined($number) ) { |
3846
|
|
|
|
|
|
|
|
3847
|
|
|
|
|
|
|
# shouldn't happen - we should always get a number |
3848
|
0
|
|
|
|
|
0
|
if (DEVEL_MODE) { |
3849
|
|
|
|
|
|
|
$self->Fault(<<EOM); |
3850
|
|
|
|
|
|
|
non-number beginning with digit--program bug |
3851
|
|
|
|
|
|
|
EOM |
3852
|
|
|
|
|
|
|
} |
3853
|
|
|
|
|
|
|
$self->warning( |
3854
|
0
|
|
|
|
|
0
|
"Unexpected error condition: non-number beginning with digit\n" |
3855
|
|
|
|
|
|
|
); |
3856
|
0
|
|
|
|
|
0
|
$self->report_definite_bug(); |
3857
|
|
|
|
|
|
|
} |
3858
|
1929
|
|
|
|
|
4636
|
return; |
3859
|
|
|
|
|
|
|
} ## end sub do_DIGITS |
3860
|
|
|
|
|
|
|
|
3861
|
|
|
|
|
|
|
sub do_ATTRIBUTE_LIST { |
3862
|
|
|
|
|
|
|
|
3863
|
39
|
|
|
39
|
0
|
109
|
my ( $self, $next_nonblank_token ) = @_; |
3864
|
|
|
|
|
|
|
|
3865
|
|
|
|
|
|
|
# Called at a bareword encountered while in an attribute list |
3866
|
|
|
|
|
|
|
# returns 'is_attribute': |
3867
|
|
|
|
|
|
|
# true if attribute found |
3868
|
|
|
|
|
|
|
# false if an attribute (continue parsing bareword) |
3869
|
|
|
|
|
|
|
|
3870
|
|
|
|
|
|
|
# treat bare word followed by open paren like qw( |
3871
|
39
|
100
|
|
|
|
130
|
if ( $next_nonblank_token eq '(' ) { |
3872
|
|
|
|
|
|
|
|
3873
|
|
|
|
|
|
|
# For something like: |
3874
|
|
|
|
|
|
|
# : prototype($$) |
3875
|
|
|
|
|
|
|
# we should let do_scan_sub see it so that it can see |
3876
|
|
|
|
|
|
|
# the prototype. All other attributes get parsed as a |
3877
|
|
|
|
|
|
|
# quoted string. |
3878
|
18
|
100
|
|
|
|
62
|
if ( $tok eq 'prototype' ) { |
3879
|
2
|
|
|
|
|
7
|
$id_scan_state = 'prototype'; |
3880
|
|
|
|
|
|
|
|
3881
|
|
|
|
|
|
|
# start just after the word 'prototype' |
3882
|
2
|
|
|
|
|
5
|
my $i_beg = $i + 1; |
3883
|
2
|
|
|
|
|
21
|
( $i, $tok, $type, $id_scan_state ) = $self->do_scan_sub( |
3884
|
|
|
|
|
|
|
{ |
3885
|
|
|
|
|
|
|
input_line => $input_line, |
3886
|
|
|
|
|
|
|
i => $i, |
3887
|
|
|
|
|
|
|
i_beg => $i_beg, |
3888
|
|
|
|
|
|
|
tok => $tok, |
3889
|
|
|
|
|
|
|
type => $type, |
3890
|
|
|
|
|
|
|
rtokens => $rtokens, |
3891
|
|
|
|
|
|
|
rtoken_map => $rtoken_map, |
3892
|
|
|
|
|
|
|
id_scan_state => $id_scan_state, |
3893
|
|
|
|
|
|
|
max_token_index => $max_token_index, |
3894
|
|
|
|
|
|
|
} |
3895
|
|
|
|
|
|
|
); |
3896
|
|
|
|
|
|
|
|
3897
|
|
|
|
|
|
|
# If successful, mark as type 'q' to be consistent |
3898
|
|
|
|
|
|
|
# with other attributes. Type 'w' would also work. |
3899
|
2
|
50
|
|
|
|
20
|
if ( $i > $i_beg ) { |
3900
|
2
|
|
|
|
|
5
|
$type = 'q'; |
3901
|
2
|
|
|
|
|
8
|
return 1; |
3902
|
|
|
|
|
|
|
} |
3903
|
|
|
|
|
|
|
|
3904
|
|
|
|
|
|
|
# If not successful, continue and parse as a quote. |
3905
|
|
|
|
|
|
|
} |
3906
|
|
|
|
|
|
|
|
3907
|
|
|
|
|
|
|
# All other attribute lists must be parsed as quotes |
3908
|
|
|
|
|
|
|
# (see 'signatures.t' for good examples) |
3909
|
16
|
|
|
|
|
50
|
$in_quote = $quote_items{'q'}; |
3910
|
16
|
|
|
|
|
43
|
$allowed_quote_modifiers = $quote_modifiers{'q'}; |
3911
|
16
|
|
|
|
|
32
|
$type = 'q'; |
3912
|
16
|
|
|
|
|
31
|
$quote_type = 'q'; |
3913
|
16
|
|
|
|
|
36
|
return 1; |
3914
|
|
|
|
|
|
|
} |
3915
|
|
|
|
|
|
|
|
3916
|
|
|
|
|
|
|
# handle bareword not followed by open paren |
3917
|
|
|
|
|
|
|
else { |
3918
|
21
|
|
|
|
|
44
|
$type = 'w'; |
3919
|
21
|
|
|
|
|
52
|
return 1; |
3920
|
|
|
|
|
|
|
} |
3921
|
|
|
|
|
|
|
|
3922
|
|
|
|
|
|
|
# attribute not found |
3923
|
0
|
|
|
|
|
0
|
return; |
3924
|
|
|
|
|
|
|
} ## end sub do_ATTRIBUTE_LIST |
3925
|
|
|
|
|
|
|
|
3926
|
|
|
|
|
|
|
sub do_QUOTED_BAREWORD { |
3927
|
|
|
|
|
|
|
|
3928
|
786
|
|
|
786
|
0
|
1424
|
my $self = shift; |
3929
|
|
|
|
|
|
|
|
3930
|
|
|
|
|
|
|
# find type of a bareword followed by a '=>' |
3931
|
786
|
100
|
|
|
|
4006
|
if ( $ris_constant->{$current_package}{$tok} ) { |
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
3932
|
14
|
|
|
|
|
34
|
$type = 'C'; |
3933
|
|
|
|
|
|
|
} |
3934
|
|
|
|
|
|
|
elsif ( $ris_user_function->{$current_package}{$tok} ) { |
3935
|
0
|
|
|
|
|
0
|
$type = 'U'; |
3936
|
0
|
|
|
|
|
0
|
$prototype = $ruser_function_prototype->{$current_package}{$tok}; |
3937
|
|
|
|
|
|
|
} |
3938
|
|
|
|
|
|
|
elsif ( $tok =~ /^v\d+$/ ) { |
3939
|
0
|
|
|
|
|
0
|
$type = 'v'; |
3940
|
0
|
|
|
|
|
0
|
$self->report_v_string($tok); |
3941
|
|
|
|
|
|
|
} |
3942
|
|
|
|
|
|
|
else { |
3943
|
|
|
|
|
|
|
|
3944
|
|
|
|
|
|
|
# Bareword followed by a fat comma - see 'git18.in' |
3945
|
|
|
|
|
|
|
# If tok is something like 'x17' then it could |
3946
|
|
|
|
|
|
|
# actually be operator x followed by number 17. |
3947
|
|
|
|
|
|
|
# For example, here: |
3948
|
|
|
|
|
|
|
# 123x17 => [ 792, 1224 ], |
3949
|
|
|
|
|
|
|
# (a key of 123 repeated 17 times, perhaps not |
3950
|
|
|
|
|
|
|
# what was intended). We will mark x17 as type |
3951
|
|
|
|
|
|
|
# 'n' and it will be split. If the previous token |
3952
|
|
|
|
|
|
|
# was also a bareword then it is not very clear is |
3953
|
|
|
|
|
|
|
# going on. In this case we will not be sure that |
3954
|
|
|
|
|
|
|
# an operator is expected, so we just mark it as a |
3955
|
|
|
|
|
|
|
# bareword. Perl is a little murky in what it does |
3956
|
|
|
|
|
|
|
# with stuff like this, and its behavior can change |
3957
|
|
|
|
|
|
|
# over time. Something like |
3958
|
|
|
|
|
|
|
# a x18 => [792, 1224], will compile as |
3959
|
|
|
|
|
|
|
# a key with 18 a's. But something like |
3960
|
|
|
|
|
|
|
# push @array, a x18; |
3961
|
|
|
|
|
|
|
# is a syntax error. |
3962
|
772
|
100
|
66
|
|
|
2739
|
if ( |
|
|
|
33
|
|
|
|
|
|
|
|
66
|
|
|
|
|
3963
|
|
|
|
|
|
|
$expecting == OPERATOR |
3964
|
|
|
|
|
|
|
&& substr( $tok, 0, 1 ) eq 'x' |
3965
|
|
|
|
|
|
|
&& ( length($tok) == 1 |
3966
|
|
|
|
|
|
|
|| substr( $tok, 1, 1 ) =~ /^\d/ ) |
3967
|
|
|
|
|
|
|
) |
3968
|
|
|
|
|
|
|
{ |
3969
|
3
|
|
|
|
|
5
|
$type = 'n'; |
3970
|
3
|
50
|
|
|
|
12
|
if ( $self->split_pretoken(1) ) { |
3971
|
3
|
|
|
|
|
5
|
$type = 'x'; |
3972
|
3
|
|
|
|
|
7
|
$tok = 'x'; |
3973
|
|
|
|
|
|
|
} |
3974
|
|
|
|
|
|
|
} |
3975
|
|
|
|
|
|
|
else { |
3976
|
|
|
|
|
|
|
|
3977
|
|
|
|
|
|
|
# git #18 |
3978
|
769
|
|
|
|
|
1331
|
$type = 'w'; |
3979
|
769
|
|
|
|
|
1981
|
$self->error_if_expecting_OPERATOR(); |
3980
|
|
|
|
|
|
|
} |
3981
|
|
|
|
|
|
|
} |
3982
|
786
|
|
|
|
|
1257
|
return; |
3983
|
|
|
|
|
|
|
} ## end sub do_QUOTED_BAREWORD |
3984
|
|
|
|
|
|
|
|
3985
|
|
|
|
|
|
|
sub do_X_OPERATOR { |
3986
|
|
|
|
|
|
|
|
3987
|
17
|
|
|
17
|
0
|
46
|
my $self = shift; |
3988
|
|
|
|
|
|
|
|
3989
|
17
|
100
|
|
|
|
64
|
if ( $tok eq 'x' ) { |
3990
|
15
|
50
|
|
|
|
80
|
if ( $rtokens->[ $i + 1 ] eq '=' ) { # x= |
3991
|
0
|
|
|
|
|
0
|
$tok = 'x='; |
3992
|
0
|
|
|
|
|
0
|
$type = $tok; |
3993
|
0
|
|
|
|
|
0
|
$i++; |
3994
|
|
|
|
|
|
|
} |
3995
|
|
|
|
|
|
|
else { |
3996
|
15
|
|
|
|
|
39
|
$type = 'x'; |
3997
|
|
|
|
|
|
|
} |
3998
|
|
|
|
|
|
|
} |
3999
|
|
|
|
|
|
|
else { |
4000
|
|
|
|
|
|
|
|
4001
|
|
|
|
|
|
|
# Split a pretoken like 'x10' into 'x' and '10'. |
4002
|
|
|
|
|
|
|
# Note: In previous versions of perltidy it was marked |
4003
|
|
|
|
|
|
|
# as a number, $type = 'n', and fixed downstream by the |
4004
|
|
|
|
|
|
|
# Formatter. |
4005
|
2
|
|
|
|
|
6
|
$type = 'n'; |
4006
|
2
|
50
|
|
|
|
6
|
if ( $self->split_pretoken(1) ) { |
4007
|
2
|
|
|
|
|
5
|
$type = 'x'; |
4008
|
2
|
|
|
|
|
4
|
$tok = 'x'; |
4009
|
|
|
|
|
|
|
} |
4010
|
|
|
|
|
|
|
} |
4011
|
17
|
|
|
|
|
32
|
return; |
4012
|
|
|
|
|
|
|
} ## end sub do_X_OPERATOR |
4013
|
|
|
|
|
|
|
|
4014
|
|
|
|
|
|
|
sub do_USE_CONSTANT { |
4015
|
|
|
|
|
|
|
|
4016
|
16
|
|
|
16
|
0
|
36
|
my $self = shift; |
4017
|
|
|
|
|
|
|
|
4018
|
16
|
|
|
|
|
61
|
$self->scan_bare_identifier(); |
4019
|
16
|
|
|
|
|
92
|
my ( $next_nonblank_tok2, $i_next2 ) = |
4020
|
|
|
|
|
|
|
$self->find_next_nonblank_token( $i, $rtokens, $max_token_index ); |
4021
|
|
|
|
|
|
|
|
4022
|
16
|
50
|
|
|
|
74
|
if ($next_nonblank_tok2) { |
4023
|
|
|
|
|
|
|
|
4024
|
16
|
100
|
|
|
|
98
|
if ( $is_keyword{$next_nonblank_tok2} ) { |
4025
|
|
|
|
|
|
|
|
4026
|
|
|
|
|
|
|
# Assume qw is used as a quote and okay, as in: |
4027
|
|
|
|
|
|
|
# use constant qw{ DEBUG 0 }; |
4028
|
|
|
|
|
|
|
# Not worth trying to parse for just a warning |
4029
|
|
|
|
|
|
|
|
4030
|
|
|
|
|
|
|
# NOTE: This warning is deactivated because recent |
4031
|
|
|
|
|
|
|
# versions of perl do not complain here, but |
4032
|
|
|
|
|
|
|
# the coding is retained for reference. |
4033
|
1
|
|
|
|
|
2
|
if ( 0 && $next_nonblank_tok2 ne 'qw' ) { |
4034
|
|
|
|
|
|
|
$self->warning( |
4035
|
|
|
|
|
|
|
"Attempting to define constant '$next_nonblank_tok2' which is a perl keyword\n" |
4036
|
|
|
|
|
|
|
); |
4037
|
|
|
|
|
|
|
} |
4038
|
|
|
|
|
|
|
} |
4039
|
|
|
|
|
|
|
|
4040
|
|
|
|
|
|
|
else { |
4041
|
15
|
|
|
|
|
54
|
$ris_constant->{$current_package}{$next_nonblank_tok2} = 1; |
4042
|
|
|
|
|
|
|
} |
4043
|
|
|
|
|
|
|
} |
4044
|
16
|
|
|
|
|
65
|
return; |
4045
|
|
|
|
|
|
|
} ## end sub do_USE_CONSTANT |
4046
|
|
|
|
|
|
|
|
4047
|
|
|
|
|
|
|
sub do_KEYWORD { |
4048
|
|
|
|
|
|
|
|
4049
|
2636
|
|
|
2636
|
0
|
4855
|
my $self = shift; |
4050
|
|
|
|
|
|
|
|
4051
|
|
|
|
|
|
|
# found a keyword - set any associated flags |
4052
|
2636
|
|
|
|
|
4500
|
$type = 'k'; |
4053
|
|
|
|
|
|
|
|
4054
|
|
|
|
|
|
|
# Since for and foreach may not be followed immediately |
4055
|
|
|
|
|
|
|
# by an opening paren, we have to remember which keyword |
4056
|
|
|
|
|
|
|
# is associated with the next '(' |
4057
|
2636
|
100
|
100
|
|
|
20838
|
if ( $is_for_foreach{$tok} ) { |
|
|
100
|
66
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
4058
|
74
|
100
|
|
|
|
328
|
if ( new_statement_ok() ) { |
4059
|
72
|
|
|
|
|
243
|
$want_paren = $tok; |
4060
|
|
|
|
|
|
|
} |
4061
|
|
|
|
|
|
|
} |
4062
|
|
|
|
|
|
|
|
4063
|
|
|
|
|
|
|
# recognize 'use' statements, which are special |
4064
|
|
|
|
|
|
|
elsif ( $is_use_require{$tok} ) { |
4065
|
175
|
|
|
|
|
419
|
$statement_type = $tok; |
4066
|
175
|
50
|
|
|
|
529
|
$self->error_if_expecting_OPERATOR() |
4067
|
|
|
|
|
|
|
if ( $expecting == OPERATOR ); |
4068
|
|
|
|
|
|
|
} |
4069
|
|
|
|
|
|
|
|
4070
|
|
|
|
|
|
|
# remember my and our to check for trailing ": shared" |
4071
|
|
|
|
|
|
|
elsif ( $is_my_our_state{$tok} ) { |
4072
|
628
|
|
|
|
|
1404
|
$statement_type = $tok; |
4073
|
|
|
|
|
|
|
} |
4074
|
|
|
|
|
|
|
|
4075
|
|
|
|
|
|
|
# Check for misplaced 'elsif' and 'else', but allow isolated |
4076
|
|
|
|
|
|
|
# else or elsif blocks to be formatted. This is indicated |
4077
|
|
|
|
|
|
|
# by a last noblank token of ';' |
4078
|
|
|
|
|
|
|
elsif ( $tok eq 'elsif' ) { |
4079
|
27
|
50
|
66
|
|
|
211
|
if ( |
4080
|
|
|
|
|
|
|
$last_nonblank_token ne ';' |
4081
|
|
|
|
|
|
|
|
4082
|
|
|
|
|
|
|
## !~ /^(if|elsif|unless)$/ |
4083
|
|
|
|
|
|
|
&& !$is_if_elsif_unless{$last_nonblank_block_type} |
4084
|
|
|
|
|
|
|
) |
4085
|
|
|
|
|
|
|
{ |
4086
|
0
|
|
|
|
|
0
|
$self->warning( |
4087
|
|
|
|
|
|
|
"expecting '$tok' to follow one of 'if|elsif|unless'\n"); |
4088
|
|
|
|
|
|
|
} |
4089
|
|
|
|
|
|
|
} |
4090
|
|
|
|
|
|
|
elsif ( $tok eq 'else' ) { |
4091
|
|
|
|
|
|
|
|
4092
|
|
|
|
|
|
|
# patched for SWITCH/CASE |
4093
|
44
|
50
|
66
|
|
|
535
|
if ( |
|
|
|
66
|
|
|
|
|
4094
|
|
|
|
|
|
|
$last_nonblank_token ne ';' |
4095
|
|
|
|
|
|
|
|
4096
|
|
|
|
|
|
|
## !~ /^(if|elsif|unless|case|when)$/ |
4097
|
|
|
|
|
|
|
&& !$is_if_elsif_unless_case_when{$last_nonblank_block_type} |
4098
|
|
|
|
|
|
|
|
4099
|
|
|
|
|
|
|
# patch to avoid an unwanted error message for |
4100
|
|
|
|
|
|
|
# the case of a parenless 'case' (RT 105484): |
4101
|
|
|
|
|
|
|
# switch ( 1 ) { case x { 2 } else { } } |
4102
|
|
|
|
|
|
|
## !~ /^(if|elsif|unless|case|when)$/ |
4103
|
|
|
|
|
|
|
&& !$is_if_elsif_unless_case_when{$statement_type} |
4104
|
|
|
|
|
|
|
) |
4105
|
|
|
|
|
|
|
{ |
4106
|
0
|
|
|
|
|
0
|
$self->warning( |
4107
|
|
|
|
|
|
|
"expecting '$tok' to follow one of 'if|elsif|unless|case|when'\n" |
4108
|
|
|
|
|
|
|
); |
4109
|
|
|
|
|
|
|
} |
4110
|
|
|
|
|
|
|
} |
4111
|
|
|
|
|
|
|
|
4112
|
|
|
|
|
|
|
# patch for SWITCH/CASE if 'case' and 'when are |
4113
|
|
|
|
|
|
|
# treated as keywords. Also 'default' for Switch::Plain |
4114
|
|
|
|
|
|
|
elsif ($tok eq 'when' |
4115
|
|
|
|
|
|
|
|| $tok eq 'case' |
4116
|
|
|
|
|
|
|
|| $tok eq 'default' ) |
4117
|
|
|
|
|
|
|
{ |
4118
|
56
|
|
|
|
|
129
|
$statement_type = $tok; # next '{' is block |
4119
|
|
|
|
|
|
|
} |
4120
|
|
|
|
|
|
|
|
4121
|
|
|
|
|
|
|
# feature 'err' was removed in Perl 5.10. So mark this as |
4122
|
|
|
|
|
|
|
# a bareword unless an operator is expected (see c158). |
4123
|
|
|
|
|
|
|
elsif ( $tok eq 'err' ) { |
4124
|
1
|
50
|
|
|
|
9
|
if ( $expecting != OPERATOR ) { $type = 'w' } |
|
1
|
|
|
|
|
2
|
|
4125
|
|
|
|
|
|
|
} |
4126
|
|
|
|
|
|
|
|
4127
|
2636
|
|
|
|
|
4711
|
return; |
4128
|
|
|
|
|
|
|
} ## end sub do_KEYWORD |
4129
|
|
|
|
|
|
|
|
4130
|
|
|
|
|
|
|
sub do_QUOTE_OPERATOR { |
4131
|
|
|
|
|
|
|
|
4132
|
202
|
|
|
202
|
0
|
457
|
my $self = shift; |
4133
|
|
|
|
|
|
|
|
4134
|
202
|
50
|
|
|
|
638
|
if ( $expecting == OPERATOR ) { |
4135
|
|
|
|
|
|
|
|
4136
|
|
|
|
|
|
|
# Be careful not to call an error for a qw quote |
4137
|
|
|
|
|
|
|
# where a parenthesized list is allowed. For example, |
4138
|
|
|
|
|
|
|
# it could also be a for/foreach construct such as |
4139
|
|
|
|
|
|
|
# |
4140
|
|
|
|
|
|
|
# foreach my $key qw\Uno Due Tres Quadro\ { |
4141
|
|
|
|
|
|
|
# print "Set $key\n"; |
4142
|
|
|
|
|
|
|
# } |
4143
|
|
|
|
|
|
|
# |
4144
|
|
|
|
|
|
|
|
4145
|
|
|
|
|
|
|
# Or it could be a function call. |
4146
|
|
|
|
|
|
|
# NOTE: Braces in something like &{ xxx } are not |
4147
|
|
|
|
|
|
|
# marked as a block, we might have a method call. |
4148
|
|
|
|
|
|
|
# &method(...), $method->(..), &{method}(...), |
4149
|
|
|
|
|
|
|
# $ref[2](list) is ok & short for $ref[2]->(list) |
4150
|
|
|
|
|
|
|
# |
4151
|
|
|
|
|
|
|
# See notes in 'sub code_block_type' and |
4152
|
|
|
|
|
|
|
# 'sub is_non_structural_brace' |
4153
|
|
|
|
|
|
|
|
4154
|
0
|
0
|
0
|
|
|
0
|
unless ( |
|
|
|
0
|
|
|
|
|
4155
|
|
|
|
|
|
|
$tok eq 'qw' |
4156
|
|
|
|
|
|
|
&& ( $last_nonblank_token =~ /^([\]\}\&]|\-\>)/ |
4157
|
|
|
|
|
|
|
|| $is_for_foreach{$want_paren} ) |
4158
|
|
|
|
|
|
|
) |
4159
|
|
|
|
|
|
|
{ |
4160
|
0
|
|
|
|
|
0
|
$self->error_if_expecting_OPERATOR(); |
4161
|
|
|
|
|
|
|
} |
4162
|
|
|
|
|
|
|
} |
4163
|
202
|
|
|
|
|
556
|
$in_quote = $quote_items{$tok}; |
4164
|
202
|
|
|
|
|
554
|
$allowed_quote_modifiers = $quote_modifiers{$tok}; |
4165
|
|
|
|
|
|
|
|
4166
|
|
|
|
|
|
|
# All quote types are 'Q' except possibly qw quotes. |
4167
|
|
|
|
|
|
|
# qw quotes are special in that they may generally be trimmed |
4168
|
|
|
|
|
|
|
# of leading and trailing whitespace. So they are given a |
4169
|
|
|
|
|
|
|
# separate type, 'q', unless requested otherwise. |
4170
|
202
|
100
|
66
|
|
|
987
|
$type = |
4171
|
|
|
|
|
|
|
( $tok eq 'qw' && $self->[_trim_qw_] ) |
4172
|
|
|
|
|
|
|
? 'q' |
4173
|
|
|
|
|
|
|
: 'Q'; |
4174
|
202
|
|
|
|
|
380
|
$quote_type = $type; |
4175
|
202
|
|
|
|
|
374
|
return; |
4176
|
|
|
|
|
|
|
} ## end sub do_QUOTE_OPERATOR |
4177
|
|
|
|
|
|
|
|
4178
|
|
|
|
|
|
|
sub do_UNKNOWN_BAREWORD { |
4179
|
|
|
|
|
|
|
|
4180
|
957
|
|
|
957
|
0
|
3120
|
my ( $self, $next_nonblank_token ) = @_; |
4181
|
|
|
|
|
|
|
|
4182
|
957
|
|
|
|
|
3149
|
$self->scan_bare_identifier(); |
4183
|
|
|
|
|
|
|
|
4184
|
957
|
100
|
100
|
|
|
3494
|
if ( $statement_type eq 'use' |
4185
|
|
|
|
|
|
|
&& $last_nonblank_token eq 'use' ) |
4186
|
|
|
|
|
|
|
{ |
4187
|
108
|
|
|
|
|
422
|
$rsaw_use_module->{$current_package}->{$tok} = 1; |
4188
|
|
|
|
|
|
|
} |
4189
|
|
|
|
|
|
|
|
4190
|
957
|
100
|
|
|
|
2602
|
if ( $type eq 'w' ) { |
4191
|
|
|
|
|
|
|
|
4192
|
932
|
50
|
|
|
|
2428
|
if ( $expecting == OPERATOR ) { |
4193
|
|
|
|
|
|
|
|
4194
|
|
|
|
|
|
|
# Patch to avoid error message for RPerl overloaded |
4195
|
|
|
|
|
|
|
# operator functions: use overload |
4196
|
|
|
|
|
|
|
# '+' => \&sse_add, |
4197
|
|
|
|
|
|
|
# '-' => \&sse_sub, |
4198
|
|
|
|
|
|
|
# '*' => \&sse_mul, |
4199
|
|
|
|
|
|
|
# '/' => \&sse_div; |
4200
|
|
|
|
|
|
|
# TODO: this could eventually be generalized |
4201
|
0
|
0
|
0
|
|
|
0
|
if ( $rsaw_use_module->{$current_package}->{'RPerl'} |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
4202
|
|
|
|
|
|
|
&& $tok =~ /^sse_(mul|div|add|sub)$/ ) |
4203
|
|
|
|
|
|
|
{ |
4204
|
|
|
|
|
|
|
|
4205
|
|
|
|
|
|
|
} |
4206
|
|
|
|
|
|
|
|
4207
|
|
|
|
|
|
|
# Fix part 1 for git #63 in which a comment falls |
4208
|
|
|
|
|
|
|
# between an -> and the following word. An |
4209
|
|
|
|
|
|
|
# alternate fix would be to change operator_expected |
4210
|
|
|
|
|
|
|
# to return an UNKNOWN for this type. |
4211
|
|
|
|
|
|
|
elsif ( $last_nonblank_type eq '->' ) { |
4212
|
|
|
|
|
|
|
|
4213
|
|
|
|
|
|
|
} |
4214
|
|
|
|
|
|
|
|
4215
|
|
|
|
|
|
|
# don't complain about possible indirect object |
4216
|
|
|
|
|
|
|
# notation. |
4217
|
|
|
|
|
|
|
# For example: |
4218
|
|
|
|
|
|
|
# package main; |
4219
|
|
|
|
|
|
|
# sub new($) { ... } |
4220
|
|
|
|
|
|
|
# $b = new A::; # calls A::new |
4221
|
|
|
|
|
|
|
# $c = new A; # same thing but suspicious |
4222
|
|
|
|
|
|
|
# This will call A::new but we have a 'new' in |
4223
|
|
|
|
|
|
|
# main:: which looks like a constant. |
4224
|
|
|
|
|
|
|
# |
4225
|
|
|
|
|
|
|
elsif ( $last_nonblank_type eq 'C' ) { |
4226
|
0
|
0
|
|
|
|
0
|
if ( $tok !~ /::$/ ) { |
4227
|
0
|
|
|
|
|
0
|
$self->complain(<<EOM); |
4228
|
|
|
|
|
|
|
Expecting operator after '$last_nonblank_token' but found bare word '$tok' |
4229
|
|
|
|
|
|
|
Maybe indirectet object notation? |
4230
|
|
|
|
|
|
|
EOM |
4231
|
|
|
|
|
|
|
} |
4232
|
|
|
|
|
|
|
} |
4233
|
|
|
|
|
|
|
else { |
4234
|
0
|
|
|
|
|
0
|
$self->error_if_expecting_OPERATOR("bareword"); |
4235
|
|
|
|
|
|
|
} |
4236
|
|
|
|
|
|
|
} |
4237
|
|
|
|
|
|
|
|
4238
|
|
|
|
|
|
|
# mark bare words immediately followed by a paren as |
4239
|
|
|
|
|
|
|
# functions |
4240
|
932
|
|
|
|
|
2242
|
$next_tok = $rtokens->[ $i + 1 ]; |
4241
|
932
|
100
|
|
|
|
2535
|
if ( $next_tok eq '(' ) { |
4242
|
|
|
|
|
|
|
|
4243
|
|
|
|
|
|
|
# Patch for issue c151, where we are processing a snippet and |
4244
|
|
|
|
|
|
|
# have not seen that SPACE is a constant. In this case 'x' is |
4245
|
|
|
|
|
|
|
# probably an operator. The only disadvantage with an incorrect |
4246
|
|
|
|
|
|
|
# guess is that the space after it may be incorrect. For example |
4247
|
|
|
|
|
|
|
# $str .= SPACE x ( 16 - length($str) ); See also b1410. |
4248
|
276
|
50
|
33
|
|
|
1583
|
if ( $tok eq 'x' && $last_nonblank_type eq 'w' ) { $type = 'x' } |
|
0
|
50
|
|
|
|
0
|
|
4249
|
|
|
|
|
|
|
|
4250
|
|
|
|
|
|
|
# Fix part 2 for git #63. Leave type as 'w' to keep |
4251
|
|
|
|
|
|
|
# the type the same as if the -> were not separated |
4252
|
276
|
|
|
|
|
567
|
elsif ( $last_nonblank_type ne '->' ) { $type = 'U' } |
4253
|
|
|
|
|
|
|
|
4254
|
|
|
|
|
|
|
} |
4255
|
|
|
|
|
|
|
|
4256
|
|
|
|
|
|
|
# underscore after file test operator is file handle |
4257
|
932
|
50
|
66
|
|
|
2757
|
if ( $tok eq '_' && $last_nonblank_type eq 'F' ) { |
4258
|
0
|
|
|
|
|
0
|
$type = 'Z'; |
4259
|
|
|
|
|
|
|
} |
4260
|
|
|
|
|
|
|
|
4261
|
|
|
|
|
|
|
# patch for SWITCH/CASE if 'case' and 'when are |
4262
|
|
|
|
|
|
|
# not treated as keywords: |
4263
|
932
|
50
|
33
|
|
|
4691
|
if ( |
|
|
|
33
|
|
|
|
|
|
|
|
33
|
|
|
|
|
4264
|
|
|
|
|
|
|
( $tok eq 'case' && $rbrace_type->[$brace_depth] eq 'switch' ) |
4265
|
|
|
|
|
|
|
|| ( $tok eq 'when' |
4266
|
|
|
|
|
|
|
&& $rbrace_type->[$brace_depth] eq 'given' ) |
4267
|
|
|
|
|
|
|
) |
4268
|
|
|
|
|
|
|
{ |
4269
|
0
|
|
|
|
|
0
|
$statement_type = $tok; # next '{' is block |
4270
|
0
|
|
|
|
|
0
|
$type = 'k'; # for keyword syntax coloring |
4271
|
|
|
|
|
|
|
} |
4272
|
932
|
100
|
|
|
|
2334
|
if ( $next_nonblank_token eq '(' ) { |
4273
|
|
|
|
|
|
|
|
4274
|
|
|
|
|
|
|
# patch for SWITCH/CASE if switch and given not keywords |
4275
|
|
|
|
|
|
|
# Switch is not a perl 5 keyword, but we will gamble |
4276
|
|
|
|
|
|
|
# and mark switch followed by paren as a keyword. This |
4277
|
|
|
|
|
|
|
# is only necessary to get html syntax coloring nice, |
4278
|
|
|
|
|
|
|
# and does not commit this as being a switch/case. |
4279
|
241
|
50
|
33
|
|
|
1941
|
if ( $tok eq 'switch' || $tok eq 'given' ) { |
|
|
50
|
33
|
|
|
|
|
4280
|
0
|
|
|
|
|
0
|
$type = 'k'; # for keyword syntax coloring |
4281
|
|
|
|
|
|
|
} |
4282
|
|
|
|
|
|
|
|
4283
|
|
|
|
|
|
|
# mark 'x' as operator for something like this (see b1410) |
4284
|
|
|
|
|
|
|
# my $line = join( LD_X, map { LD_H x ( $_ + 2 ) } @$widths ); |
4285
|
|
|
|
|
|
|
elsif ( $tok eq 'x' && $last_nonblank_type eq 'w' ) { |
4286
|
0
|
|
|
|
|
0
|
$type = 'x'; |
4287
|
|
|
|
|
|
|
} |
4288
|
|
|
|
|
|
|
} |
4289
|
|
|
|
|
|
|
} |
4290
|
957
|
|
|
|
|
1764
|
return; |
4291
|
|
|
|
|
|
|
} ## end sub do_UNKNOWN_BAREWORD |
4292
|
|
|
|
|
|
|
|
4293
|
|
|
|
|
|
|
sub sub_attribute_ok_here { |
4294
|
|
|
|
|
|
|
|
4295
|
35
|
|
|
35
|
0
|
163
|
my ( $self, $tok_kw, $next_nonblank_token, $i_next ) = @_; |
4296
|
|
|
|
|
|
|
|
4297
|
|
|
|
|
|
|
# Decide if 'sub :' can be the start of a sub attribute list. |
4298
|
|
|
|
|
|
|
# We will decide based on if the colon is followed by a |
4299
|
|
|
|
|
|
|
# bareword which is not a keyword. |
4300
|
|
|
|
|
|
|
# Changed inext+1 to inext to fixed case b1190. |
4301
|
35
|
|
|
|
|
65
|
my $sub_attribute_ok_here; |
4302
|
35
|
50
|
66
|
|
|
161
|
if ( $is_sub{$tok_kw} |
|
|
|
66
|
|
|
|
|
4303
|
|
|
|
|
|
|
&& $expecting != OPERATOR |
4304
|
|
|
|
|
|
|
&& $next_nonblank_token eq ':' ) |
4305
|
|
|
|
|
|
|
{ |
4306
|
3
|
|
|
|
|
12
|
my ( $nn_nonblank_token, $i_nn ) = |
4307
|
|
|
|
|
|
|
$self->find_next_nonblank_token( $i_next, $rtokens, |
4308
|
|
|
|
|
|
|
$max_token_index ); |
4309
|
|
|
|
|
|
|
$sub_attribute_ok_here = |
4310
|
|
|
|
|
|
|
$nn_nonblank_token =~ /^\w/ |
4311
|
|
|
|
|
|
|
&& $nn_nonblank_token !~ /^\d/ |
4312
|
3
|
|
66
|
|
|
29
|
&& !$is_keyword{$nn_nonblank_token}; |
4313
|
|
|
|
|
|
|
} |
4314
|
35
|
|
|
|
|
300
|
return $sub_attribute_ok_here; |
4315
|
|
|
|
|
|
|
} ## end sub sub_attribute_ok_here |
4316
|
|
|
|
|
|
|
|
4317
|
|
|
|
|
|
|
sub do_BAREWORD { |
4318
|
|
|
|
|
|
|
|
4319
|
5826
|
|
|
5826
|
0
|
11547
|
my ( $self, $is_END_or_DATA ) = @_; |
4320
|
|
|
|
|
|
|
|
4321
|
|
|
|
|
|
|
# handle a bareword token: |
4322
|
|
|
|
|
|
|
# returns |
4323
|
|
|
|
|
|
|
# true if this token ends the current line |
4324
|
|
|
|
|
|
|
# false otherwise |
4325
|
|
|
|
|
|
|
|
4326
|
5826
|
|
|
|
|
16287
|
my ( $next_nonblank_token, $i_next ) = |
4327
|
|
|
|
|
|
|
$self->find_next_nonblank_token( $i, $rtokens, $max_token_index ); |
4328
|
|
|
|
|
|
|
|
4329
|
|
|
|
|
|
|
# a bare word immediately followed by :: is not a keyword; |
4330
|
|
|
|
|
|
|
# use $tok_kw when testing for keywords to avoid a mistake |
4331
|
5826
|
|
|
|
|
11407
|
my $tok_kw = $tok; |
4332
|
5826
|
100
|
100
|
|
|
16815
|
if ( $rtokens->[ $i + 1 ] eq ':' |
4333
|
|
|
|
|
|
|
&& $rtokens->[ $i + 2 ] eq ':' ) |
4334
|
|
|
|
|
|
|
{ |
4335
|
266
|
|
|
|
|
613
|
$tok_kw .= '::'; |
4336
|
|
|
|
|
|
|
} |
4337
|
|
|
|
|
|
|
|
4338
|
5826
|
100
|
|
|
|
13224
|
if ( $self->[_in_attribute_list_] ) { |
4339
|
39
|
|
|
|
|
187
|
my $is_attribute = $self->do_ATTRIBUTE_LIST($next_nonblank_token); |
4340
|
39
|
50
|
|
|
|
119
|
return if ($is_attribute); |
4341
|
|
|
|
|
|
|
} |
4342
|
|
|
|
|
|
|
|
4343
|
|
|
|
|
|
|
#---------------------------------------- |
4344
|
|
|
|
|
|
|
# Starting final if-elsif- chain of tests |
4345
|
|
|
|
|
|
|
#---------------------------------------- |
4346
|
|
|
|
|
|
|
|
4347
|
|
|
|
|
|
|
# This is the return flag: |
4348
|
|
|
|
|
|
|
# true => this is the last token on the line |
4349
|
|
|
|
|
|
|
# false => keep tokenizing the line |
4350
|
5787
|
|
|
|
|
8778
|
my $is_last; |
4351
|
|
|
|
|
|
|
|
4352
|
|
|
|
|
|
|
# The following blocks of code must update these vars: |
4353
|
|
|
|
|
|
|
# $type - the final token type, must always be set |
4354
|
|
|
|
|
|
|
|
4355
|
|
|
|
|
|
|
# In addition, if additional pretokens are added: |
4356
|
|
|
|
|
|
|
# $tok - the final token |
4357
|
|
|
|
|
|
|
# $i - the index of the last pretoken |
4358
|
|
|
|
|
|
|
|
4359
|
|
|
|
|
|
|
# They may also need to check and set various flags |
4360
|
|
|
|
|
|
|
|
4361
|
|
|
|
|
|
|
# Scan a bare word following a -> as an identifier; it could |
4362
|
|
|
|
|
|
|
# have a long package name. Fixes c037, c041. |
4363
|
5787
|
100
|
100
|
|
|
93750
|
if ( $last_nonblank_token eq '->' ) { |
|
|
100
|
100
|
|
|
|
|
|
|
100
|
66
|
|
|
|
|
|
|
100
|
66
|
|
|
|
|
|
|
100
|
100
|
|
|
|
|
|
|
100
|
66
|
|
|
|
|
|
|
100
|
66
|
|
|
|
|
|
|
50
|
100
|
|
|
|
|
|
|
50
|
100
|
|
|
|
|
|
|
100
|
33
|
|
|
|
|
|
|
100
|
0
|
|
|
|
|
|
|
100
|
0
|
|
|
|
|
|
|
100
|
33
|
|
|
|
|
|
|
100
|
0
|
|
|
|
|
|
|
100
|
0
|
|
|
|
|
|
|
100
|
66
|
|
|
|
|
|
|
100
|
100
|
|
|
|
|
|
|
100
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
|
66
|
|
|
|
|
4364
|
669
|
|
|
|
|
2362
|
$self->scan_bare_identifier(); |
4365
|
|
|
|
|
|
|
|
4366
|
|
|
|
|
|
|
# a bareward after '->' gets type 'i' |
4367
|
669
|
|
|
|
|
1240
|
$type = 'i'; |
4368
|
|
|
|
|
|
|
} |
4369
|
|
|
|
|
|
|
|
4370
|
|
|
|
|
|
|
# Quote a word followed by => operator |
4371
|
|
|
|
|
|
|
# unless the word __END__ or __DATA__ and the only word on |
4372
|
|
|
|
|
|
|
# the line. |
4373
|
|
|
|
|
|
|
elsif ( !$is_END_or_DATA |
4374
|
|
|
|
|
|
|
&& $next_nonblank_token eq '=' |
4375
|
|
|
|
|
|
|
&& $rtokens->[ $i_next + 1 ] eq '>' ) |
4376
|
|
|
|
|
|
|
{ |
4377
|
786
|
|
|
|
|
2366
|
$self->do_QUOTED_BAREWORD(); |
4378
|
|
|
|
|
|
|
} |
4379
|
|
|
|
|
|
|
|
4380
|
|
|
|
|
|
|
# quote a bare word within braces..like xxx->{s}; note that we |
4381
|
|
|
|
|
|
|
# must be sure this is not a structural brace, to avoid |
4382
|
|
|
|
|
|
|
# mistaking {s} in the following for a quoted bare word: |
4383
|
|
|
|
|
|
|
# for(@[){s}bla}BLA} |
4384
|
|
|
|
|
|
|
# Also treat q in something like var{-q} as a bare word, not |
4385
|
|
|
|
|
|
|
# a quote operator |
4386
|
|
|
|
|
|
|
elsif ( |
4387
|
|
|
|
|
|
|
$next_nonblank_token eq '}' |
4388
|
|
|
|
|
|
|
&& ( |
4389
|
|
|
|
|
|
|
$last_nonblank_type eq 'L' |
4390
|
|
|
|
|
|
|
|| ( $last_nonblank_type eq 'm' |
4391
|
|
|
|
|
|
|
&& $last_last_nonblank_type eq 'L' ) |
4392
|
|
|
|
|
|
|
) |
4393
|
|
|
|
|
|
|
) |
4394
|
|
|
|
|
|
|
{ |
4395
|
100
|
|
|
|
|
242
|
$type = 'w'; |
4396
|
|
|
|
|
|
|
} |
4397
|
|
|
|
|
|
|
|
4398
|
|
|
|
|
|
|
# handle operator x (now we know it isn't $x=) |
4399
|
|
|
|
|
|
|
elsif ( |
4400
|
|
|
|
|
|
|
$expecting == OPERATOR |
4401
|
|
|
|
|
|
|
&& substr( $tok, 0, 1 ) eq 'x' |
4402
|
|
|
|
|
|
|
&& ( length($tok) == 1 |
4403
|
|
|
|
|
|
|
|| substr( $tok, 1, 1 ) =~ /^\d/ ) |
4404
|
|
|
|
|
|
|
) |
4405
|
|
|
|
|
|
|
{ |
4406
|
17
|
|
|
|
|
108
|
$self->do_X_OPERATOR(); |
4407
|
|
|
|
|
|
|
} |
4408
|
|
|
|
|
|
|
elsif ( $tok_kw eq 'CORE::' ) { |
4409
|
3
|
|
|
|
|
25
|
$type = $tok = $tok_kw; |
4410
|
3
|
|
|
|
|
6
|
$i += 2; |
4411
|
|
|
|
|
|
|
} |
4412
|
|
|
|
|
|
|
elsif ( ( $tok eq 'strict' ) |
4413
|
|
|
|
|
|
|
and ( $last_nonblank_token eq 'use' ) ) |
4414
|
|
|
|
|
|
|
{ |
4415
|
14
|
|
|
|
|
33
|
$self->[_saw_use_strict_] = 1; |
4416
|
14
|
|
|
|
|
73
|
$self->scan_bare_identifier(); |
4417
|
|
|
|
|
|
|
} |
4418
|
|
|
|
|
|
|
|
4419
|
|
|
|
|
|
|
elsif ( ( $tok eq 'warnings' ) |
4420
|
|
|
|
|
|
|
and ( $last_nonblank_token eq 'use' ) ) |
4421
|
|
|
|
|
|
|
{ |
4422
|
7
|
|
|
|
|
24
|
$self->[_saw_perl_dash_w_] = 1; |
4423
|
|
|
|
|
|
|
|
4424
|
|
|
|
|
|
|
# scan as identifier, so that we pick up something like: |
4425
|
|
|
|
|
|
|
# use warnings::register |
4426
|
7
|
|
|
|
|
27
|
$self->scan_bare_identifier(); |
4427
|
|
|
|
|
|
|
} |
4428
|
|
|
|
|
|
|
|
4429
|
|
|
|
|
|
|
elsif ( |
4430
|
|
|
|
|
|
|
$tok eq 'AutoLoader' |
4431
|
|
|
|
|
|
|
&& $self->[_look_for_autoloader_] |
4432
|
|
|
|
|
|
|
&& ( |
4433
|
|
|
|
|
|
|
$last_nonblank_token eq 'use' |
4434
|
|
|
|
|
|
|
|
4435
|
|
|
|
|
|
|
# these regexes are from AutoSplit.pm, which we want |
4436
|
|
|
|
|
|
|
# to mimic |
4437
|
|
|
|
|
|
|
|| $input_line =~ /^\s*(use|require)\s+AutoLoader\b/ |
4438
|
|
|
|
|
|
|
|| $input_line =~ /\bISA\s*=.*\bAutoLoader\b/ |
4439
|
|
|
|
|
|
|
) |
4440
|
|
|
|
|
|
|
) |
4441
|
|
|
|
|
|
|
{ |
4442
|
0
|
|
|
|
|
0
|
$self->write_logfile_entry("AutoLoader seen, -nlal deactivates\n"); |
4443
|
0
|
|
|
|
|
0
|
$self->[_saw_autoloader_] = 1; |
4444
|
0
|
|
|
|
|
0
|
$self->[_look_for_autoloader_] = 0; |
4445
|
0
|
|
|
|
|
0
|
$self->scan_bare_identifier(); |
4446
|
|
|
|
|
|
|
} |
4447
|
|
|
|
|
|
|
|
4448
|
|
|
|
|
|
|
elsif ( |
4449
|
|
|
|
|
|
|
$tok eq 'SelfLoader' |
4450
|
|
|
|
|
|
|
&& $self->[_look_for_selfloader_] |
4451
|
|
|
|
|
|
|
&& ( $last_nonblank_token eq 'use' |
4452
|
|
|
|
|
|
|
|| $input_line =~ /^\s*(use|require)\s+SelfLoader\b/ |
4453
|
|
|
|
|
|
|
|| $input_line =~ /\bISA\s*=.*\bSelfLoader\b/ ) |
4454
|
|
|
|
|
|
|
) |
4455
|
|
|
|
|
|
|
{ |
4456
|
0
|
|
|
|
|
0
|
$self->write_logfile_entry("SelfLoader seen, -nlsl deactivates\n"); |
4457
|
0
|
|
|
|
|
0
|
$self->[_saw_selfloader_] = 1; |
4458
|
0
|
|
|
|
|
0
|
$self->[_look_for_selfloader_] = 0; |
4459
|
0
|
|
|
|
|
0
|
$self->scan_bare_identifier(); |
4460
|
|
|
|
|
|
|
} |
4461
|
|
|
|
|
|
|
|
4462
|
|
|
|
|
|
|
elsif ( ( $tok eq 'constant' ) |
4463
|
|
|
|
|
|
|
and ( $last_nonblank_token eq 'use' ) ) |
4464
|
|
|
|
|
|
|
{ |
4465
|
16
|
|
|
|
|
73
|
$self->do_USE_CONSTANT(); |
4466
|
|
|
|
|
|
|
} |
4467
|
|
|
|
|
|
|
|
4468
|
|
|
|
|
|
|
# various quote operators |
4469
|
|
|
|
|
|
|
elsif ( $is_q_qq_qw_qx_qr_s_y_tr_m{$tok} ) { |
4470
|
202
|
|
|
|
|
910
|
$self->do_QUOTE_OPERATOR(); |
4471
|
|
|
|
|
|
|
} |
4472
|
|
|
|
|
|
|
|
4473
|
|
|
|
|
|
|
# check for a statement label |
4474
|
|
|
|
|
|
|
elsif ( |
4475
|
|
|
|
|
|
|
( $next_nonblank_token eq ':' ) |
4476
|
|
|
|
|
|
|
&& ( $rtokens->[ $i_next + 1 ] ne ':' ) |
4477
|
|
|
|
|
|
|
&& ( $i_next <= $max_token_index ) # colon on same line |
4478
|
|
|
|
|
|
|
|
4479
|
|
|
|
|
|
|
# like 'sub : lvalue' ? |
4480
|
|
|
|
|
|
|
&& !$self->sub_attribute_ok_here( $tok_kw, $next_nonblank_token, |
4481
|
|
|
|
|
|
|
$i_next ) |
4482
|
|
|
|
|
|
|
&& label_ok() |
4483
|
|
|
|
|
|
|
) |
4484
|
|
|
|
|
|
|
{ |
4485
|
33
|
100
|
|
|
|
205
|
if ( $tok !~ /[A-Z]/ ) { |
4486
|
15
|
|
|
|
|
35
|
push @{ $self->[_rlower_case_labels_at_] }, $input_line_number; |
|
15
|
|
|
|
|
52
|
|
4487
|
|
|
|
|
|
|
} |
4488
|
33
|
|
|
|
|
82
|
$type = 'J'; |
4489
|
33
|
|
|
|
|
87
|
$tok .= ':'; |
4490
|
33
|
|
|
|
|
67
|
$i = $i_next; |
4491
|
|
|
|
|
|
|
} |
4492
|
|
|
|
|
|
|
|
4493
|
|
|
|
|
|
|
# 'sub' or other sub alias |
4494
|
|
|
|
|
|
|
elsif ( $is_sub{$tok_kw} ) { |
4495
|
|
|
|
|
|
|
|
4496
|
|
|
|
|
|
|
# Update for --use-feature=class (rt145706): |
4497
|
|
|
|
|
|
|
# We have to be extra careful to avoid misparsing other uses of |
4498
|
|
|
|
|
|
|
# 'method' in older scripts. |
4499
|
301
|
100
|
|
|
|
1105
|
if ( $tok_kw eq 'method' ) { |
4500
|
12
|
100
|
66
|
|
|
132
|
if ( $expecting == OPERATOR |
|
|
|
100
|
|
|
|
|
4501
|
|
|
|
|
|
|
|| $next_nonblank_token !~ /^(\w|\:)/ |
4502
|
|
|
|
|
|
|
|| !$self->method_ok_here() ) |
4503
|
|
|
|
|
|
|
{ |
4504
|
7
|
|
|
|
|
24
|
$self->do_UNKNOWN_BAREWORD($next_nonblank_token); |
4505
|
|
|
|
|
|
|
} |
4506
|
|
|
|
|
|
|
else { |
4507
|
5
|
|
|
|
|
23
|
initialize_subname(); |
4508
|
5
|
|
|
|
|
20
|
$self->scan_id(); |
4509
|
|
|
|
|
|
|
} |
4510
|
|
|
|
|
|
|
} |
4511
|
|
|
|
|
|
|
else { |
4512
|
289
|
50
|
|
|
|
886
|
$self->error_if_expecting_OPERATOR() |
4513
|
|
|
|
|
|
|
if ( $expecting == OPERATOR ); |
4514
|
289
|
|
|
|
|
1067
|
initialize_subname(); |
4515
|
289
|
|
|
|
|
1111
|
$self->scan_id(); |
4516
|
|
|
|
|
|
|
} |
4517
|
|
|
|
|
|
|
} |
4518
|
|
|
|
|
|
|
|
4519
|
|
|
|
|
|
|
# 'package' |
4520
|
|
|
|
|
|
|
elsif ( $is_package{$tok_kw} ) { |
4521
|
|
|
|
|
|
|
|
4522
|
|
|
|
|
|
|
# Update for --use-feature=class (rt145706): |
4523
|
|
|
|
|
|
|
# We have to be extra careful because 'class' may be used for other |
4524
|
|
|
|
|
|
|
# purposes on older code; i.e. |
4525
|
|
|
|
|
|
|
# class($x) - valid sub call |
4526
|
|
|
|
|
|
|
# package($x) - error |
4527
|
30
|
100
|
|
|
|
101
|
if ( $tok_kw eq 'class' ) { |
4528
|
8
|
100
|
66
|
|
|
75
|
if ( $expecting == OPERATOR |
|
|
|
100
|
|
|
|
|
4529
|
|
|
|
|
|
|
|| $next_nonblank_token !~ /^(\w|\:)/ |
4530
|
|
|
|
|
|
|
|| !$self->class_ok_here() ) |
4531
|
|
|
|
|
|
|
{ |
4532
|
4
|
|
|
|
|
13
|
$self->do_UNKNOWN_BAREWORD($next_nonblank_token); |
4533
|
|
|
|
|
|
|
} |
4534
|
4
|
|
|
|
|
15
|
else { $self->scan_id() } |
4535
|
|
|
|
|
|
|
} |
4536
|
|
|
|
|
|
|
else { |
4537
|
22
|
50
|
|
|
|
63
|
$self->error_if_expecting_OPERATOR() |
4538
|
|
|
|
|
|
|
if ( $expecting == OPERATOR ); |
4539
|
22
|
|
|
|
|
79
|
$self->scan_id(); |
4540
|
|
|
|
|
|
|
} |
4541
|
|
|
|
|
|
|
} |
4542
|
|
|
|
|
|
|
|
4543
|
|
|
|
|
|
|
# Fix for c035: split 'format' from 'is_format_END_DATA' to be |
4544
|
|
|
|
|
|
|
# more restrictive. Require a new statement to be ok here. |
4545
|
|
|
|
|
|
|
elsif ( $tok_kw eq 'format' && new_statement_ok() ) { |
4546
|
1
|
|
|
|
|
3
|
$type = ';'; # make tokenizer look for TERM next |
4547
|
1
|
|
|
|
|
3
|
$self->[_in_format_] = 1; |
4548
|
1
|
|
|
|
|
2
|
$is_last = 1; ## is last token on this line |
4549
|
|
|
|
|
|
|
} |
4550
|
|
|
|
|
|
|
|
4551
|
|
|
|
|
|
|
# Note on token types for format, __DATA__, __END__: |
4552
|
|
|
|
|
|
|
# It simplifies things to give these type ';', so that when we |
4553
|
|
|
|
|
|
|
# start rescanning we will be expecting a token of type TERM. |
4554
|
|
|
|
|
|
|
# We will switch to type 'k' before outputting the tokens. |
4555
|
|
|
|
|
|
|
elsif ( $is_END_DATA{$tok_kw} ) { |
4556
|
7
|
|
|
|
|
16
|
$type = ';'; # make tokenizer look for TERM next |
4557
|
|
|
|
|
|
|
|
4558
|
|
|
|
|
|
|
# Remember that we are in one of these three sections |
4559
|
7
|
|
|
|
|
21
|
$self->[ $is_END_DATA{$tok_kw} ] = 1; |
4560
|
7
|
|
|
|
|
14
|
$is_last = 1; ## is last token on this line |
4561
|
|
|
|
|
|
|
} |
4562
|
|
|
|
|
|
|
|
4563
|
|
|
|
|
|
|
elsif ( $is_keyword{$tok_kw} ) { |
4564
|
2636
|
|
|
|
|
7923
|
$self->do_KEYWORD(); |
4565
|
|
|
|
|
|
|
} |
4566
|
|
|
|
|
|
|
|
4567
|
|
|
|
|
|
|
# check for inline label following |
4568
|
|
|
|
|
|
|
# /^(redo|last|next|goto)$/ |
4569
|
|
|
|
|
|
|
elsif (( $last_nonblank_type eq 'k' ) |
4570
|
|
|
|
|
|
|
&& ( $is_redo_last_next_goto{$last_nonblank_token} ) ) |
4571
|
|
|
|
|
|
|
{ |
4572
|
19
|
|
|
|
|
49
|
$type = 'j'; |
4573
|
|
|
|
|
|
|
} |
4574
|
|
|
|
|
|
|
|
4575
|
|
|
|
|
|
|
# something else -- |
4576
|
|
|
|
|
|
|
else { |
4577
|
946
|
|
|
|
|
3129
|
$self->do_UNKNOWN_BAREWORD($next_nonblank_token); |
4578
|
|
|
|
|
|
|
} |
4579
|
|
|
|
|
|
|
|
4580
|
5787
|
|
|
|
|
11267
|
return $is_last; |
4581
|
|
|
|
|
|
|
|
4582
|
|
|
|
|
|
|
} ## end sub do_BAREWORD |
4583
|
|
|
|
|
|
|
|
4584
|
|
|
|
|
|
|
sub do_FOLLOW_QUOTE { |
4585
|
|
|
|
|
|
|
|
4586
|
2763
|
|
|
2763
|
0
|
4475
|
my $self = shift; |
4587
|
|
|
|
|
|
|
|
4588
|
|
|
|
|
|
|
# Continue following a quote on a new line |
4589
|
2763
|
|
|
|
|
4432
|
$type = $quote_type; |
4590
|
|
|
|
|
|
|
|
4591
|
2763
|
100
|
|
|
|
4041
|
unless ( @{$routput_token_list} ) { # initialize if continuation line |
|
2763
|
|
|
|
|
6809
|
|
4592
|
184
|
|
|
|
|
470
|
push( @{$routput_token_list}, $i ); |
|
184
|
|
|
|
|
431
|
|
4593
|
184
|
|
|
|
|
436
|
$routput_token_type->[$i] = $type; |
4594
|
|
|
|
|
|
|
|
4595
|
|
|
|
|
|
|
} |
4596
|
|
|
|
|
|
|
|
4597
|
|
|
|
|
|
|
# scan for the end of the quote or pattern |
4598
|
|
|
|
|
|
|
( |
4599
|
2763
|
|
|
|
|
8883
|
$i, |
4600
|
|
|
|
|
|
|
$in_quote, |
4601
|
|
|
|
|
|
|
$quote_character, |
4602
|
|
|
|
|
|
|
$quote_pos, |
4603
|
|
|
|
|
|
|
$quote_depth, |
4604
|
|
|
|
|
|
|
$quoted_string_1, |
4605
|
|
|
|
|
|
|
$quoted_string_2, |
4606
|
|
|
|
|
|
|
|
4607
|
|
|
|
|
|
|
) = $self->do_quote( |
4608
|
|
|
|
|
|
|
|
4609
|
|
|
|
|
|
|
$i, |
4610
|
|
|
|
|
|
|
$in_quote, |
4611
|
|
|
|
|
|
|
$quote_character, |
4612
|
|
|
|
|
|
|
$quote_pos, |
4613
|
|
|
|
|
|
|
$quote_depth, |
4614
|
|
|
|
|
|
|
$quoted_string_1, |
4615
|
|
|
|
|
|
|
$quoted_string_2, |
4616
|
|
|
|
|
|
|
$rtokens, |
4617
|
|
|
|
|
|
|
$rtoken_map, |
4618
|
|
|
|
|
|
|
$max_token_index, |
4619
|
|
|
|
|
|
|
|
4620
|
|
|
|
|
|
|
); |
4621
|
|
|
|
|
|
|
|
4622
|
|
|
|
|
|
|
# all done if we didn't find it |
4623
|
2763
|
100
|
|
|
|
6953
|
if ($in_quote) { return } |
|
183
|
|
|
|
|
367
|
|
4624
|
|
|
|
|
|
|
|
4625
|
|
|
|
|
|
|
# save pattern and replacement text for rescanning |
4626
|
2580
|
|
|
|
|
4349
|
my $qs1 = $quoted_string_1; |
4627
|
|
|
|
|
|
|
|
4628
|
|
|
|
|
|
|
# re-initialize for next search |
4629
|
2580
|
|
|
|
|
4075
|
$quote_character = EMPTY_STRING; |
4630
|
2580
|
|
|
|
|
3820
|
$quote_pos = 0; |
4631
|
2580
|
|
|
|
|
3844
|
$quote_type = 'Q'; |
4632
|
2580
|
|
|
|
|
3818
|
$quoted_string_1 = EMPTY_STRING; |
4633
|
2580
|
|
|
|
|
3798
|
$quoted_string_2 = EMPTY_STRING; |
4634
|
2580
|
100
|
|
|
|
5651
|
if ( ++$i > $max_token_index ) { return } |
|
116
|
|
|
|
|
320
|
|
4635
|
|
|
|
|
|
|
|
4636
|
|
|
|
|
|
|
# look for any modifiers |
4637
|
2464
|
100
|
|
|
|
5334
|
if ($allowed_quote_modifiers) { |
4638
|
|
|
|
|
|
|
|
4639
|
|
|
|
|
|
|
# check for exact quote modifiers |
4640
|
144
|
100
|
|
|
|
740
|
if ( $rtokens->[$i] =~ /^[A-Za-z_]/ ) { |
4641
|
30
|
|
|
|
|
89
|
my $str = $rtokens->[$i]; |
4642
|
30
|
|
|
|
|
59
|
my $saw_modifier_e; |
4643
|
30
|
|
|
|
|
501
|
while ( $str =~ /\G$allowed_quote_modifiers/gc ) { |
4644
|
47
|
|
|
|
|
118
|
my $pos = pos($str); |
4645
|
47
|
|
|
|
|
111
|
my $char = substr( $str, $pos - 1, 1 ); |
4646
|
47
|
|
66
|
|
|
307
|
$saw_modifier_e ||= ( $char eq 'e' ); |
4647
|
|
|
|
|
|
|
} |
4648
|
|
|
|
|
|
|
|
4649
|
|
|
|
|
|
|
# For an 'e' quote modifier we must scan the replacement |
4650
|
|
|
|
|
|
|
# text for here-doc targets... |
4651
|
|
|
|
|
|
|
# but if the modifier starts a new line we can skip |
4652
|
|
|
|
|
|
|
# this because either the here doc will be fully |
4653
|
|
|
|
|
|
|
# contained in the replacement text (so we can |
4654
|
|
|
|
|
|
|
# ignore it) or Perl will not find it. |
4655
|
|
|
|
|
|
|
# See test 'here2.in'. |
4656
|
30
|
50
|
66
|
|
|
161
|
if ( $saw_modifier_e && $i_tok >= 0 ) { |
4657
|
|
|
|
|
|
|
|
4658
|
0
|
|
|
|
|
0
|
my $rht = $self->scan_replacement_text($qs1); |
4659
|
|
|
|
|
|
|
|
4660
|
|
|
|
|
|
|
# Change type from 'Q' to 'h' for quotes with |
4661
|
|
|
|
|
|
|
# here-doc targets so that the formatter (see sub |
4662
|
|
|
|
|
|
|
# process_line_of_CODE) will not make any line |
4663
|
|
|
|
|
|
|
# breaks after this point. |
4664
|
0
|
0
|
|
|
|
0
|
if ($rht) { |
4665
|
0
|
|
|
|
|
0
|
push @{$rhere_target_list}, @{$rht}; |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
4666
|
0
|
|
|
|
|
0
|
$type = 'h'; |
4667
|
0
|
0
|
|
|
|
0
|
if ( $i_tok < 0 ) { |
4668
|
0
|
|
|
|
|
0
|
my $ilast = $routput_token_list->[-1]; |
4669
|
0
|
|
|
|
|
0
|
$routput_token_type->[$ilast] = $type; |
4670
|
|
|
|
|
|
|
} |
4671
|
|
|
|
|
|
|
} |
4672
|
|
|
|
|
|
|
} |
4673
|
|
|
|
|
|
|
|
4674
|
30
|
50
|
|
|
|
96
|
if ( defined( pos($str) ) ) { |
4675
|
|
|
|
|
|
|
|
4676
|
|
|
|
|
|
|
# matched |
4677
|
30
|
50
|
|
|
|
100
|
if ( pos($str) == length($str) ) { |
4678
|
30
|
50
|
|
|
|
128
|
if ( ++$i > $max_token_index ) { return } |
|
0
|
|
|
|
|
0
|
|
4679
|
|
|
|
|
|
|
} |
4680
|
|
|
|
|
|
|
|
4681
|
|
|
|
|
|
|
# Looks like a joined quote modifier |
4682
|
|
|
|
|
|
|
# and keyword, maybe something like |
4683
|
|
|
|
|
|
|
# s/xxx/yyy/gefor @k=... |
4684
|
|
|
|
|
|
|
# Example is "galgen.pl". Would have to split |
4685
|
|
|
|
|
|
|
# the word and insert a new token in the |
4686
|
|
|
|
|
|
|
# pre-token list. This is so rare that I haven't |
4687
|
|
|
|
|
|
|
# done it. Will just issue a warning citation. |
4688
|
|
|
|
|
|
|
|
4689
|
|
|
|
|
|
|
# This error might also be triggered if my quote |
4690
|
|
|
|
|
|
|
# modifier characters are incomplete |
4691
|
|
|
|
|
|
|
else { |
4692
|
0
|
|
|
|
|
0
|
$self->warning(<<EOM); |
4693
|
|
|
|
|
|
|
|
4694
|
|
|
|
|
|
|
Partial match to quote modifier $allowed_quote_modifiers at word: '$str' |
4695
|
|
|
|
|
|
|
Please put a space between quote modifiers and trailing keywords. |
4696
|
|
|
|
|
|
|
EOM |
4697
|
|
|
|
|
|
|
|
4698
|
|
|
|
|
|
|
# print "token $rtokens->[$i]\n"; |
4699
|
|
|
|
|
|
|
# my $num = length($str) - pos($str); |
4700
|
|
|
|
|
|
|
# $rtokens->[$i]=substr($rtokens->[$i],pos($str),$num); |
4701
|
|
|
|
|
|
|
# print "continuing with new token $rtokens->[$i]\n"; |
4702
|
|
|
|
|
|
|
|
4703
|
|
|
|
|
|
|
# skipping past this token does least damage |
4704
|
0
|
0
|
|
|
|
0
|
if ( ++$i > $max_token_index ) { return } |
|
0
|
|
|
|
|
0
|
|
4705
|
|
|
|
|
|
|
} |
4706
|
|
|
|
|
|
|
} |
4707
|
|
|
|
|
|
|
else { |
4708
|
|
|
|
|
|
|
|
4709
|
|
|
|
|
|
|
# example file: rokicki4.pl |
4710
|
|
|
|
|
|
|
# This error might also be triggered if my quote |
4711
|
|
|
|
|
|
|
# modifier characters are incomplete |
4712
|
0
|
|
|
|
|
0
|
$self->write_logfile_entry( |
4713
|
|
|
|
|
|
|
"Note: found word $str at quote modifier location\n"); |
4714
|
|
|
|
|
|
|
} |
4715
|
|
|
|
|
|
|
} |
4716
|
|
|
|
|
|
|
|
4717
|
|
|
|
|
|
|
# re-initialize |
4718
|
144
|
|
|
|
|
283
|
$allowed_quote_modifiers = EMPTY_STRING; |
4719
|
|
|
|
|
|
|
} |
4720
|
2464
|
|
|
|
|
4526
|
return; |
4721
|
|
|
|
|
|
|
} ## end sub do_FOLLOW_QUOTE |
4722
|
|
|
|
|
|
|
|
4723
|
|
|
|
|
|
|
# ------------------------------------------------------------ |
4724
|
|
|
|
|
|
|
# begin hash of code for handling most token types |
4725
|
|
|
|
|
|
|
# ------------------------------------------------------------ |
4726
|
|
|
|
|
|
|
my $tokenization_code = { |
4727
|
|
|
|
|
|
|
|
4728
|
|
|
|
|
|
|
'>' => \&do_GREATER_THAN_SIGN, |
4729
|
|
|
|
|
|
|
'|' => \&do_VERTICAL_LINE, |
4730
|
|
|
|
|
|
|
'$' => \&do_DOLLAR_SIGN, |
4731
|
|
|
|
|
|
|
'(' => \&do_LEFT_PARENTHESIS, |
4732
|
|
|
|
|
|
|
')' => \&do_RIGHT_PARENTHESIS, |
4733
|
|
|
|
|
|
|
',' => \&do_COMMA, |
4734
|
|
|
|
|
|
|
';' => \&do_SEMICOLON, |
4735
|
|
|
|
|
|
|
'"' => \&do_QUOTATION_MARK, |
4736
|
|
|
|
|
|
|
"'" => \&do_APOSTROPHE, |
4737
|
|
|
|
|
|
|
'`' => \&do_BACKTICK, |
4738
|
|
|
|
|
|
|
'/' => \&do_SLASH, |
4739
|
|
|
|
|
|
|
'{' => \&do_LEFT_CURLY_BRACKET, |
4740
|
|
|
|
|
|
|
'}' => \&do_RIGHT_CURLY_BRACKET, |
4741
|
|
|
|
|
|
|
'&' => \&do_AMPERSAND, |
4742
|
|
|
|
|
|
|
'<' => \&do_LESS_THAN_SIGN, |
4743
|
|
|
|
|
|
|
'?' => \&do_QUESTION_MARK, |
4744
|
|
|
|
|
|
|
'*' => \&do_STAR, |
4745
|
|
|
|
|
|
|
'.' => \&do_DOT, |
4746
|
|
|
|
|
|
|
':' => \&do_COLON, |
4747
|
|
|
|
|
|
|
'+' => \&do_PLUS_SIGN, |
4748
|
|
|
|
|
|
|
'@' => \&do_AT_SIGN, |
4749
|
|
|
|
|
|
|
'%' => \&do_PERCENT_SIGN, |
4750
|
|
|
|
|
|
|
'[' => \&do_LEFT_SQUARE_BRACKET, |
4751
|
|
|
|
|
|
|
']' => \&do_RIGHT_SQUARE_BRACKET, |
4752
|
|
|
|
|
|
|
'-' => \&do_MINUS_SIGN, |
4753
|
|
|
|
|
|
|
'^' => \&do_CARAT_SIGN, |
4754
|
|
|
|
|
|
|
'::' => \&do_DOUBLE_COLON, |
4755
|
|
|
|
|
|
|
'<<' => \&do_LEFT_SHIFT, |
4756
|
|
|
|
|
|
|
'<<~' => \&do_NEW_HERE_DOC, |
4757
|
|
|
|
|
|
|
'->' => \&do_POINTER, |
4758
|
|
|
|
|
|
|
'++' => \&do_PLUS_PLUS, |
4759
|
|
|
|
|
|
|
'=>' => \&do_FAT_COMMA, |
4760
|
|
|
|
|
|
|
'--' => \&do_MINUS_MINUS, |
4761
|
|
|
|
|
|
|
'&&' => \&do_LOGICAL_AND, |
4762
|
|
|
|
|
|
|
'||' => \&do_LOGICAL_OR, |
4763
|
|
|
|
|
|
|
'//' => \&do_SLASH_SLASH, |
4764
|
|
|
|
|
|
|
|
4765
|
|
|
|
|
|
|
# No special code for these types yet, but syntax checks |
4766
|
|
|
|
|
|
|
# could be added. |
4767
|
|
|
|
|
|
|
## '!' => undef, |
4768
|
|
|
|
|
|
|
## '!=' => undef, |
4769
|
|
|
|
|
|
|
## '!~' => undef, |
4770
|
|
|
|
|
|
|
## '%=' => undef, |
4771
|
|
|
|
|
|
|
## '&&=' => undef, |
4772
|
|
|
|
|
|
|
## '&=' => undef, |
4773
|
|
|
|
|
|
|
## '+=' => undef, |
4774
|
|
|
|
|
|
|
## '-=' => undef, |
4775
|
|
|
|
|
|
|
## '..' => undef, |
4776
|
|
|
|
|
|
|
## '..' => undef, |
4777
|
|
|
|
|
|
|
## '...' => undef, |
4778
|
|
|
|
|
|
|
## '.=' => undef, |
4779
|
|
|
|
|
|
|
## '<<=' => undef, |
4780
|
|
|
|
|
|
|
## '<=' => undef, |
4781
|
|
|
|
|
|
|
## '<=>' => undef, |
4782
|
|
|
|
|
|
|
## '<>' => undef, |
4783
|
|
|
|
|
|
|
## '=' => undef, |
4784
|
|
|
|
|
|
|
## '==' => undef, |
4785
|
|
|
|
|
|
|
## '=~' => undef, |
4786
|
|
|
|
|
|
|
## '>=' => undef, |
4787
|
|
|
|
|
|
|
## '>>' => undef, |
4788
|
|
|
|
|
|
|
## '>>=' => undef, |
4789
|
|
|
|
|
|
|
## '\\' => undef, |
4790
|
|
|
|
|
|
|
## '^=' => undef, |
4791
|
|
|
|
|
|
|
## '|=' => undef, |
4792
|
|
|
|
|
|
|
## '||=' => undef, |
4793
|
|
|
|
|
|
|
## '//=' => undef, |
4794
|
|
|
|
|
|
|
## '~' => undef, |
4795
|
|
|
|
|
|
|
## '~~' => undef, |
4796
|
|
|
|
|
|
|
## '!~~' => undef, |
4797
|
|
|
|
|
|
|
|
4798
|
|
|
|
|
|
|
}; |
4799
|
|
|
|
|
|
|
|
4800
|
|
|
|
|
|
|
# ------------------------------------------------------------ |
4801
|
|
|
|
|
|
|
# end hash of code for handling individual token types |
4802
|
|
|
|
|
|
|
# ------------------------------------------------------------ |
4803
|
|
|
|
|
|
|
|
4804
|
38
|
|
|
38
|
|
442
|
use constant DEBUG_TOKENIZE => 0; |
|
38
|
|
|
|
|
92
|
|
|
38
|
|
|
|
|
186316
|
|
4805
|
|
|
|
|
|
|
|
4806
|
|
|
|
|
|
|
sub tokenize_this_line { |
4807
|
|
|
|
|
|
|
|
4808
|
|
|
|
|
|
|
# This routine breaks a line of perl code into tokens which are of use in |
4809
|
|
|
|
|
|
|
# indentation and reformatting. One of my goals has been to define tokens |
4810
|
|
|
|
|
|
|
# such that a newline may be inserted between any pair of tokens without |
4811
|
|
|
|
|
|
|
# changing or invalidating the program. This version comes close to this, |
4812
|
|
|
|
|
|
|
# although there are necessarily a few exceptions which must be caught by |
4813
|
|
|
|
|
|
|
# the formatter. Many of these involve the treatment of bare words. |
4814
|
|
|
|
|
|
|
# |
4815
|
|
|
|
|
|
|
# The tokens and their types are returned in arrays. See previous |
4816
|
|
|
|
|
|
|
# routine for their names. |
4817
|
|
|
|
|
|
|
# |
4818
|
|
|
|
|
|
|
# See also the array "valid_token_types" in the BEGIN section for an |
4819
|
|
|
|
|
|
|
# up-to-date list. |
4820
|
|
|
|
|
|
|
# |
4821
|
|
|
|
|
|
|
# To simplify things, token types are either a single character, or they |
4822
|
|
|
|
|
|
|
# are identical to the tokens themselves. |
4823
|
|
|
|
|
|
|
# |
4824
|
|
|
|
|
|
|
# As a debugging aid, the -D flag creates a file containing a side-by-side |
4825
|
|
|
|
|
|
|
# comparison of the input string and its tokenization for each line of a file. |
4826
|
|
|
|
|
|
|
# This is an invaluable debugging aid. |
4827
|
|
|
|
|
|
|
# |
4828
|
|
|
|
|
|
|
# In addition to tokens, and some associated quantities, the tokenizer |
4829
|
|
|
|
|
|
|
# also returns flags indication any special line types. These include |
4830
|
|
|
|
|
|
|
# quotes, here_docs, formats. |
4831
|
|
|
|
|
|
|
# |
4832
|
|
|
|
|
|
|
# ----------------------------------------------------------------------- |
4833
|
|
|
|
|
|
|
# |
4834
|
|
|
|
|
|
|
# How to add NEW_TOKENS: |
4835
|
|
|
|
|
|
|
# |
4836
|
|
|
|
|
|
|
# New token types will undoubtedly be needed in the future both to keep up |
4837
|
|
|
|
|
|
|
# with changes in perl and to help adapt the tokenizer to other applications. |
4838
|
|
|
|
|
|
|
# |
4839
|
|
|
|
|
|
|
# Here are some notes on the minimal steps. I wrote these notes while |
4840
|
|
|
|
|
|
|
# adding the 'v' token type for v-strings, which are things like version |
4841
|
|
|
|
|
|
|
# numbers 5.6.0, and ip addresses, and will use that as an example. ( You |
4842
|
|
|
|
|
|
|
# can use your editor to search for the string "NEW_TOKENS" to find the |
4843
|
|
|
|
|
|
|
# appropriate sections to change): |
4844
|
|
|
|
|
|
|
# |
4845
|
|
|
|
|
|
|
# *. Try to talk somebody else into doing it! If not, .. |
4846
|
|
|
|
|
|
|
# |
4847
|
|
|
|
|
|
|
# *. Make a backup of your current version in case things don't work out! |
4848
|
|
|
|
|
|
|
# |
4849
|
|
|
|
|
|
|
# *. Think of a new, unused character for the token type, and add to |
4850
|
|
|
|
|
|
|
# the array @valid_token_types in the BEGIN section of this package. |
4851
|
|
|
|
|
|
|
# For example, I used 'v' for v-strings. |
4852
|
|
|
|
|
|
|
# |
4853
|
|
|
|
|
|
|
# *. Implement coding to recognize the $type of the token in this routine. |
4854
|
|
|
|
|
|
|
# This is the hardest part, and is best done by imitating or modifying |
4855
|
|
|
|
|
|
|
# some of the existing coding. For example, to recognize v-strings, I |
4856
|
|
|
|
|
|
|
# patched 'sub scan_bare_identifier' to recognize v-strings beginning with |
4857
|
|
|
|
|
|
|
# 'v' and 'sub scan_number' to recognize v-strings without the leading 'v'. |
4858
|
|
|
|
|
|
|
# |
4859
|
|
|
|
|
|
|
# *. Update sub operator_expected. This update is critically important but |
4860
|
|
|
|
|
|
|
# the coding is trivial. Look at the comments in that routine for help. |
4861
|
|
|
|
|
|
|
# For v-strings, which should behave like numbers, I just added 'v' to the |
4862
|
|
|
|
|
|
|
# regex used to handle numbers and strings (types 'n' and 'Q'). |
4863
|
|
|
|
|
|
|
# |
4864
|
|
|
|
|
|
|
# *. Implement a 'bond strength' rule in sub set_bond_strengths in |
4865
|
|
|
|
|
|
|
# Perl::Tidy::Formatter for breaking lines around this token type. You can |
4866
|
|
|
|
|
|
|
# skip this step and take the default at first, then adjust later to get |
4867
|
|
|
|
|
|
|
# desired results. For adding type 'v', I looked at sub bond_strength and |
4868
|
|
|
|
|
|
|
# saw that number type 'n' was using default strengths, so I didn't do |
4869
|
|
|
|
|
|
|
# anything. I may tune it up someday if I don't like the way line |
4870
|
|
|
|
|
|
|
# breaks with v-strings look. |
4871
|
|
|
|
|
|
|
# |
4872
|
|
|
|
|
|
|
# *. Implement a 'whitespace' rule in sub set_whitespace_flags in |
4873
|
|
|
|
|
|
|
# Perl::Tidy::Formatter. For adding type 'v', I looked at this routine |
4874
|
|
|
|
|
|
|
# and saw that type 'n' used spaces on both sides, so I just added 'v' |
4875
|
|
|
|
|
|
|
# to the array @spaces_both_sides. |
4876
|
|
|
|
|
|
|
# |
4877
|
|
|
|
|
|
|
# *. Update HtmlWriter package so that users can colorize the token as |
4878
|
|
|
|
|
|
|
# desired. This is quite easy; see comments identified by 'NEW_TOKENS' in |
4879
|
|
|
|
|
|
|
# that package. For v-strings, I initially chose to use a default color |
4880
|
|
|
|
|
|
|
# equal to the default for numbers, but it might be nice to change that |
4881
|
|
|
|
|
|
|
# eventually. |
4882
|
|
|
|
|
|
|
# |
4883
|
|
|
|
|
|
|
# *. Update comments in Perl::Tidy::Tokenizer::dump_token_types. |
4884
|
|
|
|
|
|
|
# |
4885
|
|
|
|
|
|
|
# *. Run lots and lots of debug tests. Start with special files designed |
4886
|
|
|
|
|
|
|
# to test the new token type. Run with the -D flag to create a .DEBUG |
4887
|
|
|
|
|
|
|
# file which shows the tokenization. When these work ok, test as many old |
4888
|
|
|
|
|
|
|
# scripts as possible. Start with all of the '.t' files in the 'test' |
4889
|
|
|
|
|
|
|
# directory of the distribution file. Compare .tdy output with previous |
4890
|
|
|
|
|
|
|
# version and updated version to see the differences. Then include as |
4891
|
|
|
|
|
|
|
# many more files as possible. My own technique has been to collect a huge |
4892
|
|
|
|
|
|
|
# number of perl scripts (thousands!) into one directory and run perltidy |
4893
|
|
|
|
|
|
|
# *, then run diff between the output of the previous version and the |
4894
|
|
|
|
|
|
|
# current version. |
4895
|
|
|
|
|
|
|
# |
4896
|
|
|
|
|
|
|
# *. For another example, search for the smartmatch operator '~~' |
4897
|
|
|
|
|
|
|
# with your editor to see where updates were made for it. |
4898
|
|
|
|
|
|
|
# |
4899
|
|
|
|
|
|
|
# ----------------------------------------------------------------------- |
4900
|
|
|
|
|
|
|
|
4901
|
7502
|
|
|
7502
|
0
|
14957
|
my ( $self, $line_of_tokens ) = @_; |
4902
|
7502
|
|
|
|
|
14326
|
my ($untrimmed_input_line) = $line_of_tokens->{_line_text}; |
4903
|
|
|
|
|
|
|
|
4904
|
|
|
|
|
|
|
# Extract line number for use in error messages |
4905
|
7502
|
|
|
|
|
12476
|
$input_line_number = $line_of_tokens->{_line_number}; |
4906
|
|
|
|
|
|
|
|
4907
|
|
|
|
|
|
|
# Check for pod documentation |
4908
|
7502
|
100
|
66
|
|
|
21313
|
if ( substr( $untrimmed_input_line, 0, 1 ) eq '=' |
4909
|
|
|
|
|
|
|
&& $untrimmed_input_line =~ /^=[A-Za-z_]/ ) |
4910
|
|
|
|
|
|
|
{ |
4911
|
|
|
|
|
|
|
|
4912
|
|
|
|
|
|
|
# Must not be in multi-line quote |
4913
|
|
|
|
|
|
|
# and must not be in an equation |
4914
|
13
|
50
|
33
|
|
|
97
|
if ( !$in_quote |
4915
|
|
|
|
|
|
|
&& ( $self->operator_expected( [ 'b', '=', 'b' ] ) == TERM ) ) |
4916
|
|
|
|
|
|
|
{ |
4917
|
13
|
|
|
|
|
35
|
$self->[_in_pod_] = 1; |
4918
|
13
|
|
|
|
|
27
|
return; |
4919
|
|
|
|
|
|
|
} |
4920
|
|
|
|
|
|
|
} |
4921
|
|
|
|
|
|
|
|
4922
|
7489
|
|
|
|
|
14352
|
$input_line = $untrimmed_input_line; |
4923
|
|
|
|
|
|
|
|
4924
|
7489
|
|
|
|
|
14361
|
chomp $input_line; |
4925
|
|
|
|
|
|
|
|
4926
|
|
|
|
|
|
|
# Set a flag to indicate if we might be at an __END__ or __DATA__ line |
4927
|
|
|
|
|
|
|
# This will be used below to avoid quoting a bare word followed by |
4928
|
|
|
|
|
|
|
# a fat comma. |
4929
|
7489
|
|
|
|
|
11267
|
my $is_END_or_DATA; |
4930
|
|
|
|
|
|
|
|
4931
|
|
|
|
|
|
|
# Reinitialize the multi-line quote flag |
4932
|
7489
|
100
|
100
|
|
|
18373
|
if ( $in_quote && $quote_type eq 'Q' ) { |
4933
|
47
|
|
|
|
|
149
|
$line_of_tokens->{_starting_in_quote} = 1; |
4934
|
|
|
|
|
|
|
} |
4935
|
|
|
|
|
|
|
else { |
4936
|
7442
|
|
|
|
|
13293
|
$line_of_tokens->{_starting_in_quote} = 0; |
4937
|
|
|
|
|
|
|
|
4938
|
|
|
|
|
|
|
# Trim start of this line unless we are continuing a quoted line. |
4939
|
|
|
|
|
|
|
# Do not trim end because we might end in a quote (test: deken4.pl) |
4940
|
|
|
|
|
|
|
# Perl::Tidy::Formatter will delete needless trailing blanks |
4941
|
7442
|
100
|
|
|
|
33972
|
if ( !length($input_line) ) { |
|
|
100
|
|
|
|
|
|
4942
|
|
|
|
|
|
|
|
4943
|
|
|
|
|
|
|
# line is empty |
4944
|
|
|
|
|
|
|
} |
4945
|
|
|
|
|
|
|
elsif ( $input_line =~ m/\S/g ) { |
4946
|
|
|
|
|
|
|
|
4947
|
|
|
|
|
|
|
# There are $spaces blank characters before a nonblank character |
4948
|
6636
|
|
|
|
|
13687
|
my $spaces = pos($input_line) - 1; |
4949
|
6636
|
100
|
|
|
|
14945
|
if ( $spaces > 0 ) { |
4950
|
|
|
|
|
|
|
|
4951
|
|
|
|
|
|
|
# Trim the leading spaces |
4952
|
3537
|
|
|
|
|
9417
|
$input_line = substr( $input_line, $spaces ); |
4953
|
|
|
|
|
|
|
|
4954
|
|
|
|
|
|
|
# Find actual space count if there are leading tabs |
4955
|
3537
|
100
|
66
|
|
|
12674
|
if ( |
4956
|
|
|
|
|
|
|
ord( substr( $untrimmed_input_line, 0, 1 ) ) == ORD_TAB |
4957
|
|
|
|
|
|
|
&& $untrimmed_input_line =~ /^(\t+)/ ) |
4958
|
|
|
|
|
|
|
{ |
4959
|
213
|
|
|
|
|
519
|
my $tabsize = $self->[_tabsize_]; |
4960
|
213
|
|
|
|
|
710
|
$spaces += length($1) * ( $tabsize - 1 ); |
4961
|
|
|
|
|
|
|
} |
4962
|
|
|
|
|
|
|
|
4963
|
|
|
|
|
|
|
# Calculate a guessed level for nonblank lines to avoid |
4964
|
|
|
|
|
|
|
# calls to sub guess_old_indentation_level() |
4965
|
3537
|
|
|
|
|
7773
|
my $indent_columns = $self->[_indent_columns_]; |
4966
|
|
|
|
|
|
|
$line_of_tokens->{_guessed_indentation_level} = |
4967
|
3537
|
|
|
|
|
10094
|
int( $spaces / $indent_columns ); |
4968
|
|
|
|
|
|
|
} |
4969
|
|
|
|
|
|
|
} |
4970
|
|
|
|
|
|
|
else { |
4971
|
|
|
|
|
|
|
|
4972
|
|
|
|
|
|
|
# line has all blank characters |
4973
|
9
|
|
|
|
|
43
|
$input_line = EMPTY_STRING; |
4974
|
|
|
|
|
|
|
} |
4975
|
|
|
|
|
|
|
|
4976
|
7442
|
|
100
|
|
|
19810
|
$is_END_or_DATA = substr( $input_line, 0, 1 ) eq '_' |
4977
|
|
|
|
|
|
|
&& $input_line =~ /^__(END|DATA)__\s*$/; |
4978
|
|
|
|
|
|
|
} |
4979
|
|
|
|
|
|
|
|
4980
|
|
|
|
|
|
|
# Optimize for a full-line comment. |
4981
|
7489
|
100
|
|
|
|
15613
|
if ( !$in_quote ) { |
4982
|
7305
|
100
|
|
|
|
16514
|
if ( substr( $input_line, 0, 1 ) eq '#' ) { |
4983
|
|
|
|
|
|
|
|
4984
|
|
|
|
|
|
|
# and check for skipped section |
4985
|
788
|
100
|
66
|
|
|
6098
|
if ( $rOpts_code_skipping |
4986
|
|
|
|
|
|
|
&& $input_line =~ /$code_skipping_pattern_begin/ ) |
4987
|
|
|
|
|
|
|
{ |
4988
|
2
|
|
|
|
|
9
|
$self->[_in_skipped_] = $self->[_last_line_number_]; |
4989
|
2
|
|
|
|
|
6
|
return; |
4990
|
|
|
|
|
|
|
} |
4991
|
|
|
|
|
|
|
|
4992
|
|
|
|
|
|
|
# Optional fast processing of a block comment |
4993
|
786
|
|
|
|
|
2251
|
my $ci_string_sum = |
4994
|
|
|
|
|
|
|
( my $str = $ci_string_in_tokenizer ) =~ tr/1/0/; |
4995
|
786
|
|
|
|
|
1611
|
my $ci_string_i = $ci_string_sum + $in_statement_continuation; |
4996
|
786
|
|
|
|
|
1669
|
$line_of_tokens->{_line_type} = 'CODE'; |
4997
|
786
|
|
|
|
|
2380
|
$line_of_tokens->{_rtokens} = [$input_line]; |
4998
|
786
|
|
|
|
|
2100
|
$line_of_tokens->{_rtoken_type} = ['#']; |
4999
|
786
|
|
|
|
|
2067
|
$line_of_tokens->{_rlevels} = [$level_in_tokenizer]; |
5000
|
786
|
|
|
|
|
1948
|
$line_of_tokens->{_rci_levels} = [$ci_string_i]; |
5001
|
786
|
|
|
|
|
2104
|
$line_of_tokens->{_rblock_type} = [EMPTY_STRING]; |
5002
|
786
|
|
|
|
|
1868
|
$line_of_tokens->{_nesting_tokens_0} = $nesting_token_string; |
5003
|
786
|
|
|
|
|
3789
|
$line_of_tokens->{_nesting_blocks_0} = $nesting_block_string; |
5004
|
786
|
|
|
|
|
1930
|
return; |
5005
|
|
|
|
|
|
|
} |
5006
|
|
|
|
|
|
|
|
5007
|
|
|
|
|
|
|
# Optimize handling of a blank line |
5008
|
6517
|
100
|
|
|
|
15214
|
if ( !length($input_line) ) { |
5009
|
806
|
|
|
|
|
2186
|
$line_of_tokens->{_line_type} = 'CODE'; |
5010
|
806
|
|
|
|
|
1964
|
$line_of_tokens->{_rtokens} = []; |
5011
|
806
|
|
|
|
|
1827
|
$line_of_tokens->{_rtoken_type} = []; |
5012
|
806
|
|
|
|
|
1686
|
$line_of_tokens->{_rlevels} = []; |
5013
|
806
|
|
|
|
|
1664
|
$line_of_tokens->{_rci_levels} = []; |
5014
|
806
|
|
|
|
|
1646
|
$line_of_tokens->{_rblock_type} = []; |
5015
|
806
|
|
|
|
|
1724
|
$line_of_tokens->{_nesting_tokens_0} = $nesting_token_string; |
5016
|
806
|
|
|
|
|
2440
|
$line_of_tokens->{_nesting_blocks_0} = $nesting_block_string; |
5017
|
806
|
|
|
|
|
1826
|
return; |
5018
|
|
|
|
|
|
|
} |
5019
|
|
|
|
|
|
|
} |
5020
|
|
|
|
|
|
|
|
5021
|
|
|
|
|
|
|
# update the copy of the line for use in error messages |
5022
|
|
|
|
|
|
|
# This must be exactly what we give the pre_tokenizer |
5023
|
5895
|
|
|
|
|
10694
|
$self->[_line_of_text_] = $input_line; |
5024
|
|
|
|
|
|
|
|
5025
|
|
|
|
|
|
|
# re-initialize for the main loop |
5026
|
5895
|
|
|
|
|
14798
|
$routput_token_list = []; # stack of output token indexes |
5027
|
5895
|
|
|
|
|
18180
|
$routput_token_type = []; # token types |
5028
|
5895
|
|
|
|
|
15995
|
$routput_block_type = []; # types of code block |
5029
|
5895
|
|
|
|
|
15340
|
$routput_container_type = []; # paren types, such as if, elsif, .. |
5030
|
5895
|
|
|
|
|
13778
|
$routput_type_sequence = []; # nesting sequential number |
5031
|
|
|
|
|
|
|
|
5032
|
5895
|
|
|
|
|
9353
|
$rhere_target_list = []; |
5033
|
|
|
|
|
|
|
|
5034
|
5895
|
|
|
|
|
9768
|
$tok = $last_nonblank_token; |
5035
|
5895
|
|
|
|
|
9253
|
$type = $last_nonblank_type; |
5036
|
5895
|
|
|
|
|
9186
|
$prototype = $last_nonblank_prototype; |
5037
|
5895
|
|
|
|
|
8683
|
$last_nonblank_i = -1; |
5038
|
5895
|
|
|
|
|
9044
|
$block_type = $last_nonblank_block_type; |
5039
|
5895
|
|
|
|
|
8700
|
$container_type = $last_nonblank_container_type; |
5040
|
5895
|
|
|
|
|
8661
|
$type_sequence = $last_nonblank_type_sequence; |
5041
|
5895
|
|
|
|
|
7959
|
$indent_flag = 0; |
5042
|
5895
|
|
|
|
|
8202
|
$peeked_ahead = 0; |
5043
|
|
|
|
|
|
|
|
5044
|
5895
|
|
|
|
|
18362
|
$self->tokenizer_main_loop($is_END_or_DATA); |
5045
|
|
|
|
|
|
|
|
5046
|
|
|
|
|
|
|
#----------------------------------------------- |
5047
|
|
|
|
|
|
|
# all done tokenizing this line ... |
5048
|
|
|
|
|
|
|
# now prepare the final list of tokens and types |
5049
|
|
|
|
|
|
|
#----------------------------------------------- |
5050
|
5895
|
50
|
|
|
|
11704
|
if ( $self->[_calculate_ci_] ) { |
5051
|
0
|
|
|
|
|
0
|
$self->OLD_tokenizer_wrapup_line($line_of_tokens); |
5052
|
|
|
|
|
|
|
} |
5053
|
|
|
|
|
|
|
else { |
5054
|
5895
|
|
|
|
|
16321
|
$self->tokenizer_wrapup_line($line_of_tokens); |
5055
|
|
|
|
|
|
|
} |
5056
|
|
|
|
|
|
|
|
5057
|
5895
|
|
|
|
|
11390
|
return; |
5058
|
|
|
|
|
|
|
} ## end sub tokenize_this_line |
5059
|
|
|
|
|
|
|
|
5060
|
|
|
|
|
|
|
sub tokenizer_main_loop { |
5061
|
|
|
|
|
|
|
|
5062
|
5895
|
|
|
5895
|
0
|
12160
|
my ( $self, $is_END_or_DATA ) = @_; |
5063
|
|
|
|
|
|
|
|
5064
|
|
|
|
|
|
|
#--------------------------------- |
5065
|
|
|
|
|
|
|
# Break one input line into tokens |
5066
|
|
|
|
|
|
|
#--------------------------------- |
5067
|
|
|
|
|
|
|
|
5068
|
|
|
|
|
|
|
# Input parameter: |
5069
|
|
|
|
|
|
|
# $is_END_or_DATA is true for a __END__ or __DATA__ line |
5070
|
|
|
|
|
|
|
|
5071
|
|
|
|
|
|
|
# start by breaking the line into pre-tokens |
5072
|
5895
|
|
|
|
|
8758
|
my $max_tokens_wanted = 0; # this signals pre_tokenize to get all tokens |
5073
|
5895
|
|
|
|
|
13955
|
( $rtokens, $rtoken_map, $rtoken_type ) = |
5074
|
|
|
|
|
|
|
pre_tokenize( $input_line, $max_tokens_wanted ); |
5075
|
|
|
|
|
|
|
|
5076
|
5895
|
|
|
|
|
23371
|
$max_token_index = scalar( @{$rtokens} ) - 1; |
|
5895
|
|
|
|
|
11233
|
|
5077
|
5895
|
|
|
|
|
9064
|
push( @{$rtokens}, SPACE, SPACE, SPACE ) |
|
5895
|
|
|
|
|
15096
|
|
5078
|
|
|
|
|
|
|
; # extra whitespace simplifies logic |
5079
|
5895
|
|
|
|
|
8930
|
push( @{$rtoken_map}, 0, 0, 0 ); # shouldn't be referenced |
|
5895
|
|
|
|
|
12648
|
|
5080
|
5895
|
|
|
|
|
8783
|
push( @{$rtoken_type}, 'b', 'b', 'b' ); |
|
5895
|
|
|
|
|
12945
|
|
5081
|
|
|
|
|
|
|
|
5082
|
|
|
|
|
|
|
# initialize for main loop |
5083
|
5895
|
|
|
|
|
8486
|
if (0) { #<<< this is not necessary |
5084
|
|
|
|
|
|
|
foreach my $ii ( 0 .. $max_token_index + 3 ) { |
5085
|
|
|
|
|
|
|
$routput_token_type->[$ii] = EMPTY_STRING; |
5086
|
|
|
|
|
|
|
$routput_block_type->[$ii] = EMPTY_STRING; |
5087
|
|
|
|
|
|
|
$routput_container_type->[$ii] = EMPTY_STRING; |
5088
|
|
|
|
|
|
|
$routput_type_sequence->[$ii] = EMPTY_STRING; |
5089
|
|
|
|
|
|
|
$routput_indent_flag->[$ii] = 0; |
5090
|
|
|
|
|
|
|
} |
5091
|
|
|
|
|
|
|
} |
5092
|
|
|
|
|
|
|
|
5093
|
5895
|
|
|
|
|
8928
|
$i = -1; |
5094
|
5895
|
|
|
|
|
8732
|
$i_tok = -1; |
5095
|
|
|
|
|
|
|
|
5096
|
|
|
|
|
|
|
#----------------------------- |
5097
|
|
|
|
|
|
|
# begin main tokenization loop |
5098
|
|
|
|
|
|
|
#----------------------------- |
5099
|
|
|
|
|
|
|
|
5100
|
|
|
|
|
|
|
# we are looking at each pre-token of one line and combining them |
5101
|
|
|
|
|
|
|
# into tokens |
5102
|
5895
|
|
|
|
|
13764
|
while ( ++$i <= $max_token_index ) { |
5103
|
|
|
|
|
|
|
|
5104
|
|
|
|
|
|
|
# continue looking for the end of a quote |
5105
|
50693
|
100
|
|
|
|
90154
|
if ($in_quote) { |
5106
|
2763
|
|
|
|
|
8506
|
$self->do_FOLLOW_QUOTE(); |
5107
|
2763
|
100
|
100
|
|
|
10572
|
last if ( $in_quote || $i > $max_token_index ); |
5108
|
|
|
|
|
|
|
} |
5109
|
|
|
|
|
|
|
|
5110
|
50394
|
100
|
100
|
|
|
138777
|
if ( $type ne 'b' && $tok ne 'CORE::' ) { |
5111
|
|
|
|
|
|
|
|
5112
|
|
|
|
|
|
|
# try to catch some common errors |
5113
|
35221
|
100
|
100
|
|
|
76896
|
if ( ( $type eq 'n' ) && ( $tok ne '0' ) ) { |
5114
|
|
|
|
|
|
|
|
5115
|
1588
|
100
|
|
|
|
4938
|
if ( $last_nonblank_token eq 'eq' ) { |
|
|
50
|
|
|
|
|
|
5116
|
9
|
|
|
|
|
71
|
$self->complain("Should 'eq' be '==' here ?\n"); |
5117
|
|
|
|
|
|
|
} |
5118
|
|
|
|
|
|
|
elsif ( $last_nonblank_token eq 'ne' ) { |
5119
|
0
|
|
|
|
|
0
|
$self->complain("Should 'ne' be '!=' here ?\n"); |
5120
|
|
|
|
|
|
|
} |
5121
|
|
|
|
|
|
|
} |
5122
|
|
|
|
|
|
|
|
5123
|
|
|
|
|
|
|
# fix c090, only rotate vars if a new token will be stored |
5124
|
35221
|
100
|
|
|
|
65611
|
if ( $i_tok >= 0 ) { |
5125
|
29464
|
|
|
|
|
43857
|
$last_last_nonblank_token = $last_nonblank_token; |
5126
|
29464
|
|
|
|
|
39870
|
$last_last_nonblank_type = $last_nonblank_type; |
5127
|
29464
|
|
|
|
|
40270
|
$last_last_nonblank_block_type = $last_nonblank_block_type; |
5128
|
29464
|
|
|
|
|
40287
|
$last_last_nonblank_container_type = |
5129
|
|
|
|
|
|
|
$last_nonblank_container_type; |
5130
|
29464
|
|
|
|
|
41364
|
$last_last_nonblank_type_sequence = |
5131
|
|
|
|
|
|
|
$last_nonblank_type_sequence; |
5132
|
|
|
|
|
|
|
|
5133
|
|
|
|
|
|
|
# Fix part #3 for git82: propagate type 'Z' though L-R pair |
5134
|
29464
|
100
|
100
|
|
|
62616
|
unless ( $type eq 'R' && $last_nonblank_type eq 'Z' ) { |
5135
|
29461
|
|
|
|
|
40596
|
$last_nonblank_token = $tok; |
5136
|
29461
|
|
|
|
|
39359
|
$last_nonblank_type = $type; |
5137
|
|
|
|
|
|
|
} |
5138
|
29464
|
|
|
|
|
41125
|
$last_nonblank_prototype = $prototype; |
5139
|
29464
|
|
|
|
|
40938
|
$last_nonblank_block_type = $block_type; |
5140
|
29464
|
|
|
|
|
40168
|
$last_nonblank_container_type = $container_type; |
5141
|
29464
|
|
|
|
|
40860
|
$last_nonblank_type_sequence = $type_sequence; |
5142
|
29464
|
|
|
|
|
39077
|
$last_nonblank_i = $i_tok; |
5143
|
|
|
|
|
|
|
} |
5144
|
|
|
|
|
|
|
|
5145
|
|
|
|
|
|
|
# Patch for c030: Fix things in case a '->' got separated from |
5146
|
|
|
|
|
|
|
# the subsequent identifier by a side comment. We need the |
5147
|
|
|
|
|
|
|
# last_nonblank_token to have a leading -> to avoid triggering |
5148
|
|
|
|
|
|
|
# an operator expected error message at the next '('. See also |
5149
|
|
|
|
|
|
|
# fix for git #63. |
5150
|
35221
|
100
|
|
|
|
65070
|
if ( $last_last_nonblank_token eq '->' ) { |
5151
|
885
|
100
|
66
|
|
|
4976
|
if ( $last_nonblank_type eq 'w' |
5152
|
|
|
|
|
|
|
|| $last_nonblank_type eq 'i' ) |
5153
|
|
|
|
|
|
|
{ |
5154
|
674
|
|
|
|
|
2647
|
$last_nonblank_token = '->' . $last_nonblank_token; |
5155
|
674
|
|
|
|
|
1395
|
$last_nonblank_type = 'i'; |
5156
|
|
|
|
|
|
|
} |
5157
|
|
|
|
|
|
|
} |
5158
|
|
|
|
|
|
|
} |
5159
|
|
|
|
|
|
|
|
5160
|
|
|
|
|
|
|
# store previous token type |
5161
|
50394
|
100
|
|
|
|
86480
|
if ( $i_tok >= 0 ) { |
5162
|
44637
|
|
|
|
|
85008
|
$routput_token_type->[$i_tok] = $type; |
5163
|
44637
|
|
|
|
|
72322
|
$routput_block_type->[$i_tok] = $block_type; |
5164
|
44637
|
|
|
|
|
72244
|
$routput_container_type->[$i_tok] = $container_type; |
5165
|
44637
|
|
|
|
|
70708
|
$routput_type_sequence->[$i_tok] = $type_sequence; |
5166
|
44637
|
|
|
|
|
69279
|
$routput_indent_flag->[$i_tok] = $indent_flag; |
5167
|
|
|
|
|
|
|
} |
5168
|
|
|
|
|
|
|
|
5169
|
|
|
|
|
|
|
# get the next pre-token and type |
5170
|
|
|
|
|
|
|
# $tok and $type will be modified to make the output token |
5171
|
50394
|
|
|
|
|
81888
|
my $pre_tok = $tok = $rtokens->[$i]; # get the next pre-token |
5172
|
50394
|
|
|
|
|
76515
|
my $pre_type = $type = $rtoken_type->[$i]; # and type |
5173
|
|
|
|
|
|
|
|
5174
|
|
|
|
|
|
|
# remember the starting index of this token; we will be updating $i |
5175
|
50394
|
|
|
|
|
67556
|
$i_tok = $i; |
5176
|
|
|
|
|
|
|
|
5177
|
|
|
|
|
|
|
# re-initialize various flags for the next output token |
5178
|
50394
|
|
100
|
|
|
90928
|
$block_type &&= EMPTY_STRING; |
5179
|
50394
|
|
100
|
|
|
92555
|
$container_type &&= EMPTY_STRING; |
5180
|
50394
|
|
100
|
|
|
98745
|
$type_sequence &&= EMPTY_STRING; |
5181
|
50394
|
|
100
|
|
|
84653
|
$indent_flag &&= 0; |
5182
|
50394
|
|
100
|
|
|
81402
|
$prototype &&= EMPTY_STRING; |
5183
|
|
|
|
|
|
|
|
5184
|
|
|
|
|
|
|
# this pre-token will start an output token |
5185
|
50394
|
|
|
|
|
65002
|
push( @{$routput_token_list}, $i_tok ); |
|
50394
|
|
|
|
|
87335
|
|
5186
|
|
|
|
|
|
|
|
5187
|
|
|
|
|
|
|
#-------------------------- |
5188
|
|
|
|
|
|
|
# handle a whitespace token |
5189
|
|
|
|
|
|
|
#-------------------------- |
5190
|
50394
|
100
|
|
|
|
105920
|
next if ( $pre_type eq 'b' ); |
5191
|
|
|
|
|
|
|
|
5192
|
|
|
|
|
|
|
#----------------- |
5193
|
|
|
|
|
|
|
# handle a comment |
5194
|
|
|
|
|
|
|
#----------------- |
5195
|
35079
|
100
|
|
|
|
60510
|
last if ( $pre_type eq '#' ); |
5196
|
|
|
|
|
|
|
|
5197
|
|
|
|
|
|
|
# continue gathering identifier if necessary |
5198
|
34751
|
100
|
|
|
|
59634
|
if ($id_scan_state) { |
5199
|
|
|
|
|
|
|
|
5200
|
17
|
100
|
66
|
|
|
145
|
if ( $is_sub{$id_scan_state} || $is_package{$id_scan_state} ) { |
5201
|
10
|
|
|
|
|
38
|
$self->scan_id(); |
5202
|
|
|
|
|
|
|
} |
5203
|
|
|
|
|
|
|
else { |
5204
|
7
|
|
|
|
|
30
|
$self->scan_identifier(); |
5205
|
|
|
|
|
|
|
} |
5206
|
|
|
|
|
|
|
|
5207
|
17
|
100
|
|
|
|
80
|
if ($id_scan_state) { |
5208
|
|
|
|
|
|
|
|
5209
|
|
|
|
|
|
|
# Still scanning ... |
5210
|
|
|
|
|
|
|
# Check for side comment between sub and prototype (c061) |
5211
|
|
|
|
|
|
|
|
5212
|
|
|
|
|
|
|
# done if nothing left to scan on this line |
5213
|
1
|
50
|
|
|
|
8
|
last if ( $i > $max_token_index ); |
5214
|
|
|
|
|
|
|
|
5215
|
1
|
|
|
|
|
6
|
my ( $next_nonblank_token, $i_next ) = |
5216
|
|
|
|
|
|
|
find_next_nonblank_token_on_this_line( $i, $rtokens, |
5217
|
|
|
|
|
|
|
$max_token_index ); |
5218
|
|
|
|
|
|
|
|
5219
|
|
|
|
|
|
|
# done if it was just some trailing space |
5220
|
1
|
50
|
|
|
|
4
|
last if ( $i_next > $max_token_index ); |
5221
|
|
|
|
|
|
|
|
5222
|
|
|
|
|
|
|
# something remains on the line ... must be a side comment |
5223
|
1
|
|
|
|
|
7
|
next; |
5224
|
|
|
|
|
|
|
} |
5225
|
|
|
|
|
|
|
|
5226
|
16
|
100
|
100
|
|
|
125
|
next if ( ( $i > 0 ) || $type ); |
5227
|
|
|
|
|
|
|
|
5228
|
|
|
|
|
|
|
# didn't find any token; start over |
5229
|
7
|
|
|
|
|
26
|
$type = $pre_type; |
5230
|
7
|
|
|
|
|
16
|
$tok = $pre_tok; |
5231
|
|
|
|
|
|
|
} |
5232
|
|
|
|
|
|
|
|
5233
|
|
|
|
|
|
|
## my $prev_tok = $i > 0 ? $rtokens->[ $i - 1 ] : SPACE; |
5234
|
34741
|
100
|
|
|
|
75058
|
my $prev_type = $i > 0 ? $rtoken_type->[ $i - 1 ] : 'b'; |
5235
|
|
|
|
|
|
|
|
5236
|
|
|
|
|
|
|
#----------------------------------------------------------- |
5237
|
|
|
|
|
|
|
# Combine pre-tokens into digraphs and trigraphs if possible |
5238
|
|
|
|
|
|
|
#----------------------------------------------------------- |
5239
|
|
|
|
|
|
|
|
5240
|
|
|
|
|
|
|
# See if we can make a digraph... |
5241
|
|
|
|
|
|
|
# The following tokens are excluded and handled specially: |
5242
|
|
|
|
|
|
|
# '/=' is excluded because the / might start a pattern. |
5243
|
|
|
|
|
|
|
# 'x=' is excluded since it might be $x=, with $ on previous line |
5244
|
|
|
|
|
|
|
# '**' and *= might be typeglobs of punctuation variables |
5245
|
|
|
|
|
|
|
# I have allowed tokens starting with <, such as <=, |
5246
|
|
|
|
|
|
|
# because I don't think these could be valid angle operators. |
5247
|
|
|
|
|
|
|
# test file: storrs4.pl |
5248
|
34741
|
100
|
100
|
|
|
108014
|
if ( $can_start_digraph{$tok} |
|
|
|
100
|
|
|
|
|
5249
|
|
|
|
|
|
|
&& $i < $max_token_index |
5250
|
|
|
|
|
|
|
&& $is_digraph{ $tok . $rtokens->[ $i + 1 ] } ) |
5251
|
|
|
|
|
|
|
{ |
5252
|
|
|
|
|
|
|
|
5253
|
2555
|
|
|
|
|
4793
|
my $combine_ok = 1; |
5254
|
2555
|
|
|
|
|
5222
|
my $test_tok = $tok . $rtokens->[ $i + 1 ]; |
5255
|
|
|
|
|
|
|
|
5256
|
|
|
|
|
|
|
# check for special cases which cannot be combined |
5257
|
|
|
|
|
|
|
|
5258
|
|
|
|
|
|
|
# '//' must be defined_or operator if an operator is expected. |
5259
|
|
|
|
|
|
|
# TODO: Code for other ambiguous digraphs (/=, x=, **, *=) |
5260
|
|
|
|
|
|
|
# could be migrated here for clarity |
5261
|
|
|
|
|
|
|
|
5262
|
|
|
|
|
|
|
# Patch for RT#102371, misparsing a // in the following snippet: |
5263
|
|
|
|
|
|
|
# state $b //= ccc(); |
5264
|
|
|
|
|
|
|
# The solution is to always accept the digraph (or trigraph) |
5265
|
|
|
|
|
|
|
# after type 'Z' (possible file handle). The reason is that |
5266
|
|
|
|
|
|
|
# sub operator_expected gives TERM expected here, which is |
5267
|
|
|
|
|
|
|
# wrong in this case. |
5268
|
2555
|
100
|
66
|
|
|
7252
|
if ( $test_tok eq '//' && $last_nonblank_type ne 'Z' ) { |
5269
|
|
|
|
|
|
|
|
5270
|
|
|
|
|
|
|
# note that here $tok = '/' and the next tok and type is '/' |
5271
|
16
|
|
|
|
|
69
|
$expecting = |
5272
|
|
|
|
|
|
|
$self->operator_expected( [ $prev_type, $tok, '/' ] ); |
5273
|
|
|
|
|
|
|
|
5274
|
|
|
|
|
|
|
# Patched for RT#101547, was 'unless ($expecting==OPERATOR)' |
5275
|
16
|
100
|
|
|
|
69
|
$combine_ok = 0 if ( $expecting == TERM ); |
5276
|
|
|
|
|
|
|
} |
5277
|
|
|
|
|
|
|
|
5278
|
|
|
|
|
|
|
# Patch for RT #114359: mis-parsing of "print $x ** 0.5; |
5279
|
|
|
|
|
|
|
# Accept the digraphs '**' only after type 'Z' |
5280
|
|
|
|
|
|
|
# Otherwise postpone the decision. |
5281
|
2555
|
100
|
|
|
|
5581
|
if ( $test_tok eq '**' ) { |
5282
|
39
|
100
|
|
|
|
131
|
if ( $last_nonblank_type ne 'Z' ) { $combine_ok = 0 } |
|
37
|
|
|
|
|
79
|
|
5283
|
|
|
|
|
|
|
} |
5284
|
|
|
|
|
|
|
|
5285
|
2555
|
50
|
66
|
|
|
16304
|
if ( |
|
|
|
66
|
|
|
|
|
|
|
|
33
|
|
|
|
|
5286
|
|
|
|
|
|
|
|
5287
|
|
|
|
|
|
|
# still ok to combine? |
5288
|
|
|
|
|
|
|
$combine_ok |
5289
|
|
|
|
|
|
|
|
5290
|
|
|
|
|
|
|
&& ( $test_tok ne '/=' ) # might be pattern |
5291
|
|
|
|
|
|
|
&& ( $test_tok ne 'x=' ) # might be $x |
5292
|
|
|
|
|
|
|
&& ( $test_tok ne '*=' ) # typeglob? |
5293
|
|
|
|
|
|
|
|
5294
|
|
|
|
|
|
|
# Moved above as part of fix for |
5295
|
|
|
|
|
|
|
# RT #114359: Missparsing of "print $x ** 0.5; |
5296
|
|
|
|
|
|
|
# && ( $test_tok ne '**' ) # typeglob? |
5297
|
|
|
|
|
|
|
) |
5298
|
|
|
|
|
|
|
{ |
5299
|
2514
|
|
|
|
|
4087
|
$tok = $test_tok; |
5300
|
2514
|
|
|
|
|
3618
|
$i++; |
5301
|
|
|
|
|
|
|
|
5302
|
|
|
|
|
|
|
# Now try to assemble trigraphs. Note that all possible |
5303
|
|
|
|
|
|
|
# perl trigraphs can be constructed by appending a character |
5304
|
|
|
|
|
|
|
# to a digraph. |
5305
|
2514
|
|
|
|
|
4327
|
$test_tok = $tok . $rtokens->[ $i + 1 ]; |
5306
|
|
|
|
|
|
|
|
5307
|
2514
|
100
|
|
|
|
8726
|
if ( $is_trigraph{$test_tok} ) { |
|
|
100
|
|
|
|
|
|
5308
|
76
|
|
|
|
|
183
|
$tok = $test_tok; |
5309
|
76
|
|
|
|
|
181
|
$i++; |
5310
|
|
|
|
|
|
|
} |
5311
|
|
|
|
|
|
|
|
5312
|
|
|
|
|
|
|
# The only current tetragraph is the double diamond operator |
5313
|
|
|
|
|
|
|
# and its first three characters are not a trigraph, so |
5314
|
|
|
|
|
|
|
# we do can do a special test for it |
5315
|
|
|
|
|
|
|
elsif ( $test_tok eq '<<>' ) { |
5316
|
1
|
|
|
|
|
3
|
$test_tok .= $rtokens->[ $i + 2 ]; |
5317
|
1
|
50
|
|
|
|
5
|
if ( $is_tetragraph{$test_tok} ) { |
5318
|
1
|
|
|
|
|
2
|
$tok = $test_tok; |
5319
|
1
|
|
|
|
|
3
|
$i += 2; |
5320
|
|
|
|
|
|
|
} |
5321
|
|
|
|
|
|
|
} |
5322
|
|
|
|
|
|
|
} |
5323
|
|
|
|
|
|
|
} |
5324
|
|
|
|
|
|
|
|
5325
|
34741
|
|
|
|
|
49705
|
$type = $tok; |
5326
|
34741
|
|
|
|
|
57544
|
$next_tok = $rtokens->[ $i + 1 ]; |
5327
|
34741
|
|
|
|
|
53018
|
$next_type = $rtoken_type->[ $i + 1 ]; |
5328
|
|
|
|
|
|
|
|
5329
|
34741
|
|
|
|
|
43615
|
DEBUG_TOKENIZE && do { |
5330
|
|
|
|
|
|
|
local $LIST_SEPARATOR = ')('; |
5331
|
|
|
|
|
|
|
my @debug_list = ( |
5332
|
|
|
|
|
|
|
$last_nonblank_token, $tok, |
5333
|
|
|
|
|
|
|
$next_tok, $brace_depth, |
5334
|
|
|
|
|
|
|
$rbrace_type->[$brace_depth], $paren_depth, |
5335
|
|
|
|
|
|
|
$rparen_type->[$paren_depth], |
5336
|
|
|
|
|
|
|
); |
5337
|
|
|
|
|
|
|
print STDOUT "TOKENIZE:(@debug_list)\n"; |
5338
|
|
|
|
|
|
|
}; |
5339
|
|
|
|
|
|
|
|
5340
|
|
|
|
|
|
|
# Turn off attribute list on first non-blank, non-bareword. |
5341
|
|
|
|
|
|
|
# Added '#' to fix c038 (later moved above). |
5342
|
34741
|
100
|
100
|
|
|
103304
|
if ( $pre_type ne 'w' && $self->[_in_attribute_list_] ) { |
5343
|
39
|
|
|
|
|
90
|
$self->[_in_attribute_list_] = 0; |
5344
|
|
|
|
|
|
|
} |
5345
|
|
|
|
|
|
|
|
5346
|
|
|
|
|
|
|
#-------------------------------------------------------- |
5347
|
|
|
|
|
|
|
# We have the next token, $tok. |
5348
|
|
|
|
|
|
|
# Now we have to examine this token and decide what it is |
5349
|
|
|
|
|
|
|
# and define its $type |
5350
|
|
|
|
|
|
|
# |
5351
|
|
|
|
|
|
|
# section 1: bare words |
5352
|
|
|
|
|
|
|
#-------------------------------------------------------- |
5353
|
|
|
|
|
|
|
|
5354
|
34741
|
100
|
|
|
|
72622
|
if ( $pre_type eq 'w' ) { |
|
|
100
|
|
|
|
|
|
5355
|
5826
|
|
|
|
|
21455
|
$expecting = |
5356
|
|
|
|
|
|
|
$self->operator_expected( [ $prev_type, $tok, $next_type ] ); |
5357
|
5826
|
|
|
|
|
18321
|
my $is_last = $self->do_BAREWORD($is_END_or_DATA); |
5358
|
5826
|
100
|
|
|
|
17438
|
last if ($is_last); |
5359
|
|
|
|
|
|
|
} |
5360
|
|
|
|
|
|
|
|
5361
|
|
|
|
|
|
|
#----------------------------- |
5362
|
|
|
|
|
|
|
# section 2: strings of digits |
5363
|
|
|
|
|
|
|
#----------------------------- |
5364
|
|
|
|
|
|
|
elsif ( $pre_type eq 'd' ) { |
5365
|
1929
|
|
|
|
|
7459
|
$expecting = |
5366
|
|
|
|
|
|
|
$self->operator_expected( [ $prev_type, $tok, $next_type ] ); |
5367
|
1929
|
|
|
|
|
6483
|
$self->do_DIGITS(); |
5368
|
|
|
|
|
|
|
} |
5369
|
|
|
|
|
|
|
|
5370
|
|
|
|
|
|
|
#---------------------------- |
5371
|
|
|
|
|
|
|
# section 3: all other tokens |
5372
|
|
|
|
|
|
|
#---------------------------- |
5373
|
|
|
|
|
|
|
else { |
5374
|
26986
|
|
|
|
|
55688
|
my $code = $tokenization_code->{$tok}; |
5375
|
26986
|
100
|
|
|
|
51207
|
if ($code) { |
5376
|
25261
|
|
|
|
|
86658
|
$expecting = |
5377
|
|
|
|
|
|
|
$self->operator_expected( |
5378
|
|
|
|
|
|
|
[ $prev_type, $tok, $next_type ] ); |
5379
|
25261
|
|
|
|
|
89188
|
$code->($self); |
5380
|
25261
|
100
|
|
|
|
68527
|
redo if $in_quote; |
5381
|
|
|
|
|
|
|
} |
5382
|
|
|
|
|
|
|
} |
5383
|
|
|
|
|
|
|
} |
5384
|
|
|
|
|
|
|
|
5385
|
|
|
|
|
|
|
# ----------------------------- |
5386
|
|
|
|
|
|
|
# end of main tokenization loop |
5387
|
|
|
|
|
|
|
# ----------------------------- |
5388
|
|
|
|
|
|
|
|
5389
|
|
|
|
|
|
|
# Store the final token |
5390
|
5895
|
100
|
|
|
|
12912
|
if ( $i_tok >= 0 ) { |
5391
|
5757
|
|
|
|
|
12608
|
$routput_token_type->[$i_tok] = $type; |
5392
|
5757
|
|
|
|
|
10434
|
$routput_block_type->[$i_tok] = $block_type; |
5393
|
5757
|
|
|
|
|
10051
|
$routput_container_type->[$i_tok] = $container_type; |
5394
|
5757
|
|
|
|
|
10329
|
$routput_type_sequence->[$i_tok] = $type_sequence; |
5395
|
5757
|
|
|
|
|
9833
|
$routput_indent_flag->[$i_tok] = $indent_flag; |
5396
|
|
|
|
|
|
|
} |
5397
|
|
|
|
|
|
|
|
5398
|
|
|
|
|
|
|
# Remember last nonblank values |
5399
|
5895
|
100
|
100
|
|
|
21242
|
if ( $type ne 'b' && $type ne '#' ) { |
5400
|
5422
|
|
|
|
|
9194
|
$last_last_nonblank_token = $last_nonblank_token; |
5401
|
5422
|
|
|
|
|
7841
|
$last_last_nonblank_type = $last_nonblank_type; |
5402
|
5422
|
|
|
|
|
8268
|
$last_last_nonblank_block_type = $last_nonblank_block_type; |
5403
|
5422
|
|
|
|
|
8269
|
$last_last_nonblank_container_type = $last_nonblank_container_type; |
5404
|
5422
|
|
|
|
|
8162
|
$last_last_nonblank_type_sequence = $last_nonblank_type_sequence; |
5405
|
5422
|
|
|
|
|
7766
|
$last_nonblank_token = $tok; |
5406
|
5422
|
|
|
|
|
7625
|
$last_nonblank_type = $type; |
5407
|
5422
|
|
|
|
|
7866
|
$last_nonblank_block_type = $block_type; |
5408
|
5422
|
|
|
|
|
7660
|
$last_nonblank_container_type = $container_type; |
5409
|
5422
|
|
|
|
|
8001
|
$last_nonblank_type_sequence = $type_sequence; |
5410
|
5422
|
|
|
|
|
8327
|
$last_nonblank_prototype = $prototype; |
5411
|
|
|
|
|
|
|
} |
5412
|
|
|
|
|
|
|
|
5413
|
|
|
|
|
|
|
# reset indentation level if necessary at a sub or package |
5414
|
|
|
|
|
|
|
# in an attempt to recover from a nesting error |
5415
|
5895
|
50
|
|
|
|
12329
|
if ( $level_in_tokenizer < 0 ) { |
5416
|
0
|
0
|
|
|
|
0
|
if ( $input_line =~ /^\s*(sub|package)\s+(\w+)/ ) { |
5417
|
0
|
|
|
|
|
0
|
reset_indentation_level(0); |
5418
|
0
|
|
|
|
|
0
|
$self->brace_warning("resetting level to 0 at $1 $2\n"); |
5419
|
|
|
|
|
|
|
} |
5420
|
|
|
|
|
|
|
} |
5421
|
|
|
|
|
|
|
|
5422
|
5895
|
|
|
|
|
9895
|
$self->[_in_quote_] = $in_quote; |
5423
|
5895
|
100
|
|
|
|
13080
|
$self->[_quote_target_] = |
5424
|
|
|
|
|
|
|
$in_quote ? matching_end_token($quote_character) : EMPTY_STRING; |
5425
|
5895
|
|
|
|
|
10444
|
$self->[_rhere_target_list_] = $rhere_target_list; |
5426
|
|
|
|
|
|
|
|
5427
|
5895
|
|
|
|
|
10019
|
return; |
5428
|
|
|
|
|
|
|
} ## end sub tokenizer_main_loop |
5429
|
|
|
|
|
|
|
|
5430
|
|
|
|
|
|
|
sub OLD_tokenizer_wrapup_line { |
5431
|
0
|
|
|
0
|
0
|
0
|
my ( $self, $line_of_tokens ) = @_; |
5432
|
|
|
|
|
|
|
|
5433
|
|
|
|
|
|
|
#--------------------------------------------------------- |
5434
|
|
|
|
|
|
|
# Package a line of tokens for shipping back to the caller |
5435
|
|
|
|
|
|
|
#--------------------------------------------------------- |
5436
|
|
|
|
|
|
|
|
5437
|
|
|
|
|
|
|
# NOTE: This routine is retained for testing purposes only; it should |
5438
|
|
|
|
|
|
|
# be removed by about 2025. Until then, it can be called for testing |
5439
|
|
|
|
|
|
|
# with -exp=ci0 or -exp=ci1. |
5440
|
|
|
|
|
|
|
|
5441
|
|
|
|
|
|
|
# Most of the remaining work involves defining the two indentation |
5442
|
|
|
|
|
|
|
# parameters that the formatter needs for each token: |
5443
|
|
|
|
|
|
|
# - $level = structural indentation level and |
5444
|
|
|
|
|
|
|
# - $ci_level = continuation indentation level |
5445
|
|
|
|
|
|
|
|
5446
|
|
|
|
|
|
|
# The method for setting the indentation level is straightforward. |
5447
|
|
|
|
|
|
|
# But the method used to define the continuation indentation is |
5448
|
|
|
|
|
|
|
# complicated because it has evolved over a long time by trial and |
5449
|
|
|
|
|
|
|
# error. It could undoubtedly be simplified but it works okay as is. |
5450
|
|
|
|
|
|
|
|
5451
|
|
|
|
|
|
|
# Here is a brief description of how indentation is computed. |
5452
|
|
|
|
|
|
|
# Perl::Tidy computes indentation as the sum of 2 terms: |
5453
|
|
|
|
|
|
|
# |
5454
|
|
|
|
|
|
|
# (1) structural indentation, such as if/else/elsif blocks |
5455
|
|
|
|
|
|
|
# (2) continuation indentation, such as long parameter call lists. |
5456
|
|
|
|
|
|
|
# |
5457
|
|
|
|
|
|
|
# These are occasionally called primary and secondary indentation. |
5458
|
|
|
|
|
|
|
# |
5459
|
|
|
|
|
|
|
# Structural indentation is introduced by tokens of type '{', |
5460
|
|
|
|
|
|
|
# although the actual tokens might be '{', '(', or '['. Structural |
5461
|
|
|
|
|
|
|
# indentation is of two types: BLOCK and non-BLOCK. Default |
5462
|
|
|
|
|
|
|
# structural indentation is 4 characters if the standard indentation |
5463
|
|
|
|
|
|
|
# scheme is used. |
5464
|
|
|
|
|
|
|
# |
5465
|
|
|
|
|
|
|
# Continuation indentation is introduced whenever a line at BLOCK |
5466
|
|
|
|
|
|
|
# level is broken before its termination. Default continuation |
5467
|
|
|
|
|
|
|
# indentation is 2 characters in the standard indentation scheme. |
5468
|
|
|
|
|
|
|
# |
5469
|
|
|
|
|
|
|
# Both types of indentation may be nested arbitrarily deep and |
5470
|
|
|
|
|
|
|
# interlaced. The distinction between the two is somewhat arbitrary. |
5471
|
|
|
|
|
|
|
# |
5472
|
|
|
|
|
|
|
# For each token, we will define two variables which would apply if |
5473
|
|
|
|
|
|
|
# the current statement were broken just before that token, so that |
5474
|
|
|
|
|
|
|
# that token started a new line: |
5475
|
|
|
|
|
|
|
# |
5476
|
|
|
|
|
|
|
# $level = the structural indentation level, |
5477
|
|
|
|
|
|
|
# $ci_level = the continuation indentation level |
5478
|
|
|
|
|
|
|
# |
5479
|
|
|
|
|
|
|
# The total indentation will be $level * (4 spaces) + $ci_level * (2 |
5480
|
|
|
|
|
|
|
# spaces), assuming defaults. However, in some special cases it is |
5481
|
|
|
|
|
|
|
# customary to modify $ci_level from this strict value. |
5482
|
|
|
|
|
|
|
# |
5483
|
|
|
|
|
|
|
# The total structural indentation is easy to compute by adding and |
5484
|
|
|
|
|
|
|
# subtracting 1 from a saved value as types '{' and '}' are seen. |
5485
|
|
|
|
|
|
|
# The running value of this variable is $level_in_tokenizer. |
5486
|
|
|
|
|
|
|
# |
5487
|
|
|
|
|
|
|
# The total continuation is much more difficult to compute, and |
5488
|
|
|
|
|
|
|
# requires several variables. These variables are: |
5489
|
|
|
|
|
|
|
# |
5490
|
|
|
|
|
|
|
# $ci_string_in_tokenizer = a string of 1's and 0's indicating, for |
5491
|
|
|
|
|
|
|
# each indentation level, if there are intervening open secondary |
5492
|
|
|
|
|
|
|
# structures just prior to that level. |
5493
|
|
|
|
|
|
|
# $continuation_string_in_tokenizer = a string of 1's and 0's |
5494
|
|
|
|
|
|
|
# indicating if the last token at that level is "continued", meaning |
5495
|
|
|
|
|
|
|
# that it is not the first token of an expression. |
5496
|
|
|
|
|
|
|
# $nesting_block_string = a string of 1's and 0's indicating, for each |
5497
|
|
|
|
|
|
|
# indentation level, if the level is of type BLOCK or not. |
5498
|
|
|
|
|
|
|
# $nesting_block_flag = the most recent 1 or 0 of $nesting_block_string |
5499
|
|
|
|
|
|
|
# $nesting_list_string = a string of 1's and 0's indicating, for each |
5500
|
|
|
|
|
|
|
# indentation level, if it is appropriate for list formatting. |
5501
|
|
|
|
|
|
|
# If so, continuation indentation is used to indent long list items. |
5502
|
|
|
|
|
|
|
# $nesting_list_flag = the most recent 1 or 0 of $nesting_list_string |
5503
|
|
|
|
|
|
|
# @{$rslevel_stack} = a stack of total nesting depths at each |
5504
|
|
|
|
|
|
|
# structural indentation level, where "total nesting depth" means |
5505
|
|
|
|
|
|
|
# the nesting depth that would occur if every nesting token |
5506
|
|
|
|
|
|
|
# -- '{', '[', # and '(' -- , regardless of context, is used to |
5507
|
|
|
|
|
|
|
# compute a nesting depth. |
5508
|
|
|
|
|
|
|
|
5509
|
|
|
|
|
|
|
# Notes on the Continuation Indentation |
5510
|
|
|
|
|
|
|
# |
5511
|
|
|
|
|
|
|
# There is a sort of chicken-and-egg problem with continuation |
5512
|
|
|
|
|
|
|
# indentation. The formatter can't make decisions on line breaks |
5513
|
|
|
|
|
|
|
# without knowing what 'ci' will be at arbitrary locations. |
5514
|
|
|
|
|
|
|
# |
5515
|
|
|
|
|
|
|
# But a problem with setting the continuation indentation (ci) here |
5516
|
|
|
|
|
|
|
# in the tokenizer is that we do not know where line breaks will |
5517
|
|
|
|
|
|
|
# actually be. As a result, we don't know if we should propagate |
5518
|
|
|
|
|
|
|
# continuation indentation to higher levels of structure. |
5519
|
|
|
|
|
|
|
# |
5520
|
|
|
|
|
|
|
# For nesting of only structural indentation, we never need to do |
5521
|
|
|
|
|
|
|
# this. For example, in a long if statement, like this |
5522
|
|
|
|
|
|
|
# |
5523
|
|
|
|
|
|
|
# if ( !$output_block_type[$i] |
5524
|
|
|
|
|
|
|
# && ($in_statement_continuation) ) |
5525
|
|
|
|
|
|
|
# { <--outdented |
5526
|
|
|
|
|
|
|
# do_something(); |
5527
|
|
|
|
|
|
|
# } |
5528
|
|
|
|
|
|
|
# |
5529
|
|
|
|
|
|
|
# the second line has ci but we do normally give the lines within |
5530
|
|
|
|
|
|
|
# the BLOCK any ci. This would be true if we had blocks nested |
5531
|
|
|
|
|
|
|
# arbitrarily deeply. |
5532
|
|
|
|
|
|
|
# |
5533
|
|
|
|
|
|
|
# But consider something like this, where we have created a break |
5534
|
|
|
|
|
|
|
# after an opening paren on line 1, and the paren is not (currently) |
5535
|
|
|
|
|
|
|
# a structural indentation token: |
5536
|
|
|
|
|
|
|
# |
5537
|
|
|
|
|
|
|
# my $file = $menubar->Menubutton( |
5538
|
|
|
|
|
|
|
# qw/-text File -underline 0 -menuitems/ => [ |
5539
|
|
|
|
|
|
|
# [ |
5540
|
|
|
|
|
|
|
# Cascade => '~View', |
5541
|
|
|
|
|
|
|
# -menuitems => [ |
5542
|
|
|
|
|
|
|
# ... |
5543
|
|
|
|
|
|
|
# |
5544
|
|
|
|
|
|
|
# The second line has ci, so it would seem reasonable to propagate |
5545
|
|
|
|
|
|
|
# it down, giving the third line 1 ci + 1 indentation. This |
5546
|
|
|
|
|
|
|
# suggests the following rule, which is currently used to |
5547
|
|
|
|
|
|
|
# propagating ci down: if there are any non-structural opening |
5548
|
|
|
|
|
|
|
# parens (or brackets, or braces), before an opening structural |
5549
|
|
|
|
|
|
|
# brace, then ci is propagated down, and otherwise |
5550
|
|
|
|
|
|
|
# not. The variable $intervening_secondary_structure contains this |
5551
|
|
|
|
|
|
|
# information for the current token, and the string |
5552
|
|
|
|
|
|
|
# "$ci_string_in_tokenizer" is a stack of previous values of this |
5553
|
|
|
|
|
|
|
# variable. |
5554
|
|
|
|
|
|
|
|
5555
|
0
|
|
|
|
|
0
|
my @token_type = (); # stack of output token types |
5556
|
0
|
|
|
|
|
0
|
my @block_type = (); # stack of output code block types |
5557
|
0
|
|
|
|
|
0
|
my @type_sequence = (); # stack of output type sequence numbers |
5558
|
0
|
|
|
|
|
0
|
my @tokens = (); # output tokens |
5559
|
0
|
|
|
|
|
0
|
my @levels = (); # structural brace levels of output tokens |
5560
|
0
|
|
|
|
|
0
|
my @ci_string = (); # string needed to compute continuation indentation |
5561
|
|
|
|
|
|
|
|
5562
|
|
|
|
|
|
|
# Count the number of '1's in the string (previously sub ones_count) |
5563
|
0
|
|
|
|
|
0
|
my $ci_string_sum = ( my $str = $ci_string_in_tokenizer ) =~ tr/1/0/; |
5564
|
|
|
|
|
|
|
|
5565
|
0
|
|
|
|
|
0
|
$line_of_tokens->{_nesting_tokens_0} = $nesting_token_string; |
5566
|
|
|
|
|
|
|
|
5567
|
0
|
|
|
|
|
0
|
my ( $ci_string_i, $level_i ); |
5568
|
|
|
|
|
|
|
|
5569
|
|
|
|
|
|
|
#----------------- |
5570
|
|
|
|
|
|
|
# Loop over tokens |
5571
|
|
|
|
|
|
|
#----------------- |
5572
|
0
|
|
|
|
|
0
|
my $rtoken_map_im; |
5573
|
0
|
|
|
|
|
0
|
foreach my $i ( @{$routput_token_list} ) { |
|
0
|
|
|
|
|
0
|
|
5574
|
|
|
|
|
|
|
|
5575
|
0
|
|
|
|
|
0
|
my $type_i = $routput_token_type->[$i]; |
5576
|
0
|
|
|
|
|
0
|
$level_i = $level_in_tokenizer; |
5577
|
|
|
|
|
|
|
|
5578
|
|
|
|
|
|
|
# Quick handling of indentation levels for blanks and comments |
5579
|
0
|
0
|
0
|
|
|
0
|
if ( $type_i eq 'b' || $type_i eq '#' ) { |
5580
|
0
|
|
|
|
|
0
|
$ci_string_i = $ci_string_sum + $in_statement_continuation; |
5581
|
|
|
|
|
|
|
} |
5582
|
|
|
|
|
|
|
|
5583
|
|
|
|
|
|
|
# All other types |
5584
|
|
|
|
|
|
|
else { |
5585
|
|
|
|
|
|
|
|
5586
|
|
|
|
|
|
|
# $tok_i is the PRE-token. It only equals the token for symbols |
5587
|
0
|
|
|
|
|
0
|
my $tok_i = $rtokens->[$i]; |
5588
|
|
|
|
|
|
|
|
5589
|
|
|
|
|
|
|
# Check for an invalid token type.. |
5590
|
|
|
|
|
|
|
# This can happen by running perltidy on non-scripts although |
5591
|
|
|
|
|
|
|
# it could also be bug introduced by programming change. Perl |
5592
|
|
|
|
|
|
|
# silently accepts a 032 (^Z) and takes it as the end |
5593
|
0
|
0
|
|
|
|
0
|
if ( !$is_valid_token_type{$type_i} ) { |
5594
|
0
|
|
|
|
|
0
|
my $val = ord($type_i); |
5595
|
0
|
|
|
|
|
0
|
$self->warning( |
5596
|
|
|
|
|
|
|
"unexpected character decimal $val ($type_i) in script\n" |
5597
|
|
|
|
|
|
|
); |
5598
|
0
|
|
|
|
|
0
|
$self->[_in_error_] = 1; |
5599
|
|
|
|
|
|
|
} |
5600
|
|
|
|
|
|
|
|
5601
|
|
|
|
|
|
|
# $ternary_indentation_flag indicates that we need a change |
5602
|
|
|
|
|
|
|
# in level at a nested ternary, as follows |
5603
|
|
|
|
|
|
|
# 1 => at a nested ternary ? |
5604
|
|
|
|
|
|
|
# -1 => at a nested ternary : |
5605
|
|
|
|
|
|
|
# 0 => otherwise |
5606
|
0
|
|
|
|
|
0
|
my $ternary_indentation_flag = $routput_indent_flag->[$i]; |
5607
|
|
|
|
|
|
|
|
5608
|
|
|
|
|
|
|
#------------------------------------------- |
5609
|
|
|
|
|
|
|
# Section 1: handle a level-increasing token |
5610
|
|
|
|
|
|
|
#------------------------------------------- |
5611
|
|
|
|
|
|
|
# set primary indentation levels based on structural braces |
5612
|
|
|
|
|
|
|
# Note: these are set so that the leading braces have a HIGHER |
5613
|
|
|
|
|
|
|
# level than their CONTENTS, which is convenient for indentation |
5614
|
|
|
|
|
|
|
# Also, define continuation indentation for each token. |
5615
|
0
|
0
|
0
|
|
|
0
|
if ( $type_i eq '{' |
|
|
0
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
5616
|
|
|
|
|
|
|
|| $type_i eq 'L' |
5617
|
|
|
|
|
|
|
|| $ternary_indentation_flag > 0 ) |
5618
|
|
|
|
|
|
|
{ |
5619
|
|
|
|
|
|
|
|
5620
|
|
|
|
|
|
|
# if the difference between total nesting levels is not 1, |
5621
|
|
|
|
|
|
|
# there are intervening non-structural nesting types between |
5622
|
|
|
|
|
|
|
# this '{' and the previous unclosed '{' |
5623
|
0
|
|
|
|
|
0
|
my $intervening_secondary_structure = 0; |
5624
|
0
|
0
|
|
|
|
0
|
if ( @{$rslevel_stack} ) { |
|
0
|
|
|
|
|
0
|
|
5625
|
0
|
|
|
|
|
0
|
$intervening_secondary_structure = |
5626
|
|
|
|
|
|
|
$slevel_in_tokenizer - $rslevel_stack->[-1]; |
5627
|
|
|
|
|
|
|
} |
5628
|
|
|
|
|
|
|
|
5629
|
|
|
|
|
|
|
# save the current states |
5630
|
0
|
|
|
|
|
0
|
push( @{$rslevel_stack}, 1 + $slevel_in_tokenizer ); |
|
0
|
|
|
|
|
0
|
|
5631
|
0
|
|
|
|
|
0
|
$level_in_tokenizer++; |
5632
|
|
|
|
|
|
|
|
5633
|
|
|
|
|
|
|
##NOTE: _maximum_level_ does not seem to be needed now |
5634
|
0
|
0
|
|
|
|
0
|
if ( $level_in_tokenizer > $self->[_maximum_level_] ) { |
5635
|
0
|
|
|
|
|
0
|
$self->[_maximum_level_] = $level_in_tokenizer; |
5636
|
|
|
|
|
|
|
} |
5637
|
|
|
|
|
|
|
|
5638
|
0
|
0
|
|
|
|
0
|
if ($ternary_indentation_flag) { |
5639
|
|
|
|
|
|
|
|
5640
|
|
|
|
|
|
|
# break BEFORE '?' in a nested ternary |
5641
|
0
|
0
|
|
|
|
0
|
if ( $type_i eq '?' ) { |
5642
|
0
|
|
|
|
|
0
|
$level_i = $level_in_tokenizer; |
5643
|
|
|
|
|
|
|
} |
5644
|
|
|
|
|
|
|
|
5645
|
0
|
|
|
|
|
0
|
$nesting_block_string .= "$nesting_block_flag"; |
5646
|
|
|
|
|
|
|
} ## end if ($ternary_indentation_flag) |
5647
|
|
|
|
|
|
|
else { |
5648
|
|
|
|
|
|
|
|
5649
|
0
|
0
|
|
|
|
0
|
if ( $routput_block_type->[$i] ) { |
5650
|
0
|
|
|
|
|
0
|
$nesting_block_flag = 1; |
5651
|
0
|
|
|
|
|
0
|
$nesting_block_string .= '1'; |
5652
|
|
|
|
|
|
|
} |
5653
|
|
|
|
|
|
|
else { |
5654
|
0
|
|
|
|
|
0
|
$nesting_block_flag = 0; |
5655
|
0
|
|
|
|
|
0
|
$nesting_block_string .= '0'; |
5656
|
|
|
|
|
|
|
} |
5657
|
|
|
|
|
|
|
} |
5658
|
|
|
|
|
|
|
|
5659
|
|
|
|
|
|
|
# we will use continuation indentation within containers |
5660
|
|
|
|
|
|
|
# which are not blocks and not logical expressions |
5661
|
0
|
|
|
|
|
0
|
my $bit = 0; |
5662
|
0
|
0
|
|
|
|
0
|
if ( !$routput_block_type->[$i] ) { |
5663
|
|
|
|
|
|
|
|
5664
|
|
|
|
|
|
|
# propagate flag down at nested open parens |
5665
|
0
|
0
|
|
|
|
0
|
if ( $routput_container_type->[$i] eq '(' ) { |
5666
|
0
|
0
|
|
|
|
0
|
$bit = 1 if $nesting_list_flag; |
5667
|
|
|
|
|
|
|
} |
5668
|
|
|
|
|
|
|
|
5669
|
|
|
|
|
|
|
# use list continuation if not a logical grouping |
5670
|
|
|
|
|
|
|
# /^(if|elsif|unless|while|and|or|not|&&|!|\|\||for|foreach)$/ |
5671
|
|
|
|
|
|
|
else { |
5672
|
|
|
|
|
|
|
$bit = 1 |
5673
|
|
|
|
|
|
|
unless |
5674
|
0
|
0
|
|
|
|
0
|
$is_logical_container{ $routput_container_type |
5675
|
|
|
|
|
|
|
->[$i] }; |
5676
|
|
|
|
|
|
|
} |
5677
|
|
|
|
|
|
|
} |
5678
|
0
|
|
|
|
|
0
|
$nesting_list_string .= $bit; |
5679
|
0
|
|
|
|
|
0
|
$nesting_list_flag = $bit; |
5680
|
|
|
|
|
|
|
|
5681
|
0
|
0
|
|
|
|
0
|
$ci_string_in_tokenizer .= |
5682
|
|
|
|
|
|
|
( $intervening_secondary_structure != 0 ) ? '1' : '0'; |
5683
|
0
|
|
|
|
|
0
|
$ci_string_sum = |
5684
|
|
|
|
|
|
|
( my $str = $ci_string_in_tokenizer ) =~ tr/1/0/; |
5685
|
0
|
0
|
|
|
|
0
|
$continuation_string_in_tokenizer .= |
5686
|
|
|
|
|
|
|
( $in_statement_continuation > 0 ) ? '1' : '0'; |
5687
|
|
|
|
|
|
|
|
5688
|
|
|
|
|
|
|
# Sometimes we want to give an opening brace |
5689
|
|
|
|
|
|
|
# continuation indentation, and sometimes not. For code |
5690
|
|
|
|
|
|
|
# blocks, we don't do it, so that the leading '{' gets |
5691
|
|
|
|
|
|
|
# outdented, like this: |
5692
|
|
|
|
|
|
|
# |
5693
|
|
|
|
|
|
|
# if ( !$output_block_type[$i] |
5694
|
|
|
|
|
|
|
# && ($in_statement_continuation) ) |
5695
|
|
|
|
|
|
|
# { <--outdented |
5696
|
|
|
|
|
|
|
# |
5697
|
|
|
|
|
|
|
# For other types, we will give them continuation |
5698
|
|
|
|
|
|
|
# indentation. For example, here is how a list looks |
5699
|
|
|
|
|
|
|
# with the opening paren indented: |
5700
|
|
|
|
|
|
|
# |
5701
|
|
|
|
|
|
|
# @LoL = |
5702
|
|
|
|
|
|
|
# ( [ "fred", "barney" ], [ "george", "jane", "elroy" ], |
5703
|
|
|
|
|
|
|
# [ "homer", "marge", "bart" ], ); |
5704
|
|
|
|
|
|
|
# |
5705
|
|
|
|
|
|
|
# This looks best when 'ci' is one-half of the |
5706
|
|
|
|
|
|
|
# indentation (i.e., 2 and 4) |
5707
|
|
|
|
|
|
|
|
5708
|
0
|
|
|
|
|
0
|
my $total_ci = $ci_string_sum; |
5709
|
0
|
0
|
0
|
|
|
0
|
if ( |
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
5710
|
|
|
|
|
|
|
!$routput_block_type->[$i] # patch: skip for BLOCK |
5711
|
|
|
|
|
|
|
&& ($in_statement_continuation) |
5712
|
|
|
|
|
|
|
&& !( $ternary_indentation_flag && $type_i eq ':' ) |
5713
|
|
|
|
|
|
|
) |
5714
|
|
|
|
|
|
|
{ |
5715
|
0
|
0
|
|
|
|
0
|
$total_ci += $in_statement_continuation |
5716
|
|
|
|
|
|
|
unless ( |
5717
|
|
|
|
|
|
|
substr( $ci_string_in_tokenizer, -1 ) eq '1' ); |
5718
|
|
|
|
|
|
|
} |
5719
|
|
|
|
|
|
|
|
5720
|
0
|
|
|
|
|
0
|
$ci_string_i = $total_ci; |
5721
|
0
|
|
|
|
|
0
|
$in_statement_continuation = 0; |
5722
|
|
|
|
|
|
|
} ## end if ( $type_i eq '{' ||...}) |
5723
|
|
|
|
|
|
|
|
5724
|
|
|
|
|
|
|
#------------------------------------------- |
5725
|
|
|
|
|
|
|
# Section 2: handle a level-decreasing token |
5726
|
|
|
|
|
|
|
#------------------------------------------- |
5727
|
|
|
|
|
|
|
elsif ($type_i eq '}' |
5728
|
|
|
|
|
|
|
|| $type_i eq 'R' |
5729
|
|
|
|
|
|
|
|| $ternary_indentation_flag < 0 ) |
5730
|
|
|
|
|
|
|
{ |
5731
|
|
|
|
|
|
|
|
5732
|
|
|
|
|
|
|
# only a nesting error in the script would prevent |
5733
|
|
|
|
|
|
|
# popping here |
5734
|
0
|
0
|
|
|
|
0
|
if ( @{$rslevel_stack} > 1 ) { pop( @{$rslevel_stack} ); } |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
5735
|
|
|
|
|
|
|
|
5736
|
0
|
|
|
|
|
0
|
$level_i = --$level_in_tokenizer; |
5737
|
|
|
|
|
|
|
|
5738
|
0
|
0
|
|
|
|
0
|
if ( $level_in_tokenizer < 0 ) { |
5739
|
0
|
0
|
|
|
|
0
|
unless ( $self->[_saw_negative_indentation_] ) { |
5740
|
0
|
|
|
|
|
0
|
$self->[_saw_negative_indentation_] = 1; |
5741
|
0
|
|
|
|
|
0
|
$self->warning("Starting negative indentation\n"); |
5742
|
|
|
|
|
|
|
} |
5743
|
|
|
|
|
|
|
} |
5744
|
|
|
|
|
|
|
|
5745
|
|
|
|
|
|
|
# restore previous level values |
5746
|
0
|
0
|
|
|
|
0
|
if ( length($nesting_block_string) > 1 ) |
5747
|
|
|
|
|
|
|
{ # true for valid script |
5748
|
0
|
|
|
|
|
0
|
chop $nesting_block_string; |
5749
|
0
|
|
|
|
|
0
|
$nesting_block_flag = |
5750
|
|
|
|
|
|
|
substr( $nesting_block_string, -1 ) eq '1'; |
5751
|
0
|
|
|
|
|
0
|
chop $nesting_list_string; |
5752
|
0
|
|
|
|
|
0
|
$nesting_list_flag = |
5753
|
|
|
|
|
|
|
substr( $nesting_list_string, -1 ) eq '1'; |
5754
|
|
|
|
|
|
|
|
5755
|
0
|
|
|
|
|
0
|
chop $ci_string_in_tokenizer; |
5756
|
0
|
|
|
|
|
0
|
$ci_string_sum = |
5757
|
|
|
|
|
|
|
( my $str = $ci_string_in_tokenizer ) =~ tr/1/0/; |
5758
|
|
|
|
|
|
|
|
5759
|
0
|
|
|
|
|
0
|
$in_statement_continuation = |
5760
|
|
|
|
|
|
|
chop $continuation_string_in_tokenizer; |
5761
|
|
|
|
|
|
|
|
5762
|
|
|
|
|
|
|
# zero continuation flag at terminal BLOCK '}' which |
5763
|
|
|
|
|
|
|
# ends a statement. |
5764
|
0
|
|
|
|
|
0
|
my $block_type_i = $routput_block_type->[$i]; |
5765
|
0
|
0
|
|
|
|
0
|
if ($block_type_i) { |
|
|
0
|
|
|
|
|
|
5766
|
|
|
|
|
|
|
|
5767
|
|
|
|
|
|
|
# ...These include non-anonymous subs |
5768
|
|
|
|
|
|
|
# note: could be sub ::abc { or sub 'abc |
5769
|
0
|
0
|
0
|
|
|
0
|
if ( substr( $block_type_i, 0, 3 ) eq 'sub' |
|
|
0
|
0
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
5770
|
|
|
|
|
|
|
&& $block_type_i =~ m/^sub\s*/gc ) |
5771
|
|
|
|
|
|
|
{ |
5772
|
|
|
|
|
|
|
|
5773
|
|
|
|
|
|
|
# note: older versions of perl require the /gc |
5774
|
|
|
|
|
|
|
# modifier here or else the \G does not work. |
5775
|
0
|
0
|
|
|
|
0
|
$in_statement_continuation = 0 |
5776
|
|
|
|
|
|
|
if ( $block_type_i =~ /\G('|::|\w)/gc ); |
5777
|
|
|
|
|
|
|
} |
5778
|
|
|
|
|
|
|
|
5779
|
|
|
|
|
|
|
# ...and include all block types except user subs |
5780
|
|
|
|
|
|
|
# with block prototypes and these: |
5781
|
|
|
|
|
|
|
# (sort|grep|map|do|eval) |
5782
|
|
|
|
|
|
|
elsif ( |
5783
|
|
|
|
|
|
|
$is_zero_continuation_block_type{$block_type_i} |
5784
|
|
|
|
|
|
|
) |
5785
|
|
|
|
|
|
|
{ |
5786
|
0
|
|
|
|
|
0
|
$in_statement_continuation = 0; |
5787
|
|
|
|
|
|
|
} |
5788
|
|
|
|
|
|
|
|
5789
|
|
|
|
|
|
|
# ..but these are not terminal types: |
5790
|
|
|
|
|
|
|
# /^(sort|grep|map|do|eval)$/ ) |
5791
|
|
|
|
|
|
|
elsif ($is_sort_map_grep_eval_do{$block_type_i} |
5792
|
|
|
|
|
|
|
|| $is_grep_alias{$block_type_i} ) |
5793
|
|
|
|
|
|
|
{ |
5794
|
|
|
|
|
|
|
} |
5795
|
|
|
|
|
|
|
|
5796
|
|
|
|
|
|
|
# ..and a block introduced by a label |
5797
|
|
|
|
|
|
|
# /^\w+\s*:$/gc ) { |
5798
|
|
|
|
|
|
|
elsif ( $block_type_i =~ /:$/ ) { |
5799
|
0
|
|
|
|
|
0
|
$in_statement_continuation = 0; |
5800
|
|
|
|
|
|
|
} |
5801
|
|
|
|
|
|
|
|
5802
|
|
|
|
|
|
|
# user function with block prototype |
5803
|
|
|
|
|
|
|
else { |
5804
|
0
|
|
|
|
|
0
|
$in_statement_continuation = 0; |
5805
|
|
|
|
|
|
|
} |
5806
|
|
|
|
|
|
|
} ## end if ($block_type_i) |
5807
|
|
|
|
|
|
|
|
5808
|
|
|
|
|
|
|
# If we are in a list, then |
5809
|
|
|
|
|
|
|
# we must set continuation indentation at the closing |
5810
|
|
|
|
|
|
|
# paren of something like this (paren after $check): |
5811
|
|
|
|
|
|
|
# assert( |
5812
|
|
|
|
|
|
|
# __LINE__, |
5813
|
|
|
|
|
|
|
# ( not defined $check ) |
5814
|
|
|
|
|
|
|
# or ref $check |
5815
|
|
|
|
|
|
|
# or $check eq "new" |
5816
|
|
|
|
|
|
|
# or $check eq "old", |
5817
|
|
|
|
|
|
|
# ); |
5818
|
|
|
|
|
|
|
elsif ( $tok_i eq ')' ) { |
5819
|
|
|
|
|
|
|
$in_statement_continuation = 1 |
5820
|
|
|
|
|
|
|
if ( |
5821
|
|
|
|
|
|
|
$is_list_end_type{ |
5822
|
0
|
0
|
|
|
|
0
|
$routput_container_type->[$i] |
5823
|
|
|
|
|
|
|
} |
5824
|
|
|
|
|
|
|
); |
5825
|
|
|
|
|
|
|
##if $routput_container_type->[$i] =~ /^[;,\{\}]$/; |
5826
|
|
|
|
|
|
|
} |
5827
|
|
|
|
|
|
|
} ## end if ( length($nesting_block_string...)) |
5828
|
|
|
|
|
|
|
|
5829
|
0
|
|
|
|
|
0
|
$ci_string_i = $ci_string_sum + $in_statement_continuation; |
5830
|
|
|
|
|
|
|
} ## end elsif ( $type_i eq '}' ||...{) |
5831
|
|
|
|
|
|
|
|
5832
|
|
|
|
|
|
|
#----------------------------------------- |
5833
|
|
|
|
|
|
|
# Section 3: handle a constant level token |
5834
|
|
|
|
|
|
|
#----------------------------------------- |
5835
|
|
|
|
|
|
|
else { |
5836
|
|
|
|
|
|
|
|
5837
|
|
|
|
|
|
|
# zero the continuation indentation at certain tokens so |
5838
|
|
|
|
|
|
|
# that they will be at the same level as its container. For |
5839
|
|
|
|
|
|
|
# commas, this simplifies the -lp indentation logic, which |
5840
|
|
|
|
|
|
|
# counts commas. For ?: it makes them stand out. |
5841
|
0
|
0
|
0
|
|
|
0
|
if ( |
5842
|
|
|
|
|
|
|
$nesting_list_flag |
5843
|
|
|
|
|
|
|
## $type_i =~ /^[,\?\:]$/ |
5844
|
|
|
|
|
|
|
&& $is_comma_question_colon{$type_i} |
5845
|
|
|
|
|
|
|
) |
5846
|
|
|
|
|
|
|
{ |
5847
|
0
|
|
|
|
|
0
|
$in_statement_continuation = 0; |
5848
|
|
|
|
|
|
|
} |
5849
|
|
|
|
|
|
|
|
5850
|
|
|
|
|
|
|
# Be sure binary operators get continuation indentation. |
5851
|
|
|
|
|
|
|
# Note: the check on $nesting_block_flag is only needed |
5852
|
|
|
|
|
|
|
# to add ci to binary operators following a 'try' block, |
5853
|
|
|
|
|
|
|
# or similar extended syntax block operator (see c158). |
5854
|
0
|
0
|
0
|
|
|
0
|
if ( |
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
5855
|
|
|
|
|
|
|
!$in_statement_continuation |
5856
|
|
|
|
|
|
|
&& ( $nesting_block_flag || $nesting_list_flag ) |
5857
|
|
|
|
|
|
|
&& ( $type_i eq 'k' && $is_binary_keyword{$tok_i} |
5858
|
|
|
|
|
|
|
|| $is_binary_type{$type_i} ) |
5859
|
|
|
|
|
|
|
) |
5860
|
|
|
|
|
|
|
{ |
5861
|
0
|
|
|
|
|
0
|
$in_statement_continuation = 1; |
5862
|
|
|
|
|
|
|
} |
5863
|
|
|
|
|
|
|
|
5864
|
|
|
|
|
|
|
# continuation indentation is sum of any open ci from |
5865
|
|
|
|
|
|
|
# previous levels plus the current level |
5866
|
0
|
|
|
|
|
0
|
$ci_string_i = $ci_string_sum + $in_statement_continuation; |
5867
|
|
|
|
|
|
|
|
5868
|
|
|
|
|
|
|
# update continuation flag ... |
5869
|
|
|
|
|
|
|
|
5870
|
|
|
|
|
|
|
# if we are in a BLOCK |
5871
|
0
|
0
|
|
|
|
0
|
if ($nesting_block_flag) { |
5872
|
|
|
|
|
|
|
|
5873
|
|
|
|
|
|
|
# the next token after a ';' and label starts a new stmt |
5874
|
0
|
0
|
0
|
|
|
0
|
if ( $type_i eq ';' || $type_i eq 'J' ) { |
5875
|
0
|
|
|
|
|
0
|
$in_statement_continuation = 0; |
5876
|
|
|
|
|
|
|
} |
5877
|
|
|
|
|
|
|
|
5878
|
|
|
|
|
|
|
# otherwise, we are continuing the current statement |
5879
|
|
|
|
|
|
|
else { |
5880
|
0
|
|
|
|
|
0
|
$in_statement_continuation = 1; |
5881
|
|
|
|
|
|
|
} |
5882
|
|
|
|
|
|
|
} |
5883
|
|
|
|
|
|
|
|
5884
|
|
|
|
|
|
|
# if we are not in a BLOCK.. |
5885
|
|
|
|
|
|
|
else { |
5886
|
|
|
|
|
|
|
|
5887
|
|
|
|
|
|
|
# do not use continuation indentation if not list |
5888
|
|
|
|
|
|
|
# environment (could be within if/elsif clause) |
5889
|
0
|
0
|
0
|
|
|
0
|
if ( !$nesting_list_flag ) { |
|
|
0
|
|
|
|
|
|
5890
|
0
|
|
|
|
|
0
|
$in_statement_continuation = 0; |
5891
|
|
|
|
|
|
|
} |
5892
|
|
|
|
|
|
|
|
5893
|
|
|
|
|
|
|
# otherwise, the token after a ',' starts a new term |
5894
|
|
|
|
|
|
|
|
5895
|
|
|
|
|
|
|
# Patch FOR RT#99961; no continuation after a ';' |
5896
|
|
|
|
|
|
|
# This is needed because perltidy currently marks |
5897
|
|
|
|
|
|
|
# a block preceded by a type character like % or @ |
5898
|
|
|
|
|
|
|
# as a non block, to simplify formatting. But these |
5899
|
|
|
|
|
|
|
# are actually blocks and can have semicolons. |
5900
|
|
|
|
|
|
|
# See code_block_type() and is_non_structural_brace(). |
5901
|
|
|
|
|
|
|
elsif ( $type_i eq ',' || $type_i eq ';' ) { |
5902
|
0
|
|
|
|
|
0
|
$in_statement_continuation = 0; |
5903
|
|
|
|
|
|
|
} |
5904
|
|
|
|
|
|
|
|
5905
|
|
|
|
|
|
|
# otherwise, we are continuing the current term |
5906
|
|
|
|
|
|
|
else { |
5907
|
0
|
|
|
|
|
0
|
$in_statement_continuation = 1; |
5908
|
|
|
|
|
|
|
} |
5909
|
|
|
|
|
|
|
} ## end else [ if ($nesting_block_flag)] |
5910
|
|
|
|
|
|
|
|
5911
|
|
|
|
|
|
|
} ## end else [ if ( $type_i eq '{' ||...})] |
5912
|
|
|
|
|
|
|
|
5913
|
|
|
|
|
|
|
#------------------------------------------- |
5914
|
|
|
|
|
|
|
# Section 4: operations common to all levels |
5915
|
|
|
|
|
|
|
#------------------------------------------- |
5916
|
|
|
|
|
|
|
|
5917
|
|
|
|
|
|
|
# set secondary nesting levels based on all containment token |
5918
|
|
|
|
|
|
|
# types Note: these are set so that the nesting depth is the |
5919
|
|
|
|
|
|
|
# depth of the PREVIOUS TOKEN, which is convenient for setting |
5920
|
|
|
|
|
|
|
# the strength of token bonds |
5921
|
|
|
|
|
|
|
|
5922
|
|
|
|
|
|
|
# /^[L\{\(\[]$/ |
5923
|
0
|
0
|
|
|
|
0
|
if ( $is_opening_type{$type_i} ) { |
|
|
0
|
|
|
|
|
|
5924
|
0
|
|
|
|
|
0
|
$slevel_in_tokenizer++; |
5925
|
0
|
|
|
|
|
0
|
$nesting_token_string .= $tok_i; |
5926
|
0
|
|
|
|
|
0
|
$nesting_type_string .= $type_i; |
5927
|
|
|
|
|
|
|
} |
5928
|
|
|
|
|
|
|
|
5929
|
|
|
|
|
|
|
# /^[R\}\)\]]$/ |
5930
|
|
|
|
|
|
|
elsif ( $is_closing_type{$type_i} ) { |
5931
|
0
|
|
|
|
|
0
|
$slevel_in_tokenizer--; |
5932
|
0
|
|
|
|
|
0
|
my $char = chop $nesting_token_string; |
5933
|
|
|
|
|
|
|
|
5934
|
0
|
0
|
|
|
|
0
|
if ( $char ne $matching_start_token{$tok_i} ) { |
5935
|
0
|
|
|
|
|
0
|
$nesting_token_string .= $char . $tok_i; |
5936
|
0
|
|
|
|
|
0
|
$nesting_type_string .= $type_i; |
5937
|
|
|
|
|
|
|
} |
5938
|
|
|
|
|
|
|
else { |
5939
|
0
|
|
|
|
|
0
|
chop $nesting_type_string; |
5940
|
|
|
|
|
|
|
} |
5941
|
|
|
|
|
|
|
} |
5942
|
|
|
|
|
|
|
|
5943
|
|
|
|
|
|
|
# apply token type patch: |
5944
|
|
|
|
|
|
|
# - output anonymous 'sub' as keyword (type 'k') |
5945
|
|
|
|
|
|
|
# - output __END__, __DATA__, and format as type 'k' instead |
5946
|
|
|
|
|
|
|
# of ';' to make html colors correct, etc. |
5947
|
|
|
|
|
|
|
# The following hash tests are equivalent to these older tests: |
5948
|
|
|
|
|
|
|
# if ( $type_i eq 't' && $is_sub{$tok_i} ) { $fix_type = 'k' } |
5949
|
|
|
|
|
|
|
# if ( $type_i eq ';' && $tok_i =~ /\w/ ) { $fix_type = 'k' } |
5950
|
0
|
0
|
0
|
|
|
0
|
if ( $is_END_DATA_format_sub{$tok_i} |
5951
|
|
|
|
|
|
|
&& $is_semicolon_or_t{$type_i} ) |
5952
|
|
|
|
|
|
|
{ |
5953
|
0
|
|
|
|
|
0
|
$type_i = 'k'; |
5954
|
|
|
|
|
|
|
} |
5955
|
|
|
|
|
|
|
} ## end else [ if ( $type_i eq 'b' ||...)] |
5956
|
|
|
|
|
|
|
|
5957
|
|
|
|
|
|
|
#-------------------------------- |
5958
|
|
|
|
|
|
|
# Store the values for this token |
5959
|
|
|
|
|
|
|
#-------------------------------- |
5960
|
0
|
0
|
|
|
|
0
|
push( @ci_string, $ci_string_i ? 1 : 0 ); # clip ci to 1 |
5961
|
0
|
|
|
|
|
0
|
push( @levels, $level_i ); |
5962
|
0
|
|
|
|
|
0
|
push( @block_type, $routput_block_type->[$i] ); |
5963
|
0
|
|
|
|
|
0
|
push( @type_sequence, $routput_type_sequence->[$i] ); |
5964
|
0
|
|
|
|
|
0
|
push( @token_type, $type_i ); |
5965
|
|
|
|
|
|
|
|
5966
|
|
|
|
|
|
|
# Form and store the PREVIOUS token |
5967
|
0
|
0
|
|
|
|
0
|
if ( defined($rtoken_map_im) ) { |
5968
|
0
|
|
|
|
|
0
|
my $numc = |
5969
|
|
|
|
|
|
|
$rtoken_map->[$i] - $rtoken_map_im; # how many characters |
5970
|
|
|
|
|
|
|
|
5971
|
0
|
0
|
|
|
|
0
|
if ( $numc > 0 ) { |
5972
|
0
|
|
|
|
|
0
|
push( @tokens, |
5973
|
|
|
|
|
|
|
substr( $input_line, $rtoken_map_im, $numc ) ); |
5974
|
|
|
|
|
|
|
} |
5975
|
|
|
|
|
|
|
else { |
5976
|
|
|
|
|
|
|
|
5977
|
|
|
|
|
|
|
# Should not happen unless @{$rtoken_map} is corrupted |
5978
|
0
|
|
|
|
|
0
|
DEVEL_MODE |
5979
|
|
|
|
|
|
|
&& $self->Fault( |
5980
|
|
|
|
|
|
|
"number of characters is '$numc' but should be >0\n"); |
5981
|
|
|
|
|
|
|
} |
5982
|
|
|
|
|
|
|
} |
5983
|
|
|
|
|
|
|
|
5984
|
|
|
|
|
|
|
# or grab some values for the leading token (needed for log output) |
5985
|
|
|
|
|
|
|
else { |
5986
|
0
|
|
|
|
|
0
|
$line_of_tokens->{_nesting_blocks_0} = $nesting_block_string; |
5987
|
|
|
|
|
|
|
} |
5988
|
|
|
|
|
|
|
|
5989
|
0
|
|
|
|
|
0
|
$rtoken_map_im = $rtoken_map->[$i]; |
5990
|
|
|
|
|
|
|
} ## end foreach my $i ( @{$routput_token_list...}) |
5991
|
|
|
|
|
|
|
|
5992
|
|
|
|
|
|
|
#------------------------ |
5993
|
|
|
|
|
|
|
# End loop to over tokens |
5994
|
|
|
|
|
|
|
#------------------------ |
5995
|
|
|
|
|
|
|
|
5996
|
|
|
|
|
|
|
# Form and store the final token of this line |
5997
|
0
|
0
|
|
|
|
0
|
if ( defined($rtoken_map_im) ) { |
5998
|
0
|
|
|
|
|
0
|
my $numc = length($input_line) - $rtoken_map_im; |
5999
|
0
|
0
|
|
|
|
0
|
if ( $numc > 0 ) { |
6000
|
0
|
|
|
|
|
0
|
push( @tokens, substr( $input_line, $rtoken_map_im, $numc ) ); |
6001
|
|
|
|
|
|
|
} |
6002
|
|
|
|
|
|
|
else { |
6003
|
|
|
|
|
|
|
|
6004
|
|
|
|
|
|
|
# Should not happen unless @{$rtoken_map} is corrupted |
6005
|
0
|
|
|
|
|
0
|
DEVEL_MODE |
6006
|
|
|
|
|
|
|
&& $self->Fault( |
6007
|
|
|
|
|
|
|
"Number of Characters is '$numc' but should be >0\n"); |
6008
|
|
|
|
|
|
|
} |
6009
|
|
|
|
|
|
|
} |
6010
|
|
|
|
|
|
|
|
6011
|
|
|
|
|
|
|
#---------------------------------------------------------- |
6012
|
|
|
|
|
|
|
# Wrap up this line of tokens for shipping to the Formatter |
6013
|
|
|
|
|
|
|
#---------------------------------------------------------- |
6014
|
0
|
|
|
|
|
0
|
$line_of_tokens->{_rtoken_type} = \@token_type; |
6015
|
0
|
|
|
|
|
0
|
$line_of_tokens->{_rtokens} = \@tokens; |
6016
|
0
|
|
|
|
|
0
|
$line_of_tokens->{_rblock_type} = \@block_type; |
6017
|
0
|
|
|
|
|
0
|
$line_of_tokens->{_rtype_sequence} = \@type_sequence; |
6018
|
0
|
|
|
|
|
0
|
$line_of_tokens->{_rlevels} = \@levels; |
6019
|
0
|
|
|
|
|
0
|
$line_of_tokens->{_rci_levels} = \@ci_string; |
6020
|
|
|
|
|
|
|
|
6021
|
0
|
|
|
|
|
0
|
return; |
6022
|
|
|
|
|
|
|
} ## end sub OLD_tokenizer_wrapup_line |
6023
|
|
|
|
|
|
|
|
6024
|
|
|
|
|
|
|
sub tokenizer_wrapup_line { |
6025
|
5895
|
|
|
5895
|
0
|
11401
|
my ( $self, $line_of_tokens ) = @_; |
6026
|
|
|
|
|
|
|
|
6027
|
|
|
|
|
|
|
#--------------------------------------------------------- |
6028
|
|
|
|
|
|
|
# Package a line of tokens for shipping back to the caller |
6029
|
|
|
|
|
|
|
#--------------------------------------------------------- |
6030
|
|
|
|
|
|
|
|
6031
|
|
|
|
|
|
|
# Note: This is the new version of this routine. It does not compute |
6032
|
|
|
|
|
|
|
# continuation indentation; it returns values ci=0. The ci values |
6033
|
|
|
|
|
|
|
# are computed later by sub Formatter::set_ci. |
6034
|
|
|
|
|
|
|
|
6035
|
|
|
|
|
|
|
# Arrays to hold token values for this line: |
6036
|
5895
|
|
|
|
|
10447
|
my @levels = (); # structural brace levels of output tokens |
6037
|
5895
|
|
|
|
|
8740
|
my @block_type = (); # stack of output code block types |
6038
|
5895
|
|
|
|
|
9314
|
my @type_sequence = (); # stack of output type sequence numbers |
6039
|
5895
|
|
|
|
|
8435
|
my @token_type = (); # stack of output token types |
6040
|
5895
|
|
|
|
|
8485
|
my @tokens = (); # output tokens |
6041
|
|
|
|
|
|
|
|
6042
|
5895
|
|
|
|
|
13816
|
$line_of_tokens->{_nesting_tokens_0} = $nesting_token_string; |
6043
|
|
|
|
|
|
|
|
6044
|
|
|
|
|
|
|
# Remember starting nesting block string |
6045
|
5895
|
|
|
|
|
9470
|
my $nesting_block_string_0 = $nesting_block_string; |
6046
|
|
|
|
|
|
|
|
6047
|
|
|
|
|
|
|
#----------------- |
6048
|
|
|
|
|
|
|
# Loop over tokens |
6049
|
|
|
|
|
|
|
#----------------- |
6050
|
5895
|
|
|
|
|
8279
|
my $rtoken_map_im; |
6051
|
|
|
|
|
|
|
|
6052
|
|
|
|
|
|
|
# $i is the index of the pretoken which starts this full token |
6053
|
5895
|
|
|
|
|
9164
|
foreach my $i ( @{$routput_token_list} ) { |
|
5895
|
|
|
|
|
12814
|
|
6054
|
|
|
|
|
|
|
|
6055
|
50578
|
|
|
|
|
73883
|
my $type_i = $routput_token_type->[$i]; |
6056
|
|
|
|
|
|
|
|
6057
|
|
|
|
|
|
|
#-------------------------------- |
6058
|
|
|
|
|
|
|
# 1. Handle a non-sequenced token |
6059
|
|
|
|
|
|
|
#-------------------------------- |
6060
|
50578
|
100
|
|
|
|
79824
|
if ( !$routput_type_sequence->[$i] ) { |
6061
|
|
|
|
|
|
|
|
6062
|
|
|
|
|
|
|
# 1.1 types ';' and 't' |
6063
|
|
|
|
|
|
|
# - output anonymous 'sub' as keyword (type 'k') |
6064
|
|
|
|
|
|
|
# - output __END__, __DATA__, and format as type 'k' instead |
6065
|
|
|
|
|
|
|
# of ';' to make html colors correct, etc. |
6066
|
41456
|
100
|
|
|
|
96097
|
if ( $is_semicolon_or_t{$type_i} ) { |
|
|
50
|
|
|
|
|
|
6067
|
2674
|
|
|
|
|
6262
|
my $tok_i = $rtokens->[$i]; |
6068
|
2674
|
100
|
|
|
|
7504
|
if ( $is_END_DATA_format_sub{$tok_i} ) { |
6069
|
172
|
|
|
|
|
536
|
$type_i = 'k'; |
6070
|
|
|
|
|
|
|
} |
6071
|
|
|
|
|
|
|
} |
6072
|
|
|
|
|
|
|
|
6073
|
|
|
|
|
|
|
# 1.2 Check for an invalid token type.. |
6074
|
|
|
|
|
|
|
# This can happen by running perltidy on non-scripts although |
6075
|
|
|
|
|
|
|
# it could also be bug introduced by programming change. Perl |
6076
|
|
|
|
|
|
|
# silently accepts a 032 (^Z) and takes it as the end |
6077
|
|
|
|
|
|
|
elsif ( !$is_valid_token_type{$type_i} ) { |
6078
|
0
|
|
|
|
|
0
|
my $val = ord($type_i); |
6079
|
0
|
|
|
|
|
0
|
$self->warning( |
6080
|
|
|
|
|
|
|
"unexpected character decimal $val ($type_i) in script\n" |
6081
|
|
|
|
|
|
|
); |
6082
|
0
|
|
|
|
|
0
|
$self->[_in_error_] = 1; |
6083
|
|
|
|
|
|
|
} |
6084
|
|
|
|
|
|
|
|
6085
|
|
|
|
|
|
|
# Store values for a non-sequenced token |
6086
|
41456
|
|
|
|
|
67147
|
push( @levels, $level_in_tokenizer ); |
6087
|
41456
|
|
|
|
|
62770
|
push( @block_type, EMPTY_STRING ); |
6088
|
41456
|
|
|
|
|
60594
|
push( @type_sequence, EMPTY_STRING ); |
6089
|
41456
|
|
|
|
|
78946
|
push( @token_type, $type_i ); |
6090
|
|
|
|
|
|
|
|
6091
|
|
|
|
|
|
|
} |
6092
|
|
|
|
|
|
|
|
6093
|
|
|
|
|
|
|
#---------------------------- |
6094
|
|
|
|
|
|
|
# 2. Handle a sequenced token |
6095
|
|
|
|
|
|
|
# One of { [ ( ? ) ] } : |
6096
|
|
|
|
|
|
|
#---------------------------- |
6097
|
|
|
|
|
|
|
else { |
6098
|
|
|
|
|
|
|
|
6099
|
|
|
|
|
|
|
# $level_i is the level we will store. Levels of braces are |
6100
|
|
|
|
|
|
|
# set so that the leading braces have a HIGHER level than their |
6101
|
|
|
|
|
|
|
# CONTENTS, which is convenient for indentation. |
6102
|
9122
|
|
|
|
|
13706
|
my $level_i = $level_in_tokenizer; |
6103
|
|
|
|
|
|
|
|
6104
|
|
|
|
|
|
|
# $tok_i is the PRE-token. It only equals the token for symbols |
6105
|
9122
|
|
|
|
|
14365
|
my $tok_i = $rtokens->[$i]; |
6106
|
|
|
|
|
|
|
|
6107
|
|
|
|
|
|
|
# $routput_indent_flag->[$i] indicates that we need a change |
6108
|
|
|
|
|
|
|
# in level at a nested ternary, as follows |
6109
|
|
|
|
|
|
|
# 1 => at a nested ternary ? |
6110
|
|
|
|
|
|
|
# -1 => at a nested ternary : |
6111
|
|
|
|
|
|
|
# 0 => otherwise |
6112
|
|
|
|
|
|
|
|
6113
|
|
|
|
|
|
|
#------------------------------------ |
6114
|
|
|
|
|
|
|
# 2.1 handle a level-increasing token |
6115
|
|
|
|
|
|
|
#------------------------------------ |
6116
|
9122
|
100
|
|
|
|
24292
|
if ( $is_opening_or_ternary_type{$type_i} ) { |
|
|
50
|
|
|
|
|
|
6117
|
|
|
|
|
|
|
|
6118
|
4561
|
100
|
|
|
|
9089
|
if ( $type_i eq '?' ) { |
6119
|
|
|
|
|
|
|
|
6120
|
187
|
100
|
|
|
|
927
|
if ( $routput_indent_flag->[$i] > 0 ) { |
6121
|
8
|
|
|
|
|
19
|
$level_in_tokenizer++; |
6122
|
|
|
|
|
|
|
|
6123
|
|
|
|
|
|
|
# break BEFORE '?' in a nested ternary |
6124
|
8
|
|
|
|
|
14
|
$level_i = $level_in_tokenizer; |
6125
|
8
|
|
|
|
|
21
|
$nesting_block_string .= "$nesting_block_flag"; |
6126
|
|
|
|
|
|
|
|
6127
|
|
|
|
|
|
|
} |
6128
|
|
|
|
|
|
|
} |
6129
|
|
|
|
|
|
|
else { |
6130
|
|
|
|
|
|
|
|
6131
|
4374
|
|
|
|
|
7406
|
$nesting_token_string .= $tok_i; |
6132
|
|
|
|
|
|
|
|
6133
|
4374
|
100
|
100
|
|
|
12255
|
if ( $type_i eq '{' || $type_i eq 'L' ) { |
6134
|
|
|
|
|
|
|
|
6135
|
4067
|
|
|
|
|
5926
|
$level_in_tokenizer++; |
6136
|
|
|
|
|
|
|
|
6137
|
4067
|
100
|
|
|
|
8046
|
if ( $routput_block_type->[$i] ) { |
6138
|
967
|
|
|
|
|
1964
|
$nesting_block_flag = 1; |
6139
|
967
|
|
|
|
|
1939
|
$nesting_block_string .= '1'; |
6140
|
|
|
|
|
|
|
} |
6141
|
|
|
|
|
|
|
else { |
6142
|
3100
|
|
|
|
|
5260
|
$nesting_block_flag = 0; |
6143
|
3100
|
|
|
|
|
5535
|
$nesting_block_string .= '0'; |
6144
|
|
|
|
|
|
|
} |
6145
|
|
|
|
|
|
|
} |
6146
|
|
|
|
|
|
|
} |
6147
|
|
|
|
|
|
|
} |
6148
|
|
|
|
|
|
|
|
6149
|
|
|
|
|
|
|
#------------------------------------ |
6150
|
|
|
|
|
|
|
# 2.2 handle a level-decreasing token |
6151
|
|
|
|
|
|
|
#------------------------------------ |
6152
|
|
|
|
|
|
|
elsif ( $is_closing_or_ternary_type{$type_i} ) { |
6153
|
|
|
|
|
|
|
|
6154
|
4561
|
100
|
|
|
|
10264
|
if ( $type_i ne ':' ) { |
6155
|
4374
|
|
|
|
|
8216
|
my $char = chop $nesting_token_string; |
6156
|
4374
|
50
|
|
|
|
11282
|
if ( $char ne $matching_start_token{$tok_i} ) { |
6157
|
0
|
|
|
|
|
0
|
$nesting_token_string .= $char . $tok_i; |
6158
|
|
|
|
|
|
|
} |
6159
|
|
|
|
|
|
|
} |
6160
|
|
|
|
|
|
|
|
6161
|
4561
|
100
|
100
|
|
|
15286
|
if ( |
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
6162
|
|
|
|
|
|
|
$type_i eq '}' |
6163
|
|
|
|
|
|
|
|| $type_i eq 'R' |
6164
|
|
|
|
|
|
|
|
6165
|
|
|
|
|
|
|
# only the second and higher ? : have levels |
6166
|
|
|
|
|
|
|
|| $type_i eq ':' && $routput_indent_flag->[$i] < 0 |
6167
|
|
|
|
|
|
|
) |
6168
|
|
|
|
|
|
|
{ |
6169
|
|
|
|
|
|
|
|
6170
|
4075
|
|
|
|
|
6309
|
$level_i = --$level_in_tokenizer; |
6171
|
|
|
|
|
|
|
|
6172
|
4075
|
50
|
|
|
|
7994
|
if ( $level_in_tokenizer < 0 ) { |
6173
|
0
|
0
|
|
|
|
0
|
unless ( $self->[_saw_negative_indentation_] ) { |
6174
|
0
|
|
|
|
|
0
|
$self->[_saw_negative_indentation_] = 1; |
6175
|
0
|
|
|
|
|
0
|
$self->warning( |
6176
|
|
|
|
|
|
|
"Starting negative indentation\n"); |
6177
|
|
|
|
|
|
|
} |
6178
|
|
|
|
|
|
|
} |
6179
|
|
|
|
|
|
|
|
6180
|
|
|
|
|
|
|
# restore previous level values |
6181
|
4075
|
50
|
|
|
|
8824
|
if ( length($nesting_block_string) > 1 ) |
6182
|
|
|
|
|
|
|
{ # true for valid script |
6183
|
4075
|
|
|
|
|
5882
|
chop $nesting_block_string; |
6184
|
4075
|
|
|
|
|
7973
|
$nesting_block_flag = |
6185
|
|
|
|
|
|
|
substr( $nesting_block_string, -1 ) eq '1'; |
6186
|
|
|
|
|
|
|
} |
6187
|
|
|
|
|
|
|
|
6188
|
|
|
|
|
|
|
} |
6189
|
|
|
|
|
|
|
} |
6190
|
|
|
|
|
|
|
|
6191
|
|
|
|
|
|
|
#------------------------------------------------------- |
6192
|
|
|
|
|
|
|
# 2.3 Unexpected sequenced token type - shouldn't happen |
6193
|
|
|
|
|
|
|
#------------------------------------------------------- |
6194
|
|
|
|
|
|
|
else { |
6195
|
|
|
|
|
|
|
|
6196
|
|
|
|
|
|
|
# The tokenizer should only be assigning sequence numbers |
6197
|
|
|
|
|
|
|
# to types { [ ( ? ) ] } : |
6198
|
0
|
|
|
|
|
0
|
DEVEL_MODE && $self->Fault(<<EOM); |
6199
|
|
|
|
|
|
|
unexpected sequence number on token type $type_i with pre-tok=$tok_i |
6200
|
|
|
|
|
|
|
EOM |
6201
|
|
|
|
|
|
|
} |
6202
|
|
|
|
|
|
|
|
6203
|
|
|
|
|
|
|
# The starting nesting block string, which is used in any .LOG |
6204
|
|
|
|
|
|
|
# output, should include the first token of the line |
6205
|
9122
|
100
|
|
|
|
18290
|
if ( !@levels ) { |
6206
|
1574
|
|
|
|
|
2860
|
$nesting_block_string_0 = $nesting_block_string; |
6207
|
|
|
|
|
|
|
} |
6208
|
|
|
|
|
|
|
|
6209
|
|
|
|
|
|
|
# Store values for a sequenced token |
6210
|
9122
|
|
|
|
|
17077
|
push( @levels, $level_i ); |
6211
|
9122
|
|
|
|
|
17407
|
push( @block_type, $routput_block_type->[$i] ); |
6212
|
9122
|
|
|
|
|
15666
|
push( @type_sequence, $routput_type_sequence->[$i] ); |
6213
|
9122
|
|
|
|
|
19385
|
push( @token_type, $type_i ); |
6214
|
|
|
|
|
|
|
|
6215
|
|
|
|
|
|
|
} |
6216
|
|
|
|
|
|
|
|
6217
|
|
|
|
|
|
|
} |
6218
|
|
|
|
|
|
|
|
6219
|
|
|
|
|
|
|
# End loop to over tokens |
6220
|
|
|
|
|
|
|
|
6221
|
5895
|
|
|
|
|
14565
|
$line_of_tokens->{_nesting_blocks_0} = $nesting_block_string_0; |
6222
|
|
|
|
|
|
|
|
6223
|
|
|
|
|
|
|
#-------------------------- |
6224
|
|
|
|
|
|
|
# Form and store the tokens |
6225
|
|
|
|
|
|
|
#-------------------------- |
6226
|
5895
|
50
|
|
|
|
13742
|
if (@levels) { |
6227
|
|
|
|
|
|
|
|
6228
|
5895
|
|
|
|
|
8211
|
my $im = shift @{$routput_token_list}; |
|
5895
|
|
|
|
|
11201
|
|
6229
|
5895
|
|
|
|
|
10941
|
my $offset = $rtoken_map->[$im]; |
6230
|
5895
|
|
|
|
|
8588
|
foreach my $i ( @{$routput_token_list} ) { |
|
5895
|
|
|
|
|
11000
|
|
6231
|
44683
|
|
|
|
|
58561
|
my $numc = $rtoken_map->[$i] - $offset; |
6232
|
44683
|
|
|
|
|
80435
|
push( @tokens, substr( $input_line, $offset, $numc ) ); |
6233
|
|
|
|
|
|
|
|
6234
|
44683
|
|
|
|
|
53844
|
if ( DEVEL_MODE && $numc <= 0 ) { |
6235
|
|
|
|
|
|
|
|
6236
|
|
|
|
|
|
|
# Should not happen unless @{$rtoken_map} is corrupted |
6237
|
|
|
|
|
|
|
$self->Fault( |
6238
|
|
|
|
|
|
|
"number of characters is '$numc' but should be >0\n"); |
6239
|
|
|
|
|
|
|
} |
6240
|
44683
|
|
|
|
|
65556
|
$offset = $rtoken_map->[$i]; |
6241
|
|
|
|
|
|
|
} |
6242
|
|
|
|
|
|
|
|
6243
|
|
|
|
|
|
|
# Form and store the final token of this line |
6244
|
5895
|
|
|
|
|
11594
|
my $numc = length($input_line) - $offset; |
6245
|
5895
|
|
|
|
|
12837
|
push( @tokens, substr( $input_line, $offset, $numc ) ); |
6246
|
|
|
|
|
|
|
|
6247
|
5895
|
|
|
|
|
9321
|
if ( DEVEL_MODE && $numc <= 0 ) { |
6248
|
|
|
|
|
|
|
$self->Fault( |
6249
|
|
|
|
|
|
|
"Number of Characters is '$numc' but should be >0\n"); |
6250
|
|
|
|
|
|
|
} |
6251
|
|
|
|
|
|
|
} |
6252
|
|
|
|
|
|
|
|
6253
|
|
|
|
|
|
|
# This sub returns zero ci values |
6254
|
5895
|
|
|
|
|
20735
|
my @ci_levels = (0) x scalar(@levels); |
6255
|
|
|
|
|
|
|
|
6256
|
|
|
|
|
|
|
#---------------------------------------------------------- |
6257
|
|
|
|
|
|
|
# Wrap up this line of tokens for shipping to the Formatter |
6258
|
|
|
|
|
|
|
#---------------------------------------------------------- |
6259
|
5895
|
|
|
|
|
13743
|
$line_of_tokens->{_rtoken_type} = \@token_type; |
6260
|
5895
|
|
|
|
|
11461
|
$line_of_tokens->{_rtokens} = \@tokens; |
6261
|
5895
|
|
|
|
|
12090
|
$line_of_tokens->{_rblock_type} = \@block_type; |
6262
|
5895
|
|
|
|
|
11047
|
$line_of_tokens->{_rtype_sequence} = \@type_sequence; |
6263
|
5895
|
|
|
|
|
18987
|
$line_of_tokens->{_rlevels} = \@levels; |
6264
|
5895
|
|
|
|
|
11569
|
$line_of_tokens->{_rci_levels} = \@ci_levels; |
6265
|
|
|
|
|
|
|
|
6266
|
5895
|
|
|
|
|
16354
|
return; |
6267
|
|
|
|
|
|
|
} ## end sub tokenizer_wrapup_line |
6268
|
|
|
|
|
|
|
|
6269
|
|
|
|
|
|
|
} ## end tokenize_this_line |
6270
|
|
|
|
|
|
|
|
6271
|
|
|
|
|
|
|
####################################################################### |
6272
|
|
|
|
|
|
|
# Tokenizer routines which assist in identifying token types |
6273
|
|
|
|
|
|
|
####################################################################### |
6274
|
|
|
|
|
|
|
|
6275
|
|
|
|
|
|
|
# hash lookup table of operator expected values |
6276
|
|
|
|
|
|
|
my %op_expected_table; |
6277
|
|
|
|
|
|
|
|
6278
|
|
|
|
|
|
|
# exceptions to perl's weird parsing rules after type 'Z' |
6279
|
|
|
|
|
|
|
my %is_weird_parsing_rule_exception; |
6280
|
|
|
|
|
|
|
|
6281
|
|
|
|
|
|
|
my %is_paren_dollar; |
6282
|
|
|
|
|
|
|
|
6283
|
|
|
|
|
|
|
my %is_n_v; |
6284
|
|
|
|
|
|
|
|
6285
|
|
|
|
|
|
|
BEGIN { |
6286
|
|
|
|
|
|
|
|
6287
|
|
|
|
|
|
|
# Always expecting TERM following these types: |
6288
|
|
|
|
|
|
|
# note: this is identical to '@value_requestor_type' defined later. |
6289
|
38
|
|
|
38
|
|
582
|
my @q = qw( |
6290
|
|
|
|
|
|
|
; ! + x & ? F J - p / Y : % f U ~ A G j L * . | ^ < = [ m { \ > t |
6291
|
|
|
|
|
|
|
|| >= != mm *= => .. !~ == && |= .= pp -= =~ += <= %= ^= x= ~~ ** << /= |
6292
|
|
|
|
|
|
|
&= // >> ~. &. |. ^. |
6293
|
|
|
|
|
|
|
... **= <<= >>= &&= ||= //= <=> !~~ &.= |.= ^.= <<~ |
6294
|
|
|
|
|
|
|
); |
6295
|
38
|
|
|
|
|
161
|
push @q, ','; |
6296
|
38
|
|
|
|
|
77
|
push @q, '('; # for completeness, not currently a token type |
6297
|
38
|
|
|
|
|
71
|
push @q, '->'; # was previously in UNKNOWN |
6298
|
38
|
|
|
|
|
2024
|
@{op_expected_table}{@q} = (TERM) x scalar(@q); |
6299
|
|
|
|
|
|
|
|
6300
|
|
|
|
|
|
|
# Always UNKNOWN following these types; |
6301
|
|
|
|
|
|
|
# previously had '->' in this list for c030 |
6302
|
38
|
|
|
|
|
241
|
@q = qw( w ); |
6303
|
38
|
|
|
|
|
104
|
@{op_expected_table}{@q} = (UNKNOWN) x scalar(@q); |
6304
|
|
|
|
|
|
|
|
6305
|
|
|
|
|
|
|
# Always expecting OPERATOR ... |
6306
|
|
|
|
|
|
|
# 'n' and 'v' are currently excluded because they might be VERSION numbers |
6307
|
|
|
|
|
|
|
# 'i' is currently excluded because it might be a package |
6308
|
|
|
|
|
|
|
# 'q' is currently excluded because it might be a prototype |
6309
|
|
|
|
|
|
|
# Fix for c030: removed '->' from this list: |
6310
|
38
|
|
|
|
|
128
|
@q = qw( -- C h R ++ ] Q <> ); ## n v q i ); |
6311
|
38
|
|
|
|
|
94
|
push @q, ')'; |
6312
|
38
|
|
|
|
|
229
|
@{op_expected_table}{@q} = (OPERATOR) x scalar(@q); |
6313
|
|
|
|
|
|
|
|
6314
|
|
|
|
|
|
|
# Fix for git #62: added '*' and '%' |
6315
|
38
|
|
|
|
|
109
|
@q = qw( < ? * % ); |
6316
|
38
|
|
|
|
|
112
|
@{is_weird_parsing_rule_exception}{@q} = (1) x scalar(@q); |
6317
|
|
|
|
|
|
|
|
6318
|
38
|
|
|
|
|
90
|
@q = qw<) $>; |
6319
|
38
|
|
|
|
|
110
|
@{is_paren_dollar}{@q} = (1) x scalar(@q); |
6320
|
|
|
|
|
|
|
|
6321
|
38
|
|
|
|
|
87
|
@q = qw( n v ); |
6322
|
38
|
|
|
|
|
1338
|
@{is_n_v}{@q} = (1) x scalar(@q); |
6323
|
|
|
|
|
|
|
|
6324
|
|
|
|
|
|
|
} ## end BEGIN |
6325
|
|
|
|
|
|
|
|
6326
|
38
|
|
|
38
|
|
320
|
use constant DEBUG_OPERATOR_EXPECTED => 0; |
|
38
|
|
|
|
|
114
|
|
|
38
|
|
|
|
|
88134
|
|
6327
|
|
|
|
|
|
|
|
6328
|
|
|
|
|
|
|
sub operator_expected { |
6329
|
|
|
|
|
|
|
|
6330
|
|
|
|
|
|
|
# Returns a parameter indicating what types of tokens can occur next |
6331
|
|
|
|
|
|
|
|
6332
|
|
|
|
|
|
|
# Call format: |
6333
|
|
|
|
|
|
|
# $op_expected = |
6334
|
|
|
|
|
|
|
# $self->operator_expected( [ $prev_type, $tok, $next_type ] ); |
6335
|
|
|
|
|
|
|
# where |
6336
|
|
|
|
|
|
|
# $prev_type is the type of the previous token (blank or not) |
6337
|
|
|
|
|
|
|
# $tok is the current token |
6338
|
|
|
|
|
|
|
# $next_type is the type of the next token (blank or not) |
6339
|
|
|
|
|
|
|
|
6340
|
|
|
|
|
|
|
# Many perl symbols have two or more meanings. For example, '<<' |
6341
|
|
|
|
|
|
|
# can be a shift operator or a here-doc operator. The |
6342
|
|
|
|
|
|
|
# interpretation of these symbols depends on the current state of |
6343
|
|
|
|
|
|
|
# the tokenizer, which may either be expecting a term or an |
6344
|
|
|
|
|
|
|
# operator. For this example, a << would be a shift if an OPERATOR |
6345
|
|
|
|
|
|
|
# is expected, and a here-doc if a TERM is expected. This routine |
6346
|
|
|
|
|
|
|
# is called to make this decision for any current token. It returns |
6347
|
|
|
|
|
|
|
# one of three possible values: |
6348
|
|
|
|
|
|
|
# |
6349
|
|
|
|
|
|
|
# OPERATOR - operator expected (or at least, not a term) |
6350
|
|
|
|
|
|
|
# UNKNOWN - can't tell |
6351
|
|
|
|
|
|
|
# TERM - a term is expected (or at least, not an operator) |
6352
|
|
|
|
|
|
|
# |
6353
|
|
|
|
|
|
|
# The decision is based on what has been seen so far. This |
6354
|
|
|
|
|
|
|
# information is stored in the "$last_nonblank_type" and |
6355
|
|
|
|
|
|
|
# "$last_nonblank_token" variables. For example, if the |
6356
|
|
|
|
|
|
|
# $last_nonblank_type is '=~', then we are expecting a TERM, whereas |
6357
|
|
|
|
|
|
|
# if $last_nonblank_type is 'n' (numeric), we are expecting an |
6358
|
|
|
|
|
|
|
# OPERATOR. |
6359
|
|
|
|
|
|
|
# |
6360
|
|
|
|
|
|
|
# If a UNKNOWN is returned, the calling routine must guess. A major |
6361
|
|
|
|
|
|
|
# goal of this tokenizer is to minimize the possibility of returning |
6362
|
|
|
|
|
|
|
# UNKNOWN, because a wrong guess can spoil the formatting of a |
6363
|
|
|
|
|
|
|
# script. |
6364
|
|
|
|
|
|
|
# |
6365
|
|
|
|
|
|
|
# Adding NEW_TOKENS: it is critically important that this routine be |
6366
|
|
|
|
|
|
|
# updated to allow it to determine if an operator or term is to be |
6367
|
|
|
|
|
|
|
# expected after the new token. Doing this simply involves adding |
6368
|
|
|
|
|
|
|
# the new token character to one of the regexes in this routine or |
6369
|
|
|
|
|
|
|
# to one of the hash lists |
6370
|
|
|
|
|
|
|
# that it uses, which are initialized in the BEGIN section. |
6371
|
|
|
|
|
|
|
# USES GLOBAL VARIABLES: $last_nonblank_type, $last_nonblank_token, |
6372
|
|
|
|
|
|
|
# $statement_type |
6373
|
|
|
|
|
|
|
|
6374
|
|
|
|
|
|
|
# When possible, token types should be selected such that we can determine |
6375
|
|
|
|
|
|
|
# the 'operator_expected' value by a simple hash lookup. If there are |
6376
|
|
|
|
|
|
|
# exceptions, that is an indication that a new type is needed. |
6377
|
|
|
|
|
|
|
|
6378
|
33045
|
|
|
33045
|
0
|
60074
|
my ( $self, $rarg ) = @_; |
6379
|
|
|
|
|
|
|
|
6380
|
|
|
|
|
|
|
#------------- |
6381
|
|
|
|
|
|
|
# Table lookup |
6382
|
|
|
|
|
|
|
#------------- |
6383
|
|
|
|
|
|
|
|
6384
|
|
|
|
|
|
|
# Many types are can be obtained by a table lookup given the previous type. |
6385
|
|
|
|
|
|
|
# This typically handles half or more of the calls. |
6386
|
33045
|
|
|
|
|
62265
|
my $op_expected = $op_expected_table{$last_nonblank_type}; |
6387
|
33045
|
100
|
|
|
|
62062
|
if ( defined($op_expected) ) { |
6388
|
20534
|
|
|
|
|
25564
|
DEBUG_OPERATOR_EXPECTED |
6389
|
|
|
|
|
|
|
&& print STDOUT |
6390
|
|
|
|
|
|
|
"OPERATOR_EXPECTED: Table Lookup; returns $op_expected for last type $last_nonblank_type token $last_nonblank_token\n"; |
6391
|
20534
|
|
|
|
|
40456
|
return $op_expected; |
6392
|
|
|
|
|
|
|
} |
6393
|
|
|
|
|
|
|
|
6394
|
|
|
|
|
|
|
#--------------------- |
6395
|
|
|
|
|
|
|
# Handle special cases |
6396
|
|
|
|
|
|
|
#--------------------- |
6397
|
|
|
|
|
|
|
|
6398
|
12511
|
|
|
|
|
18342
|
$op_expected = UNKNOWN; |
6399
|
12511
|
|
|
|
|
16963
|
my ( $prev_type, $tok, $next_type ) = @{$rarg}; |
|
12511
|
|
|
|
|
25718
|
|
6400
|
|
|
|
|
|
|
|
6401
|
|
|
|
|
|
|
# Types 'k', '}' and 'Z' depend on context |
6402
|
|
|
|
|
|
|
# Types 'i', 'n', 'v', 'q' currently also temporarily depend on context. |
6403
|
|
|
|
|
|
|
|
6404
|
|
|
|
|
|
|
# identifier... |
6405
|
12511
|
100
|
|
|
|
39000
|
if ( $last_nonblank_type eq 'i' ) { |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
6406
|
4290
|
|
|
|
|
7486
|
$op_expected = OPERATOR; |
6407
|
|
|
|
|
|
|
|
6408
|
|
|
|
|
|
|
# TODO: it would be cleaner to make this a special type |
6409
|
|
|
|
|
|
|
# expecting VERSION or {} after package NAMESPACE; |
6410
|
|
|
|
|
|
|
# maybe mark these words as type 'Y'? |
6411
|
4290
|
50
|
66
|
|
|
13231
|
if ( substr( $last_nonblank_token, 0, 7 ) eq 'package' |
|
|
|
66
|
|
|
|
|
6412
|
|
|
|
|
|
|
&& $statement_type =~ /^package\b/ |
6413
|
|
|
|
|
|
|
&& $last_nonblank_token =~ /^package\b/ ) |
6414
|
|
|
|
|
|
|
{ |
6415
|
26
|
|
|
|
|
62
|
$op_expected = TERM; |
6416
|
|
|
|
|
|
|
} |
6417
|
|
|
|
|
|
|
} |
6418
|
|
|
|
|
|
|
|
6419
|
|
|
|
|
|
|
# keyword... |
6420
|
|
|
|
|
|
|
elsif ( $last_nonblank_type eq 'k' ) { |
6421
|
2629
|
|
|
|
|
4337
|
$op_expected = TERM; |
6422
|
2629
|
100
|
|
|
|
12214
|
if ( $expecting_operator_token{$last_nonblank_token} ) { |
|
|
100
|
|
|
|
|
|
6423
|
7
|
|
|
|
|
16
|
$op_expected = OPERATOR; |
6424
|
|
|
|
|
|
|
} |
6425
|
|
|
|
|
|
|
elsif ( $expecting_term_token{$last_nonblank_token} ) { |
6426
|
|
|
|
|
|
|
|
6427
|
|
|
|
|
|
|
# Exceptions from TERM: |
6428
|
|
|
|
|
|
|
|
6429
|
|
|
|
|
|
|
# // may follow perl functions which may be unary operators |
6430
|
|
|
|
|
|
|
# see test file dor.t (defined or); |
6431
|
2523
|
100
|
100
|
|
|
11981
|
if ( |
|
|
50
|
100
|
|
|
|
|
|
|
|
66
|
|
|
|
|
6432
|
|
|
|
|
|
|
$tok eq '/' |
6433
|
|
|
|
|
|
|
&& $next_type eq '/' |
6434
|
|
|
|
|
|
|
&& $is_keyword_rejecting_slash_as_pattern_delimiter{ |
6435
|
|
|
|
|
|
|
$last_nonblank_token} |
6436
|
|
|
|
|
|
|
) |
6437
|
|
|
|
|
|
|
{ |
6438
|
1
|
|
|
|
|
3
|
$op_expected = OPERATOR; |
6439
|
|
|
|
|
|
|
} |
6440
|
|
|
|
|
|
|
|
6441
|
|
|
|
|
|
|
# Patch to allow a ? following 'split' to be a deprecated pattern |
6442
|
|
|
|
|
|
|
# delimiter. This patch is coordinated with the omission of split |
6443
|
|
|
|
|
|
|
# from the list |
6444
|
|
|
|
|
|
|
# %is_keyword_rejecting_question_as_pattern_delimiter. This patch |
6445
|
|
|
|
|
|
|
# will force perltidy to guess. |
6446
|
|
|
|
|
|
|
elsif ($tok eq '?' |
6447
|
|
|
|
|
|
|
&& $last_nonblank_token eq 'split' ) |
6448
|
|
|
|
|
|
|
{ |
6449
|
0
|
|
|
|
|
0
|
$op_expected = UNKNOWN; |
6450
|
|
|
|
|
|
|
} |
6451
|
|
|
|
|
|
|
} |
6452
|
|
|
|
|
|
|
} ## end type 'k' |
6453
|
|
|
|
|
|
|
|
6454
|
|
|
|
|
|
|
# closing container token... |
6455
|
|
|
|
|
|
|
|
6456
|
|
|
|
|
|
|
# Note that the actual token for type '}' may also be a ')'. |
6457
|
|
|
|
|
|
|
|
6458
|
|
|
|
|
|
|
# Also note that $last_nonblank_token is not the token corresponding to |
6459
|
|
|
|
|
|
|
# $last_nonblank_type when the type is a closing container. In that |
6460
|
|
|
|
|
|
|
# case it is the token before the corresponding opening container token. |
6461
|
|
|
|
|
|
|
# So for example, for this snippet |
6462
|
|
|
|
|
|
|
# $a = do { BLOCK } / 2; |
6463
|
|
|
|
|
|
|
# the $last_nonblank_token is 'do' when $last_nonblank_type eq '}'. |
6464
|
|
|
|
|
|
|
|
6465
|
|
|
|
|
|
|
elsif ( $last_nonblank_type eq '}' ) { |
6466
|
3448
|
|
|
|
|
5718
|
$op_expected = UNKNOWN; |
6467
|
|
|
|
|
|
|
|
6468
|
|
|
|
|
|
|
# handle something after 'do' and 'eval' |
6469
|
3448
|
100
|
66
|
|
|
18814
|
if ( $is_block_operator{$last_nonblank_token} ) { |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
6470
|
|
|
|
|
|
|
|
6471
|
|
|
|
|
|
|
# something like $a = do { BLOCK } / 2; |
6472
|
82
|
|
|
|
|
241
|
$op_expected = OPERATOR; # block mode following } |
6473
|
|
|
|
|
|
|
} |
6474
|
|
|
|
|
|
|
|
6475
|
|
|
|
|
|
|
# $last_nonblank_token =~ /^(\)|\$|\-\>)/ |
6476
|
|
|
|
|
|
|
elsif ( $is_paren_dollar{ substr( $last_nonblank_token, 0, 1 ) } |
6477
|
|
|
|
|
|
|
|| substr( $last_nonblank_token, 0, 2 ) eq '->' ) |
6478
|
|
|
|
|
|
|
{ |
6479
|
2021
|
|
|
|
|
3375
|
$op_expected = OPERATOR; |
6480
|
2021
|
50
|
|
|
|
4952
|
if ( $last_nonblank_token eq '$' ) { $op_expected = UNKNOWN } |
|
0
|
|
|
|
|
0
|
|
6481
|
|
|
|
|
|
|
} |
6482
|
|
|
|
|
|
|
|
6483
|
|
|
|
|
|
|
# Check for smartmatch operator before preceding brace or square |
6484
|
|
|
|
|
|
|
# bracket. For example, at the ? after the ] in the following |
6485
|
|
|
|
|
|
|
# expressions we are expecting an operator: |
6486
|
|
|
|
|
|
|
# |
6487
|
|
|
|
|
|
|
# qr/3/ ~~ ['1234'] ? 1 : 0; |
6488
|
|
|
|
|
|
|
# map { $_ ~~ [ '0', '1' ] ? 'x' : 'o' } @a; |
6489
|
|
|
|
|
|
|
elsif ( $last_nonblank_token eq '~~' ) { |
6490
|
20
|
|
|
|
|
48
|
$op_expected = OPERATOR; |
6491
|
|
|
|
|
|
|
} |
6492
|
|
|
|
|
|
|
|
6493
|
|
|
|
|
|
|
# A right brace here indicates the end of a simple block. All |
6494
|
|
|
|
|
|
|
# non-structural right braces have type 'R' all braces associated with |
6495
|
|
|
|
|
|
|
# block operator keywords have been given those keywords as |
6496
|
|
|
|
|
|
|
# "last_nonblank_token" and caught above. (This statement is order |
6497
|
|
|
|
|
|
|
# dependent, and must come after checking $last_nonblank_token). |
6498
|
|
|
|
|
|
|
else { |
6499
|
|
|
|
|
|
|
|
6500
|
|
|
|
|
|
|
# patch for dor.t (defined or). |
6501
|
1325
|
50
|
33
|
|
|
6006
|
if ( $tok eq '/' |
|
|
100
|
33
|
|
|
|
|
6502
|
|
|
|
|
|
|
&& $next_type eq '/' |
6503
|
|
|
|
|
|
|
&& $last_nonblank_token eq ']' ) |
6504
|
|
|
|
|
|
|
{ |
6505
|
0
|
|
|
|
|
0
|
$op_expected = OPERATOR; |
6506
|
|
|
|
|
|
|
} |
6507
|
|
|
|
|
|
|
|
6508
|
|
|
|
|
|
|
# Patch for RT #116344: misparse a ternary operator after an |
6509
|
|
|
|
|
|
|
# anonymous hash, like this: |
6510
|
|
|
|
|
|
|
# return ref {} ? 1 : 0; |
6511
|
|
|
|
|
|
|
# The right brace should really be marked type 'R' in this case, |
6512
|
|
|
|
|
|
|
# and it is safest to return an UNKNOWN here. Expecting a TERM will |
6513
|
|
|
|
|
|
|
# cause the '?' to always be interpreted as a pattern delimiter |
6514
|
|
|
|
|
|
|
# rather than introducing a ternary operator. |
6515
|
|
|
|
|
|
|
elsif ( $tok eq '?' ) { |
6516
|
1
|
|
|
|
|
4
|
$op_expected = UNKNOWN; |
6517
|
|
|
|
|
|
|
} |
6518
|
|
|
|
|
|
|
else { |
6519
|
1324
|
|
|
|
|
2469
|
$op_expected = TERM; |
6520
|
|
|
|
|
|
|
} |
6521
|
|
|
|
|
|
|
} |
6522
|
|
|
|
|
|
|
} ## end type '}' |
6523
|
|
|
|
|
|
|
|
6524
|
|
|
|
|
|
|
# number or v-string... |
6525
|
|
|
|
|
|
|
# An exception is for VERSION numbers a 'use' statement. It has the format |
6526
|
|
|
|
|
|
|
# use Module VERSION LIST |
6527
|
|
|
|
|
|
|
# We could avoid this exception by writing a special sub to parse 'use' |
6528
|
|
|
|
|
|
|
# statements and perhaps mark these numbers with a new type V (for VERSION) |
6529
|
|
|
|
|
|
|
##elsif ( $last_nonblank_type =~ /^[nv]$/ ) { |
6530
|
|
|
|
|
|
|
elsif ( $is_n_v{$last_nonblank_type} ) { |
6531
|
1916
|
|
|
|
|
3114
|
$op_expected = OPERATOR; |
6532
|
1916
|
100
|
|
|
|
4350
|
if ( $statement_type eq 'use' ) { |
6533
|
11
|
|
|
|
|
27
|
$op_expected = UNKNOWN; |
6534
|
|
|
|
|
|
|
} |
6535
|
|
|
|
|
|
|
} |
6536
|
|
|
|
|
|
|
|
6537
|
|
|
|
|
|
|
# quote... |
6538
|
|
|
|
|
|
|
# TODO: labeled prototype words would better be given type 'A' or maybe |
6539
|
|
|
|
|
|
|
# 'J'; not 'q'; or maybe mark as type 'Y'? |
6540
|
|
|
|
|
|
|
elsif ( $last_nonblank_type eq 'q' ) { |
6541
|
137
|
|
|
|
|
296
|
$op_expected = OPERATOR; |
6542
|
137
|
50
|
|
|
|
631
|
if ( $last_nonblank_token eq 'prototype' ) { |
|
|
100
|
|
|
|
|
|
6543
|
0
|
|
|
|
|
0
|
$op_expected = TERM; |
6544
|
|
|
|
|
|
|
} |
6545
|
|
|
|
|
|
|
|
6546
|
|
|
|
|
|
|
# update for --use-feature=class (rt145706): |
6547
|
|
|
|
|
|
|
# Look for class VERSION after possible attribute, as in |
6548
|
|
|
|
|
|
|
# class Example::Subclass : isa(Example::Base) 1.345 { ... } |
6549
|
|
|
|
|
|
|
elsif ( $statement_type =~ /^package\b/ ) { |
6550
|
3
|
|
|
|
|
6
|
$op_expected = TERM; |
6551
|
|
|
|
|
|
|
} |
6552
|
|
|
|
|
|
|
} |
6553
|
|
|
|
|
|
|
|
6554
|
|
|
|
|
|
|
# file handle or similar |
6555
|
|
|
|
|
|
|
elsif ( $last_nonblank_type eq 'Z' ) { |
6556
|
|
|
|
|
|
|
|
6557
|
90
|
|
|
|
|
193
|
$op_expected = UNKNOWN; |
6558
|
|
|
|
|
|
|
|
6559
|
|
|
|
|
|
|
# angle.t |
6560
|
90
|
100
|
33
|
|
|
1199
|
if ( $last_nonblank_token =~ /^\w/ ) { |
|
|
50
|
100
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
6561
|
2
|
|
|
|
|
7
|
$op_expected = UNKNOWN; |
6562
|
|
|
|
|
|
|
} |
6563
|
|
|
|
|
|
|
|
6564
|
|
|
|
|
|
|
# Exception to weird parsing rules for 'x(' ... see case b1205: |
6565
|
|
|
|
|
|
|
# In something like 'print $vv x(...' the x is an operator; |
6566
|
|
|
|
|
|
|
# Likewise in 'print $vv x$ww' the x is an operator (case b1207) |
6567
|
|
|
|
|
|
|
# otherwise x follows the weird parsing rules. |
6568
|
|
|
|
|
|
|
elsif ( $tok eq 'x' && $next_type =~ /^[\(\$\@\%]$/ ) { |
6569
|
0
|
|
|
|
|
0
|
$op_expected = OPERATOR; |
6570
|
|
|
|
|
|
|
} |
6571
|
|
|
|
|
|
|
|
6572
|
|
|
|
|
|
|
# The 'weird parsing rules' of next section do not work for '<' and '?' |
6573
|
|
|
|
|
|
|
# It is best to mark them as unknown. Test case: |
6574
|
|
|
|
|
|
|
# print $fh <DATA>; |
6575
|
|
|
|
|
|
|
elsif ( $is_weird_parsing_rule_exception{$tok} ) { |
6576
|
4
|
|
|
|
|
10
|
$op_expected = UNKNOWN; |
6577
|
|
|
|
|
|
|
} |
6578
|
|
|
|
|
|
|
|
6579
|
|
|
|
|
|
|
# For possible file handle like "$a", Perl uses weird parsing rules. |
6580
|
|
|
|
|
|
|
# For example: |
6581
|
|
|
|
|
|
|
# print $a/2,"/hi"; - division |
6582
|
|
|
|
|
|
|
# print $a / 2,"/hi"; - division |
6583
|
|
|
|
|
|
|
# print $a/ 2,"/hi"; - division |
6584
|
|
|
|
|
|
|
# print $a /2,"/hi"; - pattern (and error)! |
6585
|
|
|
|
|
|
|
# Some examples where this logic works okay, for '&','*','+': |
6586
|
|
|
|
|
|
|
# print $fh &xsi_protos(@mods); |
6587
|
|
|
|
|
|
|
# my $x = new $CompressClass *FH; |
6588
|
|
|
|
|
|
|
# print $OUT +( $count % 15 ? ", " : "\n\t" ); |
6589
|
|
|
|
|
|
|
elsif ($prev_type eq 'b' |
6590
|
|
|
|
|
|
|
&& $next_type ne 'b' ) |
6591
|
|
|
|
|
|
|
{ |
6592
|
9
|
|
|
|
|
15
|
$op_expected = TERM; |
6593
|
|
|
|
|
|
|
} |
6594
|
|
|
|
|
|
|
|
6595
|
|
|
|
|
|
|
# Note that '?' and '<' have been moved above |
6596
|
|
|
|
|
|
|
# ( $tok =~ /^([x\/\+\-\*\%\&\.\?\<]|\>\>)$/ ) { |
6597
|
|
|
|
|
|
|
elsif ( $tok =~ /^([x\/\+\-\*\%\&\.]|\>\>)$/ ) { |
6598
|
|
|
|
|
|
|
|
6599
|
|
|
|
|
|
|
# Do not complain in 'use' statements, which have special syntax. |
6600
|
|
|
|
|
|
|
# For example, from RT#130344: |
6601
|
|
|
|
|
|
|
# use lib $FindBin::Bin . '/lib'; |
6602
|
9
|
50
|
|
|
|
34
|
if ( $statement_type ne 'use' ) { |
6603
|
9
|
|
|
|
|
40
|
$self->complain( |
6604
|
|
|
|
|
|
|
"operator in possible indirect object location not recommended\n" |
6605
|
|
|
|
|
|
|
); |
6606
|
|
|
|
|
|
|
} |
6607
|
9
|
|
|
|
|
28
|
$op_expected = OPERATOR; |
6608
|
|
|
|
|
|
|
} |
6609
|
|
|
|
|
|
|
} |
6610
|
|
|
|
|
|
|
|
6611
|
|
|
|
|
|
|
# anything else... |
6612
|
|
|
|
|
|
|
else { |
6613
|
1
|
|
|
|
|
3
|
$op_expected = UNKNOWN; |
6614
|
|
|
|
|
|
|
} |
6615
|
|
|
|
|
|
|
|
6616
|
12511
|
|
|
|
|
16242
|
DEBUG_OPERATOR_EXPECTED |
6617
|
|
|
|
|
|
|
&& print STDOUT |
6618
|
|
|
|
|
|
|
"OPERATOR_EXPECTED: returns $op_expected for last type $last_nonblank_type token $last_nonblank_token\n"; |
6619
|
|
|
|
|
|
|
|
6620
|
12511
|
|
|
|
|
22751
|
return $op_expected; |
6621
|
|
|
|
|
|
|
|
6622
|
|
|
|
|
|
|
} ## end sub operator_expected |
6623
|
|
|
|
|
|
|
|
6624
|
|
|
|
|
|
|
sub new_statement_ok { |
6625
|
|
|
|
|
|
|
|
6626
|
|
|
|
|
|
|
# return true if the current token can start a new statement |
6627
|
|
|
|
|
|
|
# USES GLOBAL VARIABLES: $last_nonblank_type |
6628
|
|
|
|
|
|
|
|
6629
|
81
|
|
66
|
81
|
0
|
310
|
return label_ok() # a label would be ok here |
6630
|
|
|
|
|
|
|
|
6631
|
|
|
|
|
|
|
|| $last_nonblank_type eq 'J'; # or we follow a label |
6632
|
|
|
|
|
|
|
|
6633
|
|
|
|
|
|
|
} ## end sub new_statement_ok |
6634
|
|
|
|
|
|
|
|
6635
|
|
|
|
|
|
|
sub label_ok { |
6636
|
|
|
|
|
|
|
|
6637
|
|
|
|
|
|
|
# Decide if a bare word followed by a colon here is a label |
6638
|
|
|
|
|
|
|
# USES GLOBAL VARIABLES: $last_nonblank_token, $last_nonblank_type, |
6639
|
|
|
|
|
|
|
# $brace_depth, $rbrace_type |
6640
|
|
|
|
|
|
|
|
6641
|
|
|
|
|
|
|
# if it follows an opening or closing code block curly brace.. |
6642
|
114
|
100
|
100
|
114
|
0
|
1090
|
if ( ( $last_nonblank_token eq '{' || $last_nonblank_token eq '}' ) |
|
|
|
66
|
|
|
|
|
6643
|
|
|
|
|
|
|
&& $last_nonblank_type eq $last_nonblank_token ) |
6644
|
|
|
|
|
|
|
{ |
6645
|
|
|
|
|
|
|
|
6646
|
|
|
|
|
|
|
# it is a label if and only if the curly encloses a code block |
6647
|
47
|
|
|
|
|
294
|
return $rbrace_type->[$brace_depth]; |
6648
|
|
|
|
|
|
|
} |
6649
|
|
|
|
|
|
|
|
6650
|
|
|
|
|
|
|
# otherwise, it is a label if and only if it follows a ';' (real or fake) |
6651
|
|
|
|
|
|
|
# or another label |
6652
|
|
|
|
|
|
|
else { |
6653
|
67
|
|
100
|
|
|
511
|
return ( $last_nonblank_type eq ';' || $last_nonblank_type eq 'J' ); |
6654
|
|
|
|
|
|
|
} |
6655
|
|
|
|
|
|
|
} ## end sub label_ok |
6656
|
|
|
|
|
|
|
|
6657
|
|
|
|
|
|
|
sub code_block_type { |
6658
|
|
|
|
|
|
|
|
6659
|
|
|
|
|
|
|
# Decide if this is a block of code, and its type. |
6660
|
|
|
|
|
|
|
# Must be called only when $type = $token = '{' |
6661
|
|
|
|
|
|
|
# The problem is to distinguish between the start of a block of code |
6662
|
|
|
|
|
|
|
# and the start of an anonymous hash reference |
6663
|
|
|
|
|
|
|
# Returns "" if not code block, otherwise returns 'last_nonblank_token' |
6664
|
|
|
|
|
|
|
# to indicate the type of code block. (For example, 'last_nonblank_token' |
6665
|
|
|
|
|
|
|
# might be 'if' for an if block, 'else' for an else block, etc). |
6666
|
|
|
|
|
|
|
# USES GLOBAL VARIABLES: $last_nonblank_token, $last_nonblank_type, |
6667
|
|
|
|
|
|
|
# $last_nonblank_block_type, $brace_depth, $rbrace_type |
6668
|
|
|
|
|
|
|
|
6669
|
|
|
|
|
|
|
# handle case of multiple '{'s |
6670
|
|
|
|
|
|
|
|
6671
|
|
|
|
|
|
|
# print "BLOCK_TYPE EXAMINING: type=$last_nonblank_type tok=$last_nonblank_token\n"; |
6672
|
|
|
|
|
|
|
|
6673
|
1296
|
|
|
1296
|
0
|
3300
|
my ( $self, $i, $rtokens, $rtoken_type, $max_token_index ) = @_; |
6674
|
1296
|
100
|
66
|
|
|
18038
|
if ( $last_nonblank_token eq '{' |
|
|
100
|
66
|
|
|
|
|
|
|
100
|
66
|
|
|
|
|
|
|
100
|
100
|
|
|
|
|
|
|
100
|
66
|
|
|
|
|
|
|
100
|
33
|
|
|
|
|
|
|
50
|
33
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
6675
|
|
|
|
|
|
|
&& $last_nonblank_type eq $last_nonblank_token ) |
6676
|
|
|
|
|
|
|
{ |
6677
|
|
|
|
|
|
|
|
6678
|
|
|
|
|
|
|
# opening brace where a statement may appear is probably |
6679
|
|
|
|
|
|
|
# a code block but might be and anonymous hash reference |
6680
|
90
|
50
|
|
|
|
326
|
if ( $rbrace_type->[$brace_depth] ) { |
6681
|
90
|
|
|
|
|
326
|
return $self->decide_if_code_block( $i, $rtokens, $rtoken_type, |
6682
|
|
|
|
|
|
|
$max_token_index ); |
6683
|
|
|
|
|
|
|
} |
6684
|
|
|
|
|
|
|
|
6685
|
|
|
|
|
|
|
# cannot start a code block within an anonymous hash |
6686
|
|
|
|
|
|
|
else { |
6687
|
0
|
|
|
|
|
0
|
return EMPTY_STRING; |
6688
|
|
|
|
|
|
|
} |
6689
|
|
|
|
|
|
|
} |
6690
|
|
|
|
|
|
|
|
6691
|
|
|
|
|
|
|
elsif ( $last_nonblank_token eq ';' ) { |
6692
|
|
|
|
|
|
|
|
6693
|
|
|
|
|
|
|
# an opening brace where a statement may appear is probably |
6694
|
|
|
|
|
|
|
# a code block but might be and anonymous hash reference |
6695
|
48
|
|
|
|
|
297
|
return $self->decide_if_code_block( $i, $rtokens, $rtoken_type, |
6696
|
|
|
|
|
|
|
$max_token_index ); |
6697
|
|
|
|
|
|
|
} |
6698
|
|
|
|
|
|
|
|
6699
|
|
|
|
|
|
|
# handle case of '}{' |
6700
|
|
|
|
|
|
|
elsif ($last_nonblank_token eq '}' |
6701
|
|
|
|
|
|
|
&& $last_nonblank_type eq $last_nonblank_token ) |
6702
|
|
|
|
|
|
|
{ |
6703
|
|
|
|
|
|
|
|
6704
|
|
|
|
|
|
|
# a } { situation ... |
6705
|
|
|
|
|
|
|
# could be hash reference after code block..(blktype1.t) |
6706
|
9
|
50
|
|
|
|
39
|
if ($last_nonblank_block_type) { |
6707
|
9
|
|
|
|
|
33
|
return $self->decide_if_code_block( $i, $rtokens, $rtoken_type, |
6708
|
|
|
|
|
|
|
$max_token_index ); |
6709
|
|
|
|
|
|
|
} |
6710
|
|
|
|
|
|
|
|
6711
|
|
|
|
|
|
|
# must be a block if it follows a closing hash reference |
6712
|
|
|
|
|
|
|
else { |
6713
|
0
|
|
|
|
|
0
|
return $last_nonblank_token; |
6714
|
|
|
|
|
|
|
} |
6715
|
|
|
|
|
|
|
} |
6716
|
|
|
|
|
|
|
|
6717
|
|
|
|
|
|
|
#-------------------------------------------------------------- |
6718
|
|
|
|
|
|
|
# NOTE: braces after type characters start code blocks, but for |
6719
|
|
|
|
|
|
|
# simplicity these are not identified as such. See also |
6720
|
|
|
|
|
|
|
# sub is_non_structural_brace. |
6721
|
|
|
|
|
|
|
#-------------------------------------------------------------- |
6722
|
|
|
|
|
|
|
|
6723
|
|
|
|
|
|
|
## elsif ( $last_nonblank_type eq 't' ) { |
6724
|
|
|
|
|
|
|
## return $last_nonblank_token; |
6725
|
|
|
|
|
|
|
## } |
6726
|
|
|
|
|
|
|
|
6727
|
|
|
|
|
|
|
# brace after label: |
6728
|
|
|
|
|
|
|
elsif ( $last_nonblank_type eq 'J' ) { |
6729
|
34
|
|
|
|
|
117
|
return $last_nonblank_token; |
6730
|
|
|
|
|
|
|
} |
6731
|
|
|
|
|
|
|
|
6732
|
|
|
|
|
|
|
# otherwise, look at previous token. This must be a code block if |
6733
|
|
|
|
|
|
|
# it follows any of these: |
6734
|
|
|
|
|
|
|
# /^(BEGIN|END|CHECK|INIT|AUTOLOAD|DESTROY|UNITCHECK|continue|if|elsif|else|unless|do|while|until|eval|for|foreach|map|grep|sort)$/ |
6735
|
|
|
|
|
|
|
elsif ($is_code_block_token{$last_nonblank_token} |
6736
|
|
|
|
|
|
|
|| $is_grep_alias{$last_nonblank_token} ) |
6737
|
|
|
|
|
|
|
{ |
6738
|
|
|
|
|
|
|
|
6739
|
|
|
|
|
|
|
# Bug Patch: Note that the opening brace after the 'if' in the following |
6740
|
|
|
|
|
|
|
# snippet is an anonymous hash ref and not a code block! |
6741
|
|
|
|
|
|
|
# print 'hi' if { x => 1, }->{x}; |
6742
|
|
|
|
|
|
|
# We can identify this situation because the last nonblank type |
6743
|
|
|
|
|
|
|
# will be a keyword (instead of a closing paren) |
6744
|
476
|
50
|
33
|
|
|
2515
|
if ( |
|
|
|
66
|
|
|
|
|
6745
|
|
|
|
|
|
|
$last_nonblank_type eq 'k' |
6746
|
|
|
|
|
|
|
&& ( $last_nonblank_token eq 'if' |
6747
|
|
|
|
|
|
|
|| $last_nonblank_token eq 'unless' ) |
6748
|
|
|
|
|
|
|
) |
6749
|
|
|
|
|
|
|
{ |
6750
|
0
|
|
|
|
|
0
|
return EMPTY_STRING; |
6751
|
|
|
|
|
|
|
} |
6752
|
|
|
|
|
|
|
else { |
6753
|
476
|
|
|
|
|
1472
|
return $last_nonblank_token; |
6754
|
|
|
|
|
|
|
} |
6755
|
|
|
|
|
|
|
} |
6756
|
|
|
|
|
|
|
|
6757
|
|
|
|
|
|
|
# or a sub or package BLOCK |
6758
|
|
|
|
|
|
|
elsif ( ( $last_nonblank_type eq 'i' || $last_nonblank_type eq 't' ) |
6759
|
|
|
|
|
|
|
&& $last_nonblank_token =~ /^(sub|package)\b/ ) |
6760
|
|
|
|
|
|
|
{ |
6761
|
293
|
|
|
|
|
1036
|
return $last_nonblank_token; |
6762
|
|
|
|
|
|
|
} |
6763
|
|
|
|
|
|
|
|
6764
|
|
|
|
|
|
|
# or a sub alias |
6765
|
|
|
|
|
|
|
elsif (( $last_nonblank_type eq 'i' || $last_nonblank_type eq 't' ) |
6766
|
|
|
|
|
|
|
&& ( $is_sub{$last_nonblank_token} ) ) |
6767
|
|
|
|
|
|
|
{ |
6768
|
0
|
|
|
|
|
0
|
return 'sub'; |
6769
|
|
|
|
|
|
|
} |
6770
|
|
|
|
|
|
|
|
6771
|
|
|
|
|
|
|
elsif ( $statement_type =~ /^(sub|package)\b/ ) { |
6772
|
0
|
|
|
|
|
0
|
return $statement_type; |
6773
|
|
|
|
|
|
|
} |
6774
|
|
|
|
|
|
|
|
6775
|
|
|
|
|
|
|
# user-defined subs with block parameters (like grep/map/eval) |
6776
|
|
|
|
|
|
|
elsif ( $last_nonblank_type eq 'G' ) { |
6777
|
0
|
|
|
|
|
0
|
return $last_nonblank_token; |
6778
|
|
|
|
|
|
|
} |
6779
|
|
|
|
|
|
|
|
6780
|
|
|
|
|
|
|
# check bareword |
6781
|
|
|
|
|
|
|
elsif ( $last_nonblank_type eq 'w' ) { |
6782
|
|
|
|
|
|
|
|
6783
|
|
|
|
|
|
|
# check for syntax 'use MODULE LIST' |
6784
|
|
|
|
|
|
|
# This fixes b1022 b1025 b1027 b1028 b1029 b1030 b1031 |
6785
|
22
|
100
|
|
|
|
105
|
return EMPTY_STRING if ( $statement_type eq 'use' ); |
6786
|
|
|
|
|
|
|
|
6787
|
21
|
|
|
|
|
110
|
return $self->decide_if_code_block( $i, $rtokens, $rtoken_type, |
6788
|
|
|
|
|
|
|
$max_token_index ); |
6789
|
|
|
|
|
|
|
} |
6790
|
|
|
|
|
|
|
|
6791
|
|
|
|
|
|
|
# Patch for bug # RT #94338 reported by Daniel Trizen |
6792
|
|
|
|
|
|
|
# for-loop in a parenthesized block-map triggering an error message: |
6793
|
|
|
|
|
|
|
# map( { foreach my $item ( '0', '1' ) { print $item} } qw(a b c) ); |
6794
|
|
|
|
|
|
|
# Check for a code block within a parenthesized function call |
6795
|
|
|
|
|
|
|
elsif ( $last_nonblank_token eq '(' ) { |
6796
|
81
|
|
|
|
|
213
|
my $paren_type = $rparen_type->[$paren_depth]; |
6797
|
|
|
|
|
|
|
|
6798
|
|
|
|
|
|
|
# /^(map|grep|sort)$/ |
6799
|
81
|
100
|
66
|
|
|
419
|
if ( $paren_type && $is_sort_map_grep{$paren_type} ) { |
6800
|
|
|
|
|
|
|
|
6801
|
|
|
|
|
|
|
# We will mark this as a code block but use type 't' instead |
6802
|
|
|
|
|
|
|
# of the name of the containing function. This will allow for |
6803
|
|
|
|
|
|
|
# correct parsing but will usually produce better formatting. |
6804
|
|
|
|
|
|
|
# Braces with block type 't' are not broken open automatically |
6805
|
|
|
|
|
|
|
# in the formatter as are other code block types, and this usually |
6806
|
|
|
|
|
|
|
# works best. |
6807
|
1
|
|
|
|
|
4
|
return 't'; # (Not $paren_type) |
6808
|
|
|
|
|
|
|
} |
6809
|
|
|
|
|
|
|
else { |
6810
|
80
|
|
|
|
|
233
|
return EMPTY_STRING; |
6811
|
|
|
|
|
|
|
} |
6812
|
|
|
|
|
|
|
} |
6813
|
|
|
|
|
|
|
|
6814
|
|
|
|
|
|
|
# handle unknown syntax ') {' |
6815
|
|
|
|
|
|
|
# we previously appended a '()' to mark this case |
6816
|
|
|
|
|
|
|
elsif ( $last_nonblank_token =~ /\(\)$/ ) { |
6817
|
14
|
|
|
|
|
53
|
return $last_nonblank_token; |
6818
|
|
|
|
|
|
|
} |
6819
|
|
|
|
|
|
|
|
6820
|
|
|
|
|
|
|
# anything else must be anonymous hash reference |
6821
|
|
|
|
|
|
|
else { |
6822
|
229
|
|
|
|
|
663
|
return EMPTY_STRING; |
6823
|
|
|
|
|
|
|
} |
6824
|
|
|
|
|
|
|
} ## end sub code_block_type |
6825
|
|
|
|
|
|
|
|
6826
|
|
|
|
|
|
|
sub decide_if_code_block { |
6827
|
|
|
|
|
|
|
|
6828
|
|
|
|
|
|
|
# USES GLOBAL VARIABLES: $last_nonblank_token |
6829
|
168
|
|
|
168
|
0
|
450
|
my ( $self, $i, $rtokens, $rtoken_type, $max_token_index ) = @_; |
6830
|
|
|
|
|
|
|
|
6831
|
168
|
|
|
|
|
596
|
my ( $next_nonblank_token, $i_next ) = |
6832
|
|
|
|
|
|
|
$self->find_next_nonblank_token( $i, $rtokens, $max_token_index ); |
6833
|
|
|
|
|
|
|
|
6834
|
|
|
|
|
|
|
# we are at a '{' where a statement may appear. |
6835
|
|
|
|
|
|
|
# We must decide if this brace starts an anonymous hash or a code |
6836
|
|
|
|
|
|
|
# block. |
6837
|
|
|
|
|
|
|
# return "" if anonymous hash, and $last_nonblank_token otherwise |
6838
|
|
|
|
|
|
|
|
6839
|
|
|
|
|
|
|
# initialize to be code BLOCK |
6840
|
168
|
|
|
|
|
448
|
my $code_block_type = $last_nonblank_token; |
6841
|
|
|
|
|
|
|
|
6842
|
|
|
|
|
|
|
# Check for the common case of an empty anonymous hash reference: |
6843
|
|
|
|
|
|
|
# Maybe something like sub { { } } |
6844
|
168
|
100
|
|
|
|
557
|
if ( $next_nonblank_token eq '}' ) { |
6845
|
5
|
|
|
|
|
14
|
$code_block_type = EMPTY_STRING; |
6846
|
|
|
|
|
|
|
} |
6847
|
|
|
|
|
|
|
|
6848
|
|
|
|
|
|
|
else { |
6849
|
|
|
|
|
|
|
|
6850
|
|
|
|
|
|
|
# To guess if this '{' is an anonymous hash reference, look ahead |
6851
|
|
|
|
|
|
|
# and test as follows: |
6852
|
|
|
|
|
|
|
# |
6853
|
|
|
|
|
|
|
# it is a hash reference if next come: |
6854
|
|
|
|
|
|
|
# - a string or digit followed by a comma or => |
6855
|
|
|
|
|
|
|
# - bareword followed by => |
6856
|
|
|
|
|
|
|
# otherwise it is a code block |
6857
|
|
|
|
|
|
|
# |
6858
|
|
|
|
|
|
|
# Examples of anonymous hash ref: |
6859
|
|
|
|
|
|
|
# {'aa',}; |
6860
|
|
|
|
|
|
|
# {1,2} |
6861
|
|
|
|
|
|
|
# |
6862
|
|
|
|
|
|
|
# Examples of code blocks: |
6863
|
|
|
|
|
|
|
# {1; print "hello\n", 1;} |
6864
|
|
|
|
|
|
|
# {$a,1}; |
6865
|
|
|
|
|
|
|
|
6866
|
|
|
|
|
|
|
# We are only going to look ahead one more (nonblank/comment) line. |
6867
|
|
|
|
|
|
|
# Strange formatting could cause a bad guess, but that's unlikely. |
6868
|
163
|
|
|
|
|
386
|
my @pre_types; |
6869
|
|
|
|
|
|
|
my @pre_tokens; |
6870
|
|
|
|
|
|
|
|
6871
|
|
|
|
|
|
|
# Ignore the rest of this line if it is a side comment |
6872
|
163
|
100
|
|
|
|
459
|
if ( $next_nonblank_token ne '#' ) { |
6873
|
139
|
|
|
|
|
487
|
@pre_types = @{$rtoken_type}[ $i + 1 .. $max_token_index ]; |
|
139
|
|
|
|
|
725
|
|
6874
|
139
|
|
|
|
|
448
|
@pre_tokens = @{$rtokens}[ $i + 1 .. $max_token_index ]; |
|
139
|
|
|
|
|
696
|
|
6875
|
|
|
|
|
|
|
} |
6876
|
|
|
|
|
|
|
|
6877
|
|
|
|
|
|
|
# Here 20 is arbitrary but generous, and prevents wasting lots of time |
6878
|
|
|
|
|
|
|
# in mangled files |
6879
|
163
|
|
|
|
|
699
|
my ( $rpre_tokens, $rpre_types ) = |
6880
|
|
|
|
|
|
|
$self->peek_ahead_for_n_nonblank_pre_tokens(20); |
6881
|
163
|
100
|
66
|
|
|
601
|
if ( defined($rpre_types) && @{$rpre_types} ) { |
|
155
|
|
|
|
|
587
|
|
6882
|
155
|
|
|
|
|
288
|
push @pre_types, @{$rpre_types}; |
|
155
|
|
|
|
|
648
|
|
6883
|
155
|
|
|
|
|
329
|
push @pre_tokens, @{$rpre_tokens}; |
|
155
|
|
|
|
|
718
|
|
6884
|
|
|
|
|
|
|
} |
6885
|
|
|
|
|
|
|
|
6886
|
|
|
|
|
|
|
# put a sentinel token to simplify stopping the search |
6887
|
163
|
|
|
|
|
397
|
push @pre_types, '}'; |
6888
|
163
|
|
|
|
|
357
|
push @pre_types, '}'; |
6889
|
|
|
|
|
|
|
|
6890
|
163
|
|
|
|
|
301
|
my $jbeg = 0; |
6891
|
163
|
100
|
|
|
|
490
|
$jbeg = 1 if $pre_types[0] eq 'b'; |
6892
|
|
|
|
|
|
|
|
6893
|
|
|
|
|
|
|
# first look for one of these |
6894
|
|
|
|
|
|
|
# - bareword |
6895
|
|
|
|
|
|
|
# - bareword with leading - |
6896
|
|
|
|
|
|
|
# - digit |
6897
|
|
|
|
|
|
|
# - quoted string |
6898
|
163
|
|
|
|
|
288
|
my $j = $jbeg; |
6899
|
163
|
100
|
33
|
|
|
1233
|
if ( $pre_types[$j] =~ /^[\'\"]/ ) { |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
6900
|
|
|
|
|
|
|
|
6901
|
|
|
|
|
|
|
# find the closing quote; don't worry about escapes |
6902
|
1
|
|
|
|
|
3
|
my $quote_mark = $pre_types[$j]; |
6903
|
1
|
|
|
|
|
5
|
foreach my $k ( $j + 1 .. @pre_types - 2 ) { |
6904
|
1
|
50
|
|
|
|
6
|
if ( $pre_types[$k] eq $quote_mark ) { |
6905
|
1
|
|
|
|
|
3
|
$j = $k + 1; |
6906
|
|
|
|
|
|
|
##my $next = $pre_types[$j]; |
6907
|
1
|
|
|
|
|
2
|
last; |
6908
|
|
|
|
|
|
|
} |
6909
|
|
|
|
|
|
|
} |
6910
|
|
|
|
|
|
|
} |
6911
|
|
|
|
|
|
|
elsif ( $pre_types[$j] eq 'd' ) { |
6912
|
8
|
|
|
|
|
15
|
$j++; |
6913
|
|
|
|
|
|
|
} |
6914
|
|
|
|
|
|
|
elsif ( $pre_types[$j] eq 'w' ) { |
6915
|
71
|
|
|
|
|
168
|
$j++; |
6916
|
|
|
|
|
|
|
} |
6917
|
|
|
|
|
|
|
elsif ( $pre_types[$j] eq '-' && $pre_types[ ++$j ] eq 'w' ) { |
6918
|
0
|
|
|
|
|
0
|
$j++; |
6919
|
|
|
|
|
|
|
} |
6920
|
163
|
100
|
|
|
|
487
|
if ( $j > $jbeg ) { |
6921
|
|
|
|
|
|
|
|
6922
|
80
|
100
|
|
|
|
332
|
$j++ if $pre_types[$j] eq 'b'; |
6923
|
|
|
|
|
|
|
|
6924
|
|
|
|
|
|
|
# Patched for RT #95708 |
6925
|
80
|
100
|
33
|
|
|
665
|
if ( |
|
|
|
66
|
|
|
|
|
|
|
|
66
|
|
|
|
|
6926
|
|
|
|
|
|
|
|
6927
|
|
|
|
|
|
|
# it is a comma which is not a pattern delimiter except for qw |
6928
|
|
|
|
|
|
|
( |
6929
|
|
|
|
|
|
|
$pre_types[$j] eq ',' |
6930
|
|
|
|
|
|
|
## !~ /^(s|m|y|tr|qr|q|qq|qx)$/ |
6931
|
|
|
|
|
|
|
&& !$is_q_qq_qx_qr_s_y_tr_m{ $pre_tokens[$jbeg] } |
6932
|
|
|
|
|
|
|
) |
6933
|
|
|
|
|
|
|
|
6934
|
|
|
|
|
|
|
# or a => |
6935
|
|
|
|
|
|
|
|| ( $pre_types[$j] eq '=' && $pre_types[ ++$j ] eq '>' ) |
6936
|
|
|
|
|
|
|
) |
6937
|
|
|
|
|
|
|
{ |
6938
|
18
|
|
|
|
|
37
|
$code_block_type = EMPTY_STRING; |
6939
|
|
|
|
|
|
|
} |
6940
|
|
|
|
|
|
|
} |
6941
|
|
|
|
|
|
|
|
6942
|
163
|
100
|
|
|
|
524
|
if ($code_block_type) { |
6943
|
|
|
|
|
|
|
|
6944
|
|
|
|
|
|
|
# Patch for cases b1085 b1128: It is uncertain if this is a block. |
6945
|
|
|
|
|
|
|
# If this brace follows a bareword, then append a space as a signal |
6946
|
|
|
|
|
|
|
# to the formatter that this may not be a block brace. To find the |
6947
|
|
|
|
|
|
|
# corresponding code in Formatter.pm search for 'b1085'. |
6948
|
145
|
100
|
|
|
|
1181
|
$code_block_type .= SPACE if ( $code_block_type =~ /^\w/ ); |
6949
|
|
|
|
|
|
|
} |
6950
|
|
|
|
|
|
|
} |
6951
|
|
|
|
|
|
|
|
6952
|
168
|
|
|
|
|
595
|
return $code_block_type; |
6953
|
|
|
|
|
|
|
} ## end sub decide_if_code_block |
6954
|
|
|
|
|
|
|
|
6955
|
|
|
|
|
|
|
sub report_unexpected { |
6956
|
|
|
|
|
|
|
|
6957
|
|
|
|
|
|
|
# report unexpected token type and show where it is |
6958
|
|
|
|
|
|
|
# USES GLOBAL VARIABLES: (none) |
6959
|
0
|
|
|
0
|
0
|
0
|
my ( $self, $found, $expecting, $i_tok, $last_nonblank_i, $rpretoken_map, |
6960
|
|
|
|
|
|
|
$rpretoken_type, $input_line ) |
6961
|
|
|
|
|
|
|
= @_; |
6962
|
|
|
|
|
|
|
|
6963
|
0
|
0
|
|
|
|
0
|
if ( ++$self->[_unexpected_error_count_] <= MAX_NAG_MESSAGES ) { |
6964
|
0
|
|
|
|
|
0
|
my $msg = "found $found where $expecting expected"; |
6965
|
0
|
|
|
|
|
0
|
my $pos = $rpretoken_map->[$i_tok]; |
6966
|
0
|
|
|
|
|
0
|
$self->interrupt_logfile(); |
6967
|
0
|
|
|
|
|
0
|
my $input_line_number = $self->[_last_line_number_]; |
6968
|
0
|
|
|
|
|
0
|
my ( $offset, $numbered_line, $underline ) = |
6969
|
|
|
|
|
|
|
make_numbered_line( $input_line_number, $input_line, $pos ); |
6970
|
0
|
|
|
|
|
0
|
$underline = write_on_underline( $underline, $pos - $offset, '^' ); |
6971
|
|
|
|
|
|
|
|
6972
|
0
|
|
|
|
|
0
|
my $trailer = EMPTY_STRING; |
6973
|
0
|
0
|
0
|
|
|
0
|
if ( ( $i_tok > 0 ) && ( $last_nonblank_i >= 0 ) ) { |
6974
|
0
|
|
|
|
|
0
|
my $pos_prev = $rpretoken_map->[$last_nonblank_i]; |
6975
|
0
|
|
|
|
|
0
|
my $num; |
6976
|
0
|
0
|
|
|
|
0
|
if ( $rpretoken_type->[ $i_tok - 1 ] eq 'b' ) { |
6977
|
0
|
|
|
|
|
0
|
$num = $rpretoken_map->[ $i_tok - 1 ] - $pos_prev; |
6978
|
|
|
|
|
|
|
} |
6979
|
|
|
|
|
|
|
else { |
6980
|
0
|
|
|
|
|
0
|
$num = $pos - $pos_prev; |
6981
|
|
|
|
|
|
|
} |
6982
|
0
|
0
|
|
|
|
0
|
if ( $num > 40 ) { $num = 40; $pos_prev = $pos - 40; } |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
6983
|
|
|
|
|
|
|
|
6984
|
|
|
|
|
|
|
$underline = |
6985
|
0
|
|
|
|
|
0
|
write_on_underline( $underline, $pos_prev - $offset, '-' x $num ); |
6986
|
0
|
|
|
|
|
0
|
$trailer = " (previous token underlined)"; |
6987
|
|
|
|
|
|
|
} |
6988
|
0
|
|
|
|
|
0
|
$underline =~ s/\s+$//; |
6989
|
0
|
|
|
|
|
0
|
$self->warning( $numbered_line . "\n" ); |
6990
|
0
|
|
|
|
|
0
|
$self->warning( $underline . "\n" ); |
6991
|
0
|
|
|
|
|
0
|
$self->warning( $msg . $trailer . "\n" ); |
6992
|
0
|
|
|
|
|
0
|
$self->resume_logfile(); |
6993
|
|
|
|
|
|
|
} |
6994
|
0
|
|
|
|
|
0
|
return; |
6995
|
|
|
|
|
|
|
} ## end sub report_unexpected |
6996
|
|
|
|
|
|
|
|
6997
|
|
|
|
|
|
|
my %is_sigil_or_paren; |
6998
|
|
|
|
|
|
|
my %is_R_closing_sb; |
6999
|
|
|
|
|
|
|
|
7000
|
|
|
|
|
|
|
BEGIN { |
7001
|
|
|
|
|
|
|
|
7002
|
38
|
|
|
38
|
|
294
|
my @q = qw< $ & % * @ ) >; |
7003
|
38
|
|
|
|
|
324
|
@{is_sigil_or_paren}{@q} = (1) x scalar(@q); |
7004
|
|
|
|
|
|
|
|
7005
|
38
|
|
|
|
|
173
|
@q = qw(R ]); |
7006
|
38
|
|
|
|
|
84484
|
@{is_R_closing_sb}{@q} = (1) x scalar(@q); |
7007
|
|
|
|
|
|
|
} ## end BEGIN |
7008
|
|
|
|
|
|
|
|
7009
|
|
|
|
|
|
|
sub is_non_structural_brace { |
7010
|
|
|
|
|
|
|
|
7011
|
|
|
|
|
|
|
# Decide if a brace or bracket is structural or non-structural |
7012
|
|
|
|
|
|
|
# by looking at the previous token and type |
7013
|
|
|
|
|
|
|
# USES GLOBAL VARIABLES: $last_nonblank_type, $last_nonblank_token |
7014
|
|
|
|
|
|
|
|
7015
|
|
|
|
|
|
|
# EXPERIMENTAL: Mark slices as structural; idea was to improve formatting. |
7016
|
|
|
|
|
|
|
# Tentatively deactivated because it caused the wrong operator expectation |
7017
|
|
|
|
|
|
|
# for this code: |
7018
|
|
|
|
|
|
|
# $user = @vars[1] / 100; |
7019
|
|
|
|
|
|
|
# Must update sub operator_expected before re-implementing. |
7020
|
|
|
|
|
|
|
# if ( $last_nonblank_type eq 'i' && $last_nonblank_token =~ /^@/ ) { |
7021
|
|
|
|
|
|
|
# return 0; |
7022
|
|
|
|
|
|
|
# } |
7023
|
|
|
|
|
|
|
|
7024
|
|
|
|
|
|
|
#-------------------------------------------------------------- |
7025
|
|
|
|
|
|
|
# NOTE: braces after type characters start code blocks, but for |
7026
|
|
|
|
|
|
|
# simplicity these are not identified as such. See also |
7027
|
|
|
|
|
|
|
# sub code_block_type |
7028
|
|
|
|
|
|
|
#-------------------------------------------------------------- |
7029
|
|
|
|
|
|
|
|
7030
|
|
|
|
|
|
|
##if ($last_nonblank_type eq 't') {return 0} |
7031
|
|
|
|
|
|
|
|
7032
|
|
|
|
|
|
|
# otherwise, it is non-structural if it is decorated |
7033
|
|
|
|
|
|
|
# by type information. |
7034
|
|
|
|
|
|
|
# For example, the '{' here is non-structural: ${xxx} |
7035
|
|
|
|
|
|
|
# Removed '::' to fix c074 |
7036
|
|
|
|
|
|
|
## $last_nonblank_token =~ /^([\$\@\*\&\%\)]|->|::)/ |
7037
|
|
|
|
|
|
|
return ( |
7038
|
|
|
|
|
|
|
## $last_nonblank_token =~ /^([\$\@\*\&\%\)]|->)/ |
7039
|
|
|
|
|
|
|
$is_sigil_or_paren{ substr( $last_nonblank_token, 0, 1 ) } |
7040
|
|
|
|
|
|
|
|| substr( $last_nonblank_token, 0, 2 ) eq '->' |
7041
|
|
|
|
|
|
|
|
7042
|
|
|
|
|
|
|
# or if we follow a hash or array closing curly brace or bracket |
7043
|
|
|
|
|
|
|
# For example, the second '{' in this is non-structural: $a{'x'}{'y'} |
7044
|
|
|
|
|
|
|
# because the first '}' would have been given type 'R' |
7045
|
|
|
|
|
|
|
##|| $last_nonblank_type =~ /^([R\]])$/ |
7046
|
2253
|
|
66
|
2253
|
0
|
14696
|
|| $is_R_closing_sb{$last_nonblank_type} |
7047
|
|
|
|
|
|
|
); |
7048
|
|
|
|
|
|
|
} ## end sub is_non_structural_brace |
7049
|
|
|
|
|
|
|
|
7050
|
|
|
|
|
|
|
####################################################################### |
7051
|
|
|
|
|
|
|
# Tokenizer routines for tracking container nesting depths |
7052
|
|
|
|
|
|
|
####################################################################### |
7053
|
|
|
|
|
|
|
|
7054
|
|
|
|
|
|
|
# The following routines keep track of nesting depths of the nesting |
7055
|
|
|
|
|
|
|
# types, ( [ { and ?. This is necessary for determining the indentation |
7056
|
|
|
|
|
|
|
# level, and also for debugging programs. Not only do they keep track of |
7057
|
|
|
|
|
|
|
# nesting depths of the individual brace types, but they check that each |
7058
|
|
|
|
|
|
|
# of the other brace types is balanced within matching pairs. For |
7059
|
|
|
|
|
|
|
# example, if the program sees this sequence: |
7060
|
|
|
|
|
|
|
# |
7061
|
|
|
|
|
|
|
# { ( ( ) } |
7062
|
|
|
|
|
|
|
# |
7063
|
|
|
|
|
|
|
# then it can determine that there is an extra left paren somewhere |
7064
|
|
|
|
|
|
|
# between the { and the }. And so on with every other possible |
7065
|
|
|
|
|
|
|
# combination of outer and inner brace types. For another |
7066
|
|
|
|
|
|
|
# example: |
7067
|
|
|
|
|
|
|
# |
7068
|
|
|
|
|
|
|
# ( [ ..... ] ] ) |
7069
|
|
|
|
|
|
|
# |
7070
|
|
|
|
|
|
|
# which has an extra ] within the parens. |
7071
|
|
|
|
|
|
|
# |
7072
|
|
|
|
|
|
|
# The brace types have indexes 0 .. 3 which are indexes into |
7073
|
|
|
|
|
|
|
# the matrices. |
7074
|
|
|
|
|
|
|
# |
7075
|
|
|
|
|
|
|
# The pair ? : are treated as just another nesting type, with ? acting |
7076
|
|
|
|
|
|
|
# as the opening brace and : acting as the closing brace. |
7077
|
|
|
|
|
|
|
# |
7078
|
|
|
|
|
|
|
# The matrix |
7079
|
|
|
|
|
|
|
# |
7080
|
|
|
|
|
|
|
# $rdepth_array->[$a][$b][ $rcurrent_depth->[$a] ] = $rcurrent_depth->[$b]; |
7081
|
|
|
|
|
|
|
# |
7082
|
|
|
|
|
|
|
# saves the nesting depth of brace type $b (where $b is either of the other |
7083
|
|
|
|
|
|
|
# nesting types) when brace type $a enters a new depth. When this depth |
7084
|
|
|
|
|
|
|
# decreases, a check is made that the current depth of brace types $b is |
7085
|
|
|
|
|
|
|
# unchanged, or otherwise there must have been an error. This can |
7086
|
|
|
|
|
|
|
# be very useful for localizing errors, particularly when perl runs to |
7087
|
|
|
|
|
|
|
# the end of a large file (such as this one) and announces that there |
7088
|
|
|
|
|
|
|
# is a problem somewhere. |
7089
|
|
|
|
|
|
|
# |
7090
|
|
|
|
|
|
|
# A numerical sequence number is maintained for every nesting type, |
7091
|
|
|
|
|
|
|
# so that each matching pair can be uniquely identified in a simple |
7092
|
|
|
|
|
|
|
# way. |
7093
|
|
|
|
|
|
|
|
7094
|
|
|
|
|
|
|
sub increase_nesting_depth { |
7095
|
4561
|
|
|
4561
|
0
|
9081
|
my ( $self, $aa, $pos ) = @_; |
7096
|
|
|
|
|
|
|
|
7097
|
|
|
|
|
|
|
# USES GLOBAL VARIABLES: $rcurrent_depth, |
7098
|
|
|
|
|
|
|
# $rcurrent_sequence_number, $rdepth_array, |
7099
|
|
|
|
|
|
|
# $rstarting_line_of_current_depth, $statement_type |
7100
|
4561
|
|
|
|
|
8055
|
my $cd_aa = ++$rcurrent_depth->[$aa]; |
7101
|
4561
|
|
|
|
|
6715
|
$total_depth++; |
7102
|
4561
|
|
|
|
|
8791
|
$rtotal_depth->[$aa][$cd_aa] = $total_depth; |
7103
|
4561
|
|
|
|
|
7465
|
my $input_line_number = $self->[_last_line_number_]; |
7104
|
4561
|
|
|
|
|
7598
|
my $input_line = $self->[_line_of_text_]; |
7105
|
|
|
|
|
|
|
|
7106
|
|
|
|
|
|
|
# Sequence numbers increment by number of items. This keeps |
7107
|
|
|
|
|
|
|
# a unique set of numbers but still allows the relative location |
7108
|
|
|
|
|
|
|
# of any type to be determined. |
7109
|
|
|
|
|
|
|
|
7110
|
|
|
|
|
|
|
# make a new unique sequence number |
7111
|
4561
|
|
|
|
|
7776
|
my $seqno = $next_sequence_number++; |
7112
|
|
|
|
|
|
|
|
7113
|
4561
|
|
|
|
|
8216
|
$rcurrent_sequence_number->[$aa][$cd_aa] = $seqno; |
7114
|
|
|
|
|
|
|
|
7115
|
4561
|
|
|
|
|
13446
|
$rstarting_line_of_current_depth->[$aa][$cd_aa] = |
7116
|
|
|
|
|
|
|
[ $input_line_number, $input_line, $pos ]; |
7117
|
|
|
|
|
|
|
|
7118
|
4561
|
|
|
|
|
13763
|
for my $bb ( 0 .. @closing_brace_names - 1 ) { |
7119
|
18244
|
100
|
|
|
|
33322
|
next if ( $bb == $aa ); |
7120
|
13683
|
|
|
|
|
26753
|
$rdepth_array->[$aa][$bb][$cd_aa] = $rcurrent_depth->[$bb]; |
7121
|
|
|
|
|
|
|
} |
7122
|
|
|
|
|
|
|
|
7123
|
|
|
|
|
|
|
# set a flag for indenting a nested ternary statement |
7124
|
4561
|
|
|
|
|
8333
|
my $indent = 0; |
7125
|
4561
|
100
|
|
|
|
10328
|
if ( $aa == QUESTION_COLON ) { |
7126
|
187
|
|
|
|
|
614
|
$rnested_ternary_flag->[$cd_aa] = 0; |
7127
|
187
|
100
|
|
|
|
629
|
if ( $cd_aa > 1 ) { |
7128
|
17
|
100
|
|
|
|
98
|
if ( $rnested_ternary_flag->[ $cd_aa - 1 ] == 0 ) { |
7129
|
16
|
|
|
|
|
66
|
my $pdepth = $rtotal_depth->[$aa][ $cd_aa - 1 ]; |
7130
|
16
|
100
|
|
|
|
68
|
if ( $pdepth == $total_depth - 1 ) { |
7131
|
8
|
|
|
|
|
15
|
$indent = 1; |
7132
|
8
|
|
|
|
|
26
|
$rnested_ternary_flag->[ $cd_aa - 1 ] = -1; |
7133
|
|
|
|
|
|
|
} |
7134
|
|
|
|
|
|
|
} |
7135
|
|
|
|
|
|
|
} |
7136
|
|
|
|
|
|
|
} |
7137
|
|
|
|
|
|
|
|
7138
|
|
|
|
|
|
|
# Fix part #1 for git82: save last token type for propagation of type 'Z' |
7139
|
4561
|
|
|
|
|
15208
|
$rnested_statement_type->[$aa][$cd_aa] = |
7140
|
|
|
|
|
|
|
[ $statement_type, $last_nonblank_type, $last_nonblank_token ]; |
7141
|
4561
|
|
|
|
|
7928
|
$statement_type = EMPTY_STRING; |
7142
|
4561
|
|
|
|
|
12693
|
return ( $seqno, $indent ); |
7143
|
|
|
|
|
|
|
} ## end sub increase_nesting_depth |
7144
|
|
|
|
|
|
|
|
7145
|
|
|
|
|
|
|
sub is_balanced_closing_container { |
7146
|
|
|
|
|
|
|
|
7147
|
|
|
|
|
|
|
# Return true if a closing container can go here without error |
7148
|
|
|
|
|
|
|
# Return false if not |
7149
|
47
|
|
|
47
|
0
|
125
|
my ($aa) = @_; |
7150
|
|
|
|
|
|
|
|
7151
|
|
|
|
|
|
|
# cannot close if there was no opening |
7152
|
47
|
|
|
|
|
95
|
my $cd_aa = $rcurrent_depth->[$aa]; |
7153
|
47
|
100
|
|
|
|
187
|
return unless ( $cd_aa > 0 ); |
7154
|
|
|
|
|
|
|
|
7155
|
|
|
|
|
|
|
# check that any other brace types $bb contained within would be balanced |
7156
|
8
|
|
|
|
|
29
|
for my $bb ( 0 .. @closing_brace_names - 1 ) { |
7157
|
8
|
50
|
|
|
|
24
|
next if ( $bb == $aa ); |
7158
|
|
|
|
|
|
|
return |
7159
|
|
|
|
|
|
|
unless ( |
7160
|
8
|
50
|
|
|
|
41
|
$rdepth_array->[$aa][$bb][$cd_aa] == $rcurrent_depth->[$bb] ); |
7161
|
|
|
|
|
|
|
} |
7162
|
|
|
|
|
|
|
|
7163
|
|
|
|
|
|
|
# OK, everything will be balanced |
7164
|
0
|
|
|
|
|
0
|
return 1; |
7165
|
|
|
|
|
|
|
} ## end sub is_balanced_closing_container |
7166
|
|
|
|
|
|
|
|
7167
|
|
|
|
|
|
|
sub decrease_nesting_depth { |
7168
|
|
|
|
|
|
|
|
7169
|
4561
|
|
|
4561
|
0
|
9104
|
my ( $self, $aa, $pos ) = @_; |
7170
|
|
|
|
|
|
|
|
7171
|
|
|
|
|
|
|
# USES GLOBAL VARIABLES: $rcurrent_depth, |
7172
|
|
|
|
|
|
|
# $rcurrent_sequence_number, $rdepth_array, $rstarting_line_of_current_depth |
7173
|
|
|
|
|
|
|
# $statement_type |
7174
|
4561
|
|
|
|
|
7024
|
my $seqno = 0; |
7175
|
4561
|
|
|
|
|
7249
|
my $input_line_number = $self->[_last_line_number_]; |
7176
|
4561
|
|
|
|
|
7721
|
my $input_line = $self->[_line_of_text_]; |
7177
|
|
|
|
|
|
|
|
7178
|
4561
|
|
|
|
|
6882
|
my $outdent = 0; |
7179
|
4561
|
|
|
|
|
6766
|
$total_depth--; |
7180
|
4561
|
|
|
|
|
7878
|
my $cd_aa = $rcurrent_depth->[$aa]; |
7181
|
4561
|
50
|
|
|
|
9255
|
if ( $cd_aa > 0 ) { |
7182
|
|
|
|
|
|
|
|
7183
|
|
|
|
|
|
|
# set a flag for un-indenting after seeing a nested ternary statement |
7184
|
4561
|
|
|
|
|
8338
|
$seqno = $rcurrent_sequence_number->[$aa][$cd_aa]; |
7185
|
4561
|
100
|
|
|
|
9757
|
if ( $aa == QUESTION_COLON ) { |
7186
|
187
|
|
|
|
|
556
|
$outdent = $rnested_ternary_flag->[$cd_aa]; |
7187
|
|
|
|
|
|
|
} |
7188
|
|
|
|
|
|
|
|
7189
|
|
|
|
|
|
|
# Fix part #2 for git82: use saved type for propagation of type 'Z' |
7190
|
|
|
|
|
|
|
# through type L-R braces. Perl seems to allow ${bareword} |
7191
|
|
|
|
|
|
|
# as an indirect object, but nothing much more complex than that. |
7192
|
|
|
|
|
|
|
( $statement_type, my $saved_type, my $saved_token ) = |
7193
|
4561
|
|
|
|
|
6634
|
@{ $rnested_statement_type->[$aa][ $rcurrent_depth->[$aa] ] }; |
|
4561
|
|
|
|
|
12980
|
|
7194
|
4561
|
50
|
100
|
|
|
16127
|
if ( $aa == BRACE |
|
|
|
66
|
|
|
|
|
|
|
|
66
|
|
|
|
|
7195
|
|
|
|
|
|
|
&& $saved_type eq 'Z' |
7196
|
|
|
|
|
|
|
&& $last_nonblank_type eq 'w' |
7197
|
|
|
|
|
|
|
&& $rbrace_structural_type->[$brace_depth] eq 'L' ) |
7198
|
|
|
|
|
|
|
{ |
7199
|
1
|
|
|
|
|
4
|
$last_nonblank_type = $saved_type; |
7200
|
|
|
|
|
|
|
} |
7201
|
|
|
|
|
|
|
|
7202
|
|
|
|
|
|
|
# check that any brace types $bb contained within are balanced |
7203
|
4561
|
|
|
|
|
12842
|
for my $bb ( 0 .. @closing_brace_names - 1 ) { |
7204
|
18244
|
100
|
|
|
|
32516
|
next if ( $bb == $aa ); |
7205
|
|
|
|
|
|
|
|
7206
|
13683
|
50
|
|
|
|
31970
|
unless ( |
7207
|
|
|
|
|
|
|
$rdepth_array->[$aa][$bb][$cd_aa] == $rcurrent_depth->[$bb] ) |
7208
|
|
|
|
|
|
|
{ |
7209
|
0
|
|
|
|
|
0
|
my $diff = |
7210
|
|
|
|
|
|
|
$rcurrent_depth->[$bb] - $rdepth_array->[$aa][$bb][$cd_aa]; |
7211
|
|
|
|
|
|
|
|
7212
|
|
|
|
|
|
|
# don't whine too many times |
7213
|
0
|
|
|
|
|
0
|
my $saw_brace_error = $self->get_saw_brace_error(); |
7214
|
0
|
0
|
0
|
|
|
0
|
if ( |
|
|
|
0
|
|
|
|
|
7215
|
|
|
|
|
|
|
$saw_brace_error <= MAX_NAG_MESSAGES |
7216
|
|
|
|
|
|
|
|
7217
|
|
|
|
|
|
|
# if too many closing types have occurred, we probably |
7218
|
|
|
|
|
|
|
# already caught this error |
7219
|
|
|
|
|
|
|
&& ( ( $diff > 0 ) || ( $saw_brace_error <= 0 ) ) |
7220
|
|
|
|
|
|
|
) |
7221
|
|
|
|
|
|
|
{ |
7222
|
0
|
|
|
|
|
0
|
$self->interrupt_logfile(); |
7223
|
0
|
|
|
|
|
0
|
my $rsl = $rstarting_line_of_current_depth->[$aa][$cd_aa]; |
7224
|
0
|
|
|
|
|
0
|
my $sl = $rsl->[0]; |
7225
|
0
|
|
|
|
|
0
|
my $rel = [ $input_line_number, $input_line, $pos ]; |
7226
|
0
|
|
|
|
|
0
|
my $el = $rel->[0]; |
7227
|
0
|
|
|
|
|
0
|
my ($ess); |
7228
|
|
|
|
|
|
|
|
7229
|
0
|
0
|
0
|
|
|
0
|
if ( $diff == 1 || $diff == -1 ) { |
7230
|
0
|
|
|
|
|
0
|
$ess = EMPTY_STRING; |
7231
|
|
|
|
|
|
|
} |
7232
|
|
|
|
|
|
|
else { |
7233
|
0
|
|
|
|
|
0
|
$ess = 's'; |
7234
|
|
|
|
|
|
|
} |
7235
|
0
|
0
|
|
|
|
0
|
my $bname = |
7236
|
|
|
|
|
|
|
( $diff > 0 ) |
7237
|
|
|
|
|
|
|
? $opening_brace_names[$bb] |
7238
|
|
|
|
|
|
|
: $closing_brace_names[$bb]; |
7239
|
0
|
|
|
|
|
0
|
$self->write_error_indicator_pair( @{$rsl}, '^' ); |
|
0
|
|
|
|
|
0
|
|
7240
|
0
|
|
|
|
|
0
|
my $msg = <<"EOM"; |
7241
|
|
|
|
|
|
|
Found $diff extra $bname$ess between $opening_brace_names[$aa] on line $sl and $closing_brace_names[$aa] on line $el |
7242
|
|
|
|
|
|
|
EOM |
7243
|
|
|
|
|
|
|
|
7244
|
0
|
0
|
|
|
|
0
|
if ( $diff > 0 ) { |
7245
|
0
|
|
|
|
|
0
|
my $rml = |
7246
|
|
|
|
|
|
|
$rstarting_line_of_current_depth->[$bb] |
7247
|
|
|
|
|
|
|
[ $rcurrent_depth->[$bb] ]; |
7248
|
0
|
|
|
|
|
0
|
my $ml = $rml->[0]; |
7249
|
0
|
|
|
|
|
0
|
$msg .= |
7250
|
|
|
|
|
|
|
" The most recent un-matched $bname is on line $ml\n"; |
7251
|
0
|
|
|
|
|
0
|
$self->write_error_indicator_pair( @{$rml}, '^' ); |
|
0
|
|
|
|
|
0
|
|
7252
|
|
|
|
|
|
|
} |
7253
|
0
|
|
|
|
|
0
|
$self->write_error_indicator_pair( @{$rel}, '^' ); |
|
0
|
|
|
|
|
0
|
|
7254
|
0
|
|
|
|
|
0
|
$self->warning($msg); |
7255
|
0
|
|
|
|
|
0
|
$self->resume_logfile(); |
7256
|
|
|
|
|
|
|
} |
7257
|
0
|
|
|
|
|
0
|
$self->increment_brace_error(); |
7258
|
|
|
|
|
|
|
} |
7259
|
|
|
|
|
|
|
} |
7260
|
4561
|
|
|
|
|
8410
|
$rcurrent_depth->[$aa]--; |
7261
|
|
|
|
|
|
|
} |
7262
|
|
|
|
|
|
|
else { |
7263
|
|
|
|
|
|
|
|
7264
|
0
|
|
|
|
|
0
|
my $saw_brace_error = $self->get_saw_brace_error(); |
7265
|
0
|
0
|
|
|
|
0
|
if ( $saw_brace_error <= MAX_NAG_MESSAGES ) { |
7266
|
0
|
|
|
|
|
0
|
my $msg = <<"EOM"; |
7267
|
|
|
|
|
|
|
There is no previous $opening_brace_names[$aa] to match a $closing_brace_names[$aa] on line $input_line_number |
7268
|
|
|
|
|
|
|
EOM |
7269
|
0
|
|
|
|
|
0
|
$self->indicate_error( $msg, $input_line_number, $input_line, $pos, |
7270
|
|
|
|
|
|
|
'^' ); |
7271
|
|
|
|
|
|
|
} |
7272
|
0
|
|
|
|
|
0
|
$self->increment_brace_error(); |
7273
|
|
|
|
|
|
|
|
7274
|
|
|
|
|
|
|
# keep track of errors in braces alone (ignoring ternary nesting errors) |
7275
|
0
|
0
|
|
|
|
0
|
$self->[_true_brace_error_count_]++ |
7276
|
|
|
|
|
|
|
if ( $closing_brace_names[$aa] ne "':'" ); |
7277
|
|
|
|
|
|
|
} |
7278
|
4561
|
|
|
|
|
12189
|
return ( $seqno, $outdent ); |
7279
|
|
|
|
|
|
|
} ## end sub decrease_nesting_depth |
7280
|
|
|
|
|
|
|
|
7281
|
|
|
|
|
|
|
sub check_final_nesting_depths { |
7282
|
|
|
|
|
|
|
|
7283
|
|
|
|
|
|
|
# USES GLOBAL VARIABLES: $rcurrent_depth, $rstarting_line_of_current_depth |
7284
|
556
|
|
|
556
|
0
|
1346
|
my $self = shift; |
7285
|
|
|
|
|
|
|
|
7286
|
556
|
|
|
|
|
2342
|
for my $aa ( 0 .. @closing_brace_names - 1 ) { |
7287
|
|
|
|
|
|
|
|
7288
|
2224
|
|
|
|
|
3752
|
my $cd_aa = $rcurrent_depth->[$aa]; |
7289
|
2224
|
50
|
|
|
|
5343
|
if ($cd_aa) { |
7290
|
0
|
|
|
|
|
0
|
my $rsl = $rstarting_line_of_current_depth->[$aa][$cd_aa]; |
7291
|
0
|
|
|
|
|
0
|
my $sl = $rsl->[0]; |
7292
|
0
|
|
|
|
|
0
|
my $msg = <<"EOM"; |
7293
|
|
|
|
|
|
|
Final nesting depth of $opening_brace_names[$aa]s is $cd_aa |
7294
|
|
|
|
|
|
|
The most recent un-matched $opening_brace_names[$aa] is on line $sl |
7295
|
|
|
|
|
|
|
EOM |
7296
|
0
|
|
|
|
|
0
|
$self->indicate_error( $msg, @{$rsl}, '^' ); |
|
0
|
|
|
|
|
0
|
|
7297
|
0
|
|
|
|
|
0
|
$self->increment_brace_error(); |
7298
|
|
|
|
|
|
|
} |
7299
|
|
|
|
|
|
|
} |
7300
|
556
|
|
|
|
|
1513
|
return; |
7301
|
|
|
|
|
|
|
} ## end sub check_final_nesting_depths |
7302
|
|
|
|
|
|
|
|
7303
|
|
|
|
|
|
|
####################################################################### |
7304
|
|
|
|
|
|
|
# Tokenizer routines for looking ahead in input stream |
7305
|
|
|
|
|
|
|
####################################################################### |
7306
|
|
|
|
|
|
|
|
7307
|
|
|
|
|
|
|
sub peek_ahead_for_n_nonblank_pre_tokens { |
7308
|
|
|
|
|
|
|
|
7309
|
|
|
|
|
|
|
# returns next n pretokens if they exist |
7310
|
|
|
|
|
|
|
# returns undef's if hits eof without seeing any pretokens |
7311
|
|
|
|
|
|
|
# USES GLOBAL VARIABLES: (none) |
7312
|
170
|
|
|
170
|
0
|
454
|
my ( $self, $max_pretokens ) = @_; |
7313
|
170
|
|
|
|
|
307
|
my $line; |
7314
|
170
|
|
|
|
|
289
|
my $i = 0; |
7315
|
170
|
|
|
|
|
588
|
my ( $rpre_tokens, $rmap, $rpre_types ); |
7316
|
|
|
|
|
|
|
|
7317
|
170
|
|
|
|
|
749
|
while ( $line = $self->[_line_buffer_object_]->peek_ahead( $i++ ) ) { |
7318
|
182
|
|
|
|
|
975
|
$line =~ s/^\s*//; # trim leading blanks |
7319
|
182
|
100
|
|
|
|
642
|
next if ( length($line) <= 0 ); # skip blank |
7320
|
176
|
100
|
|
|
|
601
|
next if ( $line =~ /^#/ ); # skip comment |
7321
|
162
|
|
|
|
|
433
|
( $rpre_tokens, $rmap, $rpre_types ) = |
7322
|
|
|
|
|
|
|
pre_tokenize( $line, $max_pretokens ); |
7323
|
162
|
|
|
|
|
464
|
last; |
7324
|
|
|
|
|
|
|
} |
7325
|
170
|
|
|
|
|
656
|
return ( $rpre_tokens, $rpre_types ); |
7326
|
|
|
|
|
|
|
} ## end sub peek_ahead_for_n_nonblank_pre_tokens |
7327
|
|
|
|
|
|
|
|
7328
|
|
|
|
|
|
|
# look ahead for next non-blank, non-comment line of code |
7329
|
|
|
|
|
|
|
sub peek_ahead_for_nonblank_token { |
7330
|
|
|
|
|
|
|
|
7331
|
|
|
|
|
|
|
# USES GLOBAL VARIABLES: (none) |
7332
|
115
|
|
|
115
|
0
|
374
|
my ( $self, $rtokens, $max_token_index ) = @_; |
7333
|
115
|
|
|
|
|
216
|
my $line; |
7334
|
115
|
|
|
|
|
235
|
my $i = 0; |
7335
|
|
|
|
|
|
|
|
7336
|
115
|
|
|
|
|
586
|
while ( $line = $self->[_line_buffer_object_]->peek_ahead( $i++ ) ) { |
7337
|
159
|
|
|
|
|
863
|
$line =~ s/^\s*//; # trim leading blanks |
7338
|
159
|
100
|
|
|
|
619
|
next if ( length($line) <= 0 ); # skip blank |
7339
|
134
|
100
|
|
|
|
551
|
next if ( $line =~ /^#/ ); # skip comment |
7340
|
|
|
|
|
|
|
|
7341
|
|
|
|
|
|
|
# Updated from 2 to 3 to get trigraphs, added for case b1175 |
7342
|
113
|
|
|
|
|
415
|
my ( $rtok, $rmap, $rtype ) = pre_tokenize( $line, 3 ); |
7343
|
113
|
|
|
|
|
423
|
my $j = $max_token_index + 1; |
7344
|
|
|
|
|
|
|
|
7345
|
113
|
|
|
|
|
265
|
foreach my $tok ( @{$rtok} ) { |
|
113
|
|
|
|
|
368
|
|
7346
|
327
|
100
|
|
|
|
901
|
last if ( $tok =~ "\n" ); |
7347
|
294
|
|
|
|
|
740
|
$rtokens->[ ++$j ] = $tok; |
7348
|
|
|
|
|
|
|
} |
7349
|
113
|
|
|
|
|
452
|
last; |
7350
|
|
|
|
|
|
|
} |
7351
|
115
|
|
|
|
|
364
|
return; |
7352
|
|
|
|
|
|
|
} ## end sub peek_ahead_for_nonblank_token |
7353
|
|
|
|
|
|
|
|
7354
|
|
|
|
|
|
|
####################################################################### |
7355
|
|
|
|
|
|
|
# Tokenizer guessing routines for ambiguous situations |
7356
|
|
|
|
|
|
|
####################################################################### |
7357
|
|
|
|
|
|
|
|
7358
|
|
|
|
|
|
|
sub guess_if_pattern_or_conditional { |
7359
|
|
|
|
|
|
|
|
7360
|
|
|
|
|
|
|
# this routine is called when we have encountered a ? following an |
7361
|
|
|
|
|
|
|
# unknown bareword, and we must decide if it starts a pattern or not |
7362
|
|
|
|
|
|
|
# input parameters: |
7363
|
|
|
|
|
|
|
# $i - token index of the ? starting possible pattern |
7364
|
|
|
|
|
|
|
# output parameters: |
7365
|
|
|
|
|
|
|
# $is_pattern = 0 if probably not pattern, =1 if probably a pattern |
7366
|
|
|
|
|
|
|
# msg = a warning or diagnostic message |
7367
|
|
|
|
|
|
|
# USES GLOBAL VARIABLES: $last_nonblank_token |
7368
|
|
|
|
|
|
|
|
7369
|
11
|
|
|
11
|
0
|
43
|
my ( $self, $i, $rtokens, $rtoken_map, $max_token_index ) = @_; |
7370
|
11
|
|
|
|
|
28
|
my $is_pattern = 0; |
7371
|
11
|
|
|
|
|
48
|
my $msg = "guessing that ? after $last_nonblank_token starts a "; |
7372
|
|
|
|
|
|
|
|
7373
|
11
|
50
|
|
|
|
44
|
if ( $i >= $max_token_index ) { |
7374
|
0
|
|
|
|
|
0
|
$msg .= "conditional (no end to pattern found on the line)\n"; |
7375
|
|
|
|
|
|
|
} |
7376
|
|
|
|
|
|
|
else { |
7377
|
11
|
|
|
|
|
30
|
my $ibeg = $i; |
7378
|
11
|
|
|
|
|
27
|
$i = $ibeg + 1; |
7379
|
11
|
|
|
|
|
34
|
my $next_token = $rtokens->[$i]; # first token after ? |
7380
|
|
|
|
|
|
|
|
7381
|
|
|
|
|
|
|
# look for a possible ending ? on this line.. |
7382
|
11
|
|
|
|
|
35
|
my $in_quote = 1; |
7383
|
11
|
|
|
|
|
25
|
my $quote_depth = 0; |
7384
|
11
|
|
|
|
|
28
|
my $quote_character = EMPTY_STRING; |
7385
|
11
|
|
|
|
|
20
|
my $quote_pos = 0; |
7386
|
11
|
|
|
|
|
31
|
my $quoted_string; |
7387
|
|
|
|
|
|
|
( |
7388
|
|
|
|
|
|
|
|
7389
|
11
|
|
|
|
|
54
|
$i, |
7390
|
|
|
|
|
|
|
$in_quote, |
7391
|
|
|
|
|
|
|
$quote_character, |
7392
|
|
|
|
|
|
|
$quote_pos, |
7393
|
|
|
|
|
|
|
$quote_depth, |
7394
|
|
|
|
|
|
|
$quoted_string, |
7395
|
|
|
|
|
|
|
|
7396
|
|
|
|
|
|
|
) = $self->follow_quoted_string( |
7397
|
|
|
|
|
|
|
|
7398
|
|
|
|
|
|
|
$ibeg, |
7399
|
|
|
|
|
|
|
$in_quote, |
7400
|
|
|
|
|
|
|
$rtokens, |
7401
|
|
|
|
|
|
|
$quote_character, |
7402
|
|
|
|
|
|
|
$quote_pos, |
7403
|
|
|
|
|
|
|
$quote_depth, |
7404
|
|
|
|
|
|
|
$max_token_index, |
7405
|
|
|
|
|
|
|
|
7406
|
|
|
|
|
|
|
); |
7407
|
|
|
|
|
|
|
|
7408
|
11
|
50
|
|
|
|
70
|
if ($in_quote) { |
7409
|
|
|
|
|
|
|
|
7410
|
|
|
|
|
|
|
# we didn't find an ending ? on this line, |
7411
|
|
|
|
|
|
|
# so we bias towards conditional |
7412
|
11
|
|
|
|
|
33
|
$is_pattern = 0; |
7413
|
11
|
|
|
|
|
44
|
$msg .= "conditional (no ending ? on this line)\n"; |
7414
|
|
|
|
|
|
|
|
7415
|
|
|
|
|
|
|
# we found an ending ?, so we bias towards a pattern |
7416
|
|
|
|
|
|
|
} |
7417
|
|
|
|
|
|
|
else { |
7418
|
|
|
|
|
|
|
|
7419
|
|
|
|
|
|
|
# Watch out for an ending ? in quotes, like this |
7420
|
|
|
|
|
|
|
# my $case_flag = File::Spec->case_tolerant ? '(?i)' : ''; |
7421
|
0
|
|
|
|
|
0
|
my $s_quote = 0; |
7422
|
0
|
|
|
|
|
0
|
my $d_quote = 0; |
7423
|
0
|
|
|
|
|
0
|
my $colons = 0; |
7424
|
0
|
|
|
|
|
0
|
foreach my $ii ( $ibeg + 1 .. $i - 1 ) { |
7425
|
0
|
|
|
|
|
0
|
my $tok = $rtokens->[$ii]; |
7426
|
0
|
0
|
|
|
|
0
|
if ( $tok eq ":" ) { $colons++ } |
|
0
|
|
|
|
|
0
|
|
7427
|
0
|
0
|
|
|
|
0
|
if ( $tok eq "'" ) { $s_quote++ } |
|
0
|
|
|
|
|
0
|
|
7428
|
0
|
0
|
|
|
|
0
|
if ( $tok eq '"' ) { $d_quote++ } |
|
0
|
|
|
|
|
0
|
|
7429
|
|
|
|
|
|
|
} |
7430
|
0
|
0
|
0
|
|
|
0
|
if ( $s_quote % 2 || $d_quote % 2 || $colons ) { |
|
|
0
|
0
|
|
|
|
|
7431
|
0
|
|
|
|
|
0
|
$is_pattern = 0; |
7432
|
0
|
|
|
|
|
0
|
$msg .= "found ending ? but unbalanced quote chars\n"; |
7433
|
|
|
|
|
|
|
} |
7434
|
|
|
|
|
|
|
elsif ( |
7435
|
|
|
|
|
|
|
$self->pattern_expected( $i, $rtokens, $max_token_index ) >= 0 ) |
7436
|
|
|
|
|
|
|
{ |
7437
|
0
|
|
|
|
|
0
|
$is_pattern = 1; |
7438
|
0
|
|
|
|
|
0
|
$msg .= "pattern (found ending ? and pattern expected)\n"; |
7439
|
|
|
|
|
|
|
} |
7440
|
|
|
|
|
|
|
else { |
7441
|
0
|
|
|
|
|
0
|
$msg .= "pattern (uncertain, but found ending ?)\n"; |
7442
|
|
|
|
|
|
|
} |
7443
|
|
|
|
|
|
|
} |
7444
|
|
|
|
|
|
|
} |
7445
|
11
|
|
|
|
|
43
|
return ( $is_pattern, $msg ); |
7446
|
|
|
|
|
|
|
} ## end sub guess_if_pattern_or_conditional |
7447
|
|
|
|
|
|
|
|
7448
|
|
|
|
|
|
|
my %is_known_constant; |
7449
|
|
|
|
|
|
|
my %is_known_function; |
7450
|
|
|
|
|
|
|
|
7451
|
|
|
|
|
|
|
BEGIN { |
7452
|
|
|
|
|
|
|
|
7453
|
|
|
|
|
|
|
# Constants like 'pi' in Trig.pm are common |
7454
|
38
|
|
|
38
|
|
283
|
my @q = qw(pi pi2 pi4 pip2 pip4); |
7455
|
38
|
|
|
|
|
297
|
@{is_known_constant}{@q} = (1) x scalar(@q); |
7456
|
|
|
|
|
|
|
|
7457
|
|
|
|
|
|
|
# parenless calls of 'ok' are common |
7458
|
38
|
|
|
|
|
139
|
@q = qw( ok ); |
7459
|
38
|
|
|
|
|
67026
|
@{is_known_function}{@q} = (1) x scalar(@q); |
7460
|
|
|
|
|
|
|
} ## end BEGIN |
7461
|
|
|
|
|
|
|
|
7462
|
|
|
|
|
|
|
sub guess_if_pattern_or_division { |
7463
|
|
|
|
|
|
|
|
7464
|
|
|
|
|
|
|
# this routine is called when we have encountered a / following an |
7465
|
|
|
|
|
|
|
# unknown bareword, and we must decide if it starts a pattern or is a |
7466
|
|
|
|
|
|
|
# division |
7467
|
|
|
|
|
|
|
# input parameters: |
7468
|
|
|
|
|
|
|
# $i - token index of the / starting possible pattern |
7469
|
|
|
|
|
|
|
# output parameters: |
7470
|
|
|
|
|
|
|
# $is_pattern = 0 if probably division, =1 if probably a pattern |
7471
|
|
|
|
|
|
|
# msg = a warning or diagnostic message |
7472
|
|
|
|
|
|
|
# USES GLOBAL VARIABLES: $last_nonblank_token |
7473
|
0
|
|
|
0
|
0
|
0
|
my ( $self, $i, $rtokens, $rtoken_map, $max_token_index ) = @_; |
7474
|
0
|
|
|
|
|
0
|
my $is_pattern = 0; |
7475
|
0
|
|
|
|
|
0
|
my $msg = "guessing that / after $last_nonblank_token starts a "; |
7476
|
|
|
|
|
|
|
|
7477
|
0
|
0
|
|
|
|
0
|
if ( $i >= $max_token_index ) { |
7478
|
0
|
|
|
|
|
0
|
$msg .= "division (no end to pattern found on the line)\n"; |
7479
|
|
|
|
|
|
|
} |
7480
|
|
|
|
|
|
|
else { |
7481
|
0
|
|
|
|
|
0
|
my $ibeg = $i; |
7482
|
0
|
|
|
|
|
0
|
my $divide_possible = |
7483
|
|
|
|
|
|
|
$self->is_possible_numerator( $i, $rtokens, $max_token_index ); |
7484
|
|
|
|
|
|
|
|
7485
|
0
|
0
|
|
|
|
0
|
if ( $divide_possible < 0 ) { |
7486
|
0
|
|
|
|
|
0
|
$msg = "pattern (division not possible here)\n"; |
7487
|
0
|
|
|
|
|
0
|
$is_pattern = 1; |
7488
|
0
|
|
|
|
|
0
|
return ( $is_pattern, $msg ); |
7489
|
|
|
|
|
|
|
} |
7490
|
|
|
|
|
|
|
|
7491
|
0
|
|
|
|
|
0
|
$i = $ibeg + 1; |
7492
|
0
|
|
|
|
|
0
|
my $next_token = $rtokens->[$i]; # first token after slash |
7493
|
|
|
|
|
|
|
|
7494
|
|
|
|
|
|
|
# One of the things we can look at is the spacing around the slash. |
7495
|
|
|
|
|
|
|
# There # are four possible spacings around the first slash: |
7496
|
|
|
|
|
|
|
# |
7497
|
|
|
|
|
|
|
# return pi/two;#/; -/- |
7498
|
|
|
|
|
|
|
# return pi/ two;#/; -/+ |
7499
|
|
|
|
|
|
|
# return pi / two;#/; +/+ |
7500
|
|
|
|
|
|
|
# return pi /two;#/; +/- <-- possible pattern |
7501
|
|
|
|
|
|
|
# |
7502
|
|
|
|
|
|
|
# Spacing rule: a space before the slash but not after the slash |
7503
|
|
|
|
|
|
|
# usually indicates a pattern. We can use this to break ties. |
7504
|
|
|
|
|
|
|
|
7505
|
0
|
|
0
|
|
|
0
|
my $is_pattern_by_spacing = |
7506
|
|
|
|
|
|
|
( $i > 1 && $next_token !~ m/^\s/ && $rtokens->[ $i - 2 ] =~ m/^\s/ ); |
7507
|
|
|
|
|
|
|
|
7508
|
|
|
|
|
|
|
# look for a possible ending / on this line.. |
7509
|
0
|
|
|
|
|
0
|
my $in_quote = 1; |
7510
|
0
|
|
|
|
|
0
|
my $quote_depth = 0; |
7511
|
0
|
|
|
|
|
0
|
my $quote_character = EMPTY_STRING; |
7512
|
0
|
|
|
|
|
0
|
my $quote_pos = 0; |
7513
|
0
|
|
|
|
|
0
|
my $quoted_string; |
7514
|
|
|
|
|
|
|
( |
7515
|
0
|
|
|
|
|
0
|
$i, $in_quote, $quote_character, $quote_pos, $quote_depth, |
7516
|
|
|
|
|
|
|
$quoted_string |
7517
|
|
|
|
|
|
|
) |
7518
|
|
|
|
|
|
|
= $self->follow_quoted_string( $ibeg, $in_quote, $rtokens, |
7519
|
|
|
|
|
|
|
$quote_character, $quote_pos, $quote_depth, $max_token_index ); |
7520
|
|
|
|
|
|
|
|
7521
|
0
|
0
|
|
|
|
0
|
if ($in_quote) { |
7522
|
|
|
|
|
|
|
|
7523
|
|
|
|
|
|
|
# we didn't find an ending / on this line, so we bias towards |
7524
|
|
|
|
|
|
|
# division |
7525
|
0
|
0
|
|
|
|
0
|
if ( $divide_possible >= 0 ) { |
7526
|
0
|
|
|
|
|
0
|
$is_pattern = 0; |
7527
|
0
|
|
|
|
|
0
|
$msg .= "division (no ending / on this line)\n"; |
7528
|
|
|
|
|
|
|
} |
7529
|
|
|
|
|
|
|
else { |
7530
|
|
|
|
|
|
|
|
7531
|
|
|
|
|
|
|
# assuming a multi-line pattern ... this is risky, but division |
7532
|
|
|
|
|
|
|
# does not seem possible. If this fails, it would either be due |
7533
|
|
|
|
|
|
|
# to a syntax error in the code, or the division_expected logic |
7534
|
|
|
|
|
|
|
# needs to be fixed. |
7535
|
0
|
|
|
|
|
0
|
$msg = "multi-line pattern (division not possible)\n"; |
7536
|
0
|
|
|
|
|
0
|
$is_pattern = 1; |
7537
|
|
|
|
|
|
|
} |
7538
|
|
|
|
|
|
|
} |
7539
|
|
|
|
|
|
|
|
7540
|
|
|
|
|
|
|
# we found an ending /, so we bias slightly towards a pattern |
7541
|
|
|
|
|
|
|
else { |
7542
|
|
|
|
|
|
|
|
7543
|
0
|
|
|
|
|
0
|
my $pattern_expected = |
7544
|
|
|
|
|
|
|
$self->pattern_expected( $i, $rtokens, $max_token_index ); |
7545
|
|
|
|
|
|
|
|
7546
|
0
|
0
|
|
|
|
0
|
if ( $pattern_expected >= 0 ) { |
7547
|
|
|
|
|
|
|
|
7548
|
|
|
|
|
|
|
# pattern looks possible... |
7549
|
0
|
0
|
|
|
|
0
|
if ( $divide_possible >= 0 ) { |
7550
|
|
|
|
|
|
|
|
7551
|
|
|
|
|
|
|
# Both pattern and divide can work here... |
7552
|
|
|
|
|
|
|
|
7553
|
|
|
|
|
|
|
# Increase weight of divide if a pure number follows |
7554
|
0
|
|
|
|
|
0
|
$divide_possible += $next_token =~ /^\d+$/; |
7555
|
|
|
|
|
|
|
|
7556
|
|
|
|
|
|
|
# Check for known constants in the numerator, like 'pi' |
7557
|
0
|
0
|
|
|
|
0
|
if ( $is_known_constant{$last_nonblank_token} ) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
7558
|
0
|
|
|
|
|
0
|
$msg .= |
7559
|
|
|
|
|
|
|
"division (pattern works too but saw known constant '$last_nonblank_token')\n"; |
7560
|
0
|
|
|
|
|
0
|
$is_pattern = 0; |
7561
|
|
|
|
|
|
|
} |
7562
|
|
|
|
|
|
|
|
7563
|
|
|
|
|
|
|
# A very common bare word in pattern expressions is 'ok' |
7564
|
|
|
|
|
|
|
elsif ( $is_known_function{$last_nonblank_token} ) { |
7565
|
0
|
|
|
|
|
0
|
$msg .= |
7566
|
|
|
|
|
|
|
"pattern (division works too but saw '$last_nonblank_token')\n"; |
7567
|
0
|
|
|
|
|
0
|
$is_pattern = 1; |
7568
|
|
|
|
|
|
|
} |
7569
|
|
|
|
|
|
|
|
7570
|
|
|
|
|
|
|
# If one rule is more definite, use it |
7571
|
|
|
|
|
|
|
elsif ( $divide_possible > $pattern_expected ) { |
7572
|
0
|
|
|
|
|
0
|
$msg .= |
7573
|
|
|
|
|
|
|
"division (more likely based on following tokens)\n"; |
7574
|
0
|
|
|
|
|
0
|
$is_pattern = 0; |
7575
|
|
|
|
|
|
|
} |
7576
|
|
|
|
|
|
|
|
7577
|
|
|
|
|
|
|
# otherwise, use the spacing rule |
7578
|
|
|
|
|
|
|
elsif ($is_pattern_by_spacing) { |
7579
|
0
|
|
|
|
|
0
|
$msg .= |
7580
|
|
|
|
|
|
|
"pattern (guess on spacing, but division possible too)\n"; |
7581
|
0
|
|
|
|
|
0
|
$is_pattern = 1; |
7582
|
|
|
|
|
|
|
} |
7583
|
|
|
|
|
|
|
else { |
7584
|
0
|
|
|
|
|
0
|
$msg .= |
7585
|
|
|
|
|
|
|
"division (guess on spacing, but pattern is possible too)\n"; |
7586
|
0
|
|
|
|
|
0
|
$is_pattern = 0; |
7587
|
|
|
|
|
|
|
} |
7588
|
|
|
|
|
|
|
} |
7589
|
|
|
|
|
|
|
|
7590
|
|
|
|
|
|
|
# divide_possible < 0 means divide can not work here |
7591
|
|
|
|
|
|
|
else { |
7592
|
0
|
|
|
|
|
0
|
$is_pattern = 1; |
7593
|
0
|
|
|
|
|
0
|
$msg .= "pattern (division not possible)\n"; |
7594
|
|
|
|
|
|
|
} |
7595
|
|
|
|
|
|
|
} |
7596
|
|
|
|
|
|
|
|
7597
|
|
|
|
|
|
|
# pattern does not look possible... |
7598
|
|
|
|
|
|
|
else { |
7599
|
|
|
|
|
|
|
|
7600
|
0
|
0
|
|
|
|
0
|
if ( $divide_possible >= 0 ) { |
7601
|
0
|
|
|
|
|
0
|
$is_pattern = 0; |
7602
|
0
|
|
|
|
|
0
|
$msg .= "division (pattern not possible)\n"; |
7603
|
|
|
|
|
|
|
} |
7604
|
|
|
|
|
|
|
|
7605
|
|
|
|
|
|
|
# Neither pattern nor divide look possible...go by spacing |
7606
|
|
|
|
|
|
|
else { |
7607
|
0
|
0
|
|
|
|
0
|
if ($is_pattern_by_spacing) { |
7608
|
0
|
|
|
|
|
0
|
$msg .= "pattern (guess on spacing)\n"; |
7609
|
0
|
|
|
|
|
0
|
$is_pattern = 1; |
7610
|
|
|
|
|
|
|
} |
7611
|
|
|
|
|
|
|
else { |
7612
|
0
|
|
|
|
|
0
|
$msg .= "division (guess on spacing)\n"; |
7613
|
0
|
|
|
|
|
0
|
$is_pattern = 0; |
7614
|
|
|
|
|
|
|
} |
7615
|
|
|
|
|
|
|
} |
7616
|
|
|
|
|
|
|
} |
7617
|
|
|
|
|
|
|
} |
7618
|
|
|
|
|
|
|
} |
7619
|
0
|
|
|
|
|
0
|
return ( $is_pattern, $msg ); |
7620
|
|
|
|
|
|
|
} ## end sub guess_if_pattern_or_division |
7621
|
|
|
|
|
|
|
|
7622
|
|
|
|
|
|
|
# try to resolve here-doc vs. shift by looking ahead for |
7623
|
|
|
|
|
|
|
# non-code or the end token (currently only looks for end token) |
7624
|
|
|
|
|
|
|
# returns 1 if it is probably a here doc, 0 if not |
7625
|
|
|
|
|
|
|
sub guess_if_here_doc { |
7626
|
|
|
|
|
|
|
|
7627
|
0
|
|
|
0
|
0
|
0
|
my ( $self, $next_token ) = @_; |
7628
|
|
|
|
|
|
|
|
7629
|
|
|
|
|
|
|
# This is how many lines we will search for a target as part of the |
7630
|
|
|
|
|
|
|
# guessing strategy. It is a constant because there is probably |
7631
|
|
|
|
|
|
|
# little reason to change it. |
7632
|
|
|
|
|
|
|
# USES GLOBAL VARIABLES: $current_package $ris_constant, |
7633
|
0
|
|
|
|
|
0
|
my $HERE_DOC_WINDOW = 40; |
7634
|
|
|
|
|
|
|
|
7635
|
0
|
|
|
|
|
0
|
my $here_doc_expected = 0; |
7636
|
0
|
|
|
|
|
0
|
my $line; |
7637
|
0
|
|
|
|
|
0
|
my $k = 0; |
7638
|
0
|
|
|
|
|
0
|
my $msg = "checking <<"; |
7639
|
|
|
|
|
|
|
|
7640
|
0
|
|
|
|
|
0
|
while ( $line = $self->[_line_buffer_object_]->peek_ahead( $k++ ) ) { |
7641
|
0
|
|
|
|
|
0
|
chomp $line; |
7642
|
|
|
|
|
|
|
|
7643
|
0
|
0
|
|
|
|
0
|
if ( $line =~ /^$next_token$/ ) { |
7644
|
0
|
|
|
|
|
0
|
$msg .= " -- found target $next_token ahead $k lines\n"; |
7645
|
0
|
|
|
|
|
0
|
$here_doc_expected = 1; # got it |
7646
|
0
|
|
|
|
|
0
|
last; |
7647
|
|
|
|
|
|
|
} |
7648
|
0
|
0
|
|
|
|
0
|
last if ( $k >= $HERE_DOC_WINDOW ); |
7649
|
|
|
|
|
|
|
} |
7650
|
|
|
|
|
|
|
|
7651
|
0
|
0
|
|
|
|
0
|
unless ($here_doc_expected) { |
7652
|
|
|
|
|
|
|
|
7653
|
0
|
0
|
|
|
|
0
|
if ( !defined($line) ) { |
7654
|
0
|
|
|
|
|
0
|
$here_doc_expected = -1; # hit eof without seeing target |
7655
|
0
|
|
|
|
|
0
|
$msg .= " -- must be shift; target $next_token not in file\n"; |
7656
|
|
|
|
|
|
|
|
7657
|
|
|
|
|
|
|
} |
7658
|
|
|
|
|
|
|
else { # still unsure..taking a wild guess |
7659
|
|
|
|
|
|
|
|
7660
|
0
|
0
|
|
|
|
0
|
if ( !$ris_constant->{$current_package}{$next_token} ) { |
7661
|
0
|
|
|
|
|
0
|
$here_doc_expected = 1; |
7662
|
0
|
|
|
|
|
0
|
$msg .= |
7663
|
|
|
|
|
|
|
" -- guessing it's a here-doc ($next_token not a constant)\n"; |
7664
|
|
|
|
|
|
|
} |
7665
|
|
|
|
|
|
|
else { |
7666
|
0
|
|
|
|
|
0
|
$msg .= |
7667
|
|
|
|
|
|
|
" -- guessing it's a shift ($next_token is a constant)\n"; |
7668
|
|
|
|
|
|
|
} |
7669
|
|
|
|
|
|
|
} |
7670
|
|
|
|
|
|
|
} |
7671
|
0
|
|
|
|
|
0
|
$self->write_logfile_entry($msg); |
7672
|
0
|
|
|
|
|
0
|
return $here_doc_expected; |
7673
|
|
|
|
|
|
|
} ## end sub guess_if_here_doc |
7674
|
|
|
|
|
|
|
|
7675
|
|
|
|
|
|
|
####################################################################### |
7676
|
|
|
|
|
|
|
# Tokenizer Routines for scanning identifiers and related items |
7677
|
|
|
|
|
|
|
####################################################################### |
7678
|
|
|
|
|
|
|
|
7679
|
|
|
|
|
|
|
sub scan_bare_identifier_do { |
7680
|
|
|
|
|
|
|
|
7681
|
|
|
|
|
|
|
# this routine is called to scan a token starting with an alphanumeric |
7682
|
|
|
|
|
|
|
# variable or package separator, :: or '. |
7683
|
|
|
|
|
|
|
# USES GLOBAL VARIABLES: $current_package, $last_nonblank_token, |
7684
|
|
|
|
|
|
|
# $last_nonblank_type, $rparen_type, $paren_depth |
7685
|
|
|
|
|
|
|
|
7686
|
1672
|
|
|
1672
|
0
|
6003
|
my ( $self, $input_line, $i, $tok, $type, $prototype, $rtoken_map, |
7687
|
|
|
|
|
|
|
$max_token_index ) |
7688
|
|
|
|
|
|
|
= @_; |
7689
|
1672
|
|
|
|
|
2654
|
my $i_begin = $i; |
7690
|
1672
|
|
|
|
|
2806
|
my $package = undef; |
7691
|
|
|
|
|
|
|
|
7692
|
1672
|
|
|
|
|
2487
|
my $i_beg = $i; |
7693
|
|
|
|
|
|
|
|
7694
|
|
|
|
|
|
|
# we have to back up one pretoken at a :: since each : is one pretoken |
7695
|
1672
|
100
|
|
|
|
4142
|
if ( $tok eq '::' ) { $i_beg-- } |
|
9
|
|
|
|
|
19
|
|
7696
|
1672
|
50
|
|
|
|
3817
|
if ( $tok eq '->' ) { $i_beg-- } |
|
0
|
|
|
|
|
0
|
|
7697
|
1672
|
|
|
|
|
2995
|
my $pos_beg = $rtoken_map->[$i_beg]; |
7698
|
1672
|
|
|
|
|
4921
|
pos($input_line) = $pos_beg; |
7699
|
|
|
|
|
|
|
|
7700
|
|
|
|
|
|
|
# Examples: |
7701
|
|
|
|
|
|
|
# A::B::C |
7702
|
|
|
|
|
|
|
# A:: |
7703
|
|
|
|
|
|
|
# ::A |
7704
|
|
|
|
|
|
|
# A'B |
7705
|
1672
|
50
|
|
|
|
12484
|
if ( $input_line =~ m/\G\s*((?:\w*(?:'|::)))*(?:(?:->)?(\w+))?/gc ) { |
7706
|
|
|
|
|
|
|
|
7707
|
1672
|
|
|
|
|
3345
|
my $pos = pos($input_line); |
7708
|
1672
|
|
|
|
|
2842
|
my $numc = $pos - $pos_beg; |
7709
|
1672
|
|
|
|
|
3597
|
$tok = substr( $input_line, $pos_beg, $numc ); |
7710
|
|
|
|
|
|
|
|
7711
|
|
|
|
|
|
|
# type 'w' includes anything without leading type info |
7712
|
|
|
|
|
|
|
# ($,%,@,*) including something like abc::def::ghi |
7713
|
1672
|
|
|
|
|
2837
|
$type = 'w'; |
7714
|
|
|
|
|
|
|
|
7715
|
1672
|
|
|
|
|
2784
|
my $sub_name = EMPTY_STRING; |
7716
|
1672
|
100
|
|
|
|
4756
|
if ( defined($2) ) { $sub_name = $2; } |
|
1667
|
|
|
|
|
3264
|
|
7717
|
1672
|
100
|
|
|
|
4212
|
if ( defined($1) ) { |
7718
|
274
|
|
|
|
|
639
|
$package = $1; |
7719
|
|
|
|
|
|
|
|
7720
|
|
|
|
|
|
|
# patch: don't allow isolated package name which just ends |
7721
|
|
|
|
|
|
|
# in the old style package separator (single quote). Example: |
7722
|
|
|
|
|
|
|
# use CGI':all'; |
7723
|
274
|
50
|
66
|
|
|
1097
|
if ( !($sub_name) && substr( $package, -1, 1 ) eq '\'' ) { |
7724
|
0
|
|
|
|
|
0
|
$pos--; |
7725
|
|
|
|
|
|
|
} |
7726
|
|
|
|
|
|
|
|
7727
|
274
|
|
|
|
|
783
|
$package =~ s/\'/::/g; |
7728
|
274
|
100
|
|
|
|
828
|
if ( $package =~ /^\:/ ) { $package = 'main' . $package } |
|
9
|
|
|
|
|
23
|
|
7729
|
274
|
|
|
|
|
1224
|
$package =~ s/::$//; |
7730
|
|
|
|
|
|
|
} |
7731
|
|
|
|
|
|
|
else { |
7732
|
1398
|
|
|
|
|
2695
|
$package = $current_package; |
7733
|
|
|
|
|
|
|
|
7734
|
|
|
|
|
|
|
# patched for c043, part 1: keyword does not follow '->' |
7735
|
1398
|
50
|
66
|
|
|
5374
|
if ( $is_keyword{$tok} && $last_nonblank_type ne '->' ) { |
7736
|
0
|
|
|
|
|
0
|
$type = 'k'; |
7737
|
|
|
|
|
|
|
} |
7738
|
|
|
|
|
|
|
} |
7739
|
|
|
|
|
|
|
|
7740
|
|
|
|
|
|
|
# if it is a bareword.. patched for c043, part 2: not following '->' |
7741
|
1672
|
100
|
66
|
|
|
8080
|
if ( $type eq 'w' && $last_nonblank_type ne '->' ) { |
7742
|
|
|
|
|
|
|
|
7743
|
|
|
|
|
|
|
# check for v-string with leading 'v' type character |
7744
|
|
|
|
|
|
|
# (This seems to have precedence over filehandle, type 'Y') |
7745
|
1003
|
100
|
66
|
|
|
14264
|
if ( $tok =~ /^v\d[_\d]*$/ ) { |
|
|
100
|
66
|
|
|
|
|
|
|
100
|
66
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
7746
|
|
|
|
|
|
|
|
7747
|
|
|
|
|
|
|
# we only have the first part - something like 'v101' - |
7748
|
|
|
|
|
|
|
# look for more |
7749
|
2
|
50
|
|
|
|
16
|
if ( $input_line =~ m/\G(\.\d[_\d]*)+/gc ) { |
7750
|
2
|
|
|
|
|
8
|
$pos = pos($input_line); |
7751
|
2
|
|
|
|
|
4
|
$numc = $pos - $pos_beg; |
7752
|
2
|
|
|
|
|
5
|
$tok = substr( $input_line, $pos_beg, $numc ); |
7753
|
|
|
|
|
|
|
} |
7754
|
2
|
|
|
|
|
8
|
$type = 'v'; |
7755
|
|
|
|
|
|
|
|
7756
|
|
|
|
|
|
|
# warn if this version can't handle v-strings |
7757
|
2
|
|
|
|
|
16
|
$self->report_v_string($tok); |
7758
|
|
|
|
|
|
|
} |
7759
|
|
|
|
|
|
|
|
7760
|
|
|
|
|
|
|
elsif ( $ris_constant->{$package}{$sub_name} ) { |
7761
|
12
|
|
|
|
|
39
|
$type = 'C'; |
7762
|
|
|
|
|
|
|
} |
7763
|
|
|
|
|
|
|
|
7764
|
|
|
|
|
|
|
# bareword after sort has implied empty prototype; for example: |
7765
|
|
|
|
|
|
|
# @sorted = sort numerically ( 53, 29, 11, 32, 7 ); |
7766
|
|
|
|
|
|
|
# This has priority over whatever the user has specified. |
7767
|
|
|
|
|
|
|
elsif ($last_nonblank_token eq 'sort' |
7768
|
|
|
|
|
|
|
&& $last_nonblank_type eq 'k' ) |
7769
|
|
|
|
|
|
|
{ |
7770
|
1
|
|
|
|
|
3
|
$type = 'Z'; |
7771
|
|
|
|
|
|
|
} |
7772
|
|
|
|
|
|
|
|
7773
|
|
|
|
|
|
|
# Note: strangely, perl does not seem to really let you create |
7774
|
|
|
|
|
|
|
# functions which act like eval and do, in the sense that eval |
7775
|
|
|
|
|
|
|
# and do may have operators following the final }, but any operators |
7776
|
|
|
|
|
|
|
# that you create with prototype (&) apparently do not allow |
7777
|
|
|
|
|
|
|
# trailing operators, only terms. This seems strange. |
7778
|
|
|
|
|
|
|
# If this ever changes, here is the update |
7779
|
|
|
|
|
|
|
# to make perltidy behave accordingly: |
7780
|
|
|
|
|
|
|
|
7781
|
|
|
|
|
|
|
# elsif ( $ris_block_function->{$package}{$tok} ) { |
7782
|
|
|
|
|
|
|
# $tok='eval'; # patch to do braces like eval - doesn't work |
7783
|
|
|
|
|
|
|
# $type = 'k'; |
7784
|
|
|
|
|
|
|
#} |
7785
|
|
|
|
|
|
|
# TODO: This could become a separate type to allow for different |
7786
|
|
|
|
|
|
|
# future behavior: |
7787
|
|
|
|
|
|
|
elsif ( $ris_block_function->{$package}{$sub_name} ) { |
7788
|
0
|
|
|
|
|
0
|
$type = 'G'; |
7789
|
|
|
|
|
|
|
} |
7790
|
|
|
|
|
|
|
elsif ( $ris_block_list_function->{$package}{$sub_name} ) { |
7791
|
0
|
|
|
|
|
0
|
$type = 'G'; |
7792
|
|
|
|
|
|
|
} |
7793
|
|
|
|
|
|
|
elsif ( $ris_user_function->{$package}{$sub_name} ) { |
7794
|
6
|
|
|
|
|
14
|
$type = 'U'; |
7795
|
6
|
|
|
|
|
433
|
$prototype = $ruser_function_prototype->{$package}{$sub_name}; |
7796
|
|
|
|
|
|
|
} |
7797
|
|
|
|
|
|
|
|
7798
|
|
|
|
|
|
|
# check for indirect object |
7799
|
|
|
|
|
|
|
elsif ( |
7800
|
|
|
|
|
|
|
|
7801
|
|
|
|
|
|
|
# added 2001-03-27: must not be followed immediately by '(' |
7802
|
|
|
|
|
|
|
# see fhandle.t |
7803
|
|
|
|
|
|
|
( $input_line !~ m/\G\(/gc ) |
7804
|
|
|
|
|
|
|
|
7805
|
|
|
|
|
|
|
# and |
7806
|
|
|
|
|
|
|
&& ( |
7807
|
|
|
|
|
|
|
|
7808
|
|
|
|
|
|
|
# preceded by keyword like 'print', 'printf' and friends |
7809
|
|
|
|
|
|
|
$is_indirect_object_taker{$last_nonblank_token} |
7810
|
|
|
|
|
|
|
|
7811
|
|
|
|
|
|
|
# or preceded by something like 'print(' or 'printf(' |
7812
|
|
|
|
|
|
|
|| ( |
7813
|
|
|
|
|
|
|
( $last_nonblank_token eq '(' ) |
7814
|
|
|
|
|
|
|
&& $is_indirect_object_taker{ |
7815
|
|
|
|
|
|
|
$rparen_type->[$paren_depth] |
7816
|
|
|
|
|
|
|
} |
7817
|
|
|
|
|
|
|
|
7818
|
|
|
|
|
|
|
) |
7819
|
|
|
|
|
|
|
) |
7820
|
|
|
|
|
|
|
) |
7821
|
|
|
|
|
|
|
{ |
7822
|
|
|
|
|
|
|
|
7823
|
|
|
|
|
|
|
# may not be indirect object unless followed by a space; |
7824
|
|
|
|
|
|
|
# updated 2021-01-16 to consider newline to be a space. |
7825
|
|
|
|
|
|
|
# updated for case b990 to look for either ';' or space |
7826
|
4
|
50
|
33
|
|
|
63
|
if ( pos($input_line) == length($input_line) |
7827
|
|
|
|
|
|
|
|| $input_line =~ m/\G[;\s]/gc ) |
7828
|
|
|
|
|
|
|
{ |
7829
|
4
|
|
|
|
|
14
|
$type = 'Y'; |
7830
|
|
|
|
|
|
|
|
7831
|
|
|
|
|
|
|
# Abandon Hope ... |
7832
|
|
|
|
|
|
|
# Perl's indirect object notation is a very bad |
7833
|
|
|
|
|
|
|
# thing and can cause subtle bugs, especially for |
7834
|
|
|
|
|
|
|
# beginning programmers. And I haven't even been |
7835
|
|
|
|
|
|
|
# able to figure out a sane warning scheme which |
7836
|
|
|
|
|
|
|
# doesn't get in the way of good scripts. |
7837
|
|
|
|
|
|
|
|
7838
|
|
|
|
|
|
|
# Complain if a filehandle has any lower case |
7839
|
|
|
|
|
|
|
# letters. This is suggested good practice. |
7840
|
|
|
|
|
|
|
# Use 'sub_name' because something like |
7841
|
|
|
|
|
|
|
# main::MYHANDLE is ok for filehandle |
7842
|
4
|
100
|
|
|
|
23
|
if ( $sub_name =~ /[a-z]/ ) { |
7843
|
|
|
|
|
|
|
|
7844
|
|
|
|
|
|
|
# could be bug caused by older perltidy if |
7845
|
|
|
|
|
|
|
# followed by '(' |
7846
|
1
|
50
|
|
|
|
12
|
if ( $input_line =~ m/\G\s*\(/gc ) { |
7847
|
1
|
|
|
|
|
9
|
$self->complain( |
7848
|
|
|
|
|
|
|
"Caution: unknown word '$tok' in indirect object slot\n" |
7849
|
|
|
|
|
|
|
); |
7850
|
|
|
|
|
|
|
} |
7851
|
|
|
|
|
|
|
} |
7852
|
|
|
|
|
|
|
} |
7853
|
|
|
|
|
|
|
|
7854
|
|
|
|
|
|
|
# bareword not followed by a space -- may not be filehandle |
7855
|
|
|
|
|
|
|
# (may be function call defined in a 'use' statement) |
7856
|
|
|
|
|
|
|
else { |
7857
|
0
|
|
|
|
|
0
|
$type = 'Z'; |
7858
|
|
|
|
|
|
|
} |
7859
|
|
|
|
|
|
|
} |
7860
|
|
|
|
|
|
|
} |
7861
|
|
|
|
|
|
|
|
7862
|
|
|
|
|
|
|
# Now we must convert back from character position |
7863
|
|
|
|
|
|
|
# to pre_token index. |
7864
|
|
|
|
|
|
|
# I don't think an error flag can occur here ..but who knows |
7865
|
1672
|
|
|
|
|
2854
|
my $error; |
7866
|
1672
|
|
|
|
|
4811
|
( $i, $error ) = |
7867
|
|
|
|
|
|
|
inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index ); |
7868
|
1672
|
50
|
|
|
|
4629
|
if ($error) { |
7869
|
0
|
|
|
|
|
0
|
$self->warning( |
7870
|
|
|
|
|
|
|
"scan_bare_identifier: Possibly invalid tokenization\n"); |
7871
|
|
|
|
|
|
|
} |
7872
|
|
|
|
|
|
|
} |
7873
|
|
|
|
|
|
|
|
7874
|
|
|
|
|
|
|
# no match but line not blank - could be syntax error |
7875
|
|
|
|
|
|
|
# perl will take '::' alone without complaint |
7876
|
|
|
|
|
|
|
else { |
7877
|
0
|
|
|
|
|
0
|
$type = 'w'; |
7878
|
|
|
|
|
|
|
|
7879
|
|
|
|
|
|
|
# change this warning to log message if it becomes annoying |
7880
|
0
|
|
|
|
|
0
|
$self->warning("didn't find identifier after leading ::\n"); |
7881
|
|
|
|
|
|
|
} |
7882
|
1672
|
|
|
|
|
7089
|
return ( $i, $tok, $type, $prototype ); |
7883
|
|
|
|
|
|
|
} ## end sub scan_bare_identifier_do |
7884
|
|
|
|
|
|
|
|
7885
|
|
|
|
|
|
|
sub scan_id_do { |
7886
|
|
|
|
|
|
|
|
7887
|
|
|
|
|
|
|
# This is the new scanner and will eventually replace scan_identifier. |
7888
|
|
|
|
|
|
|
# Only type 'sub' and 'package' are implemented. |
7889
|
|
|
|
|
|
|
# Token types $ * % @ & -> are not yet implemented. |
7890
|
|
|
|
|
|
|
# |
7891
|
|
|
|
|
|
|
# Scan identifier following a type token. |
7892
|
|
|
|
|
|
|
# The type of call depends on $id_scan_state: $id_scan_state = '' |
7893
|
|
|
|
|
|
|
# for starting call, in which case $tok must be the token defining |
7894
|
|
|
|
|
|
|
# the type. |
7895
|
|
|
|
|
|
|
# |
7896
|
|
|
|
|
|
|
# If the type token is the last nonblank token on the line, a value |
7897
|
|
|
|
|
|
|
# of $id_scan_state = $tok is returned, indicating that further |
7898
|
|
|
|
|
|
|
# calls must be made to get the identifier. If the type token is |
7899
|
|
|
|
|
|
|
# not the last nonblank token on the line, the identifier is |
7900
|
|
|
|
|
|
|
# scanned and handled and a value of '' is returned. |
7901
|
|
|
|
|
|
|
|
7902
|
330
|
|
|
330
|
0
|
1189
|
my ( $self, $input_line, $i, $tok, $rtokens, $rtoken_map, $id_scan_state, |
7903
|
|
|
|
|
|
|
$max_token_index ) |
7904
|
|
|
|
|
|
|
= @_; |
7905
|
38
|
|
|
38
|
|
410
|
use constant DEBUG_NSCAN => 0; |
|
38
|
|
|
|
|
133
|
|
|
38
|
|
|
|
|
46761
|
|
7906
|
330
|
|
|
|
|
673
|
my $type = EMPTY_STRING; |
7907
|
330
|
|
|
|
|
669
|
my ( $i_beg, $pos_beg ); |
7908
|
|
|
|
|
|
|
|
7909
|
|
|
|
|
|
|
#print "NSCAN:entering i=$i, tok=$tok, type=$type, state=$id_scan_state\n"; |
7910
|
|
|
|
|
|
|
#my ($a,$b,$c) = caller; |
7911
|
|
|
|
|
|
|
#print "NSCAN: scan_id called with tok=$tok $a $b $c\n"; |
7912
|
|
|
|
|
|
|
|
7913
|
|
|
|
|
|
|
# on re-entry, start scanning at first token on the line |
7914
|
330
|
100
|
|
|
|
825
|
if ($id_scan_state) { |
7915
|
10
|
|
|
|
|
39
|
$i_beg = $i; |
7916
|
10
|
|
|
|
|
40
|
$type = EMPTY_STRING; |
7917
|
|
|
|
|
|
|
} |
7918
|
|
|
|
|
|
|
|
7919
|
|
|
|
|
|
|
# on initial entry, start scanning just after type token |
7920
|
|
|
|
|
|
|
else { |
7921
|
320
|
|
|
|
|
594
|
$i_beg = $i + 1; |
7922
|
320
|
|
|
|
|
547
|
$id_scan_state = $tok; |
7923
|
320
|
|
|
|
|
693
|
$type = 't'; |
7924
|
|
|
|
|
|
|
} |
7925
|
|
|
|
|
|
|
|
7926
|
|
|
|
|
|
|
# find $i_beg = index of next nonblank token, |
7927
|
|
|
|
|
|
|
# and handle empty lines |
7928
|
330
|
|
|
|
|
573
|
my $blank_line = 0; |
7929
|
330
|
|
|
|
|
736
|
my $next_nonblank_token = $rtokens->[$i_beg]; |
7930
|
330
|
100
|
|
|
|
950
|
if ( $i_beg > $max_token_index ) { |
7931
|
2
|
|
|
|
|
5
|
$blank_line = 1; |
7932
|
|
|
|
|
|
|
} |
7933
|
|
|
|
|
|
|
else { |
7934
|
|
|
|
|
|
|
|
7935
|
|
|
|
|
|
|
# only a '#' immediately after a '$' is not a comment |
7936
|
328
|
50
|
|
|
|
1004
|
if ( $next_nonblank_token eq '#' ) { |
7937
|
0
|
0
|
|
|
|
0
|
unless ( $tok eq '$' ) { |
7938
|
0
|
|
|
|
|
0
|
$blank_line = 1; |
7939
|
|
|
|
|
|
|
} |
7940
|
|
|
|
|
|
|
} |
7941
|
|
|
|
|
|
|
|
7942
|
328
|
100
|
|
|
|
1545
|
if ( $next_nonblank_token =~ /^\s/ ) { |
7943
|
308
|
|
|
|
|
1152
|
( $next_nonblank_token, $i_beg ) = |
7944
|
|
|
|
|
|
|
find_next_nonblank_token_on_this_line( $i_beg, $rtokens, |
7945
|
|
|
|
|
|
|
$max_token_index ); |
7946
|
308
|
100
|
|
|
|
1714
|
if ( $next_nonblank_token =~ /(^#|^\s*$)/ ) { |
7947
|
4
|
|
|
|
|
16
|
$blank_line = 1; |
7948
|
|
|
|
|
|
|
} |
7949
|
|
|
|
|
|
|
} |
7950
|
|
|
|
|
|
|
} |
7951
|
|
|
|
|
|
|
|
7952
|
|
|
|
|
|
|
# handle non-blank line; identifier, if any, must follow |
7953
|
330
|
100
|
|
|
|
1006
|
unless ($blank_line) { |
7954
|
|
|
|
|
|
|
|
7955
|
324
|
100
|
|
|
|
906
|
if ( $is_sub{$id_scan_state} ) { |
|
|
50
|
|
|
|
|
|
7956
|
298
|
|
|
|
|
3324
|
( $i, $tok, $type, $id_scan_state ) = $self->do_scan_sub( |
7957
|
|
|
|
|
|
|
{ |
7958
|
|
|
|
|
|
|
input_line => $input_line, |
7959
|
|
|
|
|
|
|
i => $i, |
7960
|
|
|
|
|
|
|
i_beg => $i_beg, |
7961
|
|
|
|
|
|
|
tok => $tok, |
7962
|
|
|
|
|
|
|
type => $type, |
7963
|
|
|
|
|
|
|
rtokens => $rtokens, |
7964
|
|
|
|
|
|
|
rtoken_map => $rtoken_map, |
7965
|
|
|
|
|
|
|
id_scan_state => $id_scan_state, |
7966
|
|
|
|
|
|
|
max_token_index => $max_token_index, |
7967
|
|
|
|
|
|
|
} |
7968
|
|
|
|
|
|
|
); |
7969
|
|
|
|
|
|
|
} |
7970
|
|
|
|
|
|
|
|
7971
|
|
|
|
|
|
|
elsif ( $is_package{$id_scan_state} ) { |
7972
|
26
|
|
|
|
|
104
|
( $i, $tok, $type ) = |
7973
|
|
|
|
|
|
|
$self->do_scan_package( $input_line, $i, $i_beg, $tok, $type, |
7974
|
|
|
|
|
|
|
$rtokens, $rtoken_map, $max_token_index ); |
7975
|
26
|
|
|
|
|
61
|
$id_scan_state = EMPTY_STRING; |
7976
|
|
|
|
|
|
|
} |
7977
|
|
|
|
|
|
|
|
7978
|
|
|
|
|
|
|
else { |
7979
|
0
|
|
|
|
|
0
|
$self->warning("invalid token in scan_id: $tok\n"); |
7980
|
0
|
|
|
|
|
0
|
$id_scan_state = EMPTY_STRING; |
7981
|
|
|
|
|
|
|
} |
7982
|
|
|
|
|
|
|
} |
7983
|
|
|
|
|
|
|
|
7984
|
330
|
50
|
33
|
|
|
1964
|
if ( $id_scan_state && ( !defined($type) || !$type ) ) { |
|
|
|
66
|
|
|
|
|
7985
|
|
|
|
|
|
|
|
7986
|
|
|
|
|
|
|
# shouldn't happen: |
7987
|
0
|
|
|
|
|
0
|
if (DEVEL_MODE) { |
7988
|
|
|
|
|
|
|
$self->Fault(<<EOM); |
7989
|
|
|
|
|
|
|
Program bug in scan_id: undefined type but scan_state=$id_scan_state |
7990
|
|
|
|
|
|
|
EOM |
7991
|
|
|
|
|
|
|
} |
7992
|
|
|
|
|
|
|
$self->warning( |
7993
|
0
|
|
|
|
|
0
|
"Possible program bug in sub scan_id: undefined type but scan_state=$id_scan_state\n" |
7994
|
|
|
|
|
|
|
); |
7995
|
0
|
|
|
|
|
0
|
$self->report_definite_bug(); |
7996
|
|
|
|
|
|
|
} |
7997
|
|
|
|
|
|
|
|
7998
|
330
|
|
|
|
|
545
|
DEBUG_NSCAN && do { |
7999
|
|
|
|
|
|
|
print STDOUT |
8000
|
|
|
|
|
|
|
"NSCAN: returns i=$i, tok=$tok, type=$type, state=$id_scan_state\n"; |
8001
|
|
|
|
|
|
|
}; |
8002
|
330
|
|
|
|
|
1414
|
return ( $i, $tok, $type, $id_scan_state ); |
8003
|
|
|
|
|
|
|
} ## end sub scan_id_do |
8004
|
|
|
|
|
|
|
|
8005
|
|
|
|
|
|
|
sub check_prototype { |
8006
|
136
|
|
|
136
|
0
|
434
|
my ( $proto, $package, $subname ) = @_; |
8007
|
136
|
50
|
33
|
|
|
794
|
return unless ( defined($package) && defined($subname) ); |
8008
|
136
|
100
|
|
|
|
423
|
if ( defined($proto) ) { |
8009
|
34
|
|
|
|
|
202
|
$proto =~ s/^\s*\(\s*//; |
8010
|
34
|
|
|
|
|
157
|
$proto =~ s/\s*\)$//; |
8011
|
34
|
100
|
|
|
|
85
|
if ($proto) { |
8012
|
5
|
|
|
|
|
24
|
$ris_user_function->{$package}{$subname} = 1; |
8013
|
5
|
|
|
|
|
26
|
$ruser_function_prototype->{$package}{$subname} = "($proto)"; |
8014
|
|
|
|
|
|
|
|
8015
|
|
|
|
|
|
|
# prototypes containing '&' must be treated specially.. |
8016
|
5
|
100
|
|
|
|
23
|
if ( $proto =~ /\&/ ) { |
8017
|
|
|
|
|
|
|
|
8018
|
|
|
|
|
|
|
# right curly braces of prototypes ending in |
8019
|
|
|
|
|
|
|
# '&' may be followed by an operator |
8020
|
1
|
50
|
|
|
|
10
|
if ( $proto =~ /\&$/ ) { |
|
|
50
|
|
|
|
|
|
8021
|
0
|
|
|
|
|
0
|
$ris_block_function->{$package}{$subname} = 1; |
8022
|
|
|
|
|
|
|
} |
8023
|
|
|
|
|
|
|
|
8024
|
|
|
|
|
|
|
# right curly braces of prototypes NOT ending in |
8025
|
|
|
|
|
|
|
# '&' may NOT be followed by an operator |
8026
|
|
|
|
|
|
|
elsif ( $proto !~ /\&$/ ) { |
8027
|
1
|
|
|
|
|
4
|
$ris_block_list_function->{$package}{$subname} = 1; |
8028
|
|
|
|
|
|
|
} |
8029
|
|
|
|
|
|
|
} |
8030
|
|
|
|
|
|
|
} |
8031
|
|
|
|
|
|
|
else { |
8032
|
29
|
|
|
|
|
113
|
$ris_constant->{$package}{$subname} = 1; |
8033
|
|
|
|
|
|
|
} |
8034
|
|
|
|
|
|
|
} |
8035
|
|
|
|
|
|
|
else { |
8036
|
102
|
|
|
|
|
325
|
$ris_user_function->{$package}{$subname} = 1; |
8037
|
|
|
|
|
|
|
} |
8038
|
136
|
|
|
|
|
304
|
return; |
8039
|
|
|
|
|
|
|
} ## end sub check_prototype |
8040
|
|
|
|
|
|
|
|
8041
|
|
|
|
|
|
|
sub do_scan_package { |
8042
|
|
|
|
|
|
|
|
8043
|
|
|
|
|
|
|
# do_scan_package parses a package name |
8044
|
|
|
|
|
|
|
# it is called with $i_beg equal to the index of the first nonblank |
8045
|
|
|
|
|
|
|
# token following a 'package' token. |
8046
|
|
|
|
|
|
|
# USES GLOBAL VARIABLES: $current_package, |
8047
|
|
|
|
|
|
|
|
8048
|
|
|
|
|
|
|
# package NAMESPACE |
8049
|
|
|
|
|
|
|
# package NAMESPACE VERSION |
8050
|
|
|
|
|
|
|
# package NAMESPACE BLOCK |
8051
|
|
|
|
|
|
|
# package NAMESPACE VERSION BLOCK |
8052
|
|
|
|
|
|
|
# |
8053
|
|
|
|
|
|
|
# If VERSION is provided, package sets the $VERSION variable in the given |
8054
|
|
|
|
|
|
|
# namespace to a version object with the VERSION provided. VERSION must be |
8055
|
|
|
|
|
|
|
# a "strict" style version number as defined by the version module: a |
8056
|
|
|
|
|
|
|
# positive decimal number (integer or decimal-fraction) without |
8057
|
|
|
|
|
|
|
# exponentiation or else a dotted-decimal v-string with a leading 'v' |
8058
|
|
|
|
|
|
|
# character and at least three components. |
8059
|
|
|
|
|
|
|
# reference http://perldoc.perl.org/functions/package.html |
8060
|
|
|
|
|
|
|
|
8061
|
|
|
|
|
|
|
my ( |
8062
|
26
|
|
|
26
|
0
|
90
|
$self, $input_line, $i, |
8063
|
|
|
|
|
|
|
$i_beg, $tok, $type, |
8064
|
|
|
|
|
|
|
$rtokens, $rtoken_map, $max_token_index |
8065
|
|
|
|
|
|
|
) = @_; |
8066
|
26
|
|
|
|
|
52
|
my $package = undef; |
8067
|
26
|
|
|
|
|
52
|
my $pos_beg = $rtoken_map->[$i_beg]; |
8068
|
26
|
|
|
|
|
87
|
pos($input_line) = $pos_beg; |
8069
|
|
|
|
|
|
|
|
8070
|
|
|
|
|
|
|
# handle non-blank line; package name, if any, must follow |
8071
|
26
|
50
|
|
|
|
221
|
if ( $input_line =~ m/\G\s*((?:\w*(?:'|::))*\w*)/gc ) { |
8072
|
26
|
|
|
|
|
69
|
$package = $1; |
8073
|
26
|
50
|
33
|
|
|
157
|
$package = ( defined($1) && $1 ) ? $1 : 'main'; |
8074
|
26
|
|
|
|
|
75
|
$package =~ s/\'/::/g; |
8075
|
26
|
50
|
|
|
|
82
|
if ( $package =~ /^\:/ ) { $package = 'main' . $package } |
|
0
|
|
|
|
|
0
|
|
8076
|
26
|
|
|
|
|
58
|
$package =~ s/::$//; |
8077
|
26
|
|
|
|
|
56
|
my $pos = pos($input_line); |
8078
|
26
|
|
|
|
|
57
|
my $numc = $pos - $pos_beg; |
8079
|
26
|
|
|
|
|
82
|
$tok = 'package ' . substr( $input_line, $pos_beg, $numc ); |
8080
|
26
|
|
|
|
|
55
|
$type = 'i'; |
8081
|
|
|
|
|
|
|
|
8082
|
|
|
|
|
|
|
# Now we must convert back from character position |
8083
|
|
|
|
|
|
|
# to pre_token index. |
8084
|
|
|
|
|
|
|
# I don't think an error flag can occur here ..but ? |
8085
|
26
|
|
|
|
|
44
|
my $error; |
8086
|
26
|
|
|
|
|
97
|
( $i, $error ) = |
8087
|
|
|
|
|
|
|
inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index ); |
8088
|
26
|
50
|
|
|
|
87
|
if ($error) { $self->warning("Possibly invalid package\n") } |
|
0
|
|
|
|
|
0
|
|
8089
|
26
|
|
|
|
|
70
|
$current_package = $package; |
8090
|
|
|
|
|
|
|
|
8091
|
|
|
|
|
|
|
# we should now have package NAMESPACE |
8092
|
|
|
|
|
|
|
# now expecting VERSION, BLOCK, or ; to follow ... |
8093
|
|
|
|
|
|
|
# package NAMESPACE VERSION |
8094
|
|
|
|
|
|
|
# package NAMESPACE BLOCK |
8095
|
|
|
|
|
|
|
# package NAMESPACE VERSION BLOCK |
8096
|
26
|
|
|
|
|
81
|
my ( $next_nonblank_token, $i_next ) = |
8097
|
|
|
|
|
|
|
$self->find_next_nonblank_token( $i, $rtokens, $max_token_index ); |
8098
|
|
|
|
|
|
|
|
8099
|
|
|
|
|
|
|
# check that something recognizable follows, but do not parse. |
8100
|
|
|
|
|
|
|
# A VERSION number will be parsed later as a number or v-string in the |
8101
|
|
|
|
|
|
|
# normal way. What is important is to set the statement type if |
8102
|
|
|
|
|
|
|
# everything looks okay so that the operator_expected() routine |
8103
|
|
|
|
|
|
|
# knows that the number is in a package statement. |
8104
|
|
|
|
|
|
|
# Examples of valid primitive tokens that might follow are: |
8105
|
|
|
|
|
|
|
# 1235 . ; { } v3 v |
8106
|
|
|
|
|
|
|
# FIX: added a '#' since a side comment may also follow |
8107
|
|
|
|
|
|
|
# Added ':' for class attributes (for --use-feature=class, rt145706) |
8108
|
26
|
50
|
|
|
|
120
|
if ( $next_nonblank_token =~ /^([v\.\d;\{\}\#\:])|v\d|\d+$/ ) { |
8109
|
26
|
|
|
|
|
72
|
$statement_type = $tok; |
8110
|
|
|
|
|
|
|
} |
8111
|
|
|
|
|
|
|
else { |
8112
|
0
|
|
|
|
|
0
|
$self->warning( |
8113
|
|
|
|
|
|
|
"Unexpected '$next_nonblank_token' after package name '$tok'\n" |
8114
|
|
|
|
|
|
|
); |
8115
|
|
|
|
|
|
|
} |
8116
|
|
|
|
|
|
|
} |
8117
|
|
|
|
|
|
|
|
8118
|
|
|
|
|
|
|
# no match but line not blank -- |
8119
|
|
|
|
|
|
|
# could be a label with name package, like package: , for example. |
8120
|
|
|
|
|
|
|
else { |
8121
|
0
|
|
|
|
|
0
|
$type = 'k'; |
8122
|
|
|
|
|
|
|
} |
8123
|
|
|
|
|
|
|
|
8124
|
26
|
|
|
|
|
103
|
return ( $i, $tok, $type ); |
8125
|
|
|
|
|
|
|
} ## end sub do_scan_package |
8126
|
|
|
|
|
|
|
|
8127
|
|
|
|
|
|
|
{ ## begin closure for sub scan_complex_identifier |
8128
|
|
|
|
|
|
|
|
8129
|
38
|
|
|
38
|
|
371
|
use constant DEBUG_SCAN_ID => 0; |
|
38
|
|
|
|
|
141
|
|
|
38
|
|
|
|
|
5293
|
|
8130
|
|
|
|
|
|
|
|
8131
|
|
|
|
|
|
|
# Constant hash: |
8132
|
|
|
|
|
|
|
my %is_special_variable_char; |
8133
|
|
|
|
|
|
|
|
8134
|
|
|
|
|
|
|
BEGIN { |
8135
|
|
|
|
|
|
|
|
8136
|
|
|
|
|
|
|
# These are the only characters which can (currently) form special |
8137
|
|
|
|
|
|
|
# variables, like $^W: (issue c066). |
8138
|
38
|
|
|
38
|
|
369
|
my @q = |
8139
|
|
|
|
|
|
|
qw{ ? A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ }; |
8140
|
38
|
|
|
|
|
137681
|
@{is_special_variable_char}{@q} = (1) x scalar(@q); |
8141
|
|
|
|
|
|
|
} ## end BEGIN |
8142
|
|
|
|
|
|
|
|
8143
|
|
|
|
|
|
|
# These are the possible states for this scanner: |
8144
|
|
|
|
|
|
|
my $scan_state_SIGIL = '$'; |
8145
|
|
|
|
|
|
|
my $scan_state_ALPHA = 'A'; |
8146
|
|
|
|
|
|
|
my $scan_state_COLON = ':'; |
8147
|
|
|
|
|
|
|
my $scan_state_LPAREN = '('; |
8148
|
|
|
|
|
|
|
my $scan_state_RPAREN = ')'; |
8149
|
|
|
|
|
|
|
my $scan_state_AMPERSAND = '&'; |
8150
|
|
|
|
|
|
|
my $scan_state_SPLIT = '^'; |
8151
|
|
|
|
|
|
|
|
8152
|
|
|
|
|
|
|
# Only these non-blank states may be returned to caller: |
8153
|
|
|
|
|
|
|
my %is_returnable_scan_state = ( |
8154
|
|
|
|
|
|
|
$scan_state_SIGIL => 1, |
8155
|
|
|
|
|
|
|
$scan_state_AMPERSAND => 1, |
8156
|
|
|
|
|
|
|
); |
8157
|
|
|
|
|
|
|
|
8158
|
|
|
|
|
|
|
# USES GLOBAL VARIABLES: |
8159
|
|
|
|
|
|
|
# $context, $last_nonblank_token, $last_nonblank_type |
8160
|
|
|
|
|
|
|
|
8161
|
|
|
|
|
|
|
#----------- |
8162
|
|
|
|
|
|
|
# call args: |
8163
|
|
|
|
|
|
|
#----------- |
8164
|
|
|
|
|
|
|
my ( $i, $id_scan_state, $identifier, $rtokens, $max_token_index, |
8165
|
|
|
|
|
|
|
$expecting, $container_type ); |
8166
|
|
|
|
|
|
|
|
8167
|
|
|
|
|
|
|
#------------------------------------------- |
8168
|
|
|
|
|
|
|
# my variables, re-initialized on each call: |
8169
|
|
|
|
|
|
|
#------------------------------------------- |
8170
|
|
|
|
|
|
|
my $i_begin; # starting index $i |
8171
|
|
|
|
|
|
|
my $type; # returned identifier type |
8172
|
|
|
|
|
|
|
my $tok_begin; # starting token |
8173
|
|
|
|
|
|
|
my $tok; # returned token |
8174
|
|
|
|
|
|
|
my $id_scan_state_begin; # starting scan state |
8175
|
|
|
|
|
|
|
my $identifier_begin; # starting identifier |
8176
|
|
|
|
|
|
|
my $i_save; # a last good index, in case of error |
8177
|
|
|
|
|
|
|
my $message; # hold error message for log file |
8178
|
|
|
|
|
|
|
my $tok_is_blank; |
8179
|
|
|
|
|
|
|
my $last_tok_is_blank; |
8180
|
|
|
|
|
|
|
my $in_prototype_or_signature; |
8181
|
|
|
|
|
|
|
my $saw_alpha; |
8182
|
|
|
|
|
|
|
my $saw_type; |
8183
|
|
|
|
|
|
|
my $allow_tick; |
8184
|
|
|
|
|
|
|
|
8185
|
|
|
|
|
|
|
sub initialize_my_scan_id_vars { |
8186
|
|
|
|
|
|
|
|
8187
|
|
|
|
|
|
|
# Initialize all 'my' vars on entry |
8188
|
486
|
|
|
486
|
0
|
883
|
$i_begin = $i; |
8189
|
486
|
|
|
|
|
871
|
$type = EMPTY_STRING; |
8190
|
486
|
|
|
|
|
995
|
$tok_begin = $rtokens->[$i_begin]; |
8191
|
486
|
|
|
|
|
854
|
$tok = $tok_begin; |
8192
|
486
|
50
|
|
|
|
1369
|
if ( $tok_begin eq ':' ) { $tok_begin = '::' } |
|
0
|
|
|
|
|
0
|
|
8193
|
486
|
|
|
|
|
890
|
$id_scan_state_begin = $id_scan_state; |
8194
|
486
|
|
|
|
|
812
|
$identifier_begin = $identifier; |
8195
|
486
|
|
|
|
|
816
|
$i_save = undef; |
8196
|
|
|
|
|
|
|
|
8197
|
486
|
|
|
|
|
888
|
$message = EMPTY_STRING; |
8198
|
486
|
|
|
|
|
864
|
$tok_is_blank = undef; # a flag to speed things up |
8199
|
486
|
|
|
|
|
802
|
$last_tok_is_blank = undef; |
8200
|
|
|
|
|
|
|
|
8201
|
486
|
|
100
|
|
|
1702
|
$in_prototype_or_signature = |
8202
|
|
|
|
|
|
|
$container_type && $container_type =~ /^sub\b/; |
8203
|
|
|
|
|
|
|
|
8204
|
|
|
|
|
|
|
# these flags will be used to help figure out the type: |
8205
|
486
|
|
|
|
|
794
|
$saw_alpha = undef; |
8206
|
486
|
|
|
|
|
762
|
$saw_type = undef; |
8207
|
|
|
|
|
|
|
|
8208
|
|
|
|
|
|
|
# allow old package separator (') except in 'use' statement |
8209
|
486
|
|
|
|
|
904
|
$allow_tick = ( $last_nonblank_token ne 'use' ); |
8210
|
486
|
|
|
|
|
842
|
return; |
8211
|
|
|
|
|
|
|
} ## end sub initialize_my_scan_id_vars |
8212
|
|
|
|
|
|
|
|
8213
|
|
|
|
|
|
|
#---------------------------------- |
8214
|
|
|
|
|
|
|
# Routines for handling scan states |
8215
|
|
|
|
|
|
|
#---------------------------------- |
8216
|
|
|
|
|
|
|
sub do_id_scan_state_dollar { |
8217
|
|
|
|
|
|
|
|
8218
|
514
|
|
|
514
|
0
|
889
|
my $self = shift; |
8219
|
|
|
|
|
|
|
|
8220
|
|
|
|
|
|
|
# We saw a sigil, now looking to start a variable name |
8221
|
514
|
100
|
66
|
|
|
4149
|
if ( $tok eq '$' ) { |
|
|
100
|
33
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
8222
|
|
|
|
|
|
|
|
8223
|
31
|
|
|
|
|
144
|
$identifier .= $tok; |
8224
|
|
|
|
|
|
|
|
8225
|
|
|
|
|
|
|
# we've got a punctuation variable if end of line (punct.t) |
8226
|
31
|
50
|
|
|
|
161
|
if ( $i == $max_token_index ) { |
8227
|
0
|
|
|
|
|
0
|
$type = 'i'; |
8228
|
0
|
|
|
|
|
0
|
$id_scan_state = EMPTY_STRING; |
8229
|
|
|
|
|
|
|
} |
8230
|
|
|
|
|
|
|
} |
8231
|
|
|
|
|
|
|
elsif ( $tok =~ /^\w/ ) { # alphanumeric .. |
8232
|
253
|
|
|
|
|
489
|
$saw_alpha = 1; |
8233
|
253
|
|
|
|
|
495
|
$identifier .= $tok; |
8234
|
|
|
|
|
|
|
|
8235
|
|
|
|
|
|
|
# now need :: except for special digit vars like '$1' (c208) |
8236
|
253
|
100
|
|
|
|
862
|
$id_scan_state = $tok =~ /^\d/ ? EMPTY_STRING : $scan_state_COLON; |
8237
|
|
|
|
|
|
|
} |
8238
|
|
|
|
|
|
|
elsif ( $tok eq '::' ) { |
8239
|
16
|
|
|
|
|
92
|
$id_scan_state = $scan_state_ALPHA; |
8240
|
16
|
|
|
|
|
61
|
$identifier .= $tok; |
8241
|
|
|
|
|
|
|
} |
8242
|
|
|
|
|
|
|
|
8243
|
|
|
|
|
|
|
# POSTDEFREF ->@ ->% ->& ->* |
8244
|
|
|
|
|
|
|
elsif ( ( $tok =~ /^[\@\%\&\*]$/ ) && $identifier =~ /\-\>$/ ) { |
8245
|
0
|
|
|
|
|
0
|
$identifier .= $tok; |
8246
|
|
|
|
|
|
|
} |
8247
|
|
|
|
|
|
|
elsif ( $tok eq "'" && $allow_tick ) { # alphanumeric .. |
8248
|
0
|
|
|
|
|
0
|
$saw_alpha = 1; |
8249
|
0
|
|
|
|
|
0
|
$id_scan_state = $scan_state_COLON; # now need :: |
8250
|
0
|
|
|
|
|
0
|
$identifier .= $tok; |
8251
|
|
|
|
|
|
|
|
8252
|
|
|
|
|
|
|
# Perl will accept leading digits in identifiers, |
8253
|
|
|
|
|
|
|
# although they may not always produce useful results. |
8254
|
|
|
|
|
|
|
# Something like $main::0 is ok. But this also works: |
8255
|
|
|
|
|
|
|
# |
8256
|
|
|
|
|
|
|
# sub howdy::123::bubba{ print "bubba $54321!\n" } |
8257
|
|
|
|
|
|
|
# howdy::123::bubba(); |
8258
|
|
|
|
|
|
|
# |
8259
|
|
|
|
|
|
|
} |
8260
|
|
|
|
|
|
|
elsif ( $tok eq '#' ) { |
8261
|
|
|
|
|
|
|
|
8262
|
99
|
|
|
|
|
256
|
my $is_punct_var = $identifier eq '$$'; |
8263
|
|
|
|
|
|
|
|
8264
|
|
|
|
|
|
|
# side comment or identifier? |
8265
|
99
|
100
|
66
|
|
|
2034
|
if ( |
|
|
|
66
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
|
33
|
|
|
|
|
8266
|
|
|
|
|
|
|
|
8267
|
|
|
|
|
|
|
# A '#' starts a comment if it follows a space. For example, |
8268
|
|
|
|
|
|
|
# the following is equivalent to $ans=40. |
8269
|
|
|
|
|
|
|
# my $ # |
8270
|
|
|
|
|
|
|
# ans = 40; |
8271
|
|
|
|
|
|
|
!$last_tok_is_blank |
8272
|
|
|
|
|
|
|
|
8273
|
|
|
|
|
|
|
# a # inside a prototype or signature can only start a |
8274
|
|
|
|
|
|
|
# comment |
8275
|
|
|
|
|
|
|
&& !$in_prototype_or_signature |
8276
|
|
|
|
|
|
|
|
8277
|
|
|
|
|
|
|
# these are valid punctuation vars: *# %# @# $# |
8278
|
|
|
|
|
|
|
# May also be '$#array' or POSTDEFREF ->$# |
8279
|
|
|
|
|
|
|
&& ( $identifier =~ /^[\%\@\$\*]$/ |
8280
|
|
|
|
|
|
|
|| $identifier =~ /\$$/ ) |
8281
|
|
|
|
|
|
|
|
8282
|
|
|
|
|
|
|
# but a '#' after '$$' is a side comment; see c147 |
8283
|
|
|
|
|
|
|
&& !$is_punct_var |
8284
|
|
|
|
|
|
|
|
8285
|
|
|
|
|
|
|
) |
8286
|
|
|
|
|
|
|
{ |
8287
|
95
|
|
|
|
|
275
|
$identifier .= $tok; # keep same state, a $ could follow |
8288
|
|
|
|
|
|
|
} |
8289
|
|
|
|
|
|
|
else { |
8290
|
|
|
|
|
|
|
|
8291
|
|
|
|
|
|
|
# otherwise it is a side comment |
8292
|
4
|
50
|
|
|
|
21
|
if ( $identifier eq '->' ) { } |
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
8293
|
0
|
|
|
|
|
0
|
elsif ($is_punct_var) { $type = 'i' } |
8294
|
4
|
|
|
|
|
6
|
elsif ( $id_scan_state eq $scan_state_SIGIL ) { $type = 't' } |
8295
|
0
|
|
|
|
|
0
|
else { $type = 'i' } |
8296
|
4
|
|
|
|
|
7
|
$i = $i_save; |
8297
|
4
|
|
|
|
|
10
|
$id_scan_state = EMPTY_STRING; |
8298
|
|
|
|
|
|
|
} |
8299
|
|
|
|
|
|
|
} |
8300
|
|
|
|
|
|
|
|
8301
|
|
|
|
|
|
|
elsif ( $tok eq '{' ) { |
8302
|
|
|
|
|
|
|
|
8303
|
|
|
|
|
|
|
# check for something like ${#} or ${?}, where ? is a special char |
8304
|
38
|
100
|
100
|
|
|
539
|
if ( |
|
|
|
66
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
8305
|
|
|
|
|
|
|
( |
8306
|
|
|
|
|
|
|
$identifier eq '$' |
8307
|
|
|
|
|
|
|
|| $identifier eq '@' |
8308
|
|
|
|
|
|
|
|| $identifier eq '$#' |
8309
|
|
|
|
|
|
|
) |
8310
|
|
|
|
|
|
|
&& $i + 2 <= $max_token_index |
8311
|
|
|
|
|
|
|
&& $rtokens->[ $i + 2 ] eq '}' |
8312
|
|
|
|
|
|
|
&& $rtokens->[ $i + 1 ] !~ /[\s\w]/ |
8313
|
|
|
|
|
|
|
) |
8314
|
|
|
|
|
|
|
{ |
8315
|
1
|
|
|
|
|
4
|
my $next2 = $rtokens->[ $i + 2 ]; |
8316
|
1
|
|
|
|
|
3
|
my $next1 = $rtokens->[ $i + 1 ]; |
8317
|
1
|
|
|
|
|
3
|
$identifier .= $tok . $next1 . $next2; |
8318
|
1
|
|
|
|
|
3
|
$i += 2; |
8319
|
1
|
|
|
|
|
2
|
$id_scan_state = EMPTY_STRING; |
8320
|
|
|
|
|
|
|
} |
8321
|
|
|
|
|
|
|
else { |
8322
|
|
|
|
|
|
|
|
8323
|
|
|
|
|
|
|
# skip something like ${xxx} or ->{ |
8324
|
37
|
|
|
|
|
92
|
$id_scan_state = EMPTY_STRING; |
8325
|
|
|
|
|
|
|
|
8326
|
|
|
|
|
|
|
# if this is the first token of a line, any tokens for this |
8327
|
|
|
|
|
|
|
# identifier have already been accumulated |
8328
|
37
|
100
|
66
|
|
|
180
|
if ( $identifier eq '$' || $i == 0 ) { |
8329
|
26
|
|
|
|
|
52
|
$identifier = EMPTY_STRING; |
8330
|
|
|
|
|
|
|
} |
8331
|
37
|
|
|
|
|
80
|
$i = $i_save; |
8332
|
|
|
|
|
|
|
} |
8333
|
|
|
|
|
|
|
} |
8334
|
|
|
|
|
|
|
|
8335
|
|
|
|
|
|
|
# space ok after leading $ % * & @ |
8336
|
|
|
|
|
|
|
elsif ( $tok =~ /^\s*$/ ) { |
8337
|
|
|
|
|
|
|
|
8338
|
20
|
|
|
|
|
65
|
$tok_is_blank = 1; |
8339
|
|
|
|
|
|
|
|
8340
|
|
|
|
|
|
|
# note: an id with a leading '&' does not actually come this way |
8341
|
20
|
50
|
|
|
|
106
|
if ( $identifier =~ /^[\$\%\*\&\@]/ ) { |
|
|
0
|
|
|
|
|
|
8342
|
|
|
|
|
|
|
|
8343
|
20
|
100
|
|
|
|
70
|
if ( length($identifier) > 1 ) { |
8344
|
8
|
|
|
|
|
17
|
$id_scan_state = EMPTY_STRING; |
8345
|
8
|
|
|
|
|
18
|
$i = $i_save; |
8346
|
8
|
|
|
|
|
18
|
$type = 'i'; # probably punctuation variable |
8347
|
|
|
|
|
|
|
} |
8348
|
|
|
|
|
|
|
else { |
8349
|
|
|
|
|
|
|
|
8350
|
|
|
|
|
|
|
# fix c139: trim line-ending type 't' |
8351
|
12
|
100
|
|
|
|
59
|
if ( $i == $max_token_index ) { |
|
|
100
|
|
|
|
|
|
8352
|
1
|
|
|
|
|
3
|
$i = $i_save; |
8353
|
1
|
|
|
|
|
3
|
$type = 't'; |
8354
|
|
|
|
|
|
|
} |
8355
|
|
|
|
|
|
|
|
8356
|
|
|
|
|
|
|
# spaces after $'s are common, and space after @ |
8357
|
|
|
|
|
|
|
# is harmless, so only complain about space |
8358
|
|
|
|
|
|
|
# after other type characters. Space after $ and |
8359
|
|
|
|
|
|
|
# @ will be removed in formatting. Report space |
8360
|
|
|
|
|
|
|
# after % and * because they might indicate a |
8361
|
|
|
|
|
|
|
# parsing error. In other words '% ' might be a |
8362
|
|
|
|
|
|
|
# modulo operator. Delete this warning if it |
8363
|
|
|
|
|
|
|
# gets annoying. |
8364
|
|
|
|
|
|
|
elsif ( $identifier !~ /^[\@\$]$/ ) { |
8365
|
1
|
|
|
|
|
5
|
$message = |
8366
|
|
|
|
|
|
|
"Space in identifier, following $identifier\n"; |
8367
|
|
|
|
|
|
|
} |
8368
|
|
|
|
|
|
|
else { |
8369
|
|
|
|
|
|
|
## ok: silently accept space after '$' and '@' sigils |
8370
|
|
|
|
|
|
|
} |
8371
|
|
|
|
|
|
|
} |
8372
|
|
|
|
|
|
|
} |
8373
|
|
|
|
|
|
|
|
8374
|
|
|
|
|
|
|
elsif ( $identifier eq '->' ) { |
8375
|
|
|
|
|
|
|
|
8376
|
|
|
|
|
|
|
# space after '->' is ok except at line end .. |
8377
|
|
|
|
|
|
|
# so trim line-ending in type '->' (fixes c139) |
8378
|
0
|
0
|
|
|
|
0
|
if ( $i == $max_token_index ) { |
8379
|
0
|
|
|
|
|
0
|
$i = $i_save; |
8380
|
0
|
|
|
|
|
0
|
$type = '->'; |
8381
|
|
|
|
|
|
|
} |
8382
|
|
|
|
|
|
|
} |
8383
|
|
|
|
|
|
|
|
8384
|
|
|
|
|
|
|
# stop at space after something other than -> or sigil |
8385
|
|
|
|
|
|
|
# Example of what can arrive here: |
8386
|
|
|
|
|
|
|
# eval { $MyClass->$$ }; |
8387
|
|
|
|
|
|
|
else { |
8388
|
0
|
|
|
|
|
0
|
$id_scan_state = EMPTY_STRING; |
8389
|
0
|
|
|
|
|
0
|
$i = $i_save; |
8390
|
0
|
|
|
|
|
0
|
$type = 'i'; |
8391
|
|
|
|
|
|
|
} |
8392
|
|
|
|
|
|
|
} |
8393
|
|
|
|
|
|
|
elsif ( $tok eq '^' ) { |
8394
|
|
|
|
|
|
|
|
8395
|
|
|
|
|
|
|
# check for some special variables like $^ $^W |
8396
|
11
|
50
|
|
|
|
47
|
if ( $identifier =~ /^[\$\*\@\%]$/ ) { |
8397
|
11
|
|
|
|
|
34
|
$identifier .= $tok; |
8398
|
11
|
|
|
|
|
25
|
$type = 'i'; |
8399
|
|
|
|
|
|
|
|
8400
|
|
|
|
|
|
|
# There may be one more character, not a space, after the ^ |
8401
|
11
|
|
|
|
|
31
|
my $next1 = $rtokens->[ $i + 1 ]; |
8402
|
11
|
|
|
|
|
28
|
my $chr = substr( $next1, 0, 1 ); |
8403
|
11
|
100
|
|
|
|
47
|
if ( $is_special_variable_char{$chr} ) { |
8404
|
|
|
|
|
|
|
|
8405
|
|
|
|
|
|
|
# It is something like $^W |
8406
|
|
|
|
|
|
|
# Test case (c066) : $^Oeq'linux' |
8407
|
9
|
|
|
|
|
20
|
$i++; |
8408
|
9
|
|
|
|
|
19
|
$identifier .= $next1; |
8409
|
|
|
|
|
|
|
|
8410
|
|
|
|
|
|
|
# If pretoken $next1 is more than one character long, |
8411
|
|
|
|
|
|
|
# set a flag indicating that it needs to be split. |
8412
|
9
|
100
|
|
|
|
39
|
$id_scan_state = |
8413
|
|
|
|
|
|
|
( length($next1) > 1 ) ? $scan_state_SPLIT : EMPTY_STRING; |
8414
|
|
|
|
|
|
|
} |
8415
|
|
|
|
|
|
|
else { |
8416
|
|
|
|
|
|
|
|
8417
|
|
|
|
|
|
|
# it is just $^ |
8418
|
|
|
|
|
|
|
# Simple test case (c065): '$aa=$^if($bb)'; |
8419
|
2
|
|
|
|
|
4
|
$id_scan_state = EMPTY_STRING; |
8420
|
|
|
|
|
|
|
} |
8421
|
|
|
|
|
|
|
} |
8422
|
|
|
|
|
|
|
else { |
8423
|
0
|
|
|
|
|
0
|
$id_scan_state = EMPTY_STRING; |
8424
|
0
|
|
|
|
|
0
|
$i = $i_save; |
8425
|
|
|
|
|
|
|
} |
8426
|
|
|
|
|
|
|
} |
8427
|
|
|
|
|
|
|
else { # something else |
8428
|
|
|
|
|
|
|
|
8429
|
46
|
100
|
66
|
|
|
428
|
if ( $in_prototype_or_signature && $tok =~ /^[\),=#]/ ) { |
|
|
100
|
66
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
8430
|
|
|
|
|
|
|
|
8431
|
|
|
|
|
|
|
# We might be in an extrusion of |
8432
|
|
|
|
|
|
|
# sub foo2 ( $first, $, $third ) { |
8433
|
|
|
|
|
|
|
# looking at a line starting with a comma, like |
8434
|
|
|
|
|
|
|
# $ |
8435
|
|
|
|
|
|
|
# , |
8436
|
|
|
|
|
|
|
# in this case the comma ends the signature variable |
8437
|
|
|
|
|
|
|
# '$' which will have been previously marked type 't' |
8438
|
|
|
|
|
|
|
# rather than 'i'. |
8439
|
3
|
100
|
|
|
|
10
|
if ( $i == $i_begin ) { |
8440
|
1
|
|
|
|
|
10
|
$identifier = EMPTY_STRING; |
8441
|
1
|
|
|
|
|
4
|
$type = EMPTY_STRING; |
8442
|
|
|
|
|
|
|
} |
8443
|
|
|
|
|
|
|
|
8444
|
|
|
|
|
|
|
# at a # we have to mark as type 't' because more may |
8445
|
|
|
|
|
|
|
# follow, otherwise, in a signature we can let '$' be an |
8446
|
|
|
|
|
|
|
# identifier here for better formatting. |
8447
|
|
|
|
|
|
|
# See 'mangle4.in' for a test case. |
8448
|
|
|
|
|
|
|
else { |
8449
|
2
|
|
|
|
|
4
|
$type = 'i'; |
8450
|
2
|
50
|
33
|
|
|
12
|
if ( $id_scan_state eq $scan_state_SIGIL && $tok eq '#' ) { |
8451
|
0
|
|
|
|
|
0
|
$type = 't'; |
8452
|
|
|
|
|
|
|
} |
8453
|
2
|
|
|
|
|
6
|
$i = $i_save; |
8454
|
|
|
|
|
|
|
} |
8455
|
3
|
|
|
|
|
5
|
$id_scan_state = EMPTY_STRING; |
8456
|
|
|
|
|
|
|
} |
8457
|
|
|
|
|
|
|
|
8458
|
|
|
|
|
|
|
# check for various punctuation variables |
8459
|
|
|
|
|
|
|
elsif ( $identifier =~ /^[\$\*\@\%]$/ ) { |
8460
|
35
|
|
|
|
|
117
|
$identifier .= $tok; |
8461
|
|
|
|
|
|
|
} |
8462
|
|
|
|
|
|
|
|
8463
|
|
|
|
|
|
|
# POSTDEFREF: Postfix reference ->$* ->%* ->@* ->** ->&* ->$#* |
8464
|
|
|
|
|
|
|
elsif ($tok eq '*' |
8465
|
|
|
|
|
|
|
&& $identifier =~ /\-\>([\@\%\$\*\&]|\$\#)$/ ) |
8466
|
|
|
|
|
|
|
{ |
8467
|
6
|
|
|
|
|
13
|
$identifier .= $tok; |
8468
|
|
|
|
|
|
|
} |
8469
|
|
|
|
|
|
|
|
8470
|
|
|
|
|
|
|
elsif ( $identifier eq '$#' ) { |
8471
|
|
|
|
|
|
|
|
8472
|
2
|
50
|
|
|
|
12
|
if ( $tok eq '{' ) { $type = 'i'; $i = $i_save } |
|
0
|
50
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
8473
|
|
|
|
|
|
|
|
8474
|
|
|
|
|
|
|
# perl seems to allow just these: $#: $#- $#+ |
8475
|
|
|
|
|
|
|
elsif ( $tok =~ /^[\:\-\+]$/ ) { |
8476
|
0
|
|
|
|
|
0
|
$type = 'i'; |
8477
|
0
|
|
|
|
|
0
|
$identifier .= $tok; |
8478
|
|
|
|
|
|
|
} |
8479
|
|
|
|
|
|
|
else { |
8480
|
2
|
|
|
|
|
6
|
$i = $i_save; |
8481
|
2
|
|
|
|
|
28
|
$self->write_logfile_entry( |
8482
|
|
|
|
|
|
|
'Use of $# is deprecated' . "\n" ); |
8483
|
|
|
|
|
|
|
} |
8484
|
|
|
|
|
|
|
} |
8485
|
|
|
|
|
|
|
elsif ( $identifier eq '$$' ) { |
8486
|
|
|
|
|
|
|
|
8487
|
|
|
|
|
|
|
# perl does not allow references to punctuation |
8488
|
|
|
|
|
|
|
# variables without braces. For example, this |
8489
|
|
|
|
|
|
|
# won't work: |
8490
|
|
|
|
|
|
|
# $:=\4; |
8491
|
|
|
|
|
|
|
# $a = $$:; |
8492
|
|
|
|
|
|
|
# You would have to use |
8493
|
|
|
|
|
|
|
# $a = ${$:}; |
8494
|
|
|
|
|
|
|
|
8495
|
|
|
|
|
|
|
# '$$' alone is punctuation variable for PID |
8496
|
0
|
|
|
|
|
0
|
$i = $i_save; |
8497
|
0
|
0
|
|
|
|
0
|
if ( $tok eq '{' ) { $type = 't' } |
|
0
|
|
|
|
|
0
|
|
8498
|
0
|
|
|
|
|
0
|
else { $type = 'i' } |
8499
|
|
|
|
|
|
|
} |
8500
|
|
|
|
|
|
|
elsif ( $identifier eq '->' ) { |
8501
|
0
|
|
|
|
|
0
|
$i = $i_save; |
8502
|
|
|
|
|
|
|
} |
8503
|
|
|
|
|
|
|
else { |
8504
|
0
|
|
|
|
|
0
|
$i = $i_save; |
8505
|
0
|
0
|
|
|
|
0
|
if ( length($identifier) == 1 ) { |
8506
|
0
|
|
|
|
|
0
|
$identifier = EMPTY_STRING; |
8507
|
|
|
|
|
|
|
} |
8508
|
|
|
|
|
|
|
} |
8509
|
46
|
|
|
|
|
109
|
$id_scan_state = EMPTY_STRING; |
8510
|
|
|
|
|
|
|
} |
8511
|
514
|
|
|
|
|
973
|
return; |
8512
|
|
|
|
|
|
|
} ## end sub do_id_scan_state_dollar |
8513
|
|
|
|
|
|
|
|
8514
|
|
|
|
|
|
|
sub do_id_scan_state_alpha { |
8515
|
|
|
|
|
|
|
|
8516
|
113
|
|
|
113
|
0
|
260
|
my $self = shift; |
8517
|
|
|
|
|
|
|
|
8518
|
|
|
|
|
|
|
# looking for alphanumeric after :: |
8519
|
113
|
|
|
|
|
418
|
$tok_is_blank = $tok =~ /^\s*$/; |
8520
|
|
|
|
|
|
|
|
8521
|
113
|
100
|
33
|
|
|
520
|
if ( $tok =~ /^\w/ ) { # found it |
|
|
50
|
66
|
|
|
|
|
|
|
50
|
33
|
|
|
|
|
|
|
50
|
|
|
|
|
|
8522
|
100
|
|
|
|
|
201
|
$identifier .= $tok; |
8523
|
100
|
|
|
|
|
174
|
$id_scan_state = $scan_state_COLON; # now need :: |
8524
|
100
|
|
|
|
|
173
|
$saw_alpha = 1; |
8525
|
|
|
|
|
|
|
} |
8526
|
|
|
|
|
|
|
elsif ( $tok eq "'" && $allow_tick ) { |
8527
|
0
|
|
|
|
|
0
|
$identifier .= $tok; |
8528
|
0
|
|
|
|
|
0
|
$id_scan_state = $scan_state_COLON; # now need :: |
8529
|
0
|
|
|
|
|
0
|
$saw_alpha = 1; |
8530
|
|
|
|
|
|
|
} |
8531
|
|
|
|
|
|
|
elsif ( $tok_is_blank && $identifier =~ /^sub / ) { |
8532
|
0
|
|
|
|
|
0
|
$id_scan_state = $scan_state_LPAREN; |
8533
|
0
|
|
|
|
|
0
|
$identifier .= $tok; |
8534
|
|
|
|
|
|
|
} |
8535
|
|
|
|
|
|
|
elsif ( $tok eq '(' && $identifier =~ /^sub / ) { |
8536
|
0
|
|
|
|
|
0
|
$id_scan_state = $scan_state_RPAREN; |
8537
|
0
|
|
|
|
|
0
|
$identifier .= $tok; |
8538
|
|
|
|
|
|
|
} |
8539
|
|
|
|
|
|
|
else { |
8540
|
13
|
|
|
|
|
26
|
$id_scan_state = EMPTY_STRING; |
8541
|
13
|
|
|
|
|
21
|
$i = $i_save; |
8542
|
|
|
|
|
|
|
} |
8543
|
113
|
|
|
|
|
191
|
return; |
8544
|
|
|
|
|
|
|
} ## end sub do_id_scan_state_alpha |
8545
|
|
|
|
|
|
|
|
8546
|
|
|
|
|
|
|
sub do_id_scan_state_colon { |
8547
|
|
|
|
|
|
|
|
8548
|
434
|
|
|
434
|
0
|
816
|
my $self = shift; |
8549
|
|
|
|
|
|
|
|
8550
|
|
|
|
|
|
|
# looking for possible :: after alphanumeric |
8551
|
|
|
|
|
|
|
|
8552
|
434
|
|
|
|
|
1517
|
$tok_is_blank = $tok =~ /^\s*$/; |
8553
|
|
|
|
|
|
|
|
8554
|
434
|
100
|
66
|
|
|
3722
|
if ( $tok eq '::' ) { # got it |
|
|
100
|
66
|
|
|
|
|
|
|
100
|
66
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
8555
|
97
|
|
|
|
|
184
|
$identifier .= $tok; |
8556
|
97
|
|
|
|
|
164
|
$id_scan_state = $scan_state_ALPHA; # now require alpha |
8557
|
|
|
|
|
|
|
} |
8558
|
|
|
|
|
|
|
elsif ( $tok =~ /^\w/ ) { # more alphanumeric is ok here |
8559
|
20
|
|
|
|
|
66
|
$identifier .= $tok; |
8560
|
20
|
|
|
|
|
46
|
$id_scan_state = $scan_state_COLON; # now need :: |
8561
|
20
|
|
|
|
|
36
|
$saw_alpha = 1; |
8562
|
|
|
|
|
|
|
} |
8563
|
|
|
|
|
|
|
elsif ( $tok eq "'" && $allow_tick ) { # tick |
8564
|
|
|
|
|
|
|
|
8565
|
12
|
50
|
|
|
|
30
|
if ( $is_keyword{$identifier} ) { |
8566
|
0
|
|
|
|
|
0
|
$id_scan_state = EMPTY_STRING; # that's all |
8567
|
0
|
|
|
|
|
0
|
$i = $i_save; |
8568
|
|
|
|
|
|
|
} |
8569
|
|
|
|
|
|
|
else { |
8570
|
12
|
|
|
|
|
23
|
$identifier .= $tok; |
8571
|
|
|
|
|
|
|
} |
8572
|
|
|
|
|
|
|
} |
8573
|
|
|
|
|
|
|
elsif ( $tok_is_blank && $identifier =~ /^sub / ) { |
8574
|
0
|
|
|
|
|
0
|
$id_scan_state = $scan_state_LPAREN; |
8575
|
0
|
|
|
|
|
0
|
$identifier .= $tok; |
8576
|
|
|
|
|
|
|
} |
8577
|
|
|
|
|
|
|
elsif ( $tok eq '(' && $identifier =~ /^sub / ) { |
8578
|
0
|
|
|
|
|
0
|
$id_scan_state = $scan_state_RPAREN; |
8579
|
0
|
|
|
|
|
0
|
$identifier .= $tok; |
8580
|
|
|
|
|
|
|
} |
8581
|
|
|
|
|
|
|
else { |
8582
|
305
|
|
|
|
|
609
|
$id_scan_state = EMPTY_STRING; # that's all |
8583
|
305
|
|
|
|
|
511
|
$i = $i_save; |
8584
|
|
|
|
|
|
|
} |
8585
|
434
|
|
|
|
|
747
|
return; |
8586
|
|
|
|
|
|
|
} ## end sub do_id_scan_state_colon |
8587
|
|
|
|
|
|
|
|
8588
|
|
|
|
|
|
|
sub do_id_scan_state_left_paren { |
8589
|
|
|
|
|
|
|
|
8590
|
0
|
|
|
0
|
0
|
0
|
my $self = shift; |
8591
|
|
|
|
|
|
|
|
8592
|
|
|
|
|
|
|
# looking for possible '(' of a prototype |
8593
|
|
|
|
|
|
|
|
8594
|
0
|
0
|
|
|
|
0
|
if ( $tok eq '(' ) { # got it |
|
|
0
|
|
|
|
|
|
8595
|
0
|
|
|
|
|
0
|
$identifier .= $tok; |
8596
|
0
|
|
|
|
|
0
|
$id_scan_state = $scan_state_RPAREN; # now find the end of it |
8597
|
|
|
|
|
|
|
} |
8598
|
|
|
|
|
|
|
elsif ( $tok =~ /^\s*$/ ) { # blank - keep going |
8599
|
0
|
|
|
|
|
0
|
$identifier .= $tok; |
8600
|
0
|
|
|
|
|
0
|
$tok_is_blank = 1; |
8601
|
|
|
|
|
|
|
} |
8602
|
|
|
|
|
|
|
else { |
8603
|
0
|
|
|
|
|
0
|
$id_scan_state = EMPTY_STRING; # that's all - no prototype |
8604
|
0
|
|
|
|
|
0
|
$i = $i_save; |
8605
|
|
|
|
|
|
|
} |
8606
|
0
|
|
|
|
|
0
|
return; |
8607
|
|
|
|
|
|
|
} ## end sub do_id_scan_state_left_paren |
8608
|
|
|
|
|
|
|
|
8609
|
|
|
|
|
|
|
sub do_id_scan_state_right_paren { |
8610
|
|
|
|
|
|
|
|
8611
|
0
|
|
|
0
|
0
|
0
|
my $self = shift; |
8612
|
|
|
|
|
|
|
|
8613
|
|
|
|
|
|
|
# looking for a ')' of prototype to close a '(' |
8614
|
|
|
|
|
|
|
|
8615
|
0
|
|
|
|
|
0
|
$tok_is_blank = $tok =~ /^\s*$/; |
8616
|
|
|
|
|
|
|
|
8617
|
0
|
0
|
|
|
|
0
|
if ( $tok eq ')' ) { # got it |
|
|
0
|
|
|
|
|
|
8618
|
0
|
|
|
|
|
0
|
$identifier .= $tok; |
8619
|
0
|
|
|
|
|
0
|
$id_scan_state = EMPTY_STRING; # all done |
8620
|
|
|
|
|
|
|
} |
8621
|
|
|
|
|
|
|
elsif ( $tok =~ /^[\s\$\%\\\*\@\&\;]/ ) { |
8622
|
0
|
|
|
|
|
0
|
$identifier .= $tok; |
8623
|
|
|
|
|
|
|
} |
8624
|
|
|
|
|
|
|
else { # probable error in script, but keep going |
8625
|
0
|
|
|
|
|
0
|
warning("Unexpected '$tok' while seeking end of prototype\n"); |
8626
|
0
|
|
|
|
|
0
|
$identifier .= $tok; |
8627
|
|
|
|
|
|
|
} |
8628
|
0
|
|
|
|
|
0
|
return; |
8629
|
|
|
|
|
|
|
} ## end sub do_id_scan_state_right_paren |
8630
|
|
|
|
|
|
|
|
8631
|
|
|
|
|
|
|
sub do_id_scan_state_ampersand { |
8632
|
|
|
|
|
|
|
|
8633
|
105
|
|
|
105
|
0
|
277
|
my $self = shift; |
8634
|
|
|
|
|
|
|
|
8635
|
|
|
|
|
|
|
# Starting sub call after seeing an '&' |
8636
|
|
|
|
|
|
|
|
8637
|
105
|
100
|
33
|
|
|
673
|
if ( $tok =~ /^[\$\w]/ ) { # alphanumeric .. |
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
8638
|
88
|
|
|
|
|
200
|
$id_scan_state = $scan_state_COLON; # now need :: |
8639
|
88
|
|
|
|
|
169
|
$saw_alpha = 1; |
8640
|
88
|
|
|
|
|
193
|
$identifier .= $tok; |
8641
|
|
|
|
|
|
|
} |
8642
|
|
|
|
|
|
|
elsif ( $tok eq "'" && $allow_tick ) { # alphanumeric .. |
8643
|
0
|
|
|
|
|
0
|
$id_scan_state = $scan_state_COLON; # now need :: |
8644
|
0
|
|
|
|
|
0
|
$saw_alpha = 1; |
8645
|
0
|
|
|
|
|
0
|
$identifier .= $tok; |
8646
|
|
|
|
|
|
|
} |
8647
|
|
|
|
|
|
|
elsif ( $tok =~ /^\s*$/ ) { # allow space |
8648
|
2
|
|
|
|
|
5
|
$tok_is_blank = 1; |
8649
|
|
|
|
|
|
|
|
8650
|
|
|
|
|
|
|
# fix c139: trim line-ending type 't' |
8651
|
2
|
50
|
33
|
|
|
12
|
if ( length($identifier) == 1 && $i == $max_token_index ) { |
8652
|
2
|
|
|
|
|
3
|
$i = $i_save; |
8653
|
2
|
|
|
|
|
5
|
$type = 't'; |
8654
|
|
|
|
|
|
|
} |
8655
|
|
|
|
|
|
|
} |
8656
|
|
|
|
|
|
|
elsif ( $tok eq '::' ) { # leading :: |
8657
|
0
|
|
|
|
|
0
|
$id_scan_state = $scan_state_ALPHA; # accept alpha next |
8658
|
0
|
|
|
|
|
0
|
$identifier .= $tok; |
8659
|
|
|
|
|
|
|
} |
8660
|
|
|
|
|
|
|
elsif ( $tok eq '{' ) { |
8661
|
15
|
50
|
33
|
|
|
66
|
if ( $identifier eq '&' || $i == 0 ) { |
8662
|
15
|
|
|
|
|
42
|
$identifier = EMPTY_STRING; |
8663
|
|
|
|
|
|
|
} |
8664
|
15
|
|
|
|
|
34
|
$i = $i_save; |
8665
|
15
|
|
|
|
|
31
|
$id_scan_state = EMPTY_STRING; |
8666
|
|
|
|
|
|
|
} |
8667
|
|
|
|
|
|
|
elsif ( $tok eq '^' ) { |
8668
|
0
|
0
|
|
|
|
0
|
if ( $identifier eq '&' ) { |
8669
|
|
|
|
|
|
|
|
8670
|
|
|
|
|
|
|
# Special variable (c066) |
8671
|
0
|
|
|
|
|
0
|
$identifier .= $tok; |
8672
|
0
|
|
|
|
|
0
|
$type = '&'; |
8673
|
|
|
|
|
|
|
|
8674
|
|
|
|
|
|
|
# There may be one more character, not a space, after the ^ |
8675
|
0
|
|
|
|
|
0
|
my $next1 = $rtokens->[ $i + 1 ]; |
8676
|
0
|
|
|
|
|
0
|
my $chr = substr( $next1, 0, 1 ); |
8677
|
0
|
0
|
|
|
|
0
|
if ( $is_special_variable_char{$chr} ) { |
8678
|
|
|
|
|
|
|
|
8679
|
|
|
|
|
|
|
# It is something like &^O |
8680
|
0
|
|
|
|
|
0
|
$i++; |
8681
|
0
|
|
|
|
|
0
|
$identifier .= $next1; |
8682
|
|
|
|
|
|
|
|
8683
|
|
|
|
|
|
|
# If pretoken $next1 is more than one character long, |
8684
|
|
|
|
|
|
|
# set a flag indicating that it needs to be split. |
8685
|
0
|
0
|
|
|
|
0
|
$id_scan_state = |
8686
|
|
|
|
|
|
|
( length($next1) > 1 ) ? $scan_state_SPLIT : EMPTY_STRING; |
8687
|
|
|
|
|
|
|
} |
8688
|
|
|
|
|
|
|
else { |
8689
|
|
|
|
|
|
|
|
8690
|
|
|
|
|
|
|
# it is &^ |
8691
|
0
|
|
|
|
|
0
|
$id_scan_state = EMPTY_STRING; |
8692
|
|
|
|
|
|
|
} |
8693
|
|
|
|
|
|
|
} |
8694
|
|
|
|
|
|
|
else { |
8695
|
0
|
|
|
|
|
0
|
$identifier = EMPTY_STRING; |
8696
|
0
|
|
|
|
|
0
|
$i = $i_save; |
8697
|
|
|
|
|
|
|
} |
8698
|
|
|
|
|
|
|
} |
8699
|
|
|
|
|
|
|
else { |
8700
|
|
|
|
|
|
|
|
8701
|
|
|
|
|
|
|
# punctuation variable? |
8702
|
|
|
|
|
|
|
# testfile: cunningham4.pl |
8703
|
|
|
|
|
|
|
# |
8704
|
|
|
|
|
|
|
# We have to be careful here. If we are in an unknown state, |
8705
|
|
|
|
|
|
|
# we will reject the punctuation variable. In the following |
8706
|
|
|
|
|
|
|
# example the '&' is a binary operator but we are in an unknown |
8707
|
|
|
|
|
|
|
# state because there is no sigil on 'Prima', so we don't |
8708
|
|
|
|
|
|
|
# know what it is. But it is a bad guess that |
8709
|
|
|
|
|
|
|
# '&~' is a function variable. |
8710
|
|
|
|
|
|
|
# $self->{text}->{colorMap}->[ |
8711
|
|
|
|
|
|
|
# Prima::PodView::COLOR_CODE_FOREGROUND |
8712
|
|
|
|
|
|
|
# & ~tb::COLOR_INDEX ] = |
8713
|
|
|
|
|
|
|
# $sec->{ColorCode} |
8714
|
|
|
|
|
|
|
|
8715
|
|
|
|
|
|
|
# Fix for case c033: a '#' here starts a side comment |
8716
|
0
|
0
|
0
|
|
|
0
|
if ( $identifier eq '&' && $expecting && $tok ne '#' ) { |
|
|
|
0
|
|
|
|
|
8717
|
0
|
|
|
|
|
0
|
$identifier .= $tok; |
8718
|
|
|
|
|
|
|
} |
8719
|
|
|
|
|
|
|
else { |
8720
|
0
|
|
|
|
|
0
|
$identifier = EMPTY_STRING; |
8721
|
0
|
|
|
|
|
0
|
$i = $i_save; |
8722
|
0
|
|
|
|
|
0
|
$type = '&'; |
8723
|
|
|
|
|
|
|
} |
8724
|
0
|
|
|
|
|
0
|
$id_scan_state = EMPTY_STRING; |
8725
|
|
|
|
|
|
|
} |
8726
|
105
|
|
|
|
|
203
|
return; |
8727
|
|
|
|
|
|
|
} ## end sub do_id_scan_state_ampersand |
8728
|
|
|
|
|
|
|
|
8729
|
|
|
|
|
|
|
#------------------- |
8730
|
|
|
|
|
|
|
# hash of scanner subs |
8731
|
|
|
|
|
|
|
#------------------- |
8732
|
|
|
|
|
|
|
my $scan_identifier_code = { |
8733
|
|
|
|
|
|
|
$scan_state_SIGIL => \&do_id_scan_state_dollar, |
8734
|
|
|
|
|
|
|
$scan_state_ALPHA => \&do_id_scan_state_alpha, |
8735
|
|
|
|
|
|
|
$scan_state_COLON => \&do_id_scan_state_colon, |
8736
|
|
|
|
|
|
|
$scan_state_LPAREN => \&do_id_scan_state_left_paren, |
8737
|
|
|
|
|
|
|
$scan_state_RPAREN => \&do_id_scan_state_right_paren, |
8738
|
|
|
|
|
|
|
$scan_state_AMPERSAND => \&do_id_scan_state_ampersand, |
8739
|
|
|
|
|
|
|
}; |
8740
|
|
|
|
|
|
|
|
8741
|
|
|
|
|
|
|
sub scan_complex_identifier { |
8742
|
|
|
|
|
|
|
|
8743
|
|
|
|
|
|
|
# This routine assembles tokens into identifiers. It maintains a |
8744
|
|
|
|
|
|
|
# scan state, id_scan_state. It updates id_scan_state based upon |
8745
|
|
|
|
|
|
|
# current id_scan_state and token, and returns an updated |
8746
|
|
|
|
|
|
|
# id_scan_state and the next index after the identifier. |
8747
|
|
|
|
|
|
|
|
8748
|
|
|
|
|
|
|
# This routine now serves a a backup for sub scan_simple_identifier |
8749
|
|
|
|
|
|
|
# which handles most identifiers. |
8750
|
|
|
|
|
|
|
|
8751
|
|
|
|
|
|
|
# Note that $self must be a 'my' variable and not be a closure |
8752
|
|
|
|
|
|
|
# variables like the other args. Otherwise it will not get |
8753
|
|
|
|
|
|
|
# deleted by a DESTROY call at the end of a file. Then an |
8754
|
|
|
|
|
|
|
# attempt to create multiple tokenizers can occur when multiple |
8755
|
|
|
|
|
|
|
# files are processed, causing an error. |
8756
|
|
|
|
|
|
|
|
8757
|
|
|
|
|
|
|
( |
8758
|
486
|
|
|
486
|
0
|
2689
|
my $self, $i, $id_scan_state, $identifier, $rtokens, |
8759
|
|
|
|
|
|
|
$max_token_index, $expecting, $container_type |
8760
|
|
|
|
|
|
|
) = @_; |
8761
|
|
|
|
|
|
|
|
8762
|
|
|
|
|
|
|
# return flag telling caller to split the pretoken |
8763
|
486
|
|
|
|
|
2485
|
my $split_pretoken_flag; |
8764
|
|
|
|
|
|
|
|
8765
|
|
|
|
|
|
|
#------------------- |
8766
|
|
|
|
|
|
|
# Initialize my vars |
8767
|
|
|
|
|
|
|
#------------------- |
8768
|
|
|
|
|
|
|
|
8769
|
486
|
|
|
|
|
1628
|
initialize_my_scan_id_vars(); |
8770
|
|
|
|
|
|
|
|
8771
|
|
|
|
|
|
|
#-------------------------------------------------------- |
8772
|
|
|
|
|
|
|
# get started by defining a type and a state if necessary |
8773
|
|
|
|
|
|
|
#-------------------------------------------------------- |
8774
|
|
|
|
|
|
|
|
8775
|
486
|
100
|
|
|
|
1286
|
if ( !$id_scan_state ) { |
8776
|
479
|
|
|
|
|
843
|
$context = UNKNOWN_CONTEXT; |
8777
|
|
|
|
|
|
|
|
8778
|
|
|
|
|
|
|
# fixup for digraph |
8779
|
479
|
50
|
|
|
|
1273
|
if ( $tok eq '>' ) { |
8780
|
0
|
|
|
|
|
0
|
$tok = '->'; |
8781
|
0
|
|
|
|
|
0
|
$tok_begin = $tok; |
8782
|
|
|
|
|
|
|
} |
8783
|
479
|
|
|
|
|
858
|
$identifier = $tok; |
8784
|
|
|
|
|
|
|
|
8785
|
479
|
100
|
100
|
|
|
3030
|
if ( $last_nonblank_token eq '->' ) { |
|
|
100
|
100
|
|
|
|
|
|
|
100
|
0
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
8786
|
6
|
|
|
|
|
14
|
$identifier = '->' . $identifier; |
8787
|
6
|
|
|
|
|
12
|
$id_scan_state = $scan_state_SIGIL; |
8788
|
|
|
|
|
|
|
} |
8789
|
|
|
|
|
|
|
elsif ( $tok eq '$' || $tok eq '*' ) { |
8790
|
293
|
|
|
|
|
564
|
$id_scan_state = $scan_state_SIGIL; |
8791
|
293
|
|
|
|
|
529
|
$context = SCALAR_CONTEXT; |
8792
|
|
|
|
|
|
|
} |
8793
|
|
|
|
|
|
|
elsif ( $tok eq '%' || $tok eq '@' ) { |
8794
|
77
|
|
|
|
|
159
|
$id_scan_state = $scan_state_SIGIL; |
8795
|
77
|
|
|
|
|
147
|
$context = LIST_CONTEXT; |
8796
|
|
|
|
|
|
|
} |
8797
|
|
|
|
|
|
|
elsif ( $tok eq '&' ) { |
8798
|
103
|
|
|
|
|
220
|
$id_scan_state = $scan_state_AMPERSAND; |
8799
|
|
|
|
|
|
|
} |
8800
|
|
|
|
|
|
|
elsif ( $tok eq 'sub' or $tok eq 'package' ) { |
8801
|
0
|
|
|
|
|
0
|
$saw_alpha = 0; # 'sub' is considered type info here |
8802
|
0
|
|
|
|
|
0
|
$id_scan_state = $scan_state_SIGIL; |
8803
|
0
|
|
|
|
|
0
|
$identifier .= |
8804
|
|
|
|
|
|
|
SPACE; # need a space to separate sub from sub name |
8805
|
|
|
|
|
|
|
} |
8806
|
|
|
|
|
|
|
elsif ( $tok eq '::' ) { |
8807
|
0
|
|
|
|
|
0
|
$id_scan_state = $scan_state_ALPHA; |
8808
|
|
|
|
|
|
|
} |
8809
|
|
|
|
|
|
|
elsif ( $tok =~ /^\w/ ) { |
8810
|
0
|
|
|
|
|
0
|
$id_scan_state = $scan_state_COLON; |
8811
|
0
|
|
|
|
|
0
|
$saw_alpha = 1; |
8812
|
|
|
|
|
|
|
} |
8813
|
|
|
|
|
|
|
elsif ( $tok eq '->' ) { |
8814
|
0
|
|
|
|
|
0
|
$id_scan_state = $scan_state_SIGIL; |
8815
|
|
|
|
|
|
|
} |
8816
|
|
|
|
|
|
|
else { |
8817
|
|
|
|
|
|
|
|
8818
|
|
|
|
|
|
|
# shouldn't happen: bad call parameter |
8819
|
0
|
|
|
|
|
0
|
my $msg = |
8820
|
|
|
|
|
|
|
"Program bug detected: scan_complex_identifier received bad starting token = '$tok'\n"; |
8821
|
0
|
|
|
|
|
0
|
if (DEVEL_MODE) { $self->Fault($msg) } |
8822
|
0
|
0
|
|
|
|
0
|
if ( !$self->[_in_error_] ) { |
8823
|
0
|
|
|
|
|
0
|
warning($msg); |
8824
|
0
|
|
|
|
|
0
|
$self->[_in_error_] = 1; |
8825
|
|
|
|
|
|
|
} |
8826
|
0
|
|
|
|
|
0
|
$id_scan_state = EMPTY_STRING; |
8827
|
|
|
|
|
|
|
|
8828
|
|
|
|
|
|
|
# emergency return |
8829
|
0
|
|
|
|
|
0
|
goto RETURN; |
8830
|
|
|
|
|
|
|
} |
8831
|
479
|
|
|
|
|
912
|
$saw_type = !$saw_alpha; |
8832
|
|
|
|
|
|
|
} |
8833
|
|
|
|
|
|
|
else { |
8834
|
7
|
|
|
|
|
22
|
$i--; |
8835
|
7
|
|
|
|
|
32
|
$saw_alpha = ( $tok =~ /^\w/ ); |
8836
|
7
|
|
|
|
|
18
|
$saw_type = ( $tok =~ /([\$\%\@\*\&])/ ); |
8837
|
|
|
|
|
|
|
|
8838
|
|
|
|
|
|
|
# check for a valid starting state |
8839
|
7
|
|
|
|
|
16
|
if ( DEVEL_MODE && !$is_returnable_scan_state{$id_scan_state} ) { |
8840
|
|
|
|
|
|
|
$self->Fault(<<EOM); |
8841
|
|
|
|
|
|
|
Unexpected starting scan state in sub scan_complex_identifier: '$id_scan_state' |
8842
|
|
|
|
|
|
|
EOM |
8843
|
|
|
|
|
|
|
} |
8844
|
|
|
|
|
|
|
} |
8845
|
|
|
|
|
|
|
|
8846
|
|
|
|
|
|
|
#------------------------------ |
8847
|
|
|
|
|
|
|
# loop to gather the identifier |
8848
|
|
|
|
|
|
|
#------------------------------ |
8849
|
|
|
|
|
|
|
|
8850
|
486
|
|
|
|
|
861
|
$i_save = $i; |
8851
|
|
|
|
|
|
|
|
8852
|
486
|
|
100
|
|
|
2188
|
while ( $i < $max_token_index && $id_scan_state ) { |
8853
|
|
|
|
|
|
|
|
8854
|
|
|
|
|
|
|
# Be sure we have code to handle this state before we proceed |
8855
|
1169
|
|
|
|
|
2648
|
my $code = $scan_identifier_code->{$id_scan_state}; |
8856
|
1169
|
100
|
|
|
|
2381
|
if ( !$code ) { |
8857
|
|
|
|
|
|
|
|
8858
|
3
|
50
|
|
|
|
14
|
if ( $id_scan_state eq $scan_state_SPLIT ) { |
8859
|
|
|
|
|
|
|
## OK: this is the signal to exit and split the pretoken |
8860
|
|
|
|
|
|
|
} |
8861
|
|
|
|
|
|
|
|
8862
|
|
|
|
|
|
|
# unknown state - should not happen |
8863
|
|
|
|
|
|
|
else { |
8864
|
0
|
|
|
|
|
0
|
if (DEVEL_MODE) { |
8865
|
|
|
|
|
|
|
$self->Fault(<<EOM); |
8866
|
|
|
|
|
|
|
Unknown scan state in sub scan_complex_identifier: '$id_scan_state' |
8867
|
|
|
|
|
|
|
Scan state at sub entry was '$id_scan_state_begin' |
8868
|
|
|
|
|
|
|
EOM |
8869
|
|
|
|
|
|
|
} |
8870
|
0
|
|
|
|
|
0
|
$id_scan_state = EMPTY_STRING; |
8871
|
0
|
|
|
|
|
0
|
$i = $i_save; |
8872
|
|
|
|
|
|
|
} |
8873
|
3
|
|
|
|
|
6
|
last; |
8874
|
|
|
|
|
|
|
} |
8875
|
|
|
|
|
|
|
|
8876
|
|
|
|
|
|
|
# Remember the starting index for progress check below |
8877
|
1166
|
|
|
|
|
1804
|
my $i_start_loop = $i; |
8878
|
|
|
|
|
|
|
|
8879
|
1166
|
|
|
|
|
1941
|
$last_tok_is_blank = $tok_is_blank; |
8880
|
1166
|
100
|
|
|
|
2092
|
if ($tok_is_blank) { $tok_is_blank = undef } |
|
11
|
|
|
|
|
29
|
|
8881
|
1155
|
|
|
|
|
1674
|
else { $i_save = $i } |
8882
|
|
|
|
|
|
|
|
8883
|
1166
|
|
|
|
|
2117
|
$tok = $rtokens->[ ++$i ]; |
8884
|
|
|
|
|
|
|
|
8885
|
|
|
|
|
|
|
# patch to make digraph :: if necessary |
8886
|
1166
|
100
|
100
|
|
|
3119
|
if ( ( $tok eq ':' ) && ( $rtokens->[ $i + 1 ] eq ':' ) ) { |
8887
|
113
|
|
|
|
|
261
|
$tok = '::'; |
8888
|
113
|
|
|
|
|
172
|
$i++; |
8889
|
|
|
|
|
|
|
} |
8890
|
|
|
|
|
|
|
|
8891
|
1166
|
|
|
|
|
3438
|
$code->($self); |
8892
|
|
|
|
|
|
|
|
8893
|
|
|
|
|
|
|
# check for forward progress: a decrease in the index $i |
8894
|
|
|
|
|
|
|
# implies that scanning has finished |
8895
|
1166
|
100
|
|
|
|
3979
|
last if ( $i <= $i_start_loop ); |
8896
|
|
|
|
|
|
|
|
8897
|
|
|
|
|
|
|
} ## end of main loop |
8898
|
|
|
|
|
|
|
|
8899
|
|
|
|
|
|
|
#------------- |
8900
|
|
|
|
|
|
|
# Check result |
8901
|
|
|
|
|
|
|
#------------- |
8902
|
|
|
|
|
|
|
|
8903
|
|
|
|
|
|
|
# Be sure a valid state is returned |
8904
|
486
|
100
|
|
|
|
1293
|
if ($id_scan_state) { |
8905
|
|
|
|
|
|
|
|
8906
|
20
|
100
|
|
|
|
86
|
if ( !$is_returnable_scan_state{$id_scan_state} ) { |
8907
|
|
|
|
|
|
|
|
8908
|
13
|
100
|
|
|
|
57
|
if ( $id_scan_state eq $scan_state_SPLIT ) { |
8909
|
3
|
|
|
|
|
5
|
$split_pretoken_flag = 1; |
8910
|
|
|
|
|
|
|
} |
8911
|
|
|
|
|
|
|
|
8912
|
13
|
50
|
|
|
|
53
|
if ( $id_scan_state eq $scan_state_RPAREN ) { |
8913
|
0
|
|
|
|
|
0
|
warning( |
8914
|
|
|
|
|
|
|
"Hit end of line while seeking ) to end prototype\n"); |
8915
|
|
|
|
|
|
|
} |
8916
|
|
|
|
|
|
|
|
8917
|
13
|
|
|
|
|
28
|
$id_scan_state = EMPTY_STRING; |
8918
|
|
|
|
|
|
|
} |
8919
|
|
|
|
|
|
|
|
8920
|
|
|
|
|
|
|
# Patch: the deprecated variable $# does not combine with anything |
8921
|
|
|
|
|
|
|
# on the next line. |
8922
|
20
|
50
|
|
|
|
65
|
if ( $identifier eq '$#' ) { $id_scan_state = EMPTY_STRING } |
|
0
|
|
|
|
|
0
|
|
8923
|
|
|
|
|
|
|
} |
8924
|
|
|
|
|
|
|
|
8925
|
|
|
|
|
|
|
# Be sure the token index is valid |
8926
|
486
|
50
|
|
|
|
1303
|
if ( $i < 0 ) { $i = 0 } |
|
0
|
|
|
|
|
0
|
|
8927
|
|
|
|
|
|
|
|
8928
|
|
|
|
|
|
|
# Be sure a token type is defined |
8929
|
486
|
100
|
|
|
|
1229
|
if ( !$type ) { |
8930
|
|
|
|
|
|
|
|
8931
|
458
|
100
|
|
|
|
1049
|
if ($saw_type) { |
|
|
100
|
|
|
|
|
|
8932
|
|
|
|
|
|
|
|
8933
|
452
|
100
|
33
|
|
|
3562
|
if ($saw_alpha) { |
|
|
50
|
66
|
|
|
|
|
|
|
100
|
66
|
|
|
|
|
|
|
|
33
|
|
|
|
|
8934
|
|
|
|
|
|
|
|
8935
|
|
|
|
|
|
|
# The type without the -> should be the same as with the -> so |
8936
|
|
|
|
|
|
|
# that if they get separated we get the same bond strengths, |
8937
|
|
|
|
|
|
|
# etc. See b1234 |
8938
|
348
|
50
|
33
|
|
|
1387
|
if ( $identifier =~ /^->/ |
|
|
|
33
|
|
|
|
|
8939
|
|
|
|
|
|
|
&& $last_nonblank_type eq 'w' |
8940
|
|
|
|
|
|
|
&& substr( $identifier, 2, 1 ) =~ /^\w/ ) |
8941
|
|
|
|
|
|
|
{ |
8942
|
0
|
|
|
|
|
0
|
$type = 'w'; |
8943
|
|
|
|
|
|
|
} |
8944
|
348
|
|
|
|
|
732
|
else { $type = 'i' } |
8945
|
|
|
|
|
|
|
} |
8946
|
|
|
|
|
|
|
elsif ( $identifier eq '->' ) { |
8947
|
0
|
|
|
|
|
0
|
$type = '->'; |
8948
|
|
|
|
|
|
|
} |
8949
|
|
|
|
|
|
|
elsif ( |
8950
|
|
|
|
|
|
|
( length($identifier) > 1 ) |
8951
|
|
|
|
|
|
|
|
8952
|
|
|
|
|
|
|
# In something like '@$=' we have an identifier '@$' |
8953
|
|
|
|
|
|
|
# In something like '$${' we have type '$$' (and only |
8954
|
|
|
|
|
|
|
# part of an identifier) |
8955
|
|
|
|
|
|
|
&& !( $identifier =~ /\$$/ && $tok eq '{' ) |
8956
|
|
|
|
|
|
|
|
8957
|
|
|
|
|
|
|
## && ( $identifier !~ /^(sub |package )$/ ) |
8958
|
|
|
|
|
|
|
&& $identifier ne 'sub ' |
8959
|
|
|
|
|
|
|
&& $identifier ne 'package ' |
8960
|
|
|
|
|
|
|
) |
8961
|
|
|
|
|
|
|
{ |
8962
|
53
|
|
|
|
|
1730
|
$type = 'i'; |
8963
|
|
|
|
|
|
|
} |
8964
|
51
|
|
|
|
|
119
|
else { $type = 't' } |
8965
|
|
|
|
|
|
|
} |
8966
|
|
|
|
|
|
|
elsif ($saw_alpha) { |
8967
|
|
|
|
|
|
|
|
8968
|
|
|
|
|
|
|
# type 'w' includes anything without leading type info |
8969
|
|
|
|
|
|
|
# ($,%,@,*) including something like abc::def::ghi |
8970
|
5
|
|
|
|
|
10
|
$type = 'w'; |
8971
|
|
|
|
|
|
|
|
8972
|
|
|
|
|
|
|
# Fix for b1337, if restarting scan after line break between |
8973
|
|
|
|
|
|
|
# '->' or sigil and identifier name, use type 'i' |
8974
|
5
|
50
|
33
|
|
|
32
|
if ( $id_scan_state_begin |
8975
|
|
|
|
|
|
|
&& $identifier =~ /^([\$\%\@\*\&]|->)/ ) |
8976
|
|
|
|
|
|
|
{ |
8977
|
5
|
|
|
|
|
12
|
$type = 'i'; |
8978
|
|
|
|
|
|
|
} |
8979
|
|
|
|
|
|
|
} |
8980
|
|
|
|
|
|
|
else { |
8981
|
1
|
|
|
|
|
13
|
$type = EMPTY_STRING; |
8982
|
|
|
|
|
|
|
} # this can happen on a restart |
8983
|
|
|
|
|
|
|
} |
8984
|
|
|
|
|
|
|
|
8985
|
|
|
|
|
|
|
# See if we formed an identifier... |
8986
|
486
|
100
|
|
|
|
2053
|
if ($identifier) { |
8987
|
444
|
|
|
|
|
827
|
$tok = $identifier; |
8988
|
444
|
100
|
|
|
|
1057
|
if ($message) { $self->write_logfile_entry($message) } |
|
1
|
|
|
|
|
7
|
|
8989
|
|
|
|
|
|
|
} |
8990
|
|
|
|
|
|
|
|
8991
|
|
|
|
|
|
|
# did not find an identifier, back up |
8992
|
|
|
|
|
|
|
else { |
8993
|
42
|
|
|
|
|
85
|
$tok = $tok_begin; |
8994
|
42
|
|
|
|
|
75
|
$i = $i_begin; |
8995
|
|
|
|
|
|
|
} |
8996
|
|
|
|
|
|
|
|
8997
|
|
|
|
|
|
|
RETURN: |
8998
|
|
|
|
|
|
|
|
8999
|
486
|
|
|
|
|
1124
|
DEBUG_SCAN_ID && do { |
9000
|
|
|
|
|
|
|
my ( $a, $b, $c ) = caller; |
9001
|
|
|
|
|
|
|
print STDOUT |
9002
|
|
|
|
|
|
|
"SCANID: called from $a $b $c with tok, i, state, identifier =$tok_begin, $i_begin, $id_scan_state_begin, $identifier_begin\n"; |
9003
|
|
|
|
|
|
|
print STDOUT |
9004
|
|
|
|
|
|
|
"SCANID: returned with tok, i, state, identifier =$tok, $i, $id_scan_state, $identifier\n"; |
9005
|
|
|
|
|
|
|
}; |
9006
|
486
|
|
|
|
|
2326
|
return ( $i, $tok, $type, $id_scan_state, $identifier, |
9007
|
|
|
|
|
|
|
$split_pretoken_flag ); |
9008
|
|
|
|
|
|
|
} ## end sub scan_complex_identifier |
9009
|
|
|
|
|
|
|
} ## end closure for sub scan_complex_identifier |
9010
|
|
|
|
|
|
|
|
9011
|
|
|
|
|
|
|
{ ## closure for sub do_scan_sub |
9012
|
|
|
|
|
|
|
|
9013
|
|
|
|
|
|
|
my %warn_if_lexical; |
9014
|
|
|
|
|
|
|
|
9015
|
|
|
|
|
|
|
BEGIN { |
9016
|
|
|
|
|
|
|
|
9017
|
|
|
|
|
|
|
# lexical subs with these names can cause parsing errors in this version |
9018
|
38
|
|
|
38
|
|
279
|
my @q = qw( m q qq qr qw qx s tr y ); |
9019
|
38
|
|
|
|
|
3277
|
@{warn_if_lexical}{@q} = (1) x scalar(@q); |
9020
|
|
|
|
|
|
|
} ## end BEGIN |
9021
|
|
|
|
|
|
|
|
9022
|
|
|
|
|
|
|
# saved package and subnames in case prototype is on separate line |
9023
|
|
|
|
|
|
|
my ( $package_saved, $subname_saved ); |
9024
|
|
|
|
|
|
|
|
9025
|
|
|
|
|
|
|
# initialize subname each time a new 'sub' keyword is encountered |
9026
|
|
|
|
|
|
|
sub initialize_subname { |
9027
|
294
|
|
|
294
|
0
|
615
|
$package_saved = EMPTY_STRING; |
9028
|
294
|
|
|
|
|
576
|
$subname_saved = EMPTY_STRING; |
9029
|
294
|
|
|
|
|
511
|
return; |
9030
|
|
|
|
|
|
|
} |
9031
|
|
|
|
|
|
|
|
9032
|
|
|
|
|
|
|
use constant { |
9033
|
38
|
|
|
|
|
87204
|
SUB_CALL => 1, |
9034
|
|
|
|
|
|
|
PAREN_CALL => 2, |
9035
|
|
|
|
|
|
|
PROTOTYPE_CALL => 3, |
9036
|
38
|
|
|
38
|
|
321
|
}; |
|
38
|
|
|
|
|
116
|
|
9037
|
|
|
|
|
|
|
|
9038
|
|
|
|
|
|
|
sub do_scan_sub { |
9039
|
|
|
|
|
|
|
|
9040
|
|
|
|
|
|
|
# do_scan_sub parses a sub name and prototype. |
9041
|
|
|
|
|
|
|
|
9042
|
|
|
|
|
|
|
# At present there are three basic CALL TYPES which are |
9043
|
|
|
|
|
|
|
# distinguished by the starting value of '$tok': |
9044
|
|
|
|
|
|
|
# 1. $tok='sub', id_scan_state='sub' |
9045
|
|
|
|
|
|
|
# it is called with $i_beg equal to the index of the first nonblank |
9046
|
|
|
|
|
|
|
# token following a 'sub' token. |
9047
|
|
|
|
|
|
|
# 2. $tok='(', id_scan_state='sub', |
9048
|
|
|
|
|
|
|
# it is called with $i_beg equal to the index of a '(' which may |
9049
|
|
|
|
|
|
|
# start a prototype. |
9050
|
|
|
|
|
|
|
# 3. $tok='prototype', id_scan_state='prototype' |
9051
|
|
|
|
|
|
|
# it is called with $i_beg equal to the index of a '(' which is |
9052
|
|
|
|
|
|
|
# preceded by ': prototype' and has $id_scan_state eq 'prototype' |
9053
|
|
|
|
|
|
|
|
9054
|
|
|
|
|
|
|
# Examples: |
9055
|
|
|
|
|
|
|
|
9056
|
|
|
|
|
|
|
# A single type 1 call will get both the sub and prototype |
9057
|
|
|
|
|
|
|
# sub foo1 ( $$ ) { } |
9058
|
|
|
|
|
|
|
# ^ |
9059
|
|
|
|
|
|
|
|
9060
|
|
|
|
|
|
|
# The subname will be obtained with a 'sub' call |
9061
|
|
|
|
|
|
|
# The prototype on line 2 will be obtained with a '(' call |
9062
|
|
|
|
|
|
|
# sub foo1 |
9063
|
|
|
|
|
|
|
# ^ <---call type 1 |
9064
|
|
|
|
|
|
|
# ( $$ ) { } |
9065
|
|
|
|
|
|
|
# ^ <---call type 2 |
9066
|
|
|
|
|
|
|
|
9067
|
|
|
|
|
|
|
# The subname will be obtained with a 'sub' call |
9068
|
|
|
|
|
|
|
# The prototype will be obtained with a 'prototype' call |
9069
|
|
|
|
|
|
|
# sub foo1 ( $x, $y ) : prototype ( $$ ) { } |
9070
|
|
|
|
|
|
|
# ^ <---type 1 ^ <---type 3 |
9071
|
|
|
|
|
|
|
|
9072
|
|
|
|
|
|
|
# TODO: add future error checks to be sure we have a valid |
9073
|
|
|
|
|
|
|
# sub name. For example, 'sub &doit' is wrong. Also, be sure |
9074
|
|
|
|
|
|
|
# a name is given if and only if a non-anonymous sub is |
9075
|
|
|
|
|
|
|
# appropriate. |
9076
|
|
|
|
|
|
|
# USES GLOBAL VARS: $current_package, $last_nonblank_token, |
9077
|
|
|
|
|
|
|
# $rsaw_function_definition, |
9078
|
|
|
|
|
|
|
# $statement_type |
9079
|
|
|
|
|
|
|
|
9080
|
300
|
|
|
300
|
0
|
760
|
my ( $self, $rinput_hash ) = @_; |
9081
|
|
|
|
|
|
|
|
9082
|
300
|
|
|
|
|
705
|
my $input_line = $rinput_hash->{input_line}; |
9083
|
300
|
|
|
|
|
600
|
my $i = $rinput_hash->{i}; |
9084
|
300
|
|
|
|
|
824
|
my $i_beg = $rinput_hash->{i_beg}; |
9085
|
300
|
|
|
|
|
652
|
my $tok = $rinput_hash->{tok}; |
9086
|
300
|
|
|
|
|
571
|
my $type = $rinput_hash->{type}; |
9087
|
300
|
|
|
|
|
600
|
my $rtokens = $rinput_hash->{rtokens}; |
9088
|
300
|
|
|
|
|
564
|
my $rtoken_map = $rinput_hash->{rtoken_map}; |
9089
|
300
|
|
|
|
|
568
|
my $id_scan_state = $rinput_hash->{id_scan_state}; |
9090
|
300
|
|
|
|
|
547
|
my $max_token_index = $rinput_hash->{max_token_index}; |
9091
|
|
|
|
|
|
|
|
9092
|
300
|
|
|
|
|
540
|
my $i_entry = $i; |
9093
|
|
|
|
|
|
|
|
9094
|
|
|
|
|
|
|
# Determine the CALL TYPE |
9095
|
|
|
|
|
|
|
# 1=sub |
9096
|
|
|
|
|
|
|
# 2=( |
9097
|
|
|
|
|
|
|
# 3=prototype |
9098
|
300
|
100
|
|
|
|
1019
|
my $call_type = |
|
|
100
|
|
|
|
|
|
9099
|
|
|
|
|
|
|
$tok eq 'prototype' ? PROTOTYPE_CALL |
9100
|
|
|
|
|
|
|
: $tok eq '(' ? PAREN_CALL |
9101
|
|
|
|
|
|
|
: SUB_CALL; |
9102
|
|
|
|
|
|
|
|
9103
|
300
|
|
|
|
|
509
|
$id_scan_state = EMPTY_STRING; # normally we get everything in one call |
9104
|
300
|
|
|
|
|
559
|
my $subname = $subname_saved; |
9105
|
300
|
|
|
|
|
558
|
my $package = $package_saved; |
9106
|
300
|
|
|
|
|
623
|
my $proto = undef; |
9107
|
300
|
|
|
|
|
547
|
my $attrs = undef; |
9108
|
300
|
|
|
|
|
493
|
my $match; |
9109
|
|
|
|
|
|
|
|
9110
|
300
|
|
|
|
|
568
|
my $pos_beg = $rtoken_map->[$i_beg]; |
9111
|
300
|
|
|
|
|
1034
|
pos($input_line) = $pos_beg; |
9112
|
|
|
|
|
|
|
|
9113
|
|
|
|
|
|
|
# Look for the sub NAME if this is a SUB call |
9114
|
300
|
100
|
100
|
|
|
2774
|
if ( |
9115
|
|
|
|
|
|
|
$call_type == SUB_CALL |
9116
|
|
|
|
|
|
|
&& $input_line =~ m/\G\s* |
9117
|
|
|
|
|
|
|
((?:\w*(?:'|::))*) # package - something that ends in :: or ' |
9118
|
|
|
|
|
|
|
(\w+) # NAME - required |
9119
|
|
|
|
|
|
|
/gcx |
9120
|
|
|
|
|
|
|
) |
9121
|
|
|
|
|
|
|
{ |
9122
|
121
|
|
|
|
|
313
|
$match = 1; |
9123
|
121
|
|
|
|
|
337
|
$subname = $2; |
9124
|
|
|
|
|
|
|
|
9125
|
121
|
|
33
|
|
|
452
|
my $is_lexical_sub = |
9126
|
|
|
|
|
|
|
$last_nonblank_type eq 'k' && $last_nonblank_token eq 'my'; |
9127
|
121
|
0
|
33
|
|
|
383
|
if ( $is_lexical_sub && $1 ) { |
9128
|
0
|
|
|
|
|
0
|
$self->warning("'my' sub $subname cannot be in package '$1'\n"); |
9129
|
0
|
|
|
|
|
0
|
$is_lexical_sub = 0; |
9130
|
|
|
|
|
|
|
} |
9131
|
|
|
|
|
|
|
|
9132
|
121
|
50
|
|
|
|
360
|
if ($is_lexical_sub) { |
9133
|
|
|
|
|
|
|
|
9134
|
|
|
|
|
|
|
# lexical subs use the block sequence number as a package name |
9135
|
0
|
|
|
|
|
0
|
my $seqno = |
9136
|
|
|
|
|
|
|
$rcurrent_sequence_number->[BRACE] |
9137
|
|
|
|
|
|
|
[ $rcurrent_depth->[BRACE] ]; |
9138
|
0
|
0
|
|
|
|
0
|
$seqno = 1 unless ( defined($seqno) ); |
9139
|
0
|
|
|
|
|
0
|
$package = $seqno; |
9140
|
0
|
0
|
|
|
|
0
|
if ( $warn_if_lexical{$subname} ) { |
9141
|
0
|
|
|
|
|
0
|
$self->warning( |
9142
|
|
|
|
|
|
|
"'my' sub '$subname' matches a builtin name and may not be handled correctly in this perltidy version.\n" |
9143
|
|
|
|
|
|
|
); |
9144
|
|
|
|
|
|
|
|
9145
|
|
|
|
|
|
|
# This may end badly, it is safest to block formatting |
9146
|
|
|
|
|
|
|
# For an example, see perl527/lexsub.t (issue c203) |
9147
|
0
|
|
|
|
|
0
|
$self->[_in_trouble_] = 1; |
9148
|
|
|
|
|
|
|
} |
9149
|
|
|
|
|
|
|
} |
9150
|
|
|
|
|
|
|
else { |
9151
|
121
|
100
|
66
|
|
|
813
|
$package = ( defined($1) && $1 ) ? $1 : $current_package; |
9152
|
121
|
|
|
|
|
438
|
$package =~ s/\'/::/g; |
9153
|
121
|
50
|
|
|
|
457
|
if ( $package =~ /^\:/ ) { $package = 'main' . $package } |
|
0
|
|
|
|
|
0
|
|
9154
|
121
|
|
|
|
|
308
|
$package =~ s/::$//; |
9155
|
|
|
|
|
|
|
} |
9156
|
|
|
|
|
|
|
|
9157
|
121
|
|
|
|
|
261
|
my $pos = pos($input_line); |
9158
|
121
|
|
|
|
|
260
|
my $numc = $pos - $pos_beg; |
9159
|
121
|
|
|
|
|
376
|
$tok = 'sub ' . substr( $input_line, $pos_beg, $numc ); |
9160
|
121
|
|
|
|
|
253
|
$type = 'i'; |
9161
|
|
|
|
|
|
|
|
9162
|
|
|
|
|
|
|
# remember the sub name in case another call is needed to |
9163
|
|
|
|
|
|
|
# get the prototype |
9164
|
121
|
|
|
|
|
267
|
$package_saved = $package; |
9165
|
121
|
|
|
|
|
286
|
$subname_saved = $subname; |
9166
|
|
|
|
|
|
|
} |
9167
|
|
|
|
|
|
|
|
9168
|
|
|
|
|
|
|
# Now look for PROTO ATTRS for all call types |
9169
|
|
|
|
|
|
|
# Look for prototype/attributes which are usually on the same |
9170
|
|
|
|
|
|
|
# line as the sub name but which might be on a separate line. |
9171
|
|
|
|
|
|
|
# For example, we might have an anonymous sub with attributes, |
9172
|
|
|
|
|
|
|
# or a prototype on a separate line from its sub name |
9173
|
|
|
|
|
|
|
|
9174
|
|
|
|
|
|
|
# NOTE: We only want to parse PROTOTYPES here. If we see anything that |
9175
|
|
|
|
|
|
|
# does not look like a prototype, we assume it is a SIGNATURE and we |
9176
|
|
|
|
|
|
|
# will stop and let the the standard tokenizer handle it. In |
9177
|
|
|
|
|
|
|
# particular, we stop if we see any nested parens, braces, or commas. |
9178
|
|
|
|
|
|
|
# Also note, a valid prototype cannot contain any alphabetic character |
9179
|
|
|
|
|
|
|
# -- see https://perldoc.perl.org/perlsub |
9180
|
|
|
|
|
|
|
# But it appears that an underscore is valid in a prototype, so the |
9181
|
|
|
|
|
|
|
# regex below uses [A-Za-z] rather than \w |
9182
|
|
|
|
|
|
|
# This is the old regex which has been replaced: |
9183
|
|
|
|
|
|
|
# $input_line =~ m/\G(\s*\([^\)\(\}\{\,#]*\))? # PROTO |
9184
|
300
|
|
|
|
|
1085
|
my $saw_opening_paren = $input_line =~ /\G\s*\(/; |
9185
|
300
|
100
|
100
|
|
|
2896
|
if ( |
|
|
|
66
|
|
|
|
|
9186
|
|
|
|
|
|
|
$input_line =~ m/\G(\s*\([^\)\(\}\{\,#A-Za-z]*\))? # PROTO |
9187
|
|
|
|
|
|
|
(\s*:)? # ATTRS leading ':' |
9188
|
|
|
|
|
|
|
/gcx |
9189
|
|
|
|
|
|
|
&& ( $1 || $2 ) |
9190
|
|
|
|
|
|
|
) |
9191
|
|
|
|
|
|
|
{ |
9192
|
45
|
|
|
|
|
119
|
$proto = $1; |
9193
|
45
|
|
|
|
|
100
|
$attrs = $2; |
9194
|
|
|
|
|
|
|
|
9195
|
|
|
|
|
|
|
# Append the prototype to the starting token if it is 'sub' or |
9196
|
|
|
|
|
|
|
# 'prototype'. This is not necessary but for compatibility with |
9197
|
|
|
|
|
|
|
# previous versions when the -csc flag is used: |
9198
|
45
|
100
|
100
|
|
|
298
|
if ( $proto && ( $match || $call_type == PROTOTYPE_CALL ) ) { |
|
|
100
|
100
|
|
|
|
|
9199
|
24
|
|
|
|
|
64
|
$tok .= $proto; |
9200
|
|
|
|
|
|
|
} |
9201
|
|
|
|
|
|
|
|
9202
|
|
|
|
|
|
|
# If we just entered the sub at an opening paren on this call, not |
9203
|
|
|
|
|
|
|
# a following :prototype, label it with the previous token. This is |
9204
|
|
|
|
|
|
|
# necessary to propagate the sub name to its opening block. |
9205
|
|
|
|
|
|
|
elsif ( $call_type == PAREN_CALL ) { |
9206
|
2
|
|
|
|
|
8
|
$tok = $last_nonblank_token; |
9207
|
|
|
|
|
|
|
} |
9208
|
|
|
|
|
|
|
|
9209
|
45
|
|
100
|
|
|
161
|
$match ||= 1; |
9210
|
|
|
|
|
|
|
|
9211
|
|
|
|
|
|
|
# Patch part #1 to fixes cases b994 and b1053: |
9212
|
|
|
|
|
|
|
# Mark an anonymous sub keyword without prototype as type 'k', i.e. |
9213
|
|
|
|
|
|
|
# 'sub : lvalue { ...' |
9214
|
45
|
|
|
|
|
95
|
$type = 'i'; |
9215
|
45
|
100
|
100
|
|
|
202
|
if ( $tok eq 'sub' && !$proto ) { $type = 'k' } |
|
2
|
|
|
|
|
5
|
|
9216
|
|
|
|
|
|
|
} |
9217
|
|
|
|
|
|
|
|
9218
|
300
|
100
|
|
|
|
902
|
if ($match) { |
9219
|
|
|
|
|
|
|
|
9220
|
|
|
|
|
|
|
# ATTRS: if there are attributes, back up and let the ':' be |
9221
|
|
|
|
|
|
|
# found later by the scanner. |
9222
|
136
|
|
|
|
|
309
|
my $pos = pos($input_line); |
9223
|
136
|
100
|
|
|
|
406
|
if ($attrs) { |
9224
|
15
|
|
|
|
|
33
|
$pos -= length($attrs); |
9225
|
|
|
|
|
|
|
} |
9226
|
|
|
|
|
|
|
|
9227
|
136
|
|
|
|
|
312
|
my $next_nonblank_token = $tok; |
9228
|
|
|
|
|
|
|
|
9229
|
|
|
|
|
|
|
# catch case of line with leading ATTR ':' after anonymous sub |
9230
|
136
|
100
|
100
|
|
|
557
|
if ( $pos == $pos_beg && $tok eq ':' ) { |
9231
|
1
|
|
|
|
|
2
|
$type = 'A'; |
9232
|
1
|
|
|
|
|
5
|
$self->[_in_attribute_list_] = 1; |
9233
|
|
|
|
|
|
|
} |
9234
|
|
|
|
|
|
|
|
9235
|
|
|
|
|
|
|
# Otherwise, if we found a match we must convert back from |
9236
|
|
|
|
|
|
|
# string position to the pre_token index for continued parsing. |
9237
|
|
|
|
|
|
|
else { |
9238
|
|
|
|
|
|
|
|
9239
|
|
|
|
|
|
|
# I don't think an error flag can occur here ..but ? |
9240
|
135
|
|
|
|
|
247
|
my $error; |
9241
|
135
|
|
|
|
|
544
|
( $i, $error ) = inverse_pretoken_map( $i, $pos, $rtoken_map, |
9242
|
|
|
|
|
|
|
$max_token_index ); |
9243
|
135
|
50
|
|
|
|
448
|
if ($error) { $self->warning("Possibly invalid sub\n") } |
|
0
|
|
|
|
|
0
|
|
9244
|
|
|
|
|
|
|
|
9245
|
|
|
|
|
|
|
# Patch part #2 to fixes cases b994 and b1053: |
9246
|
|
|
|
|
|
|
# Do not let spaces be part of the token of an anonymous sub |
9247
|
|
|
|
|
|
|
# keyword which we marked as type 'k' above...i.e. for |
9248
|
|
|
|
|
|
|
# something like: |
9249
|
|
|
|
|
|
|
# 'sub : lvalue { ...' |
9250
|
|
|
|
|
|
|
# Back up and let it be parsed as a blank |
9251
|
135
|
50
|
66
|
|
|
621
|
if ( $type eq 'k' |
|
|
|
66
|
|
|
|
|
|
|
|
33
|
|
|
|
|
9252
|
|
|
|
|
|
|
&& $attrs |
9253
|
|
|
|
|
|
|
&& $i > $i_entry |
9254
|
|
|
|
|
|
|
&& substr( $rtokens->[$i], 0, 1 ) =~ m/\s/ ) |
9255
|
|
|
|
|
|
|
{ |
9256
|
2
|
|
|
|
|
5
|
$i--; |
9257
|
|
|
|
|
|
|
} |
9258
|
|
|
|
|
|
|
|
9259
|
|
|
|
|
|
|
# check for multiple definitions of a sub |
9260
|
135
|
|
|
|
|
388
|
( $next_nonblank_token, my $i_next ) = |
9261
|
|
|
|
|
|
|
find_next_nonblank_token_on_this_line( $i, $rtokens, |
9262
|
|
|
|
|
|
|
$max_token_index ); |
9263
|
|
|
|
|
|
|
} |
9264
|
|
|
|
|
|
|
|
9265
|
136
|
100
|
|
|
|
759
|
if ( $next_nonblank_token =~ /^(\s*|#)$/ ) |
9266
|
|
|
|
|
|
|
{ # skip blank or side comment |
9267
|
7
|
|
|
|
|
49
|
my ( $rpre_tokens, $rpre_types ) = |
9268
|
|
|
|
|
|
|
$self->peek_ahead_for_n_nonblank_pre_tokens(1); |
9269
|
7
|
50
|
33
|
|
|
39
|
if ( defined($rpre_tokens) && @{$rpre_tokens} ) { |
|
7
|
|
|
|
|
31
|
|
9270
|
7
|
|
|
|
|
26
|
$next_nonblank_token = $rpre_tokens->[0]; |
9271
|
|
|
|
|
|
|
} |
9272
|
|
|
|
|
|
|
else { |
9273
|
0
|
|
|
|
|
0
|
$next_nonblank_token = '}'; |
9274
|
|
|
|
|
|
|
} |
9275
|
|
|
|
|
|
|
} |
9276
|
|
|
|
|
|
|
|
9277
|
|
|
|
|
|
|
# See what's next... |
9278
|
136
|
100
|
|
|
|
570
|
if ( $next_nonblank_token eq '{' ) { |
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
9279
|
104
|
100
|
|
|
|
364
|
if ($subname) { |
9280
|
|
|
|
|
|
|
|
9281
|
|
|
|
|
|
|
# Check for multiple definitions of a sub, but |
9282
|
|
|
|
|
|
|
# it is ok to have multiple sub BEGIN, etc, |
9283
|
|
|
|
|
|
|
# so we do not complain if name is all caps |
9284
|
94
|
50
|
33
|
|
|
616
|
if ( $rsaw_function_definition->{$subname}{$package} |
9285
|
|
|
|
|
|
|
&& $subname !~ /^[A-Z]+$/ ) |
9286
|
|
|
|
|
|
|
{ |
9287
|
|
|
|
|
|
|
my $lno = |
9288
|
0
|
|
|
|
|
0
|
$rsaw_function_definition->{$subname}{$package}; |
9289
|
0
|
0
|
|
|
|
0
|
if ( $package =~ /^\d/ ) { |
9290
|
0
|
|
|
|
|
0
|
$self->warning( |
9291
|
|
|
|
|
|
|
"already saw definition of lexical 'sub $subname' at line $lno\n" |
9292
|
|
|
|
|
|
|
); |
9293
|
|
|
|
|
|
|
|
9294
|
|
|
|
|
|
|
} |
9295
|
|
|
|
|
|
|
else { |
9296
|
0
|
|
|
|
|
0
|
$self->warning( |
9297
|
|
|
|
|
|
|
"already saw definition of 'sub $subname' in package '$package' at line $lno\n" |
9298
|
|
|
|
|
|
|
) unless (DEVEL_MODE); |
9299
|
|
|
|
|
|
|
} |
9300
|
|
|
|
|
|
|
} |
9301
|
94
|
|
|
|
|
368
|
$rsaw_function_definition->{$subname}{$package} = |
9302
|
|
|
|
|
|
|
$self->[_last_line_number_]; |
9303
|
|
|
|
|
|
|
} |
9304
|
|
|
|
|
|
|
} |
9305
|
|
|
|
|
|
|
elsif ( $next_nonblank_token eq ';' ) { |
9306
|
|
|
|
|
|
|
} |
9307
|
|
|
|
|
|
|
elsif ( $next_nonblank_token eq '}' ) { |
9308
|
|
|
|
|
|
|
} |
9309
|
|
|
|
|
|
|
|
9310
|
|
|
|
|
|
|
# ATTRS - if an attribute list follows, remember the name |
9311
|
|
|
|
|
|
|
# of the sub so the next opening brace can be labeled. |
9312
|
|
|
|
|
|
|
# Setting 'statement_type' causes any ':'s to introduce |
9313
|
|
|
|
|
|
|
# attributes. |
9314
|
|
|
|
|
|
|
elsif ( $next_nonblank_token eq ':' ) { |
9315
|
16
|
100
|
|
|
|
62
|
if ( $call_type == SUB_CALL ) { |
9316
|
14
|
100
|
|
|
|
53
|
$statement_type = |
9317
|
|
|
|
|
|
|
substr( $tok, 0, 3 ) eq 'sub' ? $tok : 'sub'; |
9318
|
|
|
|
|
|
|
} |
9319
|
|
|
|
|
|
|
} |
9320
|
|
|
|
|
|
|
|
9321
|
|
|
|
|
|
|
# if we stopped before an open paren ... |
9322
|
|
|
|
|
|
|
elsif ( $next_nonblank_token eq '(' ) { |
9323
|
|
|
|
|
|
|
|
9324
|
|
|
|
|
|
|
# If we DID NOT see this paren above then it must be on the |
9325
|
|
|
|
|
|
|
# next line so we will set a flag to come back here and see if |
9326
|
|
|
|
|
|
|
# it is a PROTOTYPE |
9327
|
|
|
|
|
|
|
|
9328
|
|
|
|
|
|
|
# Otherwise, we assume it is a SIGNATURE rather than a |
9329
|
|
|
|
|
|
|
# PROTOTYPE and let the normal tokenizer handle it as a list |
9330
|
15
|
100
|
|
|
|
49
|
if ( !$saw_opening_paren ) { |
9331
|
4
|
|
|
|
|
11
|
$id_scan_state = 'sub'; # we must come back to get proto |
9332
|
|
|
|
|
|
|
} |
9333
|
15
|
50
|
|
|
|
73
|
if ( $call_type == SUB_CALL ) { |
9334
|
15
|
50
|
|
|
|
71
|
$statement_type = |
9335
|
|
|
|
|
|
|
substr( $tok, 0, 3 ) eq 'sub' ? $tok : 'sub'; |
9336
|
|
|
|
|
|
|
} |
9337
|
|
|
|
|
|
|
} |
9338
|
|
|
|
|
|
|
elsif ($next_nonblank_token) { # EOF technically ok |
9339
|
|
|
|
|
|
|
|
9340
|
0
|
0
|
0
|
|
|
0
|
if ( $rinput_hash->{tok} eq 'method' && $call_type == SUB_CALL ) |
9341
|
|
|
|
|
|
|
{ |
9342
|
|
|
|
|
|
|
# For a method call, silently ignore this error (rt145706) |
9343
|
|
|
|
|
|
|
# to avoid needless warnings. Example which can produce it: |
9344
|
|
|
|
|
|
|
# test(method Pack (), "method"); |
9345
|
|
|
|
|
|
|
|
9346
|
|
|
|
|
|
|
# TODO: scan for use feature 'class' and: |
9347
|
|
|
|
|
|
|
# - if we saw 'use feature 'class' then issue the warning. |
9348
|
|
|
|
|
|
|
# - if we did not see use feature 'class' then issue the |
9349
|
|
|
|
|
|
|
# warning and suggest turning off --use-feature=class |
9350
|
|
|
|
|
|
|
} |
9351
|
|
|
|
|
|
|
else { |
9352
|
0
|
0
|
|
|
|
0
|
$subname = EMPTY_STRING unless defined($subname); |
9353
|
0
|
|
|
|
|
0
|
$self->warning( |
9354
|
|
|
|
|
|
|
"expecting ':' or ';' or '{' after definition or declaration of sub '$subname' but saw '$next_nonblank_token'\n" |
9355
|
|
|
|
|
|
|
); |
9356
|
|
|
|
|
|
|
} |
9357
|
|
|
|
|
|
|
} |
9358
|
136
|
|
|
|
|
678
|
check_prototype( $proto, $package, $subname ); |
9359
|
|
|
|
|
|
|
} |
9360
|
|
|
|
|
|
|
|
9361
|
|
|
|
|
|
|
# no match to either sub name or prototype, but line not blank |
9362
|
|
|
|
|
|
|
else { |
9363
|
|
|
|
|
|
|
|
9364
|
|
|
|
|
|
|
} |
9365
|
300
|
|
|
|
|
1585
|
return ( $i, $tok, $type, $id_scan_state ); |
9366
|
|
|
|
|
|
|
} ## end sub do_scan_sub |
9367
|
|
|
|
|
|
|
} |
9368
|
|
|
|
|
|
|
|
9369
|
|
|
|
|
|
|
######################################################################### |
9370
|
|
|
|
|
|
|
# Tokenizer utility routines which may use CONSTANTS but no other GLOBALS |
9371
|
|
|
|
|
|
|
######################################################################### |
9372
|
|
|
|
|
|
|
|
9373
|
|
|
|
|
|
|
sub find_next_nonblank_token { |
9374
|
6154
|
|
|
6154
|
0
|
12052
|
my ( $self, $i, $rtokens, $max_token_index ) = @_; |
9375
|
|
|
|
|
|
|
|
9376
|
|
|
|
|
|
|
# Returns the next nonblank token after the token at index $i |
9377
|
|
|
|
|
|
|
# To skip past a side comment, and any subsequent block comments |
9378
|
|
|
|
|
|
|
# and blank lines, call with i=$max_token_index |
9379
|
|
|
|
|
|
|
|
9380
|
6154
|
100
|
|
|
|
13259
|
if ( $i >= $max_token_index ) { |
9381
|
117
|
100
|
|
|
|
643
|
if ( !peeked_ahead() ) { |
9382
|
115
|
|
|
|
|
382
|
peeked_ahead(1); |
9383
|
115
|
|
|
|
|
600
|
$self->peek_ahead_for_nonblank_token( $rtokens, $max_token_index ); |
9384
|
|
|
|
|
|
|
} |
9385
|
|
|
|
|
|
|
} |
9386
|
|
|
|
|
|
|
|
9387
|
6154
|
|
|
|
|
10894
|
my $next_nonblank_token = $rtokens->[ ++$i ]; |
9388
|
6154
|
50
|
33
|
|
|
22890
|
return ( SPACE, $i ) |
9389
|
|
|
|
|
|
|
unless ( defined($next_nonblank_token) && length($next_nonblank_token) ); |
9390
|
|
|
|
|
|
|
|
9391
|
|
|
|
|
|
|
# Quick test for nonblank ascii char. Note that we just have to |
9392
|
|
|
|
|
|
|
# examine the first character here. |
9393
|
6154
|
|
|
|
|
12510
|
my $ord = ord( substr( $next_nonblank_token, 0, 1 ) ); |
9394
|
6154
|
100
|
66
|
|
|
23682
|
if ( $ord >= ORD_PRINTABLE_MIN |
|
|
50
|
33
|
|
|
|
|
|
|
0
|
|
|
|
|
|
9395
|
|
|
|
|
|
|
&& $ord <= ORD_PRINTABLE_MAX ) |
9396
|
|
|
|
|
|
|
{ |
9397
|
2353
|
|
|
|
|
7058
|
return ( $next_nonblank_token, $i ); |
9398
|
|
|
|
|
|
|
} |
9399
|
|
|
|
|
|
|
|
9400
|
|
|
|
|
|
|
# Quick test to skip over an ascii space or tab |
9401
|
|
|
|
|
|
|
elsif ( $ord == ORD_SPACE || $ord == ORD_TAB ) { |
9402
|
3801
|
|
|
|
|
6783
|
$next_nonblank_token = $rtokens->[ ++$i ]; |
9403
|
3801
|
50
|
|
|
|
8444
|
return ( SPACE, $i ) unless defined($next_nonblank_token); |
9404
|
|
|
|
|
|
|
} |
9405
|
|
|
|
|
|
|
|
9406
|
|
|
|
|
|
|
# Slow test to skip over something else identified as whitespace |
9407
|
|
|
|
|
|
|
elsif ( $next_nonblank_token =~ /^\s*$/ ) { |
9408
|
0
|
|
|
|
|
0
|
$next_nonblank_token = $rtokens->[ ++$i ]; |
9409
|
0
|
0
|
|
|
|
0
|
return ( SPACE, $i ) unless defined($next_nonblank_token); |
9410
|
|
|
|
|
|
|
} |
9411
|
|
|
|
|
|
|
|
9412
|
|
|
|
|
|
|
# We should be at a nonblank now |
9413
|
3801
|
|
|
|
|
10821
|
return ( $next_nonblank_token, $i ); |
9414
|
|
|
|
|
|
|
} ## end sub find_next_nonblank_token |
9415
|
|
|
|
|
|
|
|
9416
|
|
|
|
|
|
|
sub find_next_noncomment_type { |
9417
|
98
|
|
|
98
|
0
|
358
|
my ( $self, $i, $rtokens, $max_token_index ) = @_; |
9418
|
|
|
|
|
|
|
|
9419
|
|
|
|
|
|
|
# Given the current character position, look ahead past any comments |
9420
|
|
|
|
|
|
|
# and blank lines and return the next token, including digraphs and |
9421
|
|
|
|
|
|
|
# trigraphs. |
9422
|
|
|
|
|
|
|
|
9423
|
98
|
|
|
|
|
515
|
my ( $next_nonblank_token, $i_next ) = |
9424
|
|
|
|
|
|
|
$self->find_next_nonblank_token( $i, $rtokens, $max_token_index ); |
9425
|
|
|
|
|
|
|
|
9426
|
|
|
|
|
|
|
# skip past any side comment |
9427
|
98
|
50
|
|
|
|
551
|
if ( $next_nonblank_token eq '#' ) { |
9428
|
0
|
|
|
|
|
0
|
( $next_nonblank_token, $i_next ) = |
9429
|
|
|
|
|
|
|
$self->find_next_nonblank_token( $i_next, $rtokens, |
9430
|
|
|
|
|
|
|
$max_token_index ); |
9431
|
|
|
|
|
|
|
} |
9432
|
|
|
|
|
|
|
|
9433
|
|
|
|
|
|
|
# check for a digraph |
9434
|
98
|
50
|
33
|
|
|
927
|
if ( $next_nonblank_token |
|
|
|
33
|
|
|
|
|
9435
|
|
|
|
|
|
|
&& $next_nonblank_token ne SPACE |
9436
|
|
|
|
|
|
|
&& defined( $rtokens->[ $i_next + 1 ] ) ) |
9437
|
|
|
|
|
|
|
{ |
9438
|
98
|
|
|
|
|
305
|
my $test2 = $next_nonblank_token . $rtokens->[ $i_next + 1 ]; |
9439
|
98
|
100
|
|
|
|
408
|
if ( $is_digraph{$test2} ) { |
9440
|
15
|
|
|
|
|
48
|
$next_nonblank_token = $test2; |
9441
|
15
|
|
|
|
|
67
|
$i_next = $i_next + 1; |
9442
|
|
|
|
|
|
|
|
9443
|
|
|
|
|
|
|
# check for a trigraph |
9444
|
15
|
50
|
|
|
|
67
|
if ( defined( $rtokens->[ $i_next + 1 ] ) ) { |
9445
|
15
|
|
|
|
|
54
|
my $test3 = $next_nonblank_token . $rtokens->[ $i_next + 1 ]; |
9446
|
15
|
50
|
|
|
|
83
|
if ( $is_trigraph{$test3} ) { |
9447
|
0
|
|
|
|
|
0
|
$next_nonblank_token = $test3; |
9448
|
0
|
|
|
|
|
0
|
$i_next = $i_next + 1; |
9449
|
|
|
|
|
|
|
} |
9450
|
|
|
|
|
|
|
} |
9451
|
|
|
|
|
|
|
} |
9452
|
|
|
|
|
|
|
} |
9453
|
|
|
|
|
|
|
|
9454
|
98
|
|
|
|
|
330
|
return ( $next_nonblank_token, $i_next ); |
9455
|
|
|
|
|
|
|
} ## end sub find_next_noncomment_type |
9456
|
|
|
|
|
|
|
|
9457
|
|
|
|
|
|
|
sub is_possible_numerator { |
9458
|
|
|
|
|
|
|
|
9459
|
|
|
|
|
|
|
# Look at the next non-comment character and decide if it could be a |
9460
|
|
|
|
|
|
|
# numerator. Return |
9461
|
|
|
|
|
|
|
# 1 - yes |
9462
|
|
|
|
|
|
|
# 0 - can't tell |
9463
|
|
|
|
|
|
|
# -1 - no |
9464
|
|
|
|
|
|
|
|
9465
|
0
|
|
|
0
|
0
|
0
|
my ( $self, $i, $rtokens, $max_token_index ) = @_; |
9466
|
0
|
|
|
|
|
0
|
my $is_possible_numerator = 0; |
9467
|
|
|
|
|
|
|
|
9468
|
0
|
|
|
|
|
0
|
my $next_token = $rtokens->[ $i + 1 ]; |
9469
|
0
|
0
|
|
|
|
0
|
if ( $next_token eq '=' ) { $i++; } # handle /= |
|
0
|
|
|
|
|
0
|
|
9470
|
0
|
|
|
|
|
0
|
my ( $next_nonblank_token, $i_next ) = |
9471
|
|
|
|
|
|
|
$self->find_next_nonblank_token( $i, $rtokens, $max_token_index ); |
9472
|
|
|
|
|
|
|
|
9473
|
0
|
0
|
|
|
|
0
|
if ( $next_nonblank_token eq '#' ) { |
9474
|
0
|
|
|
|
|
0
|
( $next_nonblank_token, $i_next ) = |
9475
|
|
|
|
|
|
|
$self->find_next_nonblank_token( $max_token_index, $rtokens, |
9476
|
|
|
|
|
|
|
$max_token_index ); |
9477
|
|
|
|
|
|
|
} |
9478
|
|
|
|
|
|
|
|
9479
|
0
|
0
|
|
|
|
0
|
if ( $next_nonblank_token =~ /(\(|\$|\w|\.|\@)/ ) { |
|
|
0
|
|
|
|
|
|
9480
|
0
|
|
|
|
|
0
|
$is_possible_numerator = 1; |
9481
|
|
|
|
|
|
|
} |
9482
|
|
|
|
|
|
|
elsif ( $next_nonblank_token =~ /^\s*$/ ) { |
9483
|
0
|
|
|
|
|
0
|
$is_possible_numerator = 0; |
9484
|
|
|
|
|
|
|
} |
9485
|
|
|
|
|
|
|
else { |
9486
|
0
|
|
|
|
|
0
|
$is_possible_numerator = -1; |
9487
|
|
|
|
|
|
|
} |
9488
|
|
|
|
|
|
|
|
9489
|
0
|
|
|
|
|
0
|
return $is_possible_numerator; |
9490
|
|
|
|
|
|
|
} ## end sub is_possible_numerator |
9491
|
|
|
|
|
|
|
|
9492
|
|
|
|
|
|
|
{ ## closure for sub pattern_expected |
9493
|
|
|
|
|
|
|
my %pattern_test; |
9494
|
|
|
|
|
|
|
|
9495
|
|
|
|
|
|
|
BEGIN { |
9496
|
|
|
|
|
|
|
|
9497
|
|
|
|
|
|
|
# List of tokens which may follow a pattern. Note that we will not |
9498
|
|
|
|
|
|
|
# have formed digraphs at this point, so we will see '&' instead of |
9499
|
|
|
|
|
|
|
# '&&' and '|' instead of '||' |
9500
|
|
|
|
|
|
|
|
9501
|
|
|
|
|
|
|
# /(\)|\}|\;|\&\&|\|\||and|or|while|if|unless)/ |
9502
|
38
|
|
|
38
|
|
303
|
my @q = qw( & && | || ? : + - * and or while if unless); |
9503
|
38
|
|
|
|
|
155
|
push @q, ')', '}', ']', '>', ',', ';'; |
9504
|
38
|
|
|
|
|
175624
|
@{pattern_test}{@q} = (1) x scalar(@q); |
9505
|
|
|
|
|
|
|
} ## end BEGIN |
9506
|
|
|
|
|
|
|
|
9507
|
|
|
|
|
|
|
sub pattern_expected { |
9508
|
|
|
|
|
|
|
|
9509
|
|
|
|
|
|
|
# This a filter for a possible pattern. |
9510
|
|
|
|
|
|
|
# It looks at the token after a possible pattern and tries to |
9511
|
|
|
|
|
|
|
# determine if that token could end a pattern. |
9512
|
|
|
|
|
|
|
# returns - |
9513
|
|
|
|
|
|
|
# 1 - yes |
9514
|
|
|
|
|
|
|
# 0 - can't tell |
9515
|
|
|
|
|
|
|
# -1 - no |
9516
|
0
|
|
|
0
|
0
|
0
|
my ( $self, $i, $rtokens, $max_token_index ) = @_; |
9517
|
0
|
|
|
|
|
0
|
my $is_pattern = 0; |
9518
|
|
|
|
|
|
|
|
9519
|
0
|
|
|
|
|
0
|
my $next_token = $rtokens->[ $i + 1 ]; |
9520
|
0
|
0
|
|
|
|
0
|
if ( $next_token =~ /^[msixpodualgc]/ ) { |
9521
|
0
|
|
|
|
|
0
|
$i++; |
9522
|
|
|
|
|
|
|
} # skip possible modifier |
9523
|
0
|
|
|
|
|
0
|
my ( $next_nonblank_token, $i_next ) = |
9524
|
|
|
|
|
|
|
$self->find_next_nonblank_token( $i, $rtokens, $max_token_index ); |
9525
|
|
|
|
|
|
|
|
9526
|
0
|
0
|
|
|
|
0
|
if ( $pattern_test{$next_nonblank_token} ) { |
9527
|
0
|
|
|
|
|
0
|
$is_pattern = 1; |
9528
|
|
|
|
|
|
|
} |
9529
|
|
|
|
|
|
|
else { |
9530
|
|
|
|
|
|
|
|
9531
|
|
|
|
|
|
|
# Added '#' to fix issue c044 |
9532
|
0
|
0
|
0
|
|
|
0
|
if ( $next_nonblank_token =~ /^\s*$/ |
9533
|
|
|
|
|
|
|
|| $next_nonblank_token eq '#' ) |
9534
|
|
|
|
|
|
|
{ |
9535
|
0
|
|
|
|
|
0
|
$is_pattern = 0; |
9536
|
|
|
|
|
|
|
} |
9537
|
|
|
|
|
|
|
else { |
9538
|
0
|
|
|
|
|
0
|
$is_pattern = -1; |
9539
|
|
|
|
|
|
|
} |
9540
|
|
|
|
|
|
|
} |
9541
|
0
|
|
|
|
|
0
|
return $is_pattern; |
9542
|
|
|
|
|
|
|
} ## end sub pattern_expected |
9543
|
|
|
|
|
|
|
} |
9544
|
|
|
|
|
|
|
|
9545
|
|
|
|
|
|
|
sub find_next_nonblank_token_on_this_line { |
9546
|
453
|
|
|
453
|
0
|
1049
|
my ( $i, $rtokens, $max_token_index ) = @_; |
9547
|
453
|
|
|
|
|
754
|
my $next_nonblank_token; |
9548
|
|
|
|
|
|
|
|
9549
|
453
|
100
|
|
|
|
1111
|
if ( $i < $max_token_index ) { |
9550
|
445
|
|
|
|
|
953
|
$next_nonblank_token = $rtokens->[ ++$i ]; |
9551
|
|
|
|
|
|
|
|
9552
|
445
|
100
|
|
|
|
1913
|
if ( $next_nonblank_token =~ /^\s*$/ ) { |
9553
|
|
|
|
|
|
|
|
9554
|
120
|
100
|
|
|
|
480
|
if ( $i < $max_token_index ) { |
9555
|
118
|
|
|
|
|
369
|
$next_nonblank_token = $rtokens->[ ++$i ]; |
9556
|
|
|
|
|
|
|
} |
9557
|
|
|
|
|
|
|
} |
9558
|
|
|
|
|
|
|
} |
9559
|
|
|
|
|
|
|
else { |
9560
|
8
|
|
|
|
|
26
|
$next_nonblank_token = EMPTY_STRING; |
9561
|
|
|
|
|
|
|
} |
9562
|
453
|
|
|
|
|
1435
|
return ( $next_nonblank_token, $i ); |
9563
|
|
|
|
|
|
|
} ## end sub find_next_nonblank_token_on_this_line |
9564
|
|
|
|
|
|
|
|
9565
|
|
|
|
|
|
|
sub find_angle_operator_termination { |
9566
|
|
|
|
|
|
|
|
9567
|
|
|
|
|
|
|
# We are looking at a '<' and want to know if it is an angle operator. |
9568
|
|
|
|
|
|
|
# We are to return: |
9569
|
|
|
|
|
|
|
# $i = pretoken index of ending '>' if found, current $i otherwise |
9570
|
|
|
|
|
|
|
# $type = 'Q' if found, '>' otherwise |
9571
|
8
|
|
|
8
|
0
|
42
|
my ( $self, $input_line, $i_beg, $rtoken_map, $expecting, $max_token_index ) |
9572
|
|
|
|
|
|
|
= @_; |
9573
|
8
|
|
|
|
|
17
|
my $i = $i_beg; |
9574
|
8
|
|
|
|
|
21
|
my $type = '<'; |
9575
|
8
|
|
|
|
|
33
|
pos($input_line) = 1 + $rtoken_map->[$i]; |
9576
|
|
|
|
|
|
|
|
9577
|
8
|
|
|
|
|
19
|
my $filter; |
9578
|
|
|
|
|
|
|
|
9579
|
|
|
|
|
|
|
# we just have to find the next '>' if a term is expected |
9580
|
8
|
100
|
|
|
|
48
|
if ( $expecting == TERM ) { $filter = '[\>]' } |
|
6
|
50
|
|
|
|
19
|
|
9581
|
|
|
|
|
|
|
|
9582
|
|
|
|
|
|
|
# we have to guess if we don't know what is expected |
9583
|
2
|
|
|
|
|
4
|
elsif ( $expecting == UNKNOWN ) { $filter = '[\>\;\=\#\|\<]' } |
9584
|
|
|
|
|
|
|
|
9585
|
|
|
|
|
|
|
# shouldn't happen - we shouldn't be here if operator is expected |
9586
|
|
|
|
|
|
|
else { |
9587
|
0
|
|
|
|
|
0
|
if (DEVEL_MODE) { |
9588
|
|
|
|
|
|
|
$self->Fault(<<EOM); |
9589
|
|
|
|
|
|
|
Bad call to find_angle_operator_termination |
9590
|
|
|
|
|
|
|
EOM |
9591
|
|
|
|
|
|
|
} |
9592
|
0
|
|
|
|
|
0
|
return ( $i, $type ); |
9593
|
|
|
|
|
|
|
} |
9594
|
|
|
|
|
|
|
|
9595
|
|
|
|
|
|
|
# To illustrate what we might be looking at, in case we are |
9596
|
|
|
|
|
|
|
# guessing, here are some examples of valid angle operators |
9597
|
|
|
|
|
|
|
# (or file globs): |
9598
|
|
|
|
|
|
|
# <tmp_imp/*> |
9599
|
|
|
|
|
|
|
# <FH> |
9600
|
|
|
|
|
|
|
# <$fh> |
9601
|
|
|
|
|
|
|
# <*.c *.h> |
9602
|
|
|
|
|
|
|
# <_> |
9603
|
|
|
|
|
|
|
# <jskdfjskdfj* op/* jskdjfjkosvk*> ( glob.t) |
9604
|
|
|
|
|
|
|
# <${PREFIX}*img*.$IMAGE_TYPE> |
9605
|
|
|
|
|
|
|
# <img*.$IMAGE_TYPE> |
9606
|
|
|
|
|
|
|
# <Timg*.$IMAGE_TYPE> |
9607
|
|
|
|
|
|
|
# <$LATEX2HTMLVERSIONS${dd}html[1-9].[0-9].pl> |
9608
|
|
|
|
|
|
|
# |
9609
|
|
|
|
|
|
|
# Here are some examples of lines which do not have angle operators: |
9610
|
|
|
|
|
|
|
# return unless $self->[2]++ < $#{$self->[1]}; |
9611
|
|
|
|
|
|
|
# < 2 || @$t > |
9612
|
|
|
|
|
|
|
# |
9613
|
|
|
|
|
|
|
# the following line from dlister.pl caused trouble: |
9614
|
|
|
|
|
|
|
# print'~'x79,"\n",$D<1024?"0.$D":$D>>10,"K, $C files\n\n\n"; |
9615
|
|
|
|
|
|
|
# |
9616
|
|
|
|
|
|
|
# If the '<' starts an angle operator, it must end on this line and |
9617
|
|
|
|
|
|
|
# it must not have certain characters like ';' and '=' in it. I use |
9618
|
|
|
|
|
|
|
# this to limit the testing. This filter should be improved if |
9619
|
|
|
|
|
|
|
# possible. |
9620
|
|
|
|
|
|
|
|
9621
|
8
|
50
|
|
|
|
171
|
if ( $input_line =~ /($filter)/g ) { |
9622
|
|
|
|
|
|
|
|
9623
|
8
|
50
|
|
|
|
48
|
if ( $1 eq '>' ) { |
9624
|
|
|
|
|
|
|
|
9625
|
|
|
|
|
|
|
# We MAY have found an angle operator termination if we get |
9626
|
|
|
|
|
|
|
# here, but we need to do more to be sure we haven't been |
9627
|
|
|
|
|
|
|
# fooled. |
9628
|
8
|
|
|
|
|
34
|
my $pos = pos($input_line); |
9629
|
|
|
|
|
|
|
|
9630
|
8
|
|
|
|
|
20
|
my $pos_beg = $rtoken_map->[$i]; |
9631
|
8
|
|
|
|
|
28
|
my $str = substr( $input_line, $pos_beg, ( $pos - $pos_beg ) ); |
9632
|
|
|
|
|
|
|
|
9633
|
|
|
|
|
|
|
# Test for '<' after possible filehandle, issue c103 |
9634
|
|
|
|
|
|
|
# print $fh <>; # syntax error |
9635
|
|
|
|
|
|
|
# print $fh <DATA>; # ok |
9636
|
|
|
|
|
|
|
# print $fh < DATA>; # syntax error at '>' |
9637
|
|
|
|
|
|
|
# print STDERR < DATA>; # ok, prints word 'DATA' |
9638
|
|
|
|
|
|
|
# print BLABLA <DATA>; # ok; does nothing unless BLABLA is defined |
9639
|
8
|
100
|
|
|
|
32
|
if ( $last_nonblank_type eq 'Z' ) { |
9640
|
|
|
|
|
|
|
|
9641
|
|
|
|
|
|
|
# $str includes brackets; something like '<DATA>' |
9642
|
1
|
0
|
33
|
|
|
22
|
if ( substr( $last_nonblank_token, 0, 1 ) !~ /[A-Za-z_]/ |
9643
|
|
|
|
|
|
|
&& substr( $str, 1, 1 ) !~ /[A-Za-z_]/ ) |
9644
|
|
|
|
|
|
|
{ |
9645
|
0
|
|
|
|
|
0
|
return ( $i, $type ); |
9646
|
|
|
|
|
|
|
} |
9647
|
|
|
|
|
|
|
} |
9648
|
|
|
|
|
|
|
|
9649
|
|
|
|
|
|
|
# Reject if the closing '>' follows a '-' as in: |
9650
|
|
|
|
|
|
|
# if ( VERSION < 5.009 && $op-> name eq 'assign' ) { } |
9651
|
8
|
100
|
|
|
|
36
|
if ( $expecting eq UNKNOWN ) { |
9652
|
2
|
|
|
|
|
4
|
my $check = substr( $input_line, $pos - 2, 1 ); |
9653
|
2
|
100
|
|
|
|
12
|
if ( $check eq '-' ) { |
9654
|
1
|
|
|
|
|
5
|
return ( $i, $type ); |
9655
|
|
|
|
|
|
|
} |
9656
|
|
|
|
|
|
|
} |
9657
|
|
|
|
|
|
|
|
9658
|
|
|
|
|
|
|
######################################debug##### |
9659
|
|
|
|
|
|
|
#$self->write_diagnostics( "ANGLE? :$str\n"); |
9660
|
|
|
|
|
|
|
#print "ANGLE: found $1 at pos=$pos str=$str check=$check\n"; |
9661
|
|
|
|
|
|
|
######################################debug##### |
9662
|
7
|
|
|
|
|
16
|
$type = 'Q'; |
9663
|
7
|
|
|
|
|
13
|
my $error; |
9664
|
7
|
|
|
|
|
33
|
( $i, $error ) = |
9665
|
|
|
|
|
|
|
inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index ); |
9666
|
|
|
|
|
|
|
|
9667
|
|
|
|
|
|
|
# It may be possible that a quote ends midway in a pretoken. |
9668
|
|
|
|
|
|
|
# If this happens, it may be necessary to split the pretoken. |
9669
|
7
|
50
|
|
|
|
34
|
if ($error) { |
9670
|
0
|
|
|
|
|
0
|
if (DEVEL_MODE) { |
9671
|
|
|
|
|
|
|
$self->Fault(<<EOM); |
9672
|
|
|
|
|
|
|
unexpected error condition returned by inverse_pretoken_map |
9673
|
|
|
|
|
|
|
EOM |
9674
|
|
|
|
|
|
|
} |
9675
|
|
|
|
|
|
|
$self->warning( |
9676
|
0
|
|
|
|
|
0
|
"Possible tokinization error..please check this line\n"); |
9677
|
|
|
|
|
|
|
} |
9678
|
|
|
|
|
|
|
|
9679
|
|
|
|
|
|
|
# Check for accidental formatting of a markup language doc... |
9680
|
|
|
|
|
|
|
# Formatting will be skipped if we set _html_tag_count_ and |
9681
|
|
|
|
|
|
|
# also set a warning of any kind. |
9682
|
7
|
|
|
|
|
26
|
my $is_html_tag; |
9683
|
7
|
|
33
|
|
|
33
|
my $is_first_string = |
9684
|
|
|
|
|
|
|
$i_beg == 0 && $self->[_last_line_number_] == 1; |
9685
|
|
|
|
|
|
|
|
9686
|
|
|
|
|
|
|
# html comment '<!...' of any type |
9687
|
7
|
50
|
33
|
|
|
85
|
if ( $str =~ /^<\s*!/ ) { |
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
9688
|
0
|
|
|
|
|
0
|
$is_html_tag = 1; |
9689
|
0
|
0
|
|
|
|
0
|
if ($is_first_string) { |
9690
|
0
|
|
|
|
|
0
|
$self->warning( |
9691
|
|
|
|
|
|
|
"looks like a markup language, continuing error checks\n" |
9692
|
|
|
|
|
|
|
); |
9693
|
|
|
|
|
|
|
} |
9694
|
|
|
|
|
|
|
} |
9695
|
|
|
|
|
|
|
|
9696
|
|
|
|
|
|
|
# html end tag, something like </h1> |
9697
|
|
|
|
|
|
|
elsif ( $str =~ /^<\s*\/\w+\s*>$/ ) { |
9698
|
0
|
|
|
|
|
0
|
$is_html_tag = 1; |
9699
|
|
|
|
|
|
|
} |
9700
|
|
|
|
|
|
|
|
9701
|
|
|
|
|
|
|
# xml prolog? |
9702
|
|
|
|
|
|
|
elsif ( $str =~ /^<\?xml\s.*\?>$/i && $is_first_string ) { |
9703
|
0
|
|
|
|
|
0
|
$is_html_tag = 1; |
9704
|
0
|
|
|
|
|
0
|
$self->warning( |
9705
|
|
|
|
|
|
|
"looks like a markup language, continuing error checks\n"); |
9706
|
|
|
|
|
|
|
} |
9707
|
|
|
|
|
|
|
|
9708
|
7
|
50
|
|
|
|
32
|
if ($is_html_tag) { |
9709
|
0
|
|
|
|
|
0
|
$self->[_html_tag_count_]++; |
9710
|
|
|
|
|
|
|
} |
9711
|
|
|
|
|
|
|
|
9712
|
|
|
|
|
|
|
# count blanks on inside of brackets |
9713
|
7
|
|
|
|
|
35
|
my $blank_count = 0; |
9714
|
7
|
100
|
|
|
|
47
|
$blank_count++ if ( $str =~ /<\s+/ ); |
9715
|
7
|
100
|
|
|
|
37
|
$blank_count++ if ( $str =~ /\s+>/ ); |
9716
|
|
|
|
|
|
|
|
9717
|
|
|
|
|
|
|
# Now let's see where we stand.... |
9718
|
|
|
|
|
|
|
# OK if math op not possible |
9719
|
7
|
100
|
|
|
|
38
|
if ( $expecting == TERM ) { |
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
9720
|
|
|
|
|
|
|
} |
9721
|
|
|
|
|
|
|
|
9722
|
|
|
|
|
|
|
elsif ($is_html_tag) { |
9723
|
|
|
|
|
|
|
} |
9724
|
|
|
|
|
|
|
|
9725
|
|
|
|
|
|
|
# OK if there are no more than 2 non-blank pre-tokens inside |
9726
|
|
|
|
|
|
|
# (not possible to write 2 token math between < and >) |
9727
|
|
|
|
|
|
|
# This catches most common cases |
9728
|
|
|
|
|
|
|
elsif ( $i <= $i_beg + 3 + $blank_count ) { |
9729
|
|
|
|
|
|
|
|
9730
|
|
|
|
|
|
|
# No longer any need to document this common case |
9731
|
|
|
|
|
|
|
## $self->write_diagnostics("ANGLE(1 or 2 tokens): $str\n"); |
9732
|
|
|
|
|
|
|
} |
9733
|
|
|
|
|
|
|
|
9734
|
|
|
|
|
|
|
# OK if there is some kind of identifier inside |
9735
|
|
|
|
|
|
|
# print $fh <tvg::INPUT>; |
9736
|
|
|
|
|
|
|
elsif ( $str =~ /^<\s*\$?(\w|::|\s)+\s*>$/ ) { |
9737
|
0
|
|
|
|
|
0
|
$self->write_diagnostics("ANGLE (contains identifier): $str\n"); |
9738
|
|
|
|
|
|
|
} |
9739
|
|
|
|
|
|
|
|
9740
|
|
|
|
|
|
|
# Not sure.. |
9741
|
|
|
|
|
|
|
else { |
9742
|
|
|
|
|
|
|
|
9743
|
|
|
|
|
|
|
# Let's try a Brace Test: any braces inside must balance |
9744
|
0
|
|
|
|
|
0
|
my $br = 0; |
9745
|
0
|
|
|
|
|
0
|
while ( $str =~ /\{/g ) { $br++ } |
|
0
|
|
|
|
|
0
|
|
9746
|
0
|
|
|
|
|
0
|
while ( $str =~ /\}/g ) { $br-- } |
|
0
|
|
|
|
|
0
|
|
9747
|
0
|
|
|
|
|
0
|
my $sb = 0; |
9748
|
0
|
|
|
|
|
0
|
while ( $str =~ /\[/g ) { $sb++ } |
|
0
|
|
|
|
|
0
|
|
9749
|
0
|
|
|
|
|
0
|
while ( $str =~ /\]/g ) { $sb-- } |
|
0
|
|
|
|
|
0
|
|
9750
|
0
|
|
|
|
|
0
|
my $pr = 0; |
9751
|
0
|
|
|
|
|
0
|
while ( $str =~ /\(/g ) { $pr++ } |
|
0
|
|
|
|
|
0
|
|
9752
|
0
|
|
|
|
|
0
|
while ( $str =~ /\)/g ) { $pr-- } |
|
0
|
|
|
|
|
0
|
|
9753
|
|
|
|
|
|
|
|
9754
|
|
|
|
|
|
|
# if braces do not balance - not angle operator |
9755
|
0
|
0
|
0
|
|
|
0
|
if ( $br || $sb || $pr ) { |
|
|
|
0
|
|
|
|
|
9756
|
0
|
|
|
|
|
0
|
$i = $i_beg; |
9757
|
0
|
|
|
|
|
0
|
$type = '<'; |
9758
|
0
|
|
|
|
|
0
|
$self->write_diagnostics( |
9759
|
|
|
|
|
|
|
"NOT ANGLE (BRACE={$br ($pr [$sb ):$str\n"); |
9760
|
|
|
|
|
|
|
} |
9761
|
|
|
|
|
|
|
|
9762
|
|
|
|
|
|
|
# we should keep doing more checks here...to be continued |
9763
|
|
|
|
|
|
|
# Tentatively accepting this as a valid angle operator. |
9764
|
|
|
|
|
|
|
# There are lots more things that can be checked. |
9765
|
|
|
|
|
|
|
else { |
9766
|
0
|
|
|
|
|
0
|
$self->write_diagnostics( |
9767
|
|
|
|
|
|
|
"ANGLE-Guessing yes: $str expecting=$expecting\n"); |
9768
|
0
|
|
|
|
|
0
|
$self->write_logfile_entry( |
9769
|
|
|
|
|
|
|
"Guessing angle operator here: $str\n"); |
9770
|
|
|
|
|
|
|
} |
9771
|
|
|
|
|
|
|
} |
9772
|
|
|
|
|
|
|
} |
9773
|
|
|
|
|
|
|
|
9774
|
|
|
|
|
|
|
# didn't find ending > |
9775
|
|
|
|
|
|
|
else { |
9776
|
0
|
0
|
|
|
|
0
|
if ( $expecting == TERM ) { |
9777
|
0
|
|
|
|
|
0
|
$self->warning("No ending > for angle operator\n"); |
9778
|
|
|
|
|
|
|
} |
9779
|
|
|
|
|
|
|
} |
9780
|
|
|
|
|
|
|
} |
9781
|
7
|
|
|
|
|
48
|
return ( $i, $type ); |
9782
|
|
|
|
|
|
|
} ## end sub find_angle_operator_termination |
9783
|
|
|
|
|
|
|
|
9784
|
|
|
|
|
|
|
sub scan_number_do { |
9785
|
|
|
|
|
|
|
|
9786
|
|
|
|
|
|
|
# scan a number in any of the formats that Perl accepts |
9787
|
|
|
|
|
|
|
# Underbars (_) are allowed in decimal numbers. |
9788
|
|
|
|
|
|
|
# input parameters - |
9789
|
|
|
|
|
|
|
# $input_line - the string to scan |
9790
|
|
|
|
|
|
|
# $i - pre_token index to start scanning |
9791
|
|
|
|
|
|
|
# $rtoken_map - reference to the pre_token map giving starting |
9792
|
|
|
|
|
|
|
# character position in $input_line of token $i |
9793
|
|
|
|
|
|
|
# output parameters - |
9794
|
|
|
|
|
|
|
# $i - last pre_token index of the number just scanned |
9795
|
|
|
|
|
|
|
# number - the number (characters); or undef if not a number |
9796
|
|
|
|
|
|
|
|
9797
|
629
|
|
|
629
|
0
|
1620
|
my ( $self, $input_line, $i, $rtoken_map, $input_type, $max_token_index ) = |
9798
|
|
|
|
|
|
|
@_; |
9799
|
629
|
|
|
|
|
1133
|
my $pos_beg = $rtoken_map->[$i]; |
9800
|
629
|
|
|
|
|
919
|
my $pos; |
9801
|
629
|
|
|
|
|
987
|
my $i_begin = $i; |
9802
|
629
|
|
|
|
|
1001
|
my $number = undef; |
9803
|
629
|
|
|
|
|
1338
|
my $type = $input_type; |
9804
|
|
|
|
|
|
|
|
9805
|
629
|
|
|
|
|
1382
|
my $first_char = substr( $input_line, $pos_beg, 1 ); |
9806
|
|
|
|
|
|
|
|
9807
|
|
|
|
|
|
|
# Look for bad starting characters; Shouldn't happen.. |
9808
|
629
|
50
|
|
|
|
2814
|
if ( $first_char !~ /[\d\.\+\-Ee]/ ) { |
9809
|
0
|
|
|
|
|
0
|
if (DEVEL_MODE) { |
9810
|
|
|
|
|
|
|
$self->Fault(<<EOM); |
9811
|
|
|
|
|
|
|
Program bug - scan_number given bad first character = '$first_char' |
9812
|
|
|
|
|
|
|
EOM |
9813
|
|
|
|
|
|
|
} |
9814
|
0
|
|
|
|
|
0
|
return ( $i, $type, $number ); |
9815
|
|
|
|
|
|
|
} |
9816
|
|
|
|
|
|
|
|
9817
|
|
|
|
|
|
|
# handle v-string without leading 'v' character ('Two Dot' rule) |
9818
|
|
|
|
|
|
|
# (vstring.t) |
9819
|
|
|
|
|
|
|
# Here is the format prior to including underscores: |
9820
|
|
|
|
|
|
|
## if ( $input_line =~ /\G((\d+)?\.\d+(\.\d+)+)/g ) { |
9821
|
629
|
|
|
|
|
1866
|
pos($input_line) = $pos_beg; |
9822
|
629
|
50
|
|
|
|
3017
|
if ( $input_line =~ /\G((\d[_\d]*)?\.[\d_]+(\.[\d_]+)+)/g ) { |
9823
|
0
|
|
|
|
|
0
|
$pos = pos($input_line); |
9824
|
0
|
|
|
|
|
0
|
my $numc = $pos - $pos_beg; |
9825
|
0
|
|
|
|
|
0
|
$number = substr( $input_line, $pos_beg, $numc ); |
9826
|
0
|
|
|
|
|
0
|
$type = 'v'; |
9827
|
0
|
|
|
|
|
0
|
$self->report_v_string($number); |
9828
|
|
|
|
|
|
|
} |
9829
|
|
|
|
|
|
|
|
9830
|
|
|
|
|
|
|
# handle octal, hex, binary |
9831
|
629
|
50
|
|
|
|
1631
|
if ( !defined($number) ) { |
9832
|
629
|
|
|
|
|
1380
|
pos($input_line) = $pos_beg; |
9833
|
|
|
|
|
|
|
|
9834
|
|
|
|
|
|
|
# Perl 5.22 added floating point literals, like '0x0.b17217f7d1cf78p0' |
9835
|
|
|
|
|
|
|
# For reference, the format prior to hex floating point is: |
9836
|
|
|
|
|
|
|
# /\G[+-]?0(([xX][0-9a-fA-F_]+)|([0-7_]+)|([bB][01_]+))/g ) |
9837
|
|
|
|
|
|
|
# (hex) (octal) (binary) |
9838
|
629
|
100
|
|
|
|
2467
|
if ( |
9839
|
|
|
|
|
|
|
$input_line =~ |
9840
|
|
|
|
|
|
|
|
9841
|
|
|
|
|
|
|
/\G[+-]?0( # leading [signed] 0 |
9842
|
|
|
|
|
|
|
|
9843
|
|
|
|
|
|
|
# a hex float, i.e. '0x0.b17217f7d1cf78p0' |
9844
|
|
|
|
|
|
|
([xX][0-9a-fA-F_]* # X and optional leading digits |
9845
|
|
|
|
|
|
|
(\.([0-9a-fA-F][0-9a-fA-F_]*)?)? # optional decimal and fraction |
9846
|
|
|
|
|
|
|
[Pp][+-]?[0-9a-fA-F] # REQUIRED exponent with digit |
9847
|
|
|
|
|
|
|
[0-9a-fA-F_]*) # optional Additional exponent digits |
9848
|
|
|
|
|
|
|
|
9849
|
|
|
|
|
|
|
# or hex integer |
9850
|
|
|
|
|
|
|
|([xX][0-9a-fA-F_]+) |
9851
|
|
|
|
|
|
|
|
9852
|
|
|
|
|
|
|
# or octal fraction |
9853
|
|
|
|
|
|
|
|([oO]?[0-7_]+ # string of octal digits |
9854
|
|
|
|
|
|
|
(\.([0-7][0-7_]*)?)? # optional decimal and fraction |
9855
|
|
|
|
|
|
|
[Pp][+-]?[0-7] # REQUIRED exponent, no underscore |
9856
|
|
|
|
|
|
|
[0-7_]*) # Additional exponent digits with underscores |
9857
|
|
|
|
|
|
|
|
9858
|
|
|
|
|
|
|
# or octal integer |
9859
|
|
|
|
|
|
|
|([oO]?[0-7_]+) # string of octal digits |
9860
|
|
|
|
|
|
|
|
9861
|
|
|
|
|
|
|
# or a binary float |
9862
|
|
|
|
|
|
|
|([bB][01_]* # 'b' with string of binary digits |
9863
|
|
|
|
|
|
|
(\.([01][01_]*)?)? # optional decimal and fraction |
9864
|
|
|
|
|
|
|
[Pp][+-]?[01] # Required exponent indicator, no underscore |
9865
|
|
|
|
|
|
|
[01_]*) # additional exponent bits |
9866
|
|
|
|
|
|
|
|
9867
|
|
|
|
|
|
|
# or binary integer |
9868
|
|
|
|
|
|
|
|([bB][01_]+) # 'b' with string of binary digits |
9869
|
|
|
|
|
|
|
|
9870
|
|
|
|
|
|
|
)/gx |
9871
|
|
|
|
|
|
|
) |
9872
|
|
|
|
|
|
|
{ |
9873
|
72
|
|
|
|
|
149
|
$pos = pos($input_line); |
9874
|
72
|
|
|
|
|
129
|
my $numc = $pos - $pos_beg; |
9875
|
72
|
|
|
|
|
157
|
$number = substr( $input_line, $pos_beg, $numc ); |
9876
|
72
|
|
|
|
|
132
|
$type = 'n'; |
9877
|
|
|
|
|
|
|
} |
9878
|
|
|
|
|
|
|
} |
9879
|
|
|
|
|
|
|
|
9880
|
|
|
|
|
|
|
# handle decimal |
9881
|
629
|
100
|
|
|
|
1481
|
if ( !defined($number) ) { |
9882
|
557
|
|
|
|
|
1137
|
pos($input_line) = $pos_beg; |
9883
|
|
|
|
|
|
|
|
9884
|
557
|
50
|
|
|
|
2728
|
if ( $input_line =~ /\G([+-]?[\d_]*(\.[\d_]*)?([Ee][+-]?(\d+))?)/g ) { |
9885
|
557
|
|
|
|
|
1067
|
$pos = pos($input_line); |
9886
|
|
|
|
|
|
|
|
9887
|
|
|
|
|
|
|
# watch out for things like 0..40 which would give 0. by this; |
9888
|
557
|
100
|
100
|
|
|
2067
|
if ( ( substr( $input_line, $pos - 1, 1 ) eq '.' ) |
9889
|
|
|
|
|
|
|
&& ( substr( $input_line, $pos, 1 ) eq '.' ) ) |
9890
|
|
|
|
|
|
|
{ |
9891
|
37
|
|
|
|
|
58
|
$pos--; |
9892
|
|
|
|
|
|
|
} |
9893
|
557
|
|
|
|
|
929
|
my $numc = $pos - $pos_beg; |
9894
|
557
|
|
|
|
|
1089
|
$number = substr( $input_line, $pos_beg, $numc ); |
9895
|
557
|
|
|
|
|
1052
|
$type = 'n'; |
9896
|
|
|
|
|
|
|
} |
9897
|
|
|
|
|
|
|
} |
9898
|
|
|
|
|
|
|
|
9899
|
|
|
|
|
|
|
# filter out non-numbers like e + - . e2 .e3 +e6 |
9900
|
|
|
|
|
|
|
# the rule: at least one digit, and any 'e' must be preceded by a digit |
9901
|
629
|
100
|
66
|
|
|
3205
|
if ( |
|
|
|
66
|
|
|
|
|
9902
|
|
|
|
|
|
|
$number !~ /\d/ # no digits |
9903
|
|
|
|
|
|
|
|| ( $number =~ /^(.*)[eE]/ |
9904
|
|
|
|
|
|
|
&& $1 !~ /\d/ ) # or no digits before the 'e' |
9905
|
|
|
|
|
|
|
) |
9906
|
|
|
|
|
|
|
{ |
9907
|
303
|
|
|
|
|
479
|
$number = undef; |
9908
|
303
|
|
|
|
|
530
|
$type = $input_type; |
9909
|
303
|
|
|
|
|
1195
|
return ( $i, $type, $number ); |
9910
|
|
|
|
|
|
|
} |
9911
|
|
|
|
|
|
|
|
9912
|
|
|
|
|
|
|
# Found a number; now we must convert back from character position |
9913
|
|
|
|
|
|
|
# to pre_token index. An error here implies user syntax error. |
9914
|
|
|
|
|
|
|
# An example would be an invalid octal number like '009'. |
9915
|
326
|
|
|
|
|
581
|
my $error; |
9916
|
326
|
|
|
|
|
905
|
( $i, $error ) = |
9917
|
|
|
|
|
|
|
inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index ); |
9918
|
326
|
50
|
|
|
|
918
|
if ($error) { $self->warning("Possibly invalid number\n") } |
|
0
|
|
|
|
|
0
|
|
9919
|
|
|
|
|
|
|
|
9920
|
326
|
|
|
|
|
1239
|
return ( $i, $type, $number ); |
9921
|
|
|
|
|
|
|
} ## end sub scan_number_do |
9922
|
|
|
|
|
|
|
|
9923
|
|
|
|
|
|
|
sub inverse_pretoken_map { |
9924
|
|
|
|
|
|
|
|
9925
|
|
|
|
|
|
|
# Starting with the current pre_token index $i, scan forward until |
9926
|
|
|
|
|
|
|
# finding the index of the next pre_token whose position is $pos. |
9927
|
2166
|
|
|
2166
|
0
|
5131
|
my ( $i, $pos, $rtoken_map, $max_token_index ) = @_; |
9928
|
2166
|
|
|
|
|
3656
|
my $error = 0; |
9929
|
|
|
|
|
|
|
|
9930
|
2166
|
|
|
|
|
5526
|
while ( ++$i <= $max_token_index ) { |
9931
|
|
|
|
|
|
|
|
9932
|
4032
|
100
|
|
|
|
9339
|
if ( $pos <= $rtoken_map->[$i] ) { |
9933
|
|
|
|
|
|
|
|
9934
|
|
|
|
|
|
|
# Let the calling routine handle errors in which we do not |
9935
|
|
|
|
|
|
|
# land on a pre-token boundary. It can happen by running |
9936
|
|
|
|
|
|
|
# perltidy on some non-perl scripts, for example. |
9937
|
2131
|
50
|
|
|
|
5285
|
if ( $pos < $rtoken_map->[$i] ) { $error = 1 } |
|
0
|
|
|
|
|
0
|
|
9938
|
2131
|
|
|
|
|
3442
|
$i--; |
9939
|
2131
|
|
|
|
|
3690
|
last; |
9940
|
|
|
|
|
|
|
} |
9941
|
|
|
|
|
|
|
} |
9942
|
2166
|
|
|
|
|
5461
|
return ( $i, $error ); |
9943
|
|
|
|
|
|
|
} ## end sub inverse_pretoken_map |
9944
|
|
|
|
|
|
|
|
9945
|
|
|
|
|
|
|
sub find_here_doc { |
9946
|
|
|
|
|
|
|
|
9947
|
|
|
|
|
|
|
# find the target of a here document, if any |
9948
|
|
|
|
|
|
|
# input parameters: |
9949
|
|
|
|
|
|
|
# $i - token index of the second < of << |
9950
|
|
|
|
|
|
|
# ($i must be less than the last token index if this is called) |
9951
|
|
|
|
|
|
|
# output parameters: |
9952
|
|
|
|
|
|
|
# $found_target = 0 didn't find target; =1 found target |
9953
|
|
|
|
|
|
|
# HERE_TARGET - the target string (may be empty string) |
9954
|
|
|
|
|
|
|
# $i - unchanged if not here doc, |
9955
|
|
|
|
|
|
|
# or index of the last token of the here target |
9956
|
|
|
|
|
|
|
# $saw_error - flag noting unbalanced quote on here target |
9957
|
9
|
|
|
9
|
0
|
40
|
my ( $self, $expecting, $i, $rtokens, $rtoken_map, $max_token_index ) = @_; |
9958
|
|
|
|
|
|
|
|
9959
|
9
|
|
|
|
|
23
|
my $ibeg = $i; |
9960
|
9
|
|
|
|
|
25
|
my $found_target = 0; |
9961
|
9
|
|
|
|
|
23
|
my $here_doc_target = EMPTY_STRING; |
9962
|
9
|
|
|
|
|
30
|
my $here_quote_character = EMPTY_STRING; |
9963
|
9
|
|
|
|
|
26
|
my $saw_error = 0; |
9964
|
9
|
|
|
|
|
30
|
my ( $next_nonblank_token, $i_next_nonblank, $next_token ); |
9965
|
9
|
|
|
|
|
55
|
$next_token = $rtokens->[ $i + 1 ]; |
9966
|
|
|
|
|
|
|
|
9967
|
|
|
|
|
|
|
# perl allows a backslash before the target string (heredoc.t) |
9968
|
9
|
|
|
|
|
26
|
my $backslash = 0; |
9969
|
9
|
50
|
|
|
|
51
|
if ( $next_token eq '\\' ) { |
9970
|
0
|
|
|
|
|
0
|
$backslash = 1; |
9971
|
0
|
|
|
|
|
0
|
$next_token = $rtokens->[ $i + 2 ]; |
9972
|
|
|
|
|
|
|
} |
9973
|
|
|
|
|
|
|
|
9974
|
9
|
|
|
|
|
55
|
( $next_nonblank_token, $i_next_nonblank ) = |
9975
|
|
|
|
|
|
|
find_next_nonblank_token_on_this_line( $i, $rtokens, $max_token_index ); |
9976
|
|
|
|
|
|
|
|
9977
|
9
|
100
|
33
|
|
|
73
|
if ( $next_nonblank_token =~ /[\'\"\`]/ ) { |
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
9978
|
|
|
|
|
|
|
|
9979
|
6
|
|
|
|
|
21
|
my $in_quote = 1; |
9980
|
6
|
|
|
|
|
15
|
my $quote_depth = 0; |
9981
|
6
|
|
|
|
|
19
|
my $quote_pos = 0; |
9982
|
6
|
|
|
|
|
15
|
my $quoted_string; |
9983
|
|
|
|
|
|
|
|
9984
|
|
|
|
|
|
|
( |
9985
|
6
|
|
|
|
|
41
|
$i, $in_quote, $here_quote_character, $quote_pos, $quote_depth, |
9986
|
|
|
|
|
|
|
$quoted_string |
9987
|
|
|
|
|
|
|
) |
9988
|
|
|
|
|
|
|
= $self->follow_quoted_string( $i_next_nonblank, $in_quote, $rtokens, |
9989
|
|
|
|
|
|
|
$here_quote_character, $quote_pos, $quote_depth, $max_token_index ); |
9990
|
|
|
|
|
|
|
|
9991
|
6
|
50
|
|
|
|
44
|
if ($in_quote) { # didn't find end of quote, so no target found |
9992
|
0
|
|
|
|
|
0
|
$i = $ibeg; |
9993
|
0
|
0
|
|
|
|
0
|
if ( $expecting == TERM ) { |
9994
|
0
|
|
|
|
|
0
|
$self->warning( |
9995
|
|
|
|
|
|
|
"Did not find here-doc string terminator ($here_quote_character) before end of line \n" |
9996
|
|
|
|
|
|
|
); |
9997
|
0
|
|
|
|
|
0
|
$saw_error = 1; |
9998
|
|
|
|
|
|
|
} |
9999
|
|
|
|
|
|
|
} |
10000
|
|
|
|
|
|
|
else { # found ending quote |
10001
|
6
|
|
|
|
|
19
|
$found_target = 1; |
10002
|
|
|
|
|
|
|
|
10003
|
6
|
|
|
|
|
14
|
my $tokj; |
10004
|
6
|
|
|
|
|
958
|
foreach my $j ( $i_next_nonblank + 1 .. $i - 1 ) { |
10005
|
6
|
|
|
|
|
24
|
$tokj = $rtokens->[$j]; |
10006
|
|
|
|
|
|
|
|
10007
|
|
|
|
|
|
|
# we have to remove any backslash before the quote character |
10008
|
|
|
|
|
|
|
# so that the here-doc-target exactly matches this string |
10009
|
|
|
|
|
|
|
next |
10010
|
6
|
0
|
33
|
|
|
39
|
if ( $tokj eq "\\" |
|
|
|
33
|
|
|
|
|
10011
|
|
|
|
|
|
|
&& $j < $i - 1 |
10012
|
|
|
|
|
|
|
&& $rtokens->[ $j + 1 ] eq $here_quote_character ); |
10013
|
6
|
|
|
|
|
27
|
$here_doc_target .= $tokj; |
10014
|
|
|
|
|
|
|
} |
10015
|
|
|
|
|
|
|
} |
10016
|
|
|
|
|
|
|
} |
10017
|
|
|
|
|
|
|
|
10018
|
|
|
|
|
|
|
elsif ( ( $next_token =~ /^\s*$/ ) and ( $expecting == TERM ) ) { |
10019
|
0
|
|
|
|
|
0
|
$found_target = 1; |
10020
|
0
|
|
|
|
|
0
|
$self->write_logfile_entry( |
10021
|
|
|
|
|
|
|
"found blank here-target after <<; suggest using \"\"\n"); |
10022
|
0
|
|
|
|
|
0
|
$i = $ibeg; |
10023
|
|
|
|
|
|
|
} |
10024
|
|
|
|
|
|
|
elsif ( $next_token =~ /^\w/ ) { # simple bareword or integer after << |
10025
|
|
|
|
|
|
|
|
10026
|
3
|
|
|
|
|
12
|
my $here_doc_expected; |
10027
|
3
|
50
|
|
|
|
12
|
if ( $expecting == UNKNOWN ) { |
10028
|
0
|
|
|
|
|
0
|
$here_doc_expected = $self->guess_if_here_doc($next_token); |
10029
|
|
|
|
|
|
|
} |
10030
|
|
|
|
|
|
|
else { |
10031
|
3
|
|
|
|
|
11
|
$here_doc_expected = 1; |
10032
|
|
|
|
|
|
|
} |
10033
|
|
|
|
|
|
|
|
10034
|
3
|
50
|
|
|
|
11
|
if ($here_doc_expected) { |
10035
|
3
|
|
|
|
|
8
|
$found_target = 1; |
10036
|
3
|
|
|
|
|
7
|
$here_doc_target = $next_token; |
10037
|
3
|
|
|
|
|
9
|
$i = $ibeg + 1; |
10038
|
|
|
|
|
|
|
} |
10039
|
|
|
|
|
|
|
|
10040
|
|
|
|
|
|
|
} |
10041
|
|
|
|
|
|
|
else { |
10042
|
|
|
|
|
|
|
|
10043
|
0
|
0
|
|
|
|
0
|
if ( $expecting == TERM ) { |
10044
|
0
|
|
|
|
|
0
|
$found_target = 1; |
10045
|
0
|
|
|
|
|
0
|
$self->write_logfile_entry("Note: bare here-doc operator <<\n"); |
10046
|
|
|
|
|
|
|
} |
10047
|
|
|
|
|
|
|
else { |
10048
|
0
|
|
|
|
|
0
|
$i = $ibeg; |
10049
|
|
|
|
|
|
|
} |
10050
|
|
|
|
|
|
|
} |
10051
|
|
|
|
|
|
|
|
10052
|
|
|
|
|
|
|
# patch to neglect any prepended backslash |
10053
|
9
|
50
|
33
|
|
|
80
|
if ( $found_target && $backslash ) { $i++ } |
|
0
|
|
|
|
|
0
|
|
10054
|
|
|
|
|
|
|
|
10055
|
9
|
|
|
|
|
54
|
return ( $found_target, $here_doc_target, $here_quote_character, $i, |
10056
|
|
|
|
|
|
|
$saw_error ); |
10057
|
|
|
|
|
|
|
} ## end sub find_here_doc |
10058
|
|
|
|
|
|
|
|
10059
|
|
|
|
|
|
|
sub do_quote { |
10060
|
|
|
|
|
|
|
|
10061
|
|
|
|
|
|
|
# follow (or continue following) quoted string(s) |
10062
|
|
|
|
|
|
|
# $in_quote return code: |
10063
|
|
|
|
|
|
|
# 0 - ok, found end |
10064
|
|
|
|
|
|
|
# 1 - still must find end of quote whose target is $quote_character |
10065
|
|
|
|
|
|
|
# 2 - still looking for end of first of two quotes |
10066
|
|
|
|
|
|
|
# |
10067
|
|
|
|
|
|
|
# Returns updated strings: |
10068
|
|
|
|
|
|
|
# $quoted_string_1 = quoted string seen while in_quote=1 |
10069
|
|
|
|
|
|
|
# $quoted_string_2 = quoted string seen while in_quote=2 |
10070
|
|
|
|
|
|
|
my ( |
10071
|
|
|
|
|
|
|
|
10072
|
2763
|
|
|
2763
|
0
|
8255
|
$self, |
10073
|
|
|
|
|
|
|
$i, |
10074
|
|
|
|
|
|
|
$in_quote, |
10075
|
|
|
|
|
|
|
$quote_character, |
10076
|
|
|
|
|
|
|
$quote_pos, |
10077
|
|
|
|
|
|
|
$quote_depth, |
10078
|
|
|
|
|
|
|
$quoted_string_1, |
10079
|
|
|
|
|
|
|
$quoted_string_2, |
10080
|
|
|
|
|
|
|
$rtokens, |
10081
|
|
|
|
|
|
|
$rtoken_map, |
10082
|
|
|
|
|
|
|
$max_token_index, |
10083
|
|
|
|
|
|
|
|
10084
|
|
|
|
|
|
|
) = @_; |
10085
|
|
|
|
|
|
|
|
10086
|
2763
|
|
|
|
|
4362
|
my $quoted_string; |
10087
|
2763
|
100
|
|
|
|
6388
|
if ( $in_quote == 2 ) { # two quotes/quoted_string_1s to follow |
10088
|
29
|
|
|
|
|
67
|
my $ibeg = $i; |
10089
|
|
|
|
|
|
|
( |
10090
|
29
|
|
|
|
|
126
|
$i, $in_quote, $quote_character, $quote_pos, $quote_depth, |
10091
|
|
|
|
|
|
|
$quoted_string |
10092
|
|
|
|
|
|
|
) |
10093
|
|
|
|
|
|
|
= $self->follow_quoted_string( $ibeg, $in_quote, $rtokens, |
10094
|
|
|
|
|
|
|
$quote_character, $quote_pos, $quote_depth, $max_token_index ); |
10095
|
29
|
|
|
|
|
88
|
$quoted_string_2 .= $quoted_string; |
10096
|
29
|
50
|
|
|
|
225
|
if ( $in_quote == 1 ) { |
10097
|
29
|
50
|
|
|
|
114
|
if ( $quote_character =~ /[\{\[\<\(]/ ) { $i++; } |
|
0
|
|
|
|
|
0
|
|
10098
|
29
|
|
|
|
|
76
|
$quote_character = EMPTY_STRING; |
10099
|
|
|
|
|
|
|
} |
10100
|
|
|
|
|
|
|
else { |
10101
|
0
|
|
|
|
|
0
|
$quoted_string_2 .= "\n"; |
10102
|
|
|
|
|
|
|
} |
10103
|
|
|
|
|
|
|
} |
10104
|
|
|
|
|
|
|
|
10105
|
2763
|
50
|
|
|
|
6007
|
if ( $in_quote == 1 ) { # one (more) quote to follow |
10106
|
2763
|
|
|
|
|
4181
|
my $ibeg = $i; |
10107
|
|
|
|
|
|
|
( |
10108
|
2763
|
|
|
|
|
7635
|
$i, $in_quote, $quote_character, $quote_pos, $quote_depth, |
10109
|
|
|
|
|
|
|
$quoted_string |
10110
|
|
|
|
|
|
|
) |
10111
|
|
|
|
|
|
|
= $self->follow_quoted_string( $ibeg, $in_quote, $rtokens, |
10112
|
|
|
|
|
|
|
$quote_character, $quote_pos, $quote_depth, $max_token_index ); |
10113
|
2763
|
|
|
|
|
5860
|
$quoted_string_1 .= $quoted_string; |
10114
|
2763
|
100
|
|
|
|
6719
|
if ( $in_quote == 1 ) { |
10115
|
183
|
|
|
|
|
404
|
$quoted_string_1 .= "\n"; |
10116
|
|
|
|
|
|
|
} |
10117
|
|
|
|
|
|
|
} |
10118
|
|
|
|
|
|
|
return ( |
10119
|
|
|
|
|
|
|
|
10120
|
2763
|
|
|
|
|
9482
|
$i, |
10121
|
|
|
|
|
|
|
$in_quote, |
10122
|
|
|
|
|
|
|
$quote_character, |
10123
|
|
|
|
|
|
|
$quote_pos, |
10124
|
|
|
|
|
|
|
$quote_depth, |
10125
|
|
|
|
|
|
|
$quoted_string_1, |
10126
|
|
|
|
|
|
|
$quoted_string_2, |
10127
|
|
|
|
|
|
|
|
10128
|
|
|
|
|
|
|
); |
10129
|
|
|
|
|
|
|
} ## end sub do_quote |
10130
|
|
|
|
|
|
|
|
10131
|
|
|
|
|
|
|
sub follow_quoted_string { |
10132
|
|
|
|
|
|
|
|
10133
|
|
|
|
|
|
|
# scan for a specific token, skipping escaped characters |
10134
|
|
|
|
|
|
|
# if the quote character is blank, use the first non-blank character |
10135
|
|
|
|
|
|
|
# input parameters: |
10136
|
|
|
|
|
|
|
# $rtokens = reference to the array of tokens |
10137
|
|
|
|
|
|
|
# $i = the token index of the first character to search |
10138
|
|
|
|
|
|
|
# $in_quote = number of quoted strings being followed |
10139
|
|
|
|
|
|
|
# $beginning_tok = the starting quote character |
10140
|
|
|
|
|
|
|
# $quote_pos = index to check next for alphanumeric delimiter |
10141
|
|
|
|
|
|
|
# output parameters: |
10142
|
|
|
|
|
|
|
# $i = the token index of the ending quote character |
10143
|
|
|
|
|
|
|
# $in_quote = decremented if found end, unchanged if not |
10144
|
|
|
|
|
|
|
# $beginning_tok = the starting quote character |
10145
|
|
|
|
|
|
|
# $quote_pos = index to check next for alphanumeric delimiter |
10146
|
|
|
|
|
|
|
# $quote_depth = nesting depth, since delimiters '{ ( [ <' can be nested. |
10147
|
|
|
|
|
|
|
# $quoted_string = the text of the quote (without quotation tokens) |
10148
|
|
|
|
|
|
|
my ( |
10149
|
|
|
|
|
|
|
|
10150
|
2809
|
|
|
2809
|
0
|
6366
|
$self, |
10151
|
|
|
|
|
|
|
$i_beg, |
10152
|
|
|
|
|
|
|
$in_quote, |
10153
|
|
|
|
|
|
|
$rtokens, |
10154
|
|
|
|
|
|
|
$beginning_tok, |
10155
|
|
|
|
|
|
|
$quote_pos, |
10156
|
|
|
|
|
|
|
$quote_depth, |
10157
|
|
|
|
|
|
|
$max_token_index, |
10158
|
|
|
|
|
|
|
|
10159
|
|
|
|
|
|
|
) = @_; |
10160
|
|
|
|
|
|
|
|
10161
|
2809
|
|
|
|
|
4314
|
my ( $tok, $end_tok ); |
10162
|
2809
|
|
|
|
|
4629
|
my $i = $i_beg - 1; |
10163
|
2809
|
|
|
|
|
4779
|
my $quoted_string = EMPTY_STRING; |
10164
|
|
|
|
|
|
|
|
10165
|
2809
|
|
|
|
|
3947
|
0 && do { |
10166
|
|
|
|
|
|
|
print STDOUT |
10167
|
|
|
|
|
|
|
"QUOTE entering with quote_pos = $quote_pos i=$i beginning_tok =$beginning_tok\n"; |
10168
|
|
|
|
|
|
|
}; |
10169
|
|
|
|
|
|
|
|
10170
|
|
|
|
|
|
|
# get the corresponding end token |
10171
|
2809
|
100
|
|
|
|
13063
|
if ( $beginning_tok !~ /^\s*$/ ) { |
10172
|
183
|
|
|
|
|
908
|
$end_tok = matching_end_token($beginning_tok); |
10173
|
|
|
|
|
|
|
} |
10174
|
|
|
|
|
|
|
|
10175
|
|
|
|
|
|
|
# a blank token means we must find and use the first non-blank one |
10176
|
|
|
|
|
|
|
else { |
10177
|
2626
|
100
|
|
|
|
6550
|
my $allow_quote_comments = ( $i < 0 ) ? 1 : 0; # i<0 means we saw a <cr> |
10178
|
|
|
|
|
|
|
|
10179
|
2626
|
|
|
|
|
6261
|
while ( $i < $max_token_index ) { |
10180
|
2626
|
|
|
|
|
5089
|
$tok = $rtokens->[ ++$i ]; |
10181
|
|
|
|
|
|
|
|
10182
|
2626
|
50
|
|
|
|
7867
|
if ( $tok !~ /^\s*$/ ) { |
10183
|
|
|
|
|
|
|
|
10184
|
2626
|
50
|
66
|
|
|
7596
|
if ( ( $tok eq '#' ) && ($allow_quote_comments) ) { |
10185
|
0
|
|
|
|
|
0
|
$i = $max_token_index; |
10186
|
|
|
|
|
|
|
} |
10187
|
|
|
|
|
|
|
else { |
10188
|
|
|
|
|
|
|
|
10189
|
2626
|
100
|
|
|
|
5522
|
if ( length($tok) > 1 ) { |
10190
|
1
|
50
|
|
|
|
5
|
if ( $quote_pos <= 0 ) { $quote_pos = 1 } |
|
1
|
|
|
|
|
3
|
|
10191
|
1
|
|
|
|
|
10
|
$beginning_tok = substr( $tok, $quote_pos - 1, 1 ); |
10192
|
|
|
|
|
|
|
} |
10193
|
|
|
|
|
|
|
else { |
10194
|
2625
|
|
|
|
|
4624
|
$beginning_tok = $tok; |
10195
|
2625
|
|
|
|
|
4394
|
$quote_pos = 0; |
10196
|
|
|
|
|
|
|
} |
10197
|
2626
|
|
|
|
|
6392
|
$end_tok = matching_end_token($beginning_tok); |
10198
|
2626
|
|
|
|
|
4481
|
$quote_depth = 1; |
10199
|
2626
|
|
|
|
|
4626
|
last; |
10200
|
|
|
|
|
|
|
} |
10201
|
|
|
|
|
|
|
} |
10202
|
|
|
|
|
|
|
else { |
10203
|
0
|
|
|
|
|
0
|
$allow_quote_comments = 1; |
10204
|
|
|
|
|
|
|
} |
10205
|
|
|
|
|
|
|
} |
10206
|
|
|
|
|
|
|
} |
10207
|
|
|
|
|
|
|
|
10208
|
|
|
|
|
|
|
# There are two different loops which search for the ending quote |
10209
|
|
|
|
|
|
|
# character. In the rare case of an alphanumeric quote delimiter, we |
10210
|
|
|
|
|
|
|
# have to look through alphanumeric tokens character-by-character, since |
10211
|
|
|
|
|
|
|
# the pre-tokenization process combines multiple alphanumeric |
10212
|
|
|
|
|
|
|
# characters, whereas for a non-alphanumeric delimiter, only tokens of |
10213
|
|
|
|
|
|
|
# length 1 can match. |
10214
|
|
|
|
|
|
|
|
10215
|
|
|
|
|
|
|
#---------------------------------------------------------------- |
10216
|
|
|
|
|
|
|
# Case 1 (rare): loop for case of alphanumeric quote delimiter.. |
10217
|
|
|
|
|
|
|
# "quote_pos" is the position the current word to begin searching |
10218
|
|
|
|
|
|
|
#---------------------------------------------------------------- |
10219
|
2809
|
100
|
|
|
|
7408
|
if ( $beginning_tok =~ /\w/ ) { |
10220
|
|
|
|
|
|
|
|
10221
|
|
|
|
|
|
|
# Note this because it is not recommended practice except |
10222
|
|
|
|
|
|
|
# for obfuscated perl contests |
10223
|
1
|
50
|
|
|
|
4
|
if ( $in_quote == 1 ) { |
10224
|
1
|
|
|
|
|
6
|
$self->write_logfile_entry( |
10225
|
|
|
|
|
|
|
"Note: alphanumeric quote delimiter ($beginning_tok) \n"); |
10226
|
|
|
|
|
|
|
} |
10227
|
|
|
|
|
|
|
|
10228
|
|
|
|
|
|
|
# Note: changed < to <= here to fix c109. Relying on extra end blanks. |
10229
|
1
|
|
|
|
|
7
|
while ( $i <= $max_token_index ) { |
10230
|
|
|
|
|
|
|
|
10231
|
4
|
100
|
66
|
|
|
14
|
if ( $quote_pos == 0 || ( $i < 0 ) ) { |
10232
|
3
|
|
|
|
|
9
|
$tok = $rtokens->[ ++$i ]; |
10233
|
|
|
|
|
|
|
|
10234
|
3
|
100
|
|
|
|
9
|
if ( $tok eq '\\' ) { |
10235
|
|
|
|
|
|
|
|
10236
|
|
|
|
|
|
|
# retain backslash unless it hides the end token |
10237
|
1
|
50
|
|
|
|
6
|
$quoted_string .= $tok |
10238
|
|
|
|
|
|
|
unless $rtokens->[ $i + 1 ] eq $end_tok; |
10239
|
1
|
|
|
|
|
4
|
$quote_pos++; |
10240
|
1
|
50
|
|
|
|
7
|
last if ( $i >= $max_token_index ); |
10241
|
1
|
|
|
|
|
6
|
$tok = $rtokens->[ ++$i ]; |
10242
|
|
|
|
|
|
|
} |
10243
|
|
|
|
|
|
|
} |
10244
|
4
|
|
|
|
|
8
|
my $old_pos = $quote_pos; |
10245
|
|
|
|
|
|
|
|
10246
|
4
|
50
|
33
|
|
|
20
|
unless ( defined($tok) && defined($end_tok) && defined($quote_pos) ) |
|
|
|
33
|
|
|
|
|
10247
|
|
|
|
|
|
|
{ |
10248
|
|
|
|
|
|
|
|
10249
|
|
|
|
|
|
|
} |
10250
|
4
|
|
|
|
|
8
|
$quote_pos = 1 + index( $tok, $end_tok, $quote_pos ); |
10251
|
|
|
|
|
|
|
|
10252
|
4
|
100
|
|
|
|
11
|
if ( $quote_pos > 0 ) { |
10253
|
|
|
|
|
|
|
|
10254
|
1
|
|
|
|
|
3
|
$quoted_string .= |
10255
|
|
|
|
|
|
|
substr( $tok, $old_pos, $quote_pos - $old_pos - 1 ); |
10256
|
|
|
|
|
|
|
|
10257
|
|
|
|
|
|
|
# NOTE: any quote modifiers will be at the end of '$tok'. If we |
10258
|
|
|
|
|
|
|
# wanted to check them, this is the place to get them. But |
10259
|
|
|
|
|
|
|
# this quote form is rarely used in practice, so it isn't |
10260
|
|
|
|
|
|
|
# worthwhile. |
10261
|
|
|
|
|
|
|
|
10262
|
1
|
|
|
|
|
3
|
$quote_depth--; |
10263
|
|
|
|
|
|
|
|
10264
|
1
|
50
|
|
|
|
5
|
if ( $quote_depth == 0 ) { |
10265
|
1
|
|
|
|
|
2
|
$in_quote--; |
10266
|
1
|
|
|
|
|
4
|
last; |
10267
|
|
|
|
|
|
|
} |
10268
|
|
|
|
|
|
|
} |
10269
|
|
|
|
|
|
|
else { |
10270
|
3
|
50
|
|
|
|
8
|
if ( $old_pos <= length($tok) ) { |
10271
|
3
|
|
|
|
|
8
|
$quoted_string .= substr( $tok, $old_pos ); |
10272
|
|
|
|
|
|
|
} |
10273
|
|
|
|
|
|
|
} |
10274
|
|
|
|
|
|
|
} |
10275
|
|
|
|
|
|
|
} |
10276
|
|
|
|
|
|
|
|
10277
|
|
|
|
|
|
|
#----------------------------------------------------------------------- |
10278
|
|
|
|
|
|
|
# Case 2 (normal): loop for case of a non-alphanumeric quote delimiter.. |
10279
|
|
|
|
|
|
|
#----------------------------------------------------------------------- |
10280
|
|
|
|
|
|
|
else { |
10281
|
|
|
|
|
|
|
|
10282
|
2808
|
|
|
|
|
6515
|
while ( $i < $max_token_index ) { |
10283
|
10785
|
|
|
|
|
17025
|
$tok = $rtokens->[ ++$i ]; |
10284
|
|
|
|
|
|
|
|
10285
|
10785
|
100
|
|
|
|
25425
|
if ( $tok eq $end_tok ) { |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
10286
|
2615
|
|
|
|
|
4015
|
$quote_depth--; |
10287
|
|
|
|
|
|
|
|
10288
|
2615
|
100
|
|
|
|
5979
|
if ( $quote_depth == 0 ) { |
10289
|
2614
|
|
|
|
|
3734
|
$in_quote--; |
10290
|
2614
|
|
|
|
|
4189
|
last; |
10291
|
|
|
|
|
|
|
} |
10292
|
|
|
|
|
|
|
} |
10293
|
|
|
|
|
|
|
elsif ( $tok eq $beginning_tok ) { |
10294
|
1
|
|
|
|
|
3
|
$quote_depth++; |
10295
|
|
|
|
|
|
|
} |
10296
|
|
|
|
|
|
|
elsif ( $tok eq '\\' ) { |
10297
|
|
|
|
|
|
|
|
10298
|
|
|
|
|
|
|
# retain backslash unless it hides the beginning or end token |
10299
|
376
|
|
|
|
|
901
|
$tok = $rtokens->[ ++$i ]; |
10300
|
376
|
100
|
100
|
|
|
2224
|
$quoted_string .= '\\' |
10301
|
|
|
|
|
|
|
unless ( $tok eq $end_tok || $tok eq $beginning_tok ); |
10302
|
|
|
|
|
|
|
} |
10303
|
8171
|
|
|
|
|
15304
|
$quoted_string .= $tok; |
10304
|
|
|
|
|
|
|
} |
10305
|
|
|
|
|
|
|
} |
10306
|
2809
|
50
|
|
|
|
6316
|
if ( $i > $max_token_index ) { $i = $max_token_index } |
|
0
|
|
|
|
|
0
|
|
10307
|
|
|
|
|
|
|
return ( |
10308
|
|
|
|
|
|
|
|
10309
|
2809
|
|
|
|
|
10143
|
$i, |
10310
|
|
|
|
|
|
|
$in_quote, |
10311
|
|
|
|
|
|
|
$beginning_tok, |
10312
|
|
|
|
|
|
|
$quote_pos, |
10313
|
|
|
|
|
|
|
$quote_depth, |
10314
|
|
|
|
|
|
|
$quoted_string, |
10315
|
|
|
|
|
|
|
|
10316
|
|
|
|
|
|
|
); |
10317
|
|
|
|
|
|
|
} ## end sub follow_quoted_string |
10318
|
|
|
|
|
|
|
|
10319
|
|
|
|
|
|
|
sub indicate_error { |
10320
|
0
|
|
|
0
|
0
|
0
|
my ( $self, $msg, $line_number, $input_line, $pos, $carrat ) = @_; |
10321
|
0
|
|
|
|
|
0
|
$self->interrupt_logfile(); |
10322
|
0
|
|
|
|
|
0
|
$self->warning($msg); |
10323
|
0
|
|
|
|
|
0
|
$self->write_error_indicator_pair( $line_number, $input_line, $pos, |
10324
|
|
|
|
|
|
|
$carrat ); |
10325
|
0
|
|
|
|
|
0
|
$self->resume_logfile(); |
10326
|
0
|
|
|
|
|
0
|
return; |
10327
|
|
|
|
|
|
|
} ## end sub indicate_error |
10328
|
|
|
|
|
|
|
|
10329
|
|
|
|
|
|
|
sub write_error_indicator_pair { |
10330
|
0
|
|
|
0
|
0
|
0
|
my ( $self, $line_number, $input_line, $pos, $carrat ) = @_; |
10331
|
0
|
|
|
|
|
0
|
my ( $offset, $numbered_line, $underline ) = |
10332
|
|
|
|
|
|
|
make_numbered_line( $line_number, $input_line, $pos ); |
10333
|
0
|
|
|
|
|
0
|
$underline = write_on_underline( $underline, $pos - $offset, $carrat ); |
10334
|
0
|
|
|
|
|
0
|
$self->warning( $numbered_line . "\n" ); |
10335
|
0
|
|
|
|
|
0
|
$underline =~ s/\s*$//; |
10336
|
0
|
|
|
|
|
0
|
$self->warning( $underline . "\n" ); |
10337
|
0
|
|
|
|
|
0
|
return; |
10338
|
|
|
|
|
|
|
} ## end sub write_error_indicator_pair |
10339
|
|
|
|
|
|
|
|
10340
|
|
|
|
|
|
|
sub make_numbered_line { |
10341
|
|
|
|
|
|
|
|
10342
|
|
|
|
|
|
|
# Given an input line, its line number, and a character position of |
10343
|
|
|
|
|
|
|
# interest, create a string not longer than 80 characters of the form |
10344
|
|
|
|
|
|
|
# $lineno: sub_string |
10345
|
|
|
|
|
|
|
# such that the sub_string of $str contains the position of interest |
10346
|
|
|
|
|
|
|
# |
10347
|
|
|
|
|
|
|
# Here is an example of what we want, in this case we add trailing |
10348
|
|
|
|
|
|
|
# '...' because the line is long. |
10349
|
|
|
|
|
|
|
# |
10350
|
|
|
|
|
|
|
# 2: (One of QAML 2.0's authors is a member of the World Wide Web Con ... |
10351
|
|
|
|
|
|
|
# |
10352
|
|
|
|
|
|
|
# Here is another example, this time in which we used leading '...' |
10353
|
|
|
|
|
|
|
# because of excessive length: |
10354
|
|
|
|
|
|
|
# |
10355
|
|
|
|
|
|
|
# 2: ... er of the World Wide Web Consortium's |
10356
|
|
|
|
|
|
|
# |
10357
|
|
|
|
|
|
|
# input parameters are: |
10358
|
|
|
|
|
|
|
# $lineno = line number |
10359
|
|
|
|
|
|
|
# $str = the text of the line |
10360
|
|
|
|
|
|
|
# $pos = position of interest (the error) : 0 = first character |
10361
|
|
|
|
|
|
|
# |
10362
|
|
|
|
|
|
|
# We return : |
10363
|
|
|
|
|
|
|
# - $offset = an offset which corrects the position in case we only |
10364
|
|
|
|
|
|
|
# display part of a line, such that $pos-$offset is the effective |
10365
|
|
|
|
|
|
|
# position from the start of the displayed line. |
10366
|
|
|
|
|
|
|
# - $numbered_line = the numbered line as above, |
10367
|
|
|
|
|
|
|
# - $underline = a blank 'underline' which is all spaces with the same |
10368
|
|
|
|
|
|
|
# number of characters as the numbered line. |
10369
|
|
|
|
|
|
|
|
10370
|
0
|
|
|
0
|
0
|
0
|
my ( $lineno, $str, $pos ) = @_; |
10371
|
0
|
0
|
|
|
|
0
|
my $offset = ( $pos < 60 ) ? 0 : $pos - 40; |
10372
|
0
|
|
|
|
|
0
|
my $excess = length($str) - $offset - 68; |
10373
|
0
|
0
|
|
|
|
0
|
my $numc = ( $excess > 0 ) ? 68 : undef; |
10374
|
|
|
|
|
|
|
|
10375
|
0
|
0
|
|
|
|
0
|
if ( defined($numc) ) { |
10376
|
0
|
0
|
|
|
|
0
|
if ( $offset == 0 ) { |
10377
|
0
|
|
|
|
|
0
|
$str = substr( $str, $offset, $numc - 4 ) . " ..."; |
10378
|
|
|
|
|
|
|
} |
10379
|
|
|
|
|
|
|
else { |
10380
|
0
|
|
|
|
|
0
|
$str = "... " . substr( $str, $offset + 4, $numc - 4 ) . " ..."; |
10381
|
|
|
|
|
|
|
} |
10382
|
|
|
|
|
|
|
} |
10383
|
|
|
|
|
|
|
else { |
10384
|
|
|
|
|
|
|
|
10385
|
0
|
0
|
|
|
|
0
|
if ( $offset == 0 ) { |
10386
|
|
|
|
|
|
|
} |
10387
|
|
|
|
|
|
|
else { |
10388
|
0
|
|
|
|
|
0
|
$str = "... " . substr( $str, $offset + 4 ); |
10389
|
|
|
|
|
|
|
} |
10390
|
|
|
|
|
|
|
} |
10391
|
|
|
|
|
|
|
|
10392
|
0
|
|
|
|
|
0
|
my $numbered_line = sprintf( "%d: ", $lineno ); |
10393
|
0
|
|
|
|
|
0
|
$offset -= length($numbered_line); |
10394
|
0
|
|
|
|
|
0
|
$numbered_line .= $str; |
10395
|
0
|
|
|
|
|
0
|
my $underline = SPACE x length($numbered_line); |
10396
|
0
|
|
|
|
|
0
|
return ( $offset, $numbered_line, $underline ); |
10397
|
|
|
|
|
|
|
} ## end sub make_numbered_line |
10398
|
|
|
|
|
|
|
|
10399
|
|
|
|
|
|
|
sub write_on_underline { |
10400
|
|
|
|
|
|
|
|
10401
|
|
|
|
|
|
|
# The "underline" is a string that shows where an error is; it starts |
10402
|
|
|
|
|
|
|
# out as a string of blanks with the same length as the numbered line of |
10403
|
|
|
|
|
|
|
# code above it, and we have to add marking to show where an error is. |
10404
|
|
|
|
|
|
|
# In the example below, we want to write the string '--^' just below |
10405
|
|
|
|
|
|
|
# the line of bad code: |
10406
|
|
|
|
|
|
|
# |
10407
|
|
|
|
|
|
|
# 2: (One of QAML 2.0's authors is a member of the World Wide Web Con ... |
10408
|
|
|
|
|
|
|
# ---^ |
10409
|
|
|
|
|
|
|
# We are given the current underline string, plus a position and a |
10410
|
|
|
|
|
|
|
# string to write on it. |
10411
|
|
|
|
|
|
|
# |
10412
|
|
|
|
|
|
|
# In the above example, there will be 2 calls to do this: |
10413
|
|
|
|
|
|
|
# First call: $pos=19, pos_chr=^ |
10414
|
|
|
|
|
|
|
# Second call: $pos=16, pos_chr=--- |
10415
|
|
|
|
|
|
|
# |
10416
|
|
|
|
|
|
|
# This is a trivial thing to do with substr, but there is some |
10417
|
|
|
|
|
|
|
# checking to do. |
10418
|
|
|
|
|
|
|
|
10419
|
0
|
|
|
0
|
0
|
0
|
my ( $underline, $pos, $pos_chr ) = @_; |
10420
|
|
|
|
|
|
|
|
10421
|
|
|
|
|
|
|
# check for error..shouldn't happen |
10422
|
0
|
0
|
0
|
|
|
0
|
unless ( ( $pos >= 0 ) && ( $pos <= length($underline) ) ) { |
10423
|
0
|
|
|
|
|
0
|
return $underline; |
10424
|
|
|
|
|
|
|
} |
10425
|
0
|
|
|
|
|
0
|
my $excess = length($pos_chr) + $pos - length($underline); |
10426
|
0
|
0
|
|
|
|
0
|
if ( $excess > 0 ) { |
10427
|
0
|
|
|
|
|
0
|
$pos_chr = substr( $pos_chr, 0, length($pos_chr) - $excess ); |
10428
|
|
|
|
|
|
|
} |
10429
|
0
|
|
|
|
|
0
|
substr( $underline, $pos, length($pos_chr) ) = $pos_chr; |
10430
|
0
|
|
|
|
|
0
|
return ($underline); |
10431
|
|
|
|
|
|
|
} ## end sub write_on_underline |
10432
|
|
|
|
|
|
|
|
10433
|
|
|
|
|
|
|
sub pre_tokenize { |
10434
|
|
|
|
|
|
|
|
10435
|
6170
|
|
|
6170
|
0
|
12148
|
my ( $str, $max_tokens_wanted ) = @_; |
10436
|
|
|
|
|
|
|
|
10437
|
|
|
|
|
|
|
# Input parameter: |
10438
|
|
|
|
|
|
|
# $max_tokens_wanted > 0 to stop on reaching this many tokens. |
10439
|
|
|
|
|
|
|
# = 0 means get all tokens |
10440
|
|
|
|
|
|
|
|
10441
|
|
|
|
|
|
|
# Break a string, $str, into a sequence of preliminary tokens. We |
10442
|
|
|
|
|
|
|
# are interested in these types of tokens: |
10443
|
|
|
|
|
|
|
# words (type='w'), example: 'max_tokens_wanted' |
10444
|
|
|
|
|
|
|
# digits (type = 'd'), example: '0755' |
10445
|
|
|
|
|
|
|
# whitespace (type = 'b'), example: ' ' |
10446
|
|
|
|
|
|
|
# any other single character (i.e. punct; type = the character itself). |
10447
|
|
|
|
|
|
|
# We cannot do better than this yet because we might be in a quoted |
10448
|
|
|
|
|
|
|
# string or pattern. Caller sets $max_tokens_wanted to 0 to get all |
10449
|
|
|
|
|
|
|
# tokens. |
10450
|
|
|
|
|
|
|
|
10451
|
|
|
|
|
|
|
# An advantage of doing this pre-tokenization step is that it keeps almost |
10452
|
|
|
|
|
|
|
# all of the regex work highly localized. A disadvantage is that in some |
10453
|
|
|
|
|
|
|
# very rare instances we will have to go back and split a pre-token. |
10454
|
|
|
|
|
|
|
|
10455
|
|
|
|
|
|
|
# Return parameters: |
10456
|
6170
|
|
|
|
|
10414
|
my @tokens = (); # array of the tokens themselves |
10457
|
6170
|
|
|
|
|
12611
|
my @token_map = (0); # string position of start of each token |
10458
|
6170
|
|
|
|
|
9807
|
my @type = (); # 'b'=whitespace, 'd'=digits, 'w'=alpha, or punct |
10459
|
|
|
|
|
|
|
|
10460
|
6170
|
|
|
|
|
9638
|
do { |
10461
|
|
|
|
|
|
|
|
10462
|
|
|
|
|
|
|
# whitespace |
10463
|
82014
|
100
|
|
|
|
267997
|
if ( $str =~ /\G(\s+)/gc ) { push @type, 'b'; } |
|
18684
|
100
|
|
|
|
32629
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
10464
|
|
|
|
|
|
|
|
10465
|
|
|
|
|
|
|
# numbers |
10466
|
|
|
|
|
|
|
# note that this must come before words! |
10467
|
2593
|
|
|
|
|
5527
|
elsif ( $str =~ /\G(\d+)/gc ) { push @type, 'd'; } |
10468
|
|
|
|
|
|
|
|
10469
|
|
|
|
|
|
|
# words |
10470
|
16628
|
|
|
|
|
30404
|
elsif ( $str =~ /\G(\w+)/gc ) { push @type, 'w'; } |
10471
|
|
|
|
|
|
|
|
10472
|
|
|
|
|
|
|
# single-character punctuation |
10473
|
38075
|
|
|
|
|
80674
|
elsif ( $str =~ /\G(\W)/gc ) { push @type, $1; } |
10474
|
|
|
|
|
|
|
|
10475
|
|
|
|
|
|
|
# that's all.. |
10476
|
|
|
|
|
|
|
else { |
10477
|
6034
|
|
|
|
|
37446
|
return ( \@tokens, \@token_map, \@type ); |
10478
|
|
|
|
|
|
|
} |
10479
|
|
|
|
|
|
|
|
10480
|
75980
|
|
|
|
|
135096
|
push @tokens, $1; |
10481
|
75980
|
|
|
|
|
157063
|
push @token_map, pos($str); |
10482
|
|
|
|
|
|
|
|
10483
|
|
|
|
|
|
|
} while ( --$max_tokens_wanted != 0 ); |
10484
|
|
|
|
|
|
|
|
10485
|
136
|
|
|
|
|
814
|
return ( \@tokens, \@token_map, \@type ); |
10486
|
|
|
|
|
|
|
} ## end sub pre_tokenize |
10487
|
|
|
|
|
|
|
|
10488
|
|
|
|
|
|
|
sub show_tokens { |
10489
|
|
|
|
|
|
|
|
10490
|
|
|
|
|
|
|
# this is an old debug routine |
10491
|
|
|
|
|
|
|
# not called, but saved for reference |
10492
|
0
|
|
|
0
|
0
|
0
|
my ( $rtokens, $rtoken_map ) = @_; |
10493
|
0
|
|
|
|
|
0
|
my $num = scalar( @{$rtokens} ); |
|
0
|
|
|
|
|
0
|
|
10494
|
|
|
|
|
|
|
|
10495
|
0
|
|
|
|
|
0
|
foreach my $i ( 0 .. $num - 1 ) { |
10496
|
0
|
|
|
|
|
0
|
my $len = length( $rtokens->[$i] ); |
10497
|
0
|
|
|
|
|
0
|
print STDOUT "$i:$len:$rtoken_map->[$i]:$rtokens->[$i]:\n"; |
10498
|
|
|
|
|
|
|
} |
10499
|
0
|
|
|
|
|
0
|
return; |
10500
|
|
|
|
|
|
|
} ## end sub show_tokens |
10501
|
|
|
|
|
|
|
|
10502
|
|
|
|
|
|
|
{ ## closure for sub matching end token |
10503
|
|
|
|
|
|
|
my %matching_end_token; |
10504
|
|
|
|
|
|
|
|
10505
|
|
|
|
|
|
|
BEGIN { |
10506
|
38
|
|
|
38
|
|
60305
|
%matching_end_token = ( |
10507
|
|
|
|
|
|
|
'{' => '}', |
10508
|
|
|
|
|
|
|
'(' => ')', |
10509
|
|
|
|
|
|
|
'[' => ']', |
10510
|
|
|
|
|
|
|
'<' => '>', |
10511
|
|
|
|
|
|
|
); |
10512
|
|
|
|
|
|
|
} ## end BEGIN |
10513
|
|
|
|
|
|
|
|
10514
|
|
|
|
|
|
|
sub matching_end_token { |
10515
|
|
|
|
|
|
|
|
10516
|
|
|
|
|
|
|
# return closing character for a pattern |
10517
|
2993
|
|
|
2993
|
0
|
4834
|
my $beginning_token = shift; |
10518
|
2993
|
100
|
|
|
|
7936
|
if ( $matching_end_token{$beginning_token} ) { |
10519
|
373
|
|
|
|
|
961
|
return $matching_end_token{$beginning_token}; |
10520
|
|
|
|
|
|
|
} |
10521
|
2620
|
|
|
|
|
5860
|
return ($beginning_token); |
10522
|
|
|
|
|
|
|
} ## end sub matching_end_token |
10523
|
|
|
|
|
|
|
} |
10524
|
|
|
|
|
|
|
|
10525
|
|
|
|
|
|
|
sub dump_token_types { |
10526
|
0
|
|
|
0
|
0
|
|
my ( $class, $fh ) = @_; |
10527
|
|
|
|
|
|
|
|
10528
|
|
|
|
|
|
|
# This should be the latest list of token types in use |
10529
|
|
|
|
|
|
|
# adding NEW_TOKENS: add a comment here |
10530
|
0
|
|
|
|
|
|
$fh->print(<<'END_OF_LIST'); |
10531
|
|
|
|
|
|
|
|
10532
|
|
|
|
|
|
|
Here is a list of the token types currently used for lines of type 'CODE'. |
10533
|
|
|
|
|
|
|
For the following tokens, the "type" of a token is just the token itself. |
10534
|
|
|
|
|
|
|
|
10535
|
|
|
|
|
|
|
.. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <> |
10536
|
|
|
|
|
|
|
( ) <= >= == =~ !~ != ++ -- /= x= |
10537
|
|
|
|
|
|
|
... **= <<= >>= &&= ||= //= <=> |
10538
|
|
|
|
|
|
|
, + - / * | % ! x ~ = \ ? : . < > ^ & |
10539
|
|
|
|
|
|
|
|
10540
|
|
|
|
|
|
|
The following additional token types are defined: |
10541
|
|
|
|
|
|
|
|
10542
|
|
|
|
|
|
|
type meaning |
10543
|
|
|
|
|
|
|
b blank (white space) |
10544
|
|
|
|
|
|
|
{ indent: opening structural curly brace or square bracket or paren |
10545
|
|
|
|
|
|
|
(code block, anonymous hash reference, or anonymous array reference) |
10546
|
|
|
|
|
|
|
} outdent: right structural curly brace or square bracket or paren |
10547
|
|
|
|
|
|
|
[ left non-structural square bracket (enclosing an array index) |
10548
|
|
|
|
|
|
|
] right non-structural square bracket |
10549
|
|
|
|
|
|
|
( left non-structural paren (all but a list right of an =) |
10550
|
|
|
|
|
|
|
) right non-structural paren |
10551
|
|
|
|
|
|
|
L left non-structural curly brace (enclosing a key) |
10552
|
|
|
|
|
|
|
R right non-structural curly brace |
10553
|
|
|
|
|
|
|
; terminal semicolon |
10554
|
|
|
|
|
|
|
f indicates a semicolon in a "for" statement |
10555
|
|
|
|
|
|
|
h here_doc operator << |
10556
|
|
|
|
|
|
|
# a comment |
10557
|
|
|
|
|
|
|
Q indicates a quote or pattern |
10558
|
|
|
|
|
|
|
q indicates a qw quote block |
10559
|
|
|
|
|
|
|
k a perl keyword |
10560
|
|
|
|
|
|
|
C user-defined constant or constant function (with void prototype = ()) |
10561
|
|
|
|
|
|
|
U user-defined function taking parameters |
10562
|
|
|
|
|
|
|
G user-defined function taking block parameter (like grep/map/eval) |
10563
|
|
|
|
|
|
|
M (unused, but reserved for subroutine definition name) |
10564
|
|
|
|
|
|
|
P (unused, but -html uses it to label pod text) |
10565
|
|
|
|
|
|
|
t type indicater such as %,$,@,*,&,sub |
10566
|
|
|
|
|
|
|
w bare word (perhaps a subroutine call) |
10567
|
|
|
|
|
|
|
i identifier of some type (with leading %, $, @, *, &, sub, -> ) |
10568
|
|
|
|
|
|
|
n a number |
10569
|
|
|
|
|
|
|
v a v-string |
10570
|
|
|
|
|
|
|
F a file test operator (like -e) |
10571
|
|
|
|
|
|
|
Y File handle |
10572
|
|
|
|
|
|
|
Z identifier in indirect object slot: may be file handle, object |
10573
|
|
|
|
|
|
|
J LABEL: code block label |
10574
|
|
|
|
|
|
|
j LABEL after next, last, redo, goto |
10575
|
|
|
|
|
|
|
p unary + |
10576
|
|
|
|
|
|
|
m unary - |
10577
|
|
|
|
|
|
|
pp pre-increment operator ++ |
10578
|
|
|
|
|
|
|
mm pre-decrement operator -- |
10579
|
|
|
|
|
|
|
A : used as attribute separator |
10580
|
|
|
|
|
|
|
|
10581
|
|
|
|
|
|
|
Here are the '_line_type' codes used internally: |
10582
|
|
|
|
|
|
|
SYSTEM - system-specific code before hash-bang line |
10583
|
|
|
|
|
|
|
CODE - line of perl code (including comments) |
10584
|
|
|
|
|
|
|
POD_START - line starting pod, such as '=head' |
10585
|
|
|
|
|
|
|
POD - pod documentation text |
10586
|
|
|
|
|
|
|
POD_END - last line of pod section, '=cut' |
10587
|
|
|
|
|
|
|
HERE - text of here-document |
10588
|
|
|
|
|
|
|
HERE_END - last line of here-doc (target word) |
10589
|
|
|
|
|
|
|
FORMAT - format section |
10590
|
|
|
|
|
|
|
FORMAT_END - last line of format section, '.' |
10591
|
|
|
|
|
|
|
SKIP - code skipping section |
10592
|
|
|
|
|
|
|
SKIP_END - last line of code skipping section, '#>>V' |
10593
|
|
|
|
|
|
|
DATA_START - __DATA__ line |
10594
|
|
|
|
|
|
|
DATA - unidentified text following __DATA__ |
10595
|
|
|
|
|
|
|
END_START - __END__ line |
10596
|
|
|
|
|
|
|
END - unidentified text following __END__ |
10597
|
|
|
|
|
|
|
ERROR - we are in big trouble, probably not a perl script |
10598
|
|
|
|
|
|
|
END_OF_LIST |
10599
|
|
|
|
|
|
|
|
10600
|
0
|
|
|
|
|
|
return; |
10601
|
|
|
|
|
|
|
} ## end sub dump_token_types |
10602
|
|
|
|
|
|
|
|
10603
|
|
|
|
|
|
|
BEGIN { |
10604
|
|
|
|
|
|
|
|
10605
|
|
|
|
|
|
|
# These names are used in error messages |
10606
|
38
|
|
|
38
|
|
371
|
@opening_brace_names = qw# '{' '[' '(' '?' #; |
10607
|
38
|
|
|
|
|
163
|
@closing_brace_names = qw# '}' ']' ')' ':' #; |
10608
|
|
|
|
|
|
|
|
10609
|
38
|
|
|
|
|
89
|
my @q; |
10610
|
|
|
|
|
|
|
|
10611
|
38
|
|
|
|
|
227
|
my @digraphs = qw( |
10612
|
|
|
|
|
|
|
.. :: << >> ** && || // -> => += -= .= %= &= |= ^= *= <> |
10613
|
|
|
|
|
|
|
<= >= == =~ !~ != ++ -- /= x= ~~ ~. |. &. ^. |
10614
|
|
|
|
|
|
|
); |
10615
|
38
|
|
|
|
|
807
|
@is_digraph{@digraphs} = (1) x scalar(@digraphs); |
10616
|
|
|
|
|
|
|
|
10617
|
38
|
|
|
|
|
212
|
@q = qw( |
10618
|
|
|
|
|
|
|
. : < > * & | / - = + - % ^ ! x ~ |
10619
|
|
|
|
|
|
|
); |
10620
|
38
|
|
|
|
|
462
|
@can_start_digraph{@q} = (1) x scalar(@q); |
10621
|
|
|
|
|
|
|
|
10622
|
38
|
|
|
|
|
218
|
my @trigraphs = qw( ... **= <<= >>= &&= ||= //= <=> !~~ &.= |.= ^.= <<~); |
10623
|
38
|
|
|
|
|
317
|
@is_trigraph{@trigraphs} = (1) x scalar(@trigraphs); |
10624
|
|
|
|
|
|
|
|
10625
|
38
|
|
|
|
|
158
|
my @tetragraphs = qw( <<>> ); |
10626
|
38
|
|
|
|
|
170
|
@is_tetragraph{@tetragraphs} = (1) x scalar(@tetragraphs); |
10627
|
|
|
|
|
|
|
|
10628
|
|
|
|
|
|
|
# make a hash of all valid token types for self-checking the tokenizer |
10629
|
|
|
|
|
|
|
# (adding NEW_TOKENS : select a new character and add to this list) |
10630
|
38
|
|
|
|
|
641
|
my @valid_token_types = qw# |
10631
|
|
|
|
|
|
|
A b C G L R f h Q k t w i q n p m F pp mm U j J Y Z v |
10632
|
|
|
|
|
|
|
{ } ( ) [ ] ; + - / * | % ! x ~ = \ ? : . < > ^ & |
10633
|
|
|
|
|
|
|
#; |
10634
|
38
|
|
|
|
|
355
|
push( @valid_token_types, @digraphs ); |
10635
|
38
|
|
|
|
|
183
|
push( @valid_token_types, @trigraphs ); |
10636
|
38
|
|
|
|
|
98
|
push( @valid_token_types, @tetragraphs ); |
10637
|
38
|
|
|
|
|
92
|
push( @valid_token_types, ( '#', ',', 'CORE::' ) ); |
10638
|
38
|
|
|
|
|
1474
|
@is_valid_token_type{@valid_token_types} = (1) x scalar(@valid_token_types); |
10639
|
|
|
|
|
|
|
|
10640
|
|
|
|
|
|
|
# a list of file test letters, as in -e (Table 3-4 of 'camel 3') |
10641
|
38
|
|
|
|
|
264
|
my @file_test_operators = |
10642
|
|
|
|
|
|
|
qw( A B C M O R S T W X b c d e f g k l o p r s t u w x z); |
10643
|
38
|
|
|
|
|
808
|
@is_file_test_operator{@file_test_operators} = |
10644
|
|
|
|
|
|
|
(1) x scalar(@file_test_operators); |
10645
|
|
|
|
|
|
|
|
10646
|
|
|
|
|
|
|
# these functions have prototypes of the form (&), so when they are |
10647
|
|
|
|
|
|
|
# followed by a block, that block MAY BE followed by an operator. |
10648
|
|
|
|
|
|
|
# Smartmatch operator ~~ may be followed by anonymous hash or array ref |
10649
|
38
|
|
|
|
|
211
|
@q = qw( do eval ); |
10650
|
38
|
|
|
|
|
148
|
@is_block_operator{@q} = (1) x scalar(@q); |
10651
|
|
|
|
|
|
|
|
10652
|
|
|
|
|
|
|
# these functions allow an identifier in the indirect object slot |
10653
|
38
|
|
|
|
|
117
|
@q = qw( print printf sort exec system say); |
10654
|
38
|
|
|
|
|
237
|
@is_indirect_object_taker{@q} = (1) x scalar(@q); |
10655
|
|
|
|
|
|
|
|
10656
|
|
|
|
|
|
|
# Note: 'field' will be added by sub check_options if --use-feature=class |
10657
|
38
|
|
|
|
|
115
|
@q = qw(my our state); |
10658
|
38
|
|
|
|
|
162
|
@is_my_our_state{@q} = (1) x scalar(@q); |
10659
|
|
|
|
|
|
|
|
10660
|
|
|
|
|
|
|
# These tokens may precede a code block |
10661
|
|
|
|
|
|
|
# patched for SWITCH/CASE/CATCH. Actually these could be removed |
10662
|
|
|
|
|
|
|
# now and we could let the extended-syntax coding handle them. |
10663
|
|
|
|
|
|
|
# Added 'default' for Switch::Plain. |
10664
|
|
|
|
|
|
|
# Note: 'ADJUST' will be added by sub check_options if --use-feature=class |
10665
|
38
|
|
|
|
|
211
|
@q = |
10666
|
|
|
|
|
|
|
qw( BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else |
10667
|
|
|
|
|
|
|
unless do while until eval for foreach map grep sort |
10668
|
|
|
|
|
|
|
switch case given when default catch try finally); |
10669
|
38
|
|
|
|
|
863
|
@is_code_block_token{@q} = (1) x scalar(@q); |
10670
|
|
|
|
|
|
|
|
10671
|
|
|
|
|
|
|
# Note: this hash was formerly named '%is_not_zero_continuation_block_type' |
10672
|
|
|
|
|
|
|
# to contrast it with the block types in '%is_zero_continuation_block_type' |
10673
|
38
|
|
|
|
|
210
|
@q = qw( sort map grep eval do ); |
10674
|
38
|
|
|
|
|
155
|
@is_sort_map_grep_eval_do{@q} = (1) x scalar(@q); |
10675
|
|
|
|
|
|
|
|
10676
|
38
|
|
|
|
|
119
|
@q = qw( sort map grep ); |
10677
|
38
|
|
|
|
|
124
|
@is_sort_map_grep{@q} = (1) x scalar(@q); |
10678
|
|
|
|
|
|
|
|
10679
|
38
|
|
|
|
|
97
|
%is_grep_alias = (); |
10680
|
|
|
|
|
|
|
|
10681
|
|
|
|
|
|
|
# I'll build the list of keywords incrementally |
10682
|
38
|
|
|
|
|
99
|
my @Keywords = (); |
10683
|
|
|
|
|
|
|
|
10684
|
|
|
|
|
|
|
# keywords and tokens after which a value or pattern is expected, |
10685
|
|
|
|
|
|
|
# but not an operator. In other words, these should consume terms |
10686
|
|
|
|
|
|
|
# to their right, or at least they are not expected to be followed |
10687
|
|
|
|
|
|
|
# immediately by operators. |
10688
|
38
|
|
|
|
|
1492
|
my @value_requestor = qw( |
10689
|
|
|
|
|
|
|
AUTOLOAD |
10690
|
|
|
|
|
|
|
BEGIN |
10691
|
|
|
|
|
|
|
CHECK |
10692
|
|
|
|
|
|
|
DESTROY |
10693
|
|
|
|
|
|
|
END |
10694
|
|
|
|
|
|
|
EQ |
10695
|
|
|
|
|
|
|
GE |
10696
|
|
|
|
|
|
|
GT |
10697
|
|
|
|
|
|
|
INIT |
10698
|
|
|
|
|
|
|
LE |
10699
|
|
|
|
|
|
|
LT |
10700
|
|
|
|
|
|
|
NE |
10701
|
|
|
|
|
|
|
UNITCHECK |
10702
|
|
|
|
|
|
|
abs |
10703
|
|
|
|
|
|
|
accept |
10704
|
|
|
|
|
|
|
alarm |
10705
|
|
|
|
|
|
|
and |
10706
|
|
|
|
|
|
|
atan2 |
10707
|
|
|
|
|
|
|
bind |
10708
|
|
|
|
|
|
|
binmode |
10709
|
|
|
|
|
|
|
bless |
10710
|
|
|
|
|
|
|
break |
10711
|
|
|
|
|
|
|
caller |
10712
|
|
|
|
|
|
|
chdir |
10713
|
|
|
|
|
|
|
chmod |
10714
|
|
|
|
|
|
|
chomp |
10715
|
|
|
|
|
|
|
chop |
10716
|
|
|
|
|
|
|
chown |
10717
|
|
|
|
|
|
|
chr |
10718
|
|
|
|
|
|
|
chroot |
10719
|
|
|
|
|
|
|
close |
10720
|
|
|
|
|
|
|
closedir |
10721
|
|
|
|
|
|
|
cmp |
10722
|
|
|
|
|
|
|
connect |
10723
|
|
|
|
|
|
|
continue |
10724
|
|
|
|
|
|
|
cos |
10725
|
|
|
|
|
|
|
crypt |
10726
|
|
|
|
|
|
|
dbmclose |
10727
|
|
|
|
|
|
|
dbmopen |
10728
|
|
|
|
|
|
|
defined |
10729
|
|
|
|
|
|
|
delete |
10730
|
|
|
|
|
|
|
die |
10731
|
|
|
|
|
|
|
dump |
10732
|
|
|
|
|
|
|
each |
10733
|
|
|
|
|
|
|
else |
10734
|
|
|
|
|
|
|
elsif |
10735
|
|
|
|
|
|
|
eof |
10736
|
|
|
|
|
|
|
eq |
10737
|
|
|
|
|
|
|
evalbytes |
10738
|
|
|
|
|
|
|
exec |
10739
|
|
|
|
|
|
|
exists |
10740
|
|
|
|
|
|
|
exit |
10741
|
|
|
|
|
|
|
exp |
10742
|
|
|
|
|
|
|
fc |
10743
|
|
|
|
|
|
|
fcntl |
10744
|
|
|
|
|
|
|
fileno |
10745
|
|
|
|
|
|
|
flock |
10746
|
|
|
|
|
|
|
for |
10747
|
|
|
|
|
|
|
foreach |
10748
|
|
|
|
|
|
|
formline |
10749
|
|
|
|
|
|
|
ge |
10750
|
|
|
|
|
|
|
getc |
10751
|
|
|
|
|
|
|
getgrgid |
10752
|
|
|
|
|
|
|
getgrnam |
10753
|
|
|
|
|
|
|
gethostbyaddr |
10754
|
|
|
|
|
|
|
gethostbyname |
10755
|
|
|
|
|
|
|
getnetbyaddr |
10756
|
|
|
|
|
|
|
getnetbyname |
10757
|
|
|
|
|
|
|
getpeername |
10758
|
|
|
|
|
|
|
getpgrp |
10759
|
|
|
|
|
|
|
getpriority |
10760
|
|
|
|
|
|
|
getprotobyname |
10761
|
|
|
|
|
|
|
getprotobynumber |
10762
|
|
|
|
|
|
|
getpwnam |
10763
|
|
|
|
|
|
|
getpwuid |
10764
|
|
|
|
|
|
|
getservbyname |
10765
|
|
|
|
|
|
|
getservbyport |
10766
|
|
|
|
|
|
|
getsockname |
10767
|
|
|
|
|
|
|
getsockopt |
10768
|
|
|
|
|
|
|
glob |
10769
|
|
|
|
|
|
|
gmtime |
10770
|
|
|
|
|
|
|
goto |
10771
|
|
|
|
|
|
|
grep |
10772
|
|
|
|
|
|
|
gt |
10773
|
|
|
|
|
|
|
hex |
10774
|
|
|
|
|
|
|
if |
10775
|
|
|
|
|
|
|
index |
10776
|
|
|
|
|
|
|
int |
10777
|
|
|
|
|
|
|
ioctl |
10778
|
|
|
|
|
|
|
join |
10779
|
|
|
|
|
|
|
keys |
10780
|
|
|
|
|
|
|
kill |
10781
|
|
|
|
|
|
|
last |
10782
|
|
|
|
|
|
|
lc |
10783
|
|
|
|
|
|
|
lcfirst |
10784
|
|
|
|
|
|
|
le |
10785
|
|
|
|
|
|
|
length |
10786
|
|
|
|
|
|
|
link |
10787
|
|
|
|
|
|
|
listen |
10788
|
|
|
|
|
|
|
local |
10789
|
|
|
|
|
|
|
localtime |
10790
|
|
|
|
|
|
|
lock |
10791
|
|
|
|
|
|
|
log |
10792
|
|
|
|
|
|
|
lstat |
10793
|
|
|
|
|
|
|
lt |
10794
|
|
|
|
|
|
|
map |
10795
|
|
|
|
|
|
|
mkdir |
10796
|
|
|
|
|
|
|
msgctl |
10797
|
|
|
|
|
|
|
msgget |
10798
|
|
|
|
|
|
|
msgrcv |
10799
|
|
|
|
|
|
|
msgsnd |
10800
|
|
|
|
|
|
|
my |
10801
|
|
|
|
|
|
|
ne |
10802
|
|
|
|
|
|
|
next |
10803
|
|
|
|
|
|
|
no |
10804
|
|
|
|
|
|
|
not |
10805
|
|
|
|
|
|
|
oct |
10806
|
|
|
|
|
|
|
open |
10807
|
|
|
|
|
|
|
opendir |
10808
|
|
|
|
|
|
|
or |
10809
|
|
|
|
|
|
|
ord |
10810
|
|
|
|
|
|
|
our |
10811
|
|
|
|
|
|
|
pack |
10812
|
|
|
|
|
|
|
pipe |
10813
|
|
|
|
|
|
|
pop |
10814
|
|
|
|
|
|
|
pos |
10815
|
|
|
|
|
|
|
print |
10816
|
|
|
|
|
|
|
printf |
10817
|
|
|
|
|
|
|
prototype |
10818
|
|
|
|
|
|
|
push |
10819
|
|
|
|
|
|
|
quotemeta |
10820
|
|
|
|
|
|
|
rand |
10821
|
|
|
|
|
|
|
read |
10822
|
|
|
|
|
|
|
readdir |
10823
|
|
|
|
|
|
|
readlink |
10824
|
|
|
|
|
|
|
readline |
10825
|
|
|
|
|
|
|
readpipe |
10826
|
|
|
|
|
|
|
recv |
10827
|
|
|
|
|
|
|
redo |
10828
|
|
|
|
|
|
|
ref |
10829
|
|
|
|
|
|
|
rename |
10830
|
|
|
|
|
|
|
require |
10831
|
|
|
|
|
|
|
reset |
10832
|
|
|
|
|
|
|
return |
10833
|
|
|
|
|
|
|
reverse |
10834
|
|
|
|
|
|
|
rewinddir |
10835
|
|
|
|
|
|
|
rindex |
10836
|
|
|
|
|
|
|
rmdir |
10837
|
|
|
|
|
|
|
scalar |
10838
|
|
|
|
|
|
|
seek |
10839
|
|
|
|
|
|
|
seekdir |
10840
|
|
|
|
|
|
|
select |
10841
|
|
|
|
|
|
|
semctl |
10842
|
|
|
|
|
|
|
semget |
10843
|
|
|
|
|
|
|
semop |
10844
|
|
|
|
|
|
|
send |
10845
|
|
|
|
|
|
|
sethostent |
10846
|
|
|
|
|
|
|
setnetent |
10847
|
|
|
|
|
|
|
setpgrp |
10848
|
|
|
|
|
|
|
setpriority |
10849
|
|
|
|
|
|
|
setprotoent |
10850
|
|
|
|
|
|
|
setservent |
10851
|
|
|
|
|
|
|
setsockopt |
10852
|
|
|
|
|
|
|
shift |
10853
|
|
|
|
|
|
|
shmctl |
10854
|
|
|
|
|
|
|
shmget |
10855
|
|
|
|
|
|
|
shmread |
10856
|
|
|
|
|
|
|
shmwrite |
10857
|
|
|
|
|
|
|
shutdown |
10858
|
|
|
|
|
|
|
sin |
10859
|
|
|
|
|
|
|
sleep |
10860
|
|
|
|
|
|
|
socket |
10861
|
|
|
|
|
|
|
socketpair |
10862
|
|
|
|
|
|
|
sort |
10863
|
|
|
|
|
|
|
splice |
10864
|
|
|
|
|
|
|
split |
10865
|
|
|
|
|
|
|
sprintf |
10866
|
|
|
|
|
|
|
sqrt |
10867
|
|
|
|
|
|
|
srand |
10868
|
|
|
|
|
|
|
stat |
10869
|
|
|
|
|
|
|
state |
10870
|
|
|
|
|
|
|
study |
10871
|
|
|
|
|
|
|
substr |
10872
|
|
|
|
|
|
|
symlink |
10873
|
|
|
|
|
|
|
syscall |
10874
|
|
|
|
|
|
|
sysopen |
10875
|
|
|
|
|
|
|
sysread |
10876
|
|
|
|
|
|
|
sysseek |
10877
|
|
|
|
|
|
|
system |
10878
|
|
|
|
|
|
|
syswrite |
10879
|
|
|
|
|
|
|
tell |
10880
|
|
|
|
|
|
|
telldir |
10881
|
|
|
|
|
|
|
tie |
10882
|
|
|
|
|
|
|
tied |
10883
|
|
|
|
|
|
|
truncate |
10884
|
|
|
|
|
|
|
uc |
10885
|
|
|
|
|
|
|
ucfirst |
10886
|
|
|
|
|
|
|
umask |
10887
|
|
|
|
|
|
|
undef |
10888
|
|
|
|
|
|
|
unless |
10889
|
|
|
|
|
|
|
unlink |
10890
|
|
|
|
|
|
|
unpack |
10891
|
|
|
|
|
|
|
unshift |
10892
|
|
|
|
|
|
|
untie |
10893
|
|
|
|
|
|
|
until |
10894
|
|
|
|
|
|
|
use |
10895
|
|
|
|
|
|
|
utime |
10896
|
|
|
|
|
|
|
values |
10897
|
|
|
|
|
|
|
vec |
10898
|
|
|
|
|
|
|
waitpid |
10899
|
|
|
|
|
|
|
warn |
10900
|
|
|
|
|
|
|
while |
10901
|
|
|
|
|
|
|
write |
10902
|
|
|
|
|
|
|
xor |
10903
|
|
|
|
|
|
|
|
10904
|
|
|
|
|
|
|
switch |
10905
|
|
|
|
|
|
|
case |
10906
|
|
|
|
|
|
|
default |
10907
|
|
|
|
|
|
|
given |
10908
|
|
|
|
|
|
|
when |
10909
|
|
|
|
|
|
|
err |
10910
|
|
|
|
|
|
|
say |
10911
|
|
|
|
|
|
|
isa |
10912
|
|
|
|
|
|
|
|
10913
|
|
|
|
|
|
|
catch |
10914
|
|
|
|
|
|
|
|
10915
|
|
|
|
|
|
|
); |
10916
|
|
|
|
|
|
|
|
10917
|
|
|
|
|
|
|
# Note: 'ADJUST', 'field' are added by sub check_options |
10918
|
|
|
|
|
|
|
# if --use-feature=class |
10919
|
|
|
|
|
|
|
|
10920
|
|
|
|
|
|
|
# patched above for SWITCH/CASE given/when err say |
10921
|
|
|
|
|
|
|
# 'err' is a fairly safe addition. |
10922
|
|
|
|
|
|
|
# Added 'default' for Switch::Plain. Note that we could also have |
10923
|
|
|
|
|
|
|
# a separate set of keywords to include if we see 'use Switch::Plain' |
10924
|
38
|
|
|
|
|
1551
|
push( @Keywords, @value_requestor ); |
10925
|
|
|
|
|
|
|
|
10926
|
|
|
|
|
|
|
# These are treated the same but are not keywords: |
10927
|
38
|
|
|
|
|
174
|
my @extra_vr = qw( |
10928
|
|
|
|
|
|
|
constant |
10929
|
|
|
|
|
|
|
vars |
10930
|
|
|
|
|
|
|
); |
10931
|
38
|
|
|
|
|
290
|
push( @value_requestor, @extra_vr ); |
10932
|
|
|
|
|
|
|
|
10933
|
38
|
|
|
|
|
4778
|
@expecting_term_token{@value_requestor} = (1) x scalar(@value_requestor); |
10934
|
|
|
|
|
|
|
|
10935
|
|
|
|
|
|
|
# this list contains keywords which do not look for arguments, |
10936
|
|
|
|
|
|
|
# so that they might be followed by an operator, or at least |
10937
|
|
|
|
|
|
|
# not a term. |
10938
|
38
|
|
|
|
|
344
|
my @operator_requestor = qw( |
10939
|
|
|
|
|
|
|
endgrent |
10940
|
|
|
|
|
|
|
endhostent |
10941
|
|
|
|
|
|
|
endnetent |
10942
|
|
|
|
|
|
|
endprotoent |
10943
|
|
|
|
|
|
|
endpwent |
10944
|
|
|
|
|
|
|
endservent |
10945
|
|
|
|
|
|
|
fork |
10946
|
|
|
|
|
|
|
getgrent |
10947
|
|
|
|
|
|
|
gethostent |
10948
|
|
|
|
|
|
|
getlogin |
10949
|
|
|
|
|
|
|
getnetent |
10950
|
|
|
|
|
|
|
getppid |
10951
|
|
|
|
|
|
|
getprotoent |
10952
|
|
|
|
|
|
|
getpwent |
10953
|
|
|
|
|
|
|
getservent |
10954
|
|
|
|
|
|
|
setgrent |
10955
|
|
|
|
|
|
|
setpwent |
10956
|
|
|
|
|
|
|
time |
10957
|
|
|
|
|
|
|
times |
10958
|
|
|
|
|
|
|
wait |
10959
|
|
|
|
|
|
|
wantarray |
10960
|
|
|
|
|
|
|
); |
10961
|
|
|
|
|
|
|
|
10962
|
38
|
|
|
|
|
175
|
push( @Keywords, @operator_requestor ); |
10963
|
|
|
|
|
|
|
|
10964
|
|
|
|
|
|
|
# These are treated the same but are not considered keywords: |
10965
|
38
|
|
|
|
|
129
|
my @extra_or = qw( |
10966
|
|
|
|
|
|
|
STDERR |
10967
|
|
|
|
|
|
|
STDIN |
10968
|
|
|
|
|
|
|
STDOUT |
10969
|
|
|
|
|
|
|
); |
10970
|
|
|
|
|
|
|
|
10971
|
38
|
|
|
|
|
169
|
push( @operator_requestor, @extra_or ); |
10972
|
|
|
|
|
|
|
|
10973
|
38
|
|
|
|
|
887
|
@expecting_operator_token{@operator_requestor} = |
10974
|
|
|
|
|
|
|
(1) x scalar(@operator_requestor); |
10975
|
|
|
|
|
|
|
|
10976
|
|
|
|
|
|
|
# these token TYPES expect trailing operator but not a term |
10977
|
|
|
|
|
|
|
# note: ++ and -- are post-increment and decrement, 'C' = constant |
10978
|
38
|
|
|
|
|
248
|
my @operator_requestor_types = qw( ++ -- C <> q ); |
10979
|
|
|
|
|
|
|
|
10980
|
|
|
|
|
|
|
# NOTE: This hash is available but not currently used |
10981
|
38
|
|
|
|
|
152
|
@expecting_operator_types{@operator_requestor_types} = |
10982
|
|
|
|
|
|
|
(1) x scalar(@operator_requestor_types); |
10983
|
|
|
|
|
|
|
|
10984
|
|
|
|
|
|
|
# these token TYPES consume values (terms) |
10985
|
|
|
|
|
|
|
# note: pp and mm are pre-increment and decrement |
10986
|
|
|
|
|
|
|
# f=semicolon in for, F=file test operator |
10987
|
38
|
|
|
|
|
920
|
my @value_requestor_type = qw# |
10988
|
|
|
|
|
|
|
L { ( [ ~ !~ =~ ; . .. ... A : && ! || // = + - x |
10989
|
|
|
|
|
|
|
**= += -= .= /= *= %= x= &= |= ^= <<= >>= &&= ||= //= |
10990
|
|
|
|
|
|
|
<= >= == != => \ > < % * / ? & | ** <=> ~~ !~~ <<~ |
10991
|
|
|
|
|
|
|
f F pp mm Y p m U J G j >> << ^ t |
10992
|
|
|
|
|
|
|
~. ^. |. &. ^.= |.= &.= |
10993
|
|
|
|
|
|
|
#; |
10994
|
38
|
|
|
|
|
216
|
push( @value_requestor_type, ',' ) |
10995
|
|
|
|
|
|
|
; # (perl doesn't like a ',' in a qw block) |
10996
|
|
|
|
|
|
|
|
10997
|
|
|
|
|
|
|
# NOTE: This hash is available but not currently used |
10998
|
38
|
|
|
|
|
1008
|
@expecting_term_types{@value_requestor_type} = |
10999
|
|
|
|
|
|
|
(1) x scalar(@value_requestor_type); |
11000
|
|
|
|
|
|
|
|
11001
|
|
|
|
|
|
|
# Note: the following valid token types are not assigned here to |
11002
|
|
|
|
|
|
|
# hashes requesting to be followed by values or terms, but are |
11003
|
|
|
|
|
|
|
# instead currently hard-coded into sub operator_expected: |
11004
|
|
|
|
|
|
|
# ) -> :: Q R Z ] b h i k n v w } # |
11005
|
|
|
|
|
|
|
|
11006
|
|
|
|
|
|
|
# For simple syntax checking, it is nice to have a list of operators which |
11007
|
|
|
|
|
|
|
# will really be unhappy if not followed by a term. This includes most |
11008
|
|
|
|
|
|
|
# of the above... |
11009
|
38
|
|
|
|
|
1304
|
@really_want_term{@value_requestor_type} = |
11010
|
|
|
|
|
|
|
(1) x scalar(@value_requestor_type); |
11011
|
|
|
|
|
|
|
|
11012
|
|
|
|
|
|
|
# with these exceptions... |
11013
|
38
|
|
|
|
|
194
|
delete $really_want_term{'U'}; # user sub, depends on prototype |
11014
|
38
|
|
|
|
|
86
|
delete $really_want_term{'F'}; # file test works on $_ if no following term |
11015
|
38
|
|
|
|
|
91
|
delete $really_want_term{'Y'}; # indirect object, too risky to check syntax; |
11016
|
|
|
|
|
|
|
# let perl do it |
11017
|
38
|
|
|
|
|
177
|
@q = qw(q qq qx qr s y tr m); |
11018
|
38
|
|
|
|
|
227
|
@is_q_qq_qx_qr_s_y_tr_m{@q} = (1) x scalar(@q); |
11019
|
|
|
|
|
|
|
|
11020
|
|
|
|
|
|
|
# Note added 'qw' here |
11021
|
38
|
|
|
|
|
165
|
@q = qw(q qq qw qx qr s y tr m); |
11022
|
38
|
|
|
|
|
3963
|
@is_q_qq_qw_qx_qr_s_y_tr_m{@q} = (1) x scalar(@q); |
11023
|
|
|
|
|
|
|
|
11024
|
|
|
|
|
|
|
# Note: 'class' will be added by sub check_options if -use-feature=class |
11025
|
38
|
|
|
|
|
265
|
@q = qw(package); |
11026
|
38
|
|
|
|
|
139
|
@is_package{@q} = (1) x scalar(@q); |
11027
|
|
|
|
|
|
|
|
11028
|
38
|
|
|
|
|
86
|
@q = qw( ? : ); |
11029
|
38
|
|
|
|
|
98
|
push @q, ','; |
11030
|
38
|
|
|
|
|
107
|
@is_comma_question_colon{@q} = (1) x scalar(@q); |
11031
|
|
|
|
|
|
|
|
11032
|
38
|
|
|
|
|
157
|
@q = qw( if elsif unless ); |
11033
|
38
|
|
|
|
|
105
|
@is_if_elsif_unless{@q} = (1) x scalar(@q); |
11034
|
|
|
|
|
|
|
|
11035
|
38
|
|
|
|
|
83
|
@q = qw( ; t ); |
11036
|
38
|
|
|
|
|
95
|
@is_semicolon_or_t{@q} = (1) x scalar(@q); |
11037
|
|
|
|
|
|
|
|
11038
|
38
|
|
|
|
|
104
|
@q = qw( if elsif unless case when ); |
11039
|
38
|
|
|
|
|
111
|
@is_if_elsif_unless_case_when{@q} = (1) x scalar(@q); |
11040
|
|
|
|
|
|
|
|
11041
|
|
|
|
|
|
|
# Hash of other possible line endings which may occur. |
11042
|
|
|
|
|
|
|
# Keep these coordinated with the regex where this is used. |
11043
|
|
|
|
|
|
|
# Note: chr(13) = chr(015)="\r". |
11044
|
38
|
|
|
|
|
119
|
@q = ( chr(13), chr(29), chr(26) ); |
11045
|
38
|
|
|
|
|
209
|
@other_line_endings{@q} = (1) x scalar(@q); |
11046
|
|
|
|
|
|
|
|
11047
|
|
|
|
|
|
|
# These keywords are handled specially in the tokenizer code: |
11048
|
38
|
|
|
|
|
226
|
my @special_keywords = qw( |
11049
|
|
|
|
|
|
|
do |
11050
|
|
|
|
|
|
|
eval |
11051
|
|
|
|
|
|
|
format |
11052
|
|
|
|
|
|
|
m |
11053
|
|
|
|
|
|
|
package |
11054
|
|
|
|
|
|
|
q |
11055
|
|
|
|
|
|
|
qq |
11056
|
|
|
|
|
|
|
qr |
11057
|
|
|
|
|
|
|
qw |
11058
|
|
|
|
|
|
|
qx |
11059
|
|
|
|
|
|
|
s |
11060
|
|
|
|
|
|
|
sub |
11061
|
|
|
|
|
|
|
tr |
11062
|
|
|
|
|
|
|
y |
11063
|
|
|
|
|
|
|
); |
11064
|
38
|
|
|
|
|
185
|
push( @Keywords, @special_keywords ); |
11065
|
|
|
|
|
|
|
|
11066
|
|
|
|
|
|
|
# Keywords after which list formatting may be used |
11067
|
|
|
|
|
|
|
# WARNING: do not include |map|grep|eval or perl may die on |
11068
|
|
|
|
|
|
|
# syntax errors (map1.t). |
11069
|
38
|
|
|
|
|
685
|
my @keyword_taking_list = qw( |
11070
|
|
|
|
|
|
|
and |
11071
|
|
|
|
|
|
|
chmod |
11072
|
|
|
|
|
|
|
chomp |
11073
|
|
|
|
|
|
|
chop |
11074
|
|
|
|
|
|
|
chown |
11075
|
|
|
|
|
|
|
dbmopen |
11076
|
|
|
|
|
|
|
die |
11077
|
|
|
|
|
|
|
elsif |
11078
|
|
|
|
|
|
|
exec |
11079
|
|
|
|
|
|
|
fcntl |
11080
|
|
|
|
|
|
|
for |
11081
|
|
|
|
|
|
|
foreach |
11082
|
|
|
|
|
|
|
formline |
11083
|
|
|
|
|
|
|
getsockopt |
11084
|
|
|
|
|
|
|
if |
11085
|
|
|
|
|
|
|
index |
11086
|
|
|
|
|
|
|
ioctl |
11087
|
|
|
|
|
|
|
join |
11088
|
|
|
|
|
|
|
kill |
11089
|
|
|
|
|
|
|
local |
11090
|
|
|
|
|
|
|
msgctl |
11091
|
|
|
|
|
|
|
msgrcv |
11092
|
|
|
|
|
|
|
msgsnd |
11093
|
|
|
|
|
|
|
my |
11094
|
|
|
|
|
|
|
open |
11095
|
|
|
|
|
|
|
or |
11096
|
|
|
|
|
|
|
our |
11097
|
|
|
|
|
|
|
pack |
11098
|
|
|
|
|
|
|
print |
11099
|
|
|
|
|
|
|
printf |
11100
|
|
|
|
|
|
|
push |
11101
|
|
|
|
|
|
|
read |
11102
|
|
|
|
|
|
|
readpipe |
11103
|
|
|
|
|
|
|
recv |
11104
|
|
|
|
|
|
|
return |
11105
|
|
|
|
|
|
|
reverse |
11106
|
|
|
|
|
|
|
rindex |
11107
|
|
|
|
|
|
|
seek |
11108
|
|
|
|
|
|
|
select |
11109
|
|
|
|
|
|
|
semctl |
11110
|
|
|
|
|
|
|
semget |
11111
|
|
|
|
|
|
|
send |
11112
|
|
|
|
|
|
|
setpriority |
11113
|
|
|
|
|
|
|
setsockopt |
11114
|
|
|
|
|
|
|
shmctl |
11115
|
|
|
|
|
|
|
shmget |
11116
|
|
|
|
|
|
|
shmread |
11117
|
|
|
|
|
|
|
shmwrite |
11118
|
|
|
|
|
|
|
socket |
11119
|
|
|
|
|
|
|
socketpair |
11120
|
|
|
|
|
|
|
sort |
11121
|
|
|
|
|
|
|
splice |
11122
|
|
|
|
|
|
|
split |
11123
|
|
|
|
|
|
|
sprintf |
11124
|
|
|
|
|
|
|
state |
11125
|
|
|
|
|
|
|
substr |
11126
|
|
|
|
|
|
|
syscall |
11127
|
|
|
|
|
|
|
sysopen |
11128
|
|
|
|
|
|
|
sysread |
11129
|
|
|
|
|
|
|
sysseek |
11130
|
|
|
|
|
|
|
system |
11131
|
|
|
|
|
|
|
syswrite |
11132
|
|
|
|
|
|
|
tie |
11133
|
|
|
|
|
|
|
unless |
11134
|
|
|
|
|
|
|
unlink |
11135
|
|
|
|
|
|
|
unpack |
11136
|
|
|
|
|
|
|
unshift |
11137
|
|
|
|
|
|
|
until |
11138
|
|
|
|
|
|
|
vec |
11139
|
|
|
|
|
|
|
warn |
11140
|
|
|
|
|
|
|
while |
11141
|
|
|
|
|
|
|
given |
11142
|
|
|
|
|
|
|
when |
11143
|
|
|
|
|
|
|
); |
11144
|
|
|
|
|
|
|
|
11145
|
|
|
|
|
|
|
# NOTE: This hash is available but not currently used |
11146
|
38
|
|
|
|
|
1214
|
@is_keyword_taking_list{@keyword_taking_list} = |
11147
|
|
|
|
|
|
|
(1) x scalar(@keyword_taking_list); |
11148
|
|
|
|
|
|
|
|
11149
|
|
|
|
|
|
|
# perl functions which may be unary operators. |
11150
|
|
|
|
|
|
|
|
11151
|
|
|
|
|
|
|
# This list is used to decide if a pattern delimited by slashes, /pattern/, |
11152
|
|
|
|
|
|
|
# can follow one of these keywords. |
11153
|
38
|
|
|
|
|
191
|
@q = qw( |
11154
|
|
|
|
|
|
|
chomp eof eval fc lc pop shift uc undef |
11155
|
|
|
|
|
|
|
); |
11156
|
|
|
|
|
|
|
|
11157
|
38
|
|
|
|
|
238
|
@is_keyword_rejecting_slash_as_pattern_delimiter{@q} = |
11158
|
|
|
|
|
|
|
(1) x scalar(@q); |
11159
|
|
|
|
|
|
|
|
11160
|
|
|
|
|
|
|
# These are keywords for which an arg may optionally be omitted. They are |
11161
|
|
|
|
|
|
|
# currently only used to disambiguate a ? used as a ternary from one used |
11162
|
|
|
|
|
|
|
# as a (deprecated) pattern delimiter. In the future, they might be used |
11163
|
|
|
|
|
|
|
# to give a warning about ambiguous syntax before a /. |
11164
|
|
|
|
|
|
|
# Note: split has been omitted (see note below). |
11165
|
38
|
|
|
|
|
592
|
my @keywords_taking_optional_arg = qw( |
11166
|
|
|
|
|
|
|
abs |
11167
|
|
|
|
|
|
|
alarm |
11168
|
|
|
|
|
|
|
caller |
11169
|
|
|
|
|
|
|
chdir |
11170
|
|
|
|
|
|
|
chomp |
11171
|
|
|
|
|
|
|
chop |
11172
|
|
|
|
|
|
|
chr |
11173
|
|
|
|
|
|
|
chroot |
11174
|
|
|
|
|
|
|
close |
11175
|
|
|
|
|
|
|
cos |
11176
|
|
|
|
|
|
|
defined |
11177
|
|
|
|
|
|
|
die |
11178
|
|
|
|
|
|
|
eof |
11179
|
|
|
|
|
|
|
eval |
11180
|
|
|
|
|
|
|
evalbytes |
11181
|
|
|
|
|
|
|
exit |
11182
|
|
|
|
|
|
|
exp |
11183
|
|
|
|
|
|
|
fc |
11184
|
|
|
|
|
|
|
getc |
11185
|
|
|
|
|
|
|
glob |
11186
|
|
|
|
|
|
|
gmtime |
11187
|
|
|
|
|
|
|
hex |
11188
|
|
|
|
|
|
|
int |
11189
|
|
|
|
|
|
|
last |
11190
|
|
|
|
|
|
|
lc |
11191
|
|
|
|
|
|
|
lcfirst |
11192
|
|
|
|
|
|
|
length |
11193
|
|
|
|
|
|
|
localtime |
11194
|
|
|
|
|
|
|
log |
11195
|
|
|
|
|
|
|
lstat |
11196
|
|
|
|
|
|
|
mkdir |
11197
|
|
|
|
|
|
|
next |
11198
|
|
|
|
|
|
|
oct |
11199
|
|
|
|
|
|
|
ord |
11200
|
|
|
|
|
|
|
pop |
11201
|
|
|
|
|
|
|
pos |
11202
|
|
|
|
|
|
|
print |
11203
|
|
|
|
|
|
|
printf |
11204
|
|
|
|
|
|
|
prototype |
11205
|
|
|
|
|
|
|
quotemeta |
11206
|
|
|
|
|
|
|
rand |
11207
|
|
|
|
|
|
|
readline |
11208
|
|
|
|
|
|
|
readlink |
11209
|
|
|
|
|
|
|
readpipe |
11210
|
|
|
|
|
|
|
redo |
11211
|
|
|
|
|
|
|
ref |
11212
|
|
|
|
|
|
|
require |
11213
|
|
|
|
|
|
|
reset |
11214
|
|
|
|
|
|
|
reverse |
11215
|
|
|
|
|
|
|
rmdir |
11216
|
|
|
|
|
|
|
say |
11217
|
|
|
|
|
|
|
select |
11218
|
|
|
|
|
|
|
shift |
11219
|
|
|
|
|
|
|
sin |
11220
|
|
|
|
|
|
|
sleep |
11221
|
|
|
|
|
|
|
sqrt |
11222
|
|
|
|
|
|
|
srand |
11223
|
|
|
|
|
|
|
stat |
11224
|
|
|
|
|
|
|
study |
11225
|
|
|
|
|
|
|
tell |
11226
|
|
|
|
|
|
|
uc |
11227
|
|
|
|
|
|
|
ucfirst |
11228
|
|
|
|
|
|
|
umask |
11229
|
|
|
|
|
|
|
undef |
11230
|
|
|
|
|
|
|
unlink |
11231
|
|
|
|
|
|
|
warn |
11232
|
|
|
|
|
|
|
write |
11233
|
|
|
|
|
|
|
); |
11234
|
38
|
|
|
|
|
887
|
@is_keyword_taking_optional_arg{@keywords_taking_optional_arg} = |
11235
|
|
|
|
|
|
|
(1) x scalar(@keywords_taking_optional_arg); |
11236
|
|
|
|
|
|
|
|
11237
|
|
|
|
|
|
|
# This list is used to decide if a pattern delimited by question marks, |
11238
|
|
|
|
|
|
|
# ?pattern?, can follow one of these keywords. Note that from perl 5.22 |
11239
|
|
|
|
|
|
|
# on, a ?pattern? is not recognized, so we can be much more strict than |
11240
|
|
|
|
|
|
|
# with a /pattern/. Note that 'split' is not in this list. In current |
11241
|
|
|
|
|
|
|
# versions of perl a question following split must be a ternary, but |
11242
|
|
|
|
|
|
|
# in older versions it could be a pattern. The guessing algorithm will |
11243
|
|
|
|
|
|
|
# decide. We are combining two lists here to simplify the test. |
11244
|
38
|
|
|
|
|
810
|
@q = ( @keywords_taking_optional_arg, @operator_requestor ); |
11245
|
38
|
|
|
|
|
1437
|
@is_keyword_rejecting_question_as_pattern_delimiter{@q} = |
11246
|
|
|
|
|
|
|
(1) x scalar(@q); |
11247
|
|
|
|
|
|
|
|
11248
|
|
|
|
|
|
|
# These are not used in any way yet |
11249
|
|
|
|
|
|
|
# my @unused_keywords = qw( |
11250
|
|
|
|
|
|
|
# __FILE__ |
11251
|
|
|
|
|
|
|
# __LINE__ |
11252
|
|
|
|
|
|
|
# __PACKAGE__ |
11253
|
|
|
|
|
|
|
# ); |
11254
|
|
|
|
|
|
|
|
11255
|
|
|
|
|
|
|
# The list of keywords was originally extracted from function 'keyword' in |
11256
|
|
|
|
|
|
|
# perl file toke.c version 5.005.03, using this utility, plus a |
11257
|
|
|
|
|
|
|
# little editing: (file getkwd.pl): |
11258
|
|
|
|
|
|
|
# while (<>) { while (/\"(.*)\"/g) { print "$1\n"; } } |
11259
|
|
|
|
|
|
|
# Add 'get' prefix where necessary, then split into the above lists. |
11260
|
|
|
|
|
|
|
# This list should be updated as necessary. |
11261
|
|
|
|
|
|
|
# The list should not contain these special variables: |
11262
|
|
|
|
|
|
|
# ARGV DATA ENV SIG STDERR STDIN STDOUT |
11263
|
|
|
|
|
|
|
# __DATA__ __END__ |
11264
|
|
|
|
|
|
|
|
11265
|
38
|
|
|
|
|
8823
|
@is_keyword{@Keywords} = (1) x scalar(@Keywords); |
11266
|
|
|
|
|
|
|
} ## end BEGIN |
11267
|
|
|
|
|
|
|
1; |