line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
##################################################################### |
2
|
|
|
|
|
|
|
# |
3
|
|
|
|
|
|
|
# The Perl::Tidy::Tokenizer package is essentially a filter which |
4
|
|
|
|
|
|
|
# reads lines of perl source code from a source object and provides |
5
|
|
|
|
|
|
|
# corresponding tokenized lines through its get_line() method. Lines |
6
|
|
|
|
|
|
|
# flow from the source_object to the caller like this: |
7
|
|
|
|
|
|
|
# |
8
|
|
|
|
|
|
|
# source_object --> Tokenizer --> calling routine |
9
|
|
|
|
|
|
|
# get_line() get_line() line_of_tokens |
10
|
|
|
|
|
|
|
# |
11
|
|
|
|
|
|
|
# The source object can be a STRING ref, an ARRAY ref, or an object with a |
12
|
|
|
|
|
|
|
# get_line() method which supplies one line (a character string) perl call. |
13
|
|
|
|
|
|
|
# The Tokenizer returns a reference to a data structure 'line_of_tokens' |
14
|
|
|
|
|
|
|
# containing one tokenized line for each call to its get_line() method. |
15
|
|
|
|
|
|
|
# |
16
|
|
|
|
|
|
|
# NOTE: This is not a real class. Only one tokenizer my be used. |
17
|
|
|
|
|
|
|
# |
18
|
|
|
|
|
|
|
######################################################################## |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
package Perl::Tidy::Tokenizer; |
21
|
39
|
|
|
39
|
|
307
|
use strict; |
|
39
|
|
|
|
|
147
|
|
|
39
|
|
|
|
|
1330
|
|
22
|
39
|
|
|
39
|
|
254
|
use warnings; |
|
39
|
|
|
|
|
101
|
|
|
39
|
|
|
|
|
1153
|
|
23
|
39
|
|
|
39
|
|
206
|
use English qw( -no_match_vars ); |
|
39
|
|
|
|
|
105
|
|
|
39
|
|
|
|
|
218
|
|
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
our $VERSION = '20230912'; |
26
|
|
|
|
|
|
|
|
27
|
39
|
|
|
39
|
|
14536
|
use Carp; |
|
39
|
|
|
|
|
130
|
|
|
39
|
|
|
|
|
2290
|
|
28
|
|
|
|
|
|
|
|
29
|
39
|
|
|
39
|
|
289
|
use constant DEVEL_MODE => 0; |
|
39
|
|
|
|
|
107
|
|
|
39
|
|
|
|
|
2572
|
|
30
|
39
|
|
|
39
|
|
279
|
use constant EMPTY_STRING => q{}; |
|
39
|
|
|
|
|
121
|
|
|
39
|
|
|
|
|
2593
|
|
31
|
39
|
|
|
39
|
|
263
|
use constant SPACE => q{ }; |
|
39
|
|
|
|
|
98
|
|
|
39
|
|
|
|
|
2483
|
|
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
# Decimal values of some ascii characters for quick checks |
34
|
39
|
|
|
39
|
|
315
|
use constant ORD_TAB => 9; |
|
39
|
|
|
|
|
83
|
|
|
39
|
|
|
|
|
2105
|
|
35
|
39
|
|
|
39
|
|
284
|
use constant ORD_SPACE => 32; |
|
39
|
|
|
|
|
124
|
|
|
39
|
|
|
|
|
2213
|
|
36
|
39
|
|
|
39
|
|
280
|
use constant ORD_PRINTABLE_MIN => 33; |
|
39
|
|
|
|
|
95
|
|
|
39
|
|
|
|
|
2140
|
|
37
|
39
|
|
|
39
|
|
259
|
use constant ORD_PRINTABLE_MAX => 126; |
|
39
|
|
|
|
|
108
|
|
|
39
|
|
|
|
|
8817
|
|
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
# GLOBAL VARIABLES which change during tokenization: |
40
|
|
|
|
|
|
|
# These could also be stored in $self but it is more convenient and |
41
|
|
|
|
|
|
|
# efficient to make them global lexical variables. |
42
|
|
|
|
|
|
|
# INITIALIZER: sub prepare_for_a_new_file |
43
|
|
|
|
|
|
|
my ( |
44
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
$brace_depth, |
46
|
|
|
|
|
|
|
$context, |
47
|
|
|
|
|
|
|
$current_package, |
48
|
|
|
|
|
|
|
$last_nonblank_block_type, |
49
|
|
|
|
|
|
|
$last_nonblank_token, |
50
|
|
|
|
|
|
|
$last_nonblank_type, |
51
|
|
|
|
|
|
|
$next_sequence_number, |
52
|
|
|
|
|
|
|
$paren_depth, |
53
|
|
|
|
|
|
|
$rbrace_context, |
54
|
|
|
|
|
|
|
$rbrace_package, |
55
|
|
|
|
|
|
|
$rbrace_structural_type, |
56
|
|
|
|
|
|
|
$rbrace_type, |
57
|
|
|
|
|
|
|
$rcurrent_depth, |
58
|
|
|
|
|
|
|
$rcurrent_sequence_number, |
59
|
|
|
|
|
|
|
$rdepth_array, |
60
|
|
|
|
|
|
|
$ris_block_function, |
61
|
|
|
|
|
|
|
$ris_block_list_function, |
62
|
|
|
|
|
|
|
$ris_constant, |
63
|
|
|
|
|
|
|
$ris_user_function, |
64
|
|
|
|
|
|
|
$rnested_statement_type, |
65
|
|
|
|
|
|
|
$rnested_ternary_flag, |
66
|
|
|
|
|
|
|
$rparen_semicolon_count, |
67
|
|
|
|
|
|
|
$rparen_vars, |
68
|
|
|
|
|
|
|
$rparen_type, |
69
|
|
|
|
|
|
|
$rsaw_function_definition, |
70
|
|
|
|
|
|
|
$rsaw_use_module, |
71
|
|
|
|
|
|
|
$rsquare_bracket_structural_type, |
72
|
|
|
|
|
|
|
$rsquare_bracket_type, |
73
|
|
|
|
|
|
|
$rstarting_line_of_current_depth, |
74
|
|
|
|
|
|
|
$rtotal_depth, |
75
|
|
|
|
|
|
|
$ruser_function_prototype, |
76
|
|
|
|
|
|
|
$square_bracket_depth, |
77
|
|
|
|
|
|
|
$statement_type, |
78
|
|
|
|
|
|
|
$total_depth, |
79
|
|
|
|
|
|
|
); |
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
my ( |
82
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
# GLOBAL CONSTANTS for routines in this package, |
84
|
|
|
|
|
|
|
# INITIALIZER: BEGIN block. |
85
|
|
|
|
|
|
|
%can_start_digraph, |
86
|
|
|
|
|
|
|
%expecting_operator_token, |
87
|
|
|
|
|
|
|
%expecting_operator_types, |
88
|
|
|
|
|
|
|
%expecting_term_token, |
89
|
|
|
|
|
|
|
%expecting_term_types, |
90
|
|
|
|
|
|
|
%is_block_operator, |
91
|
|
|
|
|
|
|
%is_digraph, |
92
|
|
|
|
|
|
|
%is_file_test_operator, |
93
|
|
|
|
|
|
|
%is_if_elsif_unless, |
94
|
|
|
|
|
|
|
%is_if_elsif_unless_case_when, |
95
|
|
|
|
|
|
|
%is_indirect_object_taker, |
96
|
|
|
|
|
|
|
%is_keyword_rejecting_question_as_pattern_delimiter, |
97
|
|
|
|
|
|
|
%is_keyword_rejecting_slash_as_pattern_delimiter, |
98
|
|
|
|
|
|
|
%is_keyword_taking_list, |
99
|
|
|
|
|
|
|
%is_keyword_taking_optional_arg, |
100
|
|
|
|
|
|
|
%is_q_qq_qw_qx_qr_s_y_tr_m, |
101
|
|
|
|
|
|
|
%is_q_qq_qx_qr_s_y_tr_m, |
102
|
|
|
|
|
|
|
%is_semicolon_or_t, |
103
|
|
|
|
|
|
|
%is_sort_map_grep, |
104
|
|
|
|
|
|
|
%is_sort_map_grep_eval_do, |
105
|
|
|
|
|
|
|
%is_tetragraph, |
106
|
|
|
|
|
|
|
%is_trigraph, |
107
|
|
|
|
|
|
|
%is_valid_token_type, |
108
|
|
|
|
|
|
|
%other_line_endings, |
109
|
|
|
|
|
|
|
%really_want_term, |
110
|
|
|
|
|
|
|
@closing_brace_names, |
111
|
|
|
|
|
|
|
@opening_brace_names, |
112
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
# GLOBAL CONSTANT hash lookup table of operator expected values |
114
|
|
|
|
|
|
|
# INITIALIZER: BEGIN block |
115
|
|
|
|
|
|
|
%op_expected_table, |
116
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
# GLOBAL VARIABLES which are constant after being configured. |
118
|
|
|
|
|
|
|
# INITIALIZER: BEGIN block and modified by sub check_options |
119
|
|
|
|
|
|
|
%is_code_block_token, |
120
|
|
|
|
|
|
|
%is_keyword, |
121
|
|
|
|
|
|
|
%is_my_our_state, |
122
|
|
|
|
|
|
|
%is_package, |
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
# INITIALIZER: sub check_options |
125
|
|
|
|
|
|
|
$code_skipping_pattern_begin, |
126
|
|
|
|
|
|
|
$code_skipping_pattern_end, |
127
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
$rOpts_code_skipping, |
129
|
|
|
|
|
|
|
$rOpts_code_skipping_begin, |
130
|
|
|
|
|
|
|
$rOpts_starting_indentation_level, |
131
|
|
|
|
|
|
|
$rOpts_indent_columns, |
132
|
|
|
|
|
|
|
$rOpts_look_for_hash_bang, |
133
|
|
|
|
|
|
|
$rOpts_look_for_autoloader, |
134
|
|
|
|
|
|
|
$rOpts_look_for_selfloader, |
135
|
|
|
|
|
|
|
$rOpts_trim_qw, |
136
|
|
|
|
|
|
|
$rOpts_extended_syntax, |
137
|
|
|
|
|
|
|
$rOpts_continuation_indentation, |
138
|
|
|
|
|
|
|
$rOpts_outdent_labels, |
139
|
|
|
|
|
|
|
$rOpts_maximum_level_errors, |
140
|
|
|
|
|
|
|
$rOpts_maximum_unexpected_errors, |
141
|
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
$tabsize, |
143
|
|
|
|
|
|
|
%is_END_DATA_format_sub, |
144
|
|
|
|
|
|
|
%is_grep_alias, |
145
|
|
|
|
|
|
|
%is_sub, |
146
|
|
|
|
|
|
|
$guess_if_method, |
147
|
|
|
|
|
|
|
); |
148
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
# possible values of operator_expected() |
150
|
39
|
|
|
39
|
|
1581
|
use constant TERM => -1; |
|
39
|
|
|
|
|
99
|
|
|
39
|
|
|
|
|
2270
|
|
151
|
39
|
|
|
39
|
|
269
|
use constant UNKNOWN => 0; |
|
39
|
|
|
|
|
86
|
|
|
39
|
|
|
|
|
2222
|
|
152
|
39
|
|
|
39
|
|
285
|
use constant OPERATOR => 1; |
|
39
|
|
|
|
|
149
|
|
|
39
|
|
|
|
|
2227
|
|
153
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
# possible values of context |
155
|
39
|
|
|
39
|
|
341
|
use constant SCALAR_CONTEXT => -1; |
|
39
|
|
|
|
|
151
|
|
|
39
|
|
|
|
|
2168
|
|
156
|
39
|
|
|
39
|
|
278
|
use constant UNKNOWN_CONTEXT => 0; |
|
39
|
|
|
|
|
139
|
|
|
39
|
|
|
|
|
2172
|
|
157
|
39
|
|
|
39
|
|
308
|
use constant LIST_CONTEXT => 1; |
|
39
|
|
|
|
|
155
|
|
|
39
|
|
|
|
|
2427
|
|
158
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
# Maximum number of little messages; probably need not be changed. |
160
|
39
|
|
|
39
|
|
307
|
use constant MAX_NAG_MESSAGES => 6; |
|
39
|
|
|
|
|
101
|
|
|
39
|
|
|
|
|
7947
|
|
161
|
|
|
|
|
|
|
|
162
|
0
|
|
|
|
|
0
|
BEGIN { |
163
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
# Array index names for $self. |
165
|
|
|
|
|
|
|
# Do not combine with other BEGIN blocks (c101). |
166
|
39
|
|
|
39
|
|
284507
|
my $i = 0; |
167
|
|
|
|
|
|
|
use constant { |
168
|
39
|
|
|
|
|
18312
|
_rhere_target_list_ => $i++, |
169
|
|
|
|
|
|
|
_in_here_doc_ => $i++, |
170
|
|
|
|
|
|
|
_here_doc_target_ => $i++, |
171
|
|
|
|
|
|
|
_here_quote_character_ => $i++, |
172
|
|
|
|
|
|
|
_in_data_ => $i++, |
173
|
|
|
|
|
|
|
_in_end_ => $i++, |
174
|
|
|
|
|
|
|
_in_format_ => $i++, |
175
|
|
|
|
|
|
|
_in_error_ => $i++, |
176
|
|
|
|
|
|
|
_in_trouble_ => $i++, |
177
|
|
|
|
|
|
|
_warning_count_ => $i++, |
178
|
|
|
|
|
|
|
_html_tag_count_ => $i++, |
179
|
|
|
|
|
|
|
_in_pod_ => $i++, |
180
|
|
|
|
|
|
|
_in_skipped_ => $i++, |
181
|
|
|
|
|
|
|
_in_attribute_list_ => $i++, |
182
|
|
|
|
|
|
|
_in_quote_ => $i++, |
183
|
|
|
|
|
|
|
_quote_target_ => $i++, |
184
|
|
|
|
|
|
|
_line_start_quote_ => $i++, |
185
|
|
|
|
|
|
|
_starting_level_ => $i++, |
186
|
|
|
|
|
|
|
_know_starting_level_ => $i++, |
187
|
|
|
|
|
|
|
_last_line_number_ => $i++, |
188
|
|
|
|
|
|
|
_saw_perl_dash_P_ => $i++, |
189
|
|
|
|
|
|
|
_saw_perl_dash_w_ => $i++, |
190
|
|
|
|
|
|
|
_saw_use_strict_ => $i++, |
191
|
|
|
|
|
|
|
_saw_v_string_ => $i++, |
192
|
|
|
|
|
|
|
_saw_brace_error_ => $i++, |
193
|
|
|
|
|
|
|
_hit_bug_ => $i++, |
194
|
|
|
|
|
|
|
_look_for_autoloader_ => $i++, |
195
|
|
|
|
|
|
|
_look_for_selfloader_ => $i++, |
196
|
|
|
|
|
|
|
_saw_autoloader_ => $i++, |
197
|
|
|
|
|
|
|
_saw_selfloader_ => $i++, |
198
|
|
|
|
|
|
|
_saw_hash_bang_ => $i++, |
199
|
|
|
|
|
|
|
_saw_end_ => $i++, |
200
|
|
|
|
|
|
|
_saw_data_ => $i++, |
201
|
|
|
|
|
|
|
_saw_negative_indentation_ => $i++, |
202
|
|
|
|
|
|
|
_started_tokenizing_ => $i++, |
203
|
|
|
|
|
|
|
_debugger_object_ => $i++, |
204
|
|
|
|
|
|
|
_diagnostics_object_ => $i++, |
205
|
|
|
|
|
|
|
_logger_object_ => $i++, |
206
|
|
|
|
|
|
|
_unexpected_error_count_ => $i++, |
207
|
|
|
|
|
|
|
_started_looking_for_here_target_at_ => $i++, |
208
|
|
|
|
|
|
|
_nearly_matched_here_target_at_ => $i++, |
209
|
|
|
|
|
|
|
_line_of_text_ => $i++, |
210
|
|
|
|
|
|
|
_rlower_case_labels_at_ => $i++, |
211
|
|
|
|
|
|
|
_maximum_level_ => $i++, |
212
|
|
|
|
|
|
|
_true_brace_error_count_ => $i++, |
213
|
|
|
|
|
|
|
_rOpts_ => $i++, |
214
|
|
|
|
|
|
|
_rinput_lines_ => $i++, |
215
|
|
|
|
|
|
|
_input_line_index_next_ => $i++, |
216
|
39
|
|
|
39
|
|
290
|
}; |
|
39
|
|
|
|
|
116
|
|
217
|
|
|
|
|
|
|
} ## end BEGIN |
218
|
|
|
|
|
|
|
|
219
|
|
|
|
|
|
|
{ ## closure for subs to count instances |
220
|
|
|
|
|
|
|
|
221
|
|
|
|
|
|
|
# methods to count instances |
222
|
|
|
|
|
|
|
my $_count = 0; |
223
|
0
|
|
|
0
|
0
|
0
|
sub get_count { return $_count; } |
224
|
562
|
|
|
562
|
|
2495
|
sub _increment_count { return ++$_count } |
225
|
562
|
|
|
562
|
|
1343
|
sub _decrement_count { return --$_count } |
226
|
|
|
|
|
|
|
} |
227
|
|
|
|
|
|
|
|
228
|
|
|
|
|
|
|
sub DESTROY { |
229
|
562
|
|
|
562
|
|
1415
|
my $self = shift; |
230
|
562
|
|
|
|
|
3016
|
$self->_decrement_count(); |
231
|
562
|
|
|
|
|
8022
|
return; |
232
|
|
|
|
|
|
|
} |
233
|
|
|
|
|
|
|
|
234
|
|
|
|
|
|
|
sub AUTOLOAD { |
235
|
|
|
|
|
|
|
|
236
|
|
|
|
|
|
|
# Catch any undefined sub calls so that we are sure to get |
237
|
|
|
|
|
|
|
# some diagnostic information. This sub should never be called |
238
|
|
|
|
|
|
|
# except for a programming error. |
239
|
0
|
|
|
0
|
|
0
|
our $AUTOLOAD; |
240
|
0
|
0
|
|
|
|
0
|
return if ( $AUTOLOAD =~ /\bDESTROY$/ ); |
241
|
0
|
|
|
|
|
0
|
my ( $pkg, $fname, $lno ) = caller(); |
242
|
0
|
|
|
|
|
0
|
my $my_package = __PACKAGE__; |
243
|
0
|
|
|
|
|
0
|
print {*STDERR} <<EOM; |
|
0
|
|
|
|
|
0
|
|
244
|
|
|
|
|
|
|
====================================================================== |
245
|
|
|
|
|
|
|
Error detected in package '$my_package', version $VERSION |
246
|
|
|
|
|
|
|
Received unexpected AUTOLOAD call for sub '$AUTOLOAD' |
247
|
|
|
|
|
|
|
Called from package: '$pkg' |
248
|
|
|
|
|
|
|
Called from File '$fname' at line '$lno' |
249
|
|
|
|
|
|
|
This error is probably due to a recent programming change |
250
|
|
|
|
|
|
|
====================================================================== |
251
|
|
|
|
|
|
|
EOM |
252
|
0
|
|
|
|
|
0
|
exit 1; |
253
|
|
|
|
|
|
|
} ## end sub AUTOLOAD |
254
|
|
|
|
|
|
|
|
255
|
|
|
|
|
|
|
sub Die { |
256
|
0
|
|
|
0
|
0
|
0
|
my ($msg) = @_; |
257
|
0
|
|
|
|
|
0
|
Perl::Tidy::Die($msg); |
258
|
0
|
|
|
|
|
0
|
croak "unexpected return from Perl::Tidy::Die"; |
259
|
|
|
|
|
|
|
} |
260
|
|
|
|
|
|
|
|
261
|
|
|
|
|
|
|
sub Fault { |
262
|
0
|
|
|
0
|
0
|
0
|
my ( $self, $msg ) = @_; |
263
|
|
|
|
|
|
|
|
264
|
|
|
|
|
|
|
# This routine is called for errors that really should not occur |
265
|
|
|
|
|
|
|
# except if there has been a bug introduced by a recent program change. |
266
|
|
|
|
|
|
|
# Please add comments at calls to Fault to explain why the call |
267
|
|
|
|
|
|
|
# should not occur, and where to look to fix it. |
268
|
0
|
|
|
|
|
0
|
my ( $package0, $filename0, $line0, $subroutine0 ) = caller(0); |
269
|
0
|
|
|
|
|
0
|
my ( $package1, $filename1, $line1, $subroutine1 ) = caller(1); |
270
|
0
|
|
|
|
|
0
|
my ( $package2, $filename2, $line2, $subroutine2 ) = caller(2); |
271
|
0
|
|
|
|
|
0
|
my $pkg = __PACKAGE__; |
272
|
|
|
|
|
|
|
|
273
|
|
|
|
|
|
|
# Catch potential error of Fault not called as a method |
274
|
0
|
|
|
|
|
0
|
my $input_stream_name; |
275
|
0
|
0
|
|
|
|
0
|
if ( !ref($self) ) { |
276
|
0
|
|
|
|
|
0
|
$msg = "Fault not called as a method - please fix\n"; |
277
|
0
|
0
|
0
|
|
|
0
|
if ( $self && length($self) < 200 ) { $msg .= $self } |
|
0
|
|
|
|
|
0
|
|
278
|
0
|
|
|
|
|
0
|
$self = undef; |
279
|
0
|
|
|
|
|
0
|
$input_stream_name = "(UNKNOWN)"; |
280
|
|
|
|
|
|
|
} |
281
|
|
|
|
|
|
|
else { |
282
|
0
|
|
|
|
|
0
|
$input_stream_name = $self->get_input_stream_name(); |
283
|
|
|
|
|
|
|
} |
284
|
|
|
|
|
|
|
|
285
|
0
|
|
|
|
|
0
|
Die(<<EOM); |
286
|
|
|
|
|
|
|
============================================================================== |
287
|
|
|
|
|
|
|
While operating on input stream with name: '$input_stream_name' |
288
|
|
|
|
|
|
|
A fault was detected at line $line0 of sub '$subroutine1' |
289
|
|
|
|
|
|
|
in file '$filename1' |
290
|
|
|
|
|
|
|
which was called from line $line1 of sub '$subroutine2' |
291
|
|
|
|
|
|
|
Message: '$msg' |
292
|
|
|
|
|
|
|
This is probably an error introduced by a recent programming change. |
293
|
|
|
|
|
|
|
$pkg reports VERSION='$VERSION'. |
294
|
|
|
|
|
|
|
============================================================================== |
295
|
|
|
|
|
|
|
EOM |
296
|
|
|
|
|
|
|
|
297
|
|
|
|
|
|
|
# We shouldn't get here, but this return is to keep Perl-Critic from |
298
|
|
|
|
|
|
|
# complaining. |
299
|
0
|
|
|
|
|
0
|
return; |
300
|
|
|
|
|
|
|
} ## end sub Fault |
301
|
|
|
|
|
|
|
|
302
|
|
|
|
|
|
|
sub make_code_skipping_pattern { |
303
|
1120
|
|
|
1120
|
0
|
3122
|
my ( $rOpts, $opt_name, $default ) = @_; |
304
|
1120
|
|
|
|
|
2549
|
my $param = $rOpts->{$opt_name}; |
305
|
1120
|
100
|
|
|
|
2899
|
if ( !$param ) { $param = $default } |
|
1118
|
|
|
|
|
2042
|
|
306
|
1120
|
|
|
|
|
4634
|
$param =~ s/^\s*//; # allow leading spaces to be like format-skipping |
307
|
1120
|
50
|
|
|
|
5015
|
if ( $param !~ /^#/ ) { |
308
|
0
|
|
|
|
|
0
|
Die("ERROR: the $opt_name parameter '$param' must begin with '#'\n"); |
309
|
|
|
|
|
|
|
} |
310
|
1120
|
|
|
|
|
3561
|
my $pattern = '^\s*' . $param . '\b'; |
311
|
1120
|
50
|
|
|
|
3362
|
if ( Perl::Tidy::Formatter::bad_pattern($pattern) ) { |
312
|
0
|
|
|
|
|
0
|
Die( |
313
|
|
|
|
|
|
|
"ERROR: the $opt_name parameter '$param' causes the invalid regex '$pattern'\n" |
314
|
|
|
|
|
|
|
); |
315
|
|
|
|
|
|
|
} |
316
|
1120
|
|
|
|
|
3885
|
return $pattern; |
317
|
|
|
|
|
|
|
} ## end sub make_code_skipping_pattern |
318
|
|
|
|
|
|
|
|
319
|
|
|
|
|
|
|
sub check_options { |
320
|
|
|
|
|
|
|
|
321
|
|
|
|
|
|
|
# Check Tokenizer parameters |
322
|
560
|
|
|
560
|
0
|
1641
|
my $rOpts = shift; |
323
|
|
|
|
|
|
|
|
324
|
560
|
|
|
|
|
2221
|
%is_sub = (); |
325
|
560
|
|
|
|
|
1876
|
$is_sub{'sub'} = 1; |
326
|
|
|
|
|
|
|
|
327
|
560
|
|
|
|
|
4258
|
%is_END_DATA_format_sub = ( |
328
|
|
|
|
|
|
|
'__END__' => 1, |
329
|
|
|
|
|
|
|
'__DATA__' => 1, |
330
|
|
|
|
|
|
|
'format' => 1, |
331
|
|
|
|
|
|
|
'sub' => 1, |
332
|
|
|
|
|
|
|
); |
333
|
|
|
|
|
|
|
|
334
|
|
|
|
|
|
|
# Install any aliases to 'sub' |
335
|
560
|
100
|
|
|
|
2292
|
if ( $rOpts->{'sub-alias-list'} ) { |
336
|
|
|
|
|
|
|
|
337
|
|
|
|
|
|
|
# Note that any 'sub-alias-list' has been preprocessed to |
338
|
|
|
|
|
|
|
# be a trimmed, space-separated list which includes 'sub' |
339
|
|
|
|
|
|
|
# for example, it might be 'sub method fun' |
340
|
3
|
|
|
|
|
31
|
my @sub_alias_list = split /\s+/, $rOpts->{'sub-alias-list'}; |
341
|
3
|
|
|
|
|
17
|
foreach my $word (@sub_alias_list) { |
342
|
11
|
|
|
|
|
30
|
$is_sub{$word} = 1; |
343
|
11
|
|
|
|
|
24
|
$is_END_DATA_format_sub{$word} = 1; |
344
|
|
|
|
|
|
|
} |
345
|
|
|
|
|
|
|
} |
346
|
|
|
|
|
|
|
|
347
|
|
|
|
|
|
|
# Set global flag to say if we have to guess if bareword 'method' is |
348
|
|
|
|
|
|
|
# a sub when 'method' is in %is_sub. This will be true unless: |
349
|
|
|
|
|
|
|
# (1) the user entered 'method' as sub alias, or |
350
|
|
|
|
|
|
|
# (2) the user set --use-feature=class |
351
|
|
|
|
|
|
|
# In these two cases we can assume that 'method' is a sub alias. |
352
|
560
|
|
|
|
|
1406
|
$guess_if_method = 1; |
353
|
560
|
100
|
|
|
|
2168
|
if ( $is_sub{'method'} ) { $guess_if_method = 0 } |
|
2
|
|
|
|
|
7
|
|
354
|
|
|
|
|
|
|
|
355
|
|
|
|
|
|
|
#------------------------------------------------ |
356
|
|
|
|
|
|
|
# Update hash values for any -use-feature options |
357
|
|
|
|
|
|
|
#------------------------------------------------ |
358
|
|
|
|
|
|
|
|
359
|
560
|
|
|
|
|
1315
|
my $use_feature_class = 1; |
360
|
560
|
50
|
|
|
|
2133
|
if ( $rOpts->{'use-feature'} ) { |
361
|
0
|
0
|
|
|
|
0
|
if ( $rOpts->{'use-feature'} =~ /\bnoclass\b/ ) { |
|
|
0
|
|
|
|
|
|
362
|
0
|
|
|
|
|
0
|
$use_feature_class = 0; |
363
|
|
|
|
|
|
|
} |
364
|
|
|
|
|
|
|
elsif ( $rOpts->{'use-feature'} =~ /\bclass\b/ ) { |
365
|
0
|
|
|
|
|
0
|
$guess_if_method = 0; |
366
|
|
|
|
|
|
|
} |
367
|
|
|
|
|
|
|
else { |
368
|
|
|
|
|
|
|
## neither 'class' nor 'noclass' seen so use default |
369
|
|
|
|
|
|
|
} |
370
|
|
|
|
|
|
|
} |
371
|
|
|
|
|
|
|
|
372
|
|
|
|
|
|
|
# These are the main updates for this option. There are additional |
373
|
|
|
|
|
|
|
# changes elsewhere, usually indicated with a comment 'rt145706' |
374
|
|
|
|
|
|
|
|
375
|
|
|
|
|
|
|
# Update hash values for use_feature=class, added for rt145706 |
376
|
|
|
|
|
|
|
# see 'perlclass.pod' |
377
|
|
|
|
|
|
|
|
378
|
|
|
|
|
|
|
# IMPORTANT: We are changing global hash values initially set in a BEGIN |
379
|
|
|
|
|
|
|
# block. Values must be defined (true or false) for each of these new |
380
|
|
|
|
|
|
|
# words whether true or false. Otherwise, programs using the module which |
381
|
|
|
|
|
|
|
# change options between runs (such as test code) will have |
382
|
|
|
|
|
|
|
# incorrect settings and fail. |
383
|
|
|
|
|
|
|
|
384
|
|
|
|
|
|
|
# There are 4 new keywords: |
385
|
|
|
|
|
|
|
|
386
|
|
|
|
|
|
|
# 'class' - treated specially as generalization of 'package' |
387
|
|
|
|
|
|
|
# Note: we must not set 'class' to be a keyword to avoid problems |
388
|
|
|
|
|
|
|
# with older uses. |
389
|
560
|
|
|
|
|
2014
|
$is_package{'class'} = $use_feature_class; |
390
|
|
|
|
|
|
|
|
391
|
|
|
|
|
|
|
# 'method' - treated like sub using the sub-alias-list option |
392
|
|
|
|
|
|
|
# Note: we must not set 'method' to be a keyword to avoid problems |
393
|
|
|
|
|
|
|
# with older uses. |
394
|
560
|
50
|
|
|
|
1886
|
if ($use_feature_class) { |
395
|
560
|
|
|
|
|
1565
|
$is_sub{'method'} = 1; |
396
|
560
|
|
|
|
|
1433
|
$is_END_DATA_format_sub{'method'} = 1; |
397
|
|
|
|
|
|
|
} |
398
|
|
|
|
|
|
|
|
399
|
|
|
|
|
|
|
# 'field' - added as a keyword, and works like 'my' |
400
|
560
|
|
|
|
|
1680
|
$is_keyword{'field'} = $use_feature_class; |
401
|
560
|
|
|
|
|
1603
|
$is_my_our_state{'field'} = $use_feature_class; |
402
|
|
|
|
|
|
|
|
403
|
|
|
|
|
|
|
# 'ADJUST' - added as a keyword and works like 'BEGIN' |
404
|
|
|
|
|
|
|
# TODO: if ADJUST gets a paren list, this will need to be updated |
405
|
560
|
|
|
|
|
1363
|
$is_keyword{'ADJUST'} = $use_feature_class; |
406
|
560
|
|
|
|
|
1431
|
$is_code_block_token{'ADJUST'} = $use_feature_class; |
407
|
|
|
|
|
|
|
|
408
|
560
|
|
|
|
|
1984
|
%is_grep_alias = (); |
409
|
560
|
50
|
|
|
|
1939
|
if ( $rOpts->{'grep-alias-list'} ) { |
410
|
|
|
|
|
|
|
|
411
|
|
|
|
|
|
|
# Note that 'grep-alias-list' has been preprocessed to be a trimmed, |
412
|
|
|
|
|
|
|
# space-separated list |
413
|
560
|
|
|
|
|
5512
|
my @q = split /\s+/, $rOpts->{'grep-alias-list'}; |
414
|
560
|
|
|
|
|
5176
|
@{is_grep_alias}{@q} = (1) x scalar(@q); |
415
|
|
|
|
|
|
|
} |
416
|
|
|
|
|
|
|
|
417
|
560
|
|
|
|
|
2022
|
$rOpts_starting_indentation_level = $rOpts->{'starting-indentation-level'}; |
418
|
560
|
|
|
|
|
1614
|
$rOpts_indent_columns = $rOpts->{'indent-columns'}; |
419
|
560
|
|
|
|
|
1348
|
$rOpts_look_for_hash_bang = $rOpts->{'look-for-hash-bang'}; |
420
|
560
|
|
|
|
|
1296
|
$rOpts_look_for_autoloader = $rOpts->{'look-for-autoloader'}; |
421
|
560
|
|
|
|
|
1259
|
$rOpts_look_for_selfloader = $rOpts->{'look-for-selfloader'}; |
422
|
560
|
|
|
|
|
1269
|
$rOpts_trim_qw = $rOpts->{'trim-qw'}; |
423
|
560
|
|
|
|
|
1254
|
$rOpts_extended_syntax = $rOpts->{'extended-syntax'}; |
424
|
560
|
|
|
|
|
1252
|
$rOpts_continuation_indentation = $rOpts->{'continuation-indentation'}; |
425
|
560
|
|
|
|
|
1207
|
$rOpts_outdent_labels = $rOpts->{'outdent-labels'}; |
426
|
560
|
|
|
|
|
1221
|
$rOpts_maximum_level_errors = $rOpts->{'maximum-level-errors'}; |
427
|
560
|
|
|
|
|
1264
|
$rOpts_maximum_unexpected_errors = $rOpts->{'maximum-unexpected-errors'}; |
428
|
560
|
|
|
|
|
1214
|
$rOpts_code_skipping = $rOpts->{'code-skipping'}; |
429
|
560
|
|
|
|
|
1241
|
$rOpts_code_skipping_begin = $rOpts->{'code-skipping-begin'}; |
430
|
|
|
|
|
|
|
|
431
|
|
|
|
|
|
|
# In the Tokenizer, --indent-columns is just used for guessing old |
432
|
|
|
|
|
|
|
# indentation, and must be positive. If -i=0 is used for this run (which |
433
|
|
|
|
|
|
|
# is possible) we'll just guess that the old run used 4 spaces per level. |
434
|
560
|
100
|
|
|
|
2028
|
if ( !$rOpts_indent_columns ) { $rOpts_indent_columns = 4 } |
|
12
|
|
|
|
|
28
|
|
435
|
|
|
|
|
|
|
|
436
|
|
|
|
|
|
|
# Define $tabsize, the number of spaces per tab for use in |
437
|
|
|
|
|
|
|
# guessing the indentation of source lines with leading tabs. |
438
|
|
|
|
|
|
|
# Assume same as for this run if tabs are used, otherwise assume |
439
|
|
|
|
|
|
|
# a default value, typically 8 |
440
|
|
|
|
|
|
|
$tabsize = |
441
|
|
|
|
|
|
|
$rOpts->{'entab-leading-whitespace'} |
442
|
|
|
|
|
|
|
? $rOpts->{'entab-leading-whitespace'} |
443
|
|
|
|
|
|
|
: $rOpts->{'tabs'} ? $rOpts->{'indent-columns'} |
444
|
560
|
50
|
|
|
|
2769
|
: $rOpts->{'default-tabsize'}; |
|
|
100
|
|
|
|
|
|
445
|
560
|
50
|
|
|
|
2048
|
if ( !$tabsize ) { $tabsize = 8 } |
|
0
|
|
|
|
|
0
|
|
446
|
|
|
|
|
|
|
|
447
|
|
|
|
|
|
|
$code_skipping_pattern_begin = |
448
|
560
|
|
|
|
|
2699
|
make_code_skipping_pattern( $rOpts, 'code-skipping-begin', '#<<V' ); |
449
|
560
|
|
|
|
|
2045
|
$code_skipping_pattern_end = |
450
|
|
|
|
|
|
|
make_code_skipping_pattern( $rOpts, 'code-skipping-end', '#>>V' ); |
451
|
|
|
|
|
|
|
|
452
|
560
|
|
|
|
|
2001
|
return; |
453
|
|
|
|
|
|
|
} ## end sub check_options |
454
|
|
|
|
|
|
|
|
455
|
|
|
|
|
|
|
sub new { |
456
|
|
|
|
|
|
|
|
457
|
562
|
|
|
562
|
0
|
3192
|
my ( $class, @args ) = @_; |
458
|
|
|
|
|
|
|
|
459
|
562
|
|
|
|
|
5657
|
my %defaults = ( |
460
|
|
|
|
|
|
|
source_object => undef, |
461
|
|
|
|
|
|
|
debugger_object => undef, |
462
|
|
|
|
|
|
|
diagnostics_object => undef, |
463
|
|
|
|
|
|
|
logger_object => undef, |
464
|
|
|
|
|
|
|
starting_level => undef, |
465
|
|
|
|
|
|
|
starting_line_number => 1, |
466
|
|
|
|
|
|
|
rOpts => {}, |
467
|
|
|
|
|
|
|
); |
468
|
562
|
|
|
|
|
4155
|
my %args = ( %defaults, @args ); |
469
|
|
|
|
|
|
|
|
470
|
|
|
|
|
|
|
# we are given an object with a get_line() method to supply source lines |
471
|
562
|
|
|
|
|
2177
|
my $source_object = $args{source_object}; |
472
|
562
|
|
|
|
|
1491
|
my $rOpts = $args{rOpts}; |
473
|
|
|
|
|
|
|
|
474
|
|
|
|
|
|
|
# Check call args |
475
|
562
|
50
|
|
|
|
2166
|
if ( !defined($source_object) ) { |
476
|
0
|
|
|
|
|
0
|
Die( |
477
|
|
|
|
|
|
|
"Perl::Tidy::Tokenizer::new called without a 'source_object' parameter\n" |
478
|
|
|
|
|
|
|
); |
479
|
|
|
|
|
|
|
} |
480
|
562
|
50
|
|
|
|
2365
|
if ( !ref($source_object) ) { |
481
|
0
|
|
|
|
|
0
|
Die(<<EOM); |
482
|
|
|
|
|
|
|
sub Perl::Tidy::Tokenizer::new received a 'source_object' parameter which is not a reference; |
483
|
|
|
|
|
|
|
'source_object' must be a reference to a STRING, ARRAY, or object with a 'getline' method |
484
|
|
|
|
|
|
|
EOM |
485
|
|
|
|
|
|
|
} |
486
|
|
|
|
|
|
|
|
487
|
|
|
|
|
|
|
# Tokenizer state data is as follows: |
488
|
|
|
|
|
|
|
# _rhere_target_list_ reference to list of here-doc targets |
489
|
|
|
|
|
|
|
# _here_doc_target_ the target string for a here document |
490
|
|
|
|
|
|
|
# _here_quote_character_ the type of here-doc quoting (" ' ` or none) |
491
|
|
|
|
|
|
|
# to determine if interpolation is done |
492
|
|
|
|
|
|
|
# _quote_target_ character we seek if chasing a quote |
493
|
|
|
|
|
|
|
# _line_start_quote_ line where we started looking for a long quote |
494
|
|
|
|
|
|
|
# _in_here_doc_ flag indicating if we are in a here-doc |
495
|
|
|
|
|
|
|
# _in_pod_ flag set if we are in pod documentation |
496
|
|
|
|
|
|
|
# _in_skipped_ flag set if we are in a skipped section |
497
|
|
|
|
|
|
|
# _in_error_ flag set if we saw severe error (binary in script) |
498
|
|
|
|
|
|
|
# _in_trouble_ set if we saw a troublesome lexical like 'my sub s' |
499
|
|
|
|
|
|
|
# _warning_count_ number of calls to logger sub warning |
500
|
|
|
|
|
|
|
# _html_tag_count_ number of apparent html tags seen (indicates html) |
501
|
|
|
|
|
|
|
# _in_data_ flag set if we are in __DATA__ section |
502
|
|
|
|
|
|
|
# _in_end_ flag set if we are in __END__ section |
503
|
|
|
|
|
|
|
# _in_format_ flag set if we are in a format description |
504
|
|
|
|
|
|
|
# _in_attribute_list_ flag telling if we are looking for attributes |
505
|
|
|
|
|
|
|
# _in_quote_ flag telling if we are chasing a quote |
506
|
|
|
|
|
|
|
# _starting_level_ indentation level of first line |
507
|
|
|
|
|
|
|
# _diagnostics_object_ place to write debugging information |
508
|
|
|
|
|
|
|
# _unexpected_error_count_ error count used to limit output |
509
|
|
|
|
|
|
|
# _lower_case_labels_at_ line numbers where lower case labels seen |
510
|
|
|
|
|
|
|
# _hit_bug_ program bug detected |
511
|
|
|
|
|
|
|
|
512
|
562
|
|
|
|
|
1339
|
my $self = []; |
513
|
562
|
|
|
|
|
1543
|
$self->[_rhere_target_list_] = []; |
514
|
562
|
|
|
|
|
1524
|
$self->[_in_here_doc_] = 0; |
515
|
562
|
|
|
|
|
1865
|
$self->[_here_doc_target_] = EMPTY_STRING; |
516
|
562
|
|
|
|
|
1478
|
$self->[_here_quote_character_] = EMPTY_STRING; |
517
|
562
|
|
|
|
|
1499
|
$self->[_in_data_] = 0; |
518
|
562
|
|
|
|
|
1528
|
$self->[_in_end_] = 0; |
519
|
562
|
|
|
|
|
1566
|
$self->[_in_format_] = 0; |
520
|
562
|
|
|
|
|
1335
|
$self->[_in_error_] = 0; |
521
|
562
|
|
|
|
|
1467
|
$self->[_in_trouble_] = 0; |
522
|
562
|
|
|
|
|
1290
|
$self->[_warning_count_] = 0; |
523
|
562
|
|
|
|
|
1387
|
$self->[_html_tag_count_] = 0; |
524
|
562
|
|
|
|
|
1287
|
$self->[_in_pod_] = 0; |
525
|
562
|
|
|
|
|
1392
|
$self->[_in_skipped_] = 0; |
526
|
562
|
|
|
|
|
1364
|
$self->[_in_attribute_list_] = 0; |
527
|
562
|
|
|
|
|
1319
|
$self->[_in_quote_] = 0; |
528
|
562
|
|
|
|
|
1441
|
$self->[_quote_target_] = EMPTY_STRING; |
529
|
562
|
|
|
|
|
1266
|
$self->[_line_start_quote_] = -1; |
530
|
562
|
|
|
|
|
1330
|
$self->[_starting_level_] = $args{starting_level}; |
531
|
562
|
|
|
|
|
1888
|
$self->[_know_starting_level_] = defined( $args{starting_level} ); |
532
|
562
|
|
|
|
|
1877
|
$self->[_last_line_number_] = $args{starting_line_number} - 1; |
533
|
562
|
|
|
|
|
1405
|
$self->[_saw_perl_dash_P_] = 0; |
534
|
562
|
|
|
|
|
1219
|
$self->[_saw_perl_dash_w_] = 0; |
535
|
562
|
|
|
|
|
1411
|
$self->[_saw_use_strict_] = 0; |
536
|
562
|
|
|
|
|
1267
|
$self->[_saw_v_string_] = 0; |
537
|
562
|
|
|
|
|
1268
|
$self->[_saw_brace_error_] = 0; |
538
|
562
|
|
|
|
|
1212
|
$self->[_hit_bug_] = 0; |
539
|
562
|
|
|
|
|
1315
|
$self->[_look_for_autoloader_] = $rOpts_look_for_autoloader; |
540
|
562
|
|
|
|
|
1500
|
$self->[_look_for_selfloader_] = $rOpts_look_for_selfloader; |
541
|
562
|
|
|
|
|
1318
|
$self->[_saw_autoloader_] = 0; |
542
|
562
|
|
|
|
|
1626
|
$self->[_saw_selfloader_] = 0; |
543
|
562
|
|
|
|
|
1227
|
$self->[_saw_hash_bang_] = 0; |
544
|
562
|
|
|
|
|
1247
|
$self->[_saw_end_] = 0; |
545
|
562
|
|
|
|
|
1186
|
$self->[_saw_data_] = 0; |
546
|
562
|
|
|
|
|
1399
|
$self->[_saw_negative_indentation_] = 0; |
547
|
562
|
|
|
|
|
1273
|
$self->[_started_tokenizing_] = 0; |
548
|
562
|
|
|
|
|
1325
|
$self->[_debugger_object_] = $args{debugger_object}; |
549
|
562
|
|
|
|
|
1393
|
$self->[_diagnostics_object_] = $args{diagnostics_object}; |
550
|
562
|
|
|
|
|
1371
|
$self->[_logger_object_] = $args{logger_object}; |
551
|
562
|
|
|
|
|
1233
|
$self->[_unexpected_error_count_] = 0; |
552
|
562
|
|
|
|
|
1212
|
$self->[_started_looking_for_here_target_at_] = 0; |
553
|
562
|
|
|
|
|
1400
|
$self->[_nearly_matched_here_target_at_] = undef; |
554
|
562
|
|
|
|
|
1396
|
$self->[_line_of_text_] = EMPTY_STRING; |
555
|
562
|
|
|
|
|
1167
|
$self->[_rlower_case_labels_at_] = undef; |
556
|
562
|
|
|
|
|
1292
|
$self->[_maximum_level_] = 0; |
557
|
562
|
|
|
|
|
1304
|
$self->[_true_brace_error_count_] = 0; |
558
|
562
|
|
|
|
|
1283
|
$self->[_rOpts_] = $rOpts; |
559
|
|
|
|
|
|
|
|
560
|
562
|
|
|
|
|
1254
|
bless $self, $class; |
561
|
|
|
|
|
|
|
|
562
|
562
|
|
|
|
|
3422
|
$self->prepare_for_a_new_file($source_object); |
563
|
562
|
|
|
|
|
3268
|
$self->find_starting_indentation_level(); |
564
|
|
|
|
|
|
|
|
565
|
|
|
|
|
|
|
# This is not a full class yet, so die if an attempt is made to |
566
|
|
|
|
|
|
|
# create more than one object. |
567
|
|
|
|
|
|
|
|
568
|
562
|
50
|
|
|
|
2684
|
if ( _increment_count() > 1 ) { |
569
|
0
|
|
|
|
|
0
|
confess |
570
|
|
|
|
|
|
|
"Attempt to create more than 1 object in $class, which is not a true class yet\n"; |
571
|
|
|
|
|
|
|
} |
572
|
|
|
|
|
|
|
|
573
|
562
|
|
|
|
|
4259
|
return $self; |
574
|
|
|
|
|
|
|
|
575
|
|
|
|
|
|
|
} ## end sub new |
576
|
|
|
|
|
|
|
|
577
|
|
|
|
|
|
|
# Called externally |
578
|
|
|
|
|
|
|
sub get_unexpected_error_count { |
579
|
4
|
|
|
4
|
0
|
14
|
my ($self) = @_; |
580
|
4
|
|
|
|
|
19
|
return $self->[_unexpected_error_count_]; |
581
|
|
|
|
|
|
|
} |
582
|
|
|
|
|
|
|
|
583
|
|
|
|
|
|
|
# Called externally |
584
|
|
|
|
|
|
|
sub is_keyword { |
585
|
2796
|
|
|
2796
|
0
|
5090
|
my ($str) = @_; |
586
|
2796
|
|
|
|
|
10028
|
return $is_keyword{$str}; |
587
|
|
|
|
|
|
|
} |
588
|
|
|
|
|
|
|
|
589
|
|
|
|
|
|
|
#---------------------------------------------------------------- |
590
|
|
|
|
|
|
|
# Line input routines, previously handled by the LineBuffer class |
591
|
|
|
|
|
|
|
#---------------------------------------------------------------- |
592
|
|
|
|
|
|
|
sub make_source_array { |
593
|
|
|
|
|
|
|
|
594
|
562
|
|
|
562
|
0
|
1745
|
my ( $self, $line_source_object ) = @_; |
595
|
|
|
|
|
|
|
|
596
|
|
|
|
|
|
|
# Convert the source into an array of lines |
597
|
562
|
|
|
|
|
1464
|
my $rinput_lines = []; |
598
|
|
|
|
|
|
|
|
599
|
562
|
|
|
|
|
1589
|
my $rsource = ref($line_source_object); |
600
|
|
|
|
|
|
|
|
601
|
562
|
50
|
|
|
|
3677
|
if ( !$rsource ) { |
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
602
|
|
|
|
|
|
|
|
603
|
|
|
|
|
|
|
# shouldn't happen: this should have been checked in sub new |
604
|
0
|
|
|
|
|
0
|
$self->Fault(<<EOM); |
605
|
|
|
|
|
|
|
sub Perl::Tidy::Tokenizer::new received a 'source_object' parameter which is not a reference; |
606
|
|
|
|
|
|
|
'source_object' must be a reference to a STRING, ARRAY, or object with a 'getline' method |
607
|
|
|
|
|
|
|
EOM |
608
|
|
|
|
|
|
|
} |
609
|
|
|
|
|
|
|
|
610
|
|
|
|
|
|
|
# handle an ARRAY ref |
611
|
|
|
|
|
|
|
elsif ( $rsource eq 'ARRAY' ) { |
612
|
0
|
|
|
|
|
0
|
$rinput_lines = $line_source_object; |
613
|
|
|
|
|
|
|
} |
614
|
|
|
|
|
|
|
|
615
|
|
|
|
|
|
|
# handle a SCALAR ref |
616
|
|
|
|
|
|
|
elsif ( $rsource eq 'SCALAR' ) { |
617
|
562
|
|
|
|
|
1074
|
my @lines = split /^/, ${$line_source_object}; |
|
562
|
|
|
|
|
9247
|
|
618
|
562
|
|
|
|
|
2316
|
$rinput_lines = \@lines; |
619
|
|
|
|
|
|
|
} |
620
|
|
|
|
|
|
|
|
621
|
|
|
|
|
|
|
# handle an object - must have a get_line method |
622
|
|
|
|
|
|
|
else { |
623
|
0
|
|
|
|
|
0
|
while ( my $line = $line_source_object->get_line() ) { |
624
|
0
|
|
|
|
|
0
|
push( @{$rinput_lines}, $line ); |
|
0
|
|
|
|
|
0
|
|
625
|
|
|
|
|
|
|
} |
626
|
|
|
|
|
|
|
} |
627
|
|
|
|
|
|
|
|
628
|
562
|
|
|
|
|
2139
|
$self->[_rinput_lines_] = $rinput_lines; |
629
|
562
|
|
|
|
|
1566
|
$self->[_input_line_index_next_] = 0; |
630
|
562
|
|
|
|
|
1390
|
return; |
631
|
|
|
|
|
|
|
} ## end sub make_source_array |
632
|
|
|
|
|
|
|
|
633
|
|
|
|
|
|
|
sub peek_ahead { |
634
|
1235
|
|
|
1235
|
0
|
3077
|
my ( $self, $buffer_index ) = @_; |
635
|
|
|
|
|
|
|
|
636
|
|
|
|
|
|
|
# look $buffer_index lines ahead of the current location without disturbing |
637
|
|
|
|
|
|
|
# the input |
638
|
1235
|
|
|
|
|
2259
|
my $line; |
639
|
1235
|
|
|
|
|
2626
|
my $rinput_lines = $self->[_rinput_lines_]; |
640
|
1235
|
|
|
|
|
2687
|
my $line_index = $buffer_index + $self->[_input_line_index_next_]; |
641
|
1235
|
100
|
|
|
|
2156
|
if ( $line_index < @{$rinput_lines} ) { |
|
1235
|
|
|
|
|
3502
|
|
642
|
1223
|
|
|
|
|
2701
|
$line = $rinput_lines->[$line_index]; |
643
|
|
|
|
|
|
|
} |
644
|
1235
|
|
|
|
|
4880
|
return $line; |
645
|
|
|
|
|
|
|
} ## end sub peek_ahead |
646
|
|
|
|
|
|
|
|
647
|
|
|
|
|
|
|
#----------------------------------------- |
648
|
|
|
|
|
|
|
# interface to Perl::Tidy::Logger routines |
649
|
|
|
|
|
|
|
#----------------------------------------- |
650
|
|
|
|
|
|
|
sub warning { |
651
|
|
|
|
|
|
|
|
652
|
0
|
|
|
0
|
0
|
0
|
my ( $self, $msg ) = @_; |
653
|
|
|
|
|
|
|
|
654
|
0
|
|
|
|
|
0
|
my $logger_object = $self->[_logger_object_]; |
655
|
0
|
|
|
|
|
0
|
$self->[_warning_count_]++; |
656
|
0
|
0
|
|
|
|
0
|
if ($logger_object) { |
657
|
0
|
|
|
|
|
0
|
my $msg_line_number = $self->[_last_line_number_]; |
658
|
0
|
|
|
|
|
0
|
$logger_object->warning( $msg, $msg_line_number ); |
659
|
|
|
|
|
|
|
} |
660
|
0
|
|
|
|
|
0
|
return; |
661
|
|
|
|
|
|
|
} ## end sub warning |
662
|
|
|
|
|
|
|
|
663
|
|
|
|
|
|
|
sub get_input_stream_name { |
664
|
|
|
|
|
|
|
|
665
|
0
|
|
|
0
|
0
|
0
|
my $self = shift; |
666
|
|
|
|
|
|
|
|
667
|
0
|
|
|
|
|
0
|
my $input_stream_name = EMPTY_STRING; |
668
|
0
|
|
|
|
|
0
|
my $logger_object = $self->[_logger_object_]; |
669
|
0
|
0
|
|
|
|
0
|
if ($logger_object) { |
670
|
0
|
|
|
|
|
0
|
$input_stream_name = $logger_object->get_input_stream_name(); |
671
|
|
|
|
|
|
|
} |
672
|
0
|
|
|
|
|
0
|
return $input_stream_name; |
673
|
|
|
|
|
|
|
} ## end sub get_input_stream_name |
674
|
|
|
|
|
|
|
|
675
|
|
|
|
|
|
|
sub complain { |
676
|
|
|
|
|
|
|
|
677
|
32
|
|
|
32
|
0
|
105
|
my ( $self, $msg ) = @_; |
678
|
|
|
|
|
|
|
|
679
|
32
|
|
|
|
|
84
|
my $logger_object = $self->[_logger_object_]; |
680
|
32
|
50
|
|
|
|
112
|
if ($logger_object) { |
681
|
32
|
|
|
|
|
66
|
my $input_line_number = $self->[_last_line_number_]; |
682
|
32
|
|
|
|
|
179
|
$logger_object->complain( $msg, $input_line_number ); |
683
|
|
|
|
|
|
|
} |
684
|
32
|
|
|
|
|
95
|
return; |
685
|
|
|
|
|
|
|
} ## end sub complain |
686
|
|
|
|
|
|
|
|
687
|
|
|
|
|
|
|
sub write_logfile_entry { |
688
|
|
|
|
|
|
|
|
689
|
1860
|
|
|
1860
|
0
|
5072
|
my ( $self, $msg ) = @_; |
690
|
|
|
|
|
|
|
|
691
|
1860
|
|
|
|
|
3638
|
my $logger_object = $self->[_logger_object_]; |
692
|
1860
|
100
|
|
|
|
4637
|
if ($logger_object) { |
693
|
1854
|
|
|
|
|
5576
|
$logger_object->write_logfile_entry($msg); |
694
|
|
|
|
|
|
|
} |
695
|
1860
|
|
|
|
|
5933
|
return; |
696
|
|
|
|
|
|
|
} ## end sub write_logfile_entry |
697
|
|
|
|
|
|
|
|
698
|
|
|
|
|
|
|
sub interrupt_logfile { |
699
|
|
|
|
|
|
|
|
700
|
0
|
|
|
0
|
0
|
0
|
my $self = shift; |
701
|
|
|
|
|
|
|
|
702
|
0
|
|
|
|
|
0
|
my $logger_object = $self->[_logger_object_]; |
703
|
0
|
0
|
|
|
|
0
|
if ($logger_object) { |
704
|
0
|
|
|
|
|
0
|
$logger_object->interrupt_logfile(); |
705
|
|
|
|
|
|
|
} |
706
|
0
|
|
|
|
|
0
|
return; |
707
|
|
|
|
|
|
|
} ## end sub interrupt_logfile |
708
|
|
|
|
|
|
|
|
709
|
|
|
|
|
|
|
sub resume_logfile { |
710
|
|
|
|
|
|
|
|
711
|
0
|
|
|
0
|
0
|
0
|
my $self = shift; |
712
|
|
|
|
|
|
|
|
713
|
0
|
|
|
|
|
0
|
my $logger_object = $self->[_logger_object_]; |
714
|
0
|
0
|
|
|
|
0
|
if ($logger_object) { |
715
|
0
|
|
|
|
|
0
|
$logger_object->resume_logfile(); |
716
|
|
|
|
|
|
|
} |
717
|
0
|
|
|
|
|
0
|
return; |
718
|
|
|
|
|
|
|
} ## end sub resume_logfile |
719
|
|
|
|
|
|
|
|
720
|
|
|
|
|
|
|
sub brace_warning { |
721
|
0
|
|
|
0
|
0
|
0
|
my ( $self, $msg ) = @_; |
722
|
0
|
|
|
|
|
0
|
$self->[_saw_brace_error_]++; |
723
|
|
|
|
|
|
|
|
724
|
0
|
|
|
|
|
0
|
my $logger_object = $self->[_logger_object_]; |
725
|
0
|
0
|
|
|
|
0
|
if ($logger_object) { |
726
|
0
|
|
|
|
|
0
|
my $msg_line_number = $self->[_last_line_number_]; |
727
|
0
|
|
|
|
|
0
|
$logger_object->brace_warning( $msg, $msg_line_number ); |
728
|
|
|
|
|
|
|
} |
729
|
0
|
|
|
|
|
0
|
return; |
730
|
|
|
|
|
|
|
} ## end sub brace_warning |
731
|
|
|
|
|
|
|
|
732
|
|
|
|
|
|
|
sub increment_brace_error { |
733
|
|
|
|
|
|
|
|
734
|
|
|
|
|
|
|
# This is same as sub brace_warning but without a message |
735
|
0
|
|
|
0
|
0
|
0
|
my $self = shift; |
736
|
0
|
|
|
|
|
0
|
$self->[_saw_brace_error_]++; |
737
|
|
|
|
|
|
|
|
738
|
0
|
|
|
|
|
0
|
my $logger_object = $self->[_logger_object_]; |
739
|
0
|
0
|
|
|
|
0
|
if ($logger_object) { |
740
|
0
|
|
|
|
|
0
|
$logger_object->increment_brace_error(); |
741
|
|
|
|
|
|
|
} |
742
|
0
|
|
|
|
|
0
|
return; |
743
|
|
|
|
|
|
|
} ## end sub increment_brace_error |
744
|
|
|
|
|
|
|
|
745
|
|
|
|
|
|
|
sub get_saw_brace_error { |
746
|
0
|
|
|
0
|
0
|
0
|
my $self = shift; |
747
|
0
|
|
|
|
|
0
|
return $self->[_saw_brace_error_]; |
748
|
|
|
|
|
|
|
} ## end sub get_saw_brace_error |
749
|
|
|
|
|
|
|
|
750
|
|
|
|
|
|
|
sub report_definite_bug { |
751
|
0
|
|
|
0
|
0
|
0
|
my $self = shift; |
752
|
0
|
|
|
|
|
0
|
$self->[_hit_bug_] = 1; |
753
|
0
|
|
|
|
|
0
|
my $logger_object = $self->[_logger_object_]; |
754
|
0
|
0
|
|
|
|
0
|
if ($logger_object) { |
755
|
0
|
|
|
|
|
0
|
$logger_object->report_definite_bug(); |
756
|
|
|
|
|
|
|
} |
757
|
0
|
|
|
|
|
0
|
return; |
758
|
|
|
|
|
|
|
} ## end sub report_definite_bug |
759
|
|
|
|
|
|
|
|
760
|
|
|
|
|
|
|
#------------------------------------- |
761
|
|
|
|
|
|
|
# Interface to Perl::Tidy::Diagnostics |
762
|
|
|
|
|
|
|
#------------------------------------- |
763
|
|
|
|
|
|
|
sub write_diagnostics { |
764
|
0
|
|
|
0
|
0
|
0
|
my ( $self, $msg ) = @_; |
765
|
0
|
|
|
|
|
0
|
my $input_line_number = $self->[_last_line_number_]; |
766
|
0
|
|
|
|
|
0
|
my $diagnostics_object = $self->[_diagnostics_object_]; |
767
|
0
|
0
|
|
|
|
0
|
if ($diagnostics_object) { |
768
|
0
|
|
|
|
|
0
|
$diagnostics_object->write_diagnostics( $msg, $input_line_number ); |
769
|
|
|
|
|
|
|
} |
770
|
0
|
|
|
|
|
0
|
return; |
771
|
|
|
|
|
|
|
} ## end sub write_diagnostics |
772
|
|
|
|
|
|
|
|
773
|
|
|
|
|
|
|
sub report_tokenization_errors { |
774
|
|
|
|
|
|
|
|
775
|
562
|
|
|
562
|
0
|
1910
|
my ($self) = @_; |
776
|
|
|
|
|
|
|
|
777
|
|
|
|
|
|
|
# Report any tokenization errors and return a flag '$severe_error'. |
778
|
|
|
|
|
|
|
# Set $severe_error = 1 if the tokenization errors are so severe that |
779
|
|
|
|
|
|
|
# the formatter should not attempt to format the file. Instead, it will |
780
|
|
|
|
|
|
|
# just output the file verbatim. |
781
|
|
|
|
|
|
|
|
782
|
|
|
|
|
|
|
# set severe error flag if tokenizer has encountered file reading problems |
783
|
|
|
|
|
|
|
# (i.e. unexpected binary characters) |
784
|
|
|
|
|
|
|
# or code which may not be formatted correctly (such as 'my sub q') |
785
|
|
|
|
|
|
|
# The difference between _in_error_ and _in_trouble_ is that |
786
|
|
|
|
|
|
|
# _in_error_ stops the tokenizer immediately whereas |
787
|
|
|
|
|
|
|
# _in_trouble_ lets the tokenizer finish so that all errors are seen |
788
|
|
|
|
|
|
|
# Both block formatting and cause the input stream to be output verbatim. |
789
|
562
|
|
33
|
|
|
3616
|
my $severe_error = $self->[_in_error_] || $self->[_in_trouble_]; |
790
|
|
|
|
|
|
|
|
791
|
|
|
|
|
|
|
# And do not format if it looks like an html file (c209) |
792
|
562
|
|
33
|
|
|
4390
|
$severe_error ||= $self->[_html_tag_count_] && $self->[_warning_count_]; |
|
|
|
33
|
|
|
|
|
793
|
|
|
|
|
|
|
|
794
|
|
|
|
|
|
|
# Inform the logger object on length of input stream |
795
|
562
|
|
|
|
|
1884
|
my $logger_object = $self->[_logger_object_]; |
796
|
562
|
100
|
|
|
|
2076
|
if ($logger_object) { |
797
|
560
|
|
|
|
|
1403
|
my $last_line_number = $self->[_last_line_number_]; |
798
|
560
|
|
|
|
|
2906
|
$logger_object->set_last_input_line_number($last_line_number); |
799
|
|
|
|
|
|
|
} |
800
|
|
|
|
|
|
|
|
801
|
562
|
|
|
|
|
1485
|
my $maxle = $rOpts_maximum_level_errors; |
802
|
562
|
|
|
|
|
1244
|
my $maxue = $rOpts_maximum_unexpected_errors; |
803
|
562
|
50
|
|
|
|
1843
|
$maxle = 1 unless defined($maxle); |
804
|
562
|
50
|
|
|
|
1658
|
$maxue = 0 unless defined($maxue); |
805
|
|
|
|
|
|
|
|
806
|
562
|
|
|
|
|
2432
|
my $level = get_indentation_level(); |
807
|
562
|
50
|
|
|
|
2393
|
if ( $level != $self->[_starting_level_] ) { |
808
|
0
|
|
|
|
|
0
|
$self->warning("final indentation level: $level\n"); |
809
|
0
|
|
|
|
|
0
|
my $level_diff = $self->[_starting_level_] - $level; |
810
|
0
|
0
|
|
|
|
0
|
if ( $level_diff < 0 ) { $level_diff = -$level_diff } |
|
0
|
|
|
|
|
0
|
|
811
|
|
|
|
|
|
|
|
812
|
|
|
|
|
|
|
# Set severe error flag if the level error is greater than 1. |
813
|
|
|
|
|
|
|
# The formatter can function for any level error but it is probably |
814
|
|
|
|
|
|
|
# best not to attempt formatting for a high level error. |
815
|
0
|
0
|
0
|
|
|
0
|
if ( $maxle >= 0 && $level_diff > $maxle ) { |
816
|
0
|
|
|
|
|
0
|
$severe_error = 1; |
817
|
0
|
|
|
|
|
0
|
$self->warning(<<EOM); |
818
|
|
|
|
|
|
|
Formatting will be skipped because level error '$level_diff' exceeds -maxle=$maxle; use -maxle=-1 to force formatting |
819
|
|
|
|
|
|
|
EOM |
820
|
|
|
|
|
|
|
} |
821
|
|
|
|
|
|
|
} |
822
|
|
|
|
|
|
|
|
823
|
562
|
|
|
|
|
3213
|
$self->check_final_nesting_depths(); |
824
|
|
|
|
|
|
|
|
825
|
|
|
|
|
|
|
# Likewise, large numbers of brace errors usually indicate non-perl |
826
|
|
|
|
|
|
|
# scripts, so set the severe error flag at a low number. This is similar |
827
|
|
|
|
|
|
|
# to the level check, but different because braces may balance but be |
828
|
|
|
|
|
|
|
# incorrectly interlaced. |
829
|
562
|
50
|
|
|
|
2855
|
if ( $self->[_true_brace_error_count_] > 2 ) { |
830
|
0
|
|
|
|
|
0
|
$severe_error = 1; |
831
|
|
|
|
|
|
|
} |
832
|
|
|
|
|
|
|
|
833
|
562
|
50
|
66
|
|
|
2662
|
if ( $rOpts_look_for_hash_bang |
834
|
|
|
|
|
|
|
&& !$self->[_saw_hash_bang_] ) |
835
|
|
|
|
|
|
|
{ |
836
|
0
|
|
|
|
|
0
|
$self->warning( |
837
|
|
|
|
|
|
|
"hit EOF without seeing hash-bang line; maybe don't need -x?\n"); |
838
|
|
|
|
|
|
|
} |
839
|
|
|
|
|
|
|
|
840
|
562
|
50
|
|
|
|
1888
|
if ( $self->[_in_format_] ) { |
841
|
0
|
|
|
|
|
0
|
$self->warning("hit EOF while in format description\n"); |
842
|
|
|
|
|
|
|
} |
843
|
|
|
|
|
|
|
|
844
|
562
|
50
|
|
|
|
2012
|
if ( $self->[_in_skipped_] ) { |
845
|
0
|
|
|
|
|
0
|
$self->write_logfile_entry( |
846
|
|
|
|
|
|
|
"hit EOF while in lines skipped with --code-skipping\n"); |
847
|
|
|
|
|
|
|
} |
848
|
|
|
|
|
|
|
|
849
|
562
|
50
|
|
|
|
2051
|
if ( $self->[_in_pod_] ) { |
850
|
|
|
|
|
|
|
|
851
|
|
|
|
|
|
|
# Just write log entry if this is after __END__ or __DATA__ |
852
|
|
|
|
|
|
|
# because this happens to often, and it is not likely to be |
853
|
|
|
|
|
|
|
# a parsing error. |
854
|
0
|
0
|
0
|
|
|
0
|
if ( $self->[_saw_data_] || $self->[_saw_end_] ) { |
855
|
0
|
|
|
|
|
0
|
$self->write_logfile_entry( |
856
|
|
|
|
|
|
|
"hit eof while in pod documentation (no =cut seen)\n\tthis can cause trouble with some pod utilities\n" |
857
|
|
|
|
|
|
|
); |
858
|
|
|
|
|
|
|
} |
859
|
|
|
|
|
|
|
|
860
|
|
|
|
|
|
|
else { |
861
|
0
|
|
|
|
|
0
|
$self->complain( |
862
|
|
|
|
|
|
|
"hit eof while in pod documentation (no =cut seen)\n\tthis can cause trouble with some pod utilities\n" |
863
|
|
|
|
|
|
|
); |
864
|
|
|
|
|
|
|
} |
865
|
|
|
|
|
|
|
|
866
|
|
|
|
|
|
|
} |
867
|
|
|
|
|
|
|
|
868
|
562
|
50
|
|
|
|
2043
|
if ( $self->[_in_here_doc_] ) { |
869
|
0
|
|
|
|
|
0
|
$severe_error = 1; |
870
|
0
|
|
|
|
|
0
|
my $here_doc_target = $self->[_here_doc_target_]; |
871
|
0
|
|
|
|
|
0
|
my $started_looking_for_here_target_at = |
872
|
|
|
|
|
|
|
$self->[_started_looking_for_here_target_at_]; |
873
|
0
|
0
|
|
|
|
0
|
if ($here_doc_target) { |
874
|
0
|
|
|
|
|
0
|
$self->warning( |
875
|
|
|
|
|
|
|
"hit EOF in here document starting at line $started_looking_for_here_target_at with target: $here_doc_target\n" |
876
|
|
|
|
|
|
|
); |
877
|
|
|
|
|
|
|
} |
878
|
|
|
|
|
|
|
else { |
879
|
0
|
|
|
|
|
0
|
$self->warning(<<EOM); |
880
|
|
|
|
|
|
|
Hit EOF in here document starting at line $started_looking_for_here_target_at with empty target string. |
881
|
|
|
|
|
|
|
(Perl will match to the end of file but this may not be intended). |
882
|
|
|
|
|
|
|
EOM |
883
|
|
|
|
|
|
|
} |
884
|
0
|
|
|
|
|
0
|
my $nearly_matched_here_target_at = |
885
|
|
|
|
|
|
|
$self->[_nearly_matched_here_target_at_]; |
886
|
0
|
0
|
|
|
|
0
|
if ($nearly_matched_here_target_at) { |
887
|
0
|
|
|
|
|
0
|
$self->warning( |
888
|
|
|
|
|
|
|
"NOTE: almost matched at input line $nearly_matched_here_target_at except for whitespace\n" |
889
|
|
|
|
|
|
|
); |
890
|
|
|
|
|
|
|
} |
891
|
|
|
|
|
|
|
} |
892
|
|
|
|
|
|
|
|
893
|
|
|
|
|
|
|
# Something is seriously wrong if we ended inside a quote |
894
|
562
|
50
|
|
|
|
1966
|
if ( $self->[_in_quote_] ) { |
895
|
0
|
|
|
|
|
0
|
$severe_error = 1; |
896
|
0
|
|
|
|
|
0
|
my $line_start_quote = $self->[_line_start_quote_]; |
897
|
0
|
|
|
|
|
0
|
my $quote_target = $self->[_quote_target_]; |
898
|
0
|
0
|
|
|
|
0
|
my $what = |
899
|
|
|
|
|
|
|
( $self->[_in_attribute_list_] ) |
900
|
|
|
|
|
|
|
? "attribute list" |
901
|
|
|
|
|
|
|
: "quote/pattern"; |
902
|
0
|
|
|
|
|
0
|
$self->warning( |
903
|
|
|
|
|
|
|
"hit EOF seeking end of $what starting at line $line_start_quote ending in $quote_target\n" |
904
|
|
|
|
|
|
|
); |
905
|
|
|
|
|
|
|
} |
906
|
|
|
|
|
|
|
|
907
|
562
|
50
|
|
|
|
1891
|
if ( $self->[_hit_bug_] ) { |
908
|
0
|
|
|
|
|
0
|
$severe_error = 1; |
909
|
|
|
|
|
|
|
} |
910
|
|
|
|
|
|
|
|
911
|
|
|
|
|
|
|
# Multiple "unexpected" type tokenization errors usually indicate parsing |
912
|
|
|
|
|
|
|
# non-perl scripts, or that something is seriously wrong, so we should |
913
|
|
|
|
|
|
|
# avoid formatting them. This can happen for example if we run perltidy on |
914
|
|
|
|
|
|
|
# a shell script or an html file. But unfortunately this check can |
915
|
|
|
|
|
|
|
# interfere with some extended syntaxes, such as RPerl, so it has to be off |
916
|
|
|
|
|
|
|
# by default. |
917
|
562
|
|
|
|
|
1447
|
my $ue_count = $self->[_unexpected_error_count_]; |
918
|
562
|
50
|
33
|
|
|
2324
|
if ( $maxue > 0 && $ue_count > $maxue ) { |
919
|
0
|
|
|
|
|
0
|
$self->warning(<<EOM); |
920
|
|
|
|
|
|
|
Formatting will be skipped since unexpected token count = $ue_count > -maxue=$maxue; use -maxue=0 to force formatting |
921
|
|
|
|
|
|
|
EOM |
922
|
0
|
|
|
|
|
0
|
$severe_error = 1; |
923
|
|
|
|
|
|
|
} |
924
|
|
|
|
|
|
|
|
925
|
562
|
100
|
|
|
|
1952
|
if ( !$self->[_saw_perl_dash_w_] ) { |
926
|
546
|
50
|
|
|
|
2310
|
if ( $] < 5.006 ) { |
927
|
0
|
|
|
|
|
0
|
$self->write_logfile_entry("Suggest including '-w parameter'\n"); |
928
|
|
|
|
|
|
|
} |
929
|
|
|
|
|
|
|
else { |
930
|
546
|
|
|
|
|
2006
|
$self->write_logfile_entry("Suggest including 'use warnings;'\n"); |
931
|
|
|
|
|
|
|
} |
932
|
|
|
|
|
|
|
} |
933
|
|
|
|
|
|
|
|
934
|
562
|
50
|
|
|
|
3460
|
if ( $self->[_saw_perl_dash_P_] ) { |
935
|
0
|
|
|
|
|
0
|
$self->write_logfile_entry( |
936
|
|
|
|
|
|
|
"Use of -P parameter for defines is discouraged\n"); |
937
|
|
|
|
|
|
|
} |
938
|
|
|
|
|
|
|
|
939
|
562
|
100
|
|
|
|
2579
|
if ( !$self->[_saw_use_strict_] ) { |
940
|
548
|
|
|
|
|
1707
|
$self->write_logfile_entry("Suggest including 'use strict;'\n"); |
941
|
|
|
|
|
|
|
} |
942
|
|
|
|
|
|
|
|
943
|
|
|
|
|
|
|
# it is suggested that labels have at least one upper case character |
944
|
|
|
|
|
|
|
# for legibility and to avoid code breakage as new keywords are introduced |
945
|
562
|
100
|
|
|
|
3575
|
if ( $self->[_rlower_case_labels_at_] ) { |
946
|
12
|
|
|
|
|
38
|
my @lower_case_labels_at = @{ $self->[_rlower_case_labels_at_] }; |
|
12
|
|
|
|
|
55
|
|
947
|
12
|
|
|
|
|
47
|
$self->write_logfile_entry( |
948
|
|
|
|
|
|
|
"Suggest using upper case characters in label(s)\n"); |
949
|
12
|
|
|
|
|
72
|
local $LIST_SEPARATOR = ')('; |
950
|
12
|
|
|
|
|
99
|
$self->write_logfile_entry( |
951
|
|
|
|
|
|
|
" defined at line(s): (@lower_case_labels_at)\n"); |
952
|
|
|
|
|
|
|
} |
953
|
562
|
|
|
|
|
2156
|
return $severe_error; |
954
|
|
|
|
|
|
|
} ## end sub report_tokenization_errors |
955
|
|
|
|
|
|
|
|
956
|
|
|
|
|
|
|
sub report_v_string { |
957
|
|
|
|
|
|
|
|
958
|
|
|
|
|
|
|
# warn if this version can't handle v-strings |
959
|
2
|
|
|
2
|
0
|
8
|
my ( $self, $tok ) = @_; |
960
|
2
|
50
|
|
|
|
13
|
if ( !$self->[_saw_v_string_] ) { |
961
|
2
|
|
|
|
|
5
|
$self->[_saw_v_string_] = $self->[_last_line_number_]; |
962
|
|
|
|
|
|
|
} |
963
|
2
|
50
|
|
|
|
10
|
if ( $] < 5.006 ) { |
964
|
0
|
|
|
|
|
0
|
$self->warning( |
965
|
|
|
|
|
|
|
"Found v-string '$tok' but v-strings are not implemented in your version of perl; see Camel 3 book ch 2\n" |
966
|
|
|
|
|
|
|
); |
967
|
|
|
|
|
|
|
} |
968
|
2
|
|
|
|
|
6
|
return; |
969
|
|
|
|
|
|
|
} ## end sub report_v_string |
970
|
|
|
|
|
|
|
|
971
|
|
|
|
|
|
|
sub is_valid_token_type { |
972
|
3
|
|
|
3
|
0
|
6
|
my ($type) = @_; |
973
|
3
|
|
|
|
|
15
|
return $is_valid_token_type{$type}; |
974
|
|
|
|
|
|
|
} |
975
|
|
|
|
|
|
|
|
976
|
|
|
|
|
|
|
sub log_numbered_msg { |
977
|
167
|
|
|
167
|
0
|
458
|
my ( $self, $msg ) = @_; |
978
|
|
|
|
|
|
|
|
979
|
|
|
|
|
|
|
# write input line number + message to logfile |
980
|
167
|
|
|
|
|
350
|
my $input_line_number = $self->[_last_line_number_]; |
981
|
167
|
|
|
|
|
793
|
$self->write_logfile_entry("Line $input_line_number: $msg"); |
982
|
167
|
|
|
|
|
383
|
return; |
983
|
|
|
|
|
|
|
} ## end sub log_numbered_msg |
984
|
|
|
|
|
|
|
|
985
|
|
|
|
|
|
|
sub get_line { |
986
|
|
|
|
|
|
|
|
987
|
8230
|
|
|
8230
|
0
|
15243
|
my $self = shift; |
988
|
|
|
|
|
|
|
|
989
|
|
|
|
|
|
|
# Read the next input line and tokenize it |
990
|
|
|
|
|
|
|
# Returns: |
991
|
|
|
|
|
|
|
# $line_of_tokens = ref to hash of info for the tokenized line |
992
|
|
|
|
|
|
|
|
993
|
|
|
|
|
|
|
# USES GLOBAL VARIABLES: |
994
|
|
|
|
|
|
|
# $brace_depth, $square_bracket_depth, $paren_depth |
995
|
|
|
|
|
|
|
|
996
|
|
|
|
|
|
|
# get the next line from the input array |
997
|
8230
|
|
|
|
|
11566
|
my $input_line; |
998
|
8230
|
|
|
|
|
13964
|
my $rinput_lines = $self->[_rinput_lines_]; |
999
|
8230
|
|
|
|
|
12579
|
my $line_index = $self->[_input_line_index_next_]; |
1000
|
8230
|
100
|
|
|
|
12222
|
if ( $line_index < @{$rinput_lines} ) { |
|
8230
|
|
|
|
|
18590
|
|
1001
|
7668
|
|
|
|
|
16174
|
$input_line = $rinput_lines->[ $line_index++ ]; |
1002
|
7668
|
|
|
|
|
12299
|
$self->[_input_line_index_next_] = $line_index; |
1003
|
|
|
|
|
|
|
} |
1004
|
|
|
|
|
|
|
|
1005
|
8230
|
|
|
|
|
14061
|
$self->[_line_of_text_] = $input_line; |
1006
|
|
|
|
|
|
|
|
1007
|
8230
|
100
|
|
|
|
19902
|
return if ( !defined($input_line) ); |
1008
|
|
|
|
|
|
|
|
1009
|
7668
|
|
|
|
|
12308
|
my $input_line_number = ++$self->[_last_line_number_]; |
1010
|
|
|
|
|
|
|
|
1011
|
|
|
|
|
|
|
# Find and remove what characters terminate this line, including any |
1012
|
|
|
|
|
|
|
# control r |
1013
|
7668
|
|
|
|
|
11981
|
my $input_line_separator = EMPTY_STRING; |
1014
|
7668
|
100
|
|
|
|
19789
|
if ( chomp($input_line) ) { |
1015
|
7667
|
|
|
|
|
16826
|
$input_line_separator = $INPUT_RECORD_SEPARATOR; |
1016
|
|
|
|
|
|
|
} |
1017
|
|
|
|
|
|
|
|
1018
|
|
|
|
|
|
|
# The first test here very significantly speeds things up, but be sure to |
1019
|
|
|
|
|
|
|
# keep the regex and hash %other_line_endings the same. |
1020
|
7668
|
100
|
|
|
|
23205
|
if ( $other_line_endings{ substr( $input_line, -1 ) } ) { |
1021
|
24
|
50
|
|
|
|
238
|
if ( $input_line =~ s/([\r\035\032])+$// ) { |
1022
|
24
|
|
|
|
|
81
|
$input_line_separator = $1 . $input_line_separator; |
1023
|
|
|
|
|
|
|
} |
1024
|
|
|
|
|
|
|
} |
1025
|
|
|
|
|
|
|
|
1026
|
|
|
|
|
|
|
# for backwards compatibility we keep the line text terminated with |
1027
|
|
|
|
|
|
|
# a newline character |
1028
|
7668
|
|
|
|
|
15772
|
$input_line .= "\n"; |
1029
|
7668
|
|
|
|
|
13311
|
$self->[_line_of_text_] = $input_line; |
1030
|
|
|
|
|
|
|
|
1031
|
|
|
|
|
|
|
# create a data structure describing this line which will be |
1032
|
|
|
|
|
|
|
# returned to the caller. |
1033
|
|
|
|
|
|
|
|
1034
|
|
|
|
|
|
|
# _line_type codes are: |
1035
|
|
|
|
|
|
|
# SYSTEM - system-specific code before hash-bang line |
1036
|
|
|
|
|
|
|
# CODE - line of perl code (including comments) |
1037
|
|
|
|
|
|
|
# POD_START - line starting pod, such as '=head' |
1038
|
|
|
|
|
|
|
# POD - pod documentation text |
1039
|
|
|
|
|
|
|
# POD_END - last line of pod section, '=cut' |
1040
|
|
|
|
|
|
|
# HERE - text of here-document |
1041
|
|
|
|
|
|
|
# HERE_END - last line of here-doc (target word) |
1042
|
|
|
|
|
|
|
# FORMAT - format section |
1043
|
|
|
|
|
|
|
# FORMAT_END - last line of format section, '.' |
1044
|
|
|
|
|
|
|
# SKIP - code skipping section |
1045
|
|
|
|
|
|
|
# SKIP_END - last line of code skipping section, '#>>V' |
1046
|
|
|
|
|
|
|
# DATA_START - __DATA__ line |
1047
|
|
|
|
|
|
|
# DATA - unidentified text following __DATA__ |
1048
|
|
|
|
|
|
|
# END_START - __END__ line |
1049
|
|
|
|
|
|
|
# END - unidentified text following __END__ |
1050
|
|
|
|
|
|
|
# ERROR - we are in big trouble, probably not a perl script |
1051
|
|
|
|
|
|
|
|
1052
|
|
|
|
|
|
|
# Other variables: |
1053
|
|
|
|
|
|
|
# _curly_brace_depth - depth of curly braces at start of line |
1054
|
|
|
|
|
|
|
# _square_bracket_depth - depth of square brackets at start of line |
1055
|
|
|
|
|
|
|
# _paren_depth - depth of parens at start of line |
1056
|
|
|
|
|
|
|
# _starting_in_quote - this line continues a multi-line quote |
1057
|
|
|
|
|
|
|
# (so don't trim leading blanks!) |
1058
|
|
|
|
|
|
|
# _ending_in_quote - this line ends in a multi-line quote |
1059
|
|
|
|
|
|
|
# (so don't trim trailing blanks!) |
1060
|
7668
|
|
|
|
|
42755
|
my $line_of_tokens = { |
1061
|
|
|
|
|
|
|
_line_type => 'EOF', |
1062
|
|
|
|
|
|
|
_line_text => $input_line, |
1063
|
|
|
|
|
|
|
_line_number => $input_line_number, |
1064
|
|
|
|
|
|
|
_guessed_indentation_level => 0, |
1065
|
|
|
|
|
|
|
_curly_brace_depth => $brace_depth, |
1066
|
|
|
|
|
|
|
_square_bracket_depth => $square_bracket_depth, |
1067
|
|
|
|
|
|
|
_paren_depth => $paren_depth, |
1068
|
|
|
|
|
|
|
_quote_character => EMPTY_STRING, |
1069
|
|
|
|
|
|
|
## Skip these needless initializations for efficiency: |
1070
|
|
|
|
|
|
|
## _rtoken_type => undef, |
1071
|
|
|
|
|
|
|
## _rtokens => undef, |
1072
|
|
|
|
|
|
|
## _rlevels => undef, |
1073
|
|
|
|
|
|
|
## _rblock_type => undef, |
1074
|
|
|
|
|
|
|
## _rtype_sequence => undef, |
1075
|
|
|
|
|
|
|
## _rci_levels => undef, |
1076
|
|
|
|
|
|
|
## _starting_in_quote => 0, |
1077
|
|
|
|
|
|
|
## _ending_in_quote => 0, |
1078
|
|
|
|
|
|
|
}; |
1079
|
|
|
|
|
|
|
|
1080
|
|
|
|
|
|
|
# must print line unchanged if we are in a here document |
1081
|
7668
|
100
|
|
|
|
40835
|
if ( $self->[_in_here_doc_] ) { |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
1082
|
|
|
|
|
|
|
|
1083
|
24
|
|
|
|
|
91
|
$line_of_tokens->{_line_type} = 'HERE'; |
1084
|
24
|
|
|
|
|
73
|
my $here_doc_target = $self->[_here_doc_target_]; |
1085
|
24
|
|
|
|
|
56
|
my $here_quote_character = $self->[_here_quote_character_]; |
1086
|
24
|
|
|
|
|
49
|
my $candidate_target = $input_line; |
1087
|
24
|
|
|
|
|
49
|
chomp $candidate_target; |
1088
|
|
|
|
|
|
|
|
1089
|
|
|
|
|
|
|
# Handle <<~ targets, which are indicated here by a leading space on |
1090
|
|
|
|
|
|
|
# the here quote character |
1091
|
24
|
100
|
|
|
|
97
|
if ( $here_quote_character =~ /^\s/ ) { |
1092
|
4
|
|
|
|
|
15
|
$candidate_target =~ s/^\s*//; |
1093
|
|
|
|
|
|
|
} |
1094
|
24
|
100
|
|
|
|
80
|
if ( $candidate_target eq $here_doc_target ) { |
1095
|
9
|
|
|
|
|
49
|
$self->[_nearly_matched_here_target_at_] = undef; |
1096
|
9
|
|
|
|
|
26
|
$line_of_tokens->{_line_type} = 'HERE_END'; |
1097
|
9
|
|
|
|
|
56
|
$self->log_numbered_msg("Exiting HERE document $here_doc_target\n"); |
1098
|
|
|
|
|
|
|
|
1099
|
9
|
|
|
|
|
35
|
my $rhere_target_list = $self->[_rhere_target_list_]; |
1100
|
9
|
50
|
|
|
|
41
|
if ( @{$rhere_target_list} ) { # there can be multiple here targets |
|
9
|
|
|
|
|
40
|
|
1101
|
|
|
|
|
|
|
( $here_doc_target, $here_quote_character ) = |
1102
|
0
|
|
|
|
|
0
|
@{ shift @{$rhere_target_list} }; |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
1103
|
0
|
|
|
|
|
0
|
$self->[_here_doc_target_] = $here_doc_target; |
1104
|
0
|
|
|
|
|
0
|
$self->[_here_quote_character_] = $here_quote_character; |
1105
|
0
|
|
|
|
|
0
|
$self->log_numbered_msg( |
1106
|
|
|
|
|
|
|
"Entering HERE document $here_doc_target\n"); |
1107
|
0
|
|
|
|
|
0
|
$self->[_nearly_matched_here_target_at_] = undef; |
1108
|
0
|
|
|
|
|
0
|
$self->[_started_looking_for_here_target_at_] = |
1109
|
|
|
|
|
|
|
$input_line_number; |
1110
|
|
|
|
|
|
|
} |
1111
|
|
|
|
|
|
|
else { |
1112
|
9
|
|
|
|
|
33
|
$self->[_in_here_doc_] = 0; |
1113
|
9
|
|
|
|
|
24
|
$self->[_here_doc_target_] = EMPTY_STRING; |
1114
|
9
|
|
|
|
|
28
|
$self->[_here_quote_character_] = EMPTY_STRING; |
1115
|
|
|
|
|
|
|
} |
1116
|
|
|
|
|
|
|
} |
1117
|
|
|
|
|
|
|
|
1118
|
|
|
|
|
|
|
# check for error of extra whitespace |
1119
|
|
|
|
|
|
|
# note for PERL6: leading whitespace is allowed |
1120
|
|
|
|
|
|
|
else { |
1121
|
15
|
|
|
|
|
146
|
$candidate_target =~ s/\s*$//; |
1122
|
15
|
|
|
|
|
78
|
$candidate_target =~ s/^\s*//; |
1123
|
15
|
50
|
|
|
|
64
|
if ( $candidate_target eq $here_doc_target ) { |
1124
|
0
|
|
|
|
|
0
|
$self->[_nearly_matched_here_target_at_] = $input_line_number; |
1125
|
|
|
|
|
|
|
} |
1126
|
|
|
|
|
|
|
} |
1127
|
24
|
|
|
|
|
99
|
return $line_of_tokens; |
1128
|
|
|
|
|
|
|
} |
1129
|
|
|
|
|
|
|
|
1130
|
|
|
|
|
|
|
# Print line unchanged if we are in a format section |
1131
|
|
|
|
|
|
|
elsif ( $self->[_in_format_] ) { |
1132
|
|
|
|
|
|
|
|
1133
|
3
|
100
|
|
|
|
28
|
if ( $input_line =~ /^\.[\s#]*$/ ) { |
1134
|
|
|
|
|
|
|
|
1135
|
|
|
|
|
|
|
# Decrement format depth count at a '.' after a 'format' |
1136
|
1
|
|
|
|
|
14
|
$self->[_in_format_]--; |
1137
|
|
|
|
|
|
|
|
1138
|
|
|
|
|
|
|
# This is the end when count reaches 0 |
1139
|
1
|
50
|
|
|
|
7
|
if ( !$self->[_in_format_] ) { |
1140
|
1
|
|
|
|
|
7
|
$self->log_numbered_msg("Exiting format section\n"); |
1141
|
1
|
|
|
|
|
3
|
$line_of_tokens->{_line_type} = 'FORMAT_END'; |
1142
|
|
|
|
|
|
|
|
1143
|
|
|
|
|
|
|
# Make the tokenizer mark an opening brace which follows |
1144
|
|
|
|
|
|
|
# as a code block. Fixes issue c202/t032. |
1145
|
1
|
|
|
|
|
4
|
$last_nonblank_token = ';'; |
1146
|
1
|
|
|
|
|
7
|
$last_nonblank_type = ';'; |
1147
|
|
|
|
|
|
|
} |
1148
|
|
|
|
|
|
|
} |
1149
|
|
|
|
|
|
|
else { |
1150
|
2
|
|
|
|
|
6
|
$line_of_tokens->{_line_type} = 'FORMAT'; |
1151
|
2
|
50
|
|
|
|
10
|
if ( $input_line =~ /^\s*format\s+\w+/ ) { |
1152
|
|
|
|
|
|
|
|
1153
|
|
|
|
|
|
|
# Increment format depth count at a 'format' within a 'format' |
1154
|
|
|
|
|
|
|
# This is a simple way to handle nested formats (issue c019). |
1155
|
0
|
|
|
|
|
0
|
$self->[_in_format_]++; |
1156
|
|
|
|
|
|
|
} |
1157
|
|
|
|
|
|
|
} |
1158
|
3
|
|
|
|
|
11
|
return $line_of_tokens; |
1159
|
|
|
|
|
|
|
} |
1160
|
|
|
|
|
|
|
|
1161
|
|
|
|
|
|
|
# must print line unchanged if we are in pod documentation |
1162
|
|
|
|
|
|
|
elsif ( $self->[_in_pod_] ) { |
1163
|
|
|
|
|
|
|
|
1164
|
47
|
|
|
|
|
114
|
$line_of_tokens->{_line_type} = 'POD'; |
1165
|
47
|
100
|
|
|
|
231
|
if ( $input_line =~ /^=cut/ ) { |
1166
|
20
|
|
|
|
|
74
|
$line_of_tokens->{_line_type} = 'POD_END'; |
1167
|
20
|
|
|
|
|
80
|
$self->log_numbered_msg("Exiting POD section\n"); |
1168
|
20
|
|
|
|
|
54
|
$self->[_in_pod_] = 0; |
1169
|
|
|
|
|
|
|
} |
1170
|
47
|
50
|
33
|
|
|
149
|
if ( $input_line =~ /^\#\!.*perl\b/ && !$self->[_in_end_] ) { |
1171
|
0
|
|
|
|
|
0
|
$self->warning( |
1172
|
|
|
|
|
|
|
"Hash-bang in pod can cause older versions of perl to fail! \n" |
1173
|
|
|
|
|
|
|
); |
1174
|
|
|
|
|
|
|
} |
1175
|
|
|
|
|
|
|
|
1176
|
47
|
|
|
|
|
181
|
return $line_of_tokens; |
1177
|
|
|
|
|
|
|
} |
1178
|
|
|
|
|
|
|
|
1179
|
|
|
|
|
|
|
# print line unchanged if in skipped section |
1180
|
|
|
|
|
|
|
elsif ( $self->[_in_skipped_] ) { |
1181
|
|
|
|
|
|
|
|
1182
|
8
|
|
|
|
|
22
|
$line_of_tokens->{_line_type} = 'SKIP'; |
1183
|
8
|
100
|
|
|
|
93
|
if ( $input_line =~ /$code_skipping_pattern_end/ ) { |
|
|
50
|
|
|
|
|
|
1184
|
2
|
|
|
|
|
11
|
$line_of_tokens->{_line_type} = 'SKIP_END'; |
1185
|
2
|
|
|
|
|
10
|
$self->log_numbered_msg("Exiting code-skipping section\n"); |
1186
|
2
|
|
|
|
|
8
|
$self->[_in_skipped_] = 0; |
1187
|
|
|
|
|
|
|
} |
1188
|
|
|
|
|
|
|
elsif ( $input_line =~ /$code_skipping_pattern_begin/ ) { |
1189
|
|
|
|
|
|
|
|
1190
|
|
|
|
|
|
|
# warn of duplicate starting comment lines, git #118 |
1191
|
0
|
|
|
|
|
0
|
my $lno = $self->[_in_skipped_]; |
1192
|
0
|
|
|
|
|
0
|
$self->warning( |
1193
|
|
|
|
|
|
|
"Already in code-skipping section which started at line $lno\n" |
1194
|
|
|
|
|
|
|
); |
1195
|
|
|
|
|
|
|
} |
1196
|
|
|
|
|
|
|
else { |
1197
|
|
|
|
|
|
|
## ok - not a code-skipping control line |
1198
|
|
|
|
|
|
|
} |
1199
|
8
|
|
|
|
|
32
|
return $line_of_tokens; |
1200
|
|
|
|
|
|
|
} |
1201
|
|
|
|
|
|
|
|
1202
|
|
|
|
|
|
|
# must print line unchanged if we have seen a severe error (i.e., we |
1203
|
|
|
|
|
|
|
# are seeing illegal tokens and cannot continue. Syntax errors do |
1204
|
|
|
|
|
|
|
# not pass this route). Calling routine can decide what to do, but |
1205
|
|
|
|
|
|
|
# the default can be to just pass all lines as if they were after __END__ |
1206
|
|
|
|
|
|
|
elsif ( $self->[_in_error_] ) { |
1207
|
0
|
|
|
|
|
0
|
$line_of_tokens->{_line_type} = 'ERROR'; |
1208
|
0
|
|
|
|
|
0
|
return $line_of_tokens; |
1209
|
|
|
|
|
|
|
} |
1210
|
|
|
|
|
|
|
|
1211
|
|
|
|
|
|
|
# print line unchanged if we are __DATA__ section |
1212
|
|
|
|
|
|
|
elsif ( $self->[_in_data_] ) { |
1213
|
|
|
|
|
|
|
|
1214
|
|
|
|
|
|
|
# ...but look for POD |
1215
|
|
|
|
|
|
|
# Note that the _in_data and _in_end flags remain set |
1216
|
|
|
|
|
|
|
# so that we return to that state after seeing the |
1217
|
|
|
|
|
|
|
# end of a pod section |
1218
|
1
|
50
|
33
|
|
|
9
|
if ( $input_line =~ /^=(\w+)\b/ && $1 ne 'cut' ) { |
1219
|
0
|
|
|
|
|
0
|
$line_of_tokens->{_line_type} = 'POD_START'; |
1220
|
0
|
|
|
|
|
0
|
$self->log_numbered_msg("Entering POD section\n"); |
1221
|
0
|
|
|
|
|
0
|
$self->[_in_pod_] = 1; |
1222
|
0
|
|
|
|
|
0
|
return $line_of_tokens; |
1223
|
|
|
|
|
|
|
} |
1224
|
|
|
|
|
|
|
else { |
1225
|
1
|
|
|
|
|
3
|
$line_of_tokens->{_line_type} = 'DATA'; |
1226
|
1
|
|
|
|
|
5
|
return $line_of_tokens; |
1227
|
|
|
|
|
|
|
} |
1228
|
|
|
|
|
|
|
} |
1229
|
|
|
|
|
|
|
|
1230
|
|
|
|
|
|
|
# print line unchanged if we are in __END__ section |
1231
|
|
|
|
|
|
|
elsif ( $self->[_in_end_] ) { |
1232
|
|
|
|
|
|
|
|
1233
|
|
|
|
|
|
|
# ...but look for POD |
1234
|
|
|
|
|
|
|
# Note that the _in_data and _in_end flags remain set |
1235
|
|
|
|
|
|
|
# so that we return to that state after seeing the |
1236
|
|
|
|
|
|
|
# end of a pod section |
1237
|
48
|
100
|
66
|
|
|
273
|
if ( $input_line =~ /^=(\w+)\b/ && $1 ne 'cut' ) { |
1238
|
6
|
|
|
|
|
34
|
$line_of_tokens->{_line_type} = 'POD_START'; |
1239
|
6
|
|
|
|
|
39
|
$self->log_numbered_msg("Entering POD section\n"); |
1240
|
6
|
|
|
|
|
34
|
$self->[_in_pod_] = 1; |
1241
|
6
|
|
|
|
|
27
|
return $line_of_tokens; |
1242
|
|
|
|
|
|
|
} |
1243
|
|
|
|
|
|
|
else { |
1244
|
42
|
|
|
|
|
75
|
$line_of_tokens->{_line_type} = 'END'; |
1245
|
42
|
|
|
|
|
119
|
return $line_of_tokens; |
1246
|
|
|
|
|
|
|
} |
1247
|
|
|
|
|
|
|
} |
1248
|
|
|
|
|
|
|
else { |
1249
|
|
|
|
|
|
|
## ok |
1250
|
|
|
|
|
|
|
} |
1251
|
|
|
|
|
|
|
|
1252
|
|
|
|
|
|
|
# check for a hash-bang line if we haven't seen one |
1253
|
7537
|
100
|
100
|
|
|
34371
|
if ( !$self->[_saw_hash_bang_] |
|
|
|
66
|
|
|
|
|
1254
|
|
|
|
|
|
|
&& substr( $input_line, 0, 2 ) eq '#!' |
1255
|
|
|
|
|
|
|
&& $input_line =~ /^\#\!.*perl\b/ ) |
1256
|
|
|
|
|
|
|
{ |
1257
|
15
|
|
|
|
|
62
|
$self->[_saw_hash_bang_] = $input_line_number; |
1258
|
|
|
|
|
|
|
|
1259
|
|
|
|
|
|
|
# check for -w and -P flags |
1260
|
15
|
50
|
|
|
|
100
|
if ( $input_line =~ /^\#\!.*perl\s.*-.*P/ ) { |
1261
|
0
|
|
|
|
|
0
|
$self->[_saw_perl_dash_P_] = 1; |
1262
|
|
|
|
|
|
|
} |
1263
|
|
|
|
|
|
|
|
1264
|
15
|
100
|
|
|
|
95
|
if ( $input_line =~ /^\#\!.*perl\s.*-.*w/ ) { |
1265
|
8
|
|
|
|
|
24
|
$self->[_saw_perl_dash_w_] = 1; |
1266
|
|
|
|
|
|
|
} |
1267
|
|
|
|
|
|
|
|
1268
|
15
|
100
|
33
|
|
|
138
|
if ( |
|
|
|
66
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
66
|
|
|
|
|
1269
|
|
|
|
|
|
|
$input_line_number > 1 |
1270
|
|
|
|
|
|
|
|
1271
|
|
|
|
|
|
|
# leave any hash bang in a BEGIN block alone |
1272
|
|
|
|
|
|
|
# i.e. see 'debugger-duck_type.t' |
1273
|
|
|
|
|
|
|
&& !( |
1274
|
|
|
|
|
|
|
$last_nonblank_block_type |
1275
|
|
|
|
|
|
|
&& $last_nonblank_block_type eq 'BEGIN' |
1276
|
|
|
|
|
|
|
) |
1277
|
|
|
|
|
|
|
&& !$rOpts_look_for_hash_bang |
1278
|
|
|
|
|
|
|
|
1279
|
|
|
|
|
|
|
# Try to avoid giving a false alarm at a simple comment. |
1280
|
|
|
|
|
|
|
# These look like valid hash-bang lines: |
1281
|
|
|
|
|
|
|
|
1282
|
|
|
|
|
|
|
#!/usr/bin/perl -w |
1283
|
|
|
|
|
|
|
#! /usr/bin/perl -w |
1284
|
|
|
|
|
|
|
#!c:\perl\bin\perl.exe |
1285
|
|
|
|
|
|
|
|
1286
|
|
|
|
|
|
|
# These are comments: |
1287
|
|
|
|
|
|
|
#! I love perl |
1288
|
|
|
|
|
|
|
#! sunos does not yet provide a /usr/bin/perl |
1289
|
|
|
|
|
|
|
|
1290
|
|
|
|
|
|
|
# Comments typically have multiple spaces, which suggests |
1291
|
|
|
|
|
|
|
# the filter |
1292
|
|
|
|
|
|
|
&& $input_line =~ /^\#\!(\s+)?(\S+)?perl/ |
1293
|
|
|
|
|
|
|
) |
1294
|
|
|
|
|
|
|
{ |
1295
|
|
|
|
|
|
|
|
1296
|
|
|
|
|
|
|
# this is helpful for VMS systems; we may have accidentally |
1297
|
|
|
|
|
|
|
# tokenized some DCL commands |
1298
|
1
|
50
|
|
|
|
5
|
if ( $self->[_started_tokenizing_] ) { |
1299
|
0
|
|
|
|
|
0
|
$self->warning( |
1300
|
|
|
|
|
|
|
"There seems to be a hash-bang after line 1; do you need to run with -x ?\n" |
1301
|
|
|
|
|
|
|
); |
1302
|
|
|
|
|
|
|
} |
1303
|
|
|
|
|
|
|
else { |
1304
|
1
|
|
|
|
|
5
|
$self->complain("Useless hash-bang after line 1\n"); |
1305
|
|
|
|
|
|
|
} |
1306
|
|
|
|
|
|
|
} |
1307
|
|
|
|
|
|
|
|
1308
|
|
|
|
|
|
|
# Report the leading hash-bang as a system line |
1309
|
|
|
|
|
|
|
# This will prevent -dac from deleting it |
1310
|
|
|
|
|
|
|
else { |
1311
|
14
|
|
|
|
|
60
|
$line_of_tokens->{_line_type} = 'SYSTEM'; |
1312
|
14
|
|
|
|
|
97
|
return $line_of_tokens; |
1313
|
|
|
|
|
|
|
} |
1314
|
|
|
|
|
|
|
} |
1315
|
|
|
|
|
|
|
|
1316
|
|
|
|
|
|
|
# wait for a hash-bang before parsing if the user invoked us with -x |
1317
|
7523
|
100
|
100
|
|
|
18648
|
if ( $rOpts_look_for_hash_bang |
1318
|
|
|
|
|
|
|
&& !$self->[_saw_hash_bang_] ) |
1319
|
|
|
|
|
|
|
{ |
1320
|
5
|
|
|
|
|
8
|
$line_of_tokens->{_line_type} = 'SYSTEM'; |
1321
|
5
|
|
|
|
|
18
|
return $line_of_tokens; |
1322
|
|
|
|
|
|
|
} |
1323
|
|
|
|
|
|
|
|
1324
|
|
|
|
|
|
|
# a first line of the form ': #' will be marked as SYSTEM |
1325
|
|
|
|
|
|
|
# since lines of this form may be used by tcsh |
1326
|
7518
|
50
|
66
|
|
|
18877
|
if ( $input_line_number == 1 && $input_line =~ /^\s*\:\s*\#/ ) { |
1327
|
0
|
|
|
|
|
0
|
$line_of_tokens->{_line_type} = 'SYSTEM'; |
1328
|
0
|
|
|
|
|
0
|
return $line_of_tokens; |
1329
|
|
|
|
|
|
|
} |
1330
|
|
|
|
|
|
|
|
1331
|
|
|
|
|
|
|
# now we know that it is ok to tokenize the line... |
1332
|
|
|
|
|
|
|
# the line tokenizer will modify any of these private variables: |
1333
|
|
|
|
|
|
|
# _rhere_target_list_ |
1334
|
|
|
|
|
|
|
# _in_data_ |
1335
|
|
|
|
|
|
|
# _in_end_ |
1336
|
|
|
|
|
|
|
# _in_format_ |
1337
|
|
|
|
|
|
|
# _in_error_ |
1338
|
|
|
|
|
|
|
# _in_skipped_ |
1339
|
|
|
|
|
|
|
# _in_pod_ |
1340
|
|
|
|
|
|
|
# _in_quote_ |
1341
|
7518
|
|
|
|
|
23512
|
$self->tokenize_this_line($line_of_tokens); |
1342
|
|
|
|
|
|
|
|
1343
|
|
|
|
|
|
|
# Now finish defining the return structure and return it |
1344
|
7518
|
|
|
|
|
15959
|
$line_of_tokens->{_ending_in_quote} = $self->[_in_quote_]; |
1345
|
|
|
|
|
|
|
|
1346
|
|
|
|
|
|
|
# handle severe error (binary data in script) |
1347
|
7518
|
50
|
|
|
|
17578
|
if ( $self->[_in_error_] ) { |
1348
|
0
|
|
|
|
|
0
|
$self->[_in_quote_] = 0; # to avoid any more messages |
1349
|
0
|
|
|
|
|
0
|
$self->warning("Giving up after error\n"); |
1350
|
0
|
|
|
|
|
0
|
$line_of_tokens->{_line_type} = 'ERROR'; |
1351
|
0
|
|
|
|
|
0
|
reset_indentation_level(0); # avoid error messages |
1352
|
0
|
|
|
|
|
0
|
return $line_of_tokens; |
1353
|
|
|
|
|
|
|
} |
1354
|
|
|
|
|
|
|
|
1355
|
|
|
|
|
|
|
# handle start of pod documentation |
1356
|
7518
|
100
|
|
|
|
15941
|
if ( $self->[_in_pod_] ) { |
1357
|
|
|
|
|
|
|
|
1358
|
|
|
|
|
|
|
# This gets tricky..above a __DATA__ or __END__ section, perl |
1359
|
|
|
|
|
|
|
# accepts '=cut' as the start of pod section. But afterwards, |
1360
|
|
|
|
|
|
|
# only pod utilities see it and they may ignore an =cut without |
1361
|
|
|
|
|
|
|
# leading =head. In any case, this isn't good. |
1362
|
14
|
50
|
|
|
|
83
|
if ( $input_line =~ /^=cut\b/ ) { |
1363
|
0
|
0
|
0
|
|
|
0
|
if ( $self->[_saw_data_] || $self->[_saw_end_] ) { |
1364
|
0
|
|
|
|
|
0
|
$self->complain("=cut while not in pod ignored\n"); |
1365
|
0
|
|
|
|
|
0
|
$self->[_in_pod_] = 0; |
1366
|
0
|
|
|
|
|
0
|
$line_of_tokens->{_line_type} = 'POD_END'; |
1367
|
|
|
|
|
|
|
} |
1368
|
|
|
|
|
|
|
else { |
1369
|
0
|
|
|
|
|
0
|
$line_of_tokens->{_line_type} = 'POD_START'; |
1370
|
0
|
|
|
|
|
0
|
if ( !DEVEL_MODE ) { |
1371
|
0
|
|
|
|
|
0
|
$self->warning( |
1372
|
|
|
|
|
|
|
"=cut starts a pod section .. this can fool pod utilities.\n" |
1373
|
|
|
|
|
|
|
); |
1374
|
|
|
|
|
|
|
} |
1375
|
0
|
|
|
|
|
0
|
$self->log_numbered_msg("Entering POD section\n"); |
1376
|
|
|
|
|
|
|
} |
1377
|
|
|
|
|
|
|
} |
1378
|
|
|
|
|
|
|
|
1379
|
|
|
|
|
|
|
else { |
1380
|
14
|
|
|
|
|
45
|
$line_of_tokens->{_line_type} = 'POD_START'; |
1381
|
14
|
|
|
|
|
54
|
$self->log_numbered_msg("Entering POD section\n"); |
1382
|
|
|
|
|
|
|
} |
1383
|
|
|
|
|
|
|
|
1384
|
14
|
|
|
|
|
71
|
return $line_of_tokens; |
1385
|
|
|
|
|
|
|
} |
1386
|
|
|
|
|
|
|
|
1387
|
|
|
|
|
|
|
# handle start of skipped section |
1388
|
7504
|
100
|
|
|
|
15048
|
if ( $self->[_in_skipped_] ) { |
1389
|
|
|
|
|
|
|
|
1390
|
2
|
|
|
|
|
12
|
$line_of_tokens->{_line_type} = 'SKIP'; |
1391
|
2
|
|
|
|
|
12
|
$self->log_numbered_msg("Entering code-skipping section\n"); |
1392
|
2
|
|
|
|
|
9
|
return $line_of_tokens; |
1393
|
|
|
|
|
|
|
} |
1394
|
|
|
|
|
|
|
|
1395
|
|
|
|
|
|
|
# see if this line contains here doc targets |
1396
|
7502
|
|
|
|
|
11986
|
my $rhere_target_list = $self->[_rhere_target_list_]; |
1397
|
7502
|
100
|
|
|
|
10457
|
if ( @{$rhere_target_list} ) { |
|
7502
|
|
|
|
|
16825
|
|
1398
|
|
|
|
|
|
|
|
1399
|
|
|
|
|
|
|
my ( $here_doc_target, $here_quote_character ) = |
1400
|
9
|
|
|
|
|
61
|
@{ shift @{$rhere_target_list} }; |
|
9
|
|
|
|
|
33
|
|
|
9
|
|
|
|
|
51
|
|
1401
|
9
|
|
|
|
|
37
|
$self->[_in_here_doc_] = 1; |
1402
|
9
|
|
|
|
|
27
|
$self->[_here_doc_target_] = $here_doc_target; |
1403
|
9
|
|
|
|
|
28
|
$self->[_here_quote_character_] = $here_quote_character; |
1404
|
9
|
|
|
|
|
74
|
$self->log_numbered_msg("Entering HERE document $here_doc_target\n"); |
1405
|
9
|
|
|
|
|
38
|
$self->[_started_looking_for_here_target_at_] = $input_line_number; |
1406
|
|
|
|
|
|
|
} |
1407
|
|
|
|
|
|
|
|
1408
|
|
|
|
|
|
|
# NOTE: __END__ and __DATA__ statements are written unformatted |
1409
|
|
|
|
|
|
|
# because they can theoretically contain additional characters |
1410
|
|
|
|
|
|
|
# which are not tokenized (and cannot be read with <DATA> either!). |
1411
|
7502
|
100
|
|
|
|
20544
|
if ( $self->[_in_data_] ) { |
|
|
100
|
|
|
|
|
|
1412
|
1
|
|
|
|
|
4
|
$line_of_tokens->{_line_type} = 'DATA_START'; |
1413
|
1
|
|
|
|
|
6
|
$self->log_numbered_msg("Starting __DATA__ section\n"); |
1414
|
1
|
|
|
|
|
3
|
$self->[_saw_data_] = 1; |
1415
|
|
|
|
|
|
|
|
1416
|
|
|
|
|
|
|
# keep parsing after __DATA__ if use SelfLoader was seen |
1417
|
1
|
50
|
|
|
|
3
|
if ( $self->[_saw_selfloader_] ) { |
1418
|
0
|
|
|
|
|
0
|
$self->[_in_data_] = 0; |
1419
|
0
|
|
|
|
|
0
|
$self->log_numbered_msg( |
1420
|
|
|
|
|
|
|
"SelfLoader seen, continuing; -nlsl deactivates\n"); |
1421
|
|
|
|
|
|
|
} |
1422
|
|
|
|
|
|
|
|
1423
|
1
|
|
|
|
|
6
|
return $line_of_tokens; |
1424
|
|
|
|
|
|
|
} |
1425
|
|
|
|
|
|
|
|
1426
|
|
|
|
|
|
|
elsif ( $self->[_in_end_] ) { |
1427
|
6
|
|
|
|
|
31
|
$line_of_tokens->{_line_type} = 'END_START'; |
1428
|
6
|
|
|
|
|
33
|
$self->log_numbered_msg("Starting __END__ section\n"); |
1429
|
6
|
|
|
|
|
16
|
$self->[_saw_end_] = 1; |
1430
|
|
|
|
|
|
|
|
1431
|
|
|
|
|
|
|
# keep parsing after __END__ if use AutoLoader was seen |
1432
|
6
|
50
|
|
|
|
30
|
if ( $self->[_saw_autoloader_] ) { |
1433
|
0
|
|
|
|
|
0
|
$self->[_in_end_] = 0; |
1434
|
0
|
|
|
|
|
0
|
$self->log_numbered_msg( |
1435
|
|
|
|
|
|
|
"AutoLoader seen, continuing; -nlal deactivates\n"); |
1436
|
|
|
|
|
|
|
} |
1437
|
6
|
|
|
|
|
33
|
return $line_of_tokens; |
1438
|
|
|
|
|
|
|
} |
1439
|
|
|
|
|
|
|
else { |
1440
|
|
|
|
|
|
|
## ok: not in __END__ or __DATA__ |
1441
|
|
|
|
|
|
|
} |
1442
|
|
|
|
|
|
|
|
1443
|
|
|
|
|
|
|
# now, finally, we know that this line is type 'CODE' |
1444
|
7495
|
|
|
|
|
14161
|
$line_of_tokens->{_line_type} = 'CODE'; |
1445
|
|
|
|
|
|
|
|
1446
|
|
|
|
|
|
|
# remember if we have seen any real code |
1447
|
7495
|
100
|
100
|
|
|
24100
|
if ( !$self->[_started_tokenizing_] |
|
|
|
100
|
|
|
|
|
1448
|
|
|
|
|
|
|
&& $input_line !~ /^\s*$/ |
1449
|
|
|
|
|
|
|
&& $input_line !~ /^\s*#/ ) |
1450
|
|
|
|
|
|
|
{ |
1451
|
558
|
|
|
|
|
2411
|
$self->[_started_tokenizing_] = 1; |
1452
|
|
|
|
|
|
|
} |
1453
|
|
|
|
|
|
|
|
1454
|
7495
|
100
|
|
|
|
15908
|
if ( $self->[_debugger_object_] ) { |
1455
|
7
|
|
|
|
|
38
|
$self->[_debugger_object_]->write_debug_entry($line_of_tokens); |
1456
|
|
|
|
|
|
|
} |
1457
|
|
|
|
|
|
|
|
1458
|
|
|
|
|
|
|
# Note: if keyword 'format' occurs in this line code, it is still CODE |
1459
|
|
|
|
|
|
|
# (keyword 'format' need not start a line) |
1460
|
7495
|
100
|
|
|
|
15580
|
if ( $self->[_in_format_] ) { |
1461
|
1
|
|
|
|
|
22
|
$self->log_numbered_msg("Entering format section\n"); |
1462
|
|
|
|
|
|
|
} |
1463
|
|
|
|
|
|
|
|
1464
|
7495
|
100
|
100
|
|
|
28246
|
if ( $self->[_in_quote_] |
|
|
100
|
100
|
|
|
|
|
1465
|
|
|
|
|
|
|
and ( $self->[_line_start_quote_] < 0 ) ) |
1466
|
|
|
|
|
|
|
{ |
1467
|
|
|
|
|
|
|
|
1468
|
|
|
|
|
|
|
#if ( ( my $quote_target = get_quote_target() ) !~ /^\s*$/ ) { |
1469
|
49
|
100
|
|
|
|
391
|
if ( ( my $quote_target = $self->[_quote_target_] ) !~ /^\s*$/ ) { |
1470
|
48
|
|
|
|
|
130
|
$self->[_line_start_quote_] = $input_line_number; |
1471
|
48
|
|
|
|
|
341
|
$self->log_numbered_msg( |
1472
|
|
|
|
|
|
|
"Start multi-line quote or pattern ending in $quote_target\n"); |
1473
|
|
|
|
|
|
|
} |
1474
|
|
|
|
|
|
|
} |
1475
|
|
|
|
|
|
|
elsif ( ( $self->[_line_start_quote_] >= 0 ) |
1476
|
|
|
|
|
|
|
&& !$self->[_in_quote_] ) |
1477
|
|
|
|
|
|
|
{ |
1478
|
48
|
|
|
|
|
166
|
$self->[_line_start_quote_] = -1; |
1479
|
48
|
|
|
|
|
196
|
$self->log_numbered_msg("End of multi-line quote or pattern\n"); |
1480
|
|
|
|
|
|
|
} |
1481
|
|
|
|
|
|
|
else { |
1482
|
|
|
|
|
|
|
## ok |
1483
|
|
|
|
|
|
|
} |
1484
|
|
|
|
|
|
|
|
1485
|
|
|
|
|
|
|
# we are returning a line of CODE |
1486
|
7495
|
|
|
|
|
29868
|
return $line_of_tokens; |
1487
|
|
|
|
|
|
|
} ## end sub get_line |
1488
|
|
|
|
|
|
|
|
1489
|
|
|
|
|
|
|
sub find_starting_indentation_level { |
1490
|
|
|
|
|
|
|
|
1491
|
|
|
|
|
|
|
# We need to find the indentation level of the first line of the |
1492
|
|
|
|
|
|
|
# script being formatted. Often it will be zero for an entire file, |
1493
|
|
|
|
|
|
|
# but if we are formatting a local block of code (within an editor for |
1494
|
|
|
|
|
|
|
# example) it may not be zero. The user may specify this with the |
1495
|
|
|
|
|
|
|
# -sil=n parameter but normally doesn't so we have to guess. |
1496
|
|
|
|
|
|
|
# |
1497
|
562
|
|
|
562
|
0
|
1814
|
my ($self) = @_; |
1498
|
562
|
|
|
|
|
1433
|
my $starting_level = 0; |
1499
|
|
|
|
|
|
|
|
1500
|
|
|
|
|
|
|
# use value if given as parameter |
1501
|
562
|
100
|
|
|
|
2813
|
if ( $self->[_know_starting_level_] ) { |
|
|
100
|
|
|
|
|
|
1502
|
1
|
|
|
|
|
4
|
$starting_level = $self->[_starting_level_]; |
1503
|
|
|
|
|
|
|
} |
1504
|
|
|
|
|
|
|
|
1505
|
|
|
|
|
|
|
# if we know there is a hash_bang line, the level must be zero |
1506
|
|
|
|
|
|
|
elsif ($rOpts_look_for_hash_bang) { |
1507
|
1
|
|
|
|
|
3
|
$self->[_know_starting_level_] = 1; |
1508
|
|
|
|
|
|
|
} |
1509
|
|
|
|
|
|
|
|
1510
|
|
|
|
|
|
|
# otherwise figure it out from the input file |
1511
|
|
|
|
|
|
|
else { |
1512
|
560
|
|
|
|
|
1530
|
my $line; |
1513
|
560
|
|
|
|
|
1119
|
my $i = 0; |
1514
|
|
|
|
|
|
|
|
1515
|
|
|
|
|
|
|
# keep looking at lines until we find a hash bang or piece of code |
1516
|
|
|
|
|
|
|
# ( or, for now, an =pod line) |
1517
|
560
|
|
|
|
|
1311
|
my $msg = EMPTY_STRING; |
1518
|
560
|
|
|
|
|
1178
|
my $in_code_skipping; |
1519
|
560
|
|
|
|
|
2899
|
while ( $line = $self->peek_ahead( $i++ ) ) { |
1520
|
|
|
|
|
|
|
|
1521
|
|
|
|
|
|
|
# if first line is #! then assume starting level is zero |
1522
|
871
|
100
|
100
|
|
|
5640
|
if ( $i == 1 && $line =~ /^\#\!/ ) { |
1523
|
13
|
|
|
|
|
40
|
$starting_level = 0; |
1524
|
13
|
|
|
|
|
43
|
last; |
1525
|
|
|
|
|
|
|
} |
1526
|
|
|
|
|
|
|
|
1527
|
|
|
|
|
|
|
# ignore lines fenced off with code-skipping comments |
1528
|
858
|
100
|
|
|
|
4602
|
if ( $line =~ /^\s*#/ ) { |
1529
|
296
|
50
|
|
|
|
1129
|
if ( !$in_code_skipping ) { |
1530
|
296
|
50
|
33
|
|
|
2988
|
if ( $rOpts_code_skipping |
1531
|
|
|
|
|
|
|
&& $line =~ /$code_skipping_pattern_begin/ ) |
1532
|
|
|
|
|
|
|
{ |
1533
|
0
|
|
|
|
|
0
|
$in_code_skipping = 1; |
1534
|
|
|
|
|
|
|
} |
1535
|
|
|
|
|
|
|
} |
1536
|
|
|
|
|
|
|
else { |
1537
|
0
|
0
|
|
|
|
0
|
if ( $line =~ /$code_skipping_pattern_end/ ) { |
1538
|
0
|
|
|
|
|
0
|
$in_code_skipping = 0; |
1539
|
|
|
|
|
|
|
} |
1540
|
|
|
|
|
|
|
} |
1541
|
296
|
|
|
|
|
1030
|
next; |
1542
|
|
|
|
|
|
|
} |
1543
|
562
|
50
|
|
|
|
2356
|
next if ($in_code_skipping); |
1544
|
|
|
|
|
|
|
|
1545
|
562
|
100
|
|
|
|
3712
|
next if ( $line =~ /^\s*$/ ); # skip past blank lines |
1546
|
|
|
|
|
|
|
|
1547
|
544
|
|
|
|
|
2778
|
$starting_level = $self->guess_old_indentation_level($line); |
1548
|
544
|
|
|
|
|
1374
|
last; |
1549
|
|
|
|
|
|
|
} |
1550
|
560
|
|
|
|
|
2662
|
$msg = "Line $i implies starting-indentation-level = $starting_level\n"; |
1551
|
560
|
|
|
|
|
3021
|
$self->write_logfile_entry("$msg"); |
1552
|
|
|
|
|
|
|
} |
1553
|
562
|
|
|
|
|
2315
|
$self->[_starting_level_] = $starting_level; |
1554
|
562
|
|
|
|
|
3659
|
reset_indentation_level($starting_level); |
1555
|
562
|
|
|
|
|
1103
|
return; |
1556
|
|
|
|
|
|
|
} ## end sub find_starting_indentation_level |
1557
|
|
|
|
|
|
|
|
1558
|
|
|
|
|
|
|
sub guess_old_indentation_level { |
1559
|
544
|
|
|
544
|
0
|
1803
|
my ( $self, $line ) = @_; |
1560
|
|
|
|
|
|
|
|
1561
|
|
|
|
|
|
|
# Guess the indentation level of an input line. |
1562
|
|
|
|
|
|
|
# |
1563
|
|
|
|
|
|
|
# For the first line of code this result will define the starting |
1564
|
|
|
|
|
|
|
# indentation level. It will mainly be non-zero when perltidy is applied |
1565
|
|
|
|
|
|
|
# within an editor to a local block of code. |
1566
|
|
|
|
|
|
|
# |
1567
|
|
|
|
|
|
|
# This is an impossible task in general because we can't know what tabs |
1568
|
|
|
|
|
|
|
# meant for the old script and how many spaces were used for one |
1569
|
|
|
|
|
|
|
# indentation level in the given input script. For example it may have |
1570
|
|
|
|
|
|
|
# been previously formatted with -i=7 -et=3. But we can at least try to |
1571
|
|
|
|
|
|
|
# make sure that perltidy guesses correctly if it is applied repeatedly to |
1572
|
|
|
|
|
|
|
# a block of code within an editor, so that the block stays at the same |
1573
|
|
|
|
|
|
|
# level when perltidy is applied repeatedly. |
1574
|
|
|
|
|
|
|
# |
1575
|
|
|
|
|
|
|
# USES GLOBAL VARIABLES: (none) |
1576
|
544
|
|
|
|
|
1190
|
my $level = 0; |
1577
|
|
|
|
|
|
|
|
1578
|
|
|
|
|
|
|
# find leading tabs, spaces, and any statement label |
1579
|
544
|
|
|
|
|
1150
|
my $spaces = 0; |
1580
|
544
|
50
|
|
|
|
4607
|
if ( $line =~ /^(\t+)?(\s+)?(\w+:[^:])?/ ) { |
1581
|
|
|
|
|
|
|
|
1582
|
|
|
|
|
|
|
# If there are leading tabs, we use the tab scheme for this run, if |
1583
|
|
|
|
|
|
|
# any, so that the code will remain stable when editing. |
1584
|
544
|
100
|
|
|
|
2655
|
if ($1) { $spaces += length($1) * $tabsize } |
|
2
|
|
|
|
|
10
|
|
1585
|
|
|
|
|
|
|
|
1586
|
544
|
100
|
|
|
|
2379
|
if ($2) { $spaces += length($2) } |
|
79
|
|
|
|
|
301
|
|
1587
|
|
|
|
|
|
|
|
1588
|
|
|
|
|
|
|
# correct for outdented labels |
1589
|
544
|
100
|
66
|
|
|
2744
|
if ( $3 && $rOpts_outdent_labels ) { |
1590
|
1
|
|
|
|
|
6
|
$spaces += $rOpts_continuation_indentation; |
1591
|
|
|
|
|
|
|
} |
1592
|
|
|
|
|
|
|
} |
1593
|
|
|
|
|
|
|
|
1594
|
544
|
|
|
|
|
2118
|
$level = int( $spaces / $rOpts_indent_columns ); |
1595
|
544
|
|
|
|
|
1444
|
return ($level); |
1596
|
|
|
|
|
|
|
} ## end sub guess_old_indentation_level |
1597
|
|
|
|
|
|
|
|
1598
|
|
|
|
|
|
|
# This is a currently unused debug routine |
1599
|
|
|
|
|
|
|
sub dump_functions { |
1600
|
|
|
|
|
|
|
|
1601
|
0
|
|
|
0
|
0
|
0
|
my $fh = *STDOUT; |
1602
|
0
|
|
|
|
|
0
|
foreach my $pkg ( keys %{$ris_user_function} ) { |
|
0
|
|
|
|
|
0
|
|
1603
|
0
|
|
|
|
|
0
|
$fh->print("\nnon-constant subs in package $pkg\n"); |
1604
|
|
|
|
|
|
|
|
1605
|
0
|
|
|
|
|
0
|
foreach my $sub ( keys %{ $ris_user_function->{$pkg} } ) { |
|
0
|
|
|
|
|
0
|
|
1606
|
0
|
|
|
|
|
0
|
my $msg = EMPTY_STRING; |
1607
|
0
|
0
|
|
|
|
0
|
if ( $ris_block_list_function->{$pkg}{$sub} ) { |
1608
|
0
|
|
|
|
|
0
|
$msg = 'block_list'; |
1609
|
|
|
|
|
|
|
} |
1610
|
|
|
|
|
|
|
|
1611
|
0
|
0
|
|
|
|
0
|
if ( $ris_block_function->{$pkg}{$sub} ) { |
1612
|
0
|
|
|
|
|
0
|
$msg = 'block'; |
1613
|
|
|
|
|
|
|
} |
1614
|
0
|
|
|
|
|
0
|
$fh->print("$sub $msg\n"); |
1615
|
|
|
|
|
|
|
} |
1616
|
|
|
|
|
|
|
} |
1617
|
|
|
|
|
|
|
|
1618
|
0
|
|
|
|
|
0
|
foreach my $pkg ( keys %{$ris_constant} ) { |
|
0
|
|
|
|
|
0
|
|
1619
|
0
|
|
|
|
|
0
|
$fh->print("\nconstants and constant subs in package $pkg\n"); |
1620
|
|
|
|
|
|
|
|
1621
|
0
|
|
|
|
|
0
|
foreach my $sub ( keys %{ $ris_constant->{$pkg} } ) { |
|
0
|
|
|
|
|
0
|
|
1622
|
0
|
|
|
|
|
0
|
$fh->print("$sub\n"); |
1623
|
|
|
|
|
|
|
} |
1624
|
|
|
|
|
|
|
} |
1625
|
0
|
|
|
|
|
0
|
return; |
1626
|
|
|
|
|
|
|
} ## end sub dump_functions |
1627
|
|
|
|
|
|
|
|
1628
|
|
|
|
|
|
|
sub prepare_for_a_new_file { |
1629
|
|
|
|
|
|
|
|
1630
|
562
|
|
|
562
|
0
|
1903
|
my ( $self, $source_object ) = @_; |
1631
|
|
|
|
|
|
|
|
1632
|
|
|
|
|
|
|
# copy the source object lines to an array of lines |
1633
|
562
|
|
|
|
|
3019
|
$self->make_source_array($source_object); |
1634
|
|
|
|
|
|
|
|
1635
|
|
|
|
|
|
|
# previous tokens needed to determine what to expect next |
1636
|
562
|
|
|
|
|
1767
|
$last_nonblank_token = ';'; # the only possible starting state which |
1637
|
562
|
|
|
|
|
1573
|
$last_nonblank_type = ';'; # will make a leading brace a code block |
1638
|
562
|
|
|
|
|
1398
|
$last_nonblank_block_type = EMPTY_STRING; |
1639
|
|
|
|
|
|
|
|
1640
|
|
|
|
|
|
|
# scalars for remembering statement types across multiple lines |
1641
|
562
|
|
|
|
|
1371
|
$statement_type = EMPTY_STRING; # '' or 'use' or 'sub..' or 'case..' |
1642
|
|
|
|
|
|
|
|
1643
|
|
|
|
|
|
|
# scalars for remembering where we are in the file |
1644
|
562
|
|
|
|
|
1401
|
$current_package = "main"; |
1645
|
562
|
|
|
|
|
1140
|
$context = UNKNOWN_CONTEXT; |
1646
|
|
|
|
|
|
|
|
1647
|
|
|
|
|
|
|
# hashes used to remember function information |
1648
|
562
|
|
|
|
|
2028
|
$ris_constant = {}; # user-defined constants |
1649
|
562
|
|
|
|
|
1983
|
$ris_user_function = {}; # user-defined functions |
1650
|
562
|
|
|
|
|
1632
|
$ruser_function_prototype = {}; # their prototypes |
1651
|
562
|
|
|
|
|
1651
|
$ris_block_function = {}; |
1652
|
562
|
|
|
|
|
1645
|
$ris_block_list_function = {}; |
1653
|
562
|
|
|
|
|
1465
|
$rsaw_function_definition = {}; |
1654
|
562
|
|
|
|
|
1341
|
$rsaw_use_module = {}; |
1655
|
|
|
|
|
|
|
|
1656
|
|
|
|
|
|
|
# variables used to track depths of various containers |
1657
|
|
|
|
|
|
|
# and report nesting errors |
1658
|
562
|
|
|
|
|
1176
|
$paren_depth = 0; |
1659
|
562
|
|
|
|
|
1064
|
$brace_depth = 0; |
1660
|
562
|
|
|
|
|
1086
|
$square_bracket_depth = 0; |
1661
|
562
|
|
|
|
|
2535
|
$rcurrent_depth = [ (0) x scalar @closing_brace_names ]; |
1662
|
562
|
|
|
|
|
1277
|
$total_depth = 0; |
1663
|
562
|
|
|
|
|
2224
|
$rtotal_depth = []; |
1664
|
562
|
|
|
|
|
2071
|
$rcurrent_sequence_number = []; |
1665
|
562
|
|
|
|
|
1252
|
$next_sequence_number = 2; # The value 1 is reserved for SEQ_ROOT |
1666
|
|
|
|
|
|
|
|
1667
|
562
|
|
|
|
|
1851
|
$rparen_type = []; |
1668
|
562
|
|
|
|
|
1637
|
$rparen_semicolon_count = []; |
1669
|
562
|
|
|
|
|
2421
|
$rparen_vars = []; |
1670
|
562
|
|
|
|
|
1893
|
$rbrace_type = []; |
1671
|
562
|
|
|
|
|
1726
|
$rbrace_structural_type = []; |
1672
|
562
|
|
|
|
|
1489
|
$rbrace_context = []; |
1673
|
562
|
|
|
|
|
1633
|
$rbrace_package = []; |
1674
|
562
|
|
|
|
|
1598
|
$rsquare_bracket_type = []; |
1675
|
562
|
|
|
|
|
1529
|
$rsquare_bracket_structural_type = []; |
1676
|
562
|
|
|
|
|
3534
|
$rdepth_array = []; |
1677
|
562
|
|
|
|
|
1306
|
$rnested_ternary_flag = []; |
1678
|
562
|
|
|
|
|
3776
|
$rnested_statement_type = []; |
1679
|
562
|
|
|
|
|
3253
|
$rstarting_line_of_current_depth = []; |
1680
|
|
|
|
|
|
|
|
1681
|
562
|
|
|
|
|
1820
|
$rparen_type->[$paren_depth] = EMPTY_STRING; |
1682
|
562
|
|
|
|
|
1651
|
$rparen_semicolon_count->[$paren_depth] = 0; |
1683
|
562
|
|
|
|
|
1561
|
$rparen_vars->[$paren_depth] = []; |
1684
|
562
|
|
|
|
|
1657
|
$rbrace_type->[$brace_depth] = ';'; # identify opening brace as code block |
1685
|
562
|
|
|
|
|
1542
|
$rbrace_structural_type->[$brace_depth] = EMPTY_STRING; |
1686
|
562
|
|
|
|
|
1433
|
$rbrace_context->[$brace_depth] = UNKNOWN_CONTEXT; |
1687
|
562
|
|
|
|
|
1331
|
$rbrace_package->[$paren_depth] = $current_package; |
1688
|
562
|
|
|
|
|
1270
|
$rsquare_bracket_type->[$square_bracket_depth] = EMPTY_STRING; |
1689
|
562
|
|
|
|
|
1475
|
$rsquare_bracket_structural_type->[$square_bracket_depth] = EMPTY_STRING; |
1690
|
|
|
|
|
|
|
|
1691
|
562
|
|
|
|
|
2804
|
initialize_tokenizer_state(); |
1692
|
562
|
|
|
|
|
1085
|
return; |
1693
|
|
|
|
|
|
|
} ## end sub prepare_for_a_new_file |
1694
|
|
|
|
|
|
|
|
1695
|
|
|
|
|
|
|
{ ## closure for sub tokenize_this_line |
1696
|
|
|
|
|
|
|
|
1697
|
39
|
|
|
39
|
|
396
|
use constant BRACE => 0; |
|
39
|
|
|
|
|
130
|
|
|
39
|
|
|
|
|
3022
|
|
1698
|
39
|
|
|
39
|
|
323
|
use constant SQUARE_BRACKET => 1; |
|
39
|
|
|
|
|
130
|
|
|
39
|
|
|
|
|
2317
|
|
1699
|
39
|
|
|
39
|
|
326
|
use constant PAREN => 2; |
|
39
|
|
|
|
|
138
|
|
|
39
|
|
|
|
|
3787
|
|
1700
|
39
|
|
|
39
|
|
308
|
use constant QUESTION_COLON => 3; |
|
39
|
|
|
|
|
118
|
|
|
39
|
|
|
|
|
84547
|
|
1701
|
|
|
|
|
|
|
|
1702
|
|
|
|
|
|
|
# TV1: scalars for processing one LINE. |
1703
|
|
|
|
|
|
|
# Re-initialized on each entry to sub tokenize_this_line. |
1704
|
|
|
|
|
|
|
my ( |
1705
|
|
|
|
|
|
|
$block_type, $container_type, $expecting, |
1706
|
|
|
|
|
|
|
$i, $i_tok, $input_line, |
1707
|
|
|
|
|
|
|
$input_line_number, $last_nonblank_i, $max_token_index, |
1708
|
|
|
|
|
|
|
$next_tok, $next_type, $peeked_ahead, |
1709
|
|
|
|
|
|
|
$prototype, $rhere_target_list, $rtoken_map, |
1710
|
|
|
|
|
|
|
$rtoken_type, $rtokens, $tok, |
1711
|
|
|
|
|
|
|
$type, $type_sequence, $indent_flag, |
1712
|
|
|
|
|
|
|
); |
1713
|
|
|
|
|
|
|
|
1714
|
|
|
|
|
|
|
# TV2: refs to ARRAYS for processing one LINE |
1715
|
|
|
|
|
|
|
# Re-initialized on each call. |
1716
|
|
|
|
|
|
|
my $routput_token_list = []; # stack of output token indexes |
1717
|
|
|
|
|
|
|
my $routput_token_type = []; # token types |
1718
|
|
|
|
|
|
|
my $routput_block_type = []; # types of code block |
1719
|
|
|
|
|
|
|
my $routput_container_type = []; # paren types, such as if, elsif, .. |
1720
|
|
|
|
|
|
|
my $routput_type_sequence = []; # nesting sequential number |
1721
|
|
|
|
|
|
|
my $routput_indent_flag = []; # |
1722
|
|
|
|
|
|
|
|
1723
|
|
|
|
|
|
|
# TV3: SCALARS for quote variables. These are initialized with a |
1724
|
|
|
|
|
|
|
# subroutine call and continually updated as lines are processed. |
1725
|
|
|
|
|
|
|
my ( $in_quote, $quote_type, $quote_character, $quote_pos, $quote_depth, |
1726
|
|
|
|
|
|
|
$quoted_string_1, $quoted_string_2, $allowed_quote_modifiers, ); |
1727
|
|
|
|
|
|
|
|
1728
|
|
|
|
|
|
|
# TV4: SCALARS for multi-line identifiers and |
1729
|
|
|
|
|
|
|
# statements. These are initialized with a subroutine call |
1730
|
|
|
|
|
|
|
# and continually updated as lines are processed. |
1731
|
|
|
|
|
|
|
my ( $id_scan_state, $identifier, $want_paren ); |
1732
|
|
|
|
|
|
|
|
1733
|
|
|
|
|
|
|
# TV5: SCALARS for tracking indentation level. |
1734
|
|
|
|
|
|
|
# Initialized once and continually updated as lines are |
1735
|
|
|
|
|
|
|
# processed. |
1736
|
|
|
|
|
|
|
my ( |
1737
|
|
|
|
|
|
|
$nesting_token_string, $nesting_block_string, |
1738
|
|
|
|
|
|
|
$nesting_block_flag, $level_in_tokenizer, |
1739
|
|
|
|
|
|
|
); |
1740
|
|
|
|
|
|
|
|
1741
|
|
|
|
|
|
|
# TV6: SCALARS for remembering several previous |
1742
|
|
|
|
|
|
|
# tokens. Initialized once and continually updated as |
1743
|
|
|
|
|
|
|
# lines are processed. |
1744
|
|
|
|
|
|
|
my ( |
1745
|
|
|
|
|
|
|
$last_nonblank_container_type, $last_nonblank_type_sequence, |
1746
|
|
|
|
|
|
|
$last_last_nonblank_token, $last_last_nonblank_type, |
1747
|
|
|
|
|
|
|
$last_nonblank_prototype, |
1748
|
|
|
|
|
|
|
); |
1749
|
|
|
|
|
|
|
|
1750
|
|
|
|
|
|
|
# ---------------------------------------------------------------- |
1751
|
|
|
|
|
|
|
# beginning of tokenizer variable access and manipulation routines |
1752
|
|
|
|
|
|
|
# ---------------------------------------------------------------- |
1753
|
|
|
|
|
|
|
|
1754
|
|
|
|
|
|
|
sub initialize_tokenizer_state { |
1755
|
|
|
|
|
|
|
|
1756
|
|
|
|
|
|
|
# GV1: initialized once |
1757
|
|
|
|
|
|
|
# TV1: initialized on each call |
1758
|
|
|
|
|
|
|
# TV2: initialized on each call |
1759
|
|
|
|
|
|
|
# TV3: |
1760
|
562
|
|
|
562
|
0
|
1230
|
$in_quote = 0; |
1761
|
562
|
|
|
|
|
1388
|
$quote_type = 'Q'; |
1762
|
562
|
|
|
|
|
1277
|
$quote_character = EMPTY_STRING; |
1763
|
562
|
|
|
|
|
1107
|
$quote_pos = 0; |
1764
|
562
|
|
|
|
|
1135
|
$quote_depth = 0; |
1765
|
562
|
|
|
|
|
1100
|
$quoted_string_1 = EMPTY_STRING; |
1766
|
562
|
|
|
|
|
1251
|
$quoted_string_2 = EMPTY_STRING; |
1767
|
562
|
|
|
|
|
1109
|
$allowed_quote_modifiers = EMPTY_STRING; |
1768
|
|
|
|
|
|
|
|
1769
|
|
|
|
|
|
|
# TV4: |
1770
|
562
|
|
|
|
|
1101
|
$id_scan_state = EMPTY_STRING; |
1771
|
562
|
|
|
|
|
1205
|
$identifier = EMPTY_STRING; |
1772
|
562
|
|
|
|
|
1201
|
$want_paren = EMPTY_STRING; |
1773
|
|
|
|
|
|
|
|
1774
|
|
|
|
|
|
|
# TV5: |
1775
|
562
|
|
|
|
|
1300
|
$nesting_token_string = EMPTY_STRING; |
1776
|
562
|
|
|
|
|
1249
|
$nesting_block_string = '1'; # initially in a block |
1777
|
562
|
|
|
|
|
1056
|
$nesting_block_flag = 1; |
1778
|
562
|
|
|
|
|
1089
|
$level_in_tokenizer = 0; |
1779
|
|
|
|
|
|
|
|
1780
|
|
|
|
|
|
|
# TV6: |
1781
|
562
|
|
|
|
|
1170
|
$last_nonblank_container_type = EMPTY_STRING; |
1782
|
562
|
|
|
|
|
1188
|
$last_nonblank_type_sequence = EMPTY_STRING; |
1783
|
562
|
|
|
|
|
1178
|
$last_last_nonblank_token = ';'; |
1784
|
562
|
|
|
|
|
1192
|
$last_last_nonblank_type = ';'; |
1785
|
562
|
|
|
|
|
1178
|
$last_nonblank_prototype = EMPTY_STRING; |
1786
|
562
|
|
|
|
|
1075
|
return; |
1787
|
|
|
|
|
|
|
} ## end sub initialize_tokenizer_state |
1788
|
|
|
|
|
|
|
|
1789
|
|
|
|
|
|
|
sub save_tokenizer_state { |
1790
|
|
|
|
|
|
|
|
1791
|
|
|
|
|
|
|
# Global variables: |
1792
|
0
|
|
|
0
|
0
|
0
|
my $rGV1 = [ |
1793
|
|
|
|
|
|
|
$brace_depth, |
1794
|
|
|
|
|
|
|
$context, |
1795
|
|
|
|
|
|
|
$current_package, |
1796
|
|
|
|
|
|
|
$last_nonblank_block_type, |
1797
|
|
|
|
|
|
|
$last_nonblank_token, |
1798
|
|
|
|
|
|
|
$last_nonblank_type, |
1799
|
|
|
|
|
|
|
$next_sequence_number, |
1800
|
|
|
|
|
|
|
$paren_depth, |
1801
|
|
|
|
|
|
|
$rbrace_context, |
1802
|
|
|
|
|
|
|
$rbrace_package, |
1803
|
|
|
|
|
|
|
$rbrace_structural_type, |
1804
|
|
|
|
|
|
|
$rbrace_type, |
1805
|
|
|
|
|
|
|
$rcurrent_depth, |
1806
|
|
|
|
|
|
|
$rcurrent_sequence_number, |
1807
|
|
|
|
|
|
|
$rdepth_array, |
1808
|
|
|
|
|
|
|
$ris_block_function, |
1809
|
|
|
|
|
|
|
$ris_block_list_function, |
1810
|
|
|
|
|
|
|
$ris_constant, |
1811
|
|
|
|
|
|
|
$ris_user_function, |
1812
|
|
|
|
|
|
|
$rnested_statement_type, |
1813
|
|
|
|
|
|
|
$rnested_ternary_flag, |
1814
|
|
|
|
|
|
|
$rparen_semicolon_count, |
1815
|
|
|
|
|
|
|
$rparen_vars, |
1816
|
|
|
|
|
|
|
$rparen_type, |
1817
|
|
|
|
|
|
|
$rsaw_function_definition, |
1818
|
|
|
|
|
|
|
$rsaw_use_module, |
1819
|
|
|
|
|
|
|
$rsquare_bracket_structural_type, |
1820
|
|
|
|
|
|
|
$rsquare_bracket_type, |
1821
|
|
|
|
|
|
|
$rstarting_line_of_current_depth, |
1822
|
|
|
|
|
|
|
$rtotal_depth, |
1823
|
|
|
|
|
|
|
$ruser_function_prototype, |
1824
|
|
|
|
|
|
|
$square_bracket_depth, |
1825
|
|
|
|
|
|
|
$statement_type, |
1826
|
|
|
|
|
|
|
$total_depth, |
1827
|
|
|
|
|
|
|
|
1828
|
|
|
|
|
|
|
]; |
1829
|
|
|
|
|
|
|
|
1830
|
|
|
|
|
|
|
# Tokenizer closure variables: |
1831
|
0
|
|
|
|
|
0
|
my $rTV1 = [ |
1832
|
|
|
|
|
|
|
$block_type, $container_type, $expecting, |
1833
|
|
|
|
|
|
|
$i, $i_tok, $input_line, |
1834
|
|
|
|
|
|
|
$input_line_number, $last_nonblank_i, $max_token_index, |
1835
|
|
|
|
|
|
|
$next_tok, $next_type, $peeked_ahead, |
1836
|
|
|
|
|
|
|
$prototype, $rhere_target_list, $rtoken_map, |
1837
|
|
|
|
|
|
|
$rtoken_type, $rtokens, $tok, |
1838
|
|
|
|
|
|
|
$type, $type_sequence, $indent_flag, |
1839
|
|
|
|
|
|
|
]; |
1840
|
|
|
|
|
|
|
|
1841
|
0
|
|
|
|
|
0
|
my $rTV2 = [ |
1842
|
|
|
|
|
|
|
$routput_token_list, $routput_token_type, |
1843
|
|
|
|
|
|
|
$routput_block_type, $routput_container_type, |
1844
|
|
|
|
|
|
|
$routput_type_sequence, $routput_indent_flag, |
1845
|
|
|
|
|
|
|
]; |
1846
|
|
|
|
|
|
|
|
1847
|
0
|
|
|
|
|
0
|
my $rTV3 = [ |
1848
|
|
|
|
|
|
|
$in_quote, $quote_type, |
1849
|
|
|
|
|
|
|
$quote_character, $quote_pos, |
1850
|
|
|
|
|
|
|
$quote_depth, $quoted_string_1, |
1851
|
|
|
|
|
|
|
$quoted_string_2, $allowed_quote_modifiers, |
1852
|
|
|
|
|
|
|
]; |
1853
|
|
|
|
|
|
|
|
1854
|
0
|
|
|
|
|
0
|
my $rTV4 = [ $id_scan_state, $identifier, $want_paren ]; |
1855
|
|
|
|
|
|
|
|
1856
|
0
|
|
|
|
|
0
|
my $rTV5 = [ |
1857
|
|
|
|
|
|
|
$nesting_token_string, $nesting_block_string, |
1858
|
|
|
|
|
|
|
$nesting_block_flag, $level_in_tokenizer, |
1859
|
|
|
|
|
|
|
]; |
1860
|
|
|
|
|
|
|
|
1861
|
0
|
|
|
|
|
0
|
my $rTV6 = [ |
1862
|
|
|
|
|
|
|
$last_nonblank_container_type, $last_nonblank_type_sequence, |
1863
|
|
|
|
|
|
|
$last_last_nonblank_token, $last_last_nonblank_type, |
1864
|
|
|
|
|
|
|
$last_nonblank_prototype, |
1865
|
|
|
|
|
|
|
]; |
1866
|
0
|
|
|
|
|
0
|
return [ $rGV1, $rTV1, $rTV2, $rTV3, $rTV4, $rTV5, $rTV6 ]; |
1867
|
|
|
|
|
|
|
} ## end sub save_tokenizer_state |
1868
|
|
|
|
|
|
|
|
1869
|
|
|
|
|
|
|
sub restore_tokenizer_state { |
1870
|
0
|
|
|
0
|
0
|
0
|
my ($rstate) = @_; |
1871
|
0
|
|
|
|
|
0
|
my ( $rGV1, $rTV1, $rTV2, $rTV3, $rTV4, $rTV5, $rTV6 ) = @{$rstate}; |
|
0
|
|
|
|
|
0
|
|
1872
|
|
|
|
|
|
|
|
1873
|
|
|
|
|
|
|
( |
1874
|
|
|
|
|
|
|
$brace_depth, |
1875
|
|
|
|
|
|
|
$context, |
1876
|
|
|
|
|
|
|
$current_package, |
1877
|
|
|
|
|
|
|
$last_nonblank_block_type, |
1878
|
|
|
|
|
|
|
$last_nonblank_token, |
1879
|
|
|
|
|
|
|
$last_nonblank_type, |
1880
|
|
|
|
|
|
|
$next_sequence_number, |
1881
|
|
|
|
|
|
|
$paren_depth, |
1882
|
|
|
|
|
|
|
$rbrace_context, |
1883
|
|
|
|
|
|
|
$rbrace_package, |
1884
|
|
|
|
|
|
|
$rbrace_structural_type, |
1885
|
|
|
|
|
|
|
$rbrace_type, |
1886
|
|
|
|
|
|
|
$rcurrent_depth, |
1887
|
|
|
|
|
|
|
$rcurrent_sequence_number, |
1888
|
|
|
|
|
|
|
$rdepth_array, |
1889
|
|
|
|
|
|
|
$ris_block_function, |
1890
|
|
|
|
|
|
|
$ris_block_list_function, |
1891
|
|
|
|
|
|
|
$ris_constant, |
1892
|
|
|
|
|
|
|
$ris_user_function, |
1893
|
|
|
|
|
|
|
$rnested_statement_type, |
1894
|
|
|
|
|
|
|
$rnested_ternary_flag, |
1895
|
|
|
|
|
|
|
$rparen_semicolon_count, |
1896
|
|
|
|
|
|
|
$rparen_vars, |
1897
|
|
|
|
|
|
|
$rparen_type, |
1898
|
|
|
|
|
|
|
$rsaw_function_definition, |
1899
|
|
|
|
|
|
|
$rsaw_use_module, |
1900
|
|
|
|
|
|
|
$rsquare_bracket_structural_type, |
1901
|
|
|
|
|
|
|
$rsquare_bracket_type, |
1902
|
|
|
|
|
|
|
$rstarting_line_of_current_depth, |
1903
|
|
|
|
|
|
|
$rtotal_depth, |
1904
|
|
|
|
|
|
|
$ruser_function_prototype, |
1905
|
|
|
|
|
|
|
$square_bracket_depth, |
1906
|
|
|
|
|
|
|
$statement_type, |
1907
|
|
|
|
|
|
|
$total_depth, |
1908
|
|
|
|
|
|
|
|
1909
|
0
|
|
|
|
|
0
|
) = @{$rGV1}; |
|
0
|
|
|
|
|
0
|
|
1910
|
|
|
|
|
|
|
|
1911
|
|
|
|
|
|
|
( |
1912
|
|
|
|
|
|
|
$block_type, $container_type, $expecting, |
1913
|
|
|
|
|
|
|
$i, $i_tok, $input_line, |
1914
|
|
|
|
|
|
|
$input_line_number, $last_nonblank_i, $max_token_index, |
1915
|
|
|
|
|
|
|
$next_tok, $next_type, $peeked_ahead, |
1916
|
|
|
|
|
|
|
$prototype, $rhere_target_list, $rtoken_map, |
1917
|
|
|
|
|
|
|
$rtoken_type, $rtokens, $tok, |
1918
|
|
|
|
|
|
|
$type, $type_sequence, $indent_flag, |
1919
|
0
|
|
|
|
|
0
|
) = @{$rTV1}; |
|
0
|
|
|
|
|
0
|
|
1920
|
|
|
|
|
|
|
|
1921
|
|
|
|
|
|
|
( |
1922
|
|
|
|
|
|
|
$routput_token_list, $routput_token_type, |
1923
|
|
|
|
|
|
|
$routput_block_type, $routput_container_type, |
1924
|
|
|
|
|
|
|
$routput_type_sequence, $routput_indent_flag, |
1925
|
0
|
|
|
|
|
0
|
) = @{$rTV2}; |
|
0
|
|
|
|
|
0
|
|
1926
|
|
|
|
|
|
|
|
1927
|
|
|
|
|
|
|
( |
1928
|
|
|
|
|
|
|
$in_quote, $quote_type, $quote_character, $quote_pos, $quote_depth, |
1929
|
|
|
|
|
|
|
$quoted_string_1, $quoted_string_2, $allowed_quote_modifiers, |
1930
|
0
|
|
|
|
|
0
|
) = @{$rTV3}; |
|
0
|
|
|
|
|
0
|
|
1931
|
|
|
|
|
|
|
|
1932
|
0
|
|
|
|
|
0
|
( $id_scan_state, $identifier, $want_paren ) = @{$rTV4}; |
|
0
|
|
|
|
|
0
|
|
1933
|
|
|
|
|
|
|
|
1934
|
|
|
|
|
|
|
( |
1935
|
|
|
|
|
|
|
$nesting_token_string, $nesting_block_string, |
1936
|
|
|
|
|
|
|
$nesting_block_flag, $level_in_tokenizer, |
1937
|
0
|
|
|
|
|
0
|
) = @{$rTV5}; |
|
0
|
|
|
|
|
0
|
|
1938
|
|
|
|
|
|
|
|
1939
|
|
|
|
|
|
|
( |
1940
|
|
|
|
|
|
|
$last_nonblank_container_type, $last_nonblank_type_sequence, |
1941
|
|
|
|
|
|
|
$last_last_nonblank_token, $last_last_nonblank_type, |
1942
|
|
|
|
|
|
|
$last_nonblank_prototype, |
1943
|
0
|
|
|
|
|
0
|
) = @{$rTV6}; |
|
0
|
|
|
|
|
0
|
|
1944
|
0
|
|
|
|
|
0
|
return; |
1945
|
|
|
|
|
|
|
} ## end sub restore_tokenizer_state |
1946
|
|
|
|
|
|
|
|
1947
|
|
|
|
|
|
|
sub split_pretoken { |
1948
|
|
|
|
|
|
|
|
1949
|
8
|
|
|
8
|
0
|
18
|
my ( $self, $numc ) = @_; |
1950
|
|
|
|
|
|
|
|
1951
|
|
|
|
|
|
|
# Split the leading $numc characters from the current token (at index=$i) |
1952
|
|
|
|
|
|
|
# which is pre-type 'w' and insert the remainder back into the pretoken |
1953
|
|
|
|
|
|
|
# stream with appropriate settings. Since we are splitting a pre-type 'w', |
1954
|
|
|
|
|
|
|
# there are three cases, depending on if the remainder starts with a digit: |
1955
|
|
|
|
|
|
|
# Case 1: remainder is type 'd', all digits |
1956
|
|
|
|
|
|
|
# Case 2: remainder is type 'd' and type 'w': digits and other characters |
1957
|
|
|
|
|
|
|
# Case 3: remainder is type 'w' |
1958
|
|
|
|
|
|
|
|
1959
|
|
|
|
|
|
|
# Examples, for $numc=1: |
1960
|
|
|
|
|
|
|
# $tok => $tok_0 $tok_1 $tok_2 |
1961
|
|
|
|
|
|
|
# 'x10' => 'x' '10' # case 1 |
1962
|
|
|
|
|
|
|
# 'x10if' => 'x' '10' 'if' # case 2 |
1963
|
|
|
|
|
|
|
# '0ne => 'O' 'ne' # case 3 |
1964
|
|
|
|
|
|
|
|
1965
|
|
|
|
|
|
|
# where: |
1966
|
|
|
|
|
|
|
# $tok_1 is a possible string of digits (pre-type 'd') |
1967
|
|
|
|
|
|
|
# $tok_2 is a possible word (pre-type 'w') |
1968
|
|
|
|
|
|
|
|
1969
|
|
|
|
|
|
|
# return 1 if successful |
1970
|
|
|
|
|
|
|
# return undef if error (shouldn't happen) |
1971
|
|
|
|
|
|
|
|
1972
|
|
|
|
|
|
|
# Calling routine should update '$type' and '$tok' if successful. |
1973
|
|
|
|
|
|
|
|
1974
|
8
|
|
|
|
|
21
|
my $pretoken = $rtokens->[$i]; |
1975
|
8
|
50
|
33
|
|
|
86
|
if ( $pretoken |
|
|
|
33
|
|
|
|
|
1976
|
|
|
|
|
|
|
&& length($pretoken) > $numc |
1977
|
|
|
|
|
|
|
&& substr( $pretoken, $numc ) =~ /^(\d*)(.*)$/ ) |
1978
|
|
|
|
|
|
|
{ |
1979
|
|
|
|
|
|
|
|
1980
|
|
|
|
|
|
|
# Split $tok into up to 3 tokens: |
1981
|
8
|
|
|
|
|
20
|
my $tok_0 = substr( $pretoken, 0, $numc ); |
1982
|
8
|
50
|
|
|
|
32
|
my $tok_1 = defined($1) ? $1 : EMPTY_STRING; |
1983
|
8
|
50
|
|
|
|
27
|
my $tok_2 = defined($2) ? $2 : EMPTY_STRING; |
1984
|
|
|
|
|
|
|
|
1985
|
8
|
|
|
|
|
17
|
my $len_0 = length($tok_0); |
1986
|
8
|
|
|
|
|
19
|
my $len_1 = length($tok_1); |
1987
|
8
|
|
|
|
|
14
|
my $len_2 = length($tok_2); |
1988
|
|
|
|
|
|
|
|
1989
|
8
|
|
|
|
|
16
|
my $pre_type_0 = 'w'; |
1990
|
8
|
|
|
|
|
16
|
my $pre_type_1 = 'd'; |
1991
|
8
|
|
|
|
|
13
|
my $pre_type_2 = 'w'; |
1992
|
|
|
|
|
|
|
|
1993
|
8
|
|
|
|
|
15
|
my $pos_0 = $rtoken_map->[$i]; |
1994
|
8
|
|
|
|
|
15
|
my $pos_1 = $pos_0 + $len_0; |
1995
|
8
|
|
|
|
|
17
|
my $pos_2 = $pos_1 + $len_1; |
1996
|
|
|
|
|
|
|
|
1997
|
8
|
|
|
|
|
15
|
my $isplice = $i + 1; |
1998
|
|
|
|
|
|
|
|
1999
|
|
|
|
|
|
|
# Splice in any digits |
2000
|
8
|
100
|
|
|
|
28
|
if ($len_1) { |
2001
|
5
|
|
|
|
|
8
|
splice @{$rtoken_map}, $isplice, 0, $pos_1; |
|
5
|
|
|
|
|
19
|
|
2002
|
5
|
|
|
|
|
11
|
splice @{$rtokens}, $isplice, 0, $tok_1; |
|
5
|
|
|
|
|
15
|
|
2003
|
5
|
|
|
|
|
10
|
splice @{$rtoken_type}, $isplice, 0, $pre_type_1; |
|
5
|
|
|
|
|
16
|
|
2004
|
5
|
|
|
|
|
7
|
$max_token_index++; |
2005
|
5
|
|
|
|
|
9
|
$isplice++; |
2006
|
|
|
|
|
|
|
} |
2007
|
|
|
|
|
|
|
|
2008
|
|
|
|
|
|
|
# Splice in any trailing word |
2009
|
8
|
100
|
|
|
|
20
|
if ($len_2) { |
2010
|
4
|
|
|
|
|
6
|
splice @{$rtoken_map}, $isplice, 0, $pos_2; |
|
4
|
|
|
|
|
14
|
|
2011
|
4
|
|
|
|
|
6
|
splice @{$rtokens}, $isplice, 0, $tok_2; |
|
4
|
|
|
|
|
9
|
|
2012
|
4
|
|
|
|
|
8
|
splice @{$rtoken_type}, $isplice, 0, $pre_type_2; |
|
4
|
|
|
|
|
8
|
|
2013
|
4
|
|
|
|
|
7
|
$max_token_index++; |
2014
|
|
|
|
|
|
|
} |
2015
|
|
|
|
|
|
|
|
2016
|
8
|
|
|
|
|
19
|
$rtokens->[$i] = $tok_0; |
2017
|
8
|
|
|
|
|
31
|
return 1; |
2018
|
|
|
|
|
|
|
} |
2019
|
|
|
|
|
|
|
else { |
2020
|
|
|
|
|
|
|
|
2021
|
|
|
|
|
|
|
# Shouldn't get here |
2022
|
0
|
|
|
|
|
0
|
if (DEVEL_MODE) { |
2023
|
|
|
|
|
|
|
$self->Fault(<<EOM); |
2024
|
|
|
|
|
|
|
While working near line number $input_line_number, bad arg '$tok' passed to sub split_pretoken() |
2025
|
|
|
|
|
|
|
EOM |
2026
|
|
|
|
|
|
|
} |
2027
|
|
|
|
|
|
|
} |
2028
|
0
|
|
|
|
|
0
|
return; |
2029
|
|
|
|
|
|
|
} ## end sub split_pretoken |
2030
|
|
|
|
|
|
|
|
2031
|
|
|
|
|
|
|
sub get_indentation_level { |
2032
|
562
|
|
|
562
|
0
|
1603
|
return $level_in_tokenizer; |
2033
|
|
|
|
|
|
|
} |
2034
|
|
|
|
|
|
|
|
2035
|
|
|
|
|
|
|
sub reset_indentation_level { |
2036
|
562
|
|
|
562
|
0
|
1573
|
$level_in_tokenizer = shift; |
2037
|
562
|
|
|
|
|
1208
|
return; |
2038
|
|
|
|
|
|
|
} |
2039
|
|
|
|
|
|
|
|
2040
|
|
|
|
|
|
|
sub peeked_ahead { |
2041
|
252
|
|
|
252
|
0
|
465
|
my $flag = shift; |
2042
|
252
|
100
|
|
|
|
631
|
$peeked_ahead = defined($flag) ? $flag : $peeked_ahead; |
2043
|
252
|
|
|
|
|
927
|
return $peeked_ahead; |
2044
|
|
|
|
|
|
|
} |
2045
|
|
|
|
|
|
|
|
2046
|
|
|
|
|
|
|
# ------------------------------------------------------------ |
2047
|
|
|
|
|
|
|
# end of tokenizer variable access and manipulation routines |
2048
|
|
|
|
|
|
|
# ------------------------------------------------------------ |
2049
|
|
|
|
|
|
|
|
2050
|
|
|
|
|
|
|
#------------------------------ |
2051
|
|
|
|
|
|
|
# beginning of tokenizer hashes |
2052
|
|
|
|
|
|
|
#------------------------------ |
2053
|
|
|
|
|
|
|
|
2054
|
|
|
|
|
|
|
my %matching_start_token = ( '}' => '{', ']' => '[', ')' => '(' ); |
2055
|
|
|
|
|
|
|
|
2056
|
|
|
|
|
|
|
# These block types terminate statements and do not need a trailing |
2057
|
|
|
|
|
|
|
# semicolon |
2058
|
|
|
|
|
|
|
# patched for SWITCH/CASE/ |
2059
|
|
|
|
|
|
|
# NOTE: not currently used but may be used in the future |
2060
|
|
|
|
|
|
|
my %is_zero_continuation_block_type; |
2061
|
|
|
|
|
|
|
my @q; |
2062
|
|
|
|
|
|
|
@q = qw( } { BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue ; |
2063
|
|
|
|
|
|
|
if elsif else unless while until for foreach switch case given when); |
2064
|
|
|
|
|
|
|
@is_zero_continuation_block_type{@q} = (1) x scalar(@q); |
2065
|
|
|
|
|
|
|
|
2066
|
|
|
|
|
|
|
my %is_logical_container; |
2067
|
|
|
|
|
|
|
@q = qw(if elsif unless while and or err not && ! || for foreach); |
2068
|
|
|
|
|
|
|
@is_logical_container{@q} = (1) x scalar(@q); |
2069
|
|
|
|
|
|
|
|
2070
|
|
|
|
|
|
|
my %is_binary_type; |
2071
|
|
|
|
|
|
|
@q = qw(|| &&); |
2072
|
|
|
|
|
|
|
@is_binary_type{@q} = (1) x scalar(@q); |
2073
|
|
|
|
|
|
|
|
2074
|
|
|
|
|
|
|
my %is_binary_keyword; |
2075
|
|
|
|
|
|
|
@q = qw(and or err eq ne cmp); |
2076
|
|
|
|
|
|
|
@is_binary_keyword{@q} = (1) x scalar(@q); |
2077
|
|
|
|
|
|
|
|
2078
|
|
|
|
|
|
|
# 'L' is token for opening { at hash key |
2079
|
|
|
|
|
|
|
my %is_opening_type; |
2080
|
|
|
|
|
|
|
@q = qw< L { ( [ >; |
2081
|
|
|
|
|
|
|
@is_opening_type{@q} = (1) x scalar(@q); |
2082
|
|
|
|
|
|
|
|
2083
|
|
|
|
|
|
|
my %is_opening_or_ternary_type; |
2084
|
|
|
|
|
|
|
push @q, '?'; |
2085
|
|
|
|
|
|
|
@is_opening_or_ternary_type{@q} = (1) x scalar(@q); |
2086
|
|
|
|
|
|
|
|
2087
|
|
|
|
|
|
|
# 'R' is token for closing } at hash key |
2088
|
|
|
|
|
|
|
my %is_closing_type; |
2089
|
|
|
|
|
|
|
@q = qw< R } ) ] >; |
2090
|
|
|
|
|
|
|
@is_closing_type{@q} = (1) x scalar(@q); |
2091
|
|
|
|
|
|
|
|
2092
|
|
|
|
|
|
|
my %is_closing_or_ternary_type; |
2093
|
|
|
|
|
|
|
push @q, ':'; |
2094
|
|
|
|
|
|
|
@is_closing_or_ternary_type{@q} = (1) x scalar(@q); |
2095
|
|
|
|
|
|
|
|
2096
|
|
|
|
|
|
|
my %is_redo_last_next_goto; |
2097
|
|
|
|
|
|
|
@q = qw(redo last next goto); |
2098
|
|
|
|
|
|
|
@is_redo_last_next_goto{@q} = (1) x scalar(@q); |
2099
|
|
|
|
|
|
|
|
2100
|
|
|
|
|
|
|
my %is_use_require; |
2101
|
|
|
|
|
|
|
@q = qw(use require); |
2102
|
|
|
|
|
|
|
@is_use_require{@q} = (1) x scalar(@q); |
2103
|
|
|
|
|
|
|
|
2104
|
|
|
|
|
|
|
# This hash holds the array index in $self for these keywords: |
2105
|
|
|
|
|
|
|
# Fix for issue c035: removed 'format' from this hash |
2106
|
|
|
|
|
|
|
my %is_END_DATA = ( |
2107
|
|
|
|
|
|
|
'__END__' => _in_end_, |
2108
|
|
|
|
|
|
|
'__DATA__' => _in_data_, |
2109
|
|
|
|
|
|
|
); |
2110
|
|
|
|
|
|
|
|
2111
|
|
|
|
|
|
|
my %is_list_end_type; |
2112
|
|
|
|
|
|
|
@q = qw( ; { } ); |
2113
|
|
|
|
|
|
|
push @q, ','; |
2114
|
|
|
|
|
|
|
@is_list_end_type{@q} = (1) x scalar(@q); |
2115
|
|
|
|
|
|
|
|
2116
|
|
|
|
|
|
|
# original ref: camel 3 p 147, |
2117
|
|
|
|
|
|
|
# but perl may accept undocumented flags |
2118
|
|
|
|
|
|
|
# perl 5.10 adds 'p' (preserve) |
2119
|
|
|
|
|
|
|
# Perl version 5.22 added 'n' |
2120
|
|
|
|
|
|
|
# From http://perldoc.perl.org/perlop.html we have |
2121
|
|
|
|
|
|
|
# /PATTERN/msixpodualngc or m?PATTERN?msixpodualngc |
2122
|
|
|
|
|
|
|
# s/PATTERN/REPLACEMENT/msixpodualngcer |
2123
|
|
|
|
|
|
|
# y/SEARCHLIST/REPLACEMENTLIST/cdsr |
2124
|
|
|
|
|
|
|
# tr/SEARCHLIST/REPLACEMENTLIST/cdsr |
2125
|
|
|
|
|
|
|
# qr/STRING/msixpodualn |
2126
|
|
|
|
|
|
|
my %quote_modifiers = ( |
2127
|
|
|
|
|
|
|
's' => '[msixpodualngcer]', |
2128
|
|
|
|
|
|
|
'y' => '[cdsr]', |
2129
|
|
|
|
|
|
|
'tr' => '[cdsr]', |
2130
|
|
|
|
|
|
|
'm' => '[msixpodualngc]', |
2131
|
|
|
|
|
|
|
'qr' => '[msixpodualn]', |
2132
|
|
|
|
|
|
|
'q' => EMPTY_STRING, |
2133
|
|
|
|
|
|
|
'qq' => EMPTY_STRING, |
2134
|
|
|
|
|
|
|
'qw' => EMPTY_STRING, |
2135
|
|
|
|
|
|
|
'qx' => EMPTY_STRING, |
2136
|
|
|
|
|
|
|
); |
2137
|
|
|
|
|
|
|
|
2138
|
|
|
|
|
|
|
# table showing how many quoted things to look for after quote operator.. |
2139
|
|
|
|
|
|
|
# s, y, tr have 2 (pattern and replacement) |
2140
|
|
|
|
|
|
|
# others have 1 (pattern only) |
2141
|
|
|
|
|
|
|
my %quote_items = ( |
2142
|
|
|
|
|
|
|
's' => 2, |
2143
|
|
|
|
|
|
|
'y' => 2, |
2144
|
|
|
|
|
|
|
'tr' => 2, |
2145
|
|
|
|
|
|
|
'm' => 1, |
2146
|
|
|
|
|
|
|
'qr' => 1, |
2147
|
|
|
|
|
|
|
'q' => 1, |
2148
|
|
|
|
|
|
|
'qq' => 1, |
2149
|
|
|
|
|
|
|
'qw' => 1, |
2150
|
|
|
|
|
|
|
'qx' => 1, |
2151
|
|
|
|
|
|
|
); |
2152
|
|
|
|
|
|
|
|
2153
|
|
|
|
|
|
|
my %is_for_foreach; |
2154
|
|
|
|
|
|
|
@q = qw(for foreach); |
2155
|
|
|
|
|
|
|
@is_for_foreach{@q} = (1) x scalar(@q); |
2156
|
|
|
|
|
|
|
|
2157
|
|
|
|
|
|
|
# These keywords may introduce blocks after parenthesized expressions, |
2158
|
|
|
|
|
|
|
# in the form: |
2159
|
|
|
|
|
|
|
# keyword ( .... ) { BLOCK } |
2160
|
|
|
|
|
|
|
# patch for SWITCH/CASE: added 'switch' 'case' 'given' 'when' |
2161
|
|
|
|
|
|
|
# NOTE for --use-feature=class: if ADJUST blocks eventually take a |
2162
|
|
|
|
|
|
|
# parameter list, then ADJUST might need to be added to this list (see |
2163
|
|
|
|
|
|
|
# perlclass.pod) |
2164
|
|
|
|
|
|
|
my %is_blocktype_with_paren; |
2165
|
|
|
|
|
|
|
@q = |
2166
|
|
|
|
|
|
|
qw(if elsif unless while until for foreach switch case given when catch); |
2167
|
|
|
|
|
|
|
@is_blocktype_with_paren{@q} = (1) x scalar(@q); |
2168
|
|
|
|
|
|
|
|
2169
|
|
|
|
|
|
|
my %is_case_default; |
2170
|
|
|
|
|
|
|
@q = qw(case default); |
2171
|
|
|
|
|
|
|
@is_case_default{@q} = (1) x scalar(@q); |
2172
|
|
|
|
|
|
|
|
2173
|
|
|
|
|
|
|
#------------------------ |
2174
|
|
|
|
|
|
|
# end of tokenizer hashes |
2175
|
|
|
|
|
|
|
#------------------------ |
2176
|
|
|
|
|
|
|
|
2177
|
|
|
|
|
|
|
# ------------------------------------------------------------ |
2178
|
|
|
|
|
|
|
# beginning of various scanner interface routines |
2179
|
|
|
|
|
|
|
# ------------------------------------------------------------ |
2180
|
|
|
|
|
|
|
sub scan_replacement_text { |
2181
|
|
|
|
|
|
|
|
2182
|
|
|
|
|
|
|
# check for here-docs in replacement text invoked by |
2183
|
|
|
|
|
|
|
# a substitution operator with executable modifier 'e'. |
2184
|
|
|
|
|
|
|
# |
2185
|
|
|
|
|
|
|
# given: |
2186
|
|
|
|
|
|
|
# $replacement_text |
2187
|
|
|
|
|
|
|
# return: |
2188
|
|
|
|
|
|
|
# $rht = reference to any here-doc targets |
2189
|
0
|
|
|
0
|
0
|
0
|
my ( $self, $replacement_text ) = @_; |
2190
|
|
|
|
|
|
|
|
2191
|
|
|
|
|
|
|
# quick check |
2192
|
0
|
0
|
|
|
|
0
|
return if ( $replacement_text !~ /<</ ); |
2193
|
|
|
|
|
|
|
|
2194
|
0
|
|
|
|
|
0
|
$self->write_logfile_entry( |
2195
|
|
|
|
|
|
|
"scanning replacement text for here-doc targets\n"); |
2196
|
|
|
|
|
|
|
|
2197
|
|
|
|
|
|
|
# save the logger object for error messages |
2198
|
0
|
|
|
|
|
0
|
my $logger_object = $self->[_logger_object_]; |
2199
|
|
|
|
|
|
|
|
2200
|
|
|
|
|
|
|
# save all lexical variables |
2201
|
0
|
|
|
|
|
0
|
my $rstate = save_tokenizer_state(); |
2202
|
0
|
|
|
|
|
0
|
_decrement_count(); # avoid error check for multiple tokenizers |
2203
|
|
|
|
|
|
|
|
2204
|
|
|
|
|
|
|
# make a new tokenizer |
2205
|
0
|
|
|
|
|
0
|
my $tokenizer = Perl::Tidy::Tokenizer->new( |
2206
|
|
|
|
|
|
|
source_object => \$replacement_text, |
2207
|
|
|
|
|
|
|
logger_object => $logger_object, |
2208
|
|
|
|
|
|
|
starting_line_number => $input_line_number, |
2209
|
|
|
|
|
|
|
); |
2210
|
|
|
|
|
|
|
|
2211
|
|
|
|
|
|
|
# scan the replacement text |
2212
|
0
|
|
|
|
|
0
|
while ( $tokenizer->get_line() ) { } |
2213
|
|
|
|
|
|
|
|
2214
|
|
|
|
|
|
|
# remove any here doc targets |
2215
|
0
|
|
|
|
|
0
|
my $rht = undef; |
2216
|
0
|
0
|
|
|
|
0
|
if ( $tokenizer->[_in_here_doc_] ) { |
2217
|
0
|
|
|
|
|
0
|
$rht = []; |
2218
|
0
|
|
|
|
|
0
|
push @{$rht}, |
|
0
|
|
|
|
|
0
|
|
2219
|
|
|
|
|
|
|
[ |
2220
|
|
|
|
|
|
|
$tokenizer->[_here_doc_target_], |
2221
|
|
|
|
|
|
|
$tokenizer->[_here_quote_character_] |
2222
|
|
|
|
|
|
|
]; |
2223
|
0
|
0
|
|
|
|
0
|
if ( $tokenizer->[_rhere_target_list_] ) { |
2224
|
0
|
|
|
|
|
0
|
push @{$rht}, @{ $tokenizer->[_rhere_target_list_] }; |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
2225
|
0
|
|
|
|
|
0
|
$tokenizer->[_rhere_target_list_] = undef; |
2226
|
|
|
|
|
|
|
} |
2227
|
0
|
|
|
|
|
0
|
$tokenizer->[_in_here_doc_] = undef; |
2228
|
|
|
|
|
|
|
} |
2229
|
|
|
|
|
|
|
|
2230
|
|
|
|
|
|
|
# now its safe to report errors |
2231
|
0
|
|
|
|
|
0
|
my $severe_error = $tokenizer->report_tokenization_errors(); |
2232
|
|
|
|
|
|
|
|
2233
|
|
|
|
|
|
|
# TODO: Could propagate a severe error up |
2234
|
|
|
|
|
|
|
|
2235
|
|
|
|
|
|
|
# restore all tokenizer lexical variables |
2236
|
0
|
|
|
|
|
0
|
restore_tokenizer_state($rstate); |
2237
|
|
|
|
|
|
|
|
2238
|
|
|
|
|
|
|
# return the here doc targets |
2239
|
0
|
|
|
|
|
0
|
return $rht; |
2240
|
|
|
|
|
|
|
} ## end sub scan_replacement_text |
2241
|
|
|
|
|
|
|
|
2242
|
|
|
|
|
|
|
sub scan_bare_identifier { |
2243
|
1674
|
|
|
1674
|
0
|
3031
|
my $self = shift; |
2244
|
1674
|
|
|
|
|
5209
|
( $i, $tok, $type, $prototype ) = |
2245
|
|
|
|
|
|
|
$self->scan_bare_identifier_do( $input_line, $i, $tok, $type, |
2246
|
|
|
|
|
|
|
$prototype, $rtoken_map, $max_token_index ); |
2247
|
1674
|
|
|
|
|
3444
|
return; |
2248
|
|
|
|
|
|
|
} ## end sub scan_bare_identifier |
2249
|
|
|
|
|
|
|
|
2250
|
|
|
|
|
|
|
sub scan_identifier { |
2251
|
|
|
|
|
|
|
|
2252
|
486
|
|
|
486
|
0
|
913
|
my $self = shift; |
2253
|
|
|
|
|
|
|
|
2254
|
|
|
|
|
|
|
( |
2255
|
486
|
|
|
|
|
1975
|
$i, $tok, $type, $id_scan_state, $identifier, |
2256
|
|
|
|
|
|
|
my $split_pretoken_flag |
2257
|
|
|
|
|
|
|
) |
2258
|
|
|
|
|
|
|
= $self->scan_complex_identifier( $i, $id_scan_state, $identifier, |
2259
|
|
|
|
|
|
|
$rtokens, $max_token_index, $expecting, |
2260
|
|
|
|
|
|
|
$rparen_type->[$paren_depth] ); |
2261
|
|
|
|
|
|
|
|
2262
|
|
|
|
|
|
|
# Check for signal to fix a special variable adjacent to a keyword, |
2263
|
|
|
|
|
|
|
# such as '$^One$0'. |
2264
|
486
|
100
|
|
|
|
1492
|
if ($split_pretoken_flag) { |
2265
|
|
|
|
|
|
|
|
2266
|
|
|
|
|
|
|
# Try to fix it by splitting the pretoken |
2267
|
3
|
50
|
33
|
|
|
41
|
if ( $i > 0 |
|
|
|
33
|
|
|
|
|
2268
|
|
|
|
|
|
|
&& $rtokens->[ $i - 1 ] eq '^' |
2269
|
|
|
|
|
|
|
&& $self->split_pretoken(1) ) |
2270
|
|
|
|
|
|
|
{ |
2271
|
3
|
|
|
|
|
9
|
$identifier = substr( $identifier, 0, 3 ); |
2272
|
3
|
|
|
|
|
5
|
$tok = $identifier; |
2273
|
|
|
|
|
|
|
} |
2274
|
|
|
|
|
|
|
else { |
2275
|
|
|
|
|
|
|
|
2276
|
|
|
|
|
|
|
# This shouldn't happen ... |
2277
|
0
|
|
|
|
|
0
|
my $var = substr( $tok, 0, 3 ); |
2278
|
0
|
|
|
|
|
0
|
my $excess = substr( $tok, 3 ); |
2279
|
0
|
|
|
|
|
0
|
$self->interrupt_logfile(); |
2280
|
0
|
|
|
|
|
0
|
$self->warning(<<EOM); |
2281
|
|
|
|
|
|
|
$input_line_number: Trouble parsing at characters '$excess' after special variable '$var'. |
2282
|
|
|
|
|
|
|
A space may be needed after '$var'. |
2283
|
|
|
|
|
|
|
EOM |
2284
|
0
|
|
|
|
|
0
|
$self->resume_logfile(); |
2285
|
|
|
|
|
|
|
} |
2286
|
|
|
|
|
|
|
} |
2287
|
486
|
|
|
|
|
958
|
return; |
2288
|
|
|
|
|
|
|
} ## end sub scan_identifier |
2289
|
|
|
|
|
|
|
|
2290
|
39
|
|
|
39
|
|
403
|
use constant VERIFY_FASTSCAN => 0; |
|
39
|
|
|
|
|
141
|
|
|
39
|
|
|
|
|
5185
|
|
2291
|
|
|
|
|
|
|
my %fast_scan_context; |
2292
|
|
|
|
|
|
|
|
2293
|
|
|
|
|
|
|
BEGIN { |
2294
|
39
|
|
|
39
|
|
49293
|
%fast_scan_context = ( |
2295
|
|
|
|
|
|
|
'$' => SCALAR_CONTEXT, |
2296
|
|
|
|
|
|
|
'*' => SCALAR_CONTEXT, |
2297
|
|
|
|
|
|
|
'@' => LIST_CONTEXT, |
2298
|
|
|
|
|
|
|
'%' => LIST_CONTEXT, |
2299
|
|
|
|
|
|
|
'&' => UNKNOWN_CONTEXT, |
2300
|
|
|
|
|
|
|
); |
2301
|
|
|
|
|
|
|
} ## end BEGIN |
2302
|
|
|
|
|
|
|
|
2303
|
|
|
|
|
|
|
sub scan_simple_identifier { |
2304
|
|
|
|
|
|
|
|
2305
|
|
|
|
|
|
|
# This is a wrapper for sub scan_identifier. It does a fast preliminary |
2306
|
|
|
|
|
|
|
# scan for certain common identifiers: |
2307
|
|
|
|
|
|
|
# '$var', '@var', %var, *var, &var, '@{...}', '%{...}' |
2308
|
|
|
|
|
|
|
# If it does not find one of these, or this is a restart, it calls the |
2309
|
|
|
|
|
|
|
# original scanner directly. |
2310
|
|
|
|
|
|
|
|
2311
|
|
|
|
|
|
|
# This gives the same results as the full scanner in about 1/4 the |
2312
|
|
|
|
|
|
|
# total runtime for a typical input stream. |
2313
|
|
|
|
|
|
|
|
2314
|
|
|
|
|
|
|
# Notation: |
2315
|
|
|
|
|
|
|
# $var * 2 |
2316
|
|
|
|
|
|
|
# ^^ ^ |
2317
|
|
|
|
|
|
|
# || | |
2318
|
|
|
|
|
|
|
# || ---- $i_next [= next nonblank pretoken ] |
2319
|
|
|
|
|
|
|
# |----$i_plus_1 [= a bareword ] |
2320
|
|
|
|
|
|
|
# ---$i_begin [= a sigil] |
2321
|
|
|
|
|
|
|
|
2322
|
4793
|
|
|
4793
|
0
|
7352
|
my $self = shift; |
2323
|
|
|
|
|
|
|
|
2324
|
4793
|
|
|
|
|
7288
|
my $i_begin = $i; |
2325
|
4793
|
|
|
|
|
7126
|
my $tok_begin = $tok; |
2326
|
4793
|
|
|
|
|
7882
|
my $i_plus_1 = $i + 1; |
2327
|
4793
|
|
|
|
|
7137
|
my $fast_scan_type; |
2328
|
|
|
|
|
|
|
|
2329
|
|
|
|
|
|
|
#------------------------------------------------------- |
2330
|
|
|
|
|
|
|
# Do full scan for anything following a pointer, such as |
2331
|
|
|
|
|
|
|
# $cref->&*; # a postderef |
2332
|
|
|
|
|
|
|
#------------------------------------------------------- |
2333
|
4793
|
100
|
66
|
|
|
27980
|
if ( $last_nonblank_token eq '->' ) { |
|
|
100
|
66
|
|
|
|
|
|
|
50
|
33
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
33
|
|
|
|
|
2334
|
|
|
|
|
|
|
|
2335
|
|
|
|
|
|
|
} |
2336
|
|
|
|
|
|
|
|
2337
|
|
|
|
|
|
|
#------------------------------ |
2338
|
|
|
|
|
|
|
# quick scan with leading sigil |
2339
|
|
|
|
|
|
|
#------------------------------ |
2340
|
|
|
|
|
|
|
elsif ( !$id_scan_state |
2341
|
|
|
|
|
|
|
&& $i_plus_1 <= $max_token_index |
2342
|
|
|
|
|
|
|
&& $fast_scan_context{$tok} ) |
2343
|
|
|
|
|
|
|
{ |
2344
|
4680
|
|
|
|
|
8227
|
$context = $fast_scan_context{$tok}; |
2345
|
|
|
|
|
|
|
|
2346
|
|
|
|
|
|
|
# look for $var, @var, ... |
2347
|
4680
|
100
|
100
|
|
|
11395
|
if ( $rtoken_type->[$i_plus_1] eq 'w' ) { |
|
|
100
|
66
|
|
|
|
|
2348
|
4392
|
|
|
|
|
7101
|
my $pretype_next = EMPTY_STRING; |
2349
|
4392
|
100
|
|
|
|
9430
|
if ( $i_plus_1 < $max_token_index ) { |
2350
|
4276
|
|
|
|
|
6741
|
my $i_next = $i_plus_1 + 1; |
2351
|
4276
|
100
|
100
|
|
|
13227
|
if ( $rtoken_type->[$i_next] eq 'b' |
2352
|
|
|
|
|
|
|
&& $i_next < $max_token_index ) |
2353
|
|
|
|
|
|
|
{ |
2354
|
1708
|
|
|
|
|
2986
|
$i_next += 1; |
2355
|
|
|
|
|
|
|
} |
2356
|
4276
|
|
|
|
|
7393
|
$pretype_next = $rtoken_type->[$i_next]; |
2357
|
|
|
|
|
|
|
} |
2358
|
4392
|
100
|
100
|
|
|
15824
|
if ( $pretype_next ne ':' && $pretype_next ne "'" ) { |
2359
|
|
|
|
|
|
|
|
2360
|
|
|
|
|
|
|
# Found type 'i' like '$var', '@var', or '%var' |
2361
|
4284
|
|
|
|
|
8325
|
$identifier = $tok . $rtokens->[$i_plus_1]; |
2362
|
4284
|
|
|
|
|
6609
|
$tok = $identifier; |
2363
|
4284
|
|
|
|
|
6995
|
$type = 'i'; |
2364
|
4284
|
|
|
|
|
6198
|
$i = $i_plus_1; |
2365
|
4284
|
|
|
|
|
7347
|
$fast_scan_type = $type; |
2366
|
|
|
|
|
|
|
} |
2367
|
|
|
|
|
|
|
} |
2368
|
|
|
|
|
|
|
|
2369
|
|
|
|
|
|
|
# Look for @{ or %{ . |
2370
|
|
|
|
|
|
|
# But we must let the full scanner handle things ${ because it may |
2371
|
|
|
|
|
|
|
# keep going to get a complete identifier like '${#}' . |
2372
|
|
|
|
|
|
|
elsif ( |
2373
|
|
|
|
|
|
|
$rtoken_type->[$i_plus_1] eq '{' |
2374
|
|
|
|
|
|
|
&& ( $tok_begin eq '@' |
2375
|
|
|
|
|
|
|
|| $tok_begin eq '%' ) |
2376
|
|
|
|
|
|
|
) |
2377
|
|
|
|
|
|
|
{ |
2378
|
|
|
|
|
|
|
|
2379
|
30
|
|
|
|
|
76
|
$identifier = $tok; |
2380
|
30
|
|
|
|
|
62
|
$type = 't'; |
2381
|
30
|
|
|
|
|
53
|
$fast_scan_type = $type; |
2382
|
|
|
|
|
|
|
} |
2383
|
|
|
|
|
|
|
else { |
2384
|
|
|
|
|
|
|
## out of tricks |
2385
|
|
|
|
|
|
|
} |
2386
|
|
|
|
|
|
|
} |
2387
|
|
|
|
|
|
|
|
2388
|
|
|
|
|
|
|
#--------------------------- |
2389
|
|
|
|
|
|
|
# Quick scan with leading -> |
2390
|
|
|
|
|
|
|
# Look for ->[ and ->{ |
2391
|
|
|
|
|
|
|
#--------------------------- |
2392
|
|
|
|
|
|
|
elsif ( |
2393
|
|
|
|
|
|
|
$tok eq '->' |
2394
|
|
|
|
|
|
|
&& $i < $max_token_index |
2395
|
|
|
|
|
|
|
&& ( $rtokens->[$i_plus_1] eq '{' |
2396
|
|
|
|
|
|
|
|| $rtokens->[$i_plus_1] eq '[' ) |
2397
|
|
|
|
|
|
|
) |
2398
|
|
|
|
|
|
|
{ |
2399
|
0
|
|
|
|
|
0
|
$type = $tok; |
2400
|
0
|
|
|
|
|
0
|
$fast_scan_type = $type; |
2401
|
0
|
|
|
|
|
0
|
$identifier = $tok; |
2402
|
0
|
|
|
|
|
0
|
$context = UNKNOWN_CONTEXT; |
2403
|
|
|
|
|
|
|
} |
2404
|
|
|
|
|
|
|
else { |
2405
|
|
|
|
|
|
|
## out of tricks |
2406
|
|
|
|
|
|
|
} |
2407
|
|
|
|
|
|
|
|
2408
|
|
|
|
|
|
|
#-------------------------------------- |
2409
|
|
|
|
|
|
|
# Verify correctness during development |
2410
|
|
|
|
|
|
|
#-------------------------------------- |
2411
|
4793
|
|
|
|
|
6692
|
if ( VERIFY_FASTSCAN && $fast_scan_type ) { |
2412
|
|
|
|
|
|
|
|
2413
|
|
|
|
|
|
|
# We will call the full method |
2414
|
|
|
|
|
|
|
my $identifier_simple = $identifier; |
2415
|
|
|
|
|
|
|
my $tok_simple = $tok; |
2416
|
|
|
|
|
|
|
my $i_simple = $i; |
2417
|
|
|
|
|
|
|
my $context_simple = $context; |
2418
|
|
|
|
|
|
|
|
2419
|
|
|
|
|
|
|
$tok = $tok_begin; |
2420
|
|
|
|
|
|
|
$i = $i_begin; |
2421
|
|
|
|
|
|
|
$self->scan_identifier(); |
2422
|
|
|
|
|
|
|
|
2423
|
|
|
|
|
|
|
if ( $tok ne $tok_simple |
2424
|
|
|
|
|
|
|
|| $type ne $fast_scan_type |
2425
|
|
|
|
|
|
|
|| $i != $i_simple |
2426
|
|
|
|
|
|
|
|| $identifier ne $identifier_simple |
2427
|
|
|
|
|
|
|
|| $id_scan_state |
2428
|
|
|
|
|
|
|
|| $context ne $context_simple ) |
2429
|
|
|
|
|
|
|
{ |
2430
|
|
|
|
|
|
|
print {*STDERR} <<EOM; |
2431
|
|
|
|
|
|
|
scan_simple_identifier differs from scan_identifier: |
2432
|
|
|
|
|
|
|
simple: i=$i_simple, tok=$tok_simple, type=$fast_scan_type, ident=$identifier_simple, context='$context_simple |
2433
|
|
|
|
|
|
|
full: i=$i, tok=$tok, type=$type, ident=$identifier, context='$context state=$id_scan_state |
2434
|
|
|
|
|
|
|
EOM |
2435
|
|
|
|
|
|
|
} |
2436
|
|
|
|
|
|
|
} |
2437
|
|
|
|
|
|
|
|
2438
|
|
|
|
|
|
|
#------------------------------------------------- |
2439
|
|
|
|
|
|
|
# call full scanner if fast method did not succeed |
2440
|
|
|
|
|
|
|
#------------------------------------------------- |
2441
|
4793
|
100
|
|
|
|
10040
|
if ( !$fast_scan_type ) { |
2442
|
479
|
|
|
|
|
1675
|
$self->scan_identifier(); |
2443
|
|
|
|
|
|
|
} |
2444
|
4793
|
|
|
|
|
8359
|
return; |
2445
|
|
|
|
|
|
|
} ## end sub scan_simple_identifier |
2446
|
|
|
|
|
|
|
|
2447
|
|
|
|
|
|
|
sub method_ok_here { |
2448
|
|
|
|
|
|
|
|
2449
|
|
|
|
|
|
|
# Return: |
2450
|
|
|
|
|
|
|
# false if this is definitely an invalid method declaration |
2451
|
|
|
|
|
|
|
# true otherwise (even if not sure) |
2452
|
|
|
|
|
|
|
|
2453
|
|
|
|
|
|
|
# We are trying to avoid problems with old uses of 'method' |
2454
|
|
|
|
|
|
|
# when --use-feature=class is set (rt145706). |
2455
|
|
|
|
|
|
|
# For example, this should cause a return of 'false': |
2456
|
|
|
|
|
|
|
|
2457
|
|
|
|
|
|
|
# method paint => sub { |
2458
|
|
|
|
|
|
|
# return; |
2459
|
|
|
|
|
|
|
# }; |
2460
|
|
|
|
|
|
|
|
2461
|
6
|
|
|
6
|
0
|
18
|
my $self = shift; |
2462
|
|
|
|
|
|
|
|
2463
|
|
|
|
|
|
|
# from do_scan_sub: |
2464
|
6
|
|
|
|
|
14
|
my $i_beg = $i + 1; |
2465
|
6
|
|
|
|
|
13
|
my $pos_beg = $rtoken_map->[$i_beg]; |
2466
|
6
|
|
|
|
|
19
|
pos($input_line) = $pos_beg; |
2467
|
|
|
|
|
|
|
|
2468
|
|
|
|
|
|
|
# TEST 1: look a valid sub NAME |
2469
|
6
|
50
|
|
|
|
43
|
if ( |
2470
|
|
|
|
|
|
|
$input_line =~ m{\G\s* |
2471
|
|
|
|
|
|
|
((?:\w*(?:'|::))*) # package - something that ends in :: or ' |
2472
|
|
|
|
|
|
|
(\w+) # NAME - required |
2473
|
|
|
|
|
|
|
}gcx |
2474
|
|
|
|
|
|
|
) |
2475
|
|
|
|
|
|
|
{ |
2476
|
|
|
|
|
|
|
# For possible future use.. |
2477
|
6
|
|
|
|
|
15
|
my $subname = $2; |
2478
|
6
|
50
|
|
|
|
27
|
my $package = $1 ? $1 : EMPTY_STRING; |
2479
|
|
|
|
|
|
|
} |
2480
|
|
|
|
|
|
|
else { |
2481
|
0
|
|
|
|
|
0
|
return; |
2482
|
|
|
|
|
|
|
} |
2483
|
|
|
|
|
|
|
|
2484
|
|
|
|
|
|
|
# TEST 2: look for invalid characters after name, such as here: |
2485
|
|
|
|
|
|
|
# method paint => sub { |
2486
|
|
|
|
|
|
|
# ... |
2487
|
|
|
|
|
|
|
# } |
2488
|
6
|
|
|
|
|
14
|
my $next_char = EMPTY_STRING; |
2489
|
6
|
100
|
|
|
|
31
|
if ( $input_line =~ m/\s*(\S)/gcx ) { $next_char = $1 } |
|
5
|
|
|
|
|
14
|
|
2490
|
6
|
100
|
66
|
|
|
38
|
if ( !$next_char || $next_char eq '#' ) { |
2491
|
1
|
|
|
|
|
5
|
( $next_char, my $i_next ) = |
2492
|
|
|
|
|
|
|
$self->find_next_nonblank_token( $max_token_index, |
2493
|
|
|
|
|
|
|
$rtokens, $max_token_index ); |
2494
|
|
|
|
|
|
|
} |
2495
|
|
|
|
|
|
|
|
2496
|
6
|
50
|
|
|
|
21
|
if ( !$next_char ) { |
2497
|
|
|
|
|
|
|
|
2498
|
|
|
|
|
|
|
# out of characters - give up |
2499
|
0
|
|
|
|
|
0
|
return; |
2500
|
|
|
|
|
|
|
} |
2501
|
|
|
|
|
|
|
|
2502
|
|
|
|
|
|
|
# Possibly valid next token types: |
2503
|
|
|
|
|
|
|
# '(' could start prototype or signature |
2504
|
|
|
|
|
|
|
# ':' could start ATTRIBUTE |
2505
|
|
|
|
|
|
|
# '{' cold start BLOCK |
2506
|
|
|
|
|
|
|
# ';' or '}' could end a statement |
2507
|
6
|
100
|
|
|
|
30
|
if ( $next_char !~ /^[\(\:\{\;\}]/ ) { |
2508
|
|
|
|
|
|
|
|
2509
|
|
|
|
|
|
|
# This does not match use feature 'class' syntax |
2510
|
3
|
|
|
|
|
13
|
return; |
2511
|
|
|
|
|
|
|
} |
2512
|
|
|
|
|
|
|
|
2513
|
|
|
|
|
|
|
# We will stop here and assume that this is valid syntax for |
2514
|
|
|
|
|
|
|
# use feature 'class'. |
2515
|
3
|
|
|
|
|
18
|
return 1; |
2516
|
|
|
|
|
|
|
} ## end sub method_ok_here |
2517
|
|
|
|
|
|
|
|
2518
|
|
|
|
|
|
|
sub class_ok_here { |
2519
|
|
|
|
|
|
|
|
2520
|
|
|
|
|
|
|
# Return: |
2521
|
|
|
|
|
|
|
# false if this is definitely an invalid class declaration |
2522
|
|
|
|
|
|
|
# true otherwise (even if not sure) |
2523
|
|
|
|
|
|
|
|
2524
|
|
|
|
|
|
|
# We are trying to avoid problems with old uses of 'class' |
2525
|
|
|
|
|
|
|
# when --use-feature=class is set (rt145706). We look ahead |
2526
|
|
|
|
|
|
|
# see if this use of 'class' is obviously inconsistent with |
2527
|
|
|
|
|
|
|
# the syntax of use feature 'class'. This allows the default |
2528
|
|
|
|
|
|
|
# setting --use-feature=class to work for old syntax too. |
2529
|
|
|
|
|
|
|
|
2530
|
|
|
|
|
|
|
# Valid class declarations look like |
2531
|
|
|
|
|
|
|
# class NAME ?ATTRS ?VERSION ?BLOCK |
2532
|
|
|
|
|
|
|
# where ATTRS VERSION and BLOCK are optional |
2533
|
|
|
|
|
|
|
|
2534
|
|
|
|
|
|
|
# For example, this should produce a return of 'false': |
2535
|
|
|
|
|
|
|
# |
2536
|
|
|
|
|
|
|
# class ExtendsBasicAttributes is BasicAttributes{ |
2537
|
|
|
|
|
|
|
|
2538
|
6
|
|
|
6
|
0
|
13
|
my $self = shift; |
2539
|
|
|
|
|
|
|
|
2540
|
|
|
|
|
|
|
# TEST 1: class stmt can only go where a new statment can start |
2541
|
6
|
50
|
|
|
|
15
|
if ( !new_statement_ok() ) { return } |
|
0
|
|
|
|
|
0
|
|
2542
|
|
|
|
|
|
|
|
2543
|
6
|
|
|
|
|
14
|
my $i_beg = $i + 1; |
2544
|
6
|
|
|
|
|
12
|
my $pos_beg = $rtoken_map->[$i_beg]; |
2545
|
6
|
|
|
|
|
19
|
pos($input_line) = $pos_beg; |
2546
|
|
|
|
|
|
|
|
2547
|
|
|
|
|
|
|
# TEST 2: look for a valid NAME |
2548
|
6
|
50
|
|
|
|
36
|
if ( |
2549
|
|
|
|
|
|
|
$input_line =~ m{\G\s* |
2550
|
|
|
|
|
|
|
((?:\w*(?:'|::))*) # package - something that ends in :: or ' |
2551
|
|
|
|
|
|
|
(\w+) # NAME - required |
2552
|
|
|
|
|
|
|
}gcx |
2553
|
|
|
|
|
|
|
) |
2554
|
|
|
|
|
|
|
{ |
2555
|
|
|
|
|
|
|
# For possible future use.. |
2556
|
6
|
|
|
|
|
17
|
my $subname = $2; |
2557
|
6
|
100
|
|
|
|
20
|
my $package = $1 ? $1 : EMPTY_STRING; |
2558
|
|
|
|
|
|
|
} |
2559
|
|
|
|
|
|
|
else { |
2560
|
0
|
|
|
|
|
0
|
return; |
2561
|
|
|
|
|
|
|
} |
2562
|
|
|
|
|
|
|
|
2563
|
|
|
|
|
|
|
# TEST 3: look for valid characters after NAME |
2564
|
6
|
|
|
|
|
13
|
my $next_char = EMPTY_STRING; |
2565
|
6
|
100
|
|
|
|
23
|
if ( $input_line =~ m/\s*(\S)/gcx ) { $next_char = $1 } |
|
5
|
|
|
|
|
11
|
|
2566
|
6
|
100
|
66
|
|
|
29
|
if ( !$next_char || $next_char eq '#' ) { |
2567
|
1
|
|
|
|
|
12
|
( $next_char, my $i_next ) = |
2568
|
|
|
|
|
|
|
$self->find_next_nonblank_token( $max_token_index, |
2569
|
|
|
|
|
|
|
$rtokens, $max_token_index ); |
2570
|
|
|
|
|
|
|
} |
2571
|
6
|
50
|
|
|
|
16
|
if ( !$next_char ) { |
2572
|
|
|
|
|
|
|
|
2573
|
|
|
|
|
|
|
# out of characters - give up |
2574
|
0
|
|
|
|
|
0
|
return; |
2575
|
|
|
|
|
|
|
} |
2576
|
|
|
|
|
|
|
|
2577
|
|
|
|
|
|
|
# Must see one of: ATTRIBUTE, VERSION, BLOCK, or end stmt |
2578
|
|
|
|
|
|
|
|
2579
|
|
|
|
|
|
|
# Possibly valid next token types: |
2580
|
|
|
|
|
|
|
# ':' could start ATTRIBUTE |
2581
|
|
|
|
|
|
|
# '\d' could start VERSION |
2582
|
|
|
|
|
|
|
# '{' cold start BLOCK |
2583
|
|
|
|
|
|
|
# ';' could end a statement |
2584
|
|
|
|
|
|
|
# '}' could end statement but would be strange |
2585
|
|
|
|
|
|
|
|
2586
|
6
|
100
|
|
|
|
23
|
if ( $next_char !~ /^[\:\d\{\;\}]/ ) { |
2587
|
|
|
|
|
|
|
|
2588
|
|
|
|
|
|
|
# This does not match use feature 'class' syntax |
2589
|
2
|
|
|
|
|
10
|
return; |
2590
|
|
|
|
|
|
|
} |
2591
|
|
|
|
|
|
|
|
2592
|
|
|
|
|
|
|
# We will stop here and assume that this is valid syntax for |
2593
|
|
|
|
|
|
|
# use feature 'class'. |
2594
|
4
|
|
|
|
|
15
|
return 1; |
2595
|
|
|
|
|
|
|
} ## end sub class_ok_here |
2596
|
|
|
|
|
|
|
|
2597
|
|
|
|
|
|
|
sub scan_id { |
2598
|
332
|
|
|
332
|
0
|
690
|
my $self = shift; |
2599
|
332
|
|
|
|
|
1382
|
( $i, $tok, $type, $id_scan_state ) = |
2600
|
|
|
|
|
|
|
$self->scan_id_do( $input_line, $i, $tok, $rtokens, $rtoken_map, |
2601
|
|
|
|
|
|
|
$id_scan_state, $max_token_index ); |
2602
|
332
|
|
|
|
|
802
|
return; |
2603
|
|
|
|
|
|
|
} ## end sub scan_id |
2604
|
|
|
|
|
|
|
|
2605
|
|
|
|
|
|
|
sub scan_number { |
2606
|
629
|
|
|
629
|
0
|
1069
|
my $self = shift; |
2607
|
629
|
|
|
|
|
949
|
my $number; |
2608
|
629
|
|
|
|
|
1826
|
( $i, $type, $number ) = |
2609
|
|
|
|
|
|
|
$self->scan_number_do( $input_line, $i, $rtoken_map, $type, |
2610
|
|
|
|
|
|
|
$max_token_index ); |
2611
|
629
|
|
|
|
|
1512
|
return $number; |
2612
|
|
|
|
|
|
|
} ## end sub scan_number |
2613
|
|
|
|
|
|
|
|
2614
|
39
|
|
|
39
|
|
373
|
use constant VERIFY_FASTNUM => 0; |
|
39
|
|
|
|
|
100
|
|
|
39
|
|
|
|
|
30580
|
|
2615
|
|
|
|
|
|
|
|
2616
|
|
|
|
|
|
|
sub scan_number_fast { |
2617
|
|
|
|
|
|
|
|
2618
|
|
|
|
|
|
|
# This is a wrapper for sub scan_number. It does a fast preliminary |
2619
|
|
|
|
|
|
|
# scan for a simple integer. It calls the original scan_number if it |
2620
|
|
|
|
|
|
|
# does not find one. |
2621
|
|
|
|
|
|
|
|
2622
|
2277
|
|
|
2277
|
0
|
3615
|
my $self = shift; |
2623
|
2277
|
|
|
|
|
3611
|
my $i_begin = $i; |
2624
|
2277
|
|
|
|
|
3574
|
my $tok_begin = $tok; |
2625
|
2277
|
|
|
|
|
3251
|
my $number; |
2626
|
|
|
|
|
|
|
|
2627
|
|
|
|
|
|
|
#--------------------------------- |
2628
|
|
|
|
|
|
|
# Quick check for (signed) integer |
2629
|
|
|
|
|
|
|
#--------------------------------- |
2630
|
|
|
|
|
|
|
|
2631
|
|
|
|
|
|
|
# This will be the string of digits: |
2632
|
2277
|
|
|
|
|
3723
|
my $i_d = $i; |
2633
|
2277
|
|
|
|
|
3626
|
my $tok_d = $tok; |
2634
|
2277
|
|
|
|
|
4030
|
my $typ_d = $rtoken_type->[$i_d]; |
2635
|
|
|
|
|
|
|
|
2636
|
|
|
|
|
|
|
# check for signed integer |
2637
|
2277
|
|
|
|
|
3739
|
my $sign = EMPTY_STRING; |
2638
|
2277
|
50
|
66
|
|
|
7009
|
if ( $typ_d ne 'd' |
|
|
|
66
|
|
|
|
|
|
|
|
33
|
|
|
|
|
2639
|
|
|
|
|
|
|
&& ( $typ_d eq '+' || $typ_d eq '-' ) |
2640
|
|
|
|
|
|
|
&& $i_d < $max_token_index ) |
2641
|
|
|
|
|
|
|
{ |
2642
|
343
|
|
|
|
|
581
|
$sign = $tok_d; |
2643
|
343
|
|
|
|
|
625
|
$i_d++; |
2644
|
343
|
|
|
|
|
646
|
$tok_d = $rtokens->[$i_d]; |
2645
|
343
|
|
|
|
|
647
|
$typ_d = $rtoken_type->[$i_d]; |
2646
|
|
|
|
|
|
|
} |
2647
|
|
|
|
|
|
|
|
2648
|
|
|
|
|
|
|
# Handle integers |
2649
|
2277
|
100
|
100
|
|
|
17023
|
if ( |
|
|
|
100
|
|
|
|
|
2650
|
|
|
|
|
|
|
$typ_d eq 'd' |
2651
|
|
|
|
|
|
|
&& ( |
2652
|
|
|
|
|
|
|
$i_d == $max_token_index |
2653
|
|
|
|
|
|
|
|| ( $i_d < $max_token_index |
2654
|
|
|
|
|
|
|
&& $rtoken_type->[ $i_d + 1 ] ne '.' |
2655
|
|
|
|
|
|
|
&& $rtoken_type->[ $i_d + 1 ] ne 'w' ) |
2656
|
|
|
|
|
|
|
) |
2657
|
|
|
|
|
|
|
) |
2658
|
|
|
|
|
|
|
{ |
2659
|
|
|
|
|
|
|
# Let let full scanner handle multi-digit integers beginning with |
2660
|
|
|
|
|
|
|
# '0' because there could be error messages. For example, '009' is |
2661
|
|
|
|
|
|
|
# not a valid number. |
2662
|
|
|
|
|
|
|
|
2663
|
1715
|
100
|
100
|
|
|
7373
|
if ( $tok_d eq '0' || substr( $tok_d, 0, 1 ) ne '0' ) { |
2664
|
1658
|
|
|
|
|
3124
|
$number = $sign . $tok_d; |
2665
|
1658
|
|
|
|
|
2696
|
$type = 'n'; |
2666
|
1658
|
|
|
|
|
4018
|
$i = $i_d; |
2667
|
|
|
|
|
|
|
} |
2668
|
|
|
|
|
|
|
} |
2669
|
|
|
|
|
|
|
|
2670
|
|
|
|
|
|
|
#-------------------------------------- |
2671
|
|
|
|
|
|
|
# Verify correctness during development |
2672
|
|
|
|
|
|
|
#-------------------------------------- |
2673
|
2277
|
|
|
|
|
3305
|
if ( VERIFY_FASTNUM && defined($number) ) { |
2674
|
|
|
|
|
|
|
|
2675
|
|
|
|
|
|
|
# We will call the full method |
2676
|
|
|
|
|
|
|
my $type_simple = $type; |
2677
|
|
|
|
|
|
|
my $i_simple = $i; |
2678
|
|
|
|
|
|
|
my $number_simple = $number; |
2679
|
|
|
|
|
|
|
|
2680
|
|
|
|
|
|
|
$tok = $tok_begin; |
2681
|
|
|
|
|
|
|
$i = $i_begin; |
2682
|
|
|
|
|
|
|
$number = $self->scan_number(); |
2683
|
|
|
|
|
|
|
|
2684
|
|
|
|
|
|
|
if ( $type ne $type_simple |
2685
|
|
|
|
|
|
|
|| ( $i != $i_simple && $i <= $max_token_index ) |
2686
|
|
|
|
|
|
|
|| $number ne $number_simple ) |
2687
|
|
|
|
|
|
|
{ |
2688
|
|
|
|
|
|
|
print {*STDERR} <<EOM; |
2689
|
|
|
|
|
|
|
scan_number_fast differs from scan_number: |
2690
|
|
|
|
|
|
|
simple: i=$i_simple, type=$type_simple, number=$number_simple |
2691
|
|
|
|
|
|
|
full: i=$i, type=$type, number=$number |
2692
|
|
|
|
|
|
|
EOM |
2693
|
|
|
|
|
|
|
} |
2694
|
|
|
|
|
|
|
} |
2695
|
|
|
|
|
|
|
|
2696
|
|
|
|
|
|
|
#---------------------------------------- |
2697
|
|
|
|
|
|
|
# call full scanner if may not be integer |
2698
|
|
|
|
|
|
|
#---------------------------------------- |
2699
|
2277
|
100
|
|
|
|
5476
|
if ( !defined($number) ) { |
2700
|
619
|
|
|
|
|
1682
|
$number = $self->scan_number(); |
2701
|
|
|
|
|
|
|
} |
2702
|
2277
|
|
|
|
|
5480
|
return $number; |
2703
|
|
|
|
|
|
|
} ## end sub scan_number_fast |
2704
|
|
|
|
|
|
|
|
2705
|
|
|
|
|
|
|
# a sub to warn if token found where term expected |
2706
|
|
|
|
|
|
|
sub error_if_expecting_TERM { |
2707
|
9
|
|
|
9
|
0
|
21
|
my $self = shift; |
2708
|
9
|
50
|
|
|
|
43
|
if ( $expecting == TERM ) { |
2709
|
9
|
50
|
|
|
|
37
|
if ( $really_want_term{$last_nonblank_type} ) { |
2710
|
0
|
|
|
|
|
0
|
$self->report_unexpected( $tok, "term", $i_tok, |
2711
|
|
|
|
|
|
|
$last_nonblank_i, $rtoken_map, $rtoken_type, $input_line ); |
2712
|
0
|
|
|
|
|
0
|
return 1; |
2713
|
|
|
|
|
|
|
} |
2714
|
|
|
|
|
|
|
} |
2715
|
9
|
|
|
|
|
21
|
return; |
2716
|
|
|
|
|
|
|
} ## end sub error_if_expecting_TERM |
2717
|
|
|
|
|
|
|
|
2718
|
|
|
|
|
|
|
# a sub to warn if token found where operator expected |
2719
|
|
|
|
|
|
|
sub error_if_expecting_OPERATOR { |
2720
|
769
|
|
|
769
|
0
|
1631
|
my ( $self, $thing ) = @_; |
2721
|
769
|
50
|
|
|
|
1829
|
if ( $expecting == OPERATOR ) { |
2722
|
0
|
0
|
|
|
|
0
|
if ( !defined($thing) ) { $thing = $tok } |
|
0
|
|
|
|
|
0
|
|
2723
|
0
|
|
|
|
|
0
|
$self->report_unexpected( $thing, "operator", $i_tok, |
2724
|
|
|
|
|
|
|
$last_nonblank_i, $rtoken_map, $rtoken_type, $input_line ); |
2725
|
0
|
0
|
|
|
|
0
|
if ( $i_tok == 0 ) { |
2726
|
0
|
|
|
|
|
0
|
$self->interrupt_logfile(); |
2727
|
0
|
|
|
|
|
0
|
$self->warning("Missing ';' or ',' above?\n"); |
2728
|
0
|
|
|
|
|
0
|
$self->resume_logfile(); |
2729
|
|
|
|
|
|
|
} |
2730
|
0
|
|
|
|
|
0
|
return 1; |
2731
|
|
|
|
|
|
|
} |
2732
|
769
|
|
|
|
|
1491
|
return; |
2733
|
|
|
|
|
|
|
} ## end sub error_if_expecting_OPERATOR |
2734
|
|
|
|
|
|
|
|
2735
|
|
|
|
|
|
|
# ------------------------------------------------------------ |
2736
|
|
|
|
|
|
|
# end scanner interfaces |
2737
|
|
|
|
|
|
|
# ------------------------------------------------------------ |
2738
|
|
|
|
|
|
|
|
2739
|
|
|
|
|
|
|
#------------------ |
2740
|
|
|
|
|
|
|
# Tokenization subs |
2741
|
|
|
|
|
|
|
#------------------ |
2742
|
|
|
|
|
|
|
sub do_GREATER_THAN_SIGN { |
2743
|
|
|
|
|
|
|
|
2744
|
31
|
|
|
31
|
0
|
92
|
my $self = shift; |
2745
|
|
|
|
|
|
|
|
2746
|
|
|
|
|
|
|
# '>' |
2747
|
31
|
50
|
|
|
|
118
|
$self->error_if_expecting_TERM() |
2748
|
|
|
|
|
|
|
if ( $expecting == TERM ); |
2749
|
31
|
|
|
|
|
76
|
return; |
2750
|
|
|
|
|
|
|
} ## end sub do_GREATER_THAN_SIGN |
2751
|
|
|
|
|
|
|
|
2752
|
|
|
|
|
|
|
sub do_VERTICAL_LINE { |
2753
|
|
|
|
|
|
|
|
2754
|
4
|
|
|
4
|
0
|
9
|
my $self = shift; |
2755
|
|
|
|
|
|
|
|
2756
|
|
|
|
|
|
|
# '|' |
2757
|
4
|
50
|
|
|
|
19
|
$self->error_if_expecting_TERM() |
2758
|
|
|
|
|
|
|
if ( $expecting == TERM ); |
2759
|
4
|
|
|
|
|
11
|
return; |
2760
|
|
|
|
|
|
|
} ## end sub do_VERTICAL_LINE |
2761
|
|
|
|
|
|
|
|
2762
|
|
|
|
|
|
|
# An identifier in possible indirect object location followed by any of |
2763
|
|
|
|
|
|
|
# these tokens: -> , ; } (plus others) is not an indirect object. Fix c257. |
2764
|
|
|
|
|
|
|
my %Z_test_hash; |
2765
|
|
|
|
|
|
|
|
2766
|
|
|
|
|
|
|
BEGIN { |
2767
|
39
|
|
|
39
|
|
468
|
my @qZ = qw# |
2768
|
|
|
|
|
|
|
-> ; } ) ] |
2769
|
|
|
|
|
|
|
=> =~ = == !~ || >= != *= .. && |= .= -= += <= %= |
2770
|
|
|
|
|
|
|
^= &&= ||= //= <=> |
2771
|
|
|
|
|
|
|
#; |
2772
|
39
|
|
|
|
|
169
|
push @qZ, ','; |
2773
|
39
|
|
|
|
|
354211
|
@{Z_test_hash}{@qZ} = (1) x scalar(@qZ); |
2774
|
|
|
|
|
|
|
} |
2775
|
|
|
|
|
|
|
|
2776
|
|
|
|
|
|
|
sub do_DOLLAR_SIGN { |
2777
|
|
|
|
|
|
|
|
2778
|
4038
|
|
|
4038
|
0
|
7027
|
my $self = shift; |
2779
|
|
|
|
|
|
|
|
2780
|
|
|
|
|
|
|
# '$' |
2781
|
|
|
|
|
|
|
# start looking for a scalar |
2782
|
4038
|
50
|
|
|
|
9222
|
$self->error_if_expecting_OPERATOR("Scalar") |
2783
|
|
|
|
|
|
|
if ( $expecting == OPERATOR ); |
2784
|
4038
|
|
|
|
|
12372
|
$self->scan_simple_identifier(); |
2785
|
|
|
|
|
|
|
|
2786
|
4038
|
100
|
|
|
|
9436
|
if ( $identifier eq '$^W' ) { |
2787
|
1
|
|
|
|
|
4
|
$self->[_saw_perl_dash_w_] = 1; |
2788
|
|
|
|
|
|
|
} |
2789
|
|
|
|
|
|
|
|
2790
|
|
|
|
|
|
|
# Check for identifier in indirect object slot |
2791
|
|
|
|
|
|
|
# (vorboard.pl, sort.t). Something like: |
2792
|
|
|
|
|
|
|
# /^(print|printf|sort|exec|system)$/ |
2793
|
4038
|
100
|
66
|
|
|
32279
|
if ( |
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
|
66
|
|
|
|
|
2794
|
|
|
|
|
|
|
$is_indirect_object_taker{$last_nonblank_token} |
2795
|
|
|
|
|
|
|
&& $last_nonblank_type eq 'k' |
2796
|
|
|
|
|
|
|
|| ( ( $last_nonblank_token eq '(' ) |
2797
|
|
|
|
|
|
|
&& $is_indirect_object_taker{ $rparen_type->[$paren_depth] } ) |
2798
|
|
|
|
|
|
|
|| ( $last_nonblank_type eq 'w' |
2799
|
|
|
|
|
|
|
|| $last_nonblank_type eq 'U' ) # possible object |
2800
|
|
|
|
|
|
|
) |
2801
|
|
|
|
|
|
|
{ |
2802
|
|
|
|
|
|
|
|
2803
|
|
|
|
|
|
|
# An identifier followed by '->' is not indirect object; |
2804
|
|
|
|
|
|
|
# fixes b1175, b1176. Fix c257: Likewise for other tokens like |
2805
|
|
|
|
|
|
|
# comma, semicolon, closing brace, and single space. |
2806
|
98
|
|
|
|
|
683
|
my ( $next_nonblank_token, $i_next ) = |
2807
|
|
|
|
|
|
|
$self->find_next_noncomment_token( $i, $rtokens, |
2808
|
|
|
|
|
|
|
$max_token_index ); |
2809
|
98
|
100
|
|
|
|
435
|
$type = 'Z' if ( !$Z_test_hash{$next_nonblank_token} ); |
2810
|
|
|
|
|
|
|
} |
2811
|
4038
|
|
|
|
|
6741
|
return; |
2812
|
|
|
|
|
|
|
} ## end sub do_DOLLAR_SIGN |
2813
|
|
|
|
|
|
|
|
2814
|
|
|
|
|
|
|
sub do_LEFT_PARENTHESIS { |
2815
|
|
|
|
|
|
|
|
2816
|
2126
|
|
|
2126
|
0
|
4074
|
my $self = shift; |
2817
|
|
|
|
|
|
|
|
2818
|
|
|
|
|
|
|
# '(' |
2819
|
2126
|
|
|
|
|
3498
|
++$paren_depth; |
2820
|
|
|
|
|
|
|
|
2821
|
|
|
|
|
|
|
# variable to enable check for brace after closing paren (c230) |
2822
|
2126
|
|
|
|
|
3846
|
my $want_brace = EMPTY_STRING; |
2823
|
|
|
|
|
|
|
|
2824
|
2126
|
100
|
|
|
|
6613
|
if ($want_paren) { |
|
|
100
|
|
|
|
|
|
2825
|
240
|
|
|
|
|
591
|
$container_type = $want_paren; |
2826
|
240
|
|
|
|
|
585
|
$want_brace = $want_paren; |
2827
|
240
|
|
|
|
|
492
|
$want_paren = EMPTY_STRING; |
2828
|
|
|
|
|
|
|
} |
2829
|
|
|
|
|
|
|
elsif ( $statement_type =~ /^sub\b/ ) { |
2830
|
14
|
|
|
|
|
35
|
$container_type = $statement_type; |
2831
|
|
|
|
|
|
|
} |
2832
|
|
|
|
|
|
|
else { |
2833
|
1872
|
|
|
|
|
3370
|
$container_type = $last_nonblank_token; |
2834
|
|
|
|
|
|
|
|
2835
|
|
|
|
|
|
|
# We can check for a syntax error here of unexpected '(', |
2836
|
|
|
|
|
|
|
# but this is going to get messy... |
2837
|
1872
|
100
|
100
|
|
|
7495
|
if ( |
2838
|
|
|
|
|
|
|
$expecting == OPERATOR |
2839
|
|
|
|
|
|
|
|
2840
|
|
|
|
|
|
|
# Be sure this is not a method call of the form |
2841
|
|
|
|
|
|
|
# &method(...), $method->(..), &{method}(...), |
2842
|
|
|
|
|
|
|
# $ref[2](list) is ok & short for $ref[2]->(list) |
2843
|
|
|
|
|
|
|
# NOTE: at present, braces in something like &{ xxx } |
2844
|
|
|
|
|
|
|
# are not marked as a block, we might have a method call. |
2845
|
|
|
|
|
|
|
# Added ')' to fix case c017, something like ()()() |
2846
|
|
|
|
|
|
|
&& $last_nonblank_token !~ /^(?:[\]\}\)\&]|\-\>)/ |
2847
|
|
|
|
|
|
|
) |
2848
|
|
|
|
|
|
|
{ |
2849
|
|
|
|
|
|
|
|
2850
|
|
|
|
|
|
|
# ref: camel 3 p 703. |
2851
|
3
|
50
|
|
|
|
12
|
if ( $last_last_nonblank_token eq 'do' ) { |
2852
|
0
|
|
|
|
|
0
|
$self->complain( |
2853
|
|
|
|
|
|
|
"do SUBROUTINE is deprecated; consider & or -> notation\n" |
2854
|
|
|
|
|
|
|
); |
2855
|
|
|
|
|
|
|
} |
2856
|
|
|
|
|
|
|
else { |
2857
|
|
|
|
|
|
|
|
2858
|
|
|
|
|
|
|
# if this is an empty list, (), then it is not an |
2859
|
|
|
|
|
|
|
# error; for example, we might have a constant pi and |
2860
|
|
|
|
|
|
|
# invoke it with pi() or just pi; |
2861
|
3
|
|
|
|
|
9
|
my ( $next_nonblank_token, $i_next ) = |
2862
|
|
|
|
|
|
|
$self->find_next_nonblank_token( $i, $rtokens, |
2863
|
|
|
|
|
|
|
$max_token_index ); |
2864
|
|
|
|
|
|
|
|
2865
|
|
|
|
|
|
|
# Patch for c029: give up error check if |
2866
|
|
|
|
|
|
|
# a side comment follows |
2867
|
3
|
50
|
33
|
|
|
19
|
if ( $next_nonblank_token ne ')' |
2868
|
|
|
|
|
|
|
&& $next_nonblank_token ne '#' ) |
2869
|
|
|
|
|
|
|
{ |
2870
|
0
|
|
|
|
|
0
|
my $hint; |
2871
|
|
|
|
|
|
|
|
2872
|
0
|
|
|
|
|
0
|
$self->error_if_expecting_OPERATOR('('); |
2873
|
|
|
|
|
|
|
|
2874
|
0
|
0
|
|
|
|
0
|
if ( $last_nonblank_type eq 'C' ) { |
|
|
0
|
|
|
|
|
|
2875
|
0
|
|
|
|
|
0
|
$hint = |
2876
|
|
|
|
|
|
|
"$last_nonblank_token has a void prototype\n"; |
2877
|
|
|
|
|
|
|
} |
2878
|
|
|
|
|
|
|
elsif ( $last_nonblank_type eq 'i' ) { |
2879
|
0
|
0
|
0
|
|
|
0
|
if ( $i_tok > 0 |
2880
|
|
|
|
|
|
|
&& $last_nonblank_token =~ /^\$/ ) |
2881
|
|
|
|
|
|
|
{ |
2882
|
0
|
|
|
|
|
0
|
$hint = |
2883
|
|
|
|
|
|
|
"Do you mean '$last_nonblank_token->(' ?\n"; |
2884
|
|
|
|
|
|
|
} |
2885
|
|
|
|
|
|
|
} |
2886
|
|
|
|
|
|
|
else { |
2887
|
|
|
|
|
|
|
## no hint |
2888
|
|
|
|
|
|
|
} |
2889
|
0
|
0
|
|
|
|
0
|
if ($hint) { |
2890
|
0
|
|
|
|
|
0
|
$self->interrupt_logfile(); |
2891
|
0
|
|
|
|
|
0
|
$self->warning($hint); |
2892
|
0
|
|
|
|
|
0
|
$self->resume_logfile(); |
2893
|
|
|
|
|
|
|
} |
2894
|
|
|
|
|
|
|
} ## end if ( $next_nonblank_token... |
2895
|
|
|
|
|
|
|
} ## end else [ if ( $last_last_nonblank_token... |
2896
|
|
|
|
|
|
|
} ## end if ( $expecting == OPERATOR... |
2897
|
|
|
|
|
|
|
} |
2898
|
|
|
|
|
|
|
|
2899
|
2126
|
|
|
|
|
7126
|
( $type_sequence, $indent_flag ) = |
2900
|
|
|
|
|
|
|
$self->increase_nesting_depth( PAREN, $rtoken_map->[$i_tok] ); |
2901
|
|
|
|
|
|
|
|
2902
|
|
|
|
|
|
|
# propagate types down through nested parens |
2903
|
|
|
|
|
|
|
# for example: the second paren in 'if ((' would be structural |
2904
|
|
|
|
|
|
|
# since the first is. |
2905
|
|
|
|
|
|
|
|
2906
|
2126
|
100
|
|
|
|
5623
|
if ( $last_nonblank_token eq '(' ) { |
2907
|
61
|
|
|
|
|
240
|
$type = $last_nonblank_type; |
2908
|
|
|
|
|
|
|
} |
2909
|
|
|
|
|
|
|
|
2910
|
|
|
|
|
|
|
# We exclude parens as structural after a ',' because it |
2911
|
|
|
|
|
|
|
# causes subtle problems with continuation indentation for |
2912
|
|
|
|
|
|
|
# something like this, where the first 'or' will not get |
2913
|
|
|
|
|
|
|
# indented. |
2914
|
|
|
|
|
|
|
# |
2915
|
|
|
|
|
|
|
# assert( |
2916
|
|
|
|
|
|
|
# __LINE__, |
2917
|
|
|
|
|
|
|
# ( not defined $check ) |
2918
|
|
|
|
|
|
|
# or ref $check |
2919
|
|
|
|
|
|
|
# or $check eq "new" |
2920
|
|
|
|
|
|
|
# or $check eq "old", |
2921
|
|
|
|
|
|
|
# ); |
2922
|
|
|
|
|
|
|
# |
2923
|
|
|
|
|
|
|
# Likewise, we exclude parens where a statement can start |
2924
|
|
|
|
|
|
|
# because of problems with continuation indentation, like |
2925
|
|
|
|
|
|
|
# these: |
2926
|
|
|
|
|
|
|
# |
2927
|
|
|
|
|
|
|
# ($firstline =~ /^#\!.*perl/) |
2928
|
|
|
|
|
|
|
# and (print $File::Find::name, "\n") |
2929
|
|
|
|
|
|
|
# and (return 1); |
2930
|
|
|
|
|
|
|
# |
2931
|
|
|
|
|
|
|
# (ref($usage_fref) =~ /CODE/) |
2932
|
|
|
|
|
|
|
# ? &$usage_fref |
2933
|
|
|
|
|
|
|
# : (&blast_usage, &blast_params, &blast_general_params); |
2934
|
|
|
|
|
|
|
|
2935
|
|
|
|
|
|
|
else { |
2936
|
2065
|
|
|
|
|
3606
|
$type = '{'; |
2937
|
|
|
|
|
|
|
} |
2938
|
|
|
|
|
|
|
|
2939
|
2126
|
50
|
|
|
|
5182
|
if ( $last_nonblank_type eq ')' ) { |
2940
|
0
|
|
|
|
|
0
|
$self->warning( |
2941
|
|
|
|
|
|
|
"Syntax error? found token '$last_nonblank_type' then '('\n"); |
2942
|
|
|
|
|
|
|
} |
2943
|
|
|
|
|
|
|
|
2944
|
|
|
|
|
|
|
# git #105: Copy container type and want-brace flag at ') ('; |
2945
|
|
|
|
|
|
|
# propagate the container type onward so that any subsequent brace gets |
2946
|
|
|
|
|
|
|
# correctly marked. I have implemented this as a general rule, which |
2947
|
|
|
|
|
|
|
# should be safe, but if necessary it could be restricted to certain |
2948
|
|
|
|
|
|
|
# container statement types such as 'for'. |
2949
|
2126
|
100
|
|
|
|
4882
|
if ( $last_nonblank_token eq ')' ) { |
2950
|
1
|
|
|
|
|
3
|
my $rvars = $rparen_vars->[$paren_depth]; |
2951
|
1
|
50
|
|
|
|
5
|
if ( defined($rvars) ) { |
2952
|
1
|
|
|
|
|
5
|
$container_type = $rparen_type->[$paren_depth]; |
2953
|
1
|
|
|
|
|
2
|
( my $type_lp, $want_brace ) = @{$rvars}; |
|
1
|
|
|
|
|
8
|
|
2954
|
|
|
|
|
|
|
} |
2955
|
|
|
|
|
|
|
} |
2956
|
|
|
|
|
|
|
|
2957
|
2126
|
|
|
|
|
4378
|
$rparen_type->[$paren_depth] = $container_type; |
2958
|
2126
|
|
|
|
|
5772
|
$rparen_vars->[$paren_depth] = [ $type, $want_brace ]; |
2959
|
2126
|
|
|
|
|
4062
|
$rparen_semicolon_count->[$paren_depth] = 0; |
2960
|
|
|
|
|
|
|
|
2961
|
2126
|
|
|
|
|
3769
|
return; |
2962
|
|
|
|
|
|
|
|
2963
|
|
|
|
|
|
|
} ## end sub do_LEFT_PARENTHESIS |
2964
|
|
|
|
|
|
|
|
2965
|
|
|
|
|
|
|
sub do_RIGHT_PARENTHESIS { |
2966
|
|
|
|
|
|
|
|
2967
|
2126
|
|
|
2126
|
0
|
4292
|
my $self = shift; |
2968
|
|
|
|
|
|
|
|
2969
|
|
|
|
|
|
|
# ')' |
2970
|
2126
|
|
|
|
|
7302
|
( $type_sequence, $indent_flag ) = |
2971
|
|
|
|
|
|
|
$self->decrease_nesting_depth( PAREN, $rtoken_map->[$i_tok] ); |
2972
|
|
|
|
|
|
|
|
2973
|
2126
|
|
|
|
|
4775
|
my $rvars = $rparen_vars->[$paren_depth]; |
2974
|
2126
|
50
|
|
|
|
5588
|
if ( defined($rvars) ) { |
2975
|
2126
|
|
|
|
|
3456
|
my ( $type_lp, $want_brace ) = @{$rvars}; |
|
2126
|
|
|
|
|
4868
|
|
2976
|
2126
|
50
|
33
|
|
|
8624
|
if ( $type_lp && $type_lp eq '{' ) { |
2977
|
2126
|
|
|
|
|
4039
|
$type = '}'; |
2978
|
|
|
|
|
|
|
} |
2979
|
|
|
|
|
|
|
} |
2980
|
|
|
|
|
|
|
|
2981
|
2126
|
|
|
|
|
3895
|
$container_type = $rparen_type->[$paren_depth]; |
2982
|
|
|
|
|
|
|
|
2983
|
|
|
|
|
|
|
# restore statement type as 'sub' at closing paren of a signature |
2984
|
|
|
|
|
|
|
# so that a subsequent ':' is identified as an attribute |
2985
|
2126
|
100
|
|
|
|
6463
|
if ( $container_type =~ /^sub\b/ ) { |
2986
|
24
|
|
|
|
|
58
|
$statement_type = $container_type; |
2987
|
|
|
|
|
|
|
} |
2988
|
|
|
|
|
|
|
|
2989
|
|
|
|
|
|
|
# /^(for|foreach)$/ |
2990
|
2126
|
100
|
|
|
|
5915
|
if ( $is_for_foreach{ $rparen_type->[$paren_depth] } ) { |
2991
|
69
|
|
|
|
|
235
|
my $num_sc = $rparen_semicolon_count->[$paren_depth]; |
2992
|
69
|
50
|
66
|
|
|
439
|
if ( $num_sc > 0 && $num_sc != 2 ) { |
2993
|
0
|
|
|
|
|
0
|
$self->warning("Expected 2 ';' in 'for(;;)' but saw $num_sc\n"); |
2994
|
|
|
|
|
|
|
} |
2995
|
|
|
|
|
|
|
} |
2996
|
|
|
|
|
|
|
|
2997
|
2126
|
50
|
|
|
|
5220
|
if ( $paren_depth > 0 ) { $paren_depth-- } |
|
2126
|
|
|
|
|
3343
|
|
2998
|
2126
|
|
|
|
|
4166
|
return; |
2999
|
|
|
|
|
|
|
} ## end sub do_RIGHT_PARENTHESIS |
3000
|
|
|
|
|
|
|
|
3001
|
|
|
|
|
|
|
sub do_COMMA { |
3002
|
|
|
|
|
|
|
|
3003
|
3075
|
|
|
3075
|
0
|
5316
|
my $self = shift; |
3004
|
|
|
|
|
|
|
|
3005
|
|
|
|
|
|
|
# ',' |
3006
|
3075
|
100
|
33
|
|
|
10751
|
if ( $last_nonblank_type eq ',' ) { |
|
|
50
|
|
|
|
|
|
3007
|
10
|
|
|
|
|
39
|
$self->complain("Repeated ','s \n"); |
3008
|
|
|
|
|
|
|
} |
3009
|
|
|
|
|
|
|
|
3010
|
|
|
|
|
|
|
# Note that we have to check both token and type here because a |
3011
|
|
|
|
|
|
|
# comma following a qw list can have last token='(' but type = 'q' |
3012
|
|
|
|
|
|
|
elsif ( $last_nonblank_token eq '(' && $last_nonblank_type eq '{' ) { |
3013
|
0
|
|
|
|
|
0
|
$self->warning("Unexpected leading ',' after a '('\n"); |
3014
|
|
|
|
|
|
|
} |
3015
|
|
|
|
|
|
|
else { |
3016
|
|
|
|
|
|
|
## ok: no complaints needed |
3017
|
|
|
|
|
|
|
} |
3018
|
|
|
|
|
|
|
|
3019
|
|
|
|
|
|
|
# patch for operator_expected: note if we are in the list (use.t) |
3020
|
3075
|
100
|
|
|
|
6436
|
if ( $statement_type eq 'use' ) { $statement_type = '_use' } |
|
6
|
|
|
|
|
15
|
|
3021
|
3075
|
|
|
|
|
4920
|
return; |
3022
|
|
|
|
|
|
|
|
3023
|
|
|
|
|
|
|
} ## end sub do_COMMA |
3024
|
|
|
|
|
|
|
|
3025
|
|
|
|
|
|
|
sub do_SEMICOLON { |
3026
|
|
|
|
|
|
|
|
3027
|
2450
|
|
|
2450
|
0
|
4939
|
my $self = shift; |
3028
|
|
|
|
|
|
|
|
3029
|
|
|
|
|
|
|
# ';' |
3030
|
2450
|
|
|
|
|
4403
|
$context = UNKNOWN_CONTEXT; |
3031
|
2450
|
|
|
|
|
3952
|
$statement_type = EMPTY_STRING; |
3032
|
2450
|
|
|
|
|
4230
|
$want_paren = EMPTY_STRING; |
3033
|
|
|
|
|
|
|
|
3034
|
|
|
|
|
|
|
# /^(for|foreach)$/ |
3035
|
2450
|
100
|
|
|
|
7397
|
if ( $is_for_foreach{ $rparen_type->[$paren_depth] } ) |
3036
|
|
|
|
|
|
|
{ # mark ; in for loop |
3037
|
|
|
|
|
|
|
|
3038
|
|
|
|
|
|
|
# Be careful: we do not want a semicolon such as the |
3039
|
|
|
|
|
|
|
# following to be included: |
3040
|
|
|
|
|
|
|
# |
3041
|
|
|
|
|
|
|
# for (sort {strcoll($a,$b);} keys %investments) { |
3042
|
|
|
|
|
|
|
|
3043
|
35
|
100
|
66
|
|
|
274
|
if ( $brace_depth == $rdepth_array->[PAREN][BRACE][$paren_depth] |
3044
|
|
|
|
|
|
|
&& $square_bracket_depth == |
3045
|
|
|
|
|
|
|
$rdepth_array->[PAREN][SQUARE_BRACKET][$paren_depth] ) |
3046
|
|
|
|
|
|
|
{ |
3047
|
|
|
|
|
|
|
|
3048
|
34
|
|
|
|
|
78
|
$type = 'f'; |
3049
|
34
|
|
|
|
|
66
|
$rparen_semicolon_count->[$paren_depth]++; |
3050
|
|
|
|
|
|
|
} |
3051
|
|
|
|
|
|
|
} |
3052
|
2450
|
|
|
|
|
4092
|
return; |
3053
|
|
|
|
|
|
|
} ## end sub do_SEMICOLON |
3054
|
|
|
|
|
|
|
|
3055
|
|
|
|
|
|
|
sub do_QUOTATION_MARK { |
3056
|
|
|
|
|
|
|
|
3057
|
1125
|
|
|
1125
|
0
|
2302
|
my $self = shift; |
3058
|
|
|
|
|
|
|
|
3059
|
|
|
|
|
|
|
# '"' |
3060
|
1125
|
50
|
|
|
|
4079
|
$self->error_if_expecting_OPERATOR("String") |
3061
|
|
|
|
|
|
|
if ( $expecting == OPERATOR ); |
3062
|
1125
|
|
|
|
|
1960
|
$in_quote = 1; |
3063
|
1125
|
|
|
|
|
1914
|
$type = 'Q'; |
3064
|
1125
|
|
|
|
|
1846
|
$allowed_quote_modifiers = EMPTY_STRING; |
3065
|
1125
|
|
|
|
|
1862
|
return; |
3066
|
|
|
|
|
|
|
} ## end sub do_QUOTATION_MARK |
3067
|
|
|
|
|
|
|
|
3068
|
|
|
|
|
|
|
sub do_APOSTROPHE { |
3069
|
|
|
|
|
|
|
|
3070
|
1164
|
|
|
1164
|
0
|
2318
|
my $self = shift; |
3071
|
|
|
|
|
|
|
|
3072
|
|
|
|
|
|
|
# "'" |
3073
|
1164
|
50
|
|
|
|
2905
|
$self->error_if_expecting_OPERATOR("String") |
3074
|
|
|
|
|
|
|
if ( $expecting == OPERATOR ); |
3075
|
1164
|
|
|
|
|
1931
|
$in_quote = 1; |
3076
|
1164
|
|
|
|
|
1915
|
$type = 'Q'; |
3077
|
1164
|
|
|
|
|
1829
|
$allowed_quote_modifiers = EMPTY_STRING; |
3078
|
1164
|
|
|
|
|
1832
|
return; |
3079
|
|
|
|
|
|
|
} ## end sub do_APOSTROPHE |
3080
|
|
|
|
|
|
|
|
3081
|
|
|
|
|
|
|
sub do_BACKTICK { |
3082
|
|
|
|
|
|
|
|
3083
|
0
|
|
|
0
|
0
|
0
|
my $self = shift; |
3084
|
|
|
|
|
|
|
|
3085
|
|
|
|
|
|
|
# '`' |
3086
|
0
|
0
|
|
|
|
0
|
$self->error_if_expecting_OPERATOR("String") |
3087
|
|
|
|
|
|
|
if ( $expecting == OPERATOR ); |
3088
|
0
|
|
|
|
|
0
|
$in_quote = 1; |
3089
|
0
|
|
|
|
|
0
|
$type = 'Q'; |
3090
|
0
|
|
|
|
|
0
|
$allowed_quote_modifiers = EMPTY_STRING; |
3091
|
0
|
|
|
|
|
0
|
return; |
3092
|
|
|
|
|
|
|
} ## end sub do_BACKTICK |
3093
|
|
|
|
|
|
|
|
3094
|
|
|
|
|
|
|
sub do_SLASH { |
3095
|
|
|
|
|
|
|
|
3096
|
207
|
|
|
207
|
0
|
505
|
my $self = shift; |
3097
|
|
|
|
|
|
|
|
3098
|
|
|
|
|
|
|
# '/' |
3099
|
207
|
|
|
|
|
364
|
my $is_pattern; |
3100
|
|
|
|
|
|
|
|
3101
|
|
|
|
|
|
|
# a pattern cannot follow certain keywords which take optional |
3102
|
|
|
|
|
|
|
# arguments, like 'shift' and 'pop'. See also '?'. |
3103
|
207
|
50
|
66
|
|
|
995
|
if ( |
|
|
50
|
|
|
|
|
|
3104
|
|
|
|
|
|
|
$last_nonblank_type eq 'k' |
3105
|
|
|
|
|
|
|
&& $is_keyword_rejecting_slash_as_pattern_delimiter{ |
3106
|
|
|
|
|
|
|
$last_nonblank_token} |
3107
|
|
|
|
|
|
|
) |
3108
|
|
|
|
|
|
|
{ |
3109
|
0
|
|
|
|
|
0
|
$is_pattern = 0; |
3110
|
|
|
|
|
|
|
} |
3111
|
|
|
|
|
|
|
elsif ( $expecting == UNKNOWN ) { # indeterminate, must guess.. |
3112
|
0
|
|
|
|
|
0
|
my $msg; |
3113
|
0
|
|
|
|
|
0
|
( $is_pattern, $msg ) = |
3114
|
|
|
|
|
|
|
$self->guess_if_pattern_or_division( $i, $rtokens, $rtoken_map, |
3115
|
|
|
|
|
|
|
$max_token_index ); |
3116
|
|
|
|
|
|
|
|
3117
|
0
|
0
|
|
|
|
0
|
if ($msg) { |
3118
|
0
|
|
|
|
|
0
|
$self->write_diagnostics("DIVIDE:$msg\n"); |
3119
|
0
|
|
|
|
|
0
|
$self->write_logfile_entry($msg); |
3120
|
|
|
|
|
|
|
} |
3121
|
|
|
|
|
|
|
} |
3122
|
207
|
|
|
|
|
435
|
else { $is_pattern = ( $expecting == TERM ) } |
3123
|
|
|
|
|
|
|
|
3124
|
207
|
100
|
|
|
|
512
|
if ($is_pattern) { |
3125
|
78
|
|
|
|
|
148
|
$in_quote = 1; |
3126
|
78
|
|
|
|
|
170
|
$type = 'Q'; |
3127
|
78
|
|
|
|
|
149
|
$allowed_quote_modifiers = '[msixpodualngc]'; |
3128
|
|
|
|
|
|
|
} |
3129
|
|
|
|
|
|
|
else { # not a pattern; check for a /= token |
3130
|
|
|
|
|
|
|
|
3131
|
129
|
50
|
|
|
|
371
|
if ( $rtokens->[ $i + 1 ] eq '=' ) { # form token /= |
3132
|
0
|
|
|
|
|
0
|
$i++; |
3133
|
0
|
|
|
|
|
0
|
$tok = '/='; |
3134
|
0
|
|
|
|
|
0
|
$type = $tok; |
3135
|
|
|
|
|
|
|
} |
3136
|
|
|
|
|
|
|
|
3137
|
|
|
|
|
|
|
#DEBUG - collecting info on what tokens follow a divide |
3138
|
|
|
|
|
|
|
# for development of guessing algorithm |
3139
|
|
|
|
|
|
|
## if ( |
3140
|
|
|
|
|
|
|
## $self->is_possible_numerator( $i, $rtokens, |
3141
|
|
|
|
|
|
|
## $max_token_index ) < 0 |
3142
|
|
|
|
|
|
|
## ) |
3143
|
|
|
|
|
|
|
## { |
3144
|
|
|
|
|
|
|
## $self->write_diagnostics("DIVIDE? $input_line\n"); |
3145
|
|
|
|
|
|
|
## } |
3146
|
|
|
|
|
|
|
} |
3147
|
207
|
|
|
|
|
412
|
return; |
3148
|
|
|
|
|
|
|
} ## end sub do_SLASH |
3149
|
|
|
|
|
|
|
|
3150
|
|
|
|
|
|
|
sub do_LEFT_CURLY_BRACKET { |
3151
|
|
|
|
|
|
|
|
3152
|
1670
|
|
|
1670
|
0
|
3516
|
my $self = shift; |
3153
|
|
|
|
|
|
|
|
3154
|
|
|
|
|
|
|
# '{' |
3155
|
|
|
|
|
|
|
# if we just saw a ')', we will label this block with |
3156
|
|
|
|
|
|
|
# its type. We need to do this to allow sub |
3157
|
|
|
|
|
|
|
# code_block_type to determine if this brace starts a |
3158
|
|
|
|
|
|
|
# code block or anonymous hash. (The type of a paren |
3159
|
|
|
|
|
|
|
# pair is the preceding token, such as 'if', 'else', |
3160
|
|
|
|
|
|
|
# etc). |
3161
|
1670
|
|
|
|
|
3068
|
$container_type = EMPTY_STRING; |
3162
|
|
|
|
|
|
|
|
3163
|
|
|
|
|
|
|
# ATTRS: for a '{' following an attribute list, reset |
3164
|
|
|
|
|
|
|
# things to look like we just saw a sub name |
3165
|
|
|
|
|
|
|
# Added 'package' (can be 'class') for --use-feature=class (rt145706) |
3166
|
1670
|
100
|
100
|
|
|
16337
|
if ( substr( $statement_type, 0, 3 ) eq 'sub' ) { |
|
|
100
|
66
|
|
|
|
|
|
|
50
|
33
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
3167
|
34
|
|
|
|
|
73
|
$last_nonblank_token = $statement_type; |
3168
|
34
|
|
|
|
|
74
|
$last_nonblank_type = 'S'; # c250 change |
3169
|
34
|
|
|
|
|
73
|
$statement_type = EMPTY_STRING; |
3170
|
|
|
|
|
|
|
} |
3171
|
|
|
|
|
|
|
elsif ( substr( $statement_type, 0, 7 ) eq 'package' ) { |
3172
|
4
|
|
|
|
|
6
|
$last_nonblank_token = $statement_type; |
3173
|
4
|
|
|
|
|
8
|
$last_nonblank_type = 'P'; # c250 change |
3174
|
4
|
|
|
|
|
6
|
$statement_type = EMPTY_STRING; |
3175
|
|
|
|
|
|
|
} |
3176
|
|
|
|
|
|
|
|
3177
|
|
|
|
|
|
|
# patch for SWITCH/CASE: hide these keywords from an immediately |
3178
|
|
|
|
|
|
|
# following opening brace |
3179
|
|
|
|
|
|
|
elsif ( ( $statement_type eq 'case' || $statement_type eq 'when' ) |
3180
|
|
|
|
|
|
|
&& $statement_type eq $last_nonblank_token ) |
3181
|
|
|
|
|
|
|
{ |
3182
|
0
|
|
|
|
|
0
|
$last_nonblank_token = ";"; |
3183
|
|
|
|
|
|
|
} |
3184
|
|
|
|
|
|
|
|
3185
|
|
|
|
|
|
|
elsif ( $last_nonblank_token eq ')' ) { |
3186
|
242
|
|
|
|
|
745
|
$last_nonblank_token = $rparen_type->[ $paren_depth + 1 ]; |
3187
|
|
|
|
|
|
|
|
3188
|
|
|
|
|
|
|
# defensive move in case of a nesting error (pbug.t) |
3189
|
|
|
|
|
|
|
# in which this ')' had no previous '(' |
3190
|
|
|
|
|
|
|
# this nesting error will have been caught |
3191
|
242
|
50
|
|
|
|
817
|
if ( !defined($last_nonblank_token) ) { |
3192
|
0
|
|
|
|
|
0
|
$last_nonblank_token = 'if'; |
3193
|
|
|
|
|
|
|
} |
3194
|
|
|
|
|
|
|
|
3195
|
|
|
|
|
|
|
# Syntax check at '){' |
3196
|
242
|
100
|
|
|
|
818
|
if ( $is_blocktype_with_paren{$last_nonblank_token} ) { |
3197
|
|
|
|
|
|
|
|
3198
|
228
|
|
|
|
|
596
|
my $rvars = $rparen_vars->[ $paren_depth + 1 ]; |
3199
|
228
|
50
|
|
|
|
756
|
if ( defined($rvars) ) { |
3200
|
228
|
|
|
|
|
469
|
my ( $type_lp, $want_brace ) = @{$rvars}; |
|
228
|
|
|
|
|
879
|
|
3201
|
|
|
|
|
|
|
|
3202
|
|
|
|
|
|
|
# OLD: Now verify that this is not a trailing form |
3203
|
|
|
|
|
|
|
# FIX for git #124: we have to skip this check because |
3204
|
|
|
|
|
|
|
# the 'gather' keyword of List::Gather can operate on |
3205
|
|
|
|
|
|
|
# a full statement, so it isn't possible to be sure |
3206
|
|
|
|
|
|
|
# this is a trailing form. |
3207
|
228
|
|
|
|
|
466
|
if ( 0 && !$want_brace ) { |
3208
|
|
|
|
|
|
|
$self->warning( |
3209
|
|
|
|
|
|
|
"syntax error at ') {', unexpected '{' after closing ')' of a trailing '$last_nonblank_token'\n" |
3210
|
|
|
|
|
|
|
); |
3211
|
|
|
|
|
|
|
} |
3212
|
|
|
|
|
|
|
} |
3213
|
|
|
|
|
|
|
} |
3214
|
|
|
|
|
|
|
else { |
3215
|
14
|
50
|
|
|
|
63
|
if ($rOpts_extended_syntax) { |
3216
|
|
|
|
|
|
|
|
3217
|
|
|
|
|
|
|
# we append a trailing () to mark this as an unknown |
3218
|
|
|
|
|
|
|
# block type. This allows perltidy to format some |
3219
|
|
|
|
|
|
|
# common extensions of perl syntax. |
3220
|
|
|
|
|
|
|
# This is used by sub code_block_type |
3221
|
14
|
|
|
|
|
60
|
$last_nonblank_token .= '()'; |
3222
|
|
|
|
|
|
|
} |
3223
|
|
|
|
|
|
|
else { |
3224
|
0
|
|
|
|
|
0
|
my $list = |
3225
|
|
|
|
|
|
|
join( SPACE, sort keys %is_blocktype_with_paren ); |
3226
|
0
|
|
|
|
|
0
|
$self->warning( |
3227
|
|
|
|
|
|
|
"syntax error at ') {', didn't see one of: <<$list>>; If this code is okay try using the -xs flag\n" |
3228
|
|
|
|
|
|
|
); |
3229
|
|
|
|
|
|
|
} |
3230
|
|
|
|
|
|
|
} |
3231
|
|
|
|
|
|
|
} |
3232
|
|
|
|
|
|
|
|
3233
|
|
|
|
|
|
|
# patch for paren-less for/foreach glitch, part 2. |
3234
|
|
|
|
|
|
|
# see note below under 'qw' |
3235
|
|
|
|
|
|
|
elsif ($last_nonblank_token eq 'qw' |
3236
|
|
|
|
|
|
|
&& $is_for_foreach{$want_paren} ) |
3237
|
|
|
|
|
|
|
{ |
3238
|
0
|
|
|
|
|
0
|
$last_nonblank_token = $want_paren; |
3239
|
0
|
0
|
|
|
|
0
|
if ( $last_last_nonblank_token eq $want_paren ) { |
3240
|
0
|
|
|
|
|
0
|
$self->warning( |
3241
|
|
|
|
|
|
|
"syntax error at '$want_paren .. {' -- missing \$ loop variable\n" |
3242
|
|
|
|
|
|
|
); |
3243
|
|
|
|
|
|
|
|
3244
|
|
|
|
|
|
|
} |
3245
|
0
|
|
|
|
|
0
|
$want_paren = EMPTY_STRING; |
3246
|
|
|
|
|
|
|
} |
3247
|
|
|
|
|
|
|
else { |
3248
|
|
|
|
|
|
|
## ok: not special |
3249
|
|
|
|
|
|
|
} |
3250
|
|
|
|
|
|
|
|
3251
|
|
|
|
|
|
|
# now identify which of the three possible types of |
3252
|
|
|
|
|
|
|
# curly braces we have: hash index container, anonymous |
3253
|
|
|
|
|
|
|
# hash reference, or code block. |
3254
|
|
|
|
|
|
|
|
3255
|
|
|
|
|
|
|
# non-structural (hash index) curly brace pair |
3256
|
|
|
|
|
|
|
# get marked 'L' and 'R' |
3257
|
1670
|
100
|
|
|
|
4904
|
if ( is_non_structural_brace() ) { |
3258
|
367
|
|
|
|
|
1403
|
$type = 'L'; |
3259
|
|
|
|
|
|
|
|
3260
|
|
|
|
|
|
|
# patch for SWITCH/CASE: |
3261
|
|
|
|
|
|
|
# allow paren-less identifier after 'when' |
3262
|
|
|
|
|
|
|
# if the brace is preceded by a space |
3263
|
367
|
0
|
33
|
|
|
1459
|
if ( $statement_type eq 'when' |
|
|
|
33
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
3264
|
|
|
|
|
|
|
&& $last_nonblank_type eq 'i' |
3265
|
|
|
|
|
|
|
&& $last_last_nonblank_type eq 'k' |
3266
|
|
|
|
|
|
|
&& ( $i_tok == 0 || $rtoken_type->[ $i_tok - 1 ] eq 'b' ) ) |
3267
|
|
|
|
|
|
|
{ |
3268
|
0
|
|
|
|
|
0
|
$type = '{'; |
3269
|
0
|
|
|
|
|
0
|
$block_type = $statement_type; |
3270
|
|
|
|
|
|
|
} |
3271
|
|
|
|
|
|
|
} |
3272
|
|
|
|
|
|
|
|
3273
|
|
|
|
|
|
|
# code and anonymous hash have the same type, '{', but are |
3274
|
|
|
|
|
|
|
# distinguished by 'block_type', |
3275
|
|
|
|
|
|
|
# which will be blank for an anonymous hash |
3276
|
|
|
|
|
|
|
else { |
3277
|
|
|
|
|
|
|
|
3278
|
1303
|
|
|
|
|
4884
|
$block_type = |
3279
|
|
|
|
|
|
|
$self->code_block_type( $i_tok, $rtokens, $rtoken_type, |
3280
|
|
|
|
|
|
|
$max_token_index ); |
3281
|
|
|
|
|
|
|
|
3282
|
|
|
|
|
|
|
# patch to promote bareword type to function taking block |
3283
|
1303
|
100
|
100
|
|
|
6136
|
if ( $block_type |
|
|
|
66
|
|
|
|
|
3284
|
|
|
|
|
|
|
&& $last_nonblank_type eq 'w' |
3285
|
|
|
|
|
|
|
&& $last_nonblank_i >= 0 ) |
3286
|
|
|
|
|
|
|
{ |
3287
|
34
|
50
|
|
|
|
164
|
if ( $routput_token_type->[$last_nonblank_i] eq 'w' ) { |
3288
|
|
|
|
|
|
|
$routput_token_type->[$last_nonblank_i] = |
3289
|
34
|
100
|
|
|
|
148
|
$is_grep_alias{$block_type} ? 'k' : 'G'; |
3290
|
|
|
|
|
|
|
} |
3291
|
|
|
|
|
|
|
} |
3292
|
|
|
|
|
|
|
|
3293
|
|
|
|
|
|
|
# patch for SWITCH/CASE: if we find a stray opening block brace |
3294
|
|
|
|
|
|
|
# where we might accept a 'case' or 'when' block, then take it |
3295
|
1303
|
100
|
100
|
|
|
5462
|
if ( $statement_type eq 'case' |
3296
|
|
|
|
|
|
|
|| $statement_type eq 'when' ) |
3297
|
|
|
|
|
|
|
{ |
3298
|
38
|
100
|
66
|
|
|
221
|
if ( !$block_type || $block_type eq '}' ) { |
3299
|
4
|
|
|
|
|
7
|
$block_type = $statement_type; |
3300
|
|
|
|
|
|
|
} |
3301
|
|
|
|
|
|
|
} |
3302
|
|
|
|
|
|
|
} |
3303
|
|
|
|
|
|
|
|
3304
|
1670
|
|
|
|
|
3983
|
$rbrace_type->[ ++$brace_depth ] = $block_type; |
3305
|
|
|
|
|
|
|
|
3306
|
|
|
|
|
|
|
# Patch for CLASS BLOCK definitions: do not update the package for the |
3307
|
|
|
|
|
|
|
# current depth if this is a BLOCK type definition. |
3308
|
|
|
|
|
|
|
# TODO: should make 'class' separate from 'package' and only do |
3309
|
|
|
|
|
|
|
# this for 'class' |
3310
|
1670
|
100
|
|
|
|
5596
|
$rbrace_package->[$brace_depth] = $current_package |
3311
|
|
|
|
|
|
|
if ( substr( $block_type, 0, 8 ) ne 'package ' ); |
3312
|
|
|
|
|
|
|
|
3313
|
1670
|
|
|
|
|
3507
|
$rbrace_structural_type->[$brace_depth] = $type; |
3314
|
1670
|
|
|
|
|
3221
|
$rbrace_context->[$brace_depth] = $context; |
3315
|
1670
|
|
|
|
|
4958
|
( $type_sequence, $indent_flag ) = |
3316
|
|
|
|
|
|
|
$self->increase_nesting_depth( BRACE, $rtoken_map->[$i_tok] ); |
3317
|
1670
|
|
|
|
|
3277
|
return; |
3318
|
|
|
|
|
|
|
} ## end sub do_LEFT_CURLY_BRACKET |
3319
|
|
|
|
|
|
|
|
3320
|
|
|
|
|
|
|
sub do_RIGHT_CURLY_BRACKET { |
3321
|
|
|
|
|
|
|
|
3322
|
1670
|
|
|
1670
|
0
|
3481
|
my $self = shift; |
3323
|
|
|
|
|
|
|
|
3324
|
|
|
|
|
|
|
# '}' |
3325
|
1670
|
|
|
|
|
3769
|
$block_type = $rbrace_type->[$brace_depth]; |
3326
|
1670
|
100
|
|
|
|
4162
|
if ($block_type) { $statement_type = EMPTY_STRING } |
|
974
|
|
|
|
|
1918
|
|
3327
|
1670
|
100
|
|
|
|
4031
|
if ( defined( $rbrace_package->[$brace_depth] ) ) { |
3328
|
1666
|
|
|
|
|
3232
|
$current_package = $rbrace_package->[$brace_depth]; |
3329
|
|
|
|
|
|
|
} |
3330
|
|
|
|
|
|
|
|
3331
|
|
|
|
|
|
|
# can happen on brace error (caught elsewhere) |
3332
|
|
|
|
|
|
|
else { |
3333
|
|
|
|
|
|
|
} |
3334
|
1670
|
|
|
|
|
5174
|
( $type_sequence, $indent_flag ) = |
3335
|
|
|
|
|
|
|
$self->decrease_nesting_depth( BRACE, $rtoken_map->[$i_tok] ); |
3336
|
|
|
|
|
|
|
|
3337
|
1670
|
100
|
|
|
|
5262
|
if ( $rbrace_structural_type->[$brace_depth] eq 'L' ) { |
3338
|
367
|
|
|
|
|
848
|
$type = 'R'; |
3339
|
|
|
|
|
|
|
} |
3340
|
|
|
|
|
|
|
|
3341
|
|
|
|
|
|
|
# propagate type information for 'do' and 'eval' blocks, and also |
3342
|
|
|
|
|
|
|
# for smartmatch operator. This is necessary to enable us to know |
3343
|
|
|
|
|
|
|
# if an operator or term is expected next. |
3344
|
1670
|
100
|
|
|
|
4851
|
if ( $is_block_operator{$block_type} ) { |
3345
|
83
|
|
|
|
|
203
|
$tok = $block_type; |
3346
|
|
|
|
|
|
|
} |
3347
|
|
|
|
|
|
|
|
3348
|
1670
|
|
|
|
|
2919
|
$context = $rbrace_context->[$brace_depth]; |
3349
|
1670
|
50
|
|
|
|
4220
|
if ( $brace_depth > 0 ) { $brace_depth--; } |
|
1670
|
|
|
|
|
2588
|
|
3350
|
1670
|
|
|
|
|
2846
|
return; |
3351
|
|
|
|
|
|
|
} ## end sub do_RIGHT_CURLY_BRACKET |
3352
|
|
|
|
|
|
|
|
3353
|
|
|
|
|
|
|
sub do_AMPERSAND { |
3354
|
|
|
|
|
|
|
|
3355
|
126
|
|
|
126
|
0
|
334
|
my $self = shift; |
3356
|
|
|
|
|
|
|
|
3357
|
|
|
|
|
|
|
# '&' = maybe sub call? start looking |
3358
|
|
|
|
|
|
|
# We have to check for sub call unless we are sure we |
3359
|
|
|
|
|
|
|
# are expecting an operator. This example from s2p |
3360
|
|
|
|
|
|
|
# got mistaken as a q operator in an early version: |
3361
|
|
|
|
|
|
|
# print BODY &q(<<'EOT'); |
3362
|
126
|
100
|
|
|
|
433
|
if ( $expecting != OPERATOR ) { |
3363
|
|
|
|
|
|
|
|
3364
|
|
|
|
|
|
|
# But only look for a sub call if we are expecting a term or |
3365
|
|
|
|
|
|
|
# if there is no existing space after the &. |
3366
|
|
|
|
|
|
|
# For example we probably don't want & as sub call here: |
3367
|
|
|
|
|
|
|
# Fcntl::S_IRUSR & $mode; |
3368
|
107
|
100
|
66
|
|
|
444
|
if ( $expecting == TERM || $next_type ne 'b' ) { |
3369
|
104
|
|
|
|
|
320
|
$self->scan_simple_identifier(); |
3370
|
|
|
|
|
|
|
} |
3371
|
|
|
|
|
|
|
} |
3372
|
|
|
|
|
|
|
else { |
3373
|
|
|
|
|
|
|
} |
3374
|
126
|
|
|
|
|
291
|
return; |
3375
|
|
|
|
|
|
|
} ## end sub do_AMPERSAND |
3376
|
|
|
|
|
|
|
|
3377
|
|
|
|
|
|
|
sub do_LESS_THAN_SIGN { |
3378
|
|
|
|
|
|
|
|
3379
|
29
|
|
|
29
|
0
|
86
|
my $self = shift; |
3380
|
|
|
|
|
|
|
|
3381
|
|
|
|
|
|
|
# '<' - angle operator or less than? |
3382
|
29
|
100
|
|
|
|
147
|
if ( $expecting != OPERATOR ) { |
3383
|
8
|
|
|
|
|
51
|
( $i, $type ) = |
3384
|
|
|
|
|
|
|
$self->find_angle_operator_termination( $input_line, $i, |
3385
|
|
|
|
|
|
|
$rtoken_map, $expecting, $max_token_index ); |
3386
|
|
|
|
|
|
|
|
3387
|
|
|
|
|
|
|
## This message is not very helpful and quite confusing if the above |
3388
|
|
|
|
|
|
|
## routine decided not to write a message with the line number. |
3389
|
|
|
|
|
|
|
## if ( $type eq '<' && $expecting == TERM ) { |
3390
|
|
|
|
|
|
|
## $self->error_if_expecting_TERM(); |
3391
|
|
|
|
|
|
|
## $self->interrupt_logfile(); |
3392
|
|
|
|
|
|
|
## $self->warning("Unterminated <> operator?\n"); |
3393
|
|
|
|
|
|
|
## $self->resume_logfile(); |
3394
|
|
|
|
|
|
|
## } |
3395
|
|
|
|
|
|
|
|
3396
|
|
|
|
|
|
|
} |
3397
|
|
|
|
|
|
|
else { |
3398
|
|
|
|
|
|
|
} |
3399
|
29
|
|
|
|
|
65
|
return; |
3400
|
|
|
|
|
|
|
} ## end sub do_LESS_THAN_SIGN |
3401
|
|
|
|
|
|
|
|
3402
|
|
|
|
|
|
|
sub do_QUESTION_MARK { |
3403
|
|
|
|
|
|
|
|
3404
|
187
|
|
|
187
|
0
|
594
|
my $self = shift; |
3405
|
|
|
|
|
|
|
|
3406
|
|
|
|
|
|
|
# '?' = conditional or starting pattern? |
3407
|
187
|
|
|
|
|
455
|
my $is_pattern; |
3408
|
|
|
|
|
|
|
|
3409
|
|
|
|
|
|
|
# Patch for rt #126965 |
3410
|
|
|
|
|
|
|
# a pattern cannot follow certain keywords which take optional |
3411
|
|
|
|
|
|
|
# arguments, like 'shift' and 'pop'. See also '/'. |
3412
|
187
|
100
|
66
|
|
|
1737
|
if ( |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
3413
|
|
|
|
|
|
|
$last_nonblank_type eq 'k' |
3414
|
|
|
|
|
|
|
&& $is_keyword_rejecting_question_as_pattern_delimiter{ |
3415
|
|
|
|
|
|
|
$last_nonblank_token} |
3416
|
|
|
|
|
|
|
) |
3417
|
|
|
|
|
|
|
{ |
3418
|
1
|
|
|
|
|
2
|
$is_pattern = 0; |
3419
|
|
|
|
|
|
|
} |
3420
|
|
|
|
|
|
|
|
3421
|
|
|
|
|
|
|
# patch for RT#131288, user constant function without prototype |
3422
|
|
|
|
|
|
|
# last type is 'U' followed by ?. |
3423
|
|
|
|
|
|
|
elsif ( $last_nonblank_type =~ /^[FUY]$/ ) { |
3424
|
1
|
|
|
|
|
5
|
$is_pattern = 0; |
3425
|
|
|
|
|
|
|
} |
3426
|
|
|
|
|
|
|
elsif ( $expecting == UNKNOWN ) { |
3427
|
|
|
|
|
|
|
|
3428
|
|
|
|
|
|
|
# In older versions of Perl, a bare ? can be a pattern |
3429
|
|
|
|
|
|
|
# delimiter. In perl version 5.22 this was |
3430
|
|
|
|
|
|
|
# dropped, but we have to support it in order to format |
3431
|
|
|
|
|
|
|
# older programs. See: |
3432
|
|
|
|
|
|
|
## https://perl.developpez.com/documentations/en/5.22.0/perl5211delta.html |
3433
|
|
|
|
|
|
|
# For example, the following line worked |
3434
|
|
|
|
|
|
|
# at one time: |
3435
|
|
|
|
|
|
|
# ?(.*)? && (print $1,"\n"); |
3436
|
|
|
|
|
|
|
# In current versions it would have to be written with slashes: |
3437
|
|
|
|
|
|
|
# /(.*)/ && (print $1,"\n"); |
3438
|
11
|
|
|
|
|
32
|
my $msg; |
3439
|
11
|
|
|
|
|
72
|
( $is_pattern, $msg ) = |
3440
|
|
|
|
|
|
|
$self->guess_if_pattern_or_conditional( $i, $rtokens, $rtoken_map, |
3441
|
|
|
|
|
|
|
$max_token_index ); |
3442
|
|
|
|
|
|
|
|
3443
|
11
|
50
|
|
|
|
49
|
if ($msg) { $self->write_logfile_entry($msg) } |
|
11
|
|
|
|
|
48
|
|
3444
|
|
|
|
|
|
|
} |
3445
|
174
|
|
|
|
|
514
|
else { $is_pattern = ( $expecting == TERM ) } |
3446
|
|
|
|
|
|
|
|
3447
|
187
|
50
|
|
|
|
573
|
if ($is_pattern) { |
3448
|
0
|
|
|
|
|
0
|
$in_quote = 1; |
3449
|
0
|
|
|
|
|
0
|
$type = 'Q'; |
3450
|
0
|
|
|
|
|
0
|
$allowed_quote_modifiers = '[msixpodualngc]'; |
3451
|
|
|
|
|
|
|
} |
3452
|
|
|
|
|
|
|
else { |
3453
|
187
|
|
|
|
|
735
|
( $type_sequence, $indent_flag ) = |
3454
|
|
|
|
|
|
|
$self->increase_nesting_depth( QUESTION_COLON, |
3455
|
|
|
|
|
|
|
$rtoken_map->[$i_tok] ); |
3456
|
|
|
|
|
|
|
} |
3457
|
187
|
|
|
|
|
447
|
return; |
3458
|
|
|
|
|
|
|
} ## end sub do_QUESTION_MARK |
3459
|
|
|
|
|
|
|
|
3460
|
|
|
|
|
|
|
sub do_STAR { |
3461
|
|
|
|
|
|
|
|
3462
|
238
|
|
|
238
|
0
|
538
|
my $self = shift; |
3463
|
|
|
|
|
|
|
|
3464
|
|
|
|
|
|
|
# '*' = typeglob, or multiply? |
3465
|
238
|
50
|
66
|
|
|
893
|
if ( $expecting == UNKNOWN && $last_nonblank_type eq 'Z' ) { |
3466
|
0
|
0
|
0
|
|
|
0
|
if ( $next_type ne 'b' |
|
|
|
0
|
|
|
|
|
3467
|
|
|
|
|
|
|
&& $next_type ne '(' |
3468
|
|
|
|
|
|
|
&& $next_type ne '#' ) # Fix c036 |
3469
|
|
|
|
|
|
|
{ |
3470
|
0
|
|
|
|
|
0
|
$expecting = TERM; |
3471
|
|
|
|
|
|
|
} |
3472
|
|
|
|
|
|
|
} |
3473
|
238
|
100
|
|
|
|
636
|
if ( $expecting == TERM ) { |
3474
|
21
|
|
|
|
|
88
|
$self->scan_simple_identifier(); |
3475
|
|
|
|
|
|
|
} |
3476
|
|
|
|
|
|
|
else { |
3477
|
|
|
|
|
|
|
|
3478
|
217
|
50
|
|
|
|
880
|
if ( $rtokens->[ $i + 1 ] eq '=' ) { |
|
|
100
|
|
|
|
|
|
3479
|
0
|
|
|
|
|
0
|
$tok = '*='; |
3480
|
0
|
|
|
|
|
0
|
$type = $tok; |
3481
|
0
|
|
|
|
|
0
|
$i++; |
3482
|
|
|
|
|
|
|
} |
3483
|
|
|
|
|
|
|
elsif ( $rtokens->[ $i + 1 ] eq '*' ) { |
3484
|
36
|
|
|
|
|
95
|
$tok = '**'; |
3485
|
36
|
|
|
|
|
86
|
$type = $tok; |
3486
|
36
|
|
|
|
|
62
|
$i++; |
3487
|
36
|
50
|
|
|
|
125
|
if ( $rtokens->[ $i + 1 ] eq '=' ) { |
3488
|
0
|
|
|
|
|
0
|
$tok = '**='; |
3489
|
0
|
|
|
|
|
0
|
$type = $tok; |
3490
|
0
|
|
|
|
|
0
|
$i++; |
3491
|
|
|
|
|
|
|
} |
3492
|
|
|
|
|
|
|
} |
3493
|
|
|
|
|
|
|
else { |
3494
|
|
|
|
|
|
|
## not multiple characters |
3495
|
|
|
|
|
|
|
} |
3496
|
|
|
|
|
|
|
} |
3497
|
238
|
|
|
|
|
439
|
return; |
3498
|
|
|
|
|
|
|
} ## end sub do_STAR |
3499
|
|
|
|
|
|
|
|
3500
|
|
|
|
|
|
|
sub do_DOT { |
3501
|
|
|
|
|
|
|
|
3502
|
150
|
|
|
150
|
0
|
339
|
my $self = shift; |
3503
|
|
|
|
|
|
|
|
3504
|
|
|
|
|
|
|
# '.' = what kind of . ? |
3505
|
150
|
100
|
|
|
|
482
|
if ( $expecting != OPERATOR ) { |
3506
|
10
|
|
|
|
|
40
|
$self->scan_number(); |
3507
|
10
|
100
|
|
|
|
45
|
if ( $type eq '.' ) { |
3508
|
2
|
50
|
|
|
|
7
|
$self->error_if_expecting_TERM() |
3509
|
|
|
|
|
|
|
if ( $expecting == TERM ); |
3510
|
|
|
|
|
|
|
} |
3511
|
|
|
|
|
|
|
} |
3512
|
|
|
|
|
|
|
else { |
3513
|
|
|
|
|
|
|
} |
3514
|
150
|
|
|
|
|
302
|
return; |
3515
|
|
|
|
|
|
|
} ## end sub do_DOT |
3516
|
|
|
|
|
|
|
|
3517
|
|
|
|
|
|
|
sub do_COLON { |
3518
|
|
|
|
|
|
|
|
3519
|
271
|
|
|
271
|
0
|
714
|
my $self = shift; |
3520
|
|
|
|
|
|
|
|
3521
|
|
|
|
|
|
|
# ':' = label, ternary, attribute, ? |
3522
|
|
|
|
|
|
|
|
3523
|
|
|
|
|
|
|
# if this is the first nonblank character, call it a label |
3524
|
|
|
|
|
|
|
# since perl seems to just swallow it |
3525
|
271
|
50
|
66
|
|
|
3725
|
if ( $input_line_number == 1 && $last_nonblank_i == -1 ) { |
|
|
100
|
66
|
|
|
|
|
|
|
100
|
66
|
|
|
|
|
|
|
100
|
66
|
|
|
|
|
|
|
100
|
|
|
|
|
|
3526
|
0
|
|
|
|
|
0
|
$type = 'J'; |
3527
|
|
|
|
|
|
|
} |
3528
|
|
|
|
|
|
|
|
3529
|
|
|
|
|
|
|
# ATTRS: check for a ':' which introduces an attribute list |
3530
|
|
|
|
|
|
|
# either after a 'sub' keyword or within a paren list |
3531
|
|
|
|
|
|
|
# Added 'package' (can be 'class') for --use-feature=class (rt145706) |
3532
|
|
|
|
|
|
|
elsif ( $statement_type =~ /^(sub|package)\b/ ) { |
3533
|
22
|
|
|
|
|
57
|
$type = 'A'; |
3534
|
22
|
|
|
|
|
50
|
$self->[_in_attribute_list_] = 1; |
3535
|
|
|
|
|
|
|
} |
3536
|
|
|
|
|
|
|
|
3537
|
|
|
|
|
|
|
# Within a signature, unless we are in a ternary. For example, |
3538
|
|
|
|
|
|
|
# from 't/filter_example.t': |
3539
|
|
|
|
|
|
|
# method foo4 ( $class: $bar ) { $class->bar($bar) } |
3540
|
|
|
|
|
|
|
elsif ( $rparen_type->[$paren_depth] =~ /^sub\b/ |
3541
|
|
|
|
|
|
|
&& !is_balanced_closing_container(QUESTION_COLON) ) |
3542
|
|
|
|
|
|
|
{ |
3543
|
1
|
|
|
|
|
6
|
$type = 'A'; |
3544
|
1
|
|
|
|
|
3
|
$self->[_in_attribute_list_] = 1; |
3545
|
|
|
|
|
|
|
} |
3546
|
|
|
|
|
|
|
|
3547
|
|
|
|
|
|
|
# check for scalar attribute, such as |
3548
|
|
|
|
|
|
|
# my $foo : shared = 1; |
3549
|
|
|
|
|
|
|
elsif ($is_my_our_state{$statement_type} |
3550
|
|
|
|
|
|
|
&& $rcurrent_depth->[QUESTION_COLON] == 0 ) |
3551
|
|
|
|
|
|
|
{ |
3552
|
15
|
|
|
|
|
38
|
$type = 'A'; |
3553
|
15
|
|
|
|
|
38
|
$self->[_in_attribute_list_] = 1; |
3554
|
|
|
|
|
|
|
} |
3555
|
|
|
|
|
|
|
|
3556
|
|
|
|
|
|
|
# Look for Switch::Plain syntax if an error would otherwise occur |
3557
|
|
|
|
|
|
|
# here. Note that we do not need to check if the extended syntax |
3558
|
|
|
|
|
|
|
# flag is set because otherwise an error would occur, and we would |
3559
|
|
|
|
|
|
|
# then have to output a message telling the user to set the |
3560
|
|
|
|
|
|
|
# extended syntax flag to avoid the error. |
3561
|
|
|
|
|
|
|
# case 1: { |
3562
|
|
|
|
|
|
|
# default: { |
3563
|
|
|
|
|
|
|
# default: |
3564
|
|
|
|
|
|
|
# Note that the line 'default:' will be parsed as a label elsewhere. |
3565
|
|
|
|
|
|
|
elsif ( $is_case_default{$statement_type} |
3566
|
|
|
|
|
|
|
&& !is_balanced_closing_container(QUESTION_COLON) ) |
3567
|
|
|
|
|
|
|
{ |
3568
|
|
|
|
|
|
|
# mark it as a perltidy label type |
3569
|
46
|
|
|
|
|
100
|
$type = 'J'; |
3570
|
|
|
|
|
|
|
} |
3571
|
|
|
|
|
|
|
|
3572
|
|
|
|
|
|
|
# otherwise, it should be part of a ?/: operator |
3573
|
|
|
|
|
|
|
else { |
3574
|
187
|
|
|
|
|
792
|
( $type_sequence, $indent_flag ) = |
3575
|
|
|
|
|
|
|
$self->decrease_nesting_depth( QUESTION_COLON, |
3576
|
|
|
|
|
|
|
$rtoken_map->[$i_tok] ); |
3577
|
187
|
50
|
|
|
|
1099
|
if ( $last_nonblank_token eq '?' ) { |
3578
|
0
|
|
|
|
|
0
|
$self->warning("Syntax error near ? :\n"); |
3579
|
|
|
|
|
|
|
} |
3580
|
|
|
|
|
|
|
} |
3581
|
271
|
|
|
|
|
526
|
return; |
3582
|
|
|
|
|
|
|
} ## end sub do_COLON |
3583
|
|
|
|
|
|
|
|
3584
|
|
|
|
|
|
|
sub do_PLUS_SIGN { |
3585
|
|
|
|
|
|
|
|
3586
|
227
|
|
|
227
|
0
|
533
|
my $self = shift; |
3587
|
|
|
|
|
|
|
|
3588
|
|
|
|
|
|
|
# '+' = what kind of plus? |
3589
|
227
|
100
|
|
|
|
896
|
if ( $expecting == TERM ) { |
|
|
100
|
|
|
|
|
|
3590
|
13
|
|
|
|
|
61
|
my $number = $self->scan_number_fast(); |
3591
|
|
|
|
|
|
|
|
3592
|
|
|
|
|
|
|
# unary plus is safest assumption if not a number |
3593
|
13
|
50
|
|
|
|
52
|
if ( !defined($number) ) { $type = 'p'; } |
|
13
|
|
|
|
|
31
|
|
3594
|
|
|
|
|
|
|
} |
3595
|
|
|
|
|
|
|
elsif ( $expecting == OPERATOR ) { |
3596
|
|
|
|
|
|
|
} |
3597
|
|
|
|
|
|
|
else { |
3598
|
3
|
100
|
|
|
|
11
|
if ( $next_type eq 'w' ) { $type = 'p' } |
|
2
|
|
|
|
|
4
|
|
3599
|
|
|
|
|
|
|
} |
3600
|
227
|
|
|
|
|
399
|
return; |
3601
|
|
|
|
|
|
|
} ## end sub do_PLUS_SIGN |
3602
|
|
|
|
|
|
|
|
3603
|
|
|
|
|
|
|
sub do_AT_SIGN { |
3604
|
|
|
|
|
|
|
|
3605
|
438
|
|
|
438
|
0
|
1083
|
my $self = shift; |
3606
|
|
|
|
|
|
|
|
3607
|
|
|
|
|
|
|
# '@' = sigil for array? |
3608
|
438
|
50
|
|
|
|
1398
|
$self->error_if_expecting_OPERATOR("Array") |
3609
|
|
|
|
|
|
|
if ( $expecting == OPERATOR ); |
3610
|
438
|
|
|
|
|
1729
|
$self->scan_simple_identifier(); |
3611
|
438
|
|
|
|
|
846
|
return; |
3612
|
|
|
|
|
|
|
} ## end sub do_AT_SIGN |
3613
|
|
|
|
|
|
|
|
3614
|
|
|
|
|
|
|
sub do_PERCENT_SIGN { |
3615
|
|
|
|
|
|
|
|
3616
|
202
|
|
|
202
|
0
|
545
|
my $self = shift; |
3617
|
|
|
|
|
|
|
|
3618
|
|
|
|
|
|
|
# '%' = hash or modulo? |
3619
|
|
|
|
|
|
|
# first guess is hash if no following blank or paren |
3620
|
202
|
50
|
|
|
|
763
|
if ( $expecting == UNKNOWN ) { |
3621
|
0
|
0
|
0
|
|
|
0
|
if ( $next_type ne 'b' && $next_type ne '(' ) { |
3622
|
0
|
|
|
|
|
0
|
$expecting = TERM; |
3623
|
|
|
|
|
|
|
} |
3624
|
|
|
|
|
|
|
} |
3625
|
202
|
100
|
|
|
|
716
|
if ( $expecting == TERM ) { |
3626
|
192
|
|
|
|
|
700
|
$self->scan_simple_identifier(); |
3627
|
|
|
|
|
|
|
} |
3628
|
202
|
|
|
|
|
1509
|
return; |
3629
|
|
|
|
|
|
|
} ## end sub do_PERCENT_SIGN |
3630
|
|
|
|
|
|
|
|
3631
|
|
|
|
|
|
|
sub do_LEFT_SQUARE_BRACKET { |
3632
|
|
|
|
|
|
|
|
3633
|
595
|
|
|
595
|
0
|
1309
|
my $self = shift; |
3634
|
|
|
|
|
|
|
|
3635
|
|
|
|
|
|
|
# '[' |
3636
|
595
|
|
|
|
|
1421
|
$rsquare_bracket_type->[ ++$square_bracket_depth ] = |
3637
|
|
|
|
|
|
|
$last_nonblank_token; |
3638
|
595
|
|
|
|
|
2152
|
( $type_sequence, $indent_flag ) = |
3639
|
|
|
|
|
|
|
$self->increase_nesting_depth( SQUARE_BRACKET, |
3640
|
|
|
|
|
|
|
$rtoken_map->[$i_tok] ); |
3641
|
|
|
|
|
|
|
|
3642
|
|
|
|
|
|
|
# It may seem odd, but structural square brackets have |
3643
|
|
|
|
|
|
|
# type '{' and '}'. This simplifies the indentation logic. |
3644
|
595
|
100
|
|
|
|
2126
|
if ( !is_non_structural_brace() ) { |
3645
|
288
|
|
|
|
|
762
|
$type = '{'; |
3646
|
|
|
|
|
|
|
} |
3647
|
595
|
|
|
|
|
1449
|
$rsquare_bracket_structural_type->[$square_bracket_depth] = $type; |
3648
|
595
|
|
|
|
|
1150
|
return; |
3649
|
|
|
|
|
|
|
} ## end sub do_LEFT_SQUARE_BRACKET |
3650
|
|
|
|
|
|
|
|
3651
|
|
|
|
|
|
|
sub do_RIGHT_SQUARE_BRACKET { |
3652
|
|
|
|
|
|
|
|
3653
|
595
|
|
|
595
|
0
|
2223
|
my $self = shift; |
3654
|
|
|
|
|
|
|
|
3655
|
|
|
|
|
|
|
# ']' |
3656
|
595
|
|
|
|
|
2244
|
( $type_sequence, $indent_flag ) = |
3657
|
|
|
|
|
|
|
$self->decrease_nesting_depth( SQUARE_BRACKET, |
3658
|
|
|
|
|
|
|
$rtoken_map->[$i_tok] ); |
3659
|
|
|
|
|
|
|
|
3660
|
595
|
100
|
|
|
|
2297
|
if ( $rsquare_bracket_structural_type->[$square_bracket_depth] eq '{' ) |
3661
|
|
|
|
|
|
|
{ |
3662
|
288
|
|
|
|
|
578
|
$type = '}'; |
3663
|
|
|
|
|
|
|
} |
3664
|
|
|
|
|
|
|
|
3665
|
|
|
|
|
|
|
# propagate type information for smartmatch operator. This is |
3666
|
|
|
|
|
|
|
# necessary to enable us to know if an operator or term is expected |
3667
|
|
|
|
|
|
|
# next. |
3668
|
595
|
100
|
|
|
|
1777
|
if ( $rsquare_bracket_type->[$square_bracket_depth] eq '~~' ) { |
3669
|
20
|
|
|
|
|
43
|
$tok = $rsquare_bracket_type->[$square_bracket_depth]; |
3670
|
|
|
|
|
|
|
} |
3671
|
|
|
|
|
|
|
|
3672
|
595
|
50
|
|
|
|
1594
|
if ( $square_bracket_depth > 0 ) { $square_bracket_depth--; } |
|
595
|
|
|
|
|
2032
|
|
3673
|
595
|
|
|
|
|
1040
|
return; |
3674
|
|
|
|
|
|
|
} ## end sub do_RIGHT_SQUARE_BRACKET |
3675
|
|
|
|
|
|
|
|
3676
|
|
|
|
|
|
|
sub do_MINUS_SIGN { |
3677
|
|
|
|
|
|
|
|
3678
|
441
|
|
|
441
|
0
|
995
|
my $self = shift; |
3679
|
|
|
|
|
|
|
|
3680
|
|
|
|
|
|
|
# '-' = what kind of minus? |
3681
|
441
|
100
|
100
|
|
|
3020
|
if ( ( $expecting != OPERATOR ) |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
3682
|
|
|
|
|
|
|
&& $is_file_test_operator{$next_tok} ) |
3683
|
|
|
|
|
|
|
{ |
3684
|
10
|
|
|
|
|
77
|
my ( $next_nonblank_token, $i_next ) = |
3685
|
|
|
|
|
|
|
$self->find_next_nonblank_token( $i + 1, $rtokens, |
3686
|
|
|
|
|
|
|
$max_token_index ); |
3687
|
|
|
|
|
|
|
|
3688
|
|
|
|
|
|
|
# check for a quoted word like "-w=>xx"; |
3689
|
|
|
|
|
|
|
# it is sufficient to just check for a following '=' |
3690
|
10
|
50
|
|
|
|
80
|
if ( $next_nonblank_token eq '=' ) { |
3691
|
0
|
|
|
|
|
0
|
$type = 'm'; |
3692
|
|
|
|
|
|
|
} |
3693
|
|
|
|
|
|
|
else { |
3694
|
10
|
|
|
|
|
24
|
$i++; |
3695
|
10
|
|
|
|
|
32
|
$tok .= $next_tok; |
3696
|
10
|
|
|
|
|
57
|
$type = 'F'; |
3697
|
|
|
|
|
|
|
} |
3698
|
|
|
|
|
|
|
} |
3699
|
|
|
|
|
|
|
elsif ( $expecting == TERM ) { |
3700
|
330
|
|
|
|
|
1070
|
my $number = $self->scan_number_fast(); |
3701
|
|
|
|
|
|
|
|
3702
|
|
|
|
|
|
|
# maybe part of bareword token? unary is safest |
3703
|
330
|
100
|
|
|
|
928
|
if ( !defined($number) ) { $type = 'm'; } |
|
288
|
|
|
|
|
557
|
|
3704
|
|
|
|
|
|
|
|
3705
|
|
|
|
|
|
|
} |
3706
|
|
|
|
|
|
|
elsif ( $expecting == OPERATOR ) { |
3707
|
|
|
|
|
|
|
} |
3708
|
|
|
|
|
|
|
else { |
3709
|
|
|
|
|
|
|
|
3710
|
4
|
50
|
|
|
|
18
|
if ( $next_type eq 'w' ) { |
3711
|
4
|
|
|
|
|
14
|
$type = 'm'; |
3712
|
|
|
|
|
|
|
} |
3713
|
|
|
|
|
|
|
} |
3714
|
441
|
|
|
|
|
760
|
return; |
3715
|
|
|
|
|
|
|
} ## end sub do_MINUS_SIGN |
3716
|
|
|
|
|
|
|
|
3717
|
|
|
|
|
|
|
sub do_CARAT_SIGN { |
3718
|
|
|
|
|
|
|
|
3719
|
12
|
|
|
12
|
0
|
23
|
my $self = shift; |
3720
|
|
|
|
|
|
|
|
3721
|
|
|
|
|
|
|
# '^' |
3722
|
|
|
|
|
|
|
# check for special variables like ${^WARNING_BITS} |
3723
|
12
|
100
|
|
|
|
40
|
if ( $expecting == TERM ) { |
3724
|
|
|
|
|
|
|
|
3725
|
5
|
50
|
33
|
|
|
74
|
if ( $last_nonblank_token eq '{' |
|
|
|
33
|
|
|
|
|
3726
|
|
|
|
|
|
|
&& ( $next_tok !~ /^\d/ ) |
3727
|
|
|
|
|
|
|
&& ( $next_tok =~ /^\w/ ) ) |
3728
|
|
|
|
|
|
|
{ |
3729
|
|
|
|
|
|
|
|
3730
|
5
|
100
|
|
|
|
21
|
if ( $next_tok eq 'W' ) { |
3731
|
1
|
|
|
|
|
2
|
$self->[_saw_perl_dash_w_] = 1; |
3732
|
|
|
|
|
|
|
} |
3733
|
5
|
|
|
|
|
15
|
$tok = $tok . $next_tok; |
3734
|
5
|
|
|
|
|
12
|
$i = $i + 1; |
3735
|
5
|
|
|
|
|
9
|
$type = 'w'; |
3736
|
|
|
|
|
|
|
|
3737
|
|
|
|
|
|
|
# Optional coding to try to catch syntax errors. This can |
3738
|
|
|
|
|
|
|
# be removed if it ever causes incorrect warning messages. |
3739
|
|
|
|
|
|
|
# The '{^' should be preceded by either by a type or '$#' |
3740
|
|
|
|
|
|
|
# Examples: |
3741
|
|
|
|
|
|
|
# $#{^CAPTURE} ok |
3742
|
|
|
|
|
|
|
# *${^LAST_FH}{NAME} ok |
3743
|
|
|
|
|
|
|
# @{^HOWDY} ok |
3744
|
|
|
|
|
|
|
# $hash{^HOWDY} error |
3745
|
|
|
|
|
|
|
|
3746
|
|
|
|
|
|
|
# Note that a type sigil '$' may be tokenized as 'Z' |
3747
|
|
|
|
|
|
|
# after something like 'print', so allow type 'Z' |
3748
|
5
|
0
|
33
|
|
|
19
|
if ( $last_last_nonblank_type ne 't' |
|
|
|
33
|
|
|
|
|
3749
|
|
|
|
|
|
|
&& $last_last_nonblank_type ne 'Z' |
3750
|
|
|
|
|
|
|
&& $last_last_nonblank_token ne '$#' ) |
3751
|
|
|
|
|
|
|
{ |
3752
|
0
|
|
|
|
|
0
|
$self->warning("Possible syntax error near '{^'\n"); |
3753
|
|
|
|
|
|
|
} |
3754
|
|
|
|
|
|
|
} |
3755
|
|
|
|
|
|
|
|
3756
|
|
|
|
|
|
|
else { |
3757
|
0
|
0
|
|
|
|
0
|
if ( !$self->error_if_expecting_TERM() ) { |
3758
|
|
|
|
|
|
|
|
3759
|
|
|
|
|
|
|
# Something like this is valid but strange: |
3760
|
|
|
|
|
|
|
# undef ^I; |
3761
|
0
|
|
|
|
|
0
|
$self->complain("The '^' seems unusual here\n"); |
3762
|
|
|
|
|
|
|
} |
3763
|
|
|
|
|
|
|
} |
3764
|
|
|
|
|
|
|
} |
3765
|
12
|
|
|
|
|
23
|
return; |
3766
|
|
|
|
|
|
|
} ## end sub do_CARAT_SIGN |
3767
|
|
|
|
|
|
|
|
3768
|
|
|
|
|
|
|
sub do_DOUBLE_COLON { |
3769
|
|
|
|
|
|
|
|
3770
|
9
|
|
|
9
|
0
|
21
|
my $self = shift; |
3771
|
|
|
|
|
|
|
|
3772
|
|
|
|
|
|
|
# '::' = probably a sub call |
3773
|
9
|
|
|
|
|
25
|
$self->scan_bare_identifier(); |
3774
|
9
|
|
|
|
|
18
|
return; |
3775
|
|
|
|
|
|
|
} ## end sub do_DOUBLE_COLON |
3776
|
|
|
|
|
|
|
|
3777
|
|
|
|
|
|
|
sub do_LEFT_SHIFT { |
3778
|
|
|
|
|
|
|
|
3779
|
7
|
|
|
7
|
0
|
28
|
my $self = shift; |
3780
|
|
|
|
|
|
|
|
3781
|
|
|
|
|
|
|
# '<<' = maybe a here-doc? |
3782
|
7
|
50
|
|
|
|
36
|
if ( $expecting != OPERATOR ) { |
3783
|
7
|
|
|
|
|
28
|
my ( $found_target, $here_doc_target, $here_quote_character, |
3784
|
|
|
|
|
|
|
$saw_error ); |
3785
|
|
|
|
|
|
|
( |
3786
|
7
|
|
|
|
|
43
|
$found_target, $here_doc_target, $here_quote_character, $i, |
3787
|
|
|
|
|
|
|
$saw_error |
3788
|
|
|
|
|
|
|
) |
3789
|
|
|
|
|
|
|
= $self->find_here_doc( $expecting, $i, $rtokens, $rtoken_map, |
3790
|
|
|
|
|
|
|
$max_token_index ); |
3791
|
|
|
|
|
|
|
|
3792
|
7
|
50
|
|
|
|
35
|
if ($found_target) { |
|
|
0
|
|
|
|
|
|
3793
|
7
|
|
|
|
|
19
|
push @{$rhere_target_list}, |
|
7
|
|
|
|
|
27
|
|
3794
|
|
|
|
|
|
|
[ $here_doc_target, $here_quote_character ]; |
3795
|
7
|
|
|
|
|
26
|
$type = 'h'; |
3796
|
7
|
50
|
|
|
|
75
|
if ( length($here_doc_target) > 80 ) { |
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
3797
|
0
|
|
|
|
|
0
|
my $truncated = substr( $here_doc_target, 0, 80 ); |
3798
|
0
|
|
|
|
|
0
|
$self->complain("Long here-target: '$truncated' ...\n"); |
3799
|
|
|
|
|
|
|
} |
3800
|
|
|
|
|
|
|
elsif ( !$here_doc_target ) { |
3801
|
0
|
0
|
|
|
|
0
|
$self->warning( |
3802
|
|
|
|
|
|
|
'Use of bare << to mean <<"" is deprecated' . "\n" ) |
3803
|
|
|
|
|
|
|
if ( !$here_quote_character ); |
3804
|
|
|
|
|
|
|
} |
3805
|
|
|
|
|
|
|
elsif ( $here_doc_target !~ /^[A-Z_]\w+$/ ) { |
3806
|
2
|
|
|
|
|
17
|
$self->complain( |
3807
|
|
|
|
|
|
|
"Unconventional here-target: '$here_doc_target'\n"); |
3808
|
|
|
|
|
|
|
} |
3809
|
|
|
|
|
|
|
else { |
3810
|
|
|
|
|
|
|
## ok: nothing to complain about |
3811
|
|
|
|
|
|
|
} |
3812
|
|
|
|
|
|
|
} |
3813
|
|
|
|
|
|
|
elsif ( $expecting == TERM ) { |
3814
|
0
|
0
|
|
|
|
0
|
if ( !$saw_error ) { |
3815
|
|
|
|
|
|
|
|
3816
|
|
|
|
|
|
|
# shouldn't happen..arriving here implies an error in |
3817
|
|
|
|
|
|
|
# the logic in sub 'find_here_doc' |
3818
|
0
|
|
|
|
|
0
|
if (DEVEL_MODE) { |
3819
|
|
|
|
|
|
|
$self->Fault(<<EOM); |
3820
|
|
|
|
|
|
|
Program bug; didn't find here doc target |
3821
|
|
|
|
|
|
|
EOM |
3822
|
|
|
|
|
|
|
} |
3823
|
|
|
|
|
|
|
$self->warning( |
3824
|
0
|
|
|
|
|
0
|
"Possible program error: didn't find here doc target\n" |
3825
|
|
|
|
|
|
|
); |
3826
|
0
|
|
|
|
|
0
|
$self->report_definite_bug(); |
3827
|
|
|
|
|
|
|
} |
3828
|
|
|
|
|
|
|
} |
3829
|
|
|
|
|
|
|
|
3830
|
|
|
|
|
|
|
# target not found, expecting == UNKNOWN |
3831
|
|
|
|
|
|
|
else { |
3832
|
|
|
|
|
|
|
# assume it is a shift |
3833
|
|
|
|
|
|
|
} |
3834
|
|
|
|
|
|
|
} |
3835
|
|
|
|
|
|
|
else { |
3836
|
|
|
|
|
|
|
} |
3837
|
7
|
|
|
|
|
21
|
return; |
3838
|
|
|
|
|
|
|
} ## end sub do_LEFT_SHIFT |
3839
|
|
|
|
|
|
|
|
3840
|
|
|
|
|
|
|
sub do_NEW_HERE_DOC { |
3841
|
|
|
|
|
|
|
|
3842
|
|
|
|
|
|
|
# '<<~' = a here-doc, new type added in v26 |
3843
|
|
|
|
|
|
|
|
3844
|
2
|
|
|
2
|
0
|
5
|
my $self = shift; |
3845
|
|
|
|
|
|
|
|
3846
|
|
|
|
|
|
|
return |
3847
|
2
|
50
|
|
|
|
9
|
if ( $i >= $max_token_index ); # here-doc not possible if end of line |
3848
|
2
|
50
|
|
|
|
10
|
if ( $expecting != OPERATOR ) { |
3849
|
2
|
|
|
|
|
5
|
my ( $found_target, $here_doc_target, $here_quote_character, |
3850
|
|
|
|
|
|
|
$saw_error ); |
3851
|
|
|
|
|
|
|
( |
3852
|
2
|
|
|
|
|
14
|
$found_target, $here_doc_target, $here_quote_character, $i, |
3853
|
|
|
|
|
|
|
$saw_error |
3854
|
|
|
|
|
|
|
) |
3855
|
|
|
|
|
|
|
= $self->find_here_doc( $expecting, $i, $rtokens, $rtoken_map, |
3856
|
|
|
|
|
|
|
$max_token_index ); |
3857
|
|
|
|
|
|
|
|
3858
|
2
|
50
|
|
|
|
9
|
if ($found_target) { |
|
|
0
|
|
|
|
|
|
3859
|
|
|
|
|
|
|
|
3860
|
2
|
50
|
|
|
|
21
|
if ( length($here_doc_target) > 80 ) { |
|
|
50
|
|
|
|
|
|
3861
|
0
|
|
|
|
|
0
|
my $truncated = substr( $here_doc_target, 0, 80 ); |
3862
|
0
|
|
|
|
|
0
|
$self->complain("Long here-target: '$truncated' ...\n"); |
3863
|
|
|
|
|
|
|
} |
3864
|
|
|
|
|
|
|
elsif ( $here_doc_target !~ /^[A-Z_]\w+$/ ) { |
3865
|
0
|
|
|
|
|
0
|
$self->complain( |
3866
|
|
|
|
|
|
|
"Unconventional here-target: '$here_doc_target'\n"); |
3867
|
|
|
|
|
|
|
} |
3868
|
|
|
|
|
|
|
else { |
3869
|
|
|
|
|
|
|
## ok: nothing to complain about |
3870
|
|
|
|
|
|
|
} |
3871
|
|
|
|
|
|
|
|
3872
|
|
|
|
|
|
|
# Note that we put a leading space on the here quote |
3873
|
|
|
|
|
|
|
# character indicate that it may be preceded by spaces |
3874
|
2
|
|
|
|
|
7
|
$here_quote_character = SPACE . $here_quote_character; |
3875
|
2
|
|
|
|
|
6
|
push @{$rhere_target_list}, |
|
2
|
|
|
|
|
9
|
|
3876
|
|
|
|
|
|
|
[ $here_doc_target, $here_quote_character ]; |
3877
|
2
|
|
|
|
|
6
|
$type = 'h'; |
3878
|
|
|
|
|
|
|
} |
3879
|
|
|
|
|
|
|
|
3880
|
|
|
|
|
|
|
# target not found .. |
3881
|
|
|
|
|
|
|
elsif ( $expecting == TERM ) { |
3882
|
0
|
0
|
|
|
|
0
|
if ( !$saw_error ) { |
3883
|
|
|
|
|
|
|
|
3884
|
|
|
|
|
|
|
# shouldn't happen..arriving here implies an error in |
3885
|
|
|
|
|
|
|
# the logic in sub 'find_here_doc' |
3886
|
0
|
|
|
|
|
0
|
if (DEVEL_MODE) { |
3887
|
|
|
|
|
|
|
$self->Fault(<<EOM); |
3888
|
|
|
|
|
|
|
Program bug; didn't find here doc target |
3889
|
|
|
|
|
|
|
EOM |
3890
|
|
|
|
|
|
|
} |
3891
|
|
|
|
|
|
|
$self->warning( |
3892
|
0
|
|
|
|
|
0
|
"Possible program error: didn't find here doc target\n" |
3893
|
|
|
|
|
|
|
); |
3894
|
0
|
|
|
|
|
0
|
$self->report_definite_bug(); |
3895
|
|
|
|
|
|
|
} |
3896
|
|
|
|
|
|
|
} |
3897
|
|
|
|
|
|
|
|
3898
|
|
|
|
|
|
|
# Target not found, expecting==UNKNOWN |
3899
|
|
|
|
|
|
|
else { |
3900
|
0
|
|
|
|
|
0
|
$self->warning("didn't find here doc target after '<<~'\n"); |
3901
|
|
|
|
|
|
|
} |
3902
|
|
|
|
|
|
|
} |
3903
|
|
|
|
|
|
|
else { |
3904
|
0
|
|
|
|
|
0
|
$self->error_if_expecting_OPERATOR(); |
3905
|
|
|
|
|
|
|
} |
3906
|
2
|
|
|
|
|
9
|
return; |
3907
|
|
|
|
|
|
|
} ## end sub do_NEW_HERE_DOC |
3908
|
|
|
|
|
|
|
|
3909
|
|
|
|
|
|
|
sub do_POINTER { |
3910
|
|
|
|
|
|
|
|
3911
|
|
|
|
|
|
|
# '->' |
3912
|
887
|
|
|
887
|
0
|
1680
|
return; |
3913
|
|
|
|
|
|
|
} |
3914
|
|
|
|
|
|
|
|
3915
|
|
|
|
|
|
|
sub do_PLUS_PLUS { |
3916
|
|
|
|
|
|
|
|
3917
|
46
|
|
|
46
|
0
|
178
|
my $self = shift; |
3918
|
|
|
|
|
|
|
|
3919
|
|
|
|
|
|
|
# '++' |
3920
|
|
|
|
|
|
|
# type = 'pp' for pre-increment, '++' for post-increment |
3921
|
46
|
100
|
|
|
|
242
|
if ( $expecting == OPERATOR ) { $type = '++' } |
|
37
|
100
|
|
|
|
122
|
|
3922
|
7
|
|
|
|
|
17
|
elsif ( $expecting == TERM ) { $type = 'pp' } |
3923
|
|
|
|
|
|
|
|
3924
|
|
|
|
|
|
|
# handle ( $expecting == UNKNOWN ) |
3925
|
|
|
|
|
|
|
else { |
3926
|
|
|
|
|
|
|
|
3927
|
|
|
|
|
|
|
# look ahead .. |
3928
|
2
|
|
|
|
|
18
|
my ( $next_nonblank_token, $i_next ) = |
3929
|
|
|
|
|
|
|
$self->find_next_nonblank_token( $i, $rtokens, $max_token_index ); |
3930
|
|
|
|
|
|
|
|
3931
|
|
|
|
|
|
|
# Fix for c042: look past a side comment |
3932
|
2
|
50
|
|
|
|
23
|
if ( $next_nonblank_token eq '#' ) { |
3933
|
0
|
|
|
|
|
0
|
( $next_nonblank_token, $i_next ) = |
3934
|
|
|
|
|
|
|
$self->find_next_nonblank_token( $max_token_index, |
3935
|
|
|
|
|
|
|
$rtokens, $max_token_index ); |
3936
|
|
|
|
|
|
|
} |
3937
|
|
|
|
|
|
|
|
3938
|
2
|
50
|
|
|
|
16
|
if ( $next_nonblank_token eq '$' ) { $type = 'pp' } |
|
0
|
|
|
|
|
0
|
|
3939
|
|
|
|
|
|
|
} |
3940
|
46
|
|
|
|
|
98
|
return; |
3941
|
|
|
|
|
|
|
} ## end sub do_PLUS_PLUS |
3942
|
|
|
|
|
|
|
|
3943
|
|
|
|
|
|
|
sub do_FAT_COMMA { |
3944
|
|
|
|
|
|
|
|
3945
|
1025
|
|
|
1025
|
0
|
1918
|
my $self = shift; |
3946
|
|
|
|
|
|
|
|
3947
|
|
|
|
|
|
|
# '=>' |
3948
|
1025
|
50
|
|
|
|
2439
|
if ( $last_nonblank_type eq $tok ) { |
3949
|
0
|
|
|
|
|
0
|
$self->complain("Repeated '=>'s \n"); |
3950
|
|
|
|
|
|
|
} |
3951
|
|
|
|
|
|
|
|
3952
|
|
|
|
|
|
|
# patch for operator_expected: note if we are in the list (use.t) |
3953
|
|
|
|
|
|
|
# TODO: make version numbers a new token type |
3954
|
1025
|
100
|
|
|
|
2393
|
if ( $statement_type eq 'use' ) { $statement_type = '_use' } |
|
18
|
|
|
|
|
44
|
|
3955
|
1025
|
|
|
|
|
1733
|
return; |
3956
|
|
|
|
|
|
|
} ## end sub do_FAT_COMMA |
3957
|
|
|
|
|
|
|
|
3958
|
|
|
|
|
|
|
sub do_MINUS_MINUS { |
3959
|
|
|
|
|
|
|
|
3960
|
2
|
|
|
2
|
0
|
5
|
my $self = shift; |
3961
|
|
|
|
|
|
|
|
3962
|
|
|
|
|
|
|
# '--' |
3963
|
|
|
|
|
|
|
# type = 'mm' for pre-decrement, '--' for post-decrement |
3964
|
|
|
|
|
|
|
|
3965
|
2
|
50
|
|
|
|
13
|
if ( $expecting == OPERATOR ) { $type = '--' } |
|
0
|
50
|
|
|
|
0
|
|
3966
|
2
|
|
|
|
|
6
|
elsif ( $expecting == TERM ) { $type = 'mm' } |
3967
|
|
|
|
|
|
|
|
3968
|
|
|
|
|
|
|
# handle ( $expecting == UNKNOWN ) |
3969
|
|
|
|
|
|
|
else { |
3970
|
|
|
|
|
|
|
|
3971
|
|
|
|
|
|
|
# look ahead .. |
3972
|
0
|
|
|
|
|
0
|
my ( $next_nonblank_token, $i_next ) = |
3973
|
|
|
|
|
|
|
$self->find_next_nonblank_token( $i, $rtokens, $max_token_index ); |
3974
|
|
|
|
|
|
|
|
3975
|
|
|
|
|
|
|
# Fix for c042: look past a side comment |
3976
|
0
|
0
|
|
|
|
0
|
if ( $next_nonblank_token eq '#' ) { |
3977
|
0
|
|
|
|
|
0
|
( $next_nonblank_token, $i_next ) = |
3978
|
|
|
|
|
|
|
$self->find_next_nonblank_token( $max_token_index, |
3979
|
|
|
|
|
|
|
$rtokens, $max_token_index ); |
3980
|
|
|
|
|
|
|
} |
3981
|
|
|
|
|
|
|
|
3982
|
0
|
0
|
|
|
|
0
|
if ( $next_nonblank_token eq '$' ) { $type = 'mm' } |
|
0
|
|
|
|
|
0
|
|
3983
|
|
|
|
|
|
|
} |
3984
|
|
|
|
|
|
|
|
3985
|
2
|
|
|
|
|
5
|
return; |
3986
|
|
|
|
|
|
|
} ## end sub do_MINUS_MINUS |
3987
|
|
|
|
|
|
|
|
3988
|
|
|
|
|
|
|
sub do_LOGICAL_AND { |
3989
|
|
|
|
|
|
|
|
3990
|
58
|
|
|
58
|
0
|
123
|
my $self = shift; |
3991
|
|
|
|
|
|
|
|
3992
|
|
|
|
|
|
|
# '&&' |
3993
|
58
|
50
|
33
|
|
|
274
|
$self->error_if_expecting_TERM() |
3994
|
|
|
|
|
|
|
if ( $expecting == TERM && $last_nonblank_token ne ',' ); #c015 |
3995
|
58
|
|
|
|
|
113
|
return; |
3996
|
|
|
|
|
|
|
} ## end sub do_LOGICAL_AND |
3997
|
|
|
|
|
|
|
|
3998
|
|
|
|
|
|
|
sub do_LOGICAL_OR { |
3999
|
|
|
|
|
|
|
|
4000
|
74
|
|
|
74
|
0
|
181
|
my $self = shift; |
4001
|
|
|
|
|
|
|
|
4002
|
|
|
|
|
|
|
# '||' |
4003
|
74
|
100
|
66
|
|
|
398
|
$self->error_if_expecting_TERM() |
4004
|
|
|
|
|
|
|
if ( $expecting == TERM && $last_nonblank_token ne ',' ); #c015 |
4005
|
74
|
|
|
|
|
165
|
return; |
4006
|
|
|
|
|
|
|
} ## end sub do_LOGICAL_OR |
4007
|
|
|
|
|
|
|
|
4008
|
|
|
|
|
|
|
sub do_SLASH_SLASH { |
4009
|
|
|
|
|
|
|
|
4010
|
10
|
|
|
10
|
0
|
20
|
my $self = shift; |
4011
|
|
|
|
|
|
|
|
4012
|
|
|
|
|
|
|
# '//' |
4013
|
10
|
100
|
|
|
|
35
|
$self->error_if_expecting_TERM() |
4014
|
|
|
|
|
|
|
if ( $expecting == TERM ); |
4015
|
10
|
|
|
|
|
20
|
return; |
4016
|
|
|
|
|
|
|
} ## end sub do_SLASH_SLASH |
4017
|
|
|
|
|
|
|
|
4018
|
|
|
|
|
|
|
sub do_DIGITS { |
4019
|
|
|
|
|
|
|
|
4020
|
1934
|
|
|
1934
|
0
|
3337
|
my $self = shift; |
4021
|
|
|
|
|
|
|
|
4022
|
|
|
|
|
|
|
# 'd' = string of digits |
4023
|
1934
|
50
|
|
|
|
4278
|
$self->error_if_expecting_OPERATOR("Number") |
4024
|
|
|
|
|
|
|
if ( $expecting == OPERATOR ); |
4025
|
|
|
|
|
|
|
|
4026
|
1934
|
|
|
|
|
4857
|
my $number = $self->scan_number_fast(); |
4027
|
1934
|
50
|
|
|
|
4583
|
if ( !defined($number) ) { |
4028
|
|
|
|
|
|
|
|
4029
|
|
|
|
|
|
|
# shouldn't happen - we should always get a number |
4030
|
0
|
|
|
|
|
0
|
if (DEVEL_MODE) { |
4031
|
|
|
|
|
|
|
$self->Fault(<<EOM); |
4032
|
|
|
|
|
|
|
non-number beginning with digit--program bug |
4033
|
|
|
|
|
|
|
EOM |
4034
|
|
|
|
|
|
|
} |
4035
|
|
|
|
|
|
|
$self->warning( |
4036
|
0
|
|
|
|
|
0
|
"Unexpected error condition: non-number beginning with digit\n" |
4037
|
|
|
|
|
|
|
); |
4038
|
0
|
|
|
|
|
0
|
$self->report_definite_bug(); |
4039
|
|
|
|
|
|
|
} |
4040
|
1934
|
|
|
|
|
3206
|
return; |
4041
|
|
|
|
|
|
|
} ## end sub do_DIGITS |
4042
|
|
|
|
|
|
|
|
4043
|
|
|
|
|
|
|
sub do_ATTRIBUTE_LIST { |
4044
|
|
|
|
|
|
|
|
4045
|
39
|
|
|
39
|
0
|
98
|
my ( $self, $next_nonblank_token ) = @_; |
4046
|
|
|
|
|
|
|
|
4047
|
|
|
|
|
|
|
# Called at a bareword encountered while in an attribute list |
4048
|
|
|
|
|
|
|
# returns 'is_attribute': |
4049
|
|
|
|
|
|
|
# true if attribute found |
4050
|
|
|
|
|
|
|
# false if an attribute (continue parsing bareword) |
4051
|
|
|
|
|
|
|
|
4052
|
|
|
|
|
|
|
# treat bare word followed by open paren like qw( |
4053
|
39
|
100
|
|
|
|
113
|
if ( $next_nonblank_token eq '(' ) { |
4054
|
|
|
|
|
|
|
|
4055
|
|
|
|
|
|
|
# For something like: |
4056
|
|
|
|
|
|
|
# : prototype($$) |
4057
|
|
|
|
|
|
|
# we should let do_scan_sub see it so that it can see |
4058
|
|
|
|
|
|
|
# the prototype. All other attributes get parsed as a |
4059
|
|
|
|
|
|
|
# quoted string. |
4060
|
18
|
100
|
|
|
|
88
|
if ( $tok eq 'prototype' ) { |
4061
|
2
|
|
|
|
|
7
|
$id_scan_state = 'prototype'; |
4062
|
|
|
|
|
|
|
|
4063
|
|
|
|
|
|
|
# start just after the word 'prototype' |
4064
|
2
|
|
|
|
|
9
|
my $i_beg = $i + 1; |
4065
|
2
|
|
|
|
|
22
|
( $i, $tok, $type, $id_scan_state ) = $self->do_scan_sub( |
4066
|
|
|
|
|
|
|
{ |
4067
|
|
|
|
|
|
|
input_line => $input_line, |
4068
|
|
|
|
|
|
|
i => $i, |
4069
|
|
|
|
|
|
|
i_beg => $i_beg, |
4070
|
|
|
|
|
|
|
tok => $tok, |
4071
|
|
|
|
|
|
|
type => $type, |
4072
|
|
|
|
|
|
|
rtokens => $rtokens, |
4073
|
|
|
|
|
|
|
rtoken_map => $rtoken_map, |
4074
|
|
|
|
|
|
|
id_scan_state => $id_scan_state, |
4075
|
|
|
|
|
|
|
max_token_index => $max_token_index, |
4076
|
|
|
|
|
|
|
} |
4077
|
|
|
|
|
|
|
); |
4078
|
|
|
|
|
|
|
|
4079
|
|
|
|
|
|
|
# If successful, mark as type 'q' to be consistent |
4080
|
|
|
|
|
|
|
# with other attributes. Type 'w' would also work. |
4081
|
2
|
50
|
|
|
|
14
|
if ( $i > $i_beg ) { |
4082
|
2
|
|
|
|
|
5
|
$type = 'q'; |
4083
|
2
|
|
|
|
|
6
|
return 1; |
4084
|
|
|
|
|
|
|
} |
4085
|
|
|
|
|
|
|
|
4086
|
|
|
|
|
|
|
# If not successful, continue and parse as a quote. |
4087
|
|
|
|
|
|
|
} |
4088
|
|
|
|
|
|
|
|
4089
|
|
|
|
|
|
|
# All other attribute lists must be parsed as quotes |
4090
|
|
|
|
|
|
|
# (see 'signatures.t' for good examples) |
4091
|
16
|
|
|
|
|
76
|
$in_quote = $quote_items{'q'}; |
4092
|
16
|
|
|
|
|
48
|
$allowed_quote_modifiers = $quote_modifiers{'q'}; |
4093
|
16
|
|
|
|
|
34
|
$type = 'q'; |
4094
|
16
|
|
|
|
|
31
|
$quote_type = 'q'; |
4095
|
16
|
|
|
|
|
39
|
return 1; |
4096
|
|
|
|
|
|
|
} |
4097
|
|
|
|
|
|
|
|
4098
|
|
|
|
|
|
|
# handle bareword not followed by open paren |
4099
|
|
|
|
|
|
|
else { |
4100
|
21
|
|
|
|
|
57
|
$type = 'w'; |
4101
|
21
|
|
|
|
|
53
|
return 1; |
4102
|
|
|
|
|
|
|
} |
4103
|
|
|
|
|
|
|
|
4104
|
|
|
|
|
|
|
# attribute not found |
4105
|
0
|
|
|
|
|
0
|
return; |
4106
|
|
|
|
|
|
|
} ## end sub do_ATTRIBUTE_LIST |
4107
|
|
|
|
|
|
|
|
4108
|
|
|
|
|
|
|
sub do_QUOTED_BAREWORD { |
4109
|
|
|
|
|
|
|
|
4110
|
786
|
|
|
786
|
0
|
1507
|
my $self = shift; |
4111
|
|
|
|
|
|
|
|
4112
|
|
|
|
|
|
|
# find type of a bareword followed by a '=>' |
4113
|
786
|
100
|
|
|
|
4420
|
if ( $ris_constant->{$current_package}{$tok} ) { |
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
4114
|
14
|
|
|
|
|
36
|
$type = 'C'; |
4115
|
|
|
|
|
|
|
} |
4116
|
|
|
|
|
|
|
elsif ( $ris_user_function->{$current_package}{$tok} ) { |
4117
|
0
|
|
|
|
|
0
|
$type = 'U'; |
4118
|
0
|
|
|
|
|
0
|
$prototype = $ruser_function_prototype->{$current_package}{$tok}; |
4119
|
|
|
|
|
|
|
} |
4120
|
|
|
|
|
|
|
elsif ( $tok =~ /^v\d+$/ ) { |
4121
|
0
|
|
|
|
|
0
|
$type = 'v'; |
4122
|
0
|
|
|
|
|
0
|
$self->report_v_string($tok); |
4123
|
|
|
|
|
|
|
} |
4124
|
|
|
|
|
|
|
else { |
4125
|
|
|
|
|
|
|
|
4126
|
|
|
|
|
|
|
# Bareword followed by a fat comma - see 'git18.in' |
4127
|
|
|
|
|
|
|
# If tok is something like 'x17' then it could |
4128
|
|
|
|
|
|
|
# actually be operator x followed by number 17. |
4129
|
|
|
|
|
|
|
# For example, here: |
4130
|
|
|
|
|
|
|
# 123x17 => [ 792, 1224 ], |
4131
|
|
|
|
|
|
|
# (a key of 123 repeated 17 times, perhaps not |
4132
|
|
|
|
|
|
|
# what was intended). We will mark x17 as type |
4133
|
|
|
|
|
|
|
# 'n' and it will be split. If the previous token |
4134
|
|
|
|
|
|
|
# was also a bareword then it is not very clear is |
4135
|
|
|
|
|
|
|
# going on. In this case we will not be sure that |
4136
|
|
|
|
|
|
|
# an operator is expected, so we just mark it as a |
4137
|
|
|
|
|
|
|
# bareword. Perl is a little murky in what it does |
4138
|
|
|
|
|
|
|
# with stuff like this, and its behavior can change |
4139
|
|
|
|
|
|
|
# over time. Something like |
4140
|
|
|
|
|
|
|
# a x18 => [792, 1224], will compile as |
4141
|
|
|
|
|
|
|
# a key with 18 a's. But something like |
4142
|
|
|
|
|
|
|
# push @array, a x18; |
4143
|
|
|
|
|
|
|
# is a syntax error. |
4144
|
772
|
100
|
66
|
|
|
2730
|
if ( |
|
|
|
33
|
|
|
|
|
|
|
|
66
|
|
|
|
|
4145
|
|
|
|
|
|
|
$expecting == OPERATOR |
4146
|
|
|
|
|
|
|
&& substr( $tok, 0, 1 ) eq 'x' |
4147
|
|
|
|
|
|
|
&& ( length($tok) == 1 |
4148
|
|
|
|
|
|
|
|| substr( $tok, 1, 1 ) =~ /^\d/ ) |
4149
|
|
|
|
|
|
|
) |
4150
|
|
|
|
|
|
|
{ |
4151
|
3
|
|
|
|
|
8
|
$type = 'n'; |
4152
|
3
|
50
|
|
|
|
12
|
if ( $self->split_pretoken(1) ) { |
4153
|
3
|
|
|
|
|
7
|
$type = 'x'; |
4154
|
3
|
|
|
|
|
6
|
$tok = 'x'; |
4155
|
|
|
|
|
|
|
} |
4156
|
|
|
|
|
|
|
} |
4157
|
|
|
|
|
|
|
else { |
4158
|
|
|
|
|
|
|
|
4159
|
|
|
|
|
|
|
# git #18 |
4160
|
769
|
|
|
|
|
1354
|
$type = 'w'; |
4161
|
769
|
|
|
|
|
1995
|
$self->error_if_expecting_OPERATOR(); |
4162
|
|
|
|
|
|
|
} |
4163
|
|
|
|
|
|
|
} |
4164
|
786
|
|
|
|
|
1284
|
return; |
4165
|
|
|
|
|
|
|
} ## end sub do_QUOTED_BAREWORD |
4166
|
|
|
|
|
|
|
|
4167
|
|
|
|
|
|
|
sub do_X_OPERATOR { |
4168
|
|
|
|
|
|
|
|
4169
|
17
|
|
|
17
|
0
|
51
|
my $self = shift; |
4170
|
|
|
|
|
|
|
|
4171
|
17
|
100
|
|
|
|
73
|
if ( $tok eq 'x' ) { |
4172
|
15
|
50
|
|
|
|
74
|
if ( $rtokens->[ $i + 1 ] eq '=' ) { # x= |
4173
|
0
|
|
|
|
|
0
|
$tok = 'x='; |
4174
|
0
|
|
|
|
|
0
|
$type = $tok; |
4175
|
0
|
|
|
|
|
0
|
$i++; |
4176
|
|
|
|
|
|
|
} |
4177
|
|
|
|
|
|
|
else { |
4178
|
15
|
|
|
|
|
44
|
$type = 'x'; |
4179
|
|
|
|
|
|
|
} |
4180
|
|
|
|
|
|
|
} |
4181
|
|
|
|
|
|
|
else { |
4182
|
|
|
|
|
|
|
|
4183
|
|
|
|
|
|
|
# Split a pretoken like 'x10' into 'x' and '10'. |
4184
|
|
|
|
|
|
|
# Note: In previous versions of perltidy it was marked |
4185
|
|
|
|
|
|
|
# as a number, $type = 'n', and fixed downstream by the |
4186
|
|
|
|
|
|
|
# Formatter. |
4187
|
2
|
|
|
|
|
5
|
$type = 'n'; |
4188
|
2
|
50
|
|
|
|
9
|
if ( $self->split_pretoken(1) ) { |
4189
|
2
|
|
|
|
|
10
|
$type = 'x'; |
4190
|
2
|
|
|
|
|
16
|
$tok = 'x'; |
4191
|
|
|
|
|
|
|
} |
4192
|
|
|
|
|
|
|
} |
4193
|
17
|
|
|
|
|
33
|
return; |
4194
|
|
|
|
|
|
|
} ## end sub do_X_OPERATOR |
4195
|
|
|
|
|
|
|
|
4196
|
|
|
|
|
|
|
sub do_USE_CONSTANT { |
4197
|
|
|
|
|
|
|
|
4198
|
16
|
|
|
16
|
0
|
42
|
my $self = shift; |
4199
|
|
|
|
|
|
|
|
4200
|
16
|
|
|
|
|
62
|
$self->scan_bare_identifier(); |
4201
|
16
|
|
|
|
|
111
|
my ( $next_nonblank_tok2, $i_next2 ) = |
4202
|
|
|
|
|
|
|
$self->find_next_nonblank_token( $i, $rtokens, $max_token_index ); |
4203
|
|
|
|
|
|
|
|
4204
|
16
|
50
|
|
|
|
111
|
if ($next_nonblank_tok2) { |
4205
|
|
|
|
|
|
|
|
4206
|
16
|
100
|
|
|
|
91
|
if ( $is_keyword{$next_nonblank_tok2} ) { |
4207
|
|
|
|
|
|
|
|
4208
|
|
|
|
|
|
|
# Assume qw is used as a quote and okay, as in: |
4209
|
|
|
|
|
|
|
# use constant qw{ DEBUG 0 }; |
4210
|
|
|
|
|
|
|
# Not worth trying to parse for just a warning |
4211
|
|
|
|
|
|
|
|
4212
|
|
|
|
|
|
|
# NOTE: This warning is deactivated because recent |
4213
|
|
|
|
|
|
|
# versions of perl do not complain here, but |
4214
|
|
|
|
|
|
|
# the coding is retained for reference. |
4215
|
1
|
|
|
|
|
2
|
if ( 0 && $next_nonblank_tok2 ne 'qw' ) { |
4216
|
|
|
|
|
|
|
$self->warning( |
4217
|
|
|
|
|
|
|
"Attempting to define constant '$next_nonblank_tok2' which is a perl keyword\n" |
4218
|
|
|
|
|
|
|
); |
4219
|
|
|
|
|
|
|
} |
4220
|
|
|
|
|
|
|
} |
4221
|
|
|
|
|
|
|
|
4222
|
|
|
|
|
|
|
else { |
4223
|
15
|
|
|
|
|
61
|
$ris_constant->{$current_package}{$next_nonblank_tok2} = 1; |
4224
|
|
|
|
|
|
|
} |
4225
|
|
|
|
|
|
|
} |
4226
|
16
|
|
|
|
|
41
|
return; |
4227
|
|
|
|
|
|
|
} ## end sub do_USE_CONSTANT |
4228
|
|
|
|
|
|
|
|
4229
|
|
|
|
|
|
|
sub do_KEYWORD { |
4230
|
|
|
|
|
|
|
|
4231
|
2644
|
|
|
2644
|
0
|
4465
|
my $self = shift; |
4232
|
|
|
|
|
|
|
|
4233
|
|
|
|
|
|
|
# found a keyword - set any associated flags |
4234
|
2644
|
|
|
|
|
4563
|
$type = 'k'; |
4235
|
|
|
|
|
|
|
|
4236
|
|
|
|
|
|
|
# Since for and foreach may not be followed immediately |
4237
|
|
|
|
|
|
|
# by an opening paren, we have to remember which keyword |
4238
|
|
|
|
|
|
|
# is associated with the next '(' |
4239
|
|
|
|
|
|
|
# Previously, before update c230 : if ( $is_for_foreach{$tok} ) { |
4240
|
|
|
|
|
|
|
##(if elsif unless while until for foreach switch case given when catch) |
4241
|
2644
|
100
|
|
|
|
7584
|
if ( $is_blocktype_with_paren{$tok} ) { |
4242
|
396
|
100
|
|
|
|
1488
|
if ( new_statement_ok() ) { |
4243
|
309
|
|
|
|
|
721
|
$want_paren = $tok; |
4244
|
|
|
|
|
|
|
} |
4245
|
|
|
|
|
|
|
} |
4246
|
|
|
|
|
|
|
|
4247
|
|
|
|
|
|
|
# recognize 'use' statements, which are special |
4248
|
2644
|
100
|
100
|
|
|
19373
|
if ( $is_use_require{$tok} ) { |
|
|
100
|
66
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
4249
|
175
|
|
|
|
|
359
|
$statement_type = $tok; |
4250
|
175
|
50
|
|
|
|
511
|
$self->error_if_expecting_OPERATOR() |
4251
|
|
|
|
|
|
|
if ( $expecting == OPERATOR ); |
4252
|
|
|
|
|
|
|
} |
4253
|
|
|
|
|
|
|
|
4254
|
|
|
|
|
|
|
# remember my and our to check for trailing ": shared" |
4255
|
|
|
|
|
|
|
elsif ( $is_my_our_state{$tok} ) { |
4256
|
629
|
|
|
|
|
1358
|
$statement_type = $tok; |
4257
|
|
|
|
|
|
|
} |
4258
|
|
|
|
|
|
|
|
4259
|
|
|
|
|
|
|
# Check for misplaced 'elsif' and 'else', but allow isolated |
4260
|
|
|
|
|
|
|
# else or elsif blocks to be formatted. This is indicated |
4261
|
|
|
|
|
|
|
# by a last noblank token of ';' |
4262
|
|
|
|
|
|
|
elsif ( $tok eq 'elsif' ) { |
4263
|
29
|
50
|
66
|
|
|
221
|
if ( |
4264
|
|
|
|
|
|
|
$last_nonblank_token ne ';' |
4265
|
|
|
|
|
|
|
|
4266
|
|
|
|
|
|
|
## !~ /^(if|elsif|unless)$/ |
4267
|
|
|
|
|
|
|
&& !$is_if_elsif_unless{$last_nonblank_block_type} |
4268
|
|
|
|
|
|
|
) |
4269
|
|
|
|
|
|
|
{ |
4270
|
0
|
|
|
|
|
0
|
$self->warning( |
4271
|
|
|
|
|
|
|
"expecting '$tok' to follow one of 'if|elsif|unless'\n"); |
4272
|
|
|
|
|
|
|
} |
4273
|
|
|
|
|
|
|
} |
4274
|
|
|
|
|
|
|
elsif ( $tok eq 'else' ) { |
4275
|
|
|
|
|
|
|
|
4276
|
|
|
|
|
|
|
# patched for SWITCH/CASE |
4277
|
44
|
50
|
66
|
|
|
598
|
if ( |
|
|
|
66
|
|
|
|
|
4278
|
|
|
|
|
|
|
$last_nonblank_token ne ';' |
4279
|
|
|
|
|
|
|
|
4280
|
|
|
|
|
|
|
## !~ /^(if|elsif|unless|case|when)$/ |
4281
|
|
|
|
|
|
|
&& !$is_if_elsif_unless_case_when{$last_nonblank_block_type} |
4282
|
|
|
|
|
|
|
|
4283
|
|
|
|
|
|
|
# patch to avoid an unwanted error message for |
4284
|
|
|
|
|
|
|
# the case of a parenless 'case' (RT 105484): |
4285
|
|
|
|
|
|
|
# switch ( 1 ) { case x { 2 } else { } } |
4286
|
|
|
|
|
|
|
## !~ /^(if|elsif|unless|case|when)$/ |
4287
|
|
|
|
|
|
|
&& !$is_if_elsif_unless_case_when{$statement_type} |
4288
|
|
|
|
|
|
|
) |
4289
|
|
|
|
|
|
|
{ |
4290
|
0
|
|
|
|
|
0
|
$self->warning( |
4291
|
|
|
|
|
|
|
"expecting '$tok' to follow one of 'if|elsif|unless|case|when'\n" |
4292
|
|
|
|
|
|
|
); |
4293
|
|
|
|
|
|
|
} |
4294
|
|
|
|
|
|
|
} |
4295
|
|
|
|
|
|
|
|
4296
|
|
|
|
|
|
|
# patch for SWITCH/CASE if 'case' and 'when are |
4297
|
|
|
|
|
|
|
# treated as keywords. Also 'default' for Switch::Plain |
4298
|
|
|
|
|
|
|
elsif ($tok eq 'when' |
4299
|
|
|
|
|
|
|
|| $tok eq 'case' |
4300
|
|
|
|
|
|
|
|| $tok eq 'default' ) |
4301
|
|
|
|
|
|
|
{ |
4302
|
56
|
|
|
|
|
111
|
$statement_type = $tok; # next '{' is block |
4303
|
|
|
|
|
|
|
} |
4304
|
|
|
|
|
|
|
|
4305
|
|
|
|
|
|
|
# feature 'err' was removed in Perl 5.10. So mark this as |
4306
|
|
|
|
|
|
|
# a bareword unless an operator is expected (see c158). |
4307
|
|
|
|
|
|
|
elsif ( $tok eq 'err' ) { |
4308
|
1
|
50
|
|
|
|
5
|
if ( $expecting != OPERATOR ) { $type = 'w' } |
|
1
|
|
|
|
|
3
|
|
4309
|
|
|
|
|
|
|
} |
4310
|
|
|
|
|
|
|
else { |
4311
|
|
|
|
|
|
|
## no special treatment needed |
4312
|
|
|
|
|
|
|
} |
4313
|
|
|
|
|
|
|
|
4314
|
2644
|
|
|
|
|
4709
|
return; |
4315
|
|
|
|
|
|
|
} ## end sub do_KEYWORD |
4316
|
|
|
|
|
|
|
|
4317
|
|
|
|
|
|
|
sub do_QUOTE_OPERATOR { |
4318
|
|
|
|
|
|
|
|
4319
|
202
|
|
|
202
|
0
|
434
|
my $self = shift; |
4320
|
|
|
|
|
|
|
|
4321
|
202
|
50
|
|
|
|
674
|
if ( $expecting == OPERATOR ) { |
4322
|
|
|
|
|
|
|
|
4323
|
|
|
|
|
|
|
# Be careful not to call an error for a qw quote |
4324
|
|
|
|
|
|
|
# where a parenthesized list is allowed. For example, |
4325
|
|
|
|
|
|
|
# it could also be a for/foreach construct such as |
4326
|
|
|
|
|
|
|
# |
4327
|
|
|
|
|
|
|
# foreach my $key qw\Uno Due Tres Quadro\ { |
4328
|
|
|
|
|
|
|
# print "Set $key\n"; |
4329
|
|
|
|
|
|
|
# } |
4330
|
|
|
|
|
|
|
# |
4331
|
|
|
|
|
|
|
|
4332
|
|
|
|
|
|
|
# Or it could be a function call. |
4333
|
|
|
|
|
|
|
# NOTE: Braces in something like &{ xxx } are not |
4334
|
|
|
|
|
|
|
# marked as a block, we might have a method call. |
4335
|
|
|
|
|
|
|
# &method(...), $method->(..), &{method}(...), |
4336
|
|
|
|
|
|
|
# $ref[2](list) is ok & short for $ref[2]->(list) |
4337
|
|
|
|
|
|
|
# |
4338
|
|
|
|
|
|
|
# See notes in 'sub code_block_type' and |
4339
|
|
|
|
|
|
|
# 'sub is_non_structural_brace' |
4340
|
|
|
|
|
|
|
|
4341
|
|
|
|
|
|
|
my $paren_list_possible = $tok eq 'qw' |
4342
|
|
|
|
|
|
|
&& ( $last_nonblank_token =~ /^([\]\}\&]|\-\>)/ |
4343
|
0
|
|
0
|
|
|
0
|
|| $is_for_foreach{$want_paren} ); |
4344
|
|
|
|
|
|
|
|
4345
|
0
|
0
|
|
|
|
0
|
if ( !$paren_list_possible ) { |
4346
|
0
|
|
|
|
|
0
|
$self->error_if_expecting_OPERATOR(); |
4347
|
|
|
|
|
|
|
} |
4348
|
|
|
|
|
|
|
} |
4349
|
202
|
|
|
|
|
528
|
$in_quote = $quote_items{$tok}; |
4350
|
202
|
|
|
|
|
537
|
$allowed_quote_modifiers = $quote_modifiers{$tok}; |
4351
|
|
|
|
|
|
|
|
4352
|
|
|
|
|
|
|
# All quote types are 'Q' except possibly qw quotes. |
4353
|
|
|
|
|
|
|
# qw quotes are special in that they may generally be trimmed |
4354
|
|
|
|
|
|
|
# of leading and trailing whitespace. So they are given a |
4355
|
|
|
|
|
|
|
# separate type, 'q', unless requested otherwise. |
4356
|
202
|
100
|
66
|
|
|
982
|
$type = |
4357
|
|
|
|
|
|
|
( $tok eq 'qw' && $rOpts_trim_qw ) |
4358
|
|
|
|
|
|
|
? 'q' |
4359
|
|
|
|
|
|
|
: 'Q'; |
4360
|
202
|
|
|
|
|
404
|
$quote_type = $type; |
4361
|
202
|
|
|
|
|
394
|
return; |
4362
|
|
|
|
|
|
|
} ## end sub do_QUOTE_OPERATOR |
4363
|
|
|
|
|
|
|
|
4364
|
|
|
|
|
|
|
sub do_UNKNOWN_BAREWORD { |
4365
|
|
|
|
|
|
|
|
4366
|
958
|
|
|
958
|
0
|
2322
|
my ( $self, $next_nonblank_token ) = @_; |
4367
|
|
|
|
|
|
|
|
4368
|
958
|
|
|
|
|
3298
|
$self->scan_bare_identifier(); |
4369
|
|
|
|
|
|
|
|
4370
|
958
|
100
|
100
|
|
|
3608
|
if ( $statement_type eq 'use' |
4371
|
|
|
|
|
|
|
&& $last_nonblank_token eq 'use' ) |
4372
|
|
|
|
|
|
|
{ |
4373
|
108
|
|
|
|
|
418
|
$rsaw_use_module->{$current_package}->{$tok} = 1; |
4374
|
|
|
|
|
|
|
} |
4375
|
|
|
|
|
|
|
|
4376
|
958
|
100
|
|
|
|
2590
|
if ( $type eq 'w' ) { |
4377
|
|
|
|
|
|
|
|
4378
|
933
|
50
|
|
|
|
2499
|
if ( $expecting == OPERATOR ) { |
4379
|
|
|
|
|
|
|
|
4380
|
|
|
|
|
|
|
# Patch to avoid error message for RPerl overloaded |
4381
|
|
|
|
|
|
|
# operator functions: use overload |
4382
|
|
|
|
|
|
|
# '+' => \&sse_add, |
4383
|
|
|
|
|
|
|
# '-' => \&sse_sub, |
4384
|
|
|
|
|
|
|
# '*' => \&sse_mul, |
4385
|
|
|
|
|
|
|
# '/' => \&sse_div; |
4386
|
|
|
|
|
|
|
# TODO: this could eventually be generalized |
4387
|
0
|
0
|
0
|
|
|
0
|
if ( $rsaw_use_module->{$current_package}->{'RPerl'} |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
4388
|
|
|
|
|
|
|
&& $tok =~ /^sse_(mul|div|add|sub)$/ ) |
4389
|
|
|
|
|
|
|
{ |
4390
|
|
|
|
|
|
|
|
4391
|
|
|
|
|
|
|
} |
4392
|
|
|
|
|
|
|
|
4393
|
|
|
|
|
|
|
# Fix part 1 for git #63 in which a comment falls |
4394
|
|
|
|
|
|
|
# between an -> and the following word. An |
4395
|
|
|
|
|
|
|
# alternate fix would be to change operator_expected |
4396
|
|
|
|
|
|
|
# to return an UNKNOWN for this type. |
4397
|
|
|
|
|
|
|
elsif ( $last_nonblank_type eq '->' ) { |
4398
|
|
|
|
|
|
|
|
4399
|
|
|
|
|
|
|
} |
4400
|
|
|
|
|
|
|
|
4401
|
|
|
|
|
|
|
# don't complain about possible indirect object |
4402
|
|
|
|
|
|
|
# notation. |
4403
|
|
|
|
|
|
|
# For example: |
4404
|
|
|
|
|
|
|
# package main; |
4405
|
|
|
|
|
|
|
# sub new($) { ... } |
4406
|
|
|
|
|
|
|
# $b = new A::; # calls A::new |
4407
|
|
|
|
|
|
|
# $c = new A; # same thing but suspicious |
4408
|
|
|
|
|
|
|
# This will call A::new but we have a 'new' in |
4409
|
|
|
|
|
|
|
# main:: which looks like a constant. |
4410
|
|
|
|
|
|
|
# |
4411
|
|
|
|
|
|
|
elsif ( $last_nonblank_type eq 'C' ) { |
4412
|
0
|
0
|
|
|
|
0
|
if ( $tok !~ /::$/ ) { |
4413
|
0
|
|
|
|
|
0
|
$self->complain(<<EOM); |
4414
|
|
|
|
|
|
|
Expecting operator after '$last_nonblank_token' but found bare word '$tok' |
4415
|
|
|
|
|
|
|
Maybe indirectet object notation? |
4416
|
|
|
|
|
|
|
EOM |
4417
|
|
|
|
|
|
|
} |
4418
|
|
|
|
|
|
|
} |
4419
|
|
|
|
|
|
|
else { |
4420
|
0
|
|
|
|
|
0
|
$self->error_if_expecting_OPERATOR("bareword"); |
4421
|
|
|
|
|
|
|
} |
4422
|
|
|
|
|
|
|
} |
4423
|
|
|
|
|
|
|
|
4424
|
|
|
|
|
|
|
# mark bare words immediately followed by a paren as |
4425
|
|
|
|
|
|
|
# functions |
4426
|
933
|
|
|
|
|
2502
|
$next_tok = $rtokens->[ $i + 1 ]; |
4427
|
933
|
100
|
|
|
|
2648
|
if ( $next_tok eq '(' ) { |
4428
|
|
|
|
|
|
|
|
4429
|
|
|
|
|
|
|
# Patch for issue c151, where we are processing a snippet and |
4430
|
|
|
|
|
|
|
# have not seen that SPACE is a constant. In this case 'x' is |
4431
|
|
|
|
|
|
|
# probably an operator. The only disadvantage with an incorrect |
4432
|
|
|
|
|
|
|
# guess is that the space after it may be incorrect. For example |
4433
|
|
|
|
|
|
|
# $str .= SPACE x ( 16 - length($str) ); See also b1410. |
4434
|
276
|
50
|
33
|
|
|
1510
|
if ( $tok eq 'x' && $last_nonblank_type eq 'w' ) { $type = 'x' } |
|
0
|
50
|
|
|
|
0
|
|
4435
|
|
|
|
|
|
|
|
4436
|
|
|
|
|
|
|
# Fix part 2 for git #63. Leave type as 'w' to keep |
4437
|
|
|
|
|
|
|
# the type the same as if the -> were not separated |
4438
|
276
|
|
|
|
|
578
|
elsif ( $last_nonblank_type ne '->' ) { $type = 'U' } |
4439
|
|
|
|
|
|
|
|
4440
|
|
|
|
|
|
|
# not a special case |
4441
|
|
|
|
|
|
|
else { } |
4442
|
|
|
|
|
|
|
|
4443
|
|
|
|
|
|
|
} |
4444
|
|
|
|
|
|
|
|
4445
|
|
|
|
|
|
|
# underscore after file test operator is file handle |
4446
|
933
|
50
|
66
|
|
|
3254
|
if ( $tok eq '_' && $last_nonblank_type eq 'F' ) { |
4447
|
0
|
|
|
|
|
0
|
$type = 'Z'; |
4448
|
|
|
|
|
|
|
} |
4449
|
|
|
|
|
|
|
|
4450
|
|
|
|
|
|
|
# patch for SWITCH/CASE if 'case' and 'when are |
4451
|
|
|
|
|
|
|
# not treated as keywords: |
4452
|
933
|
50
|
33
|
|
|
4873
|
if ( |
|
|
|
33
|
|
|
|
|
|
|
|
33
|
|
|
|
|
4453
|
|
|
|
|
|
|
( $tok eq 'case' && $rbrace_type->[$brace_depth] eq 'switch' ) |
4454
|
|
|
|
|
|
|
|| ( $tok eq 'when' |
4455
|
|
|
|
|
|
|
&& $rbrace_type->[$brace_depth] eq 'given' ) |
4456
|
|
|
|
|
|
|
) |
4457
|
|
|
|
|
|
|
{ |
4458
|
0
|
|
|
|
|
0
|
$statement_type = $tok; # next '{' is block |
4459
|
0
|
|
|
|
|
0
|
$type = 'k'; # for keyword syntax coloring |
4460
|
|
|
|
|
|
|
} |
4461
|
933
|
100
|
|
|
|
2772
|
if ( $next_nonblank_token eq '(' ) { |
4462
|
|
|
|
|
|
|
|
4463
|
|
|
|
|
|
|
# patch for SWITCH/CASE if switch and given not keywords |
4464
|
|
|
|
|
|
|
# Switch is not a perl 5 keyword, but we will gamble |
4465
|
|
|
|
|
|
|
# and mark switch followed by paren as a keyword. This |
4466
|
|
|
|
|
|
|
# is only necessary to get html syntax coloring nice, |
4467
|
|
|
|
|
|
|
# and does not commit this as being a switch/case. |
4468
|
241
|
50
|
33
|
|
|
1981
|
if ( $tok eq 'switch' || $tok eq 'given' ) { |
|
|
50
|
33
|
|
|
|
|
4469
|
0
|
|
|
|
|
0
|
$type = 'k'; # for keyword syntax coloring |
4470
|
|
|
|
|
|
|
} |
4471
|
|
|
|
|
|
|
|
4472
|
|
|
|
|
|
|
# mark 'x' as operator for something like this (see b1410) |
4473
|
|
|
|
|
|
|
# my $line = join( LD_X, map { LD_H x ( $_ + 2 ) } @$widths ); |
4474
|
|
|
|
|
|
|
elsif ( $tok eq 'x' && $last_nonblank_type eq 'w' ) { |
4475
|
0
|
|
|
|
|
0
|
$type = 'x'; |
4476
|
|
|
|
|
|
|
} |
4477
|
|
|
|
|
|
|
else { |
4478
|
|
|
|
|
|
|
## not a special case |
4479
|
|
|
|
|
|
|
} |
4480
|
|
|
|
|
|
|
} |
4481
|
|
|
|
|
|
|
} |
4482
|
958
|
|
|
|
|
1823
|
return; |
4483
|
|
|
|
|
|
|
} ## end sub do_UNKNOWN_BAREWORD |
4484
|
|
|
|
|
|
|
|
4485
|
|
|
|
|
|
|
sub sub_attribute_ok_here { |
4486
|
|
|
|
|
|
|
|
4487
|
35
|
|
|
35
|
0
|
156
|
my ( $self, $tok_kw, $next_nonblank_token, $i_next ) = @_; |
4488
|
|
|
|
|
|
|
|
4489
|
|
|
|
|
|
|
# Decide if 'sub :' can be the start of a sub attribute list. |
4490
|
|
|
|
|
|
|
# We will decide based on if the colon is followed by a |
4491
|
|
|
|
|
|
|
# bareword which is not a keyword. |
4492
|
|
|
|
|
|
|
# Changed inext+1 to inext to fixed case b1190. |
4493
|
35
|
|
|
|
|
69
|
my $sub_attribute_ok_here; |
4494
|
35
|
50
|
66
|
|
|
171
|
if ( $is_sub{$tok_kw} |
|
|
|
66
|
|
|
|
|
4495
|
|
|
|
|
|
|
&& $expecting != OPERATOR |
4496
|
|
|
|
|
|
|
&& $next_nonblank_token eq ':' ) |
4497
|
|
|
|
|
|
|
{ |
4498
|
3
|
|
|
|
|
11
|
my ( $nn_nonblank_token, $i_nn ) = |
4499
|
|
|
|
|
|
|
$self->find_next_nonblank_token( $i_next, $rtokens, |
4500
|
|
|
|
|
|
|
$max_token_index ); |
4501
|
|
|
|
|
|
|
$sub_attribute_ok_here = |
4502
|
|
|
|
|
|
|
$nn_nonblank_token =~ /^\w/ |
4503
|
|
|
|
|
|
|
&& $nn_nonblank_token !~ /^\d/ |
4504
|
3
|
|
66
|
|
|
77
|
&& !$is_keyword{$nn_nonblank_token}; |
4505
|
|
|
|
|
|
|
} |
4506
|
35
|
|
|
|
|
218
|
return $sub_attribute_ok_here; |
4507
|
|
|
|
|
|
|
} ## end sub sub_attribute_ok_here |
4508
|
|
|
|
|
|
|
|
4509
|
|
|
|
|
|
|
sub do_BAREWORD { |
4510
|
|
|
|
|
|
|
|
4511
|
5838
|
|
|
5838
|
0
|
10876
|
my ($self) = @_; |
4512
|
|
|
|
|
|
|
|
4513
|
|
|
|
|
|
|
# handle a bareword token: |
4514
|
|
|
|
|
|
|
# returns |
4515
|
|
|
|
|
|
|
# true if this token ends the current line |
4516
|
|
|
|
|
|
|
# false otherwise |
4517
|
|
|
|
|
|
|
|
4518
|
5838
|
|
|
|
|
16564
|
my ( $next_nonblank_token, $i_next ) = |
4519
|
|
|
|
|
|
|
$self->find_next_nonblank_token( $i, $rtokens, $max_token_index ); |
4520
|
|
|
|
|
|
|
|
4521
|
|
|
|
|
|
|
# a bare word immediately followed by :: is not a keyword; |
4522
|
|
|
|
|
|
|
# use $tok_kw when testing for keywords to avoid a mistake |
4523
|
5838
|
|
|
|
|
10977
|
my $tok_kw = $tok; |
4524
|
5838
|
100
|
100
|
|
|
17244
|
if ( $rtokens->[ $i + 1 ] eq ':' |
4525
|
|
|
|
|
|
|
&& $rtokens->[ $i + 2 ] eq ':' ) |
4526
|
|
|
|
|
|
|
{ |
4527
|
266
|
|
|
|
|
591
|
$tok_kw .= '::'; |
4528
|
|
|
|
|
|
|
} |
4529
|
|
|
|
|
|
|
|
4530
|
5838
|
100
|
|
|
|
12963
|
if ( $self->[_in_attribute_list_] ) { |
4531
|
39
|
|
|
|
|
221
|
my $is_attribute = $self->do_ATTRIBUTE_LIST($next_nonblank_token); |
4532
|
39
|
50
|
|
|
|
131
|
return if ($is_attribute); |
4533
|
|
|
|
|
|
|
} |
4534
|
|
|
|
|
|
|
|
4535
|
|
|
|
|
|
|
#---------------------------------------- |
4536
|
|
|
|
|
|
|
# Starting final if-elsif- chain of tests |
4537
|
|
|
|
|
|
|
#---------------------------------------- |
4538
|
|
|
|
|
|
|
|
4539
|
|
|
|
|
|
|
# This is the return flag: |
4540
|
|
|
|
|
|
|
# true => this is the last token on the line |
4541
|
|
|
|
|
|
|
# false => keep tokenizing the line |
4542
|
5799
|
|
|
|
|
8544
|
my $is_last; |
4543
|
|
|
|
|
|
|
|
4544
|
|
|
|
|
|
|
# The following blocks of code must update these vars: |
4545
|
|
|
|
|
|
|
# $type - the final token type, must always be set |
4546
|
|
|
|
|
|
|
|
4547
|
|
|
|
|
|
|
# In addition, if additional pretokens are added: |
4548
|
|
|
|
|
|
|
# $tok - the final token |
4549
|
|
|
|
|
|
|
# $i - the index of the last pretoken |
4550
|
|
|
|
|
|
|
|
4551
|
|
|
|
|
|
|
# They may also need to check and set various flags |
4552
|
|
|
|
|
|
|
|
4553
|
|
|
|
|
|
|
# Scan a bare word following a -> as an identifier; it could |
4554
|
|
|
|
|
|
|
# have a long package name. Fixes c037, c041. |
4555
|
5799
|
100
|
100
|
|
|
92091
|
if ( $last_nonblank_token eq '->' ) { |
|
|
100
|
66
|
|
|
|
|
|
|
100
|
100
|
|
|
|
|
|
|
100
|
66
|
|
|
|
|
|
|
100
|
66
|
|
|
|
|
|
|
100
|
100
|
|
|
|
|
|
|
100
|
66
|
|
|
|
|
|
|
50
|
66
|
|
|
|
|
|
|
50
|
100
|
|
|
|
|
|
|
100
|
100
|
|
|
|
|
|
|
100
|
33
|
|
|
|
|
|
|
100
|
0
|
|
|
|
|
|
|
100
|
0
|
|
|
|
|
|
|
100
|
33
|
|
|
|
|
|
|
100
|
0
|
|
|
|
|
|
|
100
|
0
|
|
|
|
|
|
|
100
|
66
|
|
|
|
|
|
|
100
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
|
66
|
|
|
|
|
4556
|
670
|
|
|
|
|
2432
|
$self->scan_bare_identifier(); |
4557
|
|
|
|
|
|
|
|
4558
|
|
|
|
|
|
|
# a bareward after '->' gets type 'i' |
4559
|
670
|
|
|
|
|
1245
|
$type = 'i'; |
4560
|
|
|
|
|
|
|
} |
4561
|
|
|
|
|
|
|
|
4562
|
|
|
|
|
|
|
# Quote a word followed by => operator |
4563
|
|
|
|
|
|
|
elsif ( |
4564
|
|
|
|
|
|
|
( $next_nonblank_token eq '=' && $rtokens->[ $i_next + 1 ] eq '>' ) |
4565
|
|
|
|
|
|
|
|
4566
|
|
|
|
|
|
|
# unless the word is __END__ or __DATA__ and is the only word on |
4567
|
|
|
|
|
|
|
# the line. |
4568
|
|
|
|
|
|
|
&& ( !defined( $is_END_DATA{$tok_kw} ) |
4569
|
|
|
|
|
|
|
|| $input_line !~ /^\s*__(?:END|DATA)__\s*$/ ) |
4570
|
|
|
|
|
|
|
) |
4571
|
|
|
|
|
|
|
{ |
4572
|
786
|
|
|
|
|
2431
|
$self->do_QUOTED_BAREWORD(); |
4573
|
|
|
|
|
|
|
} |
4574
|
|
|
|
|
|
|
|
4575
|
|
|
|
|
|
|
# quote a bare word within braces..like xxx->{s}; note that we |
4576
|
|
|
|
|
|
|
# must be sure this is not a structural brace, to avoid |
4577
|
|
|
|
|
|
|
# mistaking {s} in the following for a quoted bare word: |
4578
|
|
|
|
|
|
|
# for(@[){s}bla}BLA} |
4579
|
|
|
|
|
|
|
# Also treat q in something like var{-q} as a bare word, not |
4580
|
|
|
|
|
|
|
# a quote operator |
4581
|
|
|
|
|
|
|
elsif ( |
4582
|
|
|
|
|
|
|
$next_nonblank_token eq '}' |
4583
|
|
|
|
|
|
|
&& ( |
4584
|
|
|
|
|
|
|
$last_nonblank_type eq 'L' |
4585
|
|
|
|
|
|
|
|| ( $last_nonblank_type eq 'm' |
4586
|
|
|
|
|
|
|
&& $last_last_nonblank_type eq 'L' ) |
4587
|
|
|
|
|
|
|
) |
4588
|
|
|
|
|
|
|
) |
4589
|
|
|
|
|
|
|
{ |
4590
|
100
|
|
|
|
|
259
|
$type = 'w'; |
4591
|
|
|
|
|
|
|
} |
4592
|
|
|
|
|
|
|
|
4593
|
|
|
|
|
|
|
# handle operator x (now we know it isn't $x=) |
4594
|
|
|
|
|
|
|
elsif ( |
4595
|
|
|
|
|
|
|
$expecting == OPERATOR |
4596
|
|
|
|
|
|
|
&& substr( $tok, 0, 1 ) eq 'x' |
4597
|
|
|
|
|
|
|
&& ( length($tok) == 1 |
4598
|
|
|
|
|
|
|
|| substr( $tok, 1, 1 ) =~ /^\d/ ) |
4599
|
|
|
|
|
|
|
) |
4600
|
|
|
|
|
|
|
{ |
4601
|
17
|
|
|
|
|
95
|
$self->do_X_OPERATOR(); |
4602
|
|
|
|
|
|
|
} |
4603
|
|
|
|
|
|
|
elsif ( $tok_kw eq 'CORE::' ) { |
4604
|
3
|
|
|
|
|
6
|
$type = $tok = $tok_kw; |
4605
|
3
|
|
|
|
|
6
|
$i += 2; |
4606
|
|
|
|
|
|
|
} |
4607
|
|
|
|
|
|
|
elsif ( ( $tok eq 'strict' ) |
4608
|
|
|
|
|
|
|
and ( $last_nonblank_token eq 'use' ) ) |
4609
|
|
|
|
|
|
|
{ |
4610
|
14
|
|
|
|
|
51
|
$self->[_saw_use_strict_] = 1; |
4611
|
14
|
|
|
|
|
96
|
$self->scan_bare_identifier(); |
4612
|
|
|
|
|
|
|
} |
4613
|
|
|
|
|
|
|
|
4614
|
|
|
|
|
|
|
elsif ( ( $tok eq 'warnings' ) |
4615
|
|
|
|
|
|
|
and ( $last_nonblank_token eq 'use' ) ) |
4616
|
|
|
|
|
|
|
{ |
4617
|
7
|
|
|
|
|
28
|
$self->[_saw_perl_dash_w_] = 1; |
4618
|
|
|
|
|
|
|
|
4619
|
|
|
|
|
|
|
# scan as identifier, so that we pick up something like: |
4620
|
|
|
|
|
|
|
# use warnings::register |
4621
|
7
|
|
|
|
|
48
|
$self->scan_bare_identifier(); |
4622
|
|
|
|
|
|
|
} |
4623
|
|
|
|
|
|
|
|
4624
|
|
|
|
|
|
|
elsif ( |
4625
|
|
|
|
|
|
|
$tok eq 'AutoLoader' |
4626
|
|
|
|
|
|
|
&& $self->[_look_for_autoloader_] |
4627
|
|
|
|
|
|
|
&& ( |
4628
|
|
|
|
|
|
|
$last_nonblank_token eq 'use' |
4629
|
|
|
|
|
|
|
|
4630
|
|
|
|
|
|
|
# these regexes are from AutoSplit.pm, which we want |
4631
|
|
|
|
|
|
|
# to mimic |
4632
|
|
|
|
|
|
|
|| $input_line =~ /^\s*(use|require)\s+AutoLoader\b/ |
4633
|
|
|
|
|
|
|
|| $input_line =~ /\bISA\s*=.*\bAutoLoader\b/ |
4634
|
|
|
|
|
|
|
) |
4635
|
|
|
|
|
|
|
) |
4636
|
|
|
|
|
|
|
{ |
4637
|
0
|
|
|
|
|
0
|
$self->write_logfile_entry("AutoLoader seen, -nlal deactivates\n"); |
4638
|
0
|
|
|
|
|
0
|
$self->[_saw_autoloader_] = 1; |
4639
|
0
|
|
|
|
|
0
|
$self->[_look_for_autoloader_] = 0; |
4640
|
0
|
|
|
|
|
0
|
$self->scan_bare_identifier(); |
4641
|
|
|
|
|
|
|
} |
4642
|
|
|
|
|
|
|
|
4643
|
|
|
|
|
|
|
elsif ( |
4644
|
|
|
|
|
|
|
$tok eq 'SelfLoader' |
4645
|
|
|
|
|
|
|
&& $self->[_look_for_selfloader_] |
4646
|
|
|
|
|
|
|
&& ( $last_nonblank_token eq 'use' |
4647
|
|
|
|
|
|
|
|| $input_line =~ /^\s*(use|require)\s+SelfLoader\b/ |
4648
|
|
|
|
|
|
|
|| $input_line =~ /\bISA\s*=.*\bSelfLoader\b/ ) |
4649
|
|
|
|
|
|
|
) |
4650
|
|
|
|
|
|
|
{ |
4651
|
0
|
|
|
|
|
0
|
$self->write_logfile_entry("SelfLoader seen, -nlsl deactivates\n"); |
4652
|
0
|
|
|
|
|
0
|
$self->[_saw_selfloader_] = 1; |
4653
|
0
|
|
|
|
|
0
|
$self->[_look_for_selfloader_] = 0; |
4654
|
0
|
|
|
|
|
0
|
$self->scan_bare_identifier(); |
4655
|
|
|
|
|
|
|
} |
4656
|
|
|
|
|
|
|
|
4657
|
|
|
|
|
|
|
elsif ( ( $tok eq 'constant' ) |
4658
|
|
|
|
|
|
|
and ( $last_nonblank_token eq 'use' ) ) |
4659
|
|
|
|
|
|
|
{ |
4660
|
16
|
|
|
|
|
82
|
$self->do_USE_CONSTANT(); |
4661
|
|
|
|
|
|
|
} |
4662
|
|
|
|
|
|
|
|
4663
|
|
|
|
|
|
|
# various quote operators |
4664
|
|
|
|
|
|
|
elsif ( $is_q_qq_qw_qx_qr_s_y_tr_m{$tok} ) { |
4665
|
202
|
|
|
|
|
840
|
$self->do_QUOTE_OPERATOR(); |
4666
|
|
|
|
|
|
|
} |
4667
|
|
|
|
|
|
|
|
4668
|
|
|
|
|
|
|
# check for a statement label |
4669
|
|
|
|
|
|
|
elsif ( |
4670
|
|
|
|
|
|
|
( $next_nonblank_token eq ':' ) |
4671
|
|
|
|
|
|
|
&& ( $rtokens->[ $i_next + 1 ] ne ':' ) |
4672
|
|
|
|
|
|
|
&& ( $i_next <= $max_token_index ) # colon on same line |
4673
|
|
|
|
|
|
|
|
4674
|
|
|
|
|
|
|
# like 'sub : lvalue' ? |
4675
|
|
|
|
|
|
|
&& !$self->sub_attribute_ok_here( $tok_kw, $next_nonblank_token, |
4676
|
|
|
|
|
|
|
$i_next ) |
4677
|
|
|
|
|
|
|
&& new_statement_ok() |
4678
|
|
|
|
|
|
|
) |
4679
|
|
|
|
|
|
|
{ |
4680
|
33
|
100
|
|
|
|
177
|
if ( $tok !~ /[A-Z]/ ) { |
4681
|
15
|
|
|
|
|
36
|
push @{ $self->[_rlower_case_labels_at_] }, $input_line_number; |
|
15
|
|
|
|
|
56
|
|
4682
|
|
|
|
|
|
|
} |
4683
|
33
|
|
|
|
|
96
|
$type = 'J'; |
4684
|
33
|
|
|
|
|
92
|
$tok .= ':'; |
4685
|
33
|
|
|
|
|
87
|
$i = $i_next; |
4686
|
|
|
|
|
|
|
} |
4687
|
|
|
|
|
|
|
|
4688
|
|
|
|
|
|
|
# 'sub' or other sub alias |
4689
|
|
|
|
|
|
|
elsif ( $is_sub{$tok_kw} ) { |
4690
|
|
|
|
|
|
|
|
4691
|
|
|
|
|
|
|
# Update for --use-feature=class (rt145706): |
4692
|
|
|
|
|
|
|
# We have to be extra careful to avoid misparsing other uses of |
4693
|
|
|
|
|
|
|
# 'method' in older scripts. |
4694
|
303
|
100
|
100
|
|
|
1544
|
if ( $tok_kw eq 'method' && $guess_if_method ) { |
4695
|
10
|
100
|
66
|
|
|
113
|
if ( $expecting == OPERATOR |
|
|
|
100
|
|
|
|
|
4696
|
|
|
|
|
|
|
|| $next_nonblank_token !~ /^[\w\:]/ |
4697
|
|
|
|
|
|
|
|| !$self->method_ok_here() ) |
4698
|
|
|
|
|
|
|
{ |
4699
|
7
|
|
|
|
|
67
|
$self->do_UNKNOWN_BAREWORD($next_nonblank_token); |
4700
|
|
|
|
|
|
|
} |
4701
|
|
|
|
|
|
|
else { |
4702
|
3
|
|
|
|
|
168
|
initialize_subname(); |
4703
|
3
|
|
|
|
|
18
|
$self->scan_id(); |
4704
|
|
|
|
|
|
|
} |
4705
|
|
|
|
|
|
|
} |
4706
|
|
|
|
|
|
|
else { |
4707
|
293
|
50
|
|
|
|
875
|
$self->error_if_expecting_OPERATOR() |
4708
|
|
|
|
|
|
|
if ( $expecting == OPERATOR ); |
4709
|
293
|
|
|
|
|
1116
|
initialize_subname(); |
4710
|
293
|
|
|
|
|
1110
|
$self->scan_id(); |
4711
|
|
|
|
|
|
|
} |
4712
|
|
|
|
|
|
|
} |
4713
|
|
|
|
|
|
|
|
4714
|
|
|
|
|
|
|
# 'package' |
4715
|
|
|
|
|
|
|
elsif ( $is_package{$tok_kw} ) { |
4716
|
|
|
|
|
|
|
|
4717
|
|
|
|
|
|
|
# Update for --use-feature=class (rt145706): |
4718
|
|
|
|
|
|
|
# We have to be extra careful because 'class' may be used for other |
4719
|
|
|
|
|
|
|
# purposes on older code; i.e. |
4720
|
|
|
|
|
|
|
# class($x) - valid sub call |
4721
|
|
|
|
|
|
|
# package($x) - error |
4722
|
30
|
100
|
|
|
|
98
|
if ( $tok_kw eq 'class' ) { |
4723
|
8
|
100
|
66
|
|
|
72
|
if ( $expecting == OPERATOR |
|
|
|
100
|
|
|
|
|
4724
|
|
|
|
|
|
|
|| $next_nonblank_token !~ /^[\w\:]/ |
4725
|
|
|
|
|
|
|
|| !$self->class_ok_here() ) |
4726
|
|
|
|
|
|
|
{ |
4727
|
4
|
|
|
|
|
15
|
$self->do_UNKNOWN_BAREWORD($next_nonblank_token); |
4728
|
|
|
|
|
|
|
} |
4729
|
4
|
|
|
|
|
10
|
else { $self->scan_id() } |
4730
|
|
|
|
|
|
|
} |
4731
|
|
|
|
|
|
|
else { |
4732
|
22
|
50
|
|
|
|
373
|
$self->error_if_expecting_OPERATOR() |
4733
|
|
|
|
|
|
|
if ( $expecting == OPERATOR ); |
4734
|
22
|
|
|
|
|
94
|
$self->scan_id(); |
4735
|
|
|
|
|
|
|
} |
4736
|
|
|
|
|
|
|
} |
4737
|
|
|
|
|
|
|
|
4738
|
|
|
|
|
|
|
# Fix for c035: split 'format' from 'is_format_END_DATA' to be |
4739
|
|
|
|
|
|
|
# more restrictive. Require a new statement to be ok here. |
4740
|
|
|
|
|
|
|
elsif ( $tok_kw eq 'format' && new_statement_ok() ) { |
4741
|
1
|
|
|
|
|
3
|
$type = ';'; # make tokenizer look for TERM next |
4742
|
1
|
|
|
|
|
3
|
$self->[_in_format_] = 1; |
4743
|
1
|
|
|
|
|
3
|
$is_last = 1; ## is last token on this line |
4744
|
|
|
|
|
|
|
} |
4745
|
|
|
|
|
|
|
|
4746
|
|
|
|
|
|
|
# Note on token types for format, __DATA__, __END__: |
4747
|
|
|
|
|
|
|
# It simplifies things to give these type ';', so that when we |
4748
|
|
|
|
|
|
|
# start rescanning we will be expecting a token of type TERM. |
4749
|
|
|
|
|
|
|
# We will switch to type 'k' before outputting the tokens. |
4750
|
|
|
|
|
|
|
elsif ( defined( $is_END_DATA{$tok_kw} ) ) { |
4751
|
7
|
|
|
|
|
26
|
$type = ';'; # make tokenizer look for TERM next |
4752
|
|
|
|
|
|
|
|
4753
|
|
|
|
|
|
|
# Remember that we are in one of these three sections |
4754
|
7
|
|
|
|
|
25
|
$self->[ $is_END_DATA{$tok_kw} ] = 1; |
4755
|
7
|
|
|
|
|
31
|
$is_last = 1; ## is last token on this line |
4756
|
|
|
|
|
|
|
} |
4757
|
|
|
|
|
|
|
|
4758
|
|
|
|
|
|
|
elsif ( $is_keyword{$tok_kw} ) { |
4759
|
2644
|
|
|
|
|
8114
|
$self->do_KEYWORD(); |
4760
|
|
|
|
|
|
|
} |
4761
|
|
|
|
|
|
|
|
4762
|
|
|
|
|
|
|
# check for inline label following |
4763
|
|
|
|
|
|
|
# /^(redo|last|next|goto)$/ |
4764
|
|
|
|
|
|
|
elsif (( $last_nonblank_type eq 'k' ) |
4765
|
|
|
|
|
|
|
&& ( $is_redo_last_next_goto{$last_nonblank_token} ) ) |
4766
|
|
|
|
|
|
|
{ |
4767
|
19
|
|
|
|
|
47
|
$type = 'j'; |
4768
|
|
|
|
|
|
|
} |
4769
|
|
|
|
|
|
|
|
4770
|
|
|
|
|
|
|
# something else -- |
4771
|
|
|
|
|
|
|
else { |
4772
|
947
|
|
|
|
|
3462
|
$self->do_UNKNOWN_BAREWORD($next_nonblank_token); |
4773
|
|
|
|
|
|
|
} |
4774
|
|
|
|
|
|
|
|
4775
|
5799
|
|
|
|
|
13062
|
return $is_last; |
4776
|
|
|
|
|
|
|
|
4777
|
|
|
|
|
|
|
} ## end sub do_BAREWORD |
4778
|
|
|
|
|
|
|
|
4779
|
|
|
|
|
|
|
sub do_FOLLOW_QUOTE { |
4780
|
|
|
|
|
|
|
|
4781
|
2768
|
|
|
2768
|
0
|
4495
|
my $self = shift; |
4782
|
|
|
|
|
|
|
|
4783
|
|
|
|
|
|
|
# Continue following a quote on a new line |
4784
|
2768
|
|
|
|
|
4583
|
$type = $quote_type; |
4785
|
|
|
|
|
|
|
|
4786
|
2768
|
100
|
|
|
|
3909
|
if ( !@{$routput_token_list} ) { # initialize if continuation line |
|
2768
|
|
|
|
|
6721
|
|
4787
|
184
|
|
|
|
|
376
|
push( @{$routput_token_list}, $i ); |
|
184
|
|
|
|
|
406
|
|
4788
|
184
|
|
|
|
|
435
|
$routput_token_type->[$i] = $type; |
4789
|
|
|
|
|
|
|
|
4790
|
|
|
|
|
|
|
} |
4791
|
|
|
|
|
|
|
|
4792
|
|
|
|
|
|
|
# scan for the end of the quote or pattern |
4793
|
|
|
|
|
|
|
( |
4794
|
2768
|
|
|
|
|
8584
|
$i, |
4795
|
|
|
|
|
|
|
$in_quote, |
4796
|
|
|
|
|
|
|
$quote_character, |
4797
|
|
|
|
|
|
|
$quote_pos, |
4798
|
|
|
|
|
|
|
$quote_depth, |
4799
|
|
|
|
|
|
|
$quoted_string_1, |
4800
|
|
|
|
|
|
|
$quoted_string_2, |
4801
|
|
|
|
|
|
|
|
4802
|
|
|
|
|
|
|
) = $self->do_quote( |
4803
|
|
|
|
|
|
|
|
4804
|
|
|
|
|
|
|
$i, |
4805
|
|
|
|
|
|
|
$in_quote, |
4806
|
|
|
|
|
|
|
$quote_character, |
4807
|
|
|
|
|
|
|
$quote_pos, |
4808
|
|
|
|
|
|
|
$quote_depth, |
4809
|
|
|
|
|
|
|
$quoted_string_1, |
4810
|
|
|
|
|
|
|
$quoted_string_2, |
4811
|
|
|
|
|
|
|
$rtokens, |
4812
|
|
|
|
|
|
|
$rtoken_map, |
4813
|
|
|
|
|
|
|
$max_token_index, |
4814
|
|
|
|
|
|
|
|
4815
|
|
|
|
|
|
|
); |
4816
|
|
|
|
|
|
|
|
4817
|
|
|
|
|
|
|
# all done if we didn't find it |
4818
|
2768
|
100
|
|
|
|
6871
|
if ($in_quote) { return } |
|
183
|
|
|
|
|
358
|
|
4819
|
|
|
|
|
|
|
|
4820
|
|
|
|
|
|
|
# save pattern and replacement text for rescanning |
4821
|
2585
|
|
|
|
|
4196
|
my $qs1 = $quoted_string_1; |
4822
|
|
|
|
|
|
|
|
4823
|
|
|
|
|
|
|
# re-initialize for next search |
4824
|
2585
|
|
|
|
|
3999
|
$quote_character = EMPTY_STRING; |
4825
|
2585
|
|
|
|
|
3743
|
$quote_pos = 0; |
4826
|
2585
|
|
|
|
|
3947
|
$quote_type = 'Q'; |
4827
|
2585
|
|
|
|
|
3781
|
$quoted_string_1 = EMPTY_STRING; |
4828
|
2585
|
|
|
|
|
3895
|
$quoted_string_2 = EMPTY_STRING; |
4829
|
2585
|
100
|
|
|
|
5710
|
if ( ++$i > $max_token_index ) { return } |
|
116
|
|
|
|
|
345
|
|
4830
|
|
|
|
|
|
|
|
4831
|
|
|
|
|
|
|
# look for any modifiers |
4832
|
2469
|
100
|
|
|
|
5397
|
if ($allowed_quote_modifiers) { |
4833
|
|
|
|
|
|
|
|
4834
|
|
|
|
|
|
|
# check for exact quote modifiers |
4835
|
144
|
100
|
|
|
|
754
|
if ( $rtokens->[$i] =~ /^[A-Za-z_]/ ) { |
4836
|
30
|
|
|
|
|
75
|
my $str = $rtokens->[$i]; |
4837
|
30
|
|
|
|
|
71
|
my $saw_modifier_e; |
4838
|
30
|
|
|
|
|
502
|
while ( $str =~ /\G$allowed_quote_modifiers/gc ) { |
4839
|
47
|
|
|
|
|
116
|
my $pos = pos($str); |
4840
|
47
|
|
|
|
|
124
|
my $char = substr( $str, $pos - 1, 1 ); |
4841
|
47
|
|
66
|
|
|
309
|
$saw_modifier_e ||= ( $char eq 'e' ); |
4842
|
|
|
|
|
|
|
} |
4843
|
|
|
|
|
|
|
|
4844
|
|
|
|
|
|
|
# For an 'e' quote modifier we must scan the replacement |
4845
|
|
|
|
|
|
|
# text for here-doc targets... |
4846
|
|
|
|
|
|
|
# but if the modifier starts a new line we can skip |
4847
|
|
|
|
|
|
|
# this because either the here doc will be fully |
4848
|
|
|
|
|
|
|
# contained in the replacement text (so we can |
4849
|
|
|
|
|
|
|
# ignore it) or Perl will not find it. |
4850
|
|
|
|
|
|
|
# See test 'here2.in'. |
4851
|
30
|
50
|
66
|
|
|
155
|
if ( $saw_modifier_e && $i_tok >= 0 ) { |
4852
|
|
|
|
|
|
|
|
4853
|
0
|
|
|
|
|
0
|
my $rht = $self->scan_replacement_text($qs1); |
4854
|
|
|
|
|
|
|
|
4855
|
|
|
|
|
|
|
# Change type from 'Q' to 'h' for quotes with |
4856
|
|
|
|
|
|
|
# here-doc targets so that the formatter (see sub |
4857
|
|
|
|
|
|
|
# process_line_of_CODE) will not make any line |
4858
|
|
|
|
|
|
|
# breaks after this point. |
4859
|
0
|
0
|
|
|
|
0
|
if ($rht) { |
4860
|
0
|
|
|
|
|
0
|
push @{$rhere_target_list}, @{$rht}; |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
4861
|
0
|
|
|
|
|
0
|
$type = 'h'; |
4862
|
0
|
0
|
|
|
|
0
|
if ( $i_tok < 0 ) { |
4863
|
0
|
|
|
|
|
0
|
my $ilast = $routput_token_list->[-1]; |
4864
|
0
|
|
|
|
|
0
|
$routput_token_type->[$ilast] = $type; |
4865
|
|
|
|
|
|
|
} |
4866
|
|
|
|
|
|
|
} |
4867
|
|
|
|
|
|
|
} |
4868
|
|
|
|
|
|
|
|
4869
|
30
|
50
|
|
|
|
143
|
if ( defined( pos($str) ) ) { |
4870
|
|
|
|
|
|
|
|
4871
|
|
|
|
|
|
|
# matched |
4872
|
30
|
50
|
|
|
|
123
|
if ( pos($str) == length($str) ) { |
4873
|
30
|
50
|
|
|
|
143
|
if ( ++$i > $max_token_index ) { return } |
|
0
|
|
|
|
|
0
|
|
4874
|
|
|
|
|
|
|
} |
4875
|
|
|
|
|
|
|
|
4876
|
|
|
|
|
|
|
# Looks like a joined quote modifier |
4877
|
|
|
|
|
|
|
# and keyword, maybe something like |
4878
|
|
|
|
|
|
|
# s/xxx/yyy/gefor @k=... |
4879
|
|
|
|
|
|
|
# Example is "galgen.pl". Would have to split |
4880
|
|
|
|
|
|
|
# the word and insert a new token in the |
4881
|
|
|
|
|
|
|
# pre-token list. This is so rare that I haven't |
4882
|
|
|
|
|
|
|
# done it. Will just issue a warning citation. |
4883
|
|
|
|
|
|
|
|
4884
|
|
|
|
|
|
|
# This error might also be triggered if my quote |
4885
|
|
|
|
|
|
|
# modifier characters are incomplete |
4886
|
|
|
|
|
|
|
else { |
4887
|
0
|
|
|
|
|
0
|
$self->warning(<<EOM); |
4888
|
|
|
|
|
|
|
|
4889
|
|
|
|
|
|
|
Partial match to quote modifier $allowed_quote_modifiers at word: '$str' |
4890
|
|
|
|
|
|
|
Please put a space between quote modifiers and trailing keywords. |
4891
|
|
|
|
|
|
|
EOM |
4892
|
|
|
|
|
|
|
|
4893
|
|
|
|
|
|
|
# print "token $rtokens->[$i]\n"; |
4894
|
|
|
|
|
|
|
# my $num = length($str) - pos($str); |
4895
|
|
|
|
|
|
|
# $rtokens->[$i]=substr($rtokens->[$i],pos($str),$num); |
4896
|
|
|
|
|
|
|
# print "continuing with new token $rtokens->[$i]\n"; |
4897
|
|
|
|
|
|
|
|
4898
|
|
|
|
|
|
|
# skipping past this token does least damage |
4899
|
0
|
0
|
|
|
|
0
|
if ( ++$i > $max_token_index ) { return } |
|
0
|
|
|
|
|
0
|
|
4900
|
|
|
|
|
|
|
} |
4901
|
|
|
|
|
|
|
} |
4902
|
|
|
|
|
|
|
else { |
4903
|
|
|
|
|
|
|
|
4904
|
|
|
|
|
|
|
# example file: rokicki4.pl |
4905
|
|
|
|
|
|
|
# This error might also be triggered if my quote |
4906
|
|
|
|
|
|
|
# modifier characters are incomplete |
4907
|
0
|
|
|
|
|
0
|
$self->write_logfile_entry( |
4908
|
|
|
|
|
|
|
"Note: found word $str at quote modifier location\n"); |
4909
|
|
|
|
|
|
|
} |
4910
|
|
|
|
|
|
|
} |
4911
|
|
|
|
|
|
|
|
4912
|
|
|
|
|
|
|
# re-initialize |
4913
|
144
|
|
|
|
|
292
|
$allowed_quote_modifiers = EMPTY_STRING; |
4914
|
|
|
|
|
|
|
} |
4915
|
2469
|
|
|
|
|
4340
|
return; |
4916
|
|
|
|
|
|
|
} ## end sub do_FOLLOW_QUOTE |
4917
|
|
|
|
|
|
|
|
4918
|
|
|
|
|
|
|
# ------------------------------------------------------------ |
4919
|
|
|
|
|
|
|
# begin hash of code for handling most token types |
4920
|
|
|
|
|
|
|
# ------------------------------------------------------------ |
4921
|
|
|
|
|
|
|
my $tokenization_code = { |
4922
|
|
|
|
|
|
|
|
4923
|
|
|
|
|
|
|
'>' => \&do_GREATER_THAN_SIGN, |
4924
|
|
|
|
|
|
|
'|' => \&do_VERTICAL_LINE, |
4925
|
|
|
|
|
|
|
'$' => \&do_DOLLAR_SIGN, |
4926
|
|
|
|
|
|
|
'(' => \&do_LEFT_PARENTHESIS, |
4927
|
|
|
|
|
|
|
')' => \&do_RIGHT_PARENTHESIS, |
4928
|
|
|
|
|
|
|
',' => \&do_COMMA, |
4929
|
|
|
|
|
|
|
';' => \&do_SEMICOLON, |
4930
|
|
|
|
|
|
|
'"' => \&do_QUOTATION_MARK, |
4931
|
|
|
|
|
|
|
"'" => \&do_APOSTROPHE, |
4932
|
|
|
|
|
|
|
'`' => \&do_BACKTICK, |
4933
|
|
|
|
|
|
|
'/' => \&do_SLASH, |
4934
|
|
|
|
|
|
|
'{' => \&do_LEFT_CURLY_BRACKET, |
4935
|
|
|
|
|
|
|
'}' => \&do_RIGHT_CURLY_BRACKET, |
4936
|
|
|
|
|
|
|
'&' => \&do_AMPERSAND, |
4937
|
|
|
|
|
|
|
'<' => \&do_LESS_THAN_SIGN, |
4938
|
|
|
|
|
|
|
'?' => \&do_QUESTION_MARK, |
4939
|
|
|
|
|
|
|
'*' => \&do_STAR, |
4940
|
|
|
|
|
|
|
'.' => \&do_DOT, |
4941
|
|
|
|
|
|
|
':' => \&do_COLON, |
4942
|
|
|
|
|
|
|
'+' => \&do_PLUS_SIGN, |
4943
|
|
|
|
|
|
|
'@' => \&do_AT_SIGN, |
4944
|
|
|
|
|
|
|
'%' => \&do_PERCENT_SIGN, |
4945
|
|
|
|
|
|
|
'[' => \&do_LEFT_SQUARE_BRACKET, |
4946
|
|
|
|
|
|
|
']' => \&do_RIGHT_SQUARE_BRACKET, |
4947
|
|
|
|
|
|
|
'-' => \&do_MINUS_SIGN, |
4948
|
|
|
|
|
|
|
'^' => \&do_CARAT_SIGN, |
4949
|
|
|
|
|
|
|
'::' => \&do_DOUBLE_COLON, |
4950
|
|
|
|
|
|
|
'<<' => \&do_LEFT_SHIFT, |
4951
|
|
|
|
|
|
|
'<<~' => \&do_NEW_HERE_DOC, |
4952
|
|
|
|
|
|
|
'->' => \&do_POINTER, |
4953
|
|
|
|
|
|
|
'++' => \&do_PLUS_PLUS, |
4954
|
|
|
|
|
|
|
'=>' => \&do_FAT_COMMA, |
4955
|
|
|
|
|
|
|
'--' => \&do_MINUS_MINUS, |
4956
|
|
|
|
|
|
|
'&&' => \&do_LOGICAL_AND, |
4957
|
|
|
|
|
|
|
'||' => \&do_LOGICAL_OR, |
4958
|
|
|
|
|
|
|
'//' => \&do_SLASH_SLASH, |
4959
|
|
|
|
|
|
|
|
4960
|
|
|
|
|
|
|
# No special code for these types yet, but syntax checks |
4961
|
|
|
|
|
|
|
# could be added. |
4962
|
|
|
|
|
|
|
## '!' => undef, |
4963
|
|
|
|
|
|
|
## '!=' => undef, |
4964
|
|
|
|
|
|
|
## '!~' => undef, |
4965
|
|
|
|
|
|
|
## '%=' => undef, |
4966
|
|
|
|
|
|
|
## '&&=' => undef, |
4967
|
|
|
|
|
|
|
## '&=' => undef, |
4968
|
|
|
|
|
|
|
## '+=' => undef, |
4969
|
|
|
|
|
|
|
## '-=' => undef, |
4970
|
|
|
|
|
|
|
## '..' => undef, |
4971
|
|
|
|
|
|
|
## '..' => undef, |
4972
|
|
|
|
|
|
|
## '...' => undef, |
4973
|
|
|
|
|
|
|
## '.=' => undef, |
4974
|
|
|
|
|
|
|
## '<<=' => undef, |
4975
|
|
|
|
|
|
|
## '<=' => undef, |
4976
|
|
|
|
|
|
|
## '<=>' => undef, |
4977
|
|
|
|
|
|
|
## '<>' => undef, |
4978
|
|
|
|
|
|
|
## '=' => undef, |
4979
|
|
|
|
|
|
|
## '==' => undef, |
4980
|
|
|
|
|
|
|
## '=~' => undef, |
4981
|
|
|
|
|
|
|
## '>=' => undef, |
4982
|
|
|
|
|
|
|
## '>>' => undef, |
4983
|
|
|
|
|
|
|
## '>>=' => undef, |
4984
|
|
|
|
|
|
|
## '\\' => undef, |
4985
|
|
|
|
|
|
|
## '^=' => undef, |
4986
|
|
|
|
|
|
|
## '|=' => undef, |
4987
|
|
|
|
|
|
|
## '||=' => undef, |
4988
|
|
|
|
|
|
|
## '//=' => undef, |
4989
|
|
|
|
|
|
|
## '~' => undef, |
4990
|
|
|
|
|
|
|
## '~~' => undef, |
4991
|
|
|
|
|
|
|
## '!~~' => undef, |
4992
|
|
|
|
|
|
|
|
4993
|
|
|
|
|
|
|
}; |
4994
|
|
|
|
|
|
|
|
4995
|
|
|
|
|
|
|
# ------------------------------------------------------------ |
4996
|
|
|
|
|
|
|
# end hash of code for handling individual token types |
4997
|
|
|
|
|
|
|
# ------------------------------------------------------------ |
4998
|
|
|
|
|
|
|
|
4999
|
39
|
|
|
39
|
|
473
|
use constant DEBUG_TOKENIZE => 0; |
|
39
|
|
|
|
|
167
|
|
|
39
|
|
|
|
|
128162
|
|
5000
|
|
|
|
|
|
|
|
5001
|
|
|
|
|
|
|
sub tokenize_this_line { |
5002
|
|
|
|
|
|
|
|
5003
|
|
|
|
|
|
|
# This routine breaks a line of perl code into tokens which are of use in |
5004
|
|
|
|
|
|
|
# indentation and reformatting. One of my goals has been to define tokens |
5005
|
|
|
|
|
|
|
# such that a newline may be inserted between any pair of tokens without |
5006
|
|
|
|
|
|
|
# changing or invalidating the program. This version comes close to this, |
5007
|
|
|
|
|
|
|
# although there are necessarily a few exceptions which must be caught by |
5008
|
|
|
|
|
|
|
# the formatter. Many of these involve the treatment of bare words. |
5009
|
|
|
|
|
|
|
# |
5010
|
|
|
|
|
|
|
# The tokens and their types are returned in arrays. See previous |
5011
|
|
|
|
|
|
|
# routine for their names. |
5012
|
|
|
|
|
|
|
# |
5013
|
|
|
|
|
|
|
# See also the array "valid_token_types" in the BEGIN section for an |
5014
|
|
|
|
|
|
|
# up-to-date list. |
5015
|
|
|
|
|
|
|
# |
5016
|
|
|
|
|
|
|
# To simplify things, token types are either a single character, or they |
5017
|
|
|
|
|
|
|
# are identical to the tokens themselves. |
5018
|
|
|
|
|
|
|
# |
5019
|
|
|
|
|
|
|
# As a debugging aid, the -D flag creates a file containing a side-by-side |
5020
|
|
|
|
|
|
|
# comparison of the input string and its tokenization for each line of a file. |
5021
|
|
|
|
|
|
|
# This is an invaluable debugging aid. |
5022
|
|
|
|
|
|
|
# |
5023
|
|
|
|
|
|
|
# In addition to tokens, and some associated quantities, the tokenizer |
5024
|
|
|
|
|
|
|
# also returns flags indication any special line types. These include |
5025
|
|
|
|
|
|
|
# quotes, here_docs, formats. |
5026
|
|
|
|
|
|
|
# |
5027
|
|
|
|
|
|
|
# ----------------------------------------------------------------------- |
5028
|
|
|
|
|
|
|
# |
5029
|
|
|
|
|
|
|
# How to add NEW_TOKENS: |
5030
|
|
|
|
|
|
|
# |
5031
|
|
|
|
|
|
|
# New token types will undoubtedly be needed in the future both to keep up |
5032
|
|
|
|
|
|
|
# with changes in perl and to help adapt the tokenizer to other applications. |
5033
|
|
|
|
|
|
|
# |
5034
|
|
|
|
|
|
|
# Here are some notes on the minimal steps. I wrote these notes while |
5035
|
|
|
|
|
|
|
# adding the 'v' token type for v-strings, which are things like version |
5036
|
|
|
|
|
|
|
# numbers 5.6.0, and ip addresses, and will use that as an example. ( You |
5037
|
|
|
|
|
|
|
# can use your editor to search for the string "NEW_TOKENS" to find the |
5038
|
|
|
|
|
|
|
# appropriate sections to change): |
5039
|
|
|
|
|
|
|
# |
5040
|
|
|
|
|
|
|
# *. Try to talk somebody else into doing it! If not, .. |
5041
|
|
|
|
|
|
|
# |
5042
|
|
|
|
|
|
|
# *. Make a backup of your current version in case things don't work out! |
5043
|
|
|
|
|
|
|
# |
5044
|
|
|
|
|
|
|
# *. Think of a new, unused character for the token type, and add to |
5045
|
|
|
|
|
|
|
# the array @valid_token_types in the BEGIN section of this package. |
5046
|
|
|
|
|
|
|
# For example, I used 'v' for v-strings. |
5047
|
|
|
|
|
|
|
# |
5048
|
|
|
|
|
|
|
# *. Implement coding to recognize the $type of the token in this routine. |
5049
|
|
|
|
|
|
|
# This is the hardest part, and is best done by imitating or modifying |
5050
|
|
|
|
|
|
|
# some of the existing coding. For example, to recognize v-strings, I |
5051
|
|
|
|
|
|
|
# patched 'sub scan_bare_identifier' to recognize v-strings beginning with |
5052
|
|
|
|
|
|
|
# 'v' and 'sub scan_number' to recognize v-strings without the leading 'v'. |
5053
|
|
|
|
|
|
|
# |
5054
|
|
|
|
|
|
|
# *. Update sub operator_expected. This update is critically important but |
5055
|
|
|
|
|
|
|
# the coding is trivial. Look at the comments in that routine for help. |
5056
|
|
|
|
|
|
|
# For v-strings, which should behave like numbers, I just added 'v' to the |
5057
|
|
|
|
|
|
|
# regex used to handle numbers and strings (types 'n' and 'Q'). |
5058
|
|
|
|
|
|
|
# |
5059
|
|
|
|
|
|
|
# *. Implement a 'bond strength' rule in sub set_bond_strengths in |
5060
|
|
|
|
|
|
|
# Perl::Tidy::Formatter for breaking lines around this token type. You can |
5061
|
|
|
|
|
|
|
# skip this step and take the default at first, then adjust later to get |
5062
|
|
|
|
|
|
|
# desired results. For adding type 'v', I looked at sub bond_strength and |
5063
|
|
|
|
|
|
|
# saw that number type 'n' was using default strengths, so I didn't do |
5064
|
|
|
|
|
|
|
# anything. I may tune it up someday if I don't like the way line |
5065
|
|
|
|
|
|
|
# breaks with v-strings look. |
5066
|
|
|
|
|
|
|
# |
5067
|
|
|
|
|
|
|
# *. Implement a 'whitespace' rule in sub set_whitespace_flags in |
5068
|
|
|
|
|
|
|
# Perl::Tidy::Formatter. For adding type 'v', I looked at this routine |
5069
|
|
|
|
|
|
|
# and saw that type 'n' used spaces on both sides, so I just added 'v' |
5070
|
|
|
|
|
|
|
# to the array @spaces_both_sides. |
5071
|
|
|
|
|
|
|
# |
5072
|
|
|
|
|
|
|
# *. Update HtmlWriter package so that users can colorize the token as |
5073
|
|
|
|
|
|
|
# desired. This is quite easy; see comments identified by 'NEW_TOKENS' in |
5074
|
|
|
|
|
|
|
# that package. For v-strings, I initially chose to use a default color |
5075
|
|
|
|
|
|
|
# equal to the default for numbers, but it might be nice to change that |
5076
|
|
|
|
|
|
|
# eventually. |
5077
|
|
|
|
|
|
|
# |
5078
|
|
|
|
|
|
|
# *. Update comments in Perl::Tidy::Tokenizer::dump_token_types. |
5079
|
|
|
|
|
|
|
# |
5080
|
|
|
|
|
|
|
# *. Run lots and lots of debug tests. Start with special files designed |
5081
|
|
|
|
|
|
|
# to test the new token type. Run with the -D flag to create a .DEBUG |
5082
|
|
|
|
|
|
|
# file which shows the tokenization. When these work ok, test as many old |
5083
|
|
|
|
|
|
|
# scripts as possible. Start with all of the '.t' files in the 'test' |
5084
|
|
|
|
|
|
|
# directory of the distribution file. Compare .tdy output with previous |
5085
|
|
|
|
|
|
|
# version and updated version to see the differences. Then include as |
5086
|
|
|
|
|
|
|
# many more files as possible. My own technique has been to collect a huge |
5087
|
|
|
|
|
|
|
# number of perl scripts (thousands!) into one directory and run perltidy |
5088
|
|
|
|
|
|
|
# *, then run diff between the output of the previous version and the |
5089
|
|
|
|
|
|
|
# current version. |
5090
|
|
|
|
|
|
|
# |
5091
|
|
|
|
|
|
|
# *. For another example, search for the smartmatch operator '~~' |
5092
|
|
|
|
|
|
|
# with your editor to see where updates were made for it. |
5093
|
|
|
|
|
|
|
# |
5094
|
|
|
|
|
|
|
# ----------------------------------------------------------------------- |
5095
|
|
|
|
|
|
|
|
5096
|
7518
|
|
|
7518
|
0
|
15216
|
my ( $self, $line_of_tokens ) = @_; |
5097
|
7518
|
|
|
|
|
14853
|
my ($untrimmed_input_line) = $line_of_tokens->{_line_text}; |
5098
|
|
|
|
|
|
|
|
5099
|
|
|
|
|
|
|
# Extract line number for use in error messages |
5100
|
7518
|
|
|
|
|
12242
|
$input_line_number = $line_of_tokens->{_line_number}; |
5101
|
|
|
|
|
|
|
|
5102
|
|
|
|
|
|
|
# Check for pod documentation |
5103
|
7518
|
100
|
66
|
|
|
20227
|
if ( substr( $untrimmed_input_line, 0, 1 ) eq '=' |
5104
|
|
|
|
|
|
|
&& $untrimmed_input_line =~ /^=[A-Za-z_]/ ) |
5105
|
|
|
|
|
|
|
{ |
5106
|
|
|
|
|
|
|
|
5107
|
|
|
|
|
|
|
# Must not be in multi-line quote |
5108
|
|
|
|
|
|
|
# and must not be in an equation |
5109
|
14
|
|
|
|
|
37
|
my $blank_after_Z = 1; |
5110
|
14
|
50
|
33
|
|
|
121
|
if ( |
5111
|
|
|
|
|
|
|
!$in_quote |
5112
|
|
|
|
|
|
|
&& ( $self->operator_expected( '=', 'b', $blank_after_Z ) == |
5113
|
|
|
|
|
|
|
TERM ) |
5114
|
|
|
|
|
|
|
) |
5115
|
|
|
|
|
|
|
{ |
5116
|
14
|
|
|
|
|
38
|
$self->[_in_pod_] = 1; |
5117
|
14
|
|
|
|
|
31
|
return; |
5118
|
|
|
|
|
|
|
} |
5119
|
|
|
|
|
|
|
} |
5120
|
|
|
|
|
|
|
|
5121
|
7504
|
|
|
|
|
14287
|
$input_line = $untrimmed_input_line; |
5122
|
|
|
|
|
|
|
|
5123
|
7504
|
|
|
|
|
14525
|
chomp $input_line; |
5124
|
|
|
|
|
|
|
|
5125
|
|
|
|
|
|
|
# Reinitialize the multi-line quote flag |
5126
|
7504
|
100
|
100
|
|
|
18811
|
if ( $in_quote && $quote_type eq 'Q' ) { |
5127
|
47
|
|
|
|
|
140
|
$line_of_tokens->{_starting_in_quote} = 1; |
5128
|
|
|
|
|
|
|
} |
5129
|
|
|
|
|
|
|
else { |
5130
|
7457
|
|
|
|
|
14089
|
$line_of_tokens->{_starting_in_quote} = 0; |
5131
|
|
|
|
|
|
|
|
5132
|
|
|
|
|
|
|
# Trim start of this line unless we are continuing a quoted line. |
5133
|
|
|
|
|
|
|
# Do not trim end because we might end in a quote (test: deken4.pl) |
5134
|
|
|
|
|
|
|
# Perl::Tidy::Formatter will delete needless trailing blanks |
5135
|
7457
|
100
|
|
|
|
34428
|
if ( !length($input_line) ) { |
|
|
100
|
|
|
|
|
|
5136
|
|
|
|
|
|
|
|
5137
|
|
|
|
|
|
|
# line is empty |
5138
|
|
|
|
|
|
|
} |
5139
|
|
|
|
|
|
|
elsif ( $input_line =~ m/\S/g ) { |
5140
|
|
|
|
|
|
|
|
5141
|
|
|
|
|
|
|
# There are $spaces blank characters before a nonblank character |
5142
|
6651
|
|
|
|
|
13684
|
my $spaces = pos($input_line) - 1; |
5143
|
6651
|
100
|
|
|
|
15567
|
if ( $spaces > 0 ) { |
5144
|
|
|
|
|
|
|
|
5145
|
|
|
|
|
|
|
# Trim the leading spaces |
5146
|
3547
|
|
|
|
|
9101
|
$input_line = substr( $input_line, $spaces ); |
5147
|
|
|
|
|
|
|
|
5148
|
|
|
|
|
|
|
# Find actual space count if there are leading tabs |
5149
|
3547
|
100
|
66
|
|
|
12836
|
if ( |
5150
|
|
|
|
|
|
|
ord( substr( $untrimmed_input_line, 0, 1 ) ) == ORD_TAB |
5151
|
|
|
|
|
|
|
&& $untrimmed_input_line =~ /^(\t+)/ ) |
5152
|
|
|
|
|
|
|
{ |
5153
|
213
|
|
|
|
|
704
|
$spaces += length($1) * ( $tabsize - 1 ); |
5154
|
|
|
|
|
|
|
} |
5155
|
|
|
|
|
|
|
|
5156
|
|
|
|
|
|
|
# Calculate a guessed level for nonblank lines to avoid |
5157
|
|
|
|
|
|
|
$line_of_tokens->{_guessed_indentation_level} = |
5158
|
3547
|
|
|
|
|
10529
|
int( $spaces / $rOpts_indent_columns ); |
5159
|
|
|
|
|
|
|
} |
5160
|
|
|
|
|
|
|
} |
5161
|
|
|
|
|
|
|
else { |
5162
|
|
|
|
|
|
|
|
5163
|
|
|
|
|
|
|
# line has all blank characters |
5164
|
9
|
|
|
|
|
42
|
$input_line = EMPTY_STRING; |
5165
|
|
|
|
|
|
|
} |
5166
|
|
|
|
|
|
|
} |
5167
|
|
|
|
|
|
|
|
5168
|
7504
|
100
|
|
|
|
16164
|
if ( !$in_quote ) { |
5169
|
|
|
|
|
|
|
|
5170
|
|
|
|
|
|
|
# Optimize handling of a blank line |
5171
|
7320
|
100
|
|
|
|
15891
|
if ( !length($input_line) ) { |
5172
|
806
|
|
|
|
|
2276
|
$line_of_tokens->{_line_type} = 'CODE'; |
5173
|
806
|
|
|
|
|
1887
|
$line_of_tokens->{_rtokens} = []; |
5174
|
806
|
|
|
|
|
1823
|
$line_of_tokens->{_rtoken_type} = []; |
5175
|
806
|
|
|
|
|
1825
|
$line_of_tokens->{_rlevels} = []; |
5176
|
806
|
|
|
|
|
2036
|
$line_of_tokens->{_rci_levels} = []; |
5177
|
806
|
|
|
|
|
1673
|
$line_of_tokens->{_rblock_type} = []; |
5178
|
806
|
|
|
|
|
1956
|
$line_of_tokens->{_nesting_tokens_0} = $nesting_token_string; |
5179
|
806
|
|
|
|
|
2351
|
$line_of_tokens->{_nesting_blocks_0} = $nesting_block_string; |
5180
|
806
|
|
|
|
|
1699
|
return; |
5181
|
|
|
|
|
|
|
} |
5182
|
|
|
|
|
|
|
|
5183
|
|
|
|
|
|
|
# Check comments |
5184
|
6514
|
100
|
|
|
|
15987
|
if ( substr( $input_line, 0, 1 ) eq '#' ) { |
5185
|
|
|
|
|
|
|
|
5186
|
|
|
|
|
|
|
# and check for skipped section |
5187
|
788
|
50
|
66
|
|
|
4686
|
if ( |
|
|
|
66
|
|
|
|
|
|
|
|
66
|
|
|
|
|
5188
|
|
|
|
|
|
|
( |
5189
|
|
|
|
|
|
|
substr( $input_line, 0, 4 ) eq '#<<V' |
5190
|
|
|
|
|
|
|
|| $rOpts_code_skipping_begin |
5191
|
|
|
|
|
|
|
) |
5192
|
|
|
|
|
|
|
&& $rOpts_code_skipping |
5193
|
|
|
|
|
|
|
&& $input_line =~ /$code_skipping_pattern_begin/ |
5194
|
|
|
|
|
|
|
) |
5195
|
|
|
|
|
|
|
{ |
5196
|
2
|
|
|
|
|
10
|
$self->[_in_skipped_] = $self->[_last_line_number_]; |
5197
|
2
|
|
|
|
|
11
|
return; |
5198
|
|
|
|
|
|
|
} |
5199
|
|
|
|
|
|
|
|
5200
|
|
|
|
|
|
|
# Optional fast processing of a block comment |
5201
|
786
|
|
|
|
|
1858
|
$line_of_tokens->{_line_type} = 'CODE'; |
5202
|
786
|
|
|
|
|
2278
|
$line_of_tokens->{_rtokens} = [$input_line]; |
5203
|
786
|
|
|
|
|
2643
|
$line_of_tokens->{_rtoken_type} = ['#']; |
5204
|
786
|
|
|
|
|
2073
|
$line_of_tokens->{_rlevels} = [$level_in_tokenizer]; |
5205
|
786
|
|
|
|
|
1898
|
$line_of_tokens->{_rci_levels} = [0]; |
5206
|
786
|
|
|
|
|
2416
|
$line_of_tokens->{_rblock_type} = [EMPTY_STRING]; |
5207
|
786
|
|
|
|
|
2858
|
$line_of_tokens->{_nesting_tokens_0} = $nesting_token_string; |
5208
|
786
|
|
|
|
|
2769
|
$line_of_tokens->{_nesting_blocks_0} = $nesting_block_string; |
5209
|
786
|
|
|
|
|
1711
|
return; |
5210
|
|
|
|
|
|
|
} |
5211
|
|
|
|
|
|
|
} |
5212
|
|
|
|
|
|
|
|
5213
|
|
|
|
|
|
|
# update the copy of the line for use in error messages |
5214
|
|
|
|
|
|
|
# This must be exactly what we give the pre_tokenizer |
5215
|
5910
|
|
|
|
|
11908
|
$self->[_line_of_text_] = $input_line; |
5216
|
|
|
|
|
|
|
|
5217
|
|
|
|
|
|
|
# re-initialize for the main loop |
5218
|
5910
|
|
|
|
|
14679
|
$routput_token_list = []; # stack of output token indexes |
5219
|
5910
|
|
|
|
|
18208
|
$routput_token_type = []; # token types |
5220
|
5910
|
|
|
|
|
16039
|
$routput_block_type = []; # types of code block |
5221
|
5910
|
|
|
|
|
15527
|
$routput_container_type = []; # paren types, such as if, elsif, .. |
5222
|
5910
|
|
|
|
|
14322
|
$routput_type_sequence = []; # nesting sequential number |
5223
|
|
|
|
|
|
|
|
5224
|
5910
|
|
|
|
|
9621
|
$rhere_target_list = []; |
5225
|
|
|
|
|
|
|
|
5226
|
5910
|
|
|
|
|
9778
|
$tok = $last_nonblank_token; |
5227
|
5910
|
|
|
|
|
9287
|
$type = $last_nonblank_type; |
5228
|
5910
|
|
|
|
|
9264
|
$prototype = $last_nonblank_prototype; |
5229
|
5910
|
|
|
|
|
8867
|
$last_nonblank_i = -1; |
5230
|
5910
|
|
|
|
|
9371
|
$block_type = $last_nonblank_block_type; |
5231
|
5910
|
|
|
|
|
9190
|
$container_type = $last_nonblank_container_type; |
5232
|
5910
|
|
|
|
|
9125
|
$type_sequence = $last_nonblank_type_sequence; |
5233
|
5910
|
|
|
|
|
9120
|
$indent_flag = 0; |
5234
|
5910
|
|
|
|
|
8220
|
$peeked_ahead = 0; |
5235
|
|
|
|
|
|
|
|
5236
|
5910
|
|
|
|
|
17826
|
$self->tokenizer_main_loop(); |
5237
|
|
|
|
|
|
|
|
5238
|
|
|
|
|
|
|
#----------------------------------------------- |
5239
|
|
|
|
|
|
|
# all done tokenizing this line ... |
5240
|
|
|
|
|
|
|
# now prepare the final list of tokens and types |
5241
|
|
|
|
|
|
|
#----------------------------------------------- |
5242
|
5910
|
|
|
|
|
18117
|
$self->tokenizer_wrapup_line($line_of_tokens); |
5243
|
|
|
|
|
|
|
|
5244
|
5910
|
|
|
|
|
10345
|
return; |
5245
|
|
|
|
|
|
|
} ## end sub tokenize_this_line |
5246
|
|
|
|
|
|
|
|
5247
|
|
|
|
|
|
|
sub tokenizer_main_loop { |
5248
|
|
|
|
|
|
|
|
5249
|
5910
|
|
|
5910
|
0
|
11302
|
my ($self) = @_; |
5250
|
|
|
|
|
|
|
|
5251
|
|
|
|
|
|
|
#--------------------------------- |
5252
|
|
|
|
|
|
|
# Break one input line into tokens |
5253
|
|
|
|
|
|
|
#--------------------------------- |
5254
|
|
|
|
|
|
|
|
5255
|
|
|
|
|
|
|
# start by breaking the line into pre-tokens |
5256
|
5910
|
|
|
|
|
14415
|
( $rtokens, $rtoken_map, $rtoken_type ) = pre_tokenize($input_line); |
5257
|
|
|
|
|
|
|
|
5258
|
5910
|
|
|
|
|
23584
|
$max_token_index = scalar( @{$rtokens} ) - 1; |
|
5910
|
|
|
|
|
10835
|
|
5259
|
5910
|
|
|
|
|
8889
|
push( @{$rtokens}, SPACE, SPACE, SPACE ) |
|
5910
|
|
|
|
|
14956
|
|
5260
|
|
|
|
|
|
|
; # extra whitespace simplifies logic |
5261
|
5910
|
|
|
|
|
9020
|
push( @{$rtoken_map}, 0, 0, 0 ); # shouldn't be referenced |
|
5910
|
|
|
|
|
12567
|
|
5262
|
5910
|
|
|
|
|
8617
|
push( @{$rtoken_type}, 'b', 'b', 'b' ); |
|
5910
|
|
|
|
|
12279
|
|
5263
|
|
|
|
|
|
|
|
5264
|
|
|
|
|
|
|
# initialize for main loop |
5265
|
5910
|
|
|
|
|
8527
|
if (0) { #<<< this is not necessary |
5266
|
|
|
|
|
|
|
foreach my $ii ( 0 .. $max_token_index + 3 ) { |
5267
|
|
|
|
|
|
|
$routput_token_type->[$ii] = EMPTY_STRING; |
5268
|
|
|
|
|
|
|
$routput_block_type->[$ii] = EMPTY_STRING; |
5269
|
|
|
|
|
|
|
$routput_container_type->[$ii] = EMPTY_STRING; |
5270
|
|
|
|
|
|
|
$routput_type_sequence->[$ii] = EMPTY_STRING; |
5271
|
|
|
|
|
|
|
$routput_indent_flag->[$ii] = 0; |
5272
|
|
|
|
|
|
|
} |
5273
|
|
|
|
|
|
|
} |
5274
|
|
|
|
|
|
|
|
5275
|
5910
|
|
|
|
|
8753
|
$i = -1; |
5276
|
5910
|
|
|
|
|
8882
|
$i_tok = -1; |
5277
|
|
|
|
|
|
|
|
5278
|
|
|
|
|
|
|
#----------------------- |
5279
|
|
|
|
|
|
|
# main tokenization loop |
5280
|
|
|
|
|
|
|
#----------------------- |
5281
|
|
|
|
|
|
|
|
5282
|
|
|
|
|
|
|
# we are looking at each pre-token of one line and combining them |
5283
|
|
|
|
|
|
|
# into tokens |
5284
|
5910
|
|
|
|
|
13744
|
while ( ++$i <= $max_token_index ) { |
5285
|
|
|
|
|
|
|
|
5286
|
|
|
|
|
|
|
# continue looking for the end of a quote |
5287
|
50826
|
100
|
|
|
|
87559
|
if ($in_quote) { |
5288
|
2768
|
|
|
|
|
9010
|
$self->do_FOLLOW_QUOTE(); |
5289
|
2768
|
100
|
100
|
|
|
10477
|
last if ( $in_quote || $i > $max_token_index ); |
5290
|
|
|
|
|
|
|
} |
5291
|
|
|
|
|
|
|
|
5292
|
50527
|
100
|
100
|
|
|
138837
|
if ( $type ne 'b' && $type ne 'CORE::' ) { |
5293
|
|
|
|
|
|
|
|
5294
|
|
|
|
|
|
|
# try to catch some common errors |
5295
|
35304
|
100
|
100
|
|
|
75603
|
if ( ( $type eq 'n' ) && ( $tok ne '0' ) ) { |
5296
|
|
|
|
|
|
|
|
5297
|
1592
|
100
|
|
|
|
5085
|
if ( $last_nonblank_token eq 'eq' ) { |
|
|
50
|
|
|
|
|
|
5298
|
9
|
|
|
|
|
97
|
$self->complain("Should 'eq' be '==' here ?\n"); |
5299
|
|
|
|
|
|
|
} |
5300
|
|
|
|
|
|
|
elsif ( $last_nonblank_token eq 'ne' ) { |
5301
|
0
|
|
|
|
|
0
|
$self->complain("Should 'ne' be '!=' here ?\n"); |
5302
|
|
|
|
|
|
|
} |
5303
|
|
|
|
|
|
|
else { |
5304
|
|
|
|
|
|
|
# that's all |
5305
|
|
|
|
|
|
|
} |
5306
|
|
|
|
|
|
|
} |
5307
|
|
|
|
|
|
|
|
5308
|
|
|
|
|
|
|
# fix c090, only rotate vars if a new token will be stored |
5309
|
35304
|
100
|
|
|
|
64839
|
if ( $i_tok >= 0 ) { |
5310
|
|
|
|
|
|
|
|
5311
|
29532
|
|
|
|
|
44087
|
$last_last_nonblank_token = $last_nonblank_token; |
5312
|
29532
|
|
|
|
|
39662
|
$last_last_nonblank_type = $last_nonblank_type; |
5313
|
|
|
|
|
|
|
|
5314
|
29532
|
|
|
|
|
41643
|
$last_nonblank_prototype = $prototype; |
5315
|
29532
|
|
|
|
|
40541
|
$last_nonblank_block_type = $block_type; |
5316
|
29532
|
|
|
|
|
39995
|
$last_nonblank_container_type = $container_type; |
5317
|
29532
|
|
|
|
|
40983
|
$last_nonblank_type_sequence = $type_sequence; |
5318
|
29532
|
|
|
|
|
38478
|
$last_nonblank_i = $i_tok; |
5319
|
|
|
|
|
|
|
|
5320
|
|
|
|
|
|
|
# Fix part #3 for git82: propagate type 'Z' though L-R pair |
5321
|
29532
|
100
|
100
|
|
|
61493
|
if ( !( $type eq 'R' && $last_nonblank_type eq 'Z' ) ) { |
5322
|
29531
|
|
|
|
|
41843
|
$last_nonblank_token = $tok; |
5323
|
29531
|
|
|
|
|
41875
|
$last_nonblank_type = $type; |
5324
|
|
|
|
|
|
|
} |
5325
|
|
|
|
|
|
|
} |
5326
|
|
|
|
|
|
|
|
5327
|
|
|
|
|
|
|
# Patch for c030: Fix things in case a '->' got separated from |
5328
|
|
|
|
|
|
|
# the subsequent identifier by a side comment. We need the |
5329
|
|
|
|
|
|
|
# last_nonblank_token to have a leading -> to avoid triggering |
5330
|
|
|
|
|
|
|
# an operator expected error message at the next '('. See also |
5331
|
|
|
|
|
|
|
# fix for git #63. |
5332
|
35304
|
100
|
|
|
|
65372
|
if ( $last_last_nonblank_token eq '->' ) { |
5333
|
886
|
100
|
66
|
|
|
5498
|
if ( $last_nonblank_type eq 'w' |
5334
|
|
|
|
|
|
|
|| $last_nonblank_type eq 'i' ) |
5335
|
|
|
|
|
|
|
{ |
5336
|
675
|
|
|
|
|
1748
|
$last_nonblank_token = '->' . $last_nonblank_token; |
5337
|
675
|
|
|
|
|
1397
|
$last_nonblank_type = 'i'; |
5338
|
|
|
|
|
|
|
} |
5339
|
|
|
|
|
|
|
} |
5340
|
|
|
|
|
|
|
} |
5341
|
|
|
|
|
|
|
|
5342
|
|
|
|
|
|
|
# store previous token type |
5343
|
50527
|
100
|
|
|
|
87064
|
if ( $i_tok >= 0 ) { |
5344
|
44755
|
|
|
|
|
86851
|
$routput_token_type->[$i_tok] = $type; |
5345
|
44755
|
|
|
|
|
73564
|
$routput_block_type->[$i_tok] = $block_type; |
5346
|
44755
|
|
|
|
|
74748
|
$routput_container_type->[$i_tok] = $container_type; |
5347
|
44755
|
|
|
|
|
70372
|
$routput_type_sequence->[$i_tok] = $type_sequence; |
5348
|
44755
|
|
|
|
|
68197
|
$routput_indent_flag->[$i_tok] = $indent_flag; |
5349
|
|
|
|
|
|
|
} |
5350
|
|
|
|
|
|
|
|
5351
|
|
|
|
|
|
|
# get the next pre-token and type |
5352
|
|
|
|
|
|
|
# $tok and $type will be modified to make the output token |
5353
|
50527
|
|
|
|
|
80462
|
my $pre_tok = $tok = $rtokens->[$i]; # get the next pre-token |
5354
|
50527
|
|
|
|
|
76595
|
my $pre_type = $type = $rtoken_type->[$i]; # and type |
5355
|
|
|
|
|
|
|
|
5356
|
|
|
|
|
|
|
# remember the starting index of this token; we will be updating $i |
5357
|
50527
|
|
|
|
|
66731
|
$i_tok = $i; |
5358
|
|
|
|
|
|
|
|
5359
|
|
|
|
|
|
|
# re-initialize various flags for the next output token |
5360
|
|
|
|
|
|
|
( |
5361
|
|
|
|
|
|
|
|
5362
|
50527
|
|
|
|
|
89619
|
$block_type, |
5363
|
|
|
|
|
|
|
$container_type, |
5364
|
|
|
|
|
|
|
$type_sequence, |
5365
|
|
|
|
|
|
|
$indent_flag, |
5366
|
|
|
|
|
|
|
$prototype, |
5367
|
|
|
|
|
|
|
) |
5368
|
|
|
|
|
|
|
= ( |
5369
|
|
|
|
|
|
|
|
5370
|
|
|
|
|
|
|
EMPTY_STRING, |
5371
|
|
|
|
|
|
|
EMPTY_STRING, |
5372
|
|
|
|
|
|
|
EMPTY_STRING, |
5373
|
|
|
|
|
|
|
0, |
5374
|
|
|
|
|
|
|
EMPTY_STRING, |
5375
|
|
|
|
|
|
|
); |
5376
|
|
|
|
|
|
|
|
5377
|
|
|
|
|
|
|
# this pre-token will start an output token |
5378
|
50527
|
|
|
|
|
64408
|
push( @{$routput_token_list}, $i_tok ); |
|
50527
|
|
|
|
|
84339
|
|
5379
|
|
|
|
|
|
|
|
5380
|
|
|
|
|
|
|
# The search for the full token ends in one of 5 main END NODES: |
5381
|
|
|
|
|
|
|
|
5382
|
|
|
|
|
|
|
#----------------------- |
5383
|
|
|
|
|
|
|
# END NODE 1: whitespace |
5384
|
|
|
|
|
|
|
#----------------------- |
5385
|
50527
|
100
|
|
|
|
107292
|
next if ( $pre_type eq 'b' ); |
5386
|
|
|
|
|
|
|
|
5387
|
|
|
|
|
|
|
#---------------------- |
5388
|
|
|
|
|
|
|
# END NODE 2: a comment |
5389
|
|
|
|
|
|
|
#---------------------- |
5390
|
35162
|
100
|
|
|
|
62299
|
last if ( $pre_type eq '#' ); |
5391
|
|
|
|
|
|
|
|
5392
|
|
|
|
|
|
|
# continue gathering identifier if necessary |
5393
|
34834
|
100
|
|
|
|
60567
|
if ($id_scan_state) { |
5394
|
|
|
|
|
|
|
|
5395
|
17
|
100
|
66
|
|
|
134
|
if ( $is_sub{$id_scan_state} || $is_package{$id_scan_state} ) { |
5396
|
10
|
|
|
|
|
39
|
$self->scan_id(); |
5397
|
|
|
|
|
|
|
} |
5398
|
|
|
|
|
|
|
else { |
5399
|
7
|
|
|
|
|
33
|
$self->scan_identifier(); |
5400
|
|
|
|
|
|
|
} |
5401
|
|
|
|
|
|
|
|
5402
|
17
|
100
|
|
|
|
60
|
if ($id_scan_state) { |
5403
|
|
|
|
|
|
|
|
5404
|
|
|
|
|
|
|
# Still scanning ... |
5405
|
|
|
|
|
|
|
# Check for side comment between sub and prototype (c061) |
5406
|
|
|
|
|
|
|
|
5407
|
|
|
|
|
|
|
# done if nothing left to scan on this line |
5408
|
1
|
50
|
|
|
|
1521
|
last if ( $i > $max_token_index ); |
5409
|
|
|
|
|
|
|
|
5410
|
1
|
|
|
|
|
13
|
my ( $next_nonblank_token, $i_next ) = |
5411
|
|
|
|
|
|
|
find_next_nonblank_token_on_this_line( $i, $rtokens, |
5412
|
|
|
|
|
|
|
$max_token_index ); |
5413
|
|
|
|
|
|
|
|
5414
|
|
|
|
|
|
|
# done if it was just some trailing space |
5415
|
1
|
50
|
|
|
|
11
|
last if ( $i_next > $max_token_index ); |
5416
|
|
|
|
|
|
|
|
5417
|
|
|
|
|
|
|
# something remains on the line ... must be a side comment |
5418
|
1
|
|
|
|
|
4
|
next; |
5419
|
|
|
|
|
|
|
} |
5420
|
|
|
|
|
|
|
|
5421
|
16
|
100
|
100
|
|
|
113
|
next if ( ( $i > 0 ) || $type ); |
5422
|
|
|
|
|
|
|
|
5423
|
|
|
|
|
|
|
# didn't find any token; start over |
5424
|
7
|
|
|
|
|
15
|
$type = $pre_type; |
5425
|
7
|
|
|
|
|
16
|
$tok = $pre_tok; |
5426
|
|
|
|
|
|
|
} |
5427
|
|
|
|
|
|
|
|
5428
|
|
|
|
|
|
|
#----------------------------------------------------------- |
5429
|
|
|
|
|
|
|
# Combine pre-tokens into digraphs and trigraphs if possible |
5430
|
|
|
|
|
|
|
#----------------------------------------------------------- |
5431
|
|
|
|
|
|
|
|
5432
|
|
|
|
|
|
|
# See if we can make a digraph... |
5433
|
|
|
|
|
|
|
# The following tokens are excluded and handled specially: |
5434
|
|
|
|
|
|
|
# '/=' is excluded because the / might start a pattern. |
5435
|
|
|
|
|
|
|
# 'x=' is excluded since it might be $x=, with $ on previous line |
5436
|
|
|
|
|
|
|
# '**' and *= might be typeglobs of punctuation variables |
5437
|
|
|
|
|
|
|
# I have allowed tokens starting with <, such as <=, |
5438
|
|
|
|
|
|
|
# because I don't think these could be valid angle operators. |
5439
|
|
|
|
|
|
|
# test file: storrs4.pl |
5440
|
34824
|
100
|
100
|
|
|
107308
|
if ( $can_start_digraph{$tok} |
|
|
|
100
|
|
|
|
|
5441
|
|
|
|
|
|
|
&& $i < $max_token_index |
5442
|
|
|
|
|
|
|
&& $is_digraph{ $tok . $rtokens->[ $i + 1 ] } ) |
5443
|
|
|
|
|
|
|
{ |
5444
|
|
|
|
|
|
|
|
5445
|
2561
|
|
|
|
|
4906
|
my $combine_ok = 1; |
5446
|
2561
|
|
|
|
|
5543
|
my $test_tok = $tok . $rtokens->[ $i + 1 ]; |
5447
|
|
|
|
|
|
|
|
5448
|
|
|
|
|
|
|
# check for special cases which cannot be combined |
5449
|
|
|
|
|
|
|
|
5450
|
|
|
|
|
|
|
# '//' must be defined_or operator if an operator is expected. |
5451
|
|
|
|
|
|
|
# TODO: Code for other ambiguous digraphs (/=, x=, **, *=) |
5452
|
|
|
|
|
|
|
# could be migrated here for clarity |
5453
|
|
|
|
|
|
|
|
5454
|
|
|
|
|
|
|
# Patch for RT#102371, misparsing a // in the following snippet: |
5455
|
|
|
|
|
|
|
# state $b //= ccc(); |
5456
|
|
|
|
|
|
|
# The solution is to always accept the digraph (or trigraph) |
5457
|
|
|
|
|
|
|
# after type 'Z' (possible file handle). The reason is that |
5458
|
|
|
|
|
|
|
# sub operator_expected gives TERM expected here, which is |
5459
|
|
|
|
|
|
|
# wrong in this case. |
5460
|
2561
|
100
|
66
|
|
|
7172
|
if ( $test_tok eq '//' && $last_nonblank_type ne 'Z' ) { |
5461
|
|
|
|
|
|
|
|
5462
|
|
|
|
|
|
|
# note that here $tok = '/' and the next tok and type is '/' |
5463
|
16
|
|
|
|
|
42
|
my $blank_after_Z; |
5464
|
16
|
|
|
|
|
69
|
$expecting = |
5465
|
|
|
|
|
|
|
$self->operator_expected( $tok, '/', $blank_after_Z ); |
5466
|
|
|
|
|
|
|
|
5467
|
|
|
|
|
|
|
# Patched for RT#101547, was 'unless ($expecting==OPERATOR)' |
5468
|
16
|
100
|
|
|
|
51
|
$combine_ok = 0 if ( $expecting == TERM ); |
5469
|
|
|
|
|
|
|
} |
5470
|
|
|
|
|
|
|
|
5471
|
|
|
|
|
|
|
# Patch for RT #114359: mis-parsing of "print $x ** 0.5; |
5472
|
|
|
|
|
|
|
# Accept the digraphs '**' only after type 'Z' |
5473
|
|
|
|
|
|
|
# Otherwise postpone the decision. |
5474
|
2561
|
100
|
|
|
|
5910
|
if ( $test_tok eq '**' ) { |
5475
|
39
|
100
|
|
|
|
180
|
if ( $last_nonblank_type ne 'Z' ) { $combine_ok = 0 } |
|
37
|
|
|
|
|
79
|
|
5476
|
|
|
|
|
|
|
} |
5477
|
|
|
|
|
|
|
|
5478
|
2561
|
50
|
66
|
|
|
16559
|
if ( |
|
|
|
66
|
|
|
|
|
|
|
|
33
|
|
|
|
|
5479
|
|
|
|
|
|
|
|
5480
|
|
|
|
|
|
|
# still ok to combine? |
5481
|
|
|
|
|
|
|
$combine_ok |
5482
|
|
|
|
|
|
|
|
5483
|
|
|
|
|
|
|
&& ( $test_tok ne '/=' ) # might be pattern |
5484
|
|
|
|
|
|
|
&& ( $test_tok ne 'x=' ) # might be $x |
5485
|
|
|
|
|
|
|
&& ( $test_tok ne '*=' ) # typeglob? |
5486
|
|
|
|
|
|
|
|
5487
|
|
|
|
|
|
|
# Moved above as part of fix for |
5488
|
|
|
|
|
|
|
# RT #114359: Missparsing of "print $x ** 0.5; |
5489
|
|
|
|
|
|
|
# && ( $test_tok ne '**' ) # typeglob? |
5490
|
|
|
|
|
|
|
) |
5491
|
|
|
|
|
|
|
{ |
5492
|
2520
|
|
|
|
|
4084
|
$tok = $test_tok; |
5493
|
2520
|
|
|
|
|
3764
|
$i++; |
5494
|
|
|
|
|
|
|
|
5495
|
|
|
|
|
|
|
# Now try to assemble trigraphs. Note that all possible |
5496
|
|
|
|
|
|
|
# perl trigraphs can be constructed by appending a character |
5497
|
|
|
|
|
|
|
# to a digraph. |
5498
|
2520
|
|
|
|
|
4690
|
$test_tok = $tok . $rtokens->[ $i + 1 ]; |
5499
|
|
|
|
|
|
|
|
5500
|
2520
|
100
|
|
|
|
6400
|
if ( $is_trigraph{$test_tok} ) { |
5501
|
77
|
|
|
|
|
182
|
$tok = $test_tok; |
5502
|
77
|
|
|
|
|
169
|
$i++; |
5503
|
|
|
|
|
|
|
} |
5504
|
|
|
|
|
|
|
|
5505
|
|
|
|
|
|
|
# The only current tetragraph is the double diamond operator |
5506
|
|
|
|
|
|
|
# and its first three characters are NOT a trigraph, so |
5507
|
|
|
|
|
|
|
# we do can do a special test for it |
5508
|
|
|
|
|
|
|
else { |
5509
|
2443
|
100
|
|
|
|
5991
|
if ( $test_tok eq '<<>' ) { |
5510
|
1
|
|
|
|
|
4
|
$test_tok .= $rtokens->[ $i + 2 ]; |
5511
|
1
|
50
|
|
|
|
5
|
if ( $is_tetragraph{$test_tok} ) { |
5512
|
1
|
|
|
|
|
2
|
$tok = $test_tok; |
5513
|
1
|
|
|
|
|
3
|
$i += 2; |
5514
|
|
|
|
|
|
|
} |
5515
|
|
|
|
|
|
|
} |
5516
|
|
|
|
|
|
|
} |
5517
|
|
|
|
|
|
|
} |
5518
|
|
|
|
|
|
|
} |
5519
|
|
|
|
|
|
|
|
5520
|
34824
|
|
|
|
|
50236
|
$type = $tok; |
5521
|
34824
|
|
|
|
|
57618
|
$next_tok = $rtokens->[ $i + 1 ]; |
5522
|
34824
|
|
|
|
|
52907
|
$next_type = $rtoken_type->[ $i + 1 ]; |
5523
|
|
|
|
|
|
|
|
5524
|
|
|
|
|
|
|
# expecting an operator here? first try table lookup, then function |
5525
|
34824
|
|
|
|
|
65018
|
$expecting = $op_expected_table{$last_nonblank_type}; |
5526
|
34824
|
100
|
|
|
|
66481
|
if ( !defined($expecting) ) { |
5527
|
8397
|
|
100
|
|
|
20226
|
my $blank_after_Z = $last_nonblank_type eq 'Z' |
5528
|
|
|
|
|
|
|
&& ( $i == 0 || $rtoken_type->[ $i - 1 ] eq 'b' ); |
5529
|
8397
|
|
|
|
|
21868
|
$expecting = |
5530
|
|
|
|
|
|
|
$self->operator_expected( $tok, $next_type, $blank_after_Z ); |
5531
|
|
|
|
|
|
|
} |
5532
|
|
|
|
|
|
|
|
5533
|
34824
|
|
|
|
|
44690
|
DEBUG_TOKENIZE && do { |
5534
|
|
|
|
|
|
|
local $LIST_SEPARATOR = ')('; |
5535
|
|
|
|
|
|
|
my @debug_list = ( |
5536
|
|
|
|
|
|
|
$last_nonblank_token, $tok, |
5537
|
|
|
|
|
|
|
$next_tok, $brace_depth, |
5538
|
|
|
|
|
|
|
$rbrace_type->[$brace_depth], $paren_depth, |
5539
|
|
|
|
|
|
|
$rparen_type->[$paren_depth], |
5540
|
|
|
|
|
|
|
); |
5541
|
|
|
|
|
|
|
print {*STDOUT} "TOKENIZE:(@debug_list)\n"; |
5542
|
|
|
|
|
|
|
}; |
5543
|
|
|
|
|
|
|
|
5544
|
|
|
|
|
|
|
# We have the next token, $tok. |
5545
|
|
|
|
|
|
|
# Now we have to examine this token and decide what it is |
5546
|
|
|
|
|
|
|
# and define its $type |
5547
|
|
|
|
|
|
|
|
5548
|
|
|
|
|
|
|
#------------------------ |
5549
|
|
|
|
|
|
|
# END NODE 3: a bare word |
5550
|
|
|
|
|
|
|
#------------------------ |
5551
|
34824
|
100
|
|
|
|
64751
|
if ( $pre_type eq 'w' ) { |
5552
|
5838
|
|
|
|
|
16367
|
my $is_last = $self->do_BAREWORD(); |
5553
|
5838
|
100
|
|
|
|
12259
|
last if ($is_last); |
5554
|
5830
|
|
|
|
|
15005
|
next; |
5555
|
|
|
|
|
|
|
} |
5556
|
|
|
|
|
|
|
|
5557
|
|
|
|
|
|
|
# Turn off attribute list on first non-blank, non-bareword. |
5558
|
|
|
|
|
|
|
# Added '#' to fix c038 (later moved above). |
5559
|
28986
|
|
100
|
|
|
55449
|
$self->[_in_attribute_list_] &&= 0; |
5560
|
|
|
|
|
|
|
|
5561
|
|
|
|
|
|
|
#------------------------------- |
5562
|
|
|
|
|
|
|
# END NODE 4: a string of digits |
5563
|
|
|
|
|
|
|
#------------------------------- |
5564
|
28986
|
100
|
|
|
|
52760
|
if ( $pre_type eq 'd' ) { |
5565
|
1934
|
|
|
|
|
7226
|
$self->do_DIGITS(); |
5566
|
1934
|
|
|
|
|
4448
|
next; |
5567
|
|
|
|
|
|
|
} |
5568
|
|
|
|
|
|
|
|
5569
|
|
|
|
|
|
|
#------------------------------------------ |
5570
|
|
|
|
|
|
|
# END NODE 5: everything else (punctuation) |
5571
|
|
|
|
|
|
|
#------------------------------------------ |
5572
|
27052
|
|
|
|
|
54249
|
my $code = $tokenization_code->{$tok}; |
5573
|
27052
|
100
|
|
|
|
51029
|
if ($code) { |
5574
|
25317
|
|
|
|
|
78446
|
$code->($self); |
5575
|
25317
|
100
|
|
|
|
65930
|
redo if $in_quote; |
5576
|
|
|
|
|
|
|
} |
5577
|
|
|
|
|
|
|
} ## End main tokenizer loop |
5578
|
|
|
|
|
|
|
|
5579
|
|
|
|
|
|
|
# Store the final token |
5580
|
5910
|
100
|
|
|
|
13386
|
if ( $i_tok >= 0 ) { |
5581
|
5772
|
|
|
|
|
12489
|
$routput_token_type->[$i_tok] = $type; |
5582
|
5772
|
|
|
|
|
10635
|
$routput_block_type->[$i_tok] = $block_type; |
5583
|
5772
|
|
|
|
|
10237
|
$routput_container_type->[$i_tok] = $container_type; |
5584
|
5772
|
|
|
|
|
10134
|
$routput_type_sequence->[$i_tok] = $type_sequence; |
5585
|
5772
|
|
|
|
|
10129
|
$routput_indent_flag->[$i_tok] = $indent_flag; |
5586
|
|
|
|
|
|
|
} |
5587
|
|
|
|
|
|
|
|
5588
|
|
|
|
|
|
|
# Remember last nonblank values |
5589
|
5910
|
100
|
100
|
|
|
21752
|
if ( $type ne 'b' && $type ne '#' ) { |
5590
|
|
|
|
|
|
|
|
5591
|
5437
|
|
|
|
|
8758
|
$last_last_nonblank_token = $last_nonblank_token; |
5592
|
5437
|
|
|
|
|
7944
|
$last_last_nonblank_type = $last_nonblank_type; |
5593
|
|
|
|
|
|
|
|
5594
|
5437
|
|
|
|
|
8489
|
$last_nonblank_prototype = $prototype; |
5595
|
5437
|
|
|
|
|
7821
|
$last_nonblank_block_type = $block_type; |
5596
|
5437
|
|
|
|
|
7624
|
$last_nonblank_container_type = $container_type; |
5597
|
5437
|
|
|
|
|
7611
|
$last_nonblank_type_sequence = $type_sequence; |
5598
|
|
|
|
|
|
|
|
5599
|
5437
|
|
|
|
|
7874
|
$last_nonblank_token = $tok; |
5600
|
5437
|
|
|
|
|
7947
|
$last_nonblank_type = $type; |
5601
|
|
|
|
|
|
|
} |
5602
|
|
|
|
|
|
|
|
5603
|
|
|
|
|
|
|
# reset indentation level if necessary at a sub or package |
5604
|
|
|
|
|
|
|
# in an attempt to recover from a nesting error |
5605
|
5910
|
50
|
|
|
|
12251
|
if ( $level_in_tokenizer < 0 ) { |
5606
|
0
|
0
|
|
|
|
0
|
if ( $input_line =~ /^\s*(sub|package)\s+(\w+)/ ) { |
5607
|
0
|
|
|
|
|
0
|
reset_indentation_level(0); |
5608
|
0
|
|
|
|
|
0
|
$self->brace_warning("resetting level to 0 at $1 $2\n"); |
5609
|
|
|
|
|
|
|
} |
5610
|
|
|
|
|
|
|
} |
5611
|
|
|
|
|
|
|
|
5612
|
5910
|
|
|
|
|
9705
|
$self->[_in_quote_] = $in_quote; |
5613
|
5910
|
100
|
|
|
|
12980
|
$self->[_quote_target_] = |
5614
|
|
|
|
|
|
|
$in_quote ? matching_end_token($quote_character) : EMPTY_STRING; |
5615
|
5910
|
|
|
|
|
10817
|
$self->[_rhere_target_list_] = $rhere_target_list; |
5616
|
|
|
|
|
|
|
|
5617
|
5910
|
|
|
|
|
9940
|
return; |
5618
|
|
|
|
|
|
|
} ## end sub tokenizer_main_loop |
5619
|
|
|
|
|
|
|
|
5620
|
|
|
|
|
|
|
sub tokenizer_wrapup_line { |
5621
|
5910
|
|
|
5910
|
0
|
12603
|
my ( $self, $line_of_tokens ) = @_; |
5622
|
|
|
|
|
|
|
|
5623
|
|
|
|
|
|
|
#--------------------------------------------------------- |
5624
|
|
|
|
|
|
|
# Package a line of tokens for shipping back to the caller |
5625
|
|
|
|
|
|
|
#--------------------------------------------------------- |
5626
|
|
|
|
|
|
|
|
5627
|
|
|
|
|
|
|
# Arrays to hold token values for this line: |
5628
|
5910
|
|
|
|
|
10181
|
my ( @levels, @block_type, @type_sequence, @token_type, @tokens ); |
5629
|
|
|
|
|
|
|
|
5630
|
5910
|
|
|
|
|
14648
|
$line_of_tokens->{_nesting_tokens_0} = $nesting_token_string; |
5631
|
|
|
|
|
|
|
|
5632
|
|
|
|
|
|
|
# Remember starting nesting block string |
5633
|
5910
|
|
|
|
|
9803
|
my $nesting_block_string_0 = $nesting_block_string; |
5634
|
|
|
|
|
|
|
|
5635
|
|
|
|
|
|
|
#----------------- |
5636
|
|
|
|
|
|
|
# Loop over tokens |
5637
|
|
|
|
|
|
|
#----------------- |
5638
|
|
|
|
|
|
|
# $i is the index of the pretoken which starts this full token |
5639
|
5910
|
|
|
|
|
11508
|
foreach my $i ( @{$routput_token_list} ) { |
|
5910
|
|
|
|
|
12642
|
|
5640
|
|
|
|
|
|
|
|
5641
|
50711
|
|
|
|
|
73613
|
my $type_i = $routput_token_type->[$i]; |
5642
|
|
|
|
|
|
|
|
5643
|
|
|
|
|
|
|
#---------------------------------------- |
5644
|
|
|
|
|
|
|
# Section 1. Handle a non-sequenced token |
5645
|
|
|
|
|
|
|
#---------------------------------------- |
5646
|
50711
|
100
|
|
|
|
79551
|
if ( !$routput_type_sequence->[$i] ) { |
5647
|
|
|
|
|
|
|
|
5648
|
|
|
|
|
|
|
#------------------------------- |
5649
|
|
|
|
|
|
|
# Section 1.1. types ';' and 't' |
5650
|
|
|
|
|
|
|
#------------------------------- |
5651
|
|
|
|
|
|
|
# - output anonymous 'sub' as keyword (type 'k') |
5652
|
|
|
|
|
|
|
# - output __END__, __DATA__, and format as type 'k' instead |
5653
|
|
|
|
|
|
|
# of ';' to make html colors correct, etc. |
5654
|
41555
|
100
|
|
|
|
91878
|
if ( $is_semicolon_or_t{$type_i} ) { |
|
|
50
|
|
|
|
|
|
5655
|
2676
|
|
|
|
|
6227
|
my $tok_i = $rtokens->[$i]; |
5656
|
2676
|
100
|
|
|
|
7839
|
if ( $is_END_DATA_format_sub{$tok_i} ) { |
5657
|
172
|
|
|
|
|
509
|
$type_i = 'k'; |
5658
|
|
|
|
|
|
|
} |
5659
|
|
|
|
|
|
|
} |
5660
|
|
|
|
|
|
|
|
5661
|
|
|
|
|
|
|
#---------------------------------------------- |
5662
|
|
|
|
|
|
|
# Section 1.2. Check for an invalid token type. |
5663
|
|
|
|
|
|
|
#---------------------------------------------- |
5664
|
|
|
|
|
|
|
# This can happen by running perltidy on non-scripts although |
5665
|
|
|
|
|
|
|
# it could also be bug introduced by programming change. Perl |
5666
|
|
|
|
|
|
|
# silently accepts a 032 (^Z) and takes it as the end |
5667
|
|
|
|
|
|
|
elsif ( !$is_valid_token_type{$type_i} ) { |
5668
|
0
|
|
|
|
|
0
|
my $val = ord($type_i); |
5669
|
0
|
|
|
|
|
0
|
$self->warning( |
5670
|
|
|
|
|
|
|
"unexpected character decimal $val ($type_i) in script\n" |
5671
|
|
|
|
|
|
|
); |
5672
|
0
|
|
|
|
|
0
|
$self->[_in_error_] = 1; |
5673
|
|
|
|
|
|
|
} |
5674
|
|
|
|
|
|
|
else { |
5675
|
|
|
|
|
|
|
## ok - valid token type other than ; and t |
5676
|
|
|
|
|
|
|
} |
5677
|
|
|
|
|
|
|
|
5678
|
|
|
|
|
|
|
#---------------------------------------------------- |
5679
|
|
|
|
|
|
|
# Section 1.3. Store values for a non-sequenced token |
5680
|
|
|
|
|
|
|
#---------------------------------------------------- |
5681
|
41555
|
|
|
|
|
67407
|
push( @levels, $level_in_tokenizer ); |
5682
|
41555
|
|
|
|
|
62504
|
push( @block_type, EMPTY_STRING ); |
5683
|
41555
|
|
|
|
|
60042
|
push( @type_sequence, EMPTY_STRING ); |
5684
|
41555
|
|
|
|
|
77747
|
push( @token_type, $type_i ); |
5685
|
|
|
|
|
|
|
|
5686
|
|
|
|
|
|
|
} |
5687
|
|
|
|
|
|
|
|
5688
|
|
|
|
|
|
|
#------------------------------------ |
5689
|
|
|
|
|
|
|
# Section 2. Handle a sequenced token |
5690
|
|
|
|
|
|
|
# One of { [ ( ? : ) ] } |
5691
|
|
|
|
|
|
|
#------------------------------------ |
5692
|
|
|
|
|
|
|
else { |
5693
|
|
|
|
|
|
|
|
5694
|
|
|
|
|
|
|
# $level_i is the level we will store. Levels of braces are |
5695
|
|
|
|
|
|
|
# set so that the leading braces have a HIGHER level than their |
5696
|
|
|
|
|
|
|
# CONTENTS, which is convenient for indentation. |
5697
|
9156
|
|
|
|
|
13762
|
my $level_i = $level_in_tokenizer; |
5698
|
|
|
|
|
|
|
|
5699
|
|
|
|
|
|
|
# $tok_i is the PRE-token. It only equals the token for symbols |
5700
|
9156
|
|
|
|
|
14859
|
my $tok_i = $rtokens->[$i]; |
5701
|
|
|
|
|
|
|
|
5702
|
|
|
|
|
|
|
# $routput_indent_flag->[$i] indicates that we need a change |
5703
|
|
|
|
|
|
|
# in level at a nested ternary, as follows |
5704
|
|
|
|
|
|
|
# 1 => at a nested ternary ? |
5705
|
|
|
|
|
|
|
# -1 => at a nested ternary : |
5706
|
|
|
|
|
|
|
# 0 => otherwise |
5707
|
|
|
|
|
|
|
|
5708
|
|
|
|
|
|
|
#-------------------------------------------- |
5709
|
|
|
|
|
|
|
# Section 2.1 Handle a level-increasing token |
5710
|
|
|
|
|
|
|
#-------------------------------------------- |
5711
|
9156
|
100
|
|
|
|
25078
|
if ( $is_opening_or_ternary_type{$type_i} ) { |
|
|
50
|
|
|
|
|
|
5712
|
|
|
|
|
|
|
|
5713
|
4578
|
100
|
|
|
|
9045
|
if ( $type_i eq '?' ) { |
5714
|
|
|
|
|
|
|
|
5715
|
187
|
100
|
|
|
|
997
|
if ( $routput_indent_flag->[$i] > 0 ) { |
5716
|
8
|
|
|
|
|
39
|
$level_in_tokenizer++; |
5717
|
|
|
|
|
|
|
|
5718
|
|
|
|
|
|
|
# break BEFORE '?' in a nested ternary |
5719
|
8
|
|
|
|
|
19
|
$level_i = $level_in_tokenizer; |
5720
|
8
|
|
|
|
|
22
|
$nesting_block_string .= "$nesting_block_flag"; |
5721
|
|
|
|
|
|
|
|
5722
|
|
|
|
|
|
|
} |
5723
|
|
|
|
|
|
|
} |
5724
|
|
|
|
|
|
|
else { |
5725
|
|
|
|
|
|
|
|
5726
|
4391
|
|
|
|
|
7246
|
$nesting_token_string .= $tok_i; |
5727
|
|
|
|
|
|
|
|
5728
|
4391
|
100
|
100
|
|
|
12454
|
if ( $type_i eq '{' || $type_i eq 'L' ) { |
5729
|
|
|
|
|
|
|
|
5730
|
4084
|
|
|
|
|
6356
|
$level_in_tokenizer++; |
5731
|
|
|
|
|
|
|
|
5732
|
4084
|
100
|
|
|
|
7932
|
if ( $routput_block_type->[$i] ) { |
5733
|
974
|
|
|
|
|
1993
|
$nesting_block_flag = 1; |
5734
|
974
|
|
|
|
|
1985
|
$nesting_block_string .= '1'; |
5735
|
|
|
|
|
|
|
} |
5736
|
|
|
|
|
|
|
else { |
5737
|
3110
|
|
|
|
|
5036
|
$nesting_block_flag = 0; |
5738
|
3110
|
|
|
|
|
5440
|
$nesting_block_string .= '0'; |
5739
|
|
|
|
|
|
|
} |
5740
|
|
|
|
|
|
|
} |
5741
|
|
|
|
|
|
|
} |
5742
|
|
|
|
|
|
|
} |
5743
|
|
|
|
|
|
|
|
5744
|
|
|
|
|
|
|
#--------------------------------------------- |
5745
|
|
|
|
|
|
|
# Section 2.2. Handle a level-decreasing token |
5746
|
|
|
|
|
|
|
#--------------------------------------------- |
5747
|
|
|
|
|
|
|
elsif ( $is_closing_or_ternary_type{$type_i} ) { |
5748
|
|
|
|
|
|
|
|
5749
|
4578
|
100
|
|
|
|
11554
|
if ( $type_i ne ':' ) { |
5750
|
4391
|
|
|
|
|
8104
|
my $char = chop $nesting_token_string; |
5751
|
4391
|
50
|
|
|
|
11553
|
if ( $char ne $matching_start_token{$tok_i} ) { |
5752
|
0
|
|
|
|
|
0
|
$nesting_token_string .= $char . $tok_i; |
5753
|
|
|
|
|
|
|
} |
5754
|
|
|
|
|
|
|
} |
5755
|
|
|
|
|
|
|
|
5756
|
4578
|
100
|
100
|
|
|
15163
|
if ( |
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
5757
|
|
|
|
|
|
|
$type_i eq '}' |
5758
|
|
|
|
|
|
|
|| $type_i eq 'R' |
5759
|
|
|
|
|
|
|
|
5760
|
|
|
|
|
|
|
# only the second and higher ? : have levels |
5761
|
|
|
|
|
|
|
|| $type_i eq ':' && $routput_indent_flag->[$i] < 0 |
5762
|
|
|
|
|
|
|
) |
5763
|
|
|
|
|
|
|
{ |
5764
|
|
|
|
|
|
|
|
5765
|
4092
|
|
|
|
|
6355
|
$level_i = --$level_in_tokenizer; |
5766
|
|
|
|
|
|
|
|
5767
|
4092
|
50
|
|
|
|
8211
|
if ( $level_in_tokenizer < 0 ) { |
5768
|
0
|
0
|
|
|
|
0
|
if ( !$self->[_saw_negative_indentation_] ) { |
5769
|
0
|
|
|
|
|
0
|
$self->[_saw_negative_indentation_] = 1; |
5770
|
0
|
|
|
|
|
0
|
$self->warning( |
5771
|
|
|
|
|
|
|
"Starting negative indentation\n"); |
5772
|
|
|
|
|
|
|
} |
5773
|
|
|
|
|
|
|
} |
5774
|
|
|
|
|
|
|
|
5775
|
|
|
|
|
|
|
# restore previous level values |
5776
|
4092
|
50
|
|
|
|
8478
|
if ( length($nesting_block_string) > 1 ) |
5777
|
|
|
|
|
|
|
{ # true for valid script |
5778
|
4092
|
|
|
|
|
6387
|
chop $nesting_block_string; |
5779
|
4092
|
|
|
|
|
7985
|
$nesting_block_flag = |
5780
|
|
|
|
|
|
|
substr( $nesting_block_string, -1 ) eq '1'; |
5781
|
|
|
|
|
|
|
} |
5782
|
|
|
|
|
|
|
|
5783
|
|
|
|
|
|
|
} |
5784
|
|
|
|
|
|
|
} |
5785
|
|
|
|
|
|
|
|
5786
|
|
|
|
|
|
|
#----------------------------------------------------- |
5787
|
|
|
|
|
|
|
# Section 2.3. Unexpected sequenced token type - error |
5788
|
|
|
|
|
|
|
#----------------------------------------------------- |
5789
|
|
|
|
|
|
|
else { |
5790
|
|
|
|
|
|
|
|
5791
|
|
|
|
|
|
|
# The tokenizer should only be assigning sequence numbers |
5792
|
|
|
|
|
|
|
# to types { [ ( ? ) ] } : |
5793
|
0
|
|
|
|
|
0
|
DEVEL_MODE && $self->Fault(<<EOM); |
5794
|
|
|
|
|
|
|
unexpected sequence number on token type $type_i with pre-tok=$tok_i |
5795
|
|
|
|
|
|
|
EOM |
5796
|
|
|
|
|
|
|
} |
5797
|
|
|
|
|
|
|
|
5798
|
|
|
|
|
|
|
#------------------------------------------------ |
5799
|
|
|
|
|
|
|
# Section 2.4. Store values for a sequenced token |
5800
|
|
|
|
|
|
|
#------------------------------------------------ |
5801
|
|
|
|
|
|
|
|
5802
|
|
|
|
|
|
|
# The starting nesting block string, which is used in any .LOG |
5803
|
|
|
|
|
|
|
# output, should include the first token of the line |
5804
|
9156
|
100
|
|
|
|
18992
|
if ( !@levels ) { |
5805
|
1578
|
|
|
|
|
3060
|
$nesting_block_string_0 = $nesting_block_string; |
5806
|
|
|
|
|
|
|
} |
5807
|
|
|
|
|
|
|
|
5808
|
|
|
|
|
|
|
# Store values for a sequenced token |
5809
|
9156
|
|
|
|
|
17555
|
push( @levels, $level_i ); |
5810
|
9156
|
|
|
|
|
17491
|
push( @block_type, $routput_block_type->[$i] ); |
5811
|
9156
|
|
|
|
|
15516
|
push( @type_sequence, $routput_type_sequence->[$i] ); |
5812
|
9156
|
|
|
|
|
19588
|
push( @token_type, $type_i ); |
5813
|
|
|
|
|
|
|
|
5814
|
|
|
|
|
|
|
} |
5815
|
|
|
|
|
|
|
} ## End loop to over tokens |
5816
|
|
|
|
|
|
|
|
5817
|
|
|
|
|
|
|
#--------------------- |
5818
|
|
|
|
|
|
|
# Post-loop operations |
5819
|
|
|
|
|
|
|
#--------------------- |
5820
|
|
|
|
|
|
|
|
5821
|
5910
|
|
|
|
|
14403
|
$line_of_tokens->{_nesting_blocks_0} = $nesting_block_string_0; |
5822
|
|
|
|
|
|
|
|
5823
|
|
|
|
|
|
|
# Form and store the tokens |
5824
|
5910
|
50
|
|
|
|
13685
|
if (@levels) { |
5825
|
|
|
|
|
|
|
|
5826
|
5910
|
|
|
|
|
8662
|
my $im = shift @{$routput_token_list}; |
|
5910
|
|
|
|
|
10681
|
|
5827
|
5910
|
|
|
|
|
10636
|
my $offset = $rtoken_map->[$im]; |
5828
|
5910
|
|
|
|
|
8412
|
foreach my $i ( @{$routput_token_list} ) { |
|
5910
|
|
|
|
|
10585
|
|
5829
|
44801
|
|
|
|
|
60206
|
my $numc = $rtoken_map->[$i] - $offset; |
5830
|
44801
|
|
|
|
|
80510
|
push( @tokens, substr( $input_line, $offset, $numc ) ); |
5831
|
44801
|
|
|
|
|
55891
|
$offset += $numc; |
5832
|
|
|
|
|
|
|
|
5833
|
44801
|
|
|
|
|
61762
|
if ( DEVEL_MODE && $numc <= 0 ) { |
5834
|
|
|
|
|
|
|
|
5835
|
|
|
|
|
|
|
# Should not happen unless @{$rtoken_map} is corrupted |
5836
|
|
|
|
|
|
|
$self->Fault( |
5837
|
|
|
|
|
|
|
"number of characters is '$numc' but should be >0\n"); |
5838
|
|
|
|
|
|
|
} |
5839
|
|
|
|
|
|
|
} |
5840
|
|
|
|
|
|
|
|
5841
|
|
|
|
|
|
|
# Form and store the final token of this line |
5842
|
5910
|
|
|
|
|
11868
|
my $numc = length($input_line) - $offset; |
5843
|
5910
|
|
|
|
|
12757
|
push( @tokens, substr( $input_line, $offset, $numc ) ); |
5844
|
|
|
|
|
|
|
|
5845
|
5910
|
|
|
|
|
9109
|
if ( DEVEL_MODE && $numc <= 0 ) { |
5846
|
|
|
|
|
|
|
$self->Fault( |
5847
|
|
|
|
|
|
|
"Number of Characters is '$numc' but should be >0\n"); |
5848
|
|
|
|
|
|
|
} |
5849
|
|
|
|
|
|
|
} |
5850
|
|
|
|
|
|
|
|
5851
|
|
|
|
|
|
|
# NOTE: This routine returns ci=0. Eventually '_rci_levels' can be |
5852
|
|
|
|
|
|
|
# removed. The ci values are computed later by sub Formatter::set_ci. |
5853
|
5910
|
|
|
|
|
19910
|
my @ci_levels = (0) x scalar(@levels); |
5854
|
|
|
|
|
|
|
|
5855
|
|
|
|
|
|
|
# Wrap up this line of tokens for shipping to the Formatter |
5856
|
5910
|
|
|
|
|
14143
|
$line_of_tokens->{_rtoken_type} = \@token_type; |
5857
|
5910
|
|
|
|
|
11059
|
$line_of_tokens->{_rtokens} = \@tokens; |
5858
|
5910
|
|
|
|
|
11689
|
$line_of_tokens->{_rblock_type} = \@block_type; |
5859
|
5910
|
|
|
|
|
11826
|
$line_of_tokens->{_rtype_sequence} = \@type_sequence; |
5860
|
5910
|
|
|
|
|
19369
|
$line_of_tokens->{_rlevels} = \@levels; |
5861
|
5910
|
|
|
|
|
12266
|
$line_of_tokens->{_rci_levels} = \@ci_levels; |
5862
|
|
|
|
|
|
|
|
5863
|
5910
|
|
|
|
|
15905
|
return; |
5864
|
|
|
|
|
|
|
} ## end sub tokenizer_wrapup_line |
5865
|
|
|
|
|
|
|
|
5866
|
|
|
|
|
|
|
} ## end tokenize_this_line |
5867
|
|
|
|
|
|
|
|
5868
|
|
|
|
|
|
|
####################################################################### |
5869
|
|
|
|
|
|
|
# Tokenizer routines which assist in identifying token types |
5870
|
|
|
|
|
|
|
####################################################################### |
5871
|
|
|
|
|
|
|
|
5872
|
|
|
|
|
|
|
# Define Global '%op_expected_table' |
5873
|
|
|
|
|
|
|
# = hash table of operator expected values based on last nonblank token |
5874
|
|
|
|
|
|
|
|
5875
|
|
|
|
|
|
|
# exceptions to perl's weird parsing rules after type 'Z' |
5876
|
|
|
|
|
|
|
my %is_weird_parsing_rule_exception; |
5877
|
|
|
|
|
|
|
|
5878
|
|
|
|
|
|
|
my %is_paren_dollar; |
5879
|
|
|
|
|
|
|
|
5880
|
|
|
|
|
|
|
my %is_n_v; |
5881
|
|
|
|
|
|
|
|
5882
|
|
|
|
|
|
|
BEGIN { |
5883
|
|
|
|
|
|
|
|
5884
|
|
|
|
|
|
|
# Always expecting TERM following these types: |
5885
|
|
|
|
|
|
|
# note: this is identical to '@value_requestor_type' defined later. |
5886
|
|
|
|
|
|
|
# Fix for c250: add new type 'P' for package (expecting VERSION or {} |
5887
|
|
|
|
|
|
|
# after package NAMESPACE, so expecting TERM) |
5888
|
|
|
|
|
|
|
# Fix for c250: add new type 'S' for sub (not expecting operator) |
5889
|
39
|
|
|
39
|
|
655
|
my @q = qw( |
5890
|
|
|
|
|
|
|
; ! + x & ? F J - p / Y : % f U ~ A G j L P S * . | ^ < = [ m { \ > t |
5891
|
|
|
|
|
|
|
|| >= != mm *= => .. !~ == && |= .= pp -= =~ += <= %= ^= x= ~~ ** << /= |
5892
|
|
|
|
|
|
|
&= // >> ~. &. |. ^. |
5893
|
|
|
|
|
|
|
... **= <<= >>= &&= ||= //= <=> !~~ &.= |.= ^.= <<~ |
5894
|
|
|
|
|
|
|
); |
5895
|
39
|
|
|
|
|
4141
|
push @q, ','; |
5896
|
39
|
|
|
|
|
134
|
push @q, '('; # for completeness, not currently a token type |
5897
|
39
|
|
|
|
|
69
|
push @q, '->'; # was previously in UNKNOWN |
5898
|
39
|
|
|
|
|
1294
|
@{op_expected_table}{@q} = (TERM) x scalar(@q); |
5899
|
|
|
|
|
|
|
|
5900
|
|
|
|
|
|
|
# Always UNKNOWN following these types; |
5901
|
|
|
|
|
|
|
# previously had '->' in this list for c030 |
5902
|
39
|
|
|
|
|
207
|
@q = qw( w ); |
5903
|
39
|
|
|
|
|
108
|
@{op_expected_table}{@q} = (UNKNOWN) x scalar(@q); |
5904
|
|
|
|
|
|
|
|
5905
|
|
|
|
|
|
|
# Always expecting OPERATOR ... |
5906
|
|
|
|
|
|
|
# 'n' and 'v' are currently excluded because they might be VERSION numbers |
5907
|
|
|
|
|
|
|
# 'i' is currently excluded because it might be a package |
5908
|
|
|
|
|
|
|
# 'q' is currently excluded because it might be a prototype |
5909
|
|
|
|
|
|
|
# Fix for c030: removed '->' from this list: |
5910
|
|
|
|
|
|
|
# Fix for c250: added 'i' because new type 'P' was added |
5911
|
39
|
|
|
|
|
116
|
@q = qw( -- C h R ++ ] Q <> i ); ## n v q ); |
5912
|
39
|
|
|
|
|
77
|
push @q, ')'; |
5913
|
39
|
|
|
|
|
263
|
@{op_expected_table}{@q} = (OPERATOR) x scalar(@q); |
5914
|
|
|
|
|
|
|
|
5915
|
|
|
|
|
|
|
# Fix for git #62: added '*' and '%' |
5916
|
39
|
|
|
|
|
110
|
@q = qw( < ? * % ); |
5917
|
39
|
|
|
|
|
123
|
@{is_weird_parsing_rule_exception}{@q} = (1) x scalar(@q); |
5918
|
|
|
|
|
|
|
|
5919
|
39
|
|
|
|
|
88
|
@q = qw<) $>; |
5920
|
39
|
|
|
|
|
99
|
@{is_paren_dollar}{@q} = (1) x scalar(@q); |
5921
|
|
|
|
|
|
|
|
5922
|
39
|
|
|
|
|
85
|
@q = qw( n v ); |
5923
|
39
|
|
|
|
|
1423
|
@{is_n_v}{@q} = (1) x scalar(@q); |
5924
|
|
|
|
|
|
|
|
5925
|
|
|
|
|
|
|
} ## end BEGIN |
5926
|
|
|
|
|
|
|
|
5927
|
39
|
|
|
39
|
|
318
|
use constant DEBUG_OPERATOR_EXPECTED => 0; |
|
39
|
|
|
|
|
85
|
|
|
39
|
|
|
|
|
86888
|
|
5928
|
|
|
|
|
|
|
|
5929
|
|
|
|
|
|
|
sub operator_expected { |
5930
|
|
|
|
|
|
|
|
5931
|
|
|
|
|
|
|
# Returns a parameter indicating what types of tokens can occur next |
5932
|
|
|
|
|
|
|
|
5933
|
|
|
|
|
|
|
# Call format: |
5934
|
|
|
|
|
|
|
# $op_expected = |
5935
|
|
|
|
|
|
|
# $self->operator_expected( $tok, $next_type, $blank_after_Z ); |
5936
|
|
|
|
|
|
|
# where |
5937
|
|
|
|
|
|
|
# $tok is the current token |
5938
|
|
|
|
|
|
|
# $next_type is the type of the next token (blank or not) |
5939
|
|
|
|
|
|
|
# $blank_after_Z = flag for guessing after a type 'Z': |
5940
|
|
|
|
|
|
|
# true if $tok follows type 'Z' with intermediate blank |
5941
|
|
|
|
|
|
|
# false if $tok follows type 'Z' with no intermediate blank |
5942
|
|
|
|
|
|
|
# ignored if $tok does not follow type 'Z' |
5943
|
|
|
|
|
|
|
|
5944
|
|
|
|
|
|
|
# Many perl symbols have two or more meanings. For example, '<<' |
5945
|
|
|
|
|
|
|
# can be a shift operator or a here-doc operator. The |
5946
|
|
|
|
|
|
|
# interpretation of these symbols depends on the current state of |
5947
|
|
|
|
|
|
|
# the tokenizer, which may either be expecting a term or an |
5948
|
|
|
|
|
|
|
# operator. For this example, a << would be a shift if an OPERATOR |
5949
|
|
|
|
|
|
|
# is expected, and a here-doc if a TERM is expected. This routine |
5950
|
|
|
|
|
|
|
# is called to make this decision for any current token. It returns |
5951
|
|
|
|
|
|
|
# one of three possible values: |
5952
|
|
|
|
|
|
|
# |
5953
|
|
|
|
|
|
|
# OPERATOR - operator expected (or at least, not a term) |
5954
|
|
|
|
|
|
|
# UNKNOWN - can't tell |
5955
|
|
|
|
|
|
|
# TERM - a term is expected (or at least, not an operator) |
5956
|
|
|
|
|
|
|
# |
5957
|
|
|
|
|
|
|
# The decision is based on what has been seen so far. This |
5958
|
|
|
|
|
|
|
# information is stored in the "$last_nonblank_type" and |
5959
|
|
|
|
|
|
|
# "$last_nonblank_token" variables. For example, if the |
5960
|
|
|
|
|
|
|
# $last_nonblank_type is '=~', then we are expecting a TERM, whereas |
5961
|
|
|
|
|
|
|
# if $last_nonblank_type is 'n' (numeric), we are expecting an |
5962
|
|
|
|
|
|
|
# OPERATOR. |
5963
|
|
|
|
|
|
|
# |
5964
|
|
|
|
|
|
|
# If a UNKNOWN is returned, the calling routine must guess. A major |
5965
|
|
|
|
|
|
|
# goal of this tokenizer is to minimize the possibility of returning |
5966
|
|
|
|
|
|
|
# UNKNOWN, because a wrong guess can spoil the formatting of a |
5967
|
|
|
|
|
|
|
# script. |
5968
|
|
|
|
|
|
|
# |
5969
|
|
|
|
|
|
|
# Adding NEW_TOKENS: it is critically important that this routine be |
5970
|
|
|
|
|
|
|
# updated to allow it to determine if an operator or term is to be |
5971
|
|
|
|
|
|
|
# expected after the new token. Doing this simply involves adding |
5972
|
|
|
|
|
|
|
# the new token character to one of the regexes in this routine or |
5973
|
|
|
|
|
|
|
# to one of the hash lists |
5974
|
|
|
|
|
|
|
# that it uses, which are initialized in the BEGIN section. |
5975
|
|
|
|
|
|
|
# USES GLOBAL VARIABLES: $last_nonblank_type, $last_nonblank_token, |
5976
|
|
|
|
|
|
|
# $statement_type |
5977
|
|
|
|
|
|
|
|
5978
|
|
|
|
|
|
|
# When possible, token types should be selected such that we can determine |
5979
|
|
|
|
|
|
|
# the 'operator_expected' value by a simple hash lookup. If there are |
5980
|
|
|
|
|
|
|
# exceptions, that is an indication that a new type is needed. |
5981
|
|
|
|
|
|
|
|
5982
|
8427
|
|
|
8427
|
0
|
19669
|
my ( $self, $tok, $next_type, $blank_after_Z ) = @_; |
5983
|
|
|
|
|
|
|
|
5984
|
|
|
|
|
|
|
#-------------------------------------------- |
5985
|
|
|
|
|
|
|
# Section 1: Table lookup will get most cases |
5986
|
|
|
|
|
|
|
#-------------------------------------------- |
5987
|
|
|
|
|
|
|
|
5988
|
|
|
|
|
|
|
# Many types are can be obtained by a table lookup. This typically handles |
5989
|
|
|
|
|
|
|
# more than half of the calls. For speed, the caller may try table lookup |
5990
|
|
|
|
|
|
|
# first before calling this sub. |
5991
|
8427
|
|
|
|
|
13362
|
my $op_expected = $op_expected_table{$last_nonblank_type}; |
5992
|
8427
|
100
|
|
|
|
17089
|
if ( defined($op_expected) ) { |
5993
|
|
|
|
|
|
|
DEBUG_OPERATOR_EXPECTED |
5994
|
24
|
|
|
|
|
58
|
&& print {*STDOUT} |
5995
|
|
|
|
|
|
|
"OPERATOR_EXPECTED: Table Lookup; returns $op_expected for last type $last_nonblank_type token $last_nonblank_token\n"; |
5996
|
24
|
|
|
|
|
85
|
return $op_expected; |
5997
|
|
|
|
|
|
|
} |
5998
|
|
|
|
|
|
|
|
5999
|
|
|
|
|
|
|
#--------------------------------------------- |
6000
|
|
|
|
|
|
|
# Section 2: Handle special cases if necessary |
6001
|
|
|
|
|
|
|
#--------------------------------------------- |
6002
|
|
|
|
|
|
|
|
6003
|
|
|
|
|
|
|
# Types 'k', '}' and 'Z' depend on context |
6004
|
|
|
|
|
|
|
# Types 'n', 'v', 'q' also depend on context. |
6005
|
|
|
|
|
|
|
|
6006
|
|
|
|
|
|
|
# identifier... |
6007
|
|
|
|
|
|
|
# Fix for c250: removed coding for type 'i' because 'i' and new type 'P' |
6008
|
|
|
|
|
|
|
# are now done by hash table lookup |
6009
|
|
|
|
|
|
|
|
6010
|
|
|
|
|
|
|
# keyword... |
6011
|
8403
|
100
|
|
|
|
24838
|
if ( $last_nonblank_type eq 'k' ) { |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
6012
|
|
|
|
|
|
|
|
6013
|
|
|
|
|
|
|
# keywords expecting OPERATOR: |
6014
|
2647
|
100
|
|
|
|
12295
|
if ( $expecting_operator_token{$last_nonblank_token} ) { |
|
|
100
|
|
|
|
|
|
6015
|
7
|
|
|
|
|
19
|
$op_expected = OPERATOR; |
6016
|
|
|
|
|
|
|
} |
6017
|
|
|
|
|
|
|
|
6018
|
|
|
|
|
|
|
# keywords expecting TERM: |
6019
|
|
|
|
|
|
|
elsif ( $expecting_term_token{$last_nonblank_token} ) { |
6020
|
|
|
|
|
|
|
|
6021
|
|
|
|
|
|
|
# Exceptions from TERM: |
6022
|
|
|
|
|
|
|
|
6023
|
|
|
|
|
|
|
# // may follow perl functions which may be unary operators |
6024
|
|
|
|
|
|
|
# see test file dor.t (defined or); |
6025
|
2541
|
100
|
100
|
|
|
10557
|
if ( |
|
|
50
|
100
|
|
|
|
|
|
|
|
66
|
|
|
|
|
6026
|
|
|
|
|
|
|
$tok eq '/' |
6027
|
|
|
|
|
|
|
&& $next_type eq '/' |
6028
|
|
|
|
|
|
|
&& $is_keyword_rejecting_slash_as_pattern_delimiter{ |
6029
|
|
|
|
|
|
|
$last_nonblank_token} |
6030
|
|
|
|
|
|
|
) |
6031
|
|
|
|
|
|
|
{ |
6032
|
1
|
|
|
|
|
4
|
$op_expected = OPERATOR; |
6033
|
|
|
|
|
|
|
} |
6034
|
|
|
|
|
|
|
|
6035
|
|
|
|
|
|
|
# Patch to allow a ? following 'split' to be a deprecated pattern |
6036
|
|
|
|
|
|
|
# delimiter. This patch is coordinated with the omission of split |
6037
|
|
|
|
|
|
|
# from the list |
6038
|
|
|
|
|
|
|
# %is_keyword_rejecting_question_as_pattern_delimiter. This patch |
6039
|
|
|
|
|
|
|
# will force perltidy to guess. |
6040
|
|
|
|
|
|
|
elsif ($tok eq '?' |
6041
|
|
|
|
|
|
|
&& $last_nonblank_token eq 'split' ) |
6042
|
|
|
|
|
|
|
{ |
6043
|
0
|
|
|
|
|
0
|
$op_expected = UNKNOWN; |
6044
|
|
|
|
|
|
|
} |
6045
|
|
|
|
|
|
|
else { |
6046
|
2540
|
|
|
|
|
4426
|
$op_expected = TERM; |
6047
|
|
|
|
|
|
|
} |
6048
|
|
|
|
|
|
|
} |
6049
|
|
|
|
|
|
|
else { |
6050
|
99
|
|
|
|
|
292
|
$op_expected = TERM; |
6051
|
|
|
|
|
|
|
} |
6052
|
|
|
|
|
|
|
} ## end type 'k' |
6053
|
|
|
|
|
|
|
|
6054
|
|
|
|
|
|
|
# closing container token... |
6055
|
|
|
|
|
|
|
|
6056
|
|
|
|
|
|
|
# Note that the actual token for type '}' may also be a ')'. |
6057
|
|
|
|
|
|
|
|
6058
|
|
|
|
|
|
|
# Also note that $last_nonblank_token is not the token corresponding to |
6059
|
|
|
|
|
|
|
# $last_nonblank_type when the type is a closing container. In that |
6060
|
|
|
|
|
|
|
# case it is the token before the corresponding opening container token. |
6061
|
|
|
|
|
|
|
# So for example, for this snippet |
6062
|
|
|
|
|
|
|
# $a = do { BLOCK } / 2; |
6063
|
|
|
|
|
|
|
# the $last_nonblank_token is 'do' when $last_nonblank_type eq '}'. |
6064
|
|
|
|
|
|
|
|
6065
|
|
|
|
|
|
|
elsif ( $last_nonblank_type eq '}' ) { |
6066
|
3593
|
|
|
|
|
6443
|
$op_expected = UNKNOWN; |
6067
|
|
|
|
|
|
|
|
6068
|
|
|
|
|
|
|
# handle something after 'do' and 'eval' |
6069
|
3593
|
100
|
66
|
|
|
19118
|
if ( $is_block_operator{$last_nonblank_token} ) { |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
6070
|
|
|
|
|
|
|
|
6071
|
|
|
|
|
|
|
# something like $a = do { BLOCK } / 2; |
6072
|
82
|
|
|
|
|
222
|
$op_expected = OPERATOR; # block mode following } |
6073
|
|
|
|
|
|
|
} |
6074
|
|
|
|
|
|
|
|
6075
|
|
|
|
|
|
|
# $last_nonblank_token =~ /^(\)|\$|\-\>)/ |
6076
|
|
|
|
|
|
|
elsif ( $is_paren_dollar{ substr( $last_nonblank_token, 0, 1 ) } |
6077
|
|
|
|
|
|
|
|| substr( $last_nonblank_token, 0, 2 ) eq '->' ) |
6078
|
|
|
|
|
|
|
{ |
6079
|
2123
|
|
|
|
|
3604
|
$op_expected = OPERATOR; |
6080
|
2123
|
50
|
|
|
|
5479
|
if ( $last_nonblank_token eq '$' ) { $op_expected = UNKNOWN } |
|
0
|
|
|
|
|
0
|
|
6081
|
|
|
|
|
|
|
} |
6082
|
|
|
|
|
|
|
|
6083
|
|
|
|
|
|
|
# Check for smartmatch operator before preceding brace or square |
6084
|
|
|
|
|
|
|
# bracket. For example, at the ? after the ] in the following |
6085
|
|
|
|
|
|
|
# expressions we are expecting an operator: |
6086
|
|
|
|
|
|
|
# |
6087
|
|
|
|
|
|
|
# qr/3/ ~~ ['1234'] ? 1 : 0; |
6088
|
|
|
|
|
|
|
# map { $_ ~~ [ '0', '1' ] ? 'x' : 'o' } @a; |
6089
|
|
|
|
|
|
|
elsif ( $last_nonblank_token eq '~~' ) { |
6090
|
20
|
|
|
|
|
39
|
$op_expected = OPERATOR; |
6091
|
|
|
|
|
|
|
} |
6092
|
|
|
|
|
|
|
|
6093
|
|
|
|
|
|
|
# A right brace here indicates the end of a simple block. All |
6094
|
|
|
|
|
|
|
# non-structural right braces have type 'R' all braces associated with |
6095
|
|
|
|
|
|
|
# block operator keywords have been given those keywords as |
6096
|
|
|
|
|
|
|
# "last_nonblank_token" and caught above. (This statement is order |
6097
|
|
|
|
|
|
|
# dependent, and must come after checking $last_nonblank_token). |
6098
|
|
|
|
|
|
|
else { |
6099
|
|
|
|
|
|
|
|
6100
|
|
|
|
|
|
|
# patch for dor.t (defined or). |
6101
|
1368
|
50
|
33
|
|
|
6174
|
if ( $tok eq '/' |
|
|
100
|
33
|
|
|
|
|
6102
|
|
|
|
|
|
|
&& $next_type eq '/' |
6103
|
|
|
|
|
|
|
&& $last_nonblank_token eq ']' ) |
6104
|
|
|
|
|
|
|
{ |
6105
|
0
|
|
|
|
|
0
|
$op_expected = OPERATOR; |
6106
|
|
|
|
|
|
|
} |
6107
|
|
|
|
|
|
|
|
6108
|
|
|
|
|
|
|
# Patch for RT #116344: misparse a ternary operator after an |
6109
|
|
|
|
|
|
|
# anonymous hash, like this: |
6110
|
|
|
|
|
|
|
# return ref {} ? 1 : 0; |
6111
|
|
|
|
|
|
|
# The right brace should really be marked type 'R' in this case, |
6112
|
|
|
|
|
|
|
# and it is safest to return an UNKNOWN here. Expecting a TERM will |
6113
|
|
|
|
|
|
|
# cause the '?' to always be interpreted as a pattern delimiter |
6114
|
|
|
|
|
|
|
# rather than introducing a ternary operator. |
6115
|
|
|
|
|
|
|
elsif ( $tok eq '?' ) { |
6116
|
1
|
|
|
|
|
3
|
$op_expected = UNKNOWN; |
6117
|
|
|
|
|
|
|
} |
6118
|
|
|
|
|
|
|
else { |
6119
|
1367
|
|
|
|
|
2408
|
$op_expected = TERM; |
6120
|
|
|
|
|
|
|
} |
6121
|
|
|
|
|
|
|
} |
6122
|
|
|
|
|
|
|
} ## end type '}' |
6123
|
|
|
|
|
|
|
|
6124
|
|
|
|
|
|
|
# number or v-string... |
6125
|
|
|
|
|
|
|
# An exception is for VERSION numbers a 'use' statement. It has the format |
6126
|
|
|
|
|
|
|
# use Module VERSION LIST |
6127
|
|
|
|
|
|
|
# We could avoid this exception by writing a special sub to parse 'use' |
6128
|
|
|
|
|
|
|
# statements and perhaps mark these numbers with a new type V (for VERSION) |
6129
|
|
|
|
|
|
|
##elsif ( $last_nonblank_type =~ /^[nv]$/ ) { |
6130
|
|
|
|
|
|
|
elsif ( $is_n_v{$last_nonblank_type} ) { |
6131
|
1985
|
|
|
|
|
3123
|
$op_expected = OPERATOR; |
6132
|
1985
|
100
|
|
|
|
4503
|
if ( $statement_type eq 'use' ) { |
6133
|
11
|
|
|
|
|
40
|
$op_expected = UNKNOWN; |
6134
|
|
|
|
|
|
|
} |
6135
|
|
|
|
|
|
|
} |
6136
|
|
|
|
|
|
|
|
6137
|
|
|
|
|
|
|
# quote... |
6138
|
|
|
|
|
|
|
# TODO: labeled prototype words would better be given type 'A' or maybe |
6139
|
|
|
|
|
|
|
# 'J'; not 'q'; or maybe mark as type 'Y'? |
6140
|
|
|
|
|
|
|
elsif ( $last_nonblank_type eq 'q' ) { |
6141
|
137
|
50
|
|
|
|
609
|
if ( $last_nonblank_token eq 'prototype' ) { |
|
|
100
|
|
|
|
|
|
6142
|
0
|
|
|
|
|
0
|
$op_expected = TERM; |
6143
|
|
|
|
|
|
|
} |
6144
|
|
|
|
|
|
|
|
6145
|
|
|
|
|
|
|
# update for --use-feature=class (rt145706): |
6146
|
|
|
|
|
|
|
# Look for class VERSION after possible attribute, as in |
6147
|
|
|
|
|
|
|
# class Example::Subclass : isa(Example::Base) 1.345 { ... } |
6148
|
|
|
|
|
|
|
elsif ( $statement_type =~ /^package\b/ ) { |
6149
|
3
|
|
|
|
|
7
|
$op_expected = TERM; |
6150
|
|
|
|
|
|
|
} |
6151
|
|
|
|
|
|
|
|
6152
|
|
|
|
|
|
|
# everything else |
6153
|
|
|
|
|
|
|
else { |
6154
|
134
|
|
|
|
|
291
|
$op_expected = OPERATOR; |
6155
|
|
|
|
|
|
|
} |
6156
|
|
|
|
|
|
|
} |
6157
|
|
|
|
|
|
|
|
6158
|
|
|
|
|
|
|
# file handle or similar |
6159
|
|
|
|
|
|
|
elsif ( $last_nonblank_type eq 'Z' ) { |
6160
|
|
|
|
|
|
|
|
6161
|
|
|
|
|
|
|
# angle.t |
6162
|
40
|
100
|
33
|
|
|
567
|
if ( $last_nonblank_token =~ /^\w/ ) { |
|
|
50
|
66
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
6163
|
2
|
|
|
|
|
5
|
$op_expected = UNKNOWN; |
6164
|
|
|
|
|
|
|
} |
6165
|
|
|
|
|
|
|
|
6166
|
|
|
|
|
|
|
# Exception to weird parsing rules for 'x(' ... see case b1205: |
6167
|
|
|
|
|
|
|
# In something like 'print $vv x(...' the x is an operator; |
6168
|
|
|
|
|
|
|
# Likewise in 'print $vv x$ww' the x is an operator (case b1207) |
6169
|
|
|
|
|
|
|
# otherwise x follows the weird parsing rules. |
6170
|
|
|
|
|
|
|
elsif ( $tok eq 'x' && $next_type =~ /^[\(\$\@\%]$/ ) { |
6171
|
0
|
|
|
|
|
0
|
$op_expected = OPERATOR; |
6172
|
|
|
|
|
|
|
} |
6173
|
|
|
|
|
|
|
|
6174
|
|
|
|
|
|
|
# The 'weird parsing rules' of next section do not work for '<' and '?' |
6175
|
|
|
|
|
|
|
# It is best to mark them as unknown. Test case: |
6176
|
|
|
|
|
|
|
# print $fh <DATA>; |
6177
|
|
|
|
|
|
|
elsif ( $is_weird_parsing_rule_exception{$tok} ) { |
6178
|
4
|
|
|
|
|
15
|
$op_expected = UNKNOWN; |
6179
|
|
|
|
|
|
|
} |
6180
|
|
|
|
|
|
|
|
6181
|
|
|
|
|
|
|
# For possible file handle like "$a", Perl uses weird parsing rules. |
6182
|
|
|
|
|
|
|
# For example: |
6183
|
|
|
|
|
|
|
# print $a/2,"/hi"; - division |
6184
|
|
|
|
|
|
|
# print $a / 2,"/hi"; - division |
6185
|
|
|
|
|
|
|
# print $a/ 2,"/hi"; - division |
6186
|
|
|
|
|
|
|
# print $a /2,"/hi"; - pattern (and error)! |
6187
|
|
|
|
|
|
|
# Some examples where this logic works okay, for '&','*','+': |
6188
|
|
|
|
|
|
|
# print $fh &xsi_protos(@mods); |
6189
|
|
|
|
|
|
|
# my $x = new $CompressClass *FH; |
6190
|
|
|
|
|
|
|
# print $OUT +( $count % 15 ? ", " : "\n\t" ); |
6191
|
|
|
|
|
|
|
elsif ($blank_after_Z |
6192
|
|
|
|
|
|
|
&& $next_type ne 'b' ) |
6193
|
|
|
|
|
|
|
{ |
6194
|
0
|
|
|
|
|
0
|
$op_expected = TERM; |
6195
|
|
|
|
|
|
|
} |
6196
|
|
|
|
|
|
|
|
6197
|
|
|
|
|
|
|
# Note that '?' and '<' have been moved above |
6198
|
|
|
|
|
|
|
# ( $tok =~ /^([x\/\+\-\*\%\&\.\?\<]|\>\>)$/ ) { |
6199
|
|
|
|
|
|
|
elsif ( $tok =~ /^([x\/\+\-\*\%\&\.]|\>\>)$/ ) { |
6200
|
|
|
|
|
|
|
|
6201
|
|
|
|
|
|
|
# Do not complain in 'use' statements, which have special syntax. |
6202
|
|
|
|
|
|
|
# For example, from RT#130344: |
6203
|
|
|
|
|
|
|
# use lib $FindBin::Bin . '/lib'; |
6204
|
9
|
50
|
|
|
|
38
|
if ( $statement_type ne 'use' ) { |
6205
|
9
|
|
|
|
|
37
|
$self->complain( |
6206
|
|
|
|
|
|
|
"operator in possible indirect object location not recommended\n" |
6207
|
|
|
|
|
|
|
); |
6208
|
|
|
|
|
|
|
} |
6209
|
9
|
|
|
|
|
24
|
$op_expected = OPERATOR; |
6210
|
|
|
|
|
|
|
} |
6211
|
|
|
|
|
|
|
|
6212
|
|
|
|
|
|
|
# all other cases |
6213
|
|
|
|
|
|
|
else { |
6214
|
25
|
|
|
|
|
72
|
$op_expected = UNKNOWN; |
6215
|
|
|
|
|
|
|
} |
6216
|
|
|
|
|
|
|
} |
6217
|
|
|
|
|
|
|
|
6218
|
|
|
|
|
|
|
# anything else... |
6219
|
|
|
|
|
|
|
else { |
6220
|
1
|
|
|
|
|
5
|
$op_expected = UNKNOWN; |
6221
|
|
|
|
|
|
|
} |
6222
|
|
|
|
|
|
|
|
6223
|
|
|
|
|
|
|
DEBUG_OPERATOR_EXPECTED |
6224
|
8403
|
|
|
|
|
11569
|
&& print {*STDOUT} |
6225
|
|
|
|
|
|
|
"OPERATOR_EXPECTED: returns $op_expected for last type $last_nonblank_type token $last_nonblank_token\n"; |
6226
|
|
|
|
|
|
|
|
6227
|
8403
|
|
|
|
|
15774
|
return $op_expected; |
6228
|
|
|
|
|
|
|
|
6229
|
|
|
|
|
|
|
} ## end sub operator_expected |
6230
|
|
|
|
|
|
|
|
6231
|
|
|
|
|
|
|
sub new_statement_ok { |
6232
|
|
|
|
|
|
|
|
6233
|
|
|
|
|
|
|
# Returns: |
6234
|
|
|
|
|
|
|
# true if a new statement can begin here |
6235
|
|
|
|
|
|
|
# false otherwise |
6236
|
|
|
|
|
|
|
|
6237
|
|
|
|
|
|
|
# USES GLOBAL VARIABLES: $last_nonblank_token, $last_nonblank_type, |
6238
|
|
|
|
|
|
|
# $brace_depth, $rbrace_type |
6239
|
|
|
|
|
|
|
|
6240
|
|
|
|
|
|
|
# Uses: |
6241
|
|
|
|
|
|
|
# - See if a 'class' statement can occur here |
6242
|
|
|
|
|
|
|
# - See if a keyword begins at a new statement; i.e. is an 'if' a |
6243
|
|
|
|
|
|
|
# block if or a trailing if? Also see if 'format' starts a statement. |
6244
|
|
|
|
|
|
|
# - Decide if a ':' is part of a statement label (not a ternary) |
6245
|
|
|
|
|
|
|
|
6246
|
|
|
|
|
|
|
# Curly braces are tricky because some small blocks do not get marked as |
6247
|
|
|
|
|
|
|
# blocks.. |
6248
|
|
|
|
|
|
|
|
6249
|
|
|
|
|
|
|
# if it follows an opening curly brace.. |
6250
|
436
|
100
|
66
|
436
|
0
|
2366
|
if ( $last_nonblank_token eq '{' ) { |
|
|
100
|
|
|
|
|
|
6251
|
|
|
|
|
|
|
|
6252
|
|
|
|
|
|
|
# The safe thing is to return true in all cases because: |
6253
|
|
|
|
|
|
|
# - a ternary ':' cannot occur here |
6254
|
|
|
|
|
|
|
# - an 'if' here, for example, cannot be a trailing if |
6255
|
|
|
|
|
|
|
# See test case c231 for an example. |
6256
|
|
|
|
|
|
|
# This works but could be improved, if necessary, by returning |
6257
|
|
|
|
|
|
|
# 'false' at obvious non-blocks. |
6258
|
59
|
|
|
|
|
245
|
return 1; |
6259
|
|
|
|
|
|
|
} |
6260
|
|
|
|
|
|
|
|
6261
|
|
|
|
|
|
|
# if it follows a closing code block curly brace.. |
6262
|
|
|
|
|
|
|
elsif ($last_nonblank_token eq '}' |
6263
|
|
|
|
|
|
|
&& $last_nonblank_type eq $last_nonblank_token ) |
6264
|
|
|
|
|
|
|
{ |
6265
|
|
|
|
|
|
|
|
6266
|
|
|
|
|
|
|
# a new statement can follow certain closing block braces ... |
6267
|
|
|
|
|
|
|
# FIXME: The following has worked well but returns true in some cases |
6268
|
|
|
|
|
|
|
# where it really should not. We could fix this by either excluding |
6269
|
|
|
|
|
|
|
# certain blocks, like sort/map/grep/eval/asub or by just including |
6270
|
|
|
|
|
|
|
# certain blocks. |
6271
|
99
|
|
|
|
|
429
|
return $rbrace_type->[$brace_depth]; |
6272
|
|
|
|
|
|
|
} |
6273
|
|
|
|
|
|
|
|
6274
|
|
|
|
|
|
|
# otherwise, it is a label if and only if it follows a ';' (real or fake) |
6275
|
|
|
|
|
|
|
# or another label |
6276
|
|
|
|
|
|
|
else { |
6277
|
278
|
|
100
|
|
|
1892
|
return ( $last_nonblank_type eq ';' || $last_nonblank_type eq 'J' ); |
6278
|
|
|
|
|
|
|
} |
6279
|
|
|
|
|
|
|
} ## end sub new_statement_ok |
6280
|
|
|
|
|
|
|
|
6281
|
|
|
|
|
|
|
sub code_block_type { |
6282
|
|
|
|
|
|
|
|
6283
|
|
|
|
|
|
|
# Decide if this is a block of code, and its type. |
6284
|
|
|
|
|
|
|
# Must be called only when $type = $token = '{' |
6285
|
|
|
|
|
|
|
# The problem is to distinguish between the start of a block of code |
6286
|
|
|
|
|
|
|
# and the start of an anonymous hash reference |
6287
|
|
|
|
|
|
|
# Returns "" if not code block, otherwise returns 'last_nonblank_token' |
6288
|
|
|
|
|
|
|
# to indicate the type of code block. (For example, 'last_nonblank_token' |
6289
|
|
|
|
|
|
|
# might be 'if' for an if block, 'else' for an else block, etc). |
6290
|
|
|
|
|
|
|
# USES GLOBAL VARIABLES: $last_nonblank_token, $last_nonblank_type, |
6291
|
|
|
|
|
|
|
# $last_nonblank_block_type, $brace_depth, $rbrace_type |
6292
|
|
|
|
|
|
|
|
6293
|
|
|
|
|
|
|
# handle case of multiple '{'s |
6294
|
|
|
|
|
|
|
|
6295
|
|
|
|
|
|
|
# print "BLOCK_TYPE EXAMINING: type=$last_nonblank_type tok=$last_nonblank_token\n"; |
6296
|
|
|
|
|
|
|
|
6297
|
1303
|
|
|
1303
|
0
|
3321
|
my ( $self, $i, $rtokens, $rtoken_type, $max_token_index ) = @_; |
6298
|
1303
|
100
|
66
|
|
|
16515
|
if ( $last_nonblank_token eq '{' |
|
|
100
|
66
|
|
|
|
|
|
|
100
|
66
|
|
|
|
|
|
|
100
|
100
|
|
|
|
|
|
|
100
|
66
|
|
|
|
|
|
|
100
|
66
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
6299
|
|
|
|
|
|
|
&& $last_nonblank_type eq $last_nonblank_token ) |
6300
|
|
|
|
|
|
|
{ |
6301
|
|
|
|
|
|
|
|
6302
|
|
|
|
|
|
|
# opening brace where a statement may appear is probably |
6303
|
|
|
|
|
|
|
# a code block but might be and anonymous hash reference |
6304
|
90
|
50
|
|
|
|
318
|
if ( $rbrace_type->[$brace_depth] ) { |
6305
|
90
|
|
|
|
|
355
|
return $self->decide_if_code_block( $i, $rtokens, $rtoken_type, |
6306
|
|
|
|
|
|
|
$max_token_index ); |
6307
|
|
|
|
|
|
|
} |
6308
|
|
|
|
|
|
|
|
6309
|
|
|
|
|
|
|
# cannot start a code block within an anonymous hash |
6310
|
|
|
|
|
|
|
else { |
6311
|
0
|
|
|
|
|
0
|
return EMPTY_STRING; |
6312
|
|
|
|
|
|
|
} |
6313
|
|
|
|
|
|
|
} |
6314
|
|
|
|
|
|
|
|
6315
|
|
|
|
|
|
|
elsif ( $last_nonblank_token eq ';' ) { |
6316
|
|
|
|
|
|
|
|
6317
|
|
|
|
|
|
|
# an opening brace where a statement may appear is probably |
6318
|
|
|
|
|
|
|
# a code block but might be and anonymous hash reference |
6319
|
48
|
|
|
|
|
274
|
return $self->decide_if_code_block( $i, $rtokens, $rtoken_type, |
6320
|
|
|
|
|
|
|
$max_token_index ); |
6321
|
|
|
|
|
|
|
} |
6322
|
|
|
|
|
|
|
|
6323
|
|
|
|
|
|
|
# handle case of '}{' |
6324
|
|
|
|
|
|
|
elsif ($last_nonblank_token eq '}' |
6325
|
|
|
|
|
|
|
&& $last_nonblank_type eq $last_nonblank_token ) |
6326
|
|
|
|
|
|
|
{ |
6327
|
|
|
|
|
|
|
|
6328
|
|
|
|
|
|
|
# a } { situation ... |
6329
|
|
|
|
|
|
|
# could be hash reference after code block..(blktype1.t) |
6330
|
9
|
50
|
|
|
|
29
|
if ($last_nonblank_block_type) { |
6331
|
9
|
|
|
|
|
37
|
return $self->decide_if_code_block( $i, $rtokens, $rtoken_type, |
6332
|
|
|
|
|
|
|
$max_token_index ); |
6333
|
|
|
|
|
|
|
} |
6334
|
|
|
|
|
|
|
|
6335
|
|
|
|
|
|
|
# must be a block if it follows a closing hash reference |
6336
|
|
|
|
|
|
|
else { |
6337
|
0
|
|
|
|
|
0
|
return $last_nonblank_token; |
6338
|
|
|
|
|
|
|
} |
6339
|
|
|
|
|
|
|
} |
6340
|
|
|
|
|
|
|
|
6341
|
|
|
|
|
|
|
#-------------------------------------------------------------- |
6342
|
|
|
|
|
|
|
# NOTE: braces after type characters start code blocks, but for |
6343
|
|
|
|
|
|
|
# simplicity these are not identified as such. See also |
6344
|
|
|
|
|
|
|
# sub is_non_structural_brace. |
6345
|
|
|
|
|
|
|
#-------------------------------------------------------------- |
6346
|
|
|
|
|
|
|
|
6347
|
|
|
|
|
|
|
## elsif ( $last_nonblank_type eq 't' ) { |
6348
|
|
|
|
|
|
|
## return $last_nonblank_token; |
6349
|
|
|
|
|
|
|
## } |
6350
|
|
|
|
|
|
|
|
6351
|
|
|
|
|
|
|
# brace after label: |
6352
|
|
|
|
|
|
|
elsif ( $last_nonblank_type eq 'J' ) { |
6353
|
34
|
|
|
|
|
149
|
return $last_nonblank_token; |
6354
|
|
|
|
|
|
|
} |
6355
|
|
|
|
|
|
|
|
6356
|
|
|
|
|
|
|
# otherwise, look at previous token. This must be a code block if |
6357
|
|
|
|
|
|
|
# it follows any of these: |
6358
|
|
|
|
|
|
|
# /^(BEGIN|END|CHECK|INIT|AUTOLOAD|DESTROY|UNITCHECK|continue|if|elsif|else|unless|do|while|until|eval|for|foreach|map|grep|sort)$/ |
6359
|
|
|
|
|
|
|
elsif ($is_code_block_token{$last_nonblank_token} |
6360
|
|
|
|
|
|
|
|| $is_grep_alias{$last_nonblank_token} ) |
6361
|
|
|
|
|
|
|
{ |
6362
|
|
|
|
|
|
|
|
6363
|
|
|
|
|
|
|
# Bug Patch: Note that the opening brace after the 'if' in the following |
6364
|
|
|
|
|
|
|
# snippet is an anonymous hash ref and not a code block! |
6365
|
|
|
|
|
|
|
# print 'hi' if { x => 1, }->{x}; |
6366
|
|
|
|
|
|
|
# We can identify this situation because the last nonblank type |
6367
|
|
|
|
|
|
|
# will be a keyword (instead of a closing paren) |
6368
|
481
|
50
|
33
|
|
|
2566
|
if ( |
|
|
|
66
|
|
|
|
|
6369
|
|
|
|
|
|
|
$last_nonblank_type eq 'k' |
6370
|
|
|
|
|
|
|
&& ( $last_nonblank_token eq 'if' |
6371
|
|
|
|
|
|
|
|| $last_nonblank_token eq 'unless' ) |
6372
|
|
|
|
|
|
|
) |
6373
|
|
|
|
|
|
|
{ |
6374
|
0
|
|
|
|
|
0
|
return EMPTY_STRING; |
6375
|
|
|
|
|
|
|
} |
6376
|
|
|
|
|
|
|
else { |
6377
|
481
|
|
|
|
|
1513
|
return $last_nonblank_token; |
6378
|
|
|
|
|
|
|
} |
6379
|
|
|
|
|
|
|
} |
6380
|
|
|
|
|
|
|
|
6381
|
|
|
|
|
|
|
# or a sub or package BLOCK |
6382
|
|
|
|
|
|
|
# Fixed for c250 to include new package type 'P', and change 'i' to 'S' |
6383
|
|
|
|
|
|
|
elsif ( |
6384
|
|
|
|
|
|
|
$last_nonblank_type eq 'P' |
6385
|
|
|
|
|
|
|
|| $last_nonblank_type eq 'S' |
6386
|
|
|
|
|
|
|
|| ( $last_nonblank_type eq 't' |
6387
|
|
|
|
|
|
|
&& substr( $last_nonblank_token, 0, 3 ) eq 'sub' ) |
6388
|
|
|
|
|
|
|
) |
6389
|
|
|
|
|
|
|
{ |
6390
|
295
|
|
|
|
|
984
|
return $last_nonblank_token; |
6391
|
|
|
|
|
|
|
} |
6392
|
|
|
|
|
|
|
|
6393
|
|
|
|
|
|
|
elsif ( $statement_type =~ /^(sub|package)\b/ ) { |
6394
|
0
|
|
|
|
|
0
|
return $statement_type; |
6395
|
|
|
|
|
|
|
} |
6396
|
|
|
|
|
|
|
|
6397
|
|
|
|
|
|
|
# user-defined subs with block parameters (like grep/map/eval) |
6398
|
|
|
|
|
|
|
elsif ( $last_nonblank_type eq 'G' ) { |
6399
|
0
|
|
|
|
|
0
|
return $last_nonblank_token; |
6400
|
|
|
|
|
|
|
} |
6401
|
|
|
|
|
|
|
|
6402
|
|
|
|
|
|
|
# check bareword |
6403
|
|
|
|
|
|
|
elsif ( $last_nonblank_type eq 'w' ) { |
6404
|
|
|
|
|
|
|
|
6405
|
|
|
|
|
|
|
# check for syntax 'use MODULE LIST' |
6406
|
|
|
|
|
|
|
# This fixes b1022 b1025 b1027 b1028 b1029 b1030 b1031 |
6407
|
22
|
100
|
|
|
|
100
|
return EMPTY_STRING if ( $statement_type eq 'use' ); |
6408
|
|
|
|
|
|
|
|
6409
|
21
|
|
|
|
|
113
|
return $self->decide_if_code_block( $i, $rtokens, $rtoken_type, |
6410
|
|
|
|
|
|
|
$max_token_index ); |
6411
|
|
|
|
|
|
|
} |
6412
|
|
|
|
|
|
|
|
6413
|
|
|
|
|
|
|
# Patch for bug # RT #94338 reported by Daniel Trizen |
6414
|
|
|
|
|
|
|
# for-loop in a parenthesized block-map triggering an error message: |
6415
|
|
|
|
|
|
|
# map( { foreach my $item ( '0', '1' ) { print $item} } qw(a b c) ); |
6416
|
|
|
|
|
|
|
# Check for a code block within a parenthesized function call |
6417
|
|
|
|
|
|
|
elsif ( $last_nonblank_token eq '(' ) { |
6418
|
81
|
|
|
|
|
203
|
my $paren_type = $rparen_type->[$paren_depth]; |
6419
|
|
|
|
|
|
|
|
6420
|
|
|
|
|
|
|
# /^(map|grep|sort)$/ |
6421
|
81
|
100
|
66
|
|
|
425
|
if ( $paren_type && $is_sort_map_grep{$paren_type} ) { |
6422
|
|
|
|
|
|
|
|
6423
|
|
|
|
|
|
|
# We will mark this as a code block but use type 't' instead |
6424
|
|
|
|
|
|
|
# of the name of the containing function. This will allow for |
6425
|
|
|
|
|
|
|
# correct parsing but will usually produce better formatting. |
6426
|
|
|
|
|
|
|
# Braces with block type 't' are not broken open automatically |
6427
|
|
|
|
|
|
|
# in the formatter as are other code block types, and this usually |
6428
|
|
|
|
|
|
|
# works best. |
6429
|
1
|
|
|
|
|
4
|
return 't'; # (Not $paren_type) |
6430
|
|
|
|
|
|
|
} |
6431
|
|
|
|
|
|
|
else { |
6432
|
80
|
|
|
|
|
244
|
return EMPTY_STRING; |
6433
|
|
|
|
|
|
|
} |
6434
|
|
|
|
|
|
|
} |
6435
|
|
|
|
|
|
|
|
6436
|
|
|
|
|
|
|
# handle unknown syntax ') {' |
6437
|
|
|
|
|
|
|
# we previously appended a '()' to mark this case |
6438
|
|
|
|
|
|
|
elsif ( $last_nonblank_token =~ /\(\)$/ ) { |
6439
|
14
|
|
|
|
|
50
|
return $last_nonblank_token; |
6440
|
|
|
|
|
|
|
} |
6441
|
|
|
|
|
|
|
|
6442
|
|
|
|
|
|
|
# anything else must be anonymous hash reference |
6443
|
|
|
|
|
|
|
else { |
6444
|
229
|
|
|
|
|
676
|
return EMPTY_STRING; |
6445
|
|
|
|
|
|
|
} |
6446
|
|
|
|
|
|
|
} ## end sub code_block_type |
6447
|
|
|
|
|
|
|
|
6448
|
|
|
|
|
|
|
sub decide_if_code_block { |
6449
|
|
|
|
|
|
|
|
6450
|
|
|
|
|
|
|
# USES GLOBAL VARIABLES: $last_nonblank_token |
6451
|
168
|
|
|
168
|
0
|
462
|
my ( $self, $i, $rtokens, $rtoken_type, $max_token_index ) = @_; |
6452
|
|
|
|
|
|
|
|
6453
|
168
|
|
|
|
|
591
|
my ( $next_nonblank_token, $i_next ) = |
6454
|
|
|
|
|
|
|
$self->find_next_nonblank_token( $i, $rtokens, $max_token_index ); |
6455
|
|
|
|
|
|
|
|
6456
|
|
|
|
|
|
|
# we are at a '{' where a statement may appear. |
6457
|
|
|
|
|
|
|
# We must decide if this brace starts an anonymous hash or a code |
6458
|
|
|
|
|
|
|
# block. |
6459
|
|
|
|
|
|
|
# return "" if anonymous hash, and $last_nonblank_token otherwise |
6460
|
|
|
|
|
|
|
|
6461
|
|
|
|
|
|
|
# initialize to be code BLOCK |
6462
|
168
|
|
|
|
|
492
|
my $code_block_type = $last_nonblank_token; |
6463
|
|
|
|
|
|
|
|
6464
|
|
|
|
|
|
|
# Check for the common case of an empty anonymous hash reference: |
6465
|
|
|
|
|
|
|
# Maybe something like sub { { } } |
6466
|
168
|
100
|
|
|
|
519
|
if ( $next_nonblank_token eq '}' ) { |
6467
|
5
|
|
|
|
|
14
|
$code_block_type = EMPTY_STRING; |
6468
|
|
|
|
|
|
|
} |
6469
|
|
|
|
|
|
|
|
6470
|
|
|
|
|
|
|
else { |
6471
|
|
|
|
|
|
|
|
6472
|
|
|
|
|
|
|
# To guess if this '{' is an anonymous hash reference, look ahead |
6473
|
|
|
|
|
|
|
# and test as follows: |
6474
|
|
|
|
|
|
|
# |
6475
|
|
|
|
|
|
|
# it is a hash reference if next come: |
6476
|
|
|
|
|
|
|
# - a string or digit followed by a comma or => |
6477
|
|
|
|
|
|
|
# - bareword followed by => |
6478
|
|
|
|
|
|
|
# otherwise it is a code block |
6479
|
|
|
|
|
|
|
# |
6480
|
|
|
|
|
|
|
# Examples of anonymous hash ref: |
6481
|
|
|
|
|
|
|
# {'aa',}; |
6482
|
|
|
|
|
|
|
# {1,2} |
6483
|
|
|
|
|
|
|
# |
6484
|
|
|
|
|
|
|
# Examples of code blocks: |
6485
|
|
|
|
|
|
|
# {1; print "hello\n", 1;} |
6486
|
|
|
|
|
|
|
# {$a,1}; |
6487
|
|
|
|
|
|
|
|
6488
|
|
|
|
|
|
|
# We are only going to look ahead one more (nonblank/comment) line. |
6489
|
|
|
|
|
|
|
# Strange formatting could cause a bad guess, but that's unlikely. |
6490
|
163
|
|
|
|
|
410
|
my @pre_types; |
6491
|
|
|
|
|
|
|
my @pre_tokens; |
6492
|
|
|
|
|
|
|
|
6493
|
|
|
|
|
|
|
# Ignore the rest of this line if it is a side comment |
6494
|
163
|
100
|
|
|
|
541
|
if ( $next_nonblank_token ne '#' ) { |
6495
|
139
|
|
|
|
|
525
|
@pre_types = @{$rtoken_type}[ $i + 1 .. $max_token_index ]; |
|
139
|
|
|
|
|
815
|
|
6496
|
139
|
|
|
|
|
421
|
@pre_tokens = @{$rtokens}[ $i + 1 .. $max_token_index ]; |
|
139
|
|
|
|
|
710
|
|
6497
|
|
|
|
|
|
|
} |
6498
|
|
|
|
|
|
|
|
6499
|
|
|
|
|
|
|
# Here 20 is arbitrary but generous, and prevents wasting lots of time |
6500
|
|
|
|
|
|
|
# in mangled files |
6501
|
163
|
|
|
|
|
664
|
my ( $rpre_tokens, $rpre_types ) = |
6502
|
|
|
|
|
|
|
$self->peek_ahead_for_n_nonblank_pre_tokens(20); |
6503
|
163
|
100
|
66
|
|
|
621
|
if ( defined($rpre_types) && @{$rpre_types} ) { |
|
155
|
|
|
|
|
592
|
|
6504
|
155
|
|
|
|
|
315
|
push @pre_types, @{$rpre_types}; |
|
155
|
|
|
|
|
613
|
|
6505
|
155
|
|
|
|
|
296
|
push @pre_tokens, @{$rpre_tokens}; |
|
155
|
|
|
|
|
732
|
|
6506
|
|
|
|
|
|
|
} |
6507
|
|
|
|
|
|
|
|
6508
|
|
|
|
|
|
|
# put a sentinel token to simplify stopping the search |
6509
|
163
|
|
|
|
|
1326
|
push @pre_types, '}'; |
6510
|
163
|
|
|
|
|
322
|
push @pre_types, '}'; |
6511
|
|
|
|
|
|
|
|
6512
|
163
|
|
|
|
|
312
|
my $jbeg = 0; |
6513
|
163
|
100
|
|
|
|
477
|
$jbeg = 1 if $pre_types[0] eq 'b'; |
6514
|
|
|
|
|
|
|
|
6515
|
|
|
|
|
|
|
# first look for one of these |
6516
|
|
|
|
|
|
|
# - bareword |
6517
|
|
|
|
|
|
|
# - bareword with leading - |
6518
|
|
|
|
|
|
|
# - digit |
6519
|
|
|
|
|
|
|
# - quoted string |
6520
|
163
|
|
|
|
|
292
|
my $j = $jbeg; |
6521
|
163
|
100
|
33
|
|
|
1288
|
if ( $pre_types[$j] =~ /^[\'\"]/ ) { |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
6522
|
|
|
|
|
|
|
|
6523
|
|
|
|
|
|
|
# find the closing quote; don't worry about escapes |
6524
|
1
|
|
|
|
|
3
|
my $quote_mark = $pre_types[$j]; |
6525
|
1
|
|
|
|
|
17
|
foreach my $k ( $j + 1 .. @pre_types - 2 ) { |
6526
|
1
|
50
|
|
|
|
6
|
if ( $pre_types[$k] eq $quote_mark ) { |
6527
|
1
|
|
|
|
|
2
|
$j = $k + 1; |
6528
|
|
|
|
|
|
|
##my $next = $pre_types[$j]; |
6529
|
1
|
|
|
|
|
3
|
last; |
6530
|
|
|
|
|
|
|
} |
6531
|
|
|
|
|
|
|
} |
6532
|
|
|
|
|
|
|
} |
6533
|
|
|
|
|
|
|
elsif ( $pre_types[$j] eq 'd' ) { |
6534
|
8
|
|
|
|
|
14
|
$j++; |
6535
|
|
|
|
|
|
|
} |
6536
|
|
|
|
|
|
|
elsif ( $pre_types[$j] eq 'w' ) { |
6537
|
71
|
|
|
|
|
177
|
$j++; |
6538
|
|
|
|
|
|
|
} |
6539
|
|
|
|
|
|
|
elsif ( $pre_types[$j] eq '-' && $pre_types[ ++$j ] eq 'w' ) { |
6540
|
0
|
|
|
|
|
0
|
$j++; |
6541
|
|
|
|
|
|
|
} |
6542
|
|
|
|
|
|
|
else { |
6543
|
|
|
|
|
|
|
# none of the above |
6544
|
|
|
|
|
|
|
} |
6545
|
163
|
100
|
|
|
|
526
|
if ( $j > $jbeg ) { |
6546
|
|
|
|
|
|
|
|
6547
|
80
|
100
|
|
|
|
332
|
$j++ if $pre_types[$j] eq 'b'; |
6548
|
|
|
|
|
|
|
|
6549
|
|
|
|
|
|
|
# Patched for RT #95708 |
6550
|
80
|
100
|
33
|
|
|
692
|
if ( |
|
|
|
66
|
|
|
|
|
|
|
|
66
|
|
|
|
|
6551
|
|
|
|
|
|
|
|
6552
|
|
|
|
|
|
|
# it is a comma which is not a pattern delimiter except for qw |
6553
|
|
|
|
|
|
|
( |
6554
|
|
|
|
|
|
|
$pre_types[$j] eq ',' |
6555
|
|
|
|
|
|
|
## !~ /^(s|m|y|tr|qr|q|qq|qx)$/ |
6556
|
|
|
|
|
|
|
&& !$is_q_qq_qx_qr_s_y_tr_m{ $pre_tokens[$jbeg] } |
6557
|
|
|
|
|
|
|
) |
6558
|
|
|
|
|
|
|
|
6559
|
|
|
|
|
|
|
# or a => |
6560
|
|
|
|
|
|
|
|| ( $pre_types[$j] eq '=' && $pre_types[ ++$j ] eq '>' ) |
6561
|
|
|
|
|
|
|
) |
6562
|
|
|
|
|
|
|
{ |
6563
|
18
|
|
|
|
|
43
|
$code_block_type = EMPTY_STRING; |
6564
|
|
|
|
|
|
|
} |
6565
|
|
|
|
|
|
|
} |
6566
|
|
|
|
|
|
|
|
6567
|
163
|
100
|
|
|
|
556
|
if ($code_block_type) { |
6568
|
|
|
|
|
|
|
|
6569
|
|
|
|
|
|
|
# Patch for cases b1085 b1128: It is uncertain if this is a block. |
6570
|
|
|
|
|
|
|
# If this brace follows a bareword, then append a space as a signal |
6571
|
|
|
|
|
|
|
# to the formatter that this may not be a block brace. To find the |
6572
|
|
|
|
|
|
|
# corresponding code in Formatter.pm search for 'b1085'. |
6573
|
145
|
100
|
|
|
|
1222
|
$code_block_type .= SPACE if ( $code_block_type =~ /^\w/ ); |
6574
|
|
|
|
|
|
|
} |
6575
|
|
|
|
|
|
|
} |
6576
|
|
|
|
|
|
|
|
6577
|
168
|
|
|
|
|
583
|
return $code_block_type; |
6578
|
|
|
|
|
|
|
} ## end sub decide_if_code_block |
6579
|
|
|
|
|
|
|
|
6580
|
|
|
|
|
|
|
sub report_unexpected { |
6581
|
|
|
|
|
|
|
|
6582
|
|
|
|
|
|
|
# report unexpected token type and show where it is |
6583
|
|
|
|
|
|
|
# USES GLOBAL VARIABLES: (none) |
6584
|
0
|
|
|
0
|
0
|
0
|
my ( $self, $found, $expecting, $i_tok, $last_nonblank_i, $rpretoken_map, |
6585
|
|
|
|
|
|
|
$rpretoken_type, $input_line ) |
6586
|
|
|
|
|
|
|
= @_; |
6587
|
|
|
|
|
|
|
|
6588
|
0
|
0
|
|
|
|
0
|
if ( ++$self->[_unexpected_error_count_] <= MAX_NAG_MESSAGES ) { |
6589
|
0
|
|
|
|
|
0
|
my $msg = "found $found where $expecting expected"; |
6590
|
0
|
|
|
|
|
0
|
my $pos = $rpretoken_map->[$i_tok]; |
6591
|
0
|
|
|
|
|
0
|
$self->interrupt_logfile(); |
6592
|
0
|
|
|
|
|
0
|
my $input_line_number = $self->[_last_line_number_]; |
6593
|
0
|
|
|
|
|
0
|
my ( $offset, $numbered_line, $underline ) = |
6594
|
|
|
|
|
|
|
make_numbered_line( $input_line_number, $input_line, $pos ); |
6595
|
0
|
|
|
|
|
0
|
$underline = write_on_underline( $underline, $pos - $offset, '^' ); |
6596
|
|
|
|
|
|
|
|
6597
|
0
|
|
|
|
|
0
|
my $trailer = EMPTY_STRING; |
6598
|
0
|
0
|
0
|
|
|
0
|
if ( ( $i_tok > 0 ) && ( $last_nonblank_i >= 0 ) ) { |
6599
|
0
|
|
|
|
|
0
|
my $pos_prev = $rpretoken_map->[$last_nonblank_i]; |
6600
|
0
|
|
|
|
|
0
|
my $num; |
6601
|
0
|
0
|
|
|
|
0
|
if ( $rpretoken_type->[ $i_tok - 1 ] eq 'b' ) { |
6602
|
0
|
|
|
|
|
0
|
$num = $rpretoken_map->[ $i_tok - 1 ] - $pos_prev; |
6603
|
|
|
|
|
|
|
} |
6604
|
|
|
|
|
|
|
else { |
6605
|
0
|
|
|
|
|
0
|
$num = $pos - $pos_prev; |
6606
|
|
|
|
|
|
|
} |
6607
|
0
|
0
|
|
|
|
0
|
if ( $num > 40 ) { $num = 40; $pos_prev = $pos - 40; } |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
6608
|
|
|
|
|
|
|
|
6609
|
|
|
|
|
|
|
$underline = |
6610
|
0
|
|
|
|
|
0
|
write_on_underline( $underline, $pos_prev - $offset, '-' x $num ); |
6611
|
0
|
|
|
|
|
0
|
$trailer = " (previous token underlined)"; |
6612
|
|
|
|
|
|
|
} |
6613
|
0
|
|
|
|
|
0
|
$underline =~ s/\s+$//; |
6614
|
0
|
|
|
|
|
0
|
$self->warning( $numbered_line . "\n" ); |
6615
|
0
|
|
|
|
|
0
|
$self->warning( $underline . "\n" ); |
6616
|
0
|
|
|
|
|
0
|
$self->warning( $msg . $trailer . "\n" ); |
6617
|
0
|
|
|
|
|
0
|
$self->resume_logfile(); |
6618
|
|
|
|
|
|
|
} |
6619
|
0
|
|
|
|
|
0
|
return; |
6620
|
|
|
|
|
|
|
} ## end sub report_unexpected |
6621
|
|
|
|
|
|
|
|
6622
|
|
|
|
|
|
|
my %is_sigil_or_paren; |
6623
|
|
|
|
|
|
|
my %is_R_closing_sb; |
6624
|
|
|
|
|
|
|
|
6625
|
|
|
|
|
|
|
BEGIN { |
6626
|
|
|
|
|
|
|
|
6627
|
39
|
|
|
39
|
|
336
|
my @q = qw< $ & % * @ ) >; |
6628
|
39
|
|
|
|
|
410
|
@{is_sigil_or_paren}{@q} = (1) x scalar(@q); |
6629
|
|
|
|
|
|
|
|
6630
|
39
|
|
|
|
|
236
|
@q = qw(R ]); |
6631
|
39
|
|
|
|
|
86532
|
@{is_R_closing_sb}{@q} = (1) x scalar(@q); |
6632
|
|
|
|
|
|
|
} ## end BEGIN |
6633
|
|
|
|
|
|
|
|
6634
|
|
|
|
|
|
|
sub is_non_structural_brace { |
6635
|
|
|
|
|
|
|
|
6636
|
|
|
|
|
|
|
# Decide if a brace or bracket is structural or non-structural |
6637
|
|
|
|
|
|
|
# by looking at the previous token and type |
6638
|
|
|
|
|
|
|
# USES GLOBAL VARIABLES: $last_nonblank_type, $last_nonblank_token |
6639
|
|
|
|
|
|
|
|
6640
|
|
|
|
|
|
|
# EXPERIMENTAL: Mark slices as structural; idea was to improve formatting. |
6641
|
|
|
|
|
|
|
# Tentatively deactivated because it caused the wrong operator expectation |
6642
|
|
|
|
|
|
|
# for this code: |
6643
|
|
|
|
|
|
|
# $user = @vars[1] / 100; |
6644
|
|
|
|
|
|
|
# Must update sub operator_expected before re-implementing. |
6645
|
|
|
|
|
|
|
# if ( $last_nonblank_type eq 'i' && $last_nonblank_token =~ /^@/ ) { |
6646
|
|
|
|
|
|
|
# return 0; |
6647
|
|
|
|
|
|
|
# } |
6648
|
|
|
|
|
|
|
|
6649
|
|
|
|
|
|
|
#-------------------------------------------------------------- |
6650
|
|
|
|
|
|
|
# NOTE: braces after type characters start code blocks, but for |
6651
|
|
|
|
|
|
|
# simplicity these are not identified as such. See also |
6652
|
|
|
|
|
|
|
# sub code_block_type |
6653
|
|
|
|
|
|
|
#-------------------------------------------------------------- |
6654
|
|
|
|
|
|
|
|
6655
|
|
|
|
|
|
|
##if ($last_nonblank_type eq 't') {return 0} |
6656
|
|
|
|
|
|
|
|
6657
|
|
|
|
|
|
|
# otherwise, it is non-structural if it is decorated |
6658
|
|
|
|
|
|
|
# by type information. |
6659
|
|
|
|
|
|
|
# For example, the '{' here is non-structural: ${xxx} |
6660
|
|
|
|
|
|
|
# Removed '::' to fix c074 |
6661
|
|
|
|
|
|
|
## $last_nonblank_token =~ /^([\$\@\*\&\%\)]|->|::)/ |
6662
|
|
|
|
|
|
|
return ( |
6663
|
|
|
|
|
|
|
## $last_nonblank_token =~ /^([\$\@\*\&\%\)]|->)/ |
6664
|
|
|
|
|
|
|
$is_sigil_or_paren{ substr( $last_nonblank_token, 0, 1 ) } |
6665
|
|
|
|
|
|
|
|| substr( $last_nonblank_token, 0, 2 ) eq '->' |
6666
|
|
|
|
|
|
|
|
6667
|
|
|
|
|
|
|
# or if we follow a hash or array closing curly brace or bracket |
6668
|
|
|
|
|
|
|
# For example, the second '{' in this is non-structural: $a{'x'}{'y'} |
6669
|
|
|
|
|
|
|
# because the first '}' would have been given type 'R' |
6670
|
|
|
|
|
|
|
##|| $last_nonblank_type =~ /^([R\]])$/ |
6671
|
2265
|
|
66
|
2265
|
0
|
14950
|
|| $is_R_closing_sb{$last_nonblank_type} |
6672
|
|
|
|
|
|
|
); |
6673
|
|
|
|
|
|
|
} ## end sub is_non_structural_brace |
6674
|
|
|
|
|
|
|
|
6675
|
|
|
|
|
|
|
####################################################################### |
6676
|
|
|
|
|
|
|
# Tokenizer routines for tracking container nesting depths |
6677
|
|
|
|
|
|
|
####################################################################### |
6678
|
|
|
|
|
|
|
|
6679
|
|
|
|
|
|
|
# The following routines keep track of nesting depths of the nesting |
6680
|
|
|
|
|
|
|
# types, ( [ { and ?. This is necessary for determining the indentation |
6681
|
|
|
|
|
|
|
# level, and also for debugging programs. Not only do they keep track of |
6682
|
|
|
|
|
|
|
# nesting depths of the individual brace types, but they check that each |
6683
|
|
|
|
|
|
|
# of the other brace types is balanced within matching pairs. For |
6684
|
|
|
|
|
|
|
# example, if the program sees this sequence: |
6685
|
|
|
|
|
|
|
# |
6686
|
|
|
|
|
|
|
# { ( ( ) } |
6687
|
|
|
|
|
|
|
# |
6688
|
|
|
|
|
|
|
# then it can determine that there is an extra left paren somewhere |
6689
|
|
|
|
|
|
|
# between the { and the }. And so on with every other possible |
6690
|
|
|
|
|
|
|
# combination of outer and inner brace types. For another |
6691
|
|
|
|
|
|
|
# example: |
6692
|
|
|
|
|
|
|
# |
6693
|
|
|
|
|
|
|
# ( [ ..... ] ] ) |
6694
|
|
|
|
|
|
|
# |
6695
|
|
|
|
|
|
|
# which has an extra ] within the parens. |
6696
|
|
|
|
|
|
|
# |
6697
|
|
|
|
|
|
|
# The brace types have indexes 0 .. 3 which are indexes into |
6698
|
|
|
|
|
|
|
# the matrices. |
6699
|
|
|
|
|
|
|
# |
6700
|
|
|
|
|
|
|
# The pair ? : are treated as just another nesting type, with ? acting |
6701
|
|
|
|
|
|
|
# as the opening brace and : acting as the closing brace. |
6702
|
|
|
|
|
|
|
# |
6703
|
|
|
|
|
|
|
# The matrix |
6704
|
|
|
|
|
|
|
# |
6705
|
|
|
|
|
|
|
# $rdepth_array->[$a][$b][ $rcurrent_depth->[$a] ] = $rcurrent_depth->[$b]; |
6706
|
|
|
|
|
|
|
# |
6707
|
|
|
|
|
|
|
# saves the nesting depth of brace type $b (where $b is either of the other |
6708
|
|
|
|
|
|
|
# nesting types) when brace type $a enters a new depth. When this depth |
6709
|
|
|
|
|
|
|
# decreases, a check is made that the current depth of brace types $b is |
6710
|
|
|
|
|
|
|
# unchanged, or otherwise there must have been an error. This can |
6711
|
|
|
|
|
|
|
# be very useful for localizing errors, particularly when perl runs to |
6712
|
|
|
|
|
|
|
# the end of a large file (such as this one) and announces that there |
6713
|
|
|
|
|
|
|
# is a problem somewhere. |
6714
|
|
|
|
|
|
|
# |
6715
|
|
|
|
|
|
|
# A numerical sequence number is maintained for every nesting type, |
6716
|
|
|
|
|
|
|
# so that each matching pair can be uniquely identified in a simple |
6717
|
|
|
|
|
|
|
# way. |
6718
|
|
|
|
|
|
|
|
6719
|
|
|
|
|
|
|
sub increase_nesting_depth { |
6720
|
4578
|
|
|
4578
|
0
|
9285
|
my ( $self, $aa, $pos ) = @_; |
6721
|
|
|
|
|
|
|
|
6722
|
|
|
|
|
|
|
# USES GLOBAL VARIABLES: $rcurrent_depth, |
6723
|
|
|
|
|
|
|
# $rcurrent_sequence_number, $rdepth_array, |
6724
|
|
|
|
|
|
|
# $rstarting_line_of_current_depth, $statement_type |
6725
|
4578
|
|
|
|
|
8287
|
my $cd_aa = ++$rcurrent_depth->[$aa]; |
6726
|
4578
|
|
|
|
|
6786
|
$total_depth++; |
6727
|
4578
|
|
|
|
|
9092
|
$rtotal_depth->[$aa][$cd_aa] = $total_depth; |
6728
|
4578
|
|
|
|
|
7531
|
my $input_line_number = $self->[_last_line_number_]; |
6729
|
4578
|
|
|
|
|
7817
|
my $input_line = $self->[_line_of_text_]; |
6730
|
|
|
|
|
|
|
|
6731
|
|
|
|
|
|
|
# Sequence numbers increment by number of items. This keeps |
6732
|
|
|
|
|
|
|
# a unique set of numbers but still allows the relative location |
6733
|
|
|
|
|
|
|
# of any type to be determined. |
6734
|
|
|
|
|
|
|
|
6735
|
|
|
|
|
|
|
# make a new unique sequence number |
6736
|
4578
|
|
|
|
|
7800
|
my $seqno = $next_sequence_number++; |
6737
|
|
|
|
|
|
|
|
6738
|
4578
|
|
|
|
|
8231
|
$rcurrent_sequence_number->[$aa][$cd_aa] = $seqno; |
6739
|
|
|
|
|
|
|
|
6740
|
4578
|
|
|
|
|
14795
|
$rstarting_line_of_current_depth->[$aa][$cd_aa] = |
6741
|
|
|
|
|
|
|
[ $input_line_number, $input_line, $pos ]; |
6742
|
|
|
|
|
|
|
|
6743
|
4578
|
|
|
|
|
14152
|
for my $bb ( 0 .. @closing_brace_names - 1 ) { |
6744
|
18312
|
100
|
|
|
|
33496
|
next if ( $bb == $aa ); |
6745
|
13734
|
|
|
|
|
26884
|
$rdepth_array->[$aa][$bb][$cd_aa] = $rcurrent_depth->[$bb]; |
6746
|
|
|
|
|
|
|
} |
6747
|
|
|
|
|
|
|
|
6748
|
|
|
|
|
|
|
# set a flag for indenting a nested ternary statement |
6749
|
4578
|
|
|
|
|
8568
|
my $indent = 0; |
6750
|
4578
|
100
|
|
|
|
10504
|
if ( $aa == QUESTION_COLON ) { |
6751
|
187
|
|
|
|
|
665
|
$rnested_ternary_flag->[$cd_aa] = 0; |
6752
|
187
|
100
|
|
|
|
713
|
if ( $cd_aa > 1 ) { |
6753
|
17
|
100
|
|
|
|
114
|
if ( $rnested_ternary_flag->[ $cd_aa - 1 ] == 0 ) { |
6754
|
16
|
|
|
|
|
47
|
my $pdepth = $rtotal_depth->[$aa][ $cd_aa - 1 ]; |
6755
|
16
|
100
|
|
|
|
57
|
if ( $pdepth == $total_depth - 1 ) { |
6756
|
8
|
|
|
|
|
14
|
$indent = 1; |
6757
|
8
|
|
|
|
|
23
|
$rnested_ternary_flag->[ $cd_aa - 1 ] = -1; |
6758
|
|
|
|
|
|
|
} |
6759
|
|
|
|
|
|
|
} |
6760
|
|
|
|
|
|
|
} |
6761
|
|
|
|
|
|
|
} |
6762
|
|
|
|
|
|
|
|
6763
|
|
|
|
|
|
|
# Fix part #1 for git82: save last token type for propagation of type 'Z' |
6764
|
4578
|
|
|
|
|
15682
|
$rnested_statement_type->[$aa][$cd_aa] = |
6765
|
|
|
|
|
|
|
[ $statement_type, $last_nonblank_type, $last_nonblank_token ]; |
6766
|
4578
|
|
|
|
|
7775
|
$statement_type = EMPTY_STRING; |
6767
|
4578
|
|
|
|
|
12762
|
return ( $seqno, $indent ); |
6768
|
|
|
|
|
|
|
} ## end sub increase_nesting_depth |
6769
|
|
|
|
|
|
|
|
6770
|
|
|
|
|
|
|
sub is_balanced_closing_container { |
6771
|
|
|
|
|
|
|
|
6772
|
|
|
|
|
|
|
# Return true if a closing container can go here without error |
6773
|
|
|
|
|
|
|
# Return false if not |
6774
|
47
|
|
|
47
|
0
|
125
|
my ($aa) = @_; |
6775
|
|
|
|
|
|
|
|
6776
|
|
|
|
|
|
|
# cannot close if there was no opening |
6777
|
47
|
|
|
|
|
97
|
my $cd_aa = $rcurrent_depth->[$aa]; |
6778
|
47
|
100
|
|
|
|
189
|
return if ( $cd_aa <= 0 ); |
6779
|
|
|
|
|
|
|
|
6780
|
|
|
|
|
|
|
# check that any other brace types $bb contained within would be balanced |
6781
|
8
|
|
|
|
|
36
|
for my $bb ( 0 .. @closing_brace_names - 1 ) { |
6782
|
8
|
50
|
|
|
|
20
|
next if ( $bb == $aa ); |
6783
|
|
|
|
|
|
|
return |
6784
|
8
|
50
|
|
|
|
43
|
if ( $rdepth_array->[$aa][$bb][$cd_aa] != $rcurrent_depth->[$bb] ); |
6785
|
|
|
|
|
|
|
} |
6786
|
|
|
|
|
|
|
|
6787
|
|
|
|
|
|
|
# OK, everything will be balanced |
6788
|
0
|
|
|
|
|
0
|
return 1; |
6789
|
|
|
|
|
|
|
} ## end sub is_balanced_closing_container |
6790
|
|
|
|
|
|
|
|
6791
|
|
|
|
|
|
|
sub decrease_nesting_depth { |
6792
|
|
|
|
|
|
|
|
6793
|
4578
|
|
|
4578
|
0
|
9194
|
my ( $self, $aa, $pos ) = @_; |
6794
|
|
|
|
|
|
|
|
6795
|
|
|
|
|
|
|
# USES GLOBAL VARIABLES: $rcurrent_depth, |
6796
|
|
|
|
|
|
|
# $rcurrent_sequence_number, $rdepth_array, $rstarting_line_of_current_depth |
6797
|
|
|
|
|
|
|
# $statement_type |
6798
|
4578
|
|
|
|
|
7121
|
my $seqno = 0; |
6799
|
4578
|
|
|
|
|
7454
|
my $input_line_number = $self->[_last_line_number_]; |
6800
|
4578
|
|
|
|
|
7779
|
my $input_line = $self->[_line_of_text_]; |
6801
|
|
|
|
|
|
|
|
6802
|
4578
|
|
|
|
|
6830
|
my $outdent = 0; |
6803
|
4578
|
|
|
|
|
6669
|
$total_depth--; |
6804
|
4578
|
|
|
|
|
7889
|
my $cd_aa = $rcurrent_depth->[$aa]; |
6805
|
4578
|
50
|
|
|
|
9576
|
if ( $cd_aa > 0 ) { |
6806
|
|
|
|
|
|
|
|
6807
|
|
|
|
|
|
|
# set a flag for un-indenting after seeing a nested ternary statement |
6808
|
4578
|
|
|
|
|
8321
|
$seqno = $rcurrent_sequence_number->[$aa][$cd_aa]; |
6809
|
4578
|
100
|
|
|
|
9983
|
if ( $aa == QUESTION_COLON ) { |
6810
|
187
|
|
|
|
|
567
|
$outdent = $rnested_ternary_flag->[$cd_aa]; |
6811
|
|
|
|
|
|
|
} |
6812
|
|
|
|
|
|
|
|
6813
|
|
|
|
|
|
|
# Fix part #2 for git82: use saved type for propagation of type 'Z' |
6814
|
|
|
|
|
|
|
# through type L-R braces. Perl seems to allow ${bareword} |
6815
|
|
|
|
|
|
|
# as an indirect object, but nothing much more complex than that. |
6816
|
|
|
|
|
|
|
( $statement_type, my $saved_type, my $saved_token ) = |
6817
|
4578
|
|
|
|
|
6871
|
@{ $rnested_statement_type->[$aa][ $rcurrent_depth->[$aa] ] }; |
|
4578
|
|
|
|
|
13015
|
|
6818
|
4578
|
50
|
100
|
|
|
16056
|
if ( $aa == BRACE |
|
|
|
66
|
|
|
|
|
|
|
|
66
|
|
|
|
|
6819
|
|
|
|
|
|
|
&& $saved_type eq 'Z' |
6820
|
|
|
|
|
|
|
&& $last_nonblank_type eq 'w' |
6821
|
|
|
|
|
|
|
&& $rbrace_structural_type->[$brace_depth] eq 'L' ) |
6822
|
|
|
|
|
|
|
{ |
6823
|
1
|
|
|
|
|
3
|
$last_nonblank_type = $saved_type; |
6824
|
|
|
|
|
|
|
} |
6825
|
|
|
|
|
|
|
|
6826
|
|
|
|
|
|
|
# check that any brace types $bb contained within are balanced |
6827
|
4578
|
|
|
|
|
13217
|
for my $bb ( 0 .. @closing_brace_names - 1 ) { |
6828
|
18312
|
100
|
|
|
|
32593
|
next if ( $bb == $aa ); |
6829
|
|
|
|
|
|
|
|
6830
|
13734
|
50
|
|
|
|
32116
|
if ( $rdepth_array->[$aa][$bb][$cd_aa] != $rcurrent_depth->[$bb] ) { |
6831
|
0
|
|
|
|
|
0
|
my $diff = |
6832
|
|
|
|
|
|
|
$rcurrent_depth->[$bb] - $rdepth_array->[$aa][$bb][$cd_aa]; |
6833
|
|
|
|
|
|
|
|
6834
|
|
|
|
|
|
|
# don't whine too many times |
6835
|
0
|
|
|
|
|
0
|
my $saw_brace_error = $self->get_saw_brace_error(); |
6836
|
0
|
0
|
0
|
|
|
0
|
if ( |
|
|
|
0
|
|
|
|
|
6837
|
|
|
|
|
|
|
$saw_brace_error <= MAX_NAG_MESSAGES |
6838
|
|
|
|
|
|
|
|
6839
|
|
|
|
|
|
|
# if too many closing types have occurred, we probably |
6840
|
|
|
|
|
|
|
# already caught this error |
6841
|
|
|
|
|
|
|
&& ( ( $diff > 0 ) || ( $saw_brace_error <= 0 ) ) |
6842
|
|
|
|
|
|
|
) |
6843
|
|
|
|
|
|
|
{ |
6844
|
0
|
|
|
|
|
0
|
$self->interrupt_logfile(); |
6845
|
0
|
|
|
|
|
0
|
my $rsl = $rstarting_line_of_current_depth->[$aa][$cd_aa]; |
6846
|
0
|
|
|
|
|
0
|
my $sl = $rsl->[0]; |
6847
|
0
|
|
|
|
|
0
|
my $rel = [ $input_line_number, $input_line, $pos ]; |
6848
|
0
|
|
|
|
|
0
|
my $el = $rel->[0]; |
6849
|
0
|
|
|
|
|
0
|
my ($ess); |
6850
|
|
|
|
|
|
|
|
6851
|
0
|
0
|
0
|
|
|
0
|
if ( $diff == 1 || $diff == -1 ) { |
6852
|
0
|
|
|
|
|
0
|
$ess = EMPTY_STRING; |
6853
|
|
|
|
|
|
|
} |
6854
|
|
|
|
|
|
|
else { |
6855
|
0
|
|
|
|
|
0
|
$ess = 's'; |
6856
|
|
|
|
|
|
|
} |
6857
|
0
|
0
|
|
|
|
0
|
my $bname = |
6858
|
|
|
|
|
|
|
( $diff > 0 ) |
6859
|
|
|
|
|
|
|
? $opening_brace_names[$bb] |
6860
|
|
|
|
|
|
|
: $closing_brace_names[$bb]; |
6861
|
0
|
|
|
|
|
0
|
$self->write_error_indicator_pair( @{$rsl}, '^' ); |
|
0
|
|
|
|
|
0
|
|
6862
|
0
|
|
|
|
|
0
|
my $msg = <<"EOM"; |
6863
|
|
|
|
|
|
|
Found $diff extra $bname$ess between $opening_brace_names[$aa] on line $sl and $closing_brace_names[$aa] on line $el |
6864
|
|
|
|
|
|
|
EOM |
6865
|
|
|
|
|
|
|
|
6866
|
0
|
0
|
|
|
|
0
|
if ( $diff > 0 ) { |
6867
|
0
|
|
|
|
|
0
|
my $rml = |
6868
|
|
|
|
|
|
|
$rstarting_line_of_current_depth->[$bb] |
6869
|
|
|
|
|
|
|
[ $rcurrent_depth->[$bb] ]; |
6870
|
0
|
|
|
|
|
0
|
my $ml = $rml->[0]; |
6871
|
0
|
|
|
|
|
0
|
$msg .= |
6872
|
|
|
|
|
|
|
" The most recent un-matched $bname is on line $ml\n"; |
6873
|
0
|
|
|
|
|
0
|
$self->write_error_indicator_pair( @{$rml}, '^' ); |
|
0
|
|
|
|
|
0
|
|
6874
|
|
|
|
|
|
|
} |
6875
|
0
|
|
|
|
|
0
|
$self->write_error_indicator_pair( @{$rel}, '^' ); |
|
0
|
|
|
|
|
0
|
|
6876
|
0
|
|
|
|
|
0
|
$self->warning($msg); |
6877
|
0
|
|
|
|
|
0
|
$self->resume_logfile(); |
6878
|
|
|
|
|
|
|
} |
6879
|
0
|
|
|
|
|
0
|
$self->increment_brace_error(); |
6880
|
|
|
|
|
|
|
} |
6881
|
|
|
|
|
|
|
} |
6882
|
4578
|
|
|
|
|
8825
|
$rcurrent_depth->[$aa]--; |
6883
|
|
|
|
|
|
|
} |
6884
|
|
|
|
|
|
|
else { |
6885
|
|
|
|
|
|
|
|
6886
|
0
|
|
|
|
|
0
|
my $saw_brace_error = $self->get_saw_brace_error(); |
6887
|
0
|
0
|
|
|
|
0
|
if ( $saw_brace_error <= MAX_NAG_MESSAGES ) { |
6888
|
0
|
|
|
|
|
0
|
my $msg = <<"EOM"; |
6889
|
|
|
|
|
|
|
There is no previous $opening_brace_names[$aa] to match a $closing_brace_names[$aa] on line $input_line_number |
6890
|
|
|
|
|
|
|
EOM |
6891
|
0
|
|
|
|
|
0
|
$self->indicate_error( $msg, $input_line_number, $input_line, $pos, |
6892
|
|
|
|
|
|
|
'^' ); |
6893
|
|
|
|
|
|
|
} |
6894
|
0
|
|
|
|
|
0
|
$self->increment_brace_error(); |
6895
|
|
|
|
|
|
|
|
6896
|
|
|
|
|
|
|
# keep track of errors in braces alone (ignoring ternary nesting errors) |
6897
|
0
|
0
|
|
|
|
0
|
$self->[_true_brace_error_count_]++ |
6898
|
|
|
|
|
|
|
if ( $closing_brace_names[$aa] ne "':'" ); |
6899
|
|
|
|
|
|
|
} |
6900
|
4578
|
|
|
|
|
12813
|
return ( $seqno, $outdent ); |
6901
|
|
|
|
|
|
|
} ## end sub decrease_nesting_depth |
6902
|
|
|
|
|
|
|
|
6903
|
|
|
|
|
|
|
sub check_final_nesting_depths { |
6904
|
|
|
|
|
|
|
|
6905
|
|
|
|
|
|
|
# USES GLOBAL VARIABLES: $rcurrent_depth, $rstarting_line_of_current_depth |
6906
|
562
|
|
|
562
|
0
|
1401
|
my $self = shift; |
6907
|
|
|
|
|
|
|
|
6908
|
562
|
|
|
|
|
2321
|
for my $aa ( 0 .. @closing_brace_names - 1 ) { |
6909
|
|
|
|
|
|
|
|
6910
|
2248
|
|
|
|
|
3728
|
my $cd_aa = $rcurrent_depth->[$aa]; |
6911
|
2248
|
50
|
|
|
|
5218
|
if ($cd_aa) { |
6912
|
0
|
|
|
|
|
0
|
my $rsl = $rstarting_line_of_current_depth->[$aa][$cd_aa]; |
6913
|
0
|
|
|
|
|
0
|
my $sl = $rsl->[0]; |
6914
|
0
|
|
|
|
|
0
|
my $msg = <<"EOM"; |
6915
|
|
|
|
|
|
|
Final nesting depth of $opening_brace_names[$aa]s is $cd_aa |
6916
|
|
|
|
|
|
|
The most recent un-matched $opening_brace_names[$aa] is on line $sl |
6917
|
|
|
|
|
|
|
EOM |
6918
|
0
|
|
|
|
|
0
|
$self->indicate_error( $msg, @{$rsl}, '^' ); |
|
0
|
|
|
|
|
0
|
|
6919
|
0
|
|
|
|
|
0
|
$self->increment_brace_error(); |
6920
|
|
|
|
|
|
|
} |
6921
|
|
|
|
|
|
|
} |
6922
|
562
|
|
|
|
|
1613
|
return; |
6923
|
|
|
|
|
|
|
} ## end sub check_final_nesting_depths |
6924
|
|
|
|
|
|
|
|
6925
|
|
|
|
|
|
|
####################################################################### |
6926
|
|
|
|
|
|
|
# Tokenizer routines for looking ahead in input stream |
6927
|
|
|
|
|
|
|
####################################################################### |
6928
|
|
|
|
|
|
|
|
6929
|
|
|
|
|
|
|
sub peek_ahead_for_n_nonblank_pre_tokens { |
6930
|
|
|
|
|
|
|
|
6931
|
|
|
|
|
|
|
# returns next n pretokens if they exist |
6932
|
|
|
|
|
|
|
# returns undef's if hits eof without seeing any pretokens |
6933
|
|
|
|
|
|
|
# USES GLOBAL VARIABLES: (none) |
6934
|
170
|
|
|
170
|
0
|
424
|
my ( $self, $max_pretokens ) = @_; |
6935
|
170
|
|
|
|
|
287
|
my $line; |
6936
|
170
|
|
|
|
|
291
|
my $i = 0; |
6937
|
170
|
|
|
|
|
361
|
my ( $rpre_tokens, $rmap, $rpre_types ); |
6938
|
|
|
|
|
|
|
|
6939
|
170
|
|
|
|
|
553
|
while ( $line = $self->peek_ahead( $i++ ) ) { |
6940
|
182
|
|
|
|
|
940
|
$line =~ s/^\s*//; # trim leading blanks |
6941
|
182
|
100
|
|
|
|
606
|
next if ( length($line) <= 0 ); # skip blank |
6942
|
176
|
100
|
|
|
|
601
|
next if ( $line =~ /^#/ ); # skip comment |
6943
|
162
|
|
|
|
|
472
|
( $rpre_tokens, $rmap, $rpre_types ) = |
6944
|
|
|
|
|
|
|
pre_tokenize( $line, $max_pretokens ); |
6945
|
162
|
|
|
|
|
463
|
last; |
6946
|
|
|
|
|
|
|
} |
6947
|
170
|
|
|
|
|
629
|
return ( $rpre_tokens, $rpre_types ); |
6948
|
|
|
|
|
|
|
} ## end sub peek_ahead_for_n_nonblank_pre_tokens |
6949
|
|
|
|
|
|
|
|
6950
|
|
|
|
|
|
|
# look ahead for next non-blank, non-comment line of code |
6951
|
|
|
|
|
|
|
sub peek_ahead_for_nonblank_token { |
6952
|
|
|
|
|
|
|
|
6953
|
|
|
|
|
|
|
# USES GLOBAL VARIABLES: (none) |
6954
|
125
|
|
|
125
|
0
|
377
|
my ( $self, $rtokens, $max_token_index ) = @_; |
6955
|
125
|
|
|
|
|
234
|
my $line; |
6956
|
125
|
|
|
|
|
259
|
my $i = 0; |
6957
|
|
|
|
|
|
|
|
6958
|
125
|
|
|
|
|
534
|
while ( $line = $self->peek_ahead( $i++ ) ) { |
6959
|
169
|
|
|
|
|
979
|
$line =~ s/^\s*//; # trim leading blanks |
6960
|
169
|
100
|
|
|
|
682
|
next if ( length($line) <= 0 ); # skip blank |
6961
|
144
|
100
|
|
|
|
619
|
next if ( $line =~ /^#/ ); # skip comment |
6962
|
|
|
|
|
|
|
|
6963
|
|
|
|
|
|
|
# Updated from 2 to 3 to get trigraphs, added for case b1175 |
6964
|
123
|
|
|
|
|
474
|
my ( $rtok, $rmap, $rtype ) = pre_tokenize( $line, 3 ); |
6965
|
123
|
|
|
|
|
407
|
my $j = $max_token_index + 1; |
6966
|
|
|
|
|
|
|
|
6967
|
123
|
|
|
|
|
278
|
foreach my $tok ( @{$rtok} ) { |
|
123
|
|
|
|
|
342
|
|
6968
|
355
|
100
|
|
|
|
950
|
last if ( $tok =~ "\n" ); |
6969
|
320
|
|
|
|
|
801
|
$rtokens->[ ++$j ] = $tok; |
6970
|
|
|
|
|
|
|
} |
6971
|
123
|
|
|
|
|
482
|
last; |
6972
|
|
|
|
|
|
|
} |
6973
|
125
|
|
|
|
|
378
|
return; |
6974
|
|
|
|
|
|
|
} ## end sub peek_ahead_for_nonblank_token |
6975
|
|
|
|
|
|
|
|
6976
|
|
|
|
|
|
|
####################################################################### |
6977
|
|
|
|
|
|
|
# Tokenizer guessing routines for ambiguous situations |
6978
|
|
|
|
|
|
|
####################################################################### |
6979
|
|
|
|
|
|
|
|
6980
|
|
|
|
|
|
|
sub guess_if_pattern_or_conditional { |
6981
|
|
|
|
|
|
|
|
6982
|
|
|
|
|
|
|
# this routine is called when we have encountered a ? following an |
6983
|
|
|
|
|
|
|
# unknown bareword, and we must decide if it starts a pattern or not |
6984
|
|
|
|
|
|
|
# input parameters: |
6985
|
|
|
|
|
|
|
# $i - token index of the ? starting possible pattern |
6986
|
|
|
|
|
|
|
# output parameters: |
6987
|
|
|
|
|
|
|
# $is_pattern = 0 if probably not pattern, =1 if probably a pattern |
6988
|
|
|
|
|
|
|
# msg = a warning or diagnostic message |
6989
|
|
|
|
|
|
|
# USES GLOBAL VARIABLES: $last_nonblank_token |
6990
|
|
|
|
|
|
|
|
6991
|
11
|
|
|
11
|
0
|
45
|
my ( $self, $i, $rtokens, $rtoken_map, $max_token_index ) = @_; |
6992
|
11
|
|
|
|
|
22
|
my $is_pattern = 0; |
6993
|
11
|
|
|
|
|
55
|
my $msg = "guessing that ? after $last_nonblank_token starts a "; |
6994
|
|
|
|
|
|
|
|
6995
|
11
|
50
|
|
|
|
51
|
if ( $i >= $max_token_index ) { |
6996
|
0
|
|
|
|
|
0
|
$msg .= "conditional (no end to pattern found on the line)\n"; |
6997
|
|
|
|
|
|
|
} |
6998
|
|
|
|
|
|
|
else { |
6999
|
11
|
|
|
|
|
33
|
my $ibeg = $i; |
7000
|
11
|
|
|
|
|
29
|
$i = $ibeg + 1; |
7001
|
11
|
|
|
|
|
35
|
my $next_token = $rtokens->[$i]; # first token after ? |
7002
|
|
|
|
|
|
|
|
7003
|
|
|
|
|
|
|
# look for a possible ending ? on this line.. |
7004
|
11
|
|
|
|
|
28
|
my $in_quote = 1; |
7005
|
11
|
|
|
|
|
25
|
my $quote_depth = 0; |
7006
|
11
|
|
|
|
|
22
|
my $quote_character = EMPTY_STRING; |
7007
|
11
|
|
|
|
|
26
|
my $quote_pos = 0; |
7008
|
11
|
|
|
|
|
21
|
my $quoted_string; |
7009
|
|
|
|
|
|
|
( |
7010
|
|
|
|
|
|
|
|
7011
|
11
|
|
|
|
|
60
|
$i, |
7012
|
|
|
|
|
|
|
$in_quote, |
7013
|
|
|
|
|
|
|
$quote_character, |
7014
|
|
|
|
|
|
|
$quote_pos, |
7015
|
|
|
|
|
|
|
$quote_depth, |
7016
|
|
|
|
|
|
|
$quoted_string, |
7017
|
|
|
|
|
|
|
|
7018
|
|
|
|
|
|
|
) = $self->follow_quoted_string( |
7019
|
|
|
|
|
|
|
|
7020
|
|
|
|
|
|
|
$ibeg, |
7021
|
|
|
|
|
|
|
$in_quote, |
7022
|
|
|
|
|
|
|
$rtokens, |
7023
|
|
|
|
|
|
|
$quote_character, |
7024
|
|
|
|
|
|
|
$quote_pos, |
7025
|
|
|
|
|
|
|
$quote_depth, |
7026
|
|
|
|
|
|
|
$max_token_index, |
7027
|
|
|
|
|
|
|
|
7028
|
|
|
|
|
|
|
); |
7029
|
|
|
|
|
|
|
|
7030
|
11
|
50
|
|
|
|
97
|
if ($in_quote) { |
7031
|
|
|
|
|
|
|
|
7032
|
|
|
|
|
|
|
# we didn't find an ending ? on this line, |
7033
|
|
|
|
|
|
|
# so we bias towards conditional |
7034
|
11
|
|
|
|
|
34
|
$is_pattern = 0; |
7035
|
11
|
|
|
|
|
48
|
$msg .= "conditional (no ending ? on this line)\n"; |
7036
|
|
|
|
|
|
|
|
7037
|
|
|
|
|
|
|
# we found an ending ?, so we bias towards a pattern |
7038
|
|
|
|
|
|
|
} |
7039
|
|
|
|
|
|
|
else { |
7040
|
|
|
|
|
|
|
|
7041
|
|
|
|
|
|
|
# Watch out for an ending ? in quotes, like this |
7042
|
|
|
|
|
|
|
# my $case_flag = File::Spec->case_tolerant ? '(?i)' : ''; |
7043
|
0
|
|
|
|
|
0
|
my $s_quote = 0; |
7044
|
0
|
|
|
|
|
0
|
my $d_quote = 0; |
7045
|
0
|
|
|
|
|
0
|
my $colons = 0; |
7046
|
0
|
|
|
|
|
0
|
foreach my $ii ( $ibeg + 1 .. $i - 1 ) { |
7047
|
0
|
|
|
|
|
0
|
my $tok = $rtokens->[$ii]; |
7048
|
0
|
0
|
|
|
|
0
|
if ( $tok eq ":" ) { $colons++ } |
|
0
|
|
|
|
|
0
|
|
7049
|
0
|
0
|
|
|
|
0
|
if ( $tok eq "'" ) { $s_quote++ } |
|
0
|
|
|
|
|
0
|
|
7050
|
0
|
0
|
|
|
|
0
|
if ( $tok eq '"' ) { $d_quote++ } |
|
0
|
|
|
|
|
0
|
|
7051
|
|
|
|
|
|
|
} |
7052
|
0
|
0
|
0
|
|
|
0
|
if ( $s_quote % 2 || $d_quote % 2 || $colons ) { |
|
|
0
|
0
|
|
|
|
|
7053
|
0
|
|
|
|
|
0
|
$is_pattern = 0; |
7054
|
0
|
|
|
|
|
0
|
$msg .= "found ending ? but unbalanced quote chars\n"; |
7055
|
|
|
|
|
|
|
} |
7056
|
|
|
|
|
|
|
elsif ( |
7057
|
|
|
|
|
|
|
$self->pattern_expected( $i, $rtokens, $max_token_index ) >= 0 ) |
7058
|
|
|
|
|
|
|
{ |
7059
|
0
|
|
|
|
|
0
|
$is_pattern = 1; |
7060
|
0
|
|
|
|
|
0
|
$msg .= "pattern (found ending ? and pattern expected)\n"; |
7061
|
|
|
|
|
|
|
} |
7062
|
|
|
|
|
|
|
else { |
7063
|
0
|
|
|
|
|
0
|
$msg .= "pattern (uncertain, but found ending ?)\n"; |
7064
|
|
|
|
|
|
|
} |
7065
|
|
|
|
|
|
|
} |
7066
|
|
|
|
|
|
|
} |
7067
|
11
|
|
|
|
|
45
|
return ( $is_pattern, $msg ); |
7068
|
|
|
|
|
|
|
} ## end sub guess_if_pattern_or_conditional |
7069
|
|
|
|
|
|
|
|
7070
|
|
|
|
|
|
|
my %is_known_constant; |
7071
|
|
|
|
|
|
|
my %is_known_function; |
7072
|
|
|
|
|
|
|
|
7073
|
|
|
|
|
|
|
BEGIN { |
7074
|
|
|
|
|
|
|
|
7075
|
|
|
|
|
|
|
# Constants like 'pi' in Trig.pm are common |
7076
|
39
|
|
|
39
|
|
280
|
my @q = qw(pi pi2 pi4 pip2 pip4); |
7077
|
39
|
|
|
|
|
321
|
@{is_known_constant}{@q} = (1) x scalar(@q); |
7078
|
|
|
|
|
|
|
|
7079
|
|
|
|
|
|
|
# parenless calls of 'ok' are common |
7080
|
39
|
|
|
|
|
127
|
@q = qw( ok ); |
7081
|
39
|
|
|
|
|
70613
|
@{is_known_function}{@q} = (1) x scalar(@q); |
7082
|
|
|
|
|
|
|
} ## end BEGIN |
7083
|
|
|
|
|
|
|
|
7084
|
|
|
|
|
|
|
sub guess_if_pattern_or_division { |
7085
|
|
|
|
|
|
|
|
7086
|
|
|
|
|
|
|
# this routine is called when we have encountered a / following an |
7087
|
|
|
|
|
|
|
# unknown bareword, and we must decide if it starts a pattern or is a |
7088
|
|
|
|
|
|
|
# division |
7089
|
|
|
|
|
|
|
# input parameters: |
7090
|
|
|
|
|
|
|
# $i - token index of the / starting possible pattern |
7091
|
|
|
|
|
|
|
# output parameters: |
7092
|
|
|
|
|
|
|
# $is_pattern = 0 if probably division, =1 if probably a pattern |
7093
|
|
|
|
|
|
|
# msg = a warning or diagnostic message |
7094
|
|
|
|
|
|
|
# USES GLOBAL VARIABLES: $last_nonblank_token |
7095
|
0
|
|
|
0
|
0
|
0
|
my ( $self, $i, $rtokens, $rtoken_map, $max_token_index ) = @_; |
7096
|
0
|
|
|
|
|
0
|
my $is_pattern = 0; |
7097
|
0
|
|
|
|
|
0
|
my $msg = "guessing that / after $last_nonblank_token starts a "; |
7098
|
|
|
|
|
|
|
|
7099
|
0
|
0
|
|
|
|
0
|
if ( $i >= $max_token_index ) { |
7100
|
0
|
|
|
|
|
0
|
$msg .= "division (no end to pattern found on the line)\n"; |
7101
|
|
|
|
|
|
|
} |
7102
|
|
|
|
|
|
|
else { |
7103
|
0
|
|
|
|
|
0
|
my $ibeg = $i; |
7104
|
0
|
|
|
|
|
0
|
my $divide_possible = |
7105
|
|
|
|
|
|
|
$self->is_possible_numerator( $i, $rtokens, $max_token_index ); |
7106
|
|
|
|
|
|
|
|
7107
|
0
|
0
|
|
|
|
0
|
if ( $divide_possible < 0 ) { |
7108
|
0
|
|
|
|
|
0
|
$msg = "pattern (division not possible here)\n"; |
7109
|
0
|
|
|
|
|
0
|
$is_pattern = 1; |
7110
|
0
|
|
|
|
|
0
|
return ( $is_pattern, $msg ); |
7111
|
|
|
|
|
|
|
} |
7112
|
|
|
|
|
|
|
|
7113
|
0
|
|
|
|
|
0
|
$i = $ibeg + 1; |
7114
|
0
|
|
|
|
|
0
|
my $next_token = $rtokens->[$i]; # first token after slash |
7115
|
|
|
|
|
|
|
|
7116
|
|
|
|
|
|
|
# One of the things we can look at is the spacing around the slash. |
7117
|
|
|
|
|
|
|
# There # are four possible spacings around the first slash: |
7118
|
|
|
|
|
|
|
# |
7119
|
|
|
|
|
|
|
# return pi/two;#/; -/- |
7120
|
|
|
|
|
|
|
# return pi/ two;#/; -/+ |
7121
|
|
|
|
|
|
|
# return pi / two;#/; +/+ |
7122
|
|
|
|
|
|
|
# return pi /two;#/; +/- <-- possible pattern |
7123
|
|
|
|
|
|
|
# |
7124
|
|
|
|
|
|
|
# Spacing rule: a space before the slash but not after the slash |
7125
|
|
|
|
|
|
|
# usually indicates a pattern. We can use this to break ties. |
7126
|
|
|
|
|
|
|
|
7127
|
0
|
|
0
|
|
|
0
|
my $is_pattern_by_spacing = |
7128
|
|
|
|
|
|
|
( $i > 1 && $next_token !~ m/^\s/ && $rtokens->[ $i - 2 ] =~ m/^\s/ ); |
7129
|
|
|
|
|
|
|
|
7130
|
|
|
|
|
|
|
# look for a possible ending / on this line.. |
7131
|
0
|
|
|
|
|
0
|
my $in_quote = 1; |
7132
|
0
|
|
|
|
|
0
|
my $quote_depth = 0; |
7133
|
0
|
|
|
|
|
0
|
my $quote_character = EMPTY_STRING; |
7134
|
0
|
|
|
|
|
0
|
my $quote_pos = 0; |
7135
|
0
|
|
|
|
|
0
|
my $quoted_string; |
7136
|
|
|
|
|
|
|
( |
7137
|
0
|
|
|
|
|
0
|
$i, $in_quote, $quote_character, $quote_pos, $quote_depth, |
7138
|
|
|
|
|
|
|
$quoted_string |
7139
|
|
|
|
|
|
|
) |
7140
|
|
|
|
|
|
|
= $self->follow_quoted_string( $ibeg, $in_quote, $rtokens, |
7141
|
|
|
|
|
|
|
$quote_character, $quote_pos, $quote_depth, $max_token_index ); |
7142
|
|
|
|
|
|
|
|
7143
|
0
|
0
|
|
|
|
0
|
if ($in_quote) { |
7144
|
|
|
|
|
|
|
|
7145
|
|
|
|
|
|
|
# we didn't find an ending / on this line, so we bias towards |
7146
|
|
|
|
|
|
|
# division |
7147
|
0
|
0
|
|
|
|
0
|
if ( $divide_possible >= 0 ) { |
7148
|
0
|
|
|
|
|
0
|
$is_pattern = 0; |
7149
|
0
|
|
|
|
|
0
|
$msg .= "division (no ending / on this line)\n"; |
7150
|
|
|
|
|
|
|
} |
7151
|
|
|
|
|
|
|
else { |
7152
|
|
|
|
|
|
|
|
7153
|
|
|
|
|
|
|
# assuming a multi-line pattern ... this is risky, but division |
7154
|
|
|
|
|
|
|
# does not seem possible. If this fails, it would either be due |
7155
|
|
|
|
|
|
|
# to a syntax error in the code, or the division_expected logic |
7156
|
|
|
|
|
|
|
# needs to be fixed. |
7157
|
0
|
|
|
|
|
0
|
$msg = "multi-line pattern (division not possible)\n"; |
7158
|
0
|
|
|
|
|
0
|
$is_pattern = 1; |
7159
|
|
|
|
|
|
|
} |
7160
|
|
|
|
|
|
|
} |
7161
|
|
|
|
|
|
|
|
7162
|
|
|
|
|
|
|
# we found an ending /, so we bias slightly towards a pattern |
7163
|
|
|
|
|
|
|
else { |
7164
|
|
|
|
|
|
|
|
7165
|
0
|
|
|
|
|
0
|
my $pattern_expected = |
7166
|
|
|
|
|
|
|
$self->pattern_expected( $i, $rtokens, $max_token_index ); |
7167
|
|
|
|
|
|
|
|
7168
|
0
|
0
|
|
|
|
0
|
if ( $pattern_expected >= 0 ) { |
7169
|
|
|
|
|
|
|
|
7170
|
|
|
|
|
|
|
# pattern looks possible... |
7171
|
0
|
0
|
|
|
|
0
|
if ( $divide_possible >= 0 ) { |
7172
|
|
|
|
|
|
|
|
7173
|
|
|
|
|
|
|
# Both pattern and divide can work here... |
7174
|
|
|
|
|
|
|
|
7175
|
|
|
|
|
|
|
# Increase weight of divide if a pure number follows |
7176
|
0
|
|
|
|
|
0
|
$divide_possible += $next_token =~ /^\d+$/; |
7177
|
|
|
|
|
|
|
|
7178
|
|
|
|
|
|
|
# Check for known constants in the numerator, like 'pi' |
7179
|
0
|
0
|
|
|
|
0
|
if ( $is_known_constant{$last_nonblank_token} ) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
7180
|
0
|
|
|
|
|
0
|
$msg .= |
7181
|
|
|
|
|
|
|
"division (pattern works too but saw known constant '$last_nonblank_token')\n"; |
7182
|
0
|
|
|
|
|
0
|
$is_pattern = 0; |
7183
|
|
|
|
|
|
|
} |
7184
|
|
|
|
|
|
|
|
7185
|
|
|
|
|
|
|
# A very common bare word in pattern expressions is 'ok' |
7186
|
|
|
|
|
|
|
elsif ( $is_known_function{$last_nonblank_token} ) { |
7187
|
0
|
|
|
|
|
0
|
$msg .= |
7188
|
|
|
|
|
|
|
"pattern (division works too but saw '$last_nonblank_token')\n"; |
7189
|
0
|
|
|
|
|
0
|
$is_pattern = 1; |
7190
|
|
|
|
|
|
|
} |
7191
|
|
|
|
|
|
|
|
7192
|
|
|
|
|
|
|
# If one rule is more definite, use it |
7193
|
|
|
|
|
|
|
elsif ( $divide_possible > $pattern_expected ) { |
7194
|
0
|
|
|
|
|
0
|
$msg .= |
7195
|
|
|
|
|
|
|
"division (more likely based on following tokens)\n"; |
7196
|
0
|
|
|
|
|
0
|
$is_pattern = 0; |
7197
|
|
|
|
|
|
|
} |
7198
|
|
|
|
|
|
|
|
7199
|
|
|
|
|
|
|
# otherwise, use the spacing rule |
7200
|
|
|
|
|
|
|
elsif ($is_pattern_by_spacing) { |
7201
|
0
|
|
|
|
|
0
|
$msg .= |
7202
|
|
|
|
|
|
|
"pattern (guess on spacing, but division possible too)\n"; |
7203
|
0
|
|
|
|
|
0
|
$is_pattern = 1; |
7204
|
|
|
|
|
|
|
} |
7205
|
|
|
|
|
|
|
else { |
7206
|
0
|
|
|
|
|
0
|
$msg .= |
7207
|
|
|
|
|
|
|
"division (guess on spacing, but pattern is possible too)\n"; |
7208
|
0
|
|
|
|
|
0
|
$is_pattern = 0; |
7209
|
|
|
|
|
|
|
} |
7210
|
|
|
|
|
|
|
} |
7211
|
|
|
|
|
|
|
|
7212
|
|
|
|
|
|
|
# divide_possible < 0 means divide can not work here |
7213
|
|
|
|
|
|
|
else { |
7214
|
0
|
|
|
|
|
0
|
$is_pattern = 1; |
7215
|
0
|
|
|
|
|
0
|
$msg .= "pattern (division not possible)\n"; |
7216
|
|
|
|
|
|
|
} |
7217
|
|
|
|
|
|
|
} |
7218
|
|
|
|
|
|
|
|
7219
|
|
|
|
|
|
|
# pattern does not look possible... |
7220
|
|
|
|
|
|
|
else { |
7221
|
|
|
|
|
|
|
|
7222
|
0
|
0
|
|
|
|
0
|
if ( $divide_possible >= 0 ) { |
7223
|
0
|
|
|
|
|
0
|
$is_pattern = 0; |
7224
|
0
|
|
|
|
|
0
|
$msg .= "division (pattern not possible)\n"; |
7225
|
|
|
|
|
|
|
} |
7226
|
|
|
|
|
|
|
|
7227
|
|
|
|
|
|
|
# Neither pattern nor divide look possible...go by spacing |
7228
|
|
|
|
|
|
|
else { |
7229
|
0
|
0
|
|
|
|
0
|
if ($is_pattern_by_spacing) { |
7230
|
0
|
|
|
|
|
0
|
$msg .= "pattern (guess on spacing)\n"; |
7231
|
0
|
|
|
|
|
0
|
$is_pattern = 1; |
7232
|
|
|
|
|
|
|
} |
7233
|
|
|
|
|
|
|
else { |
7234
|
0
|
|
|
|
|
0
|
$msg .= "division (guess on spacing)\n"; |
7235
|
0
|
|
|
|
|
0
|
$is_pattern = 0; |
7236
|
|
|
|
|
|
|
} |
7237
|
|
|
|
|
|
|
} |
7238
|
|
|
|
|
|
|
} |
7239
|
|
|
|
|
|
|
} |
7240
|
|
|
|
|
|
|
} |
7241
|
0
|
|
|
|
|
0
|
return ( $is_pattern, $msg ); |
7242
|
|
|
|
|
|
|
} ## end sub guess_if_pattern_or_division |
7243
|
|
|
|
|
|
|
|
7244
|
|
|
|
|
|
|
# try to resolve here-doc vs. shift by looking ahead for |
7245
|
|
|
|
|
|
|
# non-code or the end token (currently only looks for end token) |
7246
|
|
|
|
|
|
|
# returns 1 if it is probably a here doc, 0 if not |
7247
|
|
|
|
|
|
|
sub guess_if_here_doc { |
7248
|
|
|
|
|
|
|
|
7249
|
0
|
|
|
0
|
0
|
0
|
my ( $self, $next_token ) = @_; |
7250
|
|
|
|
|
|
|
|
7251
|
|
|
|
|
|
|
# This is how many lines we will search for a target as part of the |
7252
|
|
|
|
|
|
|
# guessing strategy. It is a constant because there is probably |
7253
|
|
|
|
|
|
|
# little reason to change it. |
7254
|
|
|
|
|
|
|
# USES GLOBAL VARIABLES: $current_package $ris_constant, |
7255
|
0
|
|
|
|
|
0
|
my $HERE_DOC_WINDOW = 40; |
7256
|
|
|
|
|
|
|
|
7257
|
0
|
|
|
|
|
0
|
my $here_doc_expected = 0; |
7258
|
0
|
|
|
|
|
0
|
my $line; |
7259
|
0
|
|
|
|
|
0
|
my $k = 0; |
7260
|
0
|
|
|
|
|
0
|
my $msg = "checking <<"; |
7261
|
|
|
|
|
|
|
|
7262
|
0
|
|
|
|
|
0
|
while ( $line = $self->peek_ahead( $k++ ) ) { |
7263
|
0
|
|
|
|
|
0
|
chomp $line; |
7264
|
|
|
|
|
|
|
|
7265
|
0
|
0
|
|
|
|
0
|
if ( $line =~ /^$next_token$/ ) { |
7266
|
0
|
|
|
|
|
0
|
$msg .= " -- found target $next_token ahead $k lines\n"; |
7267
|
0
|
|
|
|
|
0
|
$here_doc_expected = 1; # got it |
7268
|
0
|
|
|
|
|
0
|
last; |
7269
|
|
|
|
|
|
|
} |
7270
|
0
|
0
|
|
|
|
0
|
last if ( $k >= $HERE_DOC_WINDOW ); |
7271
|
|
|
|
|
|
|
} |
7272
|
|
|
|
|
|
|
|
7273
|
0
|
0
|
|
|
|
0
|
if ( !$here_doc_expected ) { |
7274
|
|
|
|
|
|
|
|
7275
|
0
|
0
|
|
|
|
0
|
if ( !defined($line) ) { |
7276
|
0
|
|
|
|
|
0
|
$here_doc_expected = -1; # hit eof without seeing target |
7277
|
0
|
|
|
|
|
0
|
$msg .= " -- must be shift; target $next_token not in file\n"; |
7278
|
|
|
|
|
|
|
|
7279
|
|
|
|
|
|
|
} |
7280
|
|
|
|
|
|
|
else { # still unsure..taking a wild guess |
7281
|
|
|
|
|
|
|
|
7282
|
0
|
0
|
|
|
|
0
|
if ( !$ris_constant->{$current_package}{$next_token} ) { |
7283
|
0
|
|
|
|
|
0
|
$here_doc_expected = 1; |
7284
|
0
|
|
|
|
|
0
|
$msg .= |
7285
|
|
|
|
|
|
|
" -- guessing it's a here-doc ($next_token not a constant)\n"; |
7286
|
|
|
|
|
|
|
} |
7287
|
|
|
|
|
|
|
else { |
7288
|
0
|
|
|
|
|
0
|
$msg .= |
7289
|
|
|
|
|
|
|
" -- guessing it's a shift ($next_token is a constant)\n"; |
7290
|
|
|
|
|
|
|
} |
7291
|
|
|
|
|
|
|
} |
7292
|
|
|
|
|
|
|
} |
7293
|
0
|
|
|
|
|
0
|
$self->write_logfile_entry($msg); |
7294
|
0
|
|
|
|
|
0
|
return $here_doc_expected; |
7295
|
|
|
|
|
|
|
} ## end sub guess_if_here_doc |
7296
|
|
|
|
|
|
|
|
7297
|
|
|
|
|
|
|
####################################################################### |
7298
|
|
|
|
|
|
|
# Tokenizer Routines for scanning identifiers and related items |
7299
|
|
|
|
|
|
|
####################################################################### |
7300
|
|
|
|
|
|
|
|
7301
|
|
|
|
|
|
|
sub scan_bare_identifier_do { |
7302
|
|
|
|
|
|
|
|
7303
|
|
|
|
|
|
|
# this routine is called to scan a token starting with an alphanumeric |
7304
|
|
|
|
|
|
|
# variable or package separator, :: or '. |
7305
|
|
|
|
|
|
|
# USES GLOBAL VARIABLES: $current_package, $last_nonblank_token, |
7306
|
|
|
|
|
|
|
# $last_nonblank_type, $rparen_type, $paren_depth |
7307
|
|
|
|
|
|
|
|
7308
|
1674
|
|
|
1674
|
0
|
5086
|
my ( $self, $input_line, $i, $tok, $type, $prototype, $rtoken_map, |
7309
|
|
|
|
|
|
|
$max_token_index ) |
7310
|
|
|
|
|
|
|
= @_; |
7311
|
1674
|
|
|
|
|
2893
|
my $i_begin = $i; |
7312
|
1674
|
|
|
|
|
2896
|
my $package = undef; |
7313
|
|
|
|
|
|
|
|
7314
|
1674
|
|
|
|
|
2557
|
my $i_beg = $i; |
7315
|
|
|
|
|
|
|
|
7316
|
|
|
|
|
|
|
# we have to back up one pretoken at a :: since each : is one pretoken |
7317
|
1674
|
100
|
|
|
|
4192
|
if ( $tok eq '::' ) { $i_beg-- } |
|
9
|
|
|
|
|
25
|
|
7318
|
1674
|
50
|
|
|
|
3903
|
if ( $tok eq '->' ) { $i_beg-- } |
|
0
|
|
|
|
|
0
|
|
7319
|
1674
|
|
|
|
|
3099
|
my $pos_beg = $rtoken_map->[$i_beg]; |
7320
|
1674
|
|
|
|
|
5097
|
pos($input_line) = $pos_beg; |
7321
|
|
|
|
|
|
|
|
7322
|
|
|
|
|
|
|
# Examples: |
7323
|
|
|
|
|
|
|
# A::B::C |
7324
|
|
|
|
|
|
|
# A:: |
7325
|
|
|
|
|
|
|
# ::A |
7326
|
|
|
|
|
|
|
# A'B |
7327
|
1674
|
50
|
|
|
|
12350
|
if ( $input_line =~ m/\G\s*((?:\w*(?:'|::)))*(?:(?:->)?(\w+))?/gc ) { |
7328
|
|
|
|
|
|
|
|
7329
|
1674
|
|
|
|
|
3373
|
my $pos = pos($input_line); |
7330
|
1674
|
|
|
|
|
2798
|
my $numc = $pos - $pos_beg; |
7331
|
1674
|
|
|
|
|
3805
|
$tok = substr( $input_line, $pos_beg, $numc ); |
7332
|
|
|
|
|
|
|
|
7333
|
|
|
|
|
|
|
# type 'w' includes anything without leading type info |
7334
|
|
|
|
|
|
|
# ($,%,@,*) including something like abc::def::ghi |
7335
|
1674
|
|
|
|
|
2787
|
$type = 'w'; |
7336
|
|
|
|
|
|
|
|
7337
|
1674
|
|
|
|
|
2912
|
my $sub_name = EMPTY_STRING; |
7338
|
1674
|
100
|
|
|
|
4762
|
if ( defined($2) ) { $sub_name = $2; } |
|
1669
|
|
|
|
|
3167
|
|
7339
|
1674
|
100
|
|
|
|
4242
|
if ( defined($1) ) { |
7340
|
274
|
|
|
|
|
669
|
$package = $1; |
7341
|
|
|
|
|
|
|
|
7342
|
|
|
|
|
|
|
# patch: don't allow isolated package name which just ends |
7343
|
|
|
|
|
|
|
# in the old style package separator (single quote). Example: |
7344
|
|
|
|
|
|
|
# use CGI':all'; |
7345
|
274
|
50
|
66
|
|
|
1162
|
if ( !($sub_name) && substr( $package, -1, 1 ) eq '\'' ) { |
7346
|
0
|
|
|
|
|
0
|
$pos--; |
7347
|
|
|
|
|
|
|
} |
7348
|
|
|
|
|
|
|
|
7349
|
274
|
|
|
|
|
763
|
$package =~ s/\'/::/g; |
7350
|
274
|
100
|
|
|
|
889
|
if ( $package =~ /^\:/ ) { $package = 'main' . $package } |
|
9
|
|
|
|
|
39
|
|
7351
|
274
|
|
|
|
|
1202
|
$package =~ s/::$//; |
7352
|
|
|
|
|
|
|
} |
7353
|
|
|
|
|
|
|
else { |
7354
|
1400
|
|
|
|
|
2698
|
$package = $current_package; |
7355
|
|
|
|
|
|
|
|
7356
|
|
|
|
|
|
|
# patched for c043, part 1: keyword does not follow '->' |
7357
|
1400
|
50
|
66
|
|
|
5737
|
if ( $is_keyword{$tok} && $last_nonblank_type ne '->' ) { |
7358
|
0
|
|
|
|
|
0
|
$type = 'k'; |
7359
|
|
|
|
|
|
|
} |
7360
|
|
|
|
|
|
|
} |
7361
|
|
|
|
|
|
|
|
7362
|
|
|
|
|
|
|
# if it is a bareword.. patched for c043, part 2: not following '->' |
7363
|
1674
|
100
|
66
|
|
|
7737
|
if ( $type eq 'w' && $last_nonblank_type ne '->' ) { |
7364
|
|
|
|
|
|
|
|
7365
|
|
|
|
|
|
|
# check for v-string with leading 'v' type character |
7366
|
|
|
|
|
|
|
# (This seems to have precedence over filehandle, type 'Y') |
7367
|
1004
|
100
|
66
|
|
|
14658
|
if ( $tok =~ /^v\d[_\d]*$/ ) { |
|
|
100
|
66
|
|
|
|
|
|
|
100
|
66
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
7368
|
|
|
|
|
|
|
|
7369
|
|
|
|
|
|
|
# we only have the first part - something like 'v101' - |
7370
|
|
|
|
|
|
|
# look for more |
7371
|
2
|
50
|
|
|
|
14
|
if ( $input_line =~ m/\G(\.\d[_\d]*)+/gc ) { |
7372
|
2
|
|
|
|
|
7
|
$pos = pos($input_line); |
7373
|
2
|
|
|
|
|
4
|
$numc = $pos - $pos_beg; |
7374
|
2
|
|
|
|
|
9
|
$tok = substr( $input_line, $pos_beg, $numc ); |
7375
|
|
|
|
|
|
|
} |
7376
|
2
|
|
|
|
|
4
|
$type = 'v'; |
7377
|
|
|
|
|
|
|
|
7378
|
|
|
|
|
|
|
# warn if this version can't handle v-strings |
7379
|
2
|
|
|
|
|
11
|
$self->report_v_string($tok); |
7380
|
|
|
|
|
|
|
} |
7381
|
|
|
|
|
|
|
|
7382
|
|
|
|
|
|
|
elsif ( $ris_constant->{$package}{$sub_name} ) { |
7383
|
12
|
|
|
|
|
26
|
$type = 'C'; |
7384
|
|
|
|
|
|
|
} |
7385
|
|
|
|
|
|
|
|
7386
|
|
|
|
|
|
|
# bareword after sort has implied empty prototype; for example: |
7387
|
|
|
|
|
|
|
# @sorted = sort numerically ( 53, 29, 11, 32, 7 ); |
7388
|
|
|
|
|
|
|
# This has priority over whatever the user has specified. |
7389
|
|
|
|
|
|
|
elsif ($last_nonblank_token eq 'sort' |
7390
|
|
|
|
|
|
|
&& $last_nonblank_type eq 'k' ) |
7391
|
|
|
|
|
|
|
{ |
7392
|
1
|
|
|
|
|
2
|
$type = 'Z'; |
7393
|
|
|
|
|
|
|
} |
7394
|
|
|
|
|
|
|
|
7395
|
|
|
|
|
|
|
# Note: strangely, perl does not seem to really let you create |
7396
|
|
|
|
|
|
|
# functions which act like eval and do, in the sense that eval |
7397
|
|
|
|
|
|
|
# and do may have operators following the final }, but any operators |
7398
|
|
|
|
|
|
|
# that you create with prototype (&) apparently do not allow |
7399
|
|
|
|
|
|
|
# trailing operators, only terms. This seems strange. |
7400
|
|
|
|
|
|
|
# If this ever changes, here is the update |
7401
|
|
|
|
|
|
|
# to make perltidy behave accordingly: |
7402
|
|
|
|
|
|
|
|
7403
|
|
|
|
|
|
|
# elsif ( $ris_block_function->{$package}{$tok} ) { |
7404
|
|
|
|
|
|
|
# $tok='eval'; # patch to do braces like eval - doesn't work |
7405
|
|
|
|
|
|
|
# $type = 'k'; |
7406
|
|
|
|
|
|
|
#} |
7407
|
|
|
|
|
|
|
# TODO: This could become a separate type to allow for different |
7408
|
|
|
|
|
|
|
# future behavior: |
7409
|
|
|
|
|
|
|
elsif ( $ris_block_function->{$package}{$sub_name} ) { |
7410
|
0
|
|
|
|
|
0
|
$type = 'G'; |
7411
|
|
|
|
|
|
|
} |
7412
|
|
|
|
|
|
|
elsif ( $ris_block_list_function->{$package}{$sub_name} ) { |
7413
|
0
|
|
|
|
|
0
|
$type = 'G'; |
7414
|
|
|
|
|
|
|
} |
7415
|
|
|
|
|
|
|
elsif ( $ris_user_function->{$package}{$sub_name} ) { |
7416
|
6
|
|
|
|
|
19
|
$type = 'U'; |
7417
|
6
|
|
|
|
|
22
|
$prototype = $ruser_function_prototype->{$package}{$sub_name}; |
7418
|
|
|
|
|
|
|
} |
7419
|
|
|
|
|
|
|
|
7420
|
|
|
|
|
|
|
# check for indirect object |
7421
|
|
|
|
|
|
|
elsif ( |
7422
|
|
|
|
|
|
|
|
7423
|
|
|
|
|
|
|
# added 2001-03-27: must not be followed immediately by '(' |
7424
|
|
|
|
|
|
|
# see fhandle.t |
7425
|
|
|
|
|
|
|
( $input_line !~ m/\G\(/gc ) |
7426
|
|
|
|
|
|
|
|
7427
|
|
|
|
|
|
|
# and |
7428
|
|
|
|
|
|
|
&& ( |
7429
|
|
|
|
|
|
|
|
7430
|
|
|
|
|
|
|
# preceded by keyword like 'print', 'printf' and friends |
7431
|
|
|
|
|
|
|
$is_indirect_object_taker{$last_nonblank_token} |
7432
|
|
|
|
|
|
|
|
7433
|
|
|
|
|
|
|
# or preceded by something like 'print(' or 'printf(' |
7434
|
|
|
|
|
|
|
|| ( |
7435
|
|
|
|
|
|
|
( $last_nonblank_token eq '(' ) |
7436
|
|
|
|
|
|
|
&& $is_indirect_object_taker{ |
7437
|
|
|
|
|
|
|
$rparen_type->[$paren_depth] |
7438
|
|
|
|
|
|
|
} |
7439
|
|
|
|
|
|
|
|
7440
|
|
|
|
|
|
|
) |
7441
|
|
|
|
|
|
|
) |
7442
|
|
|
|
|
|
|
) |
7443
|
|
|
|
|
|
|
{ |
7444
|
|
|
|
|
|
|
|
7445
|
|
|
|
|
|
|
# may not be indirect object unless followed by a space; |
7446
|
|
|
|
|
|
|
# updated 2021-01-16 to consider newline to be a space. |
7447
|
|
|
|
|
|
|
# updated for case b990 to look for either ';' or space |
7448
|
4
|
50
|
33
|
|
|
99
|
if ( pos($input_line) == length($input_line) |
7449
|
|
|
|
|
|
|
|| $input_line =~ m/\G[;\s]/gc ) |
7450
|
|
|
|
|
|
|
{ |
7451
|
4
|
|
|
|
|
13
|
$type = 'Y'; |
7452
|
|
|
|
|
|
|
|
7453
|
|
|
|
|
|
|
# Abandon Hope ... |
7454
|
|
|
|
|
|
|
# Perl's indirect object notation is a very bad |
7455
|
|
|
|
|
|
|
# thing and can cause subtle bugs, especially for |
7456
|
|
|
|
|
|
|
# beginning programmers. And I haven't even been |
7457
|
|
|
|
|
|
|
# able to figure out a sane warning scheme which |
7458
|
|
|
|
|
|
|
# doesn't get in the way of good scripts. |
7459
|
|
|
|
|
|
|
|
7460
|
|
|
|
|
|
|
# Complain if a filehandle has any lower case |
7461
|
|
|
|
|
|
|
# letters. This is suggested good practice. |
7462
|
|
|
|
|
|
|
# Use 'sub_name' because something like |
7463
|
|
|
|
|
|
|
# main::MYHANDLE is ok for filehandle |
7464
|
4
|
100
|
|
|
|
23
|
if ( $sub_name =~ /[a-z]/ ) { |
7465
|
|
|
|
|
|
|
|
7466
|
|
|
|
|
|
|
# could be bug caused by older perltidy if |
7467
|
|
|
|
|
|
|
# followed by '(' |
7468
|
1
|
50
|
|
|
|
6
|
if ( $input_line =~ m/\G\s*\(/gc ) { |
7469
|
1
|
|
|
|
|
8
|
$self->complain( |
7470
|
|
|
|
|
|
|
"Caution: unknown word '$tok' in indirect object slot\n" |
7471
|
|
|
|
|
|
|
); |
7472
|
|
|
|
|
|
|
} |
7473
|
|
|
|
|
|
|
} |
7474
|
|
|
|
|
|
|
} |
7475
|
|
|
|
|
|
|
|
7476
|
|
|
|
|
|
|
# bareword not followed by a space -- may not be filehandle |
7477
|
|
|
|
|
|
|
# (may be function call defined in a 'use' statement) |
7478
|
|
|
|
|
|
|
else { |
7479
|
0
|
|
|
|
|
0
|
$type = 'Z'; |
7480
|
|
|
|
|
|
|
} |
7481
|
|
|
|
|
|
|
} |
7482
|
|
|
|
|
|
|
|
7483
|
|
|
|
|
|
|
# none of the above special types |
7484
|
|
|
|
|
|
|
else { |
7485
|
|
|
|
|
|
|
} |
7486
|
|
|
|
|
|
|
} |
7487
|
|
|
|
|
|
|
|
7488
|
|
|
|
|
|
|
# Now we must convert back from character position |
7489
|
|
|
|
|
|
|
# to pre_token index. |
7490
|
|
|
|
|
|
|
# I don't think an error flag can occur here ..but who knows |
7491
|
1674
|
|
|
|
|
2854
|
my $error; |
7492
|
1674
|
|
|
|
|
4658
|
( $i, $error ) = |
7493
|
|
|
|
|
|
|
inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index ); |
7494
|
1674
|
50
|
|
|
|
4601
|
if ($error) { |
7495
|
0
|
|
|
|
|
0
|
$self->warning( |
7496
|
|
|
|
|
|
|
"scan_bare_identifier: Possibly invalid tokenization\n"); |
7497
|
|
|
|
|
|
|
} |
7498
|
|
|
|
|
|
|
} |
7499
|
|
|
|
|
|
|
|
7500
|
|
|
|
|
|
|
# no match but line not blank - could be syntax error |
7501
|
|
|
|
|
|
|
# perl will take '::' alone without complaint |
7502
|
|
|
|
|
|
|
else { |
7503
|
0
|
|
|
|
|
0
|
$type = 'w'; |
7504
|
|
|
|
|
|
|
|
7505
|
|
|
|
|
|
|
# change this warning to log message if it becomes annoying |
7506
|
0
|
|
|
|
|
0
|
$self->warning("didn't find identifier after leading ::\n"); |
7507
|
|
|
|
|
|
|
} |
7508
|
1674
|
|
|
|
|
6953
|
return ( $i, $tok, $type, $prototype ); |
7509
|
|
|
|
|
|
|
} ## end sub scan_bare_identifier_do |
7510
|
|
|
|
|
|
|
|
7511
|
|
|
|
|
|
|
sub scan_id_do { |
7512
|
|
|
|
|
|
|
|
7513
|
|
|
|
|
|
|
# This is the new scanner and will eventually replace scan_identifier. |
7514
|
|
|
|
|
|
|
# Only type 'sub' and 'package' are implemented. |
7515
|
|
|
|
|
|
|
# Token types $ * % @ & -> are not yet implemented. |
7516
|
|
|
|
|
|
|
# |
7517
|
|
|
|
|
|
|
# Scan identifier following a type token. |
7518
|
|
|
|
|
|
|
# The type of call depends on $id_scan_state: $id_scan_state = '' |
7519
|
|
|
|
|
|
|
# for starting call, in which case $tok must be the token defining |
7520
|
|
|
|
|
|
|
# the type. |
7521
|
|
|
|
|
|
|
# |
7522
|
|
|
|
|
|
|
# If the type token is the last nonblank token on the line, a value |
7523
|
|
|
|
|
|
|
# of $id_scan_state = $tok is returned, indicating that further |
7524
|
|
|
|
|
|
|
# calls must be made to get the identifier. If the type token is |
7525
|
|
|
|
|
|
|
# not the last nonblank token on the line, the identifier is |
7526
|
|
|
|
|
|
|
# scanned and handled and a value of '' is returned. |
7527
|
|
|
|
|
|
|
|
7528
|
332
|
|
|
332
|
0
|
1256
|
my ( $self, $input_line, $i, $tok, $rtokens, $rtoken_map, $id_scan_state, |
7529
|
|
|
|
|
|
|
$max_token_index ) |
7530
|
|
|
|
|
|
|
= @_; |
7531
|
39
|
|
|
39
|
|
474
|
use constant DEBUG_NSCAN => 0; |
|
39
|
|
|
|
|
104
|
|
|
39
|
|
|
|
|
49441
|
|
7532
|
332
|
|
|
|
|
705
|
my $type = EMPTY_STRING; |
7533
|
332
|
|
|
|
|
680
|
my ( $i_beg, $pos_beg ); |
7534
|
|
|
|
|
|
|
|
7535
|
|
|
|
|
|
|
#print "NSCAN:entering i=$i, tok=$tok, type=$type, state=$id_scan_state\n"; |
7536
|
|
|
|
|
|
|
#my ($a,$b,$c) = caller; |
7537
|
|
|
|
|
|
|
#print "NSCAN: scan_id called with tok=$tok $a $b $c\n"; |
7538
|
|
|
|
|
|
|
|
7539
|
|
|
|
|
|
|
# on re-entry, start scanning at first token on the line |
7540
|
332
|
100
|
|
|
|
951
|
if ($id_scan_state) { |
7541
|
10
|
|
|
|
|
20
|
$i_beg = $i; |
7542
|
10
|
|
|
|
|
27
|
$type = EMPTY_STRING; |
7543
|
|
|
|
|
|
|
} |
7544
|
|
|
|
|
|
|
|
7545
|
|
|
|
|
|
|
# on initial entry, start scanning just after type token |
7546
|
|
|
|
|
|
|
else { |
7547
|
322
|
|
|
|
|
621
|
$i_beg = $i + 1; |
7548
|
322
|
|
|
|
|
572
|
$id_scan_state = $tok; |
7549
|
322
|
|
|
|
|
735
|
$type = 't'; |
7550
|
|
|
|
|
|
|
} |
7551
|
|
|
|
|
|
|
|
7552
|
|
|
|
|
|
|
# find $i_beg = index of next nonblank token, |
7553
|
|
|
|
|
|
|
# and handle empty lines |
7554
|
332
|
|
|
|
|
602
|
my $blank_line = 0; |
7555
|
332
|
|
|
|
|
767
|
my $next_nonblank_token = $rtokens->[$i_beg]; |
7556
|
332
|
100
|
|
|
|
937
|
if ( $i_beg > $max_token_index ) { |
7557
|
2
|
|
|
|
|
5
|
$blank_line = 1; |
7558
|
|
|
|
|
|
|
} |
7559
|
|
|
|
|
|
|
else { |
7560
|
|
|
|
|
|
|
|
7561
|
|
|
|
|
|
|
# only a '#' immediately after a '$' is not a comment |
7562
|
330
|
50
|
|
|
|
1084
|
if ( $next_nonblank_token eq '#' ) { |
7563
|
0
|
0
|
|
|
|
0
|
if ( $tok ne '$' ) { |
7564
|
0
|
|
|
|
|
0
|
$blank_line = 1; |
7565
|
|
|
|
|
|
|
} |
7566
|
|
|
|
|
|
|
} |
7567
|
|
|
|
|
|
|
|
7568
|
330
|
100
|
|
|
|
1597
|
if ( $next_nonblank_token =~ /^\s/ ) { |
7569
|
310
|
|
|
|
|
1288
|
( $next_nonblank_token, $i_beg ) = |
7570
|
|
|
|
|
|
|
find_next_nonblank_token_on_this_line( $i_beg, $rtokens, |
7571
|
|
|
|
|
|
|
$max_token_index ); |
7572
|
310
|
100
|
|
|
|
1740
|
if ( $next_nonblank_token =~ /(^#|^\s*$)/ ) { |
7573
|
4
|
|
|
|
|
12
|
$blank_line = 1; |
7574
|
|
|
|
|
|
|
} |
7575
|
|
|
|
|
|
|
} |
7576
|
|
|
|
|
|
|
} |
7577
|
|
|
|
|
|
|
|
7578
|
|
|
|
|
|
|
# handle non-blank line; identifier, if any, must follow |
7579
|
332
|
100
|
|
|
|
971
|
if ( !$blank_line ) { |
7580
|
|
|
|
|
|
|
|
7581
|
326
|
100
|
|
|
|
1004
|
if ( $is_sub{$id_scan_state} ) { |
|
|
50
|
|
|
|
|
|
7582
|
300
|
|
|
|
|
3371
|
( $i, $tok, $type, $id_scan_state ) = $self->do_scan_sub( |
7583
|
|
|
|
|
|
|
{ |
7584
|
|
|
|
|
|
|
input_line => $input_line, |
7585
|
|
|
|
|
|
|
i => $i, |
7586
|
|
|
|
|
|
|
i_beg => $i_beg, |
7587
|
|
|
|
|
|
|
tok => $tok, |
7588
|
|
|
|
|
|
|
type => $type, |
7589
|
|
|
|
|
|
|
rtokens => $rtokens, |
7590
|
|
|
|
|
|
|
rtoken_map => $rtoken_map, |
7591
|
|
|
|
|
|
|
id_scan_state => $id_scan_state, |
7592
|
|
|
|
|
|
|
max_token_index => $max_token_index, |
7593
|
|
|
|
|
|
|
} |
7594
|
|
|
|
|
|
|
); |
7595
|
|
|
|
|
|
|
} |
7596
|
|
|
|
|
|
|
|
7597
|
|
|
|
|
|
|
elsif ( $is_package{$id_scan_state} ) { |
7598
|
26
|
|
|
|
|
97
|
( $i, $tok, $type ) = |
7599
|
|
|
|
|
|
|
$self->do_scan_package( $input_line, $i, $i_beg, $tok, $type, |
7600
|
|
|
|
|
|
|
$rtokens, $rtoken_map, $max_token_index ); |
7601
|
26
|
|
|
|
|
68
|
$id_scan_state = EMPTY_STRING; |
7602
|
|
|
|
|
|
|
} |
7603
|
|
|
|
|
|
|
|
7604
|
|
|
|
|
|
|
else { |
7605
|
0
|
|
|
|
|
0
|
$self->warning("invalid token in scan_id: $tok\n"); |
7606
|
0
|
|
|
|
|
0
|
$id_scan_state = EMPTY_STRING; |
7607
|
|
|
|
|
|
|
} |
7608
|
|
|
|
|
|
|
} |
7609
|
|
|
|
|
|
|
|
7610
|
332
|
50
|
33
|
|
|
1899
|
if ( $id_scan_state && ( !defined($type) || !$type ) ) { |
|
|
|
66
|
|
|
|
|
7611
|
|
|
|
|
|
|
|
7612
|
|
|
|
|
|
|
# shouldn't happen: |
7613
|
0
|
|
|
|
|
0
|
if (DEVEL_MODE) { |
7614
|
|
|
|
|
|
|
$self->Fault(<<EOM); |
7615
|
|
|
|
|
|
|
Program bug in scan_id: undefined type but scan_state=$id_scan_state |
7616
|
|
|
|
|
|
|
EOM |
7617
|
|
|
|
|
|
|
} |
7618
|
|
|
|
|
|
|
$self->warning( |
7619
|
0
|
|
|
|
|
0
|
"Possible program bug in sub scan_id: undefined type but scan_state=$id_scan_state\n" |
7620
|
|
|
|
|
|
|
); |
7621
|
0
|
|
|
|
|
0
|
$self->report_definite_bug(); |
7622
|
|
|
|
|
|
|
} |
7623
|
|
|
|
|
|
|
|
7624
|
332
|
|
|
|
|
557
|
DEBUG_NSCAN && do { |
7625
|
|
|
|
|
|
|
print {*STDOUT} |
7626
|
|
|
|
|
|
|
"NSCAN: returns i=$i, tok=$tok, type=$type, state=$id_scan_state\n"; |
7627
|
|
|
|
|
|
|
}; |
7628
|
332
|
|
|
|
|
1444
|
return ( $i, $tok, $type, $id_scan_state ); |
7629
|
|
|
|
|
|
|
} ## end sub scan_id_do |
7630
|
|
|
|
|
|
|
|
7631
|
|
|
|
|
|
|
sub check_prototype { |
7632
|
138
|
|
|
138
|
0
|
482
|
my ( $proto, $package, $subname ) = @_; |
7633
|
138
|
50
|
|
|
|
446
|
return if ( !defined($package) ); |
7634
|
138
|
50
|
|
|
|
450
|
return if ( !defined($subname) ); |
7635
|
138
|
100
|
|
|
|
396
|
if ( defined($proto) ) { |
7636
|
34
|
|
|
|
|
174
|
$proto =~ s/^\s*\(\s*//; |
7637
|
34
|
|
|
|
|
130
|
$proto =~ s/\s*\)$//; |
7638
|
34
|
100
|
|
|
|
103
|
if ($proto) { |
7639
|
5
|
|
|
|
|
26
|
$ris_user_function->{$package}{$subname} = 1; |
7640
|
5
|
|
|
|
|
26
|
$ruser_function_prototype->{$package}{$subname} = "($proto)"; |
7641
|
|
|
|
|
|
|
|
7642
|
|
|
|
|
|
|
# prototypes containing '&' must be treated specially.. |
7643
|
5
|
100
|
|
|
|
29
|
if ( $proto =~ /\&/ ) { |
7644
|
|
|
|
|
|
|
|
7645
|
|
|
|
|
|
|
# right curly braces of prototypes ending in |
7646
|
|
|
|
|
|
|
# '&' may be followed by an operator |
7647
|
1
|
50
|
|
|
|
9
|
if ( $proto =~ /\&$/ ) { |
7648
|
0
|
|
|
|
|
0
|
$ris_block_function->{$package}{$subname} = 1; |
7649
|
|
|
|
|
|
|
} |
7650
|
|
|
|
|
|
|
|
7651
|
|
|
|
|
|
|
# right curly braces of prototypes NOT ending in |
7652
|
|
|
|
|
|
|
# '&' may NOT be followed by an operator |
7653
|
|
|
|
|
|
|
else { |
7654
|
1
|
|
|
|
|
5
|
$ris_block_list_function->{$package}{$subname} = 1; |
7655
|
|
|
|
|
|
|
} |
7656
|
|
|
|
|
|
|
} |
7657
|
|
|
|
|
|
|
} |
7658
|
|
|
|
|
|
|
else { |
7659
|
29
|
|
|
|
|
89
|
$ris_constant->{$package}{$subname} = 1; |
7660
|
|
|
|
|
|
|
} |
7661
|
|
|
|
|
|
|
} |
7662
|
|
|
|
|
|
|
else { |
7663
|
104
|
|
|
|
|
400
|
$ris_user_function->{$package}{$subname} = 1; |
7664
|
|
|
|
|
|
|
} |
7665
|
138
|
|
|
|
|
358
|
return; |
7666
|
|
|
|
|
|
|
} ## end sub check_prototype |
7667
|
|
|
|
|
|
|
|
7668
|
|
|
|
|
|
|
sub do_scan_package { |
7669
|
|
|
|
|
|
|
|
7670
|
|
|
|
|
|
|
# do_scan_package parses a package name |
7671
|
|
|
|
|
|
|
# it is called with $i_beg equal to the index of the first nonblank |
7672
|
|
|
|
|
|
|
# token following a 'package' token. |
7673
|
|
|
|
|
|
|
# USES GLOBAL VARIABLES: $current_package, |
7674
|
|
|
|
|
|
|
|
7675
|
|
|
|
|
|
|
# package NAMESPACE |
7676
|
|
|
|
|
|
|
# package NAMESPACE VERSION |
7677
|
|
|
|
|
|
|
# package NAMESPACE BLOCK |
7678
|
|
|
|
|
|
|
# package NAMESPACE VERSION BLOCK |
7679
|
|
|
|
|
|
|
# |
7680
|
|
|
|
|
|
|
# If VERSION is provided, package sets the $VERSION variable in the given |
7681
|
|
|
|
|
|
|
# namespace to a version object with the VERSION provided. VERSION must be |
7682
|
|
|
|
|
|
|
# a "strict" style version number as defined by the version module: a |
7683
|
|
|
|
|
|
|
# positive decimal number (integer or decimal-fraction) without |
7684
|
|
|
|
|
|
|
# exponentiation or else a dotted-decimal v-string with a leading 'v' |
7685
|
|
|
|
|
|
|
# character and at least three components. |
7686
|
|
|
|
|
|
|
# reference http://perldoc.perl.org/functions/package.html |
7687
|
|
|
|
|
|
|
|
7688
|
|
|
|
|
|
|
my ( |
7689
|
26
|
|
|
26
|
0
|
87
|
$self, $input_line, $i, |
7690
|
|
|
|
|
|
|
$i_beg, $tok, $type, |
7691
|
|
|
|
|
|
|
$rtokens, $rtoken_map, $max_token_index |
7692
|
|
|
|
|
|
|
) = @_; |
7693
|
26
|
|
|
|
|
48
|
my $package = undef; |
7694
|
26
|
|
|
|
|
52
|
my $pos_beg = $rtoken_map->[$i_beg]; |
7695
|
26
|
|
|
|
|
79
|
pos($input_line) = $pos_beg; |
7696
|
|
|
|
|
|
|
|
7697
|
|
|
|
|
|
|
# handle non-blank line; package name, if any, must follow |
7698
|
26
|
50
|
|
|
|
154
|
if ( $input_line =~ m/\G\s*((?:\w*(?:'|::))*\w*)/gc ) { |
7699
|
26
|
|
|
|
|
103
|
$package = $1; |
7700
|
26
|
50
|
33
|
|
|
165
|
$package = ( defined($1) && $1 ) ? $1 : 'main'; |
7701
|
26
|
|
|
|
|
77
|
$package =~ s/\'/::/g; |
7702
|
26
|
50
|
|
|
|
83
|
if ( $package =~ /^\:/ ) { $package = 'main' . $package } |
|
0
|
|
|
|
|
0
|
|
7703
|
26
|
|
|
|
|
56
|
$package =~ s/::$//; |
7704
|
26
|
|
|
|
|
47
|
my $pos = pos($input_line); |
7705
|
26
|
|
|
|
|
47
|
my $numc = $pos - $pos_beg; |
7706
|
26
|
|
|
|
|
77
|
$tok = 'package ' . substr( $input_line, $pos_beg, $numc ); |
7707
|
26
|
|
|
|
|
49
|
$type = 'P'; # Fix for c250, previously 'i' |
7708
|
|
|
|
|
|
|
|
7709
|
|
|
|
|
|
|
# Now we must convert back from character position |
7710
|
|
|
|
|
|
|
# to pre_token index. |
7711
|
|
|
|
|
|
|
# I don't think an error flag can occur here ..but ? |
7712
|
26
|
|
|
|
|
43
|
my $error; |
7713
|
26
|
|
|
|
|
111
|
( $i, $error ) = |
7714
|
|
|
|
|
|
|
inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index ); |
7715
|
26
|
50
|
|
|
|
79
|
if ($error) { $self->warning("Possibly invalid package\n") } |
|
0
|
|
|
|
|
0
|
|
7716
|
26
|
|
|
|
|
55
|
$current_package = $package; |
7717
|
|
|
|
|
|
|
|
7718
|
|
|
|
|
|
|
# we should now have package NAMESPACE |
7719
|
|
|
|
|
|
|
# now expecting VERSION, BLOCK, or ; to follow ... |
7720
|
|
|
|
|
|
|
# package NAMESPACE VERSION |
7721
|
|
|
|
|
|
|
# package NAMESPACE BLOCK |
7722
|
|
|
|
|
|
|
# package NAMESPACE VERSION BLOCK |
7723
|
26
|
|
|
|
|
80
|
my ( $next_nonblank_token, $i_next ) = |
7724
|
|
|
|
|
|
|
$self->find_next_nonblank_token( $i, $rtokens, $max_token_index ); |
7725
|
|
|
|
|
|
|
|
7726
|
|
|
|
|
|
|
# check that something recognizable follows, but do not parse. |
7727
|
|
|
|
|
|
|
# A VERSION number will be parsed later as a number or v-string in the |
7728
|
|
|
|
|
|
|
# normal way. What is important is to set the statement type if |
7729
|
|
|
|
|
|
|
# everything looks okay so that the operator_expected() routine |
7730
|
|
|
|
|
|
|
# knows that the number is in a package statement. |
7731
|
|
|
|
|
|
|
# Examples of valid primitive tokens that might follow are: |
7732
|
|
|
|
|
|
|
# 1235 . ; { } v3 v |
7733
|
|
|
|
|
|
|
# FIX: added a '#' since a side comment may also follow |
7734
|
|
|
|
|
|
|
# Added ':' for class attributes (for --use-feature=class, rt145706) |
7735
|
26
|
50
|
|
|
|
143
|
if ( $next_nonblank_token =~ /^([v\.\d;\{\}\#\:])|v\d|\d+$/ ) { |
7736
|
26
|
|
|
|
|
75
|
$statement_type = $tok; |
7737
|
|
|
|
|
|
|
} |
7738
|
|
|
|
|
|
|
else { |
7739
|
0
|
|
|
|
|
0
|
$self->warning( |
7740
|
|
|
|
|
|
|
"Unexpected '$next_nonblank_token' after package name '$tok'\n" |
7741
|
|
|
|
|
|
|
); |
7742
|
|
|
|
|
|
|
} |
7743
|
|
|
|
|
|
|
} |
7744
|
|
|
|
|
|
|
|
7745
|
|
|
|
|
|
|
# no match but line not blank -- |
7746
|
|
|
|
|
|
|
# could be a label with name package, like package: , for example. |
7747
|
|
|
|
|
|
|
else { |
7748
|
0
|
|
|
|
|
0
|
$type = 'k'; |
7749
|
|
|
|
|
|
|
} |
7750
|
|
|
|
|
|
|
|
7751
|
26
|
|
|
|
|
98
|
return ( $i, $tok, $type ); |
7752
|
|
|
|
|
|
|
} ## end sub do_scan_package |
7753
|
|
|
|
|
|
|
|
7754
|
|
|
|
|
|
|
{ ## begin closure for sub scan_complex_identifier |
7755
|
|
|
|
|
|
|
|
7756
|
39
|
|
|
39
|
|
405
|
use constant DEBUG_SCAN_ID => 0; |
|
39
|
|
|
|
|
130
|
|
|
39
|
|
|
|
|
5324
|
|
7757
|
|
|
|
|
|
|
|
7758
|
|
|
|
|
|
|
# Constant hash: |
7759
|
|
|
|
|
|
|
my %is_special_variable_char; |
7760
|
|
|
|
|
|
|
|
7761
|
|
|
|
|
|
|
BEGIN { |
7762
|
|
|
|
|
|
|
|
7763
|
|
|
|
|
|
|
# These are the only characters which can (currently) form special |
7764
|
|
|
|
|
|
|
# variables, like $^W: (issue c066). |
7765
|
39
|
|
|
39
|
|
297
|
my @q = |
7766
|
|
|
|
|
|
|
qw{ ? A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ }; |
7767
|
39
|
|
|
|
|
146061
|
@{is_special_variable_char}{@q} = (1) x scalar(@q); |
7768
|
|
|
|
|
|
|
} ## end BEGIN |
7769
|
|
|
|
|
|
|
|
7770
|
|
|
|
|
|
|
# These are the possible states for this scanner: |
7771
|
|
|
|
|
|
|
my $scan_state_SIGIL = '$'; |
7772
|
|
|
|
|
|
|
my $scan_state_ALPHA = 'A'; |
7773
|
|
|
|
|
|
|
my $scan_state_COLON = ':'; |
7774
|
|
|
|
|
|
|
my $scan_state_LPAREN = '('; |
7775
|
|
|
|
|
|
|
my $scan_state_RPAREN = ')'; |
7776
|
|
|
|
|
|
|
my $scan_state_AMPERSAND = '&'; |
7777
|
|
|
|
|
|
|
my $scan_state_SPLIT = '^'; |
7778
|
|
|
|
|
|
|
|
7779
|
|
|
|
|
|
|
# Only these non-blank states may be returned to caller: |
7780
|
|
|
|
|
|
|
my %is_returnable_scan_state = ( |
7781
|
|
|
|
|
|
|
$scan_state_SIGIL => 1, |
7782
|
|
|
|
|
|
|
$scan_state_AMPERSAND => 1, |
7783
|
|
|
|
|
|
|
); |
7784
|
|
|
|
|
|
|
|
7785
|
|
|
|
|
|
|
# USES GLOBAL VARIABLES: |
7786
|
|
|
|
|
|
|
# $context, $last_nonblank_token, $last_nonblank_type |
7787
|
|
|
|
|
|
|
|
7788
|
|
|
|
|
|
|
#----------- |
7789
|
|
|
|
|
|
|
# call args: |
7790
|
|
|
|
|
|
|
#----------- |
7791
|
|
|
|
|
|
|
my ( $i, $id_scan_state, $identifier, $rtokens, $max_token_index, |
7792
|
|
|
|
|
|
|
$expecting, $container_type ); |
7793
|
|
|
|
|
|
|
|
7794
|
|
|
|
|
|
|
#------------------------------------------- |
7795
|
|
|
|
|
|
|
# my variables, re-initialized on each call: |
7796
|
|
|
|
|
|
|
#------------------------------------------- |
7797
|
|
|
|
|
|
|
my $i_begin; # starting index $i |
7798
|
|
|
|
|
|
|
my $type; # returned identifier type |
7799
|
|
|
|
|
|
|
my $tok_begin; # starting token |
7800
|
|
|
|
|
|
|
my $tok; # returned token |
7801
|
|
|
|
|
|
|
my $id_scan_state_begin; # starting scan state |
7802
|
|
|
|
|
|
|
my $identifier_begin; # starting identifier |
7803
|
|
|
|
|
|
|
my $i_save; # a last good index, in case of error |
7804
|
|
|
|
|
|
|
my $message; # hold error message for log file |
7805
|
|
|
|
|
|
|
my $tok_is_blank; |
7806
|
|
|
|
|
|
|
my $last_tok_is_blank; |
7807
|
|
|
|
|
|
|
my $in_prototype_or_signature; |
7808
|
|
|
|
|
|
|
my $saw_alpha; |
7809
|
|
|
|
|
|
|
my $saw_type; |
7810
|
|
|
|
|
|
|
my $allow_tick; |
7811
|
|
|
|
|
|
|
|
7812
|
|
|
|
|
|
|
sub initialize_my_scan_id_vars { |
7813
|
|
|
|
|
|
|
|
7814
|
|
|
|
|
|
|
# Initialize all 'my' vars on entry |
7815
|
486
|
|
|
486
|
0
|
844
|
$i_begin = $i; |
7816
|
486
|
|
|
|
|
882
|
$type = EMPTY_STRING; |
7817
|
486
|
|
|
|
|
970
|
$tok_begin = $rtokens->[$i_begin]; |
7818
|
486
|
|
|
|
|
917
|
$tok = $tok_begin; |
7819
|
486
|
50
|
|
|
|
1374
|
if ( $tok_begin eq ':' ) { $tok_begin = '::' } |
|
0
|
|
|
|
|
0
|
|
7820
|
486
|
|
|
|
|
833
|
$id_scan_state_begin = $id_scan_state; |
7821
|
486
|
|
|
|
|
1839
|
$identifier_begin = $identifier; |
7822
|
486
|
|
|
|
|
842
|
$i_save = undef; |
7823
|
|
|
|
|
|
|
|
7824
|
486
|
|
|
|
|
1146
|
$message = EMPTY_STRING; |
7825
|
486
|
|
|
|
|
839
|
$tok_is_blank = undef; # a flag to speed things up |
7826
|
486
|
|
|
|
|
773
|
$last_tok_is_blank = undef; |
7827
|
|
|
|
|
|
|
|
7828
|
486
|
|
100
|
|
|
1916
|
$in_prototype_or_signature = |
7829
|
|
|
|
|
|
|
$container_type && $container_type =~ /^sub\b/; |
7830
|
|
|
|
|
|
|
|
7831
|
|
|
|
|
|
|
# these flags will be used to help figure out the type: |
7832
|
486
|
|
|
|
|
819
|
$saw_alpha = undef; |
7833
|
486
|
|
|
|
|
783
|
$saw_type = undef; |
7834
|
|
|
|
|
|
|
|
7835
|
|
|
|
|
|
|
# allow old package separator (') except in 'use' statement |
7836
|
486
|
|
|
|
|
932
|
$allow_tick = ( $last_nonblank_token ne 'use' ); |
7837
|
486
|
|
|
|
|
904
|
return; |
7838
|
|
|
|
|
|
|
} ## end sub initialize_my_scan_id_vars |
7839
|
|
|
|
|
|
|
|
7840
|
|
|
|
|
|
|
#---------------------------------- |
7841
|
|
|
|
|
|
|
# Routines for handling scan states |
7842
|
|
|
|
|
|
|
#---------------------------------- |
7843
|
|
|
|
|
|
|
sub do_id_scan_state_dollar { |
7844
|
|
|
|
|
|
|
|
7845
|
514
|
|
|
514
|
0
|
956
|
my $self = shift; |
7846
|
|
|
|
|
|
|
|
7847
|
|
|
|
|
|
|
# We saw a sigil, now looking to start a variable name |
7848
|
514
|
100
|
66
|
|
|
4617
|
if ( $tok eq '$' ) { |
|
|
100
|
33
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
7849
|
|
|
|
|
|
|
|
7850
|
31
|
|
|
|
|
159
|
$identifier .= $tok; |
7851
|
|
|
|
|
|
|
|
7852
|
|
|
|
|
|
|
# we've got a punctuation variable if end of line (punct.t) |
7853
|
31
|
50
|
|
|
|
194
|
if ( $i == $max_token_index ) { |
7854
|
0
|
|
|
|
|
0
|
$type = 'i'; |
7855
|
0
|
|
|
|
|
0
|
$id_scan_state = EMPTY_STRING; |
7856
|
|
|
|
|
|
|
} |
7857
|
|
|
|
|
|
|
} |
7858
|
|
|
|
|
|
|
elsif ( $tok =~ /^\w/ ) { # alphanumeric .. |
7859
|
253
|
|
|
|
|
541
|
$saw_alpha = 1; |
7860
|
253
|
|
|
|
|
530
|
$identifier .= $tok; |
7861
|
|
|
|
|
|
|
|
7862
|
|
|
|
|
|
|
# now need :: except for special digit vars like '$1' (c208) |
7863
|
253
|
100
|
|
|
|
900
|
$id_scan_state = $tok =~ /^\d/ ? EMPTY_STRING : $scan_state_COLON; |
7864
|
|
|
|
|
|
|
} |
7865
|
|
|
|
|
|
|
elsif ( $tok eq '::' ) { |
7866
|
16
|
|
|
|
|
55
|
$id_scan_state = $scan_state_ALPHA; |
7867
|
16
|
|
|
|
|
46
|
$identifier .= $tok; |
7868
|
|
|
|
|
|
|
} |
7869
|
|
|
|
|
|
|
|
7870
|
|
|
|
|
|
|
# POSTDEFREF ->@ ->% ->& ->* |
7871
|
|
|
|
|
|
|
elsif ( ( $tok =~ /^[\@\%\&\*]$/ ) && $identifier =~ /\-\>$/ ) { |
7872
|
0
|
|
|
|
|
0
|
$identifier .= $tok; |
7873
|
|
|
|
|
|
|
} |
7874
|
|
|
|
|
|
|
elsif ( $tok eq "'" && $allow_tick ) { # alphanumeric .. |
7875
|
0
|
|
|
|
|
0
|
$saw_alpha = 1; |
7876
|
0
|
|
|
|
|
0
|
$id_scan_state = $scan_state_COLON; # now need :: |
7877
|
0
|
|
|
|
|
0
|
$identifier .= $tok; |
7878
|
|
|
|
|
|
|
|
7879
|
|
|
|
|
|
|
# Perl will accept leading digits in identifiers, |
7880
|
|
|
|
|
|
|
# although they may not always produce useful results. |
7881
|
|
|
|
|
|
|
# Something like $main::0 is ok. But this also works: |
7882
|
|
|
|
|
|
|
# |
7883
|
|
|
|
|
|
|
# sub howdy::123::bubba{ print "bubba $54321!\n" } |
7884
|
|
|
|
|
|
|
# howdy::123::bubba(); |
7885
|
|
|
|
|
|
|
# |
7886
|
|
|
|
|
|
|
} |
7887
|
|
|
|
|
|
|
elsif ( $tok eq '#' ) { |
7888
|
|
|
|
|
|
|
|
7889
|
99
|
|
|
|
|
221
|
my $is_punct_var = $identifier eq '$$'; |
7890
|
|
|
|
|
|
|
|
7891
|
|
|
|
|
|
|
# side comment or identifier? |
7892
|
99
|
100
|
66
|
|
|
1099
|
if ( |
|
|
|
66
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
|
33
|
|
|
|
|
7893
|
|
|
|
|
|
|
|
7894
|
|
|
|
|
|
|
# A '#' starts a comment if it follows a space. For example, |
7895
|
|
|
|
|
|
|
# the following is equivalent to $ans=40. |
7896
|
|
|
|
|
|
|
# my $ # |
7897
|
|
|
|
|
|
|
# ans = 40; |
7898
|
|
|
|
|
|
|
!$last_tok_is_blank |
7899
|
|
|
|
|
|
|
|
7900
|
|
|
|
|
|
|
# a # inside a prototype or signature can only start a |
7901
|
|
|
|
|
|
|
# comment |
7902
|
|
|
|
|
|
|
&& !$in_prototype_or_signature |
7903
|
|
|
|
|
|
|
|
7904
|
|
|
|
|
|
|
# these are valid punctuation vars: *# %# @# $# |
7905
|
|
|
|
|
|
|
# May also be '$#array' or POSTDEFREF ->$# |
7906
|
|
|
|
|
|
|
&& ( $identifier =~ /^[\%\@\$\*]$/ |
7907
|
|
|
|
|
|
|
|| $identifier =~ /\$$/ ) |
7908
|
|
|
|
|
|
|
|
7909
|
|
|
|
|
|
|
# but a '#' after '$$' is a side comment; see c147 |
7910
|
|
|
|
|
|
|
&& !$is_punct_var |
7911
|
|
|
|
|
|
|
|
7912
|
|
|
|
|
|
|
) |
7913
|
|
|
|
|
|
|
{ |
7914
|
95
|
|
|
|
|
260
|
$identifier .= $tok; # keep same state, a $ could follow |
7915
|
|
|
|
|
|
|
} |
7916
|
|
|
|
|
|
|
else { |
7917
|
|
|
|
|
|
|
|
7918
|
|
|
|
|
|
|
# otherwise it is a side comment |
7919
|
4
|
50
|
|
|
|
18
|
if ( $identifier eq '->' ) { } |
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
7920
|
0
|
|
|
|
|
0
|
elsif ($is_punct_var) { $type = 'i' } |
7921
|
4
|
|
|
|
|
8
|
elsif ( $id_scan_state eq $scan_state_SIGIL ) { $type = 't' } |
7922
|
0
|
|
|
|
|
0
|
else { $type = 'i' } |
7923
|
4
|
|
|
|
|
9
|
$i = $i_save; |
7924
|
4
|
|
|
|
|
9
|
$id_scan_state = EMPTY_STRING; |
7925
|
|
|
|
|
|
|
} |
7926
|
|
|
|
|
|
|
} |
7927
|
|
|
|
|
|
|
|
7928
|
|
|
|
|
|
|
elsif ( $tok eq '{' ) { |
7929
|
|
|
|
|
|
|
|
7930
|
|
|
|
|
|
|
# check for something like ${#} or ${?}, where ? is a special char |
7931
|
38
|
100
|
100
|
|
|
529
|
if ( |
|
|
|
66
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
7932
|
|
|
|
|
|
|
( |
7933
|
|
|
|
|
|
|
$identifier eq '$' |
7934
|
|
|
|
|
|
|
|| $identifier eq '@' |
7935
|
|
|
|
|
|
|
|| $identifier eq '$#' |
7936
|
|
|
|
|
|
|
) |
7937
|
|
|
|
|
|
|
&& $i + 2 <= $max_token_index |
7938
|
|
|
|
|
|
|
&& $rtokens->[ $i + 2 ] eq '}' |
7939
|
|
|
|
|
|
|
&& $rtokens->[ $i + 1 ] !~ /[\s\w]/ |
7940
|
|
|
|
|
|
|
) |
7941
|
|
|
|
|
|
|
{ |
7942
|
1
|
|
|
|
|
7
|
my $next2 = $rtokens->[ $i + 2 ]; |
7943
|
1
|
|
|
|
|
6
|
my $next1 = $rtokens->[ $i + 1 ]; |
7944
|
1
|
|
|
|
|
3
|
$identifier .= $tok . $next1 . $next2; |
7945
|
1
|
|
|
|
|
4
|
$i += 2; |
7946
|
1
|
|
|
|
|
3
|
$id_scan_state = EMPTY_STRING; |
7947
|
|
|
|
|
|
|
} |
7948
|
|
|
|
|
|
|
else { |
7949
|
|
|
|
|
|
|
|
7950
|
|
|
|
|
|
|
# skip something like ${xxx} or ->{ |
7951
|
37
|
|
|
|
|
93
|
$id_scan_state = EMPTY_STRING; |
7952
|
|
|
|
|
|
|
|
7953
|
|
|
|
|
|
|
# if this is the first token of a line, any tokens for this |
7954
|
|
|
|
|
|
|
# identifier have already been accumulated |
7955
|
37
|
100
|
66
|
|
|
170
|
if ( $identifier eq '$' || $i == 0 ) { |
7956
|
26
|
|
|
|
|
56
|
$identifier = EMPTY_STRING; |
7957
|
|
|
|
|
|
|
} |
7958
|
37
|
|
|
|
|
73
|
$i = $i_save; |
7959
|
|
|
|
|
|
|
} |
7960
|
|
|
|
|
|
|
} |
7961
|
|
|
|
|
|
|
|
7962
|
|
|
|
|
|
|
# space ok after leading $ % * & @ |
7963
|
|
|
|
|
|
|
elsif ( $tok =~ /^\s*$/ ) { |
7964
|
|
|
|
|
|
|
|
7965
|
20
|
|
|
|
|
70
|
$tok_is_blank = 1; |
7966
|
|
|
|
|
|
|
|
7967
|
|
|
|
|
|
|
# note: an id with a leading '&' does not actually come this way |
7968
|
20
|
50
|
|
|
|
113
|
if ( $identifier =~ /^[\$\%\*\&\@]/ ) { |
|
|
0
|
|
|
|
|
|
7969
|
|
|
|
|
|
|
|
7970
|
20
|
100
|
|
|
|
97
|
if ( length($identifier) > 1 ) { |
7971
|
8
|
|
|
|
|
20
|
$id_scan_state = EMPTY_STRING; |
7972
|
8
|
|
|
|
|
23
|
$i = $i_save; |
7973
|
8
|
|
|
|
|
18
|
$type = 'i'; # probably punctuation variable |
7974
|
|
|
|
|
|
|
} |
7975
|
|
|
|
|
|
|
else { |
7976
|
|
|
|
|
|
|
|
7977
|
|
|
|
|
|
|
# fix c139: trim line-ending type 't' |
7978
|
12
|
100
|
|
|
|
64
|
if ( $i == $max_token_index ) { |
|
|
100
|
|
|
|
|
|
7979
|
1
|
|
|
|
|
2
|
$i = $i_save; |
7980
|
1
|
|
|
|
|
3
|
$type = 't'; |
7981
|
|
|
|
|
|
|
} |
7982
|
|
|
|
|
|
|
|
7983
|
|
|
|
|
|
|
# spaces after $'s are common, and space after @ |
7984
|
|
|
|
|
|
|
# is harmless, so only complain about space |
7985
|
|
|
|
|
|
|
# after other type characters. Space after $ and |
7986
|
|
|
|
|
|
|
# @ will be removed in formatting. Report space |
7987
|
|
|
|
|
|
|
# after % and * because they might indicate a |
7988
|
|
|
|
|
|
|
# parsing error. In other words '% ' might be a |
7989
|
|
|
|
|
|
|
# modulo operator. Delete this warning if it |
7990
|
|
|
|
|
|
|
# gets annoying. |
7991
|
|
|
|
|
|
|
elsif ( $identifier !~ /^[\@\$]$/ ) { |
7992
|
1
|
|
|
|
|
5
|
$message = |
7993
|
|
|
|
|
|
|
"Space in identifier, following $identifier\n"; |
7994
|
|
|
|
|
|
|
} |
7995
|
|
|
|
|
|
|
else { |
7996
|
|
|
|
|
|
|
## ok: silently accept space after '$' and '@' sigils |
7997
|
|
|
|
|
|
|
} |
7998
|
|
|
|
|
|
|
} |
7999
|
|
|
|
|
|
|
} |
8000
|
|
|
|
|
|
|
|
8001
|
|
|
|
|
|
|
elsif ( $identifier eq '->' ) { |
8002
|
|
|
|
|
|
|
|
8003
|
|
|
|
|
|
|
# space after '->' is ok except at line end .. |
8004
|
|
|
|
|
|
|
# so trim line-ending in type '->' (fixes c139) |
8005
|
0
|
0
|
|
|
|
0
|
if ( $i == $max_token_index ) { |
8006
|
0
|
|
|
|
|
0
|
$i = $i_save; |
8007
|
0
|
|
|
|
|
0
|
$type = '->'; |
8008
|
|
|
|
|
|
|
} |
8009
|
|
|
|
|
|
|
} |
8010
|
|
|
|
|
|
|
|
8011
|
|
|
|
|
|
|
# stop at space after something other than -> or sigil |
8012
|
|
|
|
|
|
|
# Example of what can arrive here: |
8013
|
|
|
|
|
|
|
# eval { $MyClass->$$ }; |
8014
|
|
|
|
|
|
|
else { |
8015
|
0
|
|
|
|
|
0
|
$id_scan_state = EMPTY_STRING; |
8016
|
0
|
|
|
|
|
0
|
$i = $i_save; |
8017
|
0
|
|
|
|
|
0
|
$type = 'i'; |
8018
|
|
|
|
|
|
|
} |
8019
|
|
|
|
|
|
|
} |
8020
|
|
|
|
|
|
|
elsif ( $tok eq '^' ) { |
8021
|
|
|
|
|
|
|
|
8022
|
|
|
|
|
|
|
# check for some special variables like $^ $^W |
8023
|
11
|
50
|
|
|
|
46
|
if ( $identifier =~ /^[\$\*\@\%]$/ ) { |
8024
|
11
|
|
|
|
|
31
|
$identifier .= $tok; |
8025
|
11
|
|
|
|
|
28
|
$type = 'i'; |
8026
|
|
|
|
|
|
|
|
8027
|
|
|
|
|
|
|
# There may be one more character, not a space, after the ^ |
8028
|
11
|
|
|
|
|
26
|
my $next1 = $rtokens->[ $i + 1 ]; |
8029
|
11
|
|
|
|
|
32
|
my $chr = substr( $next1, 0, 1 ); |
8030
|
11
|
100
|
|
|
|
81
|
if ( $is_special_variable_char{$chr} ) { |
8031
|
|
|
|
|
|
|
|
8032
|
|
|
|
|
|
|
# It is something like $^W |
8033
|
|
|
|
|
|
|
# Test case (c066) : $^Oeq'linux' |
8034
|
9
|
|
|
|
|
19
|
$i++; |
8035
|
9
|
|
|
|
|
16
|
$identifier .= $next1; |
8036
|
|
|
|
|
|
|
|
8037
|
|
|
|
|
|
|
# If pretoken $next1 is more than one character long, |
8038
|
|
|
|
|
|
|
# set a flag indicating that it needs to be split. |
8039
|
9
|
100
|
|
|
|
37
|
$id_scan_state = |
8040
|
|
|
|
|
|
|
( length($next1) > 1 ) ? $scan_state_SPLIT : EMPTY_STRING; |
8041
|
|
|
|
|
|
|
} |
8042
|
|
|
|
|
|
|
else { |
8043
|
|
|
|
|
|
|
|
8044
|
|
|
|
|
|
|
# it is just $^ |
8045
|
|
|
|
|
|
|
# Simple test case (c065): '$aa=$^if($bb)'; |
8046
|
2
|
|
|
|
|
9
|
$id_scan_state = EMPTY_STRING; |
8047
|
|
|
|
|
|
|
} |
8048
|
|
|
|
|
|
|
} |
8049
|
|
|
|
|
|
|
else { |
8050
|
0
|
|
|
|
|
0
|
$id_scan_state = EMPTY_STRING; |
8051
|
0
|
|
|
|
|
0
|
$i = $i_save; |
8052
|
|
|
|
|
|
|
} |
8053
|
|
|
|
|
|
|
} |
8054
|
|
|
|
|
|
|
else { # something else |
8055
|
|
|
|
|
|
|
|
8056
|
46
|
100
|
66
|
|
|
438
|
if ( $in_prototype_or_signature && $tok =~ /^[\),=#]/ ) { |
|
|
100
|
66
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
8057
|
|
|
|
|
|
|
|
8058
|
|
|
|
|
|
|
# We might be in an extrusion of |
8059
|
|
|
|
|
|
|
# sub foo2 ( $first, $, $third ) { |
8060
|
|
|
|
|
|
|
# looking at a line starting with a comma, like |
8061
|
|
|
|
|
|
|
# $ |
8062
|
|
|
|
|
|
|
# , |
8063
|
|
|
|
|
|
|
# in this case the comma ends the signature variable |
8064
|
|
|
|
|
|
|
# '$' which will have been previously marked type 't' |
8065
|
|
|
|
|
|
|
# rather than 'i'. |
8066
|
3
|
100
|
|
|
|
10
|
if ( $i == $i_begin ) { |
8067
|
1
|
|
|
|
|
3
|
$identifier = EMPTY_STRING; |
8068
|
1
|
|
|
|
|
4
|
$type = EMPTY_STRING; |
8069
|
|
|
|
|
|
|
} |
8070
|
|
|
|
|
|
|
|
8071
|
|
|
|
|
|
|
# at a # we have to mark as type 't' because more may |
8072
|
|
|
|
|
|
|
# follow, otherwise, in a signature we can let '$' be an |
8073
|
|
|
|
|
|
|
# identifier here for better formatting. |
8074
|
|
|
|
|
|
|
# See 'mangle4.in' for a test case. |
8075
|
|
|
|
|
|
|
else { |
8076
|
2
|
|
|
|
|
5
|
$type = 'i'; |
8077
|
2
|
50
|
33
|
|
|
14
|
if ( $id_scan_state eq $scan_state_SIGIL && $tok eq '#' ) { |
8078
|
0
|
|
|
|
|
0
|
$type = 't'; |
8079
|
|
|
|
|
|
|
} |
8080
|
2
|
|
|
|
|
3
|
$i = $i_save; |
8081
|
|
|
|
|
|
|
} |
8082
|
3
|
|
|
|
|
6
|
$id_scan_state = EMPTY_STRING; |
8083
|
|
|
|
|
|
|
} |
8084
|
|
|
|
|
|
|
|
8085
|
|
|
|
|
|
|
# check for various punctuation variables |
8086
|
|
|
|
|
|
|
elsif ( $identifier =~ /^[\$\*\@\%]$/ ) { |
8087
|
35
|
|
|
|
|
114
|
$identifier .= $tok; |
8088
|
|
|
|
|
|
|
} |
8089
|
|
|
|
|
|
|
|
8090
|
|
|
|
|
|
|
# POSTDEFREF: Postfix reference ->$* ->%* ->@* ->** ->&* ->$#* |
8091
|
|
|
|
|
|
|
elsif ($tok eq '*' |
8092
|
|
|
|
|
|
|
&& $identifier =~ /\-\>([\@\%\$\*\&]|\$\#)$/ ) |
8093
|
|
|
|
|
|
|
{ |
8094
|
6
|
|
|
|
|
14
|
$identifier .= $tok; |
8095
|
|
|
|
|
|
|
} |
8096
|
|
|
|
|
|
|
|
8097
|
|
|
|
|
|
|
elsif ( $identifier eq '$#' ) { |
8098
|
|
|
|
|
|
|
|
8099
|
2
|
50
|
|
|
|
12
|
if ( $tok eq '{' ) { $type = 'i'; $i = $i_save } |
|
0
|
50
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
8100
|
|
|
|
|
|
|
|
8101
|
|
|
|
|
|
|
# perl seems to allow just these: $#: $#- $#+ |
8102
|
|
|
|
|
|
|
elsif ( $tok =~ /^[\:\-\+]$/ ) { |
8103
|
0
|
|
|
|
|
0
|
$type = 'i'; |
8104
|
0
|
|
|
|
|
0
|
$identifier .= $tok; |
8105
|
|
|
|
|
|
|
} |
8106
|
|
|
|
|
|
|
else { |
8107
|
2
|
|
|
|
|
5
|
$i = $i_save; |
8108
|
2
|
|
|
|
|
55
|
$self->write_logfile_entry( |
8109
|
|
|
|
|
|
|
'Use of $# is deprecated' . "\n" ); |
8110
|
|
|
|
|
|
|
} |
8111
|
|
|
|
|
|
|
} |
8112
|
|
|
|
|
|
|
elsif ( $identifier eq '$$' ) { |
8113
|
|
|
|
|
|
|
|
8114
|
|
|
|
|
|
|
# perl does not allow references to punctuation |
8115
|
|
|
|
|
|
|
# variables without braces. For example, this |
8116
|
|
|
|
|
|
|
# won't work: |
8117
|
|
|
|
|
|
|
# $:=\4; |
8118
|
|
|
|
|
|
|
# $a = $$:; |
8119
|
|
|
|
|
|
|
# You would have to use |
8120
|
|
|
|
|
|
|
# $a = ${$:}; |
8121
|
|
|
|
|
|
|
|
8122
|
|
|
|
|
|
|
# '$$' alone is punctuation variable for PID |
8123
|
0
|
|
|
|
|
0
|
$i = $i_save; |
8124
|
0
|
0
|
|
|
|
0
|
if ( $tok eq '{' ) { $type = 't' } |
|
0
|
|
|
|
|
0
|
|
8125
|
0
|
|
|
|
|
0
|
else { $type = 'i' } |
8126
|
|
|
|
|
|
|
} |
8127
|
|
|
|
|
|
|
elsif ( $identifier eq '->' ) { |
8128
|
0
|
|
|
|
|
0
|
$i = $i_save; |
8129
|
|
|
|
|
|
|
} |
8130
|
|
|
|
|
|
|
else { |
8131
|
0
|
|
|
|
|
0
|
$i = $i_save; |
8132
|
0
|
0
|
|
|
|
0
|
if ( length($identifier) == 1 ) { |
8133
|
0
|
|
|
|
|
0
|
$identifier = EMPTY_STRING; |
8134
|
|
|
|
|
|
|
} |
8135
|
|
|
|
|
|
|
} |
8136
|
46
|
|
|
|
|
118
|
$id_scan_state = EMPTY_STRING; |
8137
|
|
|
|
|
|
|
} |
8138
|
514
|
|
|
|
|
948
|
return; |
8139
|
|
|
|
|
|
|
} ## end sub do_id_scan_state_dollar |
8140
|
|
|
|
|
|
|
|
8141
|
|
|
|
|
|
|
sub do_id_scan_state_alpha { |
8142
|
|
|
|
|
|
|
|
8143
|
113
|
|
|
113
|
0
|
241
|
my $self = shift; |
8144
|
|
|
|
|
|
|
|
8145
|
|
|
|
|
|
|
# looking for alphanumeric after :: |
8146
|
113
|
|
|
|
|
438
|
$tok_is_blank = $tok =~ /^\s*$/; |
8147
|
|
|
|
|
|
|
|
8148
|
113
|
100
|
33
|
|
|
478
|
if ( $tok =~ /^\w/ ) { # found it |
|
|
50
|
66
|
|
|
|
|
|
|
50
|
33
|
|
|
|
|
|
|
50
|
|
|
|
|
|
8149
|
100
|
|
|
|
|
198
|
$identifier .= $tok; |
8150
|
100
|
|
|
|
|
189
|
$id_scan_state = $scan_state_COLON; # now need :: |
8151
|
100
|
|
|
|
|
162
|
$saw_alpha = 1; |
8152
|
|
|
|
|
|
|
} |
8153
|
|
|
|
|
|
|
elsif ( $tok eq "'" && $allow_tick ) { |
8154
|
0
|
|
|
|
|
0
|
$identifier .= $tok; |
8155
|
0
|
|
|
|
|
0
|
$id_scan_state = $scan_state_COLON; # now need :: |
8156
|
0
|
|
|
|
|
0
|
$saw_alpha = 1; |
8157
|
|
|
|
|
|
|
} |
8158
|
|
|
|
|
|
|
elsif ( $tok_is_blank && $identifier =~ /^sub / ) { |
8159
|
0
|
|
|
|
|
0
|
$id_scan_state = $scan_state_LPAREN; |
8160
|
0
|
|
|
|
|
0
|
$identifier .= $tok; |
8161
|
|
|
|
|
|
|
} |
8162
|
|
|
|
|
|
|
elsif ( $tok eq '(' && $identifier =~ /^sub / ) { |
8163
|
0
|
|
|
|
|
0
|
$id_scan_state = $scan_state_RPAREN; |
8164
|
0
|
|
|
|
|
0
|
$identifier .= $tok; |
8165
|
|
|
|
|
|
|
} |
8166
|
|
|
|
|
|
|
else { |
8167
|
13
|
|
|
|
|
26
|
$id_scan_state = EMPTY_STRING; |
8168
|
13
|
|
|
|
|
28
|
$i = $i_save; |
8169
|
|
|
|
|
|
|
} |
8170
|
113
|
|
|
|
|
197
|
return; |
8171
|
|
|
|
|
|
|
} ## end sub do_id_scan_state_alpha |
8172
|
|
|
|
|
|
|
|
8173
|
|
|
|
|
|
|
sub do_id_scan_state_colon { |
8174
|
|
|
|
|
|
|
|
8175
|
434
|
|
|
434
|
0
|
874
|
my $self = shift; |
8176
|
|
|
|
|
|
|
|
8177
|
|
|
|
|
|
|
# looking for possible :: after alphanumeric |
8178
|
|
|
|
|
|
|
|
8179
|
434
|
|
|
|
|
1592
|
$tok_is_blank = $tok =~ /^\s*$/; |
8180
|
|
|
|
|
|
|
|
8181
|
434
|
100
|
66
|
|
|
3552
|
if ( $tok eq '::' ) { # got it |
|
|
100
|
66
|
|
|
|
|
|
|
100
|
66
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
8182
|
97
|
|
|
|
|
185
|
$identifier .= $tok; |
8183
|
97
|
|
|
|
|
182
|
$id_scan_state = $scan_state_ALPHA; # now require alpha |
8184
|
|
|
|
|
|
|
} |
8185
|
|
|
|
|
|
|
elsif ( $tok =~ /^\w/ ) { # more alphanumeric is ok here |
8186
|
20
|
|
|
|
|
47
|
$identifier .= $tok; |
8187
|
20
|
|
|
|
|
46
|
$id_scan_state = $scan_state_COLON; # now need :: |
8188
|
20
|
|
|
|
|
41
|
$saw_alpha = 1; |
8189
|
|
|
|
|
|
|
} |
8190
|
|
|
|
|
|
|
elsif ( $tok eq "'" && $allow_tick ) { # tick |
8191
|
|
|
|
|
|
|
|
8192
|
12
|
50
|
|
|
|
29
|
if ( $is_keyword{$identifier} ) { |
8193
|
0
|
|
|
|
|
0
|
$id_scan_state = EMPTY_STRING; # that's all |
8194
|
0
|
|
|
|
|
0
|
$i = $i_save; |
8195
|
|
|
|
|
|
|
} |
8196
|
|
|
|
|
|
|
else { |
8197
|
12
|
|
|
|
|
19
|
$identifier .= $tok; |
8198
|
|
|
|
|
|
|
} |
8199
|
|
|
|
|
|
|
} |
8200
|
|
|
|
|
|
|
elsif ( $tok_is_blank && $identifier =~ /^sub / ) { |
8201
|
0
|
|
|
|
|
0
|
$id_scan_state = $scan_state_LPAREN; |
8202
|
0
|
|
|
|
|
0
|
$identifier .= $tok; |
8203
|
|
|
|
|
|
|
} |
8204
|
|
|
|
|
|
|
elsif ( $tok eq '(' && $identifier =~ /^sub / ) { |
8205
|
0
|
|
|
|
|
0
|
$id_scan_state = $scan_state_RPAREN; |
8206
|
0
|
|
|
|
|
0
|
$identifier .= $tok; |
8207
|
|
|
|
|
|
|
} |
8208
|
|
|
|
|
|
|
else { |
8209
|
305
|
|
|
|
|
557
|
$id_scan_state = EMPTY_STRING; # that's all |
8210
|
305
|
|
|
|
|
533
|
$i = $i_save; |
8211
|
|
|
|
|
|
|
} |
8212
|
434
|
|
|
|
|
709
|
return; |
8213
|
|
|
|
|
|
|
} ## end sub do_id_scan_state_colon |
8214
|
|
|
|
|
|
|
|
8215
|
|
|
|
|
|
|
sub do_id_scan_state_left_paren { |
8216
|
|
|
|
|
|
|
|
8217
|
0
|
|
|
0
|
0
|
0
|
my $self = shift; |
8218
|
|
|
|
|
|
|
|
8219
|
|
|
|
|
|
|
# looking for possible '(' of a prototype |
8220
|
|
|
|
|
|
|
|
8221
|
0
|
0
|
|
|
|
0
|
if ( $tok eq '(' ) { # got it |
|
|
0
|
|
|
|
|
|
8222
|
0
|
|
|
|
|
0
|
$identifier .= $tok; |
8223
|
0
|
|
|
|
|
0
|
$id_scan_state = $scan_state_RPAREN; # now find the end of it |
8224
|
|
|
|
|
|
|
} |
8225
|
|
|
|
|
|
|
elsif ( $tok =~ /^\s*$/ ) { # blank - keep going |
8226
|
0
|
|
|
|
|
0
|
$identifier .= $tok; |
8227
|
0
|
|
|
|
|
0
|
$tok_is_blank = 1; |
8228
|
|
|
|
|
|
|
} |
8229
|
|
|
|
|
|
|
else { |
8230
|
0
|
|
|
|
|
0
|
$id_scan_state = EMPTY_STRING; # that's all - no prototype |
8231
|
0
|
|
|
|
|
0
|
$i = $i_save; |
8232
|
|
|
|
|
|
|
} |
8233
|
0
|
|
|
|
|
0
|
return; |
8234
|
|
|
|
|
|
|
} ## end sub do_id_scan_state_left_paren |
8235
|
|
|
|
|
|
|
|
8236
|
|
|
|
|
|
|
sub do_id_scan_state_right_paren { |
8237
|
|
|
|
|
|
|
|
8238
|
0
|
|
|
0
|
0
|
0
|
my $self = shift; |
8239
|
|
|
|
|
|
|
|
8240
|
|
|
|
|
|
|
# looking for a ')' of prototype to close a '(' |
8241
|
|
|
|
|
|
|
|
8242
|
0
|
|
|
|
|
0
|
$tok_is_blank = $tok =~ /^\s*$/; |
8243
|
|
|
|
|
|
|
|
8244
|
0
|
0
|
|
|
|
0
|
if ( $tok eq ')' ) { # got it |
|
|
0
|
|
|
|
|
|
8245
|
0
|
|
|
|
|
0
|
$identifier .= $tok; |
8246
|
0
|
|
|
|
|
0
|
$id_scan_state = EMPTY_STRING; # all done |
8247
|
|
|
|
|
|
|
} |
8248
|
|
|
|
|
|
|
elsif ( $tok =~ /^[\s\$\%\\\*\@\&\;]/ ) { |
8249
|
0
|
|
|
|
|
0
|
$identifier .= $tok; |
8250
|
|
|
|
|
|
|
} |
8251
|
|
|
|
|
|
|
else { # probable error in script, but keep going |
8252
|
0
|
|
|
|
|
0
|
warning("Unexpected '$tok' while seeking end of prototype\n"); |
8253
|
0
|
|
|
|
|
0
|
$identifier .= $tok; |
8254
|
|
|
|
|
|
|
} |
8255
|
0
|
|
|
|
|
0
|
return; |
8256
|
|
|
|
|
|
|
} ## end sub do_id_scan_state_right_paren |
8257
|
|
|
|
|
|
|
|
8258
|
|
|
|
|
|
|
sub do_id_scan_state_ampersand { |
8259
|
|
|
|
|
|
|
|
8260
|
105
|
|
|
105
|
0
|
297
|
my $self = shift; |
8261
|
|
|
|
|
|
|
|
8262
|
|
|
|
|
|
|
# Starting sub call after seeing an '&' |
8263
|
|
|
|
|
|
|
|
8264
|
105
|
100
|
33
|
|
|
686
|
if ( $tok =~ /^[\$\w]/ ) { # alphanumeric .. |
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
8265
|
88
|
|
|
|
|
229
|
$id_scan_state = $scan_state_COLON; # now need :: |
8266
|
88
|
|
|
|
|
172
|
$saw_alpha = 1; |
8267
|
88
|
|
|
|
|
245
|
$identifier .= $tok; |
8268
|
|
|
|
|
|
|
} |
8269
|
|
|
|
|
|
|
elsif ( $tok eq "'" && $allow_tick ) { # alphanumeric .. |
8270
|
0
|
|
|
|
|
0
|
$id_scan_state = $scan_state_COLON; # now need :: |
8271
|
0
|
|
|
|
|
0
|
$saw_alpha = 1; |
8272
|
0
|
|
|
|
|
0
|
$identifier .= $tok; |
8273
|
|
|
|
|
|
|
} |
8274
|
|
|
|
|
|
|
elsif ( $tok =~ /^\s*$/ ) { # allow space |
8275
|
2
|
|
|
|
|
4
|
$tok_is_blank = 1; |
8276
|
|
|
|
|
|
|
|
8277
|
|
|
|
|
|
|
# fix c139: trim line-ending type 't' |
8278
|
2
|
50
|
33
|
|
|
11
|
if ( length($identifier) == 1 && $i == $max_token_index ) { |
8279
|
2
|
|
|
|
|
4
|
$i = $i_save; |
8280
|
2
|
|
|
|
|
4
|
$type = 't'; |
8281
|
|
|
|
|
|
|
} |
8282
|
|
|
|
|
|
|
} |
8283
|
|
|
|
|
|
|
elsif ( $tok eq '::' ) { # leading :: |
8284
|
0
|
|
|
|
|
0
|
$id_scan_state = $scan_state_ALPHA; # accept alpha next |
8285
|
0
|
|
|
|
|
0
|
$identifier .= $tok; |
8286
|
|
|
|
|
|
|
} |
8287
|
|
|
|
|
|
|
elsif ( $tok eq '{' ) { |
8288
|
15
|
50
|
33
|
|
|
75
|
if ( $identifier eq '&' || $i == 0 ) { |
8289
|
15
|
|
|
|
|
35
|
$identifier = EMPTY_STRING; |
8290
|
|
|
|
|
|
|
} |
8291
|
15
|
|
|
|
|
33
|
$i = $i_save; |
8292
|
15
|
|
|
|
|
34
|
$id_scan_state = EMPTY_STRING; |
8293
|
|
|
|
|
|
|
} |
8294
|
|
|
|
|
|
|
elsif ( $tok eq '^' ) { |
8295
|
0
|
0
|
|
|
|
0
|
if ( $identifier eq '&' ) { |
8296
|
|
|
|
|
|
|
|
8297
|
|
|
|
|
|
|
# Special variable (c066) |
8298
|
0
|
|
|
|
|
0
|
$identifier .= $tok; |
8299
|
0
|
|
|
|
|
0
|
$type = '&'; |
8300
|
|
|
|
|
|
|
|
8301
|
|
|
|
|
|
|
# There may be one more character, not a space, after the ^ |
8302
|
0
|
|
|
|
|
0
|
my $next1 = $rtokens->[ $i + 1 ]; |
8303
|
0
|
|
|
|
|
0
|
my $chr = substr( $next1, 0, 1 ); |
8304
|
0
|
0
|
|
|
|
0
|
if ( $is_special_variable_char{$chr} ) { |
8305
|
|
|
|
|
|
|
|
8306
|
|
|
|
|
|
|
# It is something like &^O |
8307
|
0
|
|
|
|
|
0
|
$i++; |
8308
|
0
|
|
|
|
|
0
|
$identifier .= $next1; |
8309
|
|
|
|
|
|
|
|
8310
|
|
|
|
|
|
|
# If pretoken $next1 is more than one character long, |
8311
|
|
|
|
|
|
|
# set a flag indicating that it needs to be split. |
8312
|
0
|
0
|
|
|
|
0
|
$id_scan_state = |
8313
|
|
|
|
|
|
|
( length($next1) > 1 ) ? $scan_state_SPLIT : EMPTY_STRING; |
8314
|
|
|
|
|
|
|
} |
8315
|
|
|
|
|
|
|
else { |
8316
|
|
|
|
|
|
|
|
8317
|
|
|
|
|
|
|
# it is &^ |
8318
|
0
|
|
|
|
|
0
|
$id_scan_state = EMPTY_STRING; |
8319
|
|
|
|
|
|
|
} |
8320
|
|
|
|
|
|
|
} |
8321
|
|
|
|
|
|
|
else { |
8322
|
0
|
|
|
|
|
0
|
$identifier = EMPTY_STRING; |
8323
|
0
|
|
|
|
|
0
|
$i = $i_save; |
8324
|
|
|
|
|
|
|
} |
8325
|
|
|
|
|
|
|
} |
8326
|
|
|
|
|
|
|
else { |
8327
|
|
|
|
|
|
|
|
8328
|
|
|
|
|
|
|
# punctuation variable? |
8329
|
|
|
|
|
|
|
# testfile: cunningham4.pl |
8330
|
|
|
|
|
|
|
# |
8331
|
|
|
|
|
|
|
# We have to be careful here. If we are in an unknown state, |
8332
|
|
|
|
|
|
|
# we will reject the punctuation variable. In the following |
8333
|
|
|
|
|
|
|
# example the '&' is a binary operator but we are in an unknown |
8334
|
|
|
|
|
|
|
# state because there is no sigil on 'Prima', so we don't |
8335
|
|
|
|
|
|
|
# know what it is. But it is a bad guess that |
8336
|
|
|
|
|
|
|
# '&~' is a function variable. |
8337
|
|
|
|
|
|
|
# $self->{text}->{colorMap}->[ |
8338
|
|
|
|
|
|
|
# Prima::PodView::COLOR_CODE_FOREGROUND |
8339
|
|
|
|
|
|
|
# & ~tb::COLOR_INDEX ] = |
8340
|
|
|
|
|
|
|
# $sec->{ColorCode} |
8341
|
|
|
|
|
|
|
|
8342
|
|
|
|
|
|
|
# Fix for case c033: a '#' here starts a side comment |
8343
|
0
|
0
|
0
|
|
|
0
|
if ( $identifier eq '&' && $expecting && $tok ne '#' ) { |
|
|
|
0
|
|
|
|
|
8344
|
0
|
|
|
|
|
0
|
$identifier .= $tok; |
8345
|
|
|
|
|
|
|
} |
8346
|
|
|
|
|
|
|
else { |
8347
|
0
|
|
|
|
|
0
|
$identifier = EMPTY_STRING; |
8348
|
0
|
|
|
|
|
0
|
$i = $i_save; |
8349
|
0
|
|
|
|
|
0
|
$type = '&'; |
8350
|
|
|
|
|
|
|
} |
8351
|
0
|
|
|
|
|
0
|
$id_scan_state = EMPTY_STRING; |
8352
|
|
|
|
|
|
|
} |
8353
|
105
|
|
|
|
|
221
|
return; |
8354
|
|
|
|
|
|
|
} ## end sub do_id_scan_state_ampersand |
8355
|
|
|
|
|
|
|
|
8356
|
|
|
|
|
|
|
#------------------- |
8357
|
|
|
|
|
|
|
# hash of scanner subs |
8358
|
|
|
|
|
|
|
#------------------- |
8359
|
|
|
|
|
|
|
my $scan_identifier_code = { |
8360
|
|
|
|
|
|
|
$scan_state_SIGIL => \&do_id_scan_state_dollar, |
8361
|
|
|
|
|
|
|
$scan_state_ALPHA => \&do_id_scan_state_alpha, |
8362
|
|
|
|
|
|
|
$scan_state_COLON => \&do_id_scan_state_colon, |
8363
|
|
|
|
|
|
|
$scan_state_LPAREN => \&do_id_scan_state_left_paren, |
8364
|
|
|
|
|
|
|
$scan_state_RPAREN => \&do_id_scan_state_right_paren, |
8365
|
|
|
|
|
|
|
$scan_state_AMPERSAND => \&do_id_scan_state_ampersand, |
8366
|
|
|
|
|
|
|
}; |
8367
|
|
|
|
|
|
|
|
8368
|
|
|
|
|
|
|
sub scan_complex_identifier { |
8369
|
|
|
|
|
|
|
|
8370
|
|
|
|
|
|
|
# This routine assembles tokens into identifiers. It maintains a |
8371
|
|
|
|
|
|
|
# scan state, id_scan_state. It updates id_scan_state based upon |
8372
|
|
|
|
|
|
|
# current id_scan_state and token, and returns an updated |
8373
|
|
|
|
|
|
|
# id_scan_state and the next index after the identifier. |
8374
|
|
|
|
|
|
|
|
8375
|
|
|
|
|
|
|
# This routine now serves a a backup for sub scan_simple_identifier |
8376
|
|
|
|
|
|
|
# which handles most identifiers. |
8377
|
|
|
|
|
|
|
|
8378
|
|
|
|
|
|
|
# Note that $self must be a 'my' variable and not be a closure |
8379
|
|
|
|
|
|
|
# variables like the other args. Otherwise it will not get |
8380
|
|
|
|
|
|
|
# deleted by a DESTROY call at the end of a file. Then an |
8381
|
|
|
|
|
|
|
# attempt to create multiple tokenizers can occur when multiple |
8382
|
|
|
|
|
|
|
# files are processed, causing an error. |
8383
|
|
|
|
|
|
|
|
8384
|
|
|
|
|
|
|
( |
8385
|
486
|
|
|
486
|
0
|
1755
|
my $self, $i, $id_scan_state, $identifier, $rtokens, |
8386
|
|
|
|
|
|
|
$max_token_index, $expecting, $container_type |
8387
|
|
|
|
|
|
|
) = @_; |
8388
|
|
|
|
|
|
|
|
8389
|
|
|
|
|
|
|
# return flag telling caller to split the pretoken |
8390
|
486
|
|
|
|
|
2543
|
my $split_pretoken_flag; |
8391
|
|
|
|
|
|
|
|
8392
|
|
|
|
|
|
|
#------------------- |
8393
|
|
|
|
|
|
|
# Initialize my vars |
8394
|
|
|
|
|
|
|
#------------------- |
8395
|
|
|
|
|
|
|
|
8396
|
486
|
|
|
|
|
1676
|
initialize_my_scan_id_vars(); |
8397
|
|
|
|
|
|
|
|
8398
|
|
|
|
|
|
|
#-------------------------------------------------------- |
8399
|
|
|
|
|
|
|
# get started by defining a type and a state if necessary |
8400
|
|
|
|
|
|
|
#-------------------------------------------------------- |
8401
|
|
|
|
|
|
|
|
8402
|
486
|
100
|
|
|
|
1560
|
if ( !$id_scan_state ) { |
8403
|
479
|
|
|
|
|
861
|
$context = UNKNOWN_CONTEXT; |
8404
|
|
|
|
|
|
|
|
8405
|
|
|
|
|
|
|
# fixup for digraph |
8406
|
479
|
50
|
|
|
|
1369
|
if ( $tok eq '>' ) { |
8407
|
0
|
|
|
|
|
0
|
$tok = '->'; |
8408
|
0
|
|
|
|
|
0
|
$tok_begin = $tok; |
8409
|
|
|
|
|
|
|
} |
8410
|
479
|
|
|
|
|
861
|
$identifier = $tok; |
8411
|
|
|
|
|
|
|
|
8412
|
479
|
100
|
100
|
|
|
3211
|
if ( $last_nonblank_token eq '->' ) { |
|
|
100
|
100
|
|
|
|
|
|
|
100
|
0
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
8413
|
6
|
|
|
|
|
14
|
$identifier = '->' . $identifier; |
8414
|
6
|
|
|
|
|
13
|
$id_scan_state = $scan_state_SIGIL; |
8415
|
|
|
|
|
|
|
} |
8416
|
|
|
|
|
|
|
elsif ( $tok eq '$' || $tok eq '*' ) { |
8417
|
293
|
|
|
|
|
560
|
$id_scan_state = $scan_state_SIGIL; |
8418
|
293
|
|
|
|
|
507
|
$context = SCALAR_CONTEXT; |
8419
|
|
|
|
|
|
|
} |
8420
|
|
|
|
|
|
|
elsif ( $tok eq '%' || $tok eq '@' ) { |
8421
|
77
|
|
|
|
|
185
|
$id_scan_state = $scan_state_SIGIL; |
8422
|
77
|
|
|
|
|
149
|
$context = LIST_CONTEXT; |
8423
|
|
|
|
|
|
|
} |
8424
|
|
|
|
|
|
|
elsif ( $tok eq '&' ) { |
8425
|
103
|
|
|
|
|
231
|
$id_scan_state = $scan_state_AMPERSAND; |
8426
|
|
|
|
|
|
|
} |
8427
|
|
|
|
|
|
|
elsif ( $tok eq 'sub' or $tok eq 'package' ) { |
8428
|
0
|
|
|
|
|
0
|
$saw_alpha = 0; # 'sub' is considered type info here |
8429
|
0
|
|
|
|
|
0
|
$id_scan_state = $scan_state_SIGIL; |
8430
|
0
|
|
|
|
|
0
|
$identifier .= |
8431
|
|
|
|
|
|
|
SPACE; # need a space to separate sub from sub name |
8432
|
|
|
|
|
|
|
} |
8433
|
|
|
|
|
|
|
elsif ( $tok eq '::' ) { |
8434
|
0
|
|
|
|
|
0
|
$id_scan_state = $scan_state_ALPHA; |
8435
|
|
|
|
|
|
|
} |
8436
|
|
|
|
|
|
|
elsif ( $tok =~ /^\w/ ) { |
8437
|
0
|
|
|
|
|
0
|
$id_scan_state = $scan_state_COLON; |
8438
|
0
|
|
|
|
|
0
|
$saw_alpha = 1; |
8439
|
|
|
|
|
|
|
} |
8440
|
|
|
|
|
|
|
elsif ( $tok eq '->' ) { |
8441
|
0
|
|
|
|
|
0
|
$id_scan_state = $scan_state_SIGIL; |
8442
|
|
|
|
|
|
|
} |
8443
|
|
|
|
|
|
|
else { |
8444
|
|
|
|
|
|
|
|
8445
|
|
|
|
|
|
|
# shouldn't happen: bad call parameter |
8446
|
0
|
|
|
|
|
0
|
my $msg = |
8447
|
|
|
|
|
|
|
"Program bug detected: scan_complex_identifier received bad starting token = '$tok'\n"; |
8448
|
0
|
|
|
|
|
0
|
if (DEVEL_MODE) { $self->Fault($msg) } |
8449
|
0
|
0
|
|
|
|
0
|
if ( !$self->[_in_error_] ) { |
8450
|
0
|
|
|
|
|
0
|
warning($msg); |
8451
|
0
|
|
|
|
|
0
|
$self->[_in_error_] = 1; |
8452
|
|
|
|
|
|
|
} |
8453
|
0
|
|
|
|
|
0
|
$id_scan_state = EMPTY_STRING; |
8454
|
|
|
|
|
|
|
|
8455
|
|
|
|
|
|
|
# emergency return |
8456
|
0
|
|
|
|
|
0
|
goto RETURN; |
8457
|
|
|
|
|
|
|
} |
8458
|
479
|
|
|
|
|
960
|
$saw_type = !$saw_alpha; |
8459
|
|
|
|
|
|
|
} |
8460
|
|
|
|
|
|
|
else { |
8461
|
7
|
|
|
|
|
15
|
$i--; |
8462
|
7
|
|
|
|
|
33
|
$saw_alpha = ( $tok =~ /^\w/ ); |
8463
|
7
|
|
|
|
|
21
|
$saw_type = ( $tok =~ /([\$\%\@\*\&])/ ); |
8464
|
|
|
|
|
|
|
|
8465
|
|
|
|
|
|
|
# check for a valid starting state |
8466
|
7
|
|
|
|
|
12
|
if ( DEVEL_MODE && !$is_returnable_scan_state{$id_scan_state} ) { |
8467
|
|
|
|
|
|
|
$self->Fault(<<EOM); |
8468
|
|
|
|
|
|
|
Unexpected starting scan state in sub scan_complex_identifier: '$id_scan_state' |
8469
|
|
|
|
|
|
|
EOM |
8470
|
|
|
|
|
|
|
} |
8471
|
|
|
|
|
|
|
} |
8472
|
|
|
|
|
|
|
|
8473
|
|
|
|
|
|
|
#------------------------------ |
8474
|
|
|
|
|
|
|
# loop to gather the identifier |
8475
|
|
|
|
|
|
|
#------------------------------ |
8476
|
|
|
|
|
|
|
|
8477
|
486
|
|
|
|
|
868
|
$i_save = $i; |
8478
|
|
|
|
|
|
|
|
8479
|
486
|
|
100
|
|
|
2159
|
while ( $i < $max_token_index && $id_scan_state ) { |
8480
|
|
|
|
|
|
|
|
8481
|
|
|
|
|
|
|
# Be sure we have code to handle this state before we proceed |
8482
|
1169
|
|
|
|
|
2427
|
my $code = $scan_identifier_code->{$id_scan_state}; |
8483
|
1169
|
100
|
|
|
|
2474
|
if ( !$code ) { |
8484
|
|
|
|
|
|
|
|
8485
|
3
|
50
|
|
|
|
16
|
if ( $id_scan_state eq $scan_state_SPLIT ) { |
8486
|
|
|
|
|
|
|
## OK: this is the signal to exit and split the pretoken |
8487
|
|
|
|
|
|
|
} |
8488
|
|
|
|
|
|
|
|
8489
|
|
|
|
|
|
|
# unknown state - should not happen |
8490
|
|
|
|
|
|
|
else { |
8491
|
0
|
|
|
|
|
0
|
if (DEVEL_MODE) { |
8492
|
|
|
|
|
|
|
$self->Fault(<<EOM); |
8493
|
|
|
|
|
|
|
Unknown scan state in sub scan_complex_identifier: '$id_scan_state' |
8494
|
|
|
|
|
|
|
Scan state at sub entry was '$id_scan_state_begin' |
8495
|
|
|
|
|
|
|
EOM |
8496
|
|
|
|
|
|
|
} |
8497
|
0
|
|
|
|
|
0
|
$id_scan_state = EMPTY_STRING; |
8498
|
0
|
|
|
|
|
0
|
$i = $i_save; |
8499
|
|
|
|
|
|
|
} |
8500
|
3
|
|
|
|
|
7
|
last; |
8501
|
|
|
|
|
|
|
} |
8502
|
|
|
|
|
|
|
|
8503
|
|
|
|
|
|
|
# Remember the starting index for progress check below |
8504
|
1166
|
|
|
|
|
2684
|
my $i_start_loop = $i; |
8505
|
|
|
|
|
|
|
|
8506
|
1166
|
|
|
|
|
1749
|
$last_tok_is_blank = $tok_is_blank; |
8507
|
1166
|
100
|
|
|
|
2049
|
if ($tok_is_blank) { $tok_is_blank = undef } |
|
11
|
|
|
|
|
25
|
|
8508
|
1155
|
|
|
|
|
1594
|
else { $i_save = $i } |
8509
|
|
|
|
|
|
|
|
8510
|
1166
|
|
|
|
|
2131
|
$tok = $rtokens->[ ++$i ]; |
8511
|
|
|
|
|
|
|
|
8512
|
|
|
|
|
|
|
# patch to make digraph :: if necessary |
8513
|
1166
|
100
|
100
|
|
|
3165
|
if ( ( $tok eq ':' ) && ( $rtokens->[ $i + 1 ] eq ':' ) ) { |
8514
|
113
|
|
|
|
|
237
|
$tok = '::'; |
8515
|
113
|
|
|
|
|
193
|
$i++; |
8516
|
|
|
|
|
|
|
} |
8517
|
|
|
|
|
|
|
|
8518
|
1166
|
|
|
|
|
3484
|
$code->($self); |
8519
|
|
|
|
|
|
|
|
8520
|
|
|
|
|
|
|
# check for forward progress: a decrease in the index $i |
8521
|
|
|
|
|
|
|
# implies that scanning has finished |
8522
|
1166
|
100
|
|
|
|
4026
|
last if ( $i <= $i_start_loop ); |
8523
|
|
|
|
|
|
|
|
8524
|
|
|
|
|
|
|
} ## end of main loop |
8525
|
|
|
|
|
|
|
|
8526
|
|
|
|
|
|
|
#------------- |
8527
|
|
|
|
|
|
|
# Check result |
8528
|
|
|
|
|
|
|
#------------- |
8529
|
|
|
|
|
|
|
|
8530
|
|
|
|
|
|
|
# Be sure a valid state is returned |
8531
|
486
|
100
|
|
|
|
1351
|
if ($id_scan_state) { |
8532
|
|
|
|
|
|
|
|
8533
|
20
|
100
|
|
|
|
73
|
if ( !$is_returnable_scan_state{$id_scan_state} ) { |
8534
|
|
|
|
|
|
|
|
8535
|
13
|
100
|
|
|
|
60
|
if ( $id_scan_state eq $scan_state_SPLIT ) { |
8536
|
3
|
|
|
|
|
7
|
$split_pretoken_flag = 1; |
8537
|
|
|
|
|
|
|
} |
8538
|
|
|
|
|
|
|
|
8539
|
13
|
50
|
|
|
|
50
|
if ( $id_scan_state eq $scan_state_RPAREN ) { |
8540
|
0
|
|
|
|
|
0
|
warning( |
8541
|
|
|
|
|
|
|
"Hit end of line while seeking ) to end prototype\n"); |
8542
|
|
|
|
|
|
|
} |
8543
|
|
|
|
|
|
|
|
8544
|
13
|
|
|
|
|
29
|
$id_scan_state = EMPTY_STRING; |
8545
|
|
|
|
|
|
|
} |
8546
|
|
|
|
|
|
|
|
8547
|
|
|
|
|
|
|
# Patch: the deprecated variable $# does not combine with anything |
8548
|
|
|
|
|
|
|
# on the next line. |
8549
|
20
|
50
|
|
|
|
64
|
if ( $identifier eq '$#' ) { $id_scan_state = EMPTY_STRING } |
|
0
|
|
|
|
|
0
|
|
8550
|
|
|
|
|
|
|
} |
8551
|
|
|
|
|
|
|
|
8552
|
|
|
|
|
|
|
# Be sure the token index is valid |
8553
|
486
|
50
|
|
|
|
1338
|
if ( $i < 0 ) { $i = 0 } |
|
0
|
|
|
|
|
0
|
|
8554
|
|
|
|
|
|
|
|
8555
|
|
|
|
|
|
|
# Be sure a token type is defined |
8556
|
486
|
100
|
|
|
|
1244
|
if ( !$type ) { |
8557
|
|
|
|
|
|
|
|
8558
|
458
|
100
|
|
|
|
1154
|
if ($saw_type) { |
|
|
100
|
|
|
|
|
|
8559
|
|
|
|
|
|
|
|
8560
|
452
|
100
|
33
|
|
|
2015
|
if ($saw_alpha) { |
|
|
50
|
66
|
|
|
|
|
|
|
100
|
66
|
|
|
|
|
|
|
|
33
|
|
|
|
|
8561
|
|
|
|
|
|
|
|
8562
|
|
|
|
|
|
|
# The type without the -> should be the same as with the -> so |
8563
|
|
|
|
|
|
|
# that if they get separated we get the same bond strengths, |
8564
|
|
|
|
|
|
|
# etc. See b1234 |
8565
|
348
|
50
|
33
|
|
|
1399
|
if ( $identifier =~ /^->/ |
|
|
|
33
|
|
|
|
|
8566
|
|
|
|
|
|
|
&& $last_nonblank_type eq 'w' |
8567
|
|
|
|
|
|
|
&& substr( $identifier, 2, 1 ) =~ /^\w/ ) |
8568
|
|
|
|
|
|
|
{ |
8569
|
0
|
|
|
|
|
0
|
$type = 'w'; |
8570
|
|
|
|
|
|
|
} |
8571
|
348
|
|
|
|
|
701
|
else { $type = 'i' } |
8572
|
|
|
|
|
|
|
} |
8573
|
|
|
|
|
|
|
elsif ( $identifier eq '->' ) { |
8574
|
0
|
|
|
|
|
0
|
$type = '->'; |
8575
|
|
|
|
|
|
|
} |
8576
|
|
|
|
|
|
|
elsif ( |
8577
|
|
|
|
|
|
|
( length($identifier) > 1 ) |
8578
|
|
|
|
|
|
|
|
8579
|
|
|
|
|
|
|
# In something like '@$=' we have an identifier '@$' |
8580
|
|
|
|
|
|
|
# In something like '$${' we have type '$$' (and only |
8581
|
|
|
|
|
|
|
# part of an identifier) |
8582
|
|
|
|
|
|
|
&& !( $identifier =~ /\$$/ && $tok eq '{' ) |
8583
|
|
|
|
|
|
|
|
8584
|
|
|
|
|
|
|
## && ( $identifier !~ /^(sub |package )$/ ) |
8585
|
|
|
|
|
|
|
&& $identifier ne 'sub ' |
8586
|
|
|
|
|
|
|
&& $identifier ne 'package ' |
8587
|
|
|
|
|
|
|
) |
8588
|
|
|
|
|
|
|
{ |
8589
|
53
|
|
|
|
|
153
|
$type = 'i'; |
8590
|
|
|
|
|
|
|
} |
8591
|
51
|
|
|
|
|
113
|
else { $type = 't' } |
8592
|
|
|
|
|
|
|
} |
8593
|
|
|
|
|
|
|
elsif ($saw_alpha) { |
8594
|
|
|
|
|
|
|
|
8595
|
|
|
|
|
|
|
# type 'w' includes anything without leading type info |
8596
|
|
|
|
|
|
|
# ($,%,@,*) including something like abc::def::ghi |
8597
|
5
|
|
|
|
|
13
|
$type = 'w'; |
8598
|
|
|
|
|
|
|
|
8599
|
|
|
|
|
|
|
# Fix for b1337, if restarting scan after line break between |
8600
|
|
|
|
|
|
|
# '->' or sigil and identifier name, use type 'i' |
8601
|
5
|
50
|
33
|
|
|
31
|
if ( $id_scan_state_begin |
8602
|
|
|
|
|
|
|
&& $identifier =~ /^([\$\%\@\*\&]|->)/ ) |
8603
|
|
|
|
|
|
|
{ |
8604
|
5
|
|
|
|
|
12
|
$type = 'i'; |
8605
|
|
|
|
|
|
|
} |
8606
|
|
|
|
|
|
|
} |
8607
|
|
|
|
|
|
|
else { |
8608
|
1
|
|
|
|
|
3
|
$type = EMPTY_STRING; |
8609
|
|
|
|
|
|
|
} # this can happen on a restart |
8610
|
|
|
|
|
|
|
} |
8611
|
|
|
|
|
|
|
|
8612
|
|
|
|
|
|
|
# See if we formed an identifier... |
8613
|
486
|
100
|
|
|
|
1192
|
if ($identifier) { |
8614
|
444
|
|
|
|
|
832
|
$tok = $identifier; |
8615
|
444
|
100
|
|
|
|
1117
|
if ($message) { $self->write_logfile_entry($message) } |
|
1
|
|
|
|
|
5
|
|
8616
|
|
|
|
|
|
|
} |
8617
|
|
|
|
|
|
|
|
8618
|
|
|
|
|
|
|
# did not find an identifier, back up |
8619
|
|
|
|
|
|
|
else { |
8620
|
42
|
|
|
|
|
83
|
$tok = $tok_begin; |
8621
|
42
|
|
|
|
|
69
|
$i = $i_begin; |
8622
|
|
|
|
|
|
|
} |
8623
|
|
|
|
|
|
|
|
8624
|
|
|
|
|
|
|
RETURN: |
8625
|
|
|
|
|
|
|
|
8626
|
486
|
|
|
|
|
736
|
DEBUG_SCAN_ID && do { |
8627
|
|
|
|
|
|
|
my ( $a, $b, $c ) = caller; |
8628
|
|
|
|
|
|
|
print {*STDOUT} |
8629
|
|
|
|
|
|
|
"SCANID: called from $a $b $c with tok, i, state, identifier =$tok_begin, $i_begin, $id_scan_state_begin, $identifier_begin\n"; |
8630
|
|
|
|
|
|
|
print {*STDOUT} |
8631
|
|
|
|
|
|
|
"SCANID: returned with tok, i, state, identifier =$tok, $i, $id_scan_state, $identifier\n"; |
8632
|
|
|
|
|
|
|
}; |
8633
|
486
|
|
|
|
|
2313
|
return ( $i, $tok, $type, $id_scan_state, $identifier, |
8634
|
|
|
|
|
|
|
$split_pretoken_flag ); |
8635
|
|
|
|
|
|
|
} ## end sub scan_complex_identifier |
8636
|
|
|
|
|
|
|
} ## end closure for sub scan_complex_identifier |
8637
|
|
|
|
|
|
|
|
8638
|
|
|
|
|
|
|
{ ## closure for sub do_scan_sub |
8639
|
|
|
|
|
|
|
|
8640
|
|
|
|
|
|
|
my %warn_if_lexical; |
8641
|
|
|
|
|
|
|
|
8642
|
|
|
|
|
|
|
BEGIN { |
8643
|
|
|
|
|
|
|
|
8644
|
|
|
|
|
|
|
# lexical subs with these names can cause parsing errors in this version |
8645
|
39
|
|
|
39
|
|
297
|
my @q = qw( m q qq qr qw qx s tr y ); |
8646
|
39
|
|
|
|
|
3456
|
@{warn_if_lexical}{@q} = (1) x scalar(@q); |
8647
|
|
|
|
|
|
|
} ## end BEGIN |
8648
|
|
|
|
|
|
|
|
8649
|
|
|
|
|
|
|
# saved package and subnames in case prototype is on separate line |
8650
|
|
|
|
|
|
|
my ( $package_saved, $subname_saved ); |
8651
|
|
|
|
|
|
|
|
8652
|
|
|
|
|
|
|
# initialize subname each time a new 'sub' keyword is encountered |
8653
|
|
|
|
|
|
|
sub initialize_subname { |
8654
|
296
|
|
|
296
|
0
|
650
|
$package_saved = EMPTY_STRING; |
8655
|
296
|
|
|
|
|
614
|
$subname_saved = EMPTY_STRING; |
8656
|
296
|
|
|
|
|
551
|
return; |
8657
|
|
|
|
|
|
|
} |
8658
|
|
|
|
|
|
|
|
8659
|
|
|
|
|
|
|
use constant { |
8660
|
39
|
|
|
|
|
91828
|
SUB_CALL => 1, |
8661
|
|
|
|
|
|
|
PAREN_CALL => 2, |
8662
|
|
|
|
|
|
|
PROTOTYPE_CALL => 3, |
8663
|
39
|
|
|
39
|
|
352
|
}; |
|
39
|
|
|
|
|
150
|
|
8664
|
|
|
|
|
|
|
|
8665
|
|
|
|
|
|
|
sub do_scan_sub { |
8666
|
|
|
|
|
|
|
|
8667
|
|
|
|
|
|
|
# do_scan_sub parses a sub name and prototype. |
8668
|
|
|
|
|
|
|
|
8669
|
|
|
|
|
|
|
# At present there are three basic CALL TYPES which are |
8670
|
|
|
|
|
|
|
# distinguished by the starting value of '$tok': |
8671
|
|
|
|
|
|
|
# 1. $tok='sub', id_scan_state='sub' |
8672
|
|
|
|
|
|
|
# it is called with $i_beg equal to the index of the first nonblank |
8673
|
|
|
|
|
|
|
# token following a 'sub' token. |
8674
|
|
|
|
|
|
|
# 2. $tok='(', id_scan_state='sub', |
8675
|
|
|
|
|
|
|
# it is called with $i_beg equal to the index of a '(' which may |
8676
|
|
|
|
|
|
|
# start a prototype. |
8677
|
|
|
|
|
|
|
# 3. $tok='prototype', id_scan_state='prototype' |
8678
|
|
|
|
|
|
|
# it is called with $i_beg equal to the index of a '(' which is |
8679
|
|
|
|
|
|
|
# preceded by ': prototype' and has $id_scan_state eq 'prototype' |
8680
|
|
|
|
|
|
|
|
8681
|
|
|
|
|
|
|
# Examples: |
8682
|
|
|
|
|
|
|
|
8683
|
|
|
|
|
|
|
# A single type 1 call will get both the sub and prototype |
8684
|
|
|
|
|
|
|
# sub foo1 ( $$ ) { } |
8685
|
|
|
|
|
|
|
# ^ |
8686
|
|
|
|
|
|
|
|
8687
|
|
|
|
|
|
|
# The subname will be obtained with a 'sub' call |
8688
|
|
|
|
|
|
|
# The prototype on line 2 will be obtained with a '(' call |
8689
|
|
|
|
|
|
|
# sub foo1 |
8690
|
|
|
|
|
|
|
# ^ <---call type 1 |
8691
|
|
|
|
|
|
|
# ( $$ ) { } |
8692
|
|
|
|
|
|
|
# ^ <---call type 2 |
8693
|
|
|
|
|
|
|
|
8694
|
|
|
|
|
|
|
# The subname will be obtained with a 'sub' call |
8695
|
|
|
|
|
|
|
# The prototype will be obtained with a 'prototype' call |
8696
|
|
|
|
|
|
|
# sub foo1 ( $x, $y ) : prototype ( $$ ) { } |
8697
|
|
|
|
|
|
|
# ^ <---type 1 ^ <---type 3 |
8698
|
|
|
|
|
|
|
|
8699
|
|
|
|
|
|
|
# TODO: add future error checks to be sure we have a valid |
8700
|
|
|
|
|
|
|
# sub name. For example, 'sub &doit' is wrong. Also, be sure |
8701
|
|
|
|
|
|
|
# a name is given if and only if a non-anonymous sub is |
8702
|
|
|
|
|
|
|
# appropriate. |
8703
|
|
|
|
|
|
|
# USES GLOBAL VARS: $current_package, $last_nonblank_token, |
8704
|
|
|
|
|
|
|
# $rsaw_function_definition, |
8705
|
|
|
|
|
|
|
# $statement_type |
8706
|
|
|
|
|
|
|
|
8707
|
302
|
|
|
302
|
0
|
813
|
my ( $self, $rinput_hash ) = @_; |
8708
|
|
|
|
|
|
|
|
8709
|
302
|
|
|
|
|
754
|
my $input_line = $rinput_hash->{input_line}; |
8710
|
302
|
|
|
|
|
698
|
my $i = $rinput_hash->{i}; |
8711
|
302
|
|
|
|
|
617
|
my $i_beg = $rinput_hash->{i_beg}; |
8712
|
302
|
|
|
|
|
647
|
my $tok = $rinput_hash->{tok}; |
8713
|
302
|
|
|
|
|
670
|
my $type = $rinput_hash->{type}; |
8714
|
302
|
|
|
|
|
591
|
my $rtokens = $rinput_hash->{rtokens}; |
8715
|
302
|
|
|
|
|
569
|
my $rtoken_map = $rinput_hash->{rtoken_map}; |
8716
|
302
|
|
|
|
|
581
|
my $id_scan_state = $rinput_hash->{id_scan_state}; |
8717
|
302
|
|
|
|
|
581
|
my $max_token_index = $rinput_hash->{max_token_index}; |
8718
|
|
|
|
|
|
|
|
8719
|
302
|
|
|
|
|
526
|
my $i_entry = $i; |
8720
|
|
|
|
|
|
|
|
8721
|
|
|
|
|
|
|
# Determine the CALL TYPE |
8722
|
|
|
|
|
|
|
# 1=sub |
8723
|
|
|
|
|
|
|
# 2=( |
8724
|
|
|
|
|
|
|
# 3=prototype |
8725
|
302
|
100
|
|
|
|
1095
|
my $call_type = |
|
|
100
|
|
|
|
|
|
8726
|
|
|
|
|
|
|
$tok eq 'prototype' ? PROTOTYPE_CALL |
8727
|
|
|
|
|
|
|
: $tok eq '(' ? PAREN_CALL |
8728
|
|
|
|
|
|
|
: SUB_CALL; |
8729
|
|
|
|
|
|
|
|
8730
|
302
|
|
|
|
|
1713
|
$id_scan_state = EMPTY_STRING; # normally we get everything in one call |
8731
|
302
|
|
|
|
|
600
|
my $subname = $subname_saved; |
8732
|
302
|
|
|
|
|
551
|
my $package = $package_saved; |
8733
|
302
|
|
|
|
|
642
|
my $proto = undef; |
8734
|
302
|
|
|
|
|
585
|
my $attrs = undef; |
8735
|
302
|
|
|
|
|
529
|
my $match; |
8736
|
|
|
|
|
|
|
|
8737
|
302
|
|
|
|
|
611
|
my $pos_beg = $rtoken_map->[$i_beg]; |
8738
|
302
|
|
|
|
|
1020
|
pos($input_line) = $pos_beg; |
8739
|
|
|
|
|
|
|
|
8740
|
|
|
|
|
|
|
# Look for the sub NAME if this is a SUB call |
8741
|
302
|
100
|
100
|
|
|
2812
|
if ( |
8742
|
|
|
|
|
|
|
$call_type == SUB_CALL |
8743
|
|
|
|
|
|
|
&& $input_line =~ m{\G\s* |
8744
|
|
|
|
|
|
|
((?:\w*(?:'|::))*) # package - something that ends in :: or ' |
8745
|
|
|
|
|
|
|
(\w+) # NAME - required |
8746
|
|
|
|
|
|
|
}gcx |
8747
|
|
|
|
|
|
|
) |
8748
|
|
|
|
|
|
|
{ |
8749
|
123
|
|
|
|
|
305
|
$match = 1; |
8750
|
123
|
|
|
|
|
339
|
$subname = $2; |
8751
|
|
|
|
|
|
|
|
8752
|
123
|
|
33
|
|
|
546
|
my $is_lexical_sub = |
8753
|
|
|
|
|
|
|
$last_nonblank_type eq 'k' && $last_nonblank_token eq 'my'; |
8754
|
123
|
0
|
33
|
|
|
436
|
if ( $is_lexical_sub && $1 ) { |
8755
|
0
|
|
|
|
|
0
|
$self->warning("'my' sub $subname cannot be in package '$1'\n"); |
8756
|
0
|
|
|
|
|
0
|
$is_lexical_sub = 0; |
8757
|
|
|
|
|
|
|
} |
8758
|
|
|
|
|
|
|
|
8759
|
123
|
50
|
|
|
|
398
|
if ($is_lexical_sub) { |
8760
|
|
|
|
|
|
|
|
8761
|
|
|
|
|
|
|
# lexical subs use the block sequence number as a package name |
8762
|
0
|
|
|
|
|
0
|
my $seqno = |
8763
|
|
|
|
|
|
|
$rcurrent_sequence_number->[BRACE] |
8764
|
|
|
|
|
|
|
[ $rcurrent_depth->[BRACE] ]; |
8765
|
0
|
0
|
|
|
|
0
|
$seqno = 1 if ( !defined($seqno) ); |
8766
|
0
|
|
|
|
|
0
|
$package = $seqno; |
8767
|
0
|
0
|
|
|
|
0
|
if ( $warn_if_lexical{$subname} ) { |
8768
|
0
|
|
|
|
|
0
|
$self->warning( |
8769
|
|
|
|
|
|
|
"'my' sub '$subname' matches a builtin name and may not be handled correctly in this perltidy version.\n" |
8770
|
|
|
|
|
|
|
); |
8771
|
|
|
|
|
|
|
|
8772
|
|
|
|
|
|
|
# This may end badly, it is safest to block formatting |
8773
|
|
|
|
|
|
|
# For an example, see perl527/lexsub.t (issue c203) |
8774
|
0
|
|
|
|
|
0
|
$self->[_in_trouble_] = 1; |
8775
|
|
|
|
|
|
|
} |
8776
|
|
|
|
|
|
|
} |
8777
|
|
|
|
|
|
|
else { |
8778
|
123
|
100
|
66
|
|
|
841
|
$package = ( defined($1) && $1 ) ? $1 : $current_package; |
8779
|
123
|
|
|
|
|
437
|
$package =~ s/\'/::/g; |
8780
|
123
|
50
|
|
|
|
466
|
if ( $package =~ /^\:/ ) { $package = 'main' . $package } |
|
0
|
|
|
|
|
0
|
|
8781
|
123
|
|
|
|
|
356
|
$package =~ s/::$//; |
8782
|
|
|
|
|
|
|
} |
8783
|
|
|
|
|
|
|
|
8784
|
123
|
|
|
|
|
308
|
my $pos = pos($input_line); |
8785
|
123
|
|
|
|
|
284
|
my $numc = $pos - $pos_beg; |
8786
|
123
|
|
|
|
|
426
|
$tok = 'sub ' . substr( $input_line, $pos_beg, $numc ); |
8787
|
123
|
|
|
|
|
270
|
$type = 'S'; ## Fix for c250, was 'i'; |
8788
|
|
|
|
|
|
|
|
8789
|
|
|
|
|
|
|
# remember the sub name in case another call is needed to |
8790
|
|
|
|
|
|
|
# get the prototype |
8791
|
123
|
|
|
|
|
272
|
$package_saved = $package; |
8792
|
123
|
|
|
|
|
303
|
$subname_saved = $subname; |
8793
|
|
|
|
|
|
|
} |
8794
|
|
|
|
|
|
|
|
8795
|
|
|
|
|
|
|
# Now look for PROTO ATTRS for all call types |
8796
|
|
|
|
|
|
|
# Look for prototype/attributes which are usually on the same |
8797
|
|
|
|
|
|
|
# line as the sub name but which might be on a separate line. |
8798
|
|
|
|
|
|
|
# For example, we might have an anonymous sub with attributes, |
8799
|
|
|
|
|
|
|
# or a prototype on a separate line from its sub name |
8800
|
|
|
|
|
|
|
|
8801
|
|
|
|
|
|
|
# NOTE: We only want to parse PROTOTYPES here. If we see anything that |
8802
|
|
|
|
|
|
|
# does not look like a prototype, we assume it is a SIGNATURE and we |
8803
|
|
|
|
|
|
|
# will stop and let the the standard tokenizer handle it. In |
8804
|
|
|
|
|
|
|
# particular, we stop if we see any nested parens, braces, or commas. |
8805
|
|
|
|
|
|
|
# Also note, a valid prototype cannot contain any alphabetic character |
8806
|
|
|
|
|
|
|
# -- see https://perldoc.perl.org/perlsub |
8807
|
|
|
|
|
|
|
# But it appears that an underscore is valid in a prototype, so the |
8808
|
|
|
|
|
|
|
# regex below uses [A-Za-z] rather than \w |
8809
|
|
|
|
|
|
|
# This is the old regex which has been replaced: |
8810
|
|
|
|
|
|
|
# $input_line =~ m/\G(\s*\([^\)\(\}\{\,#]*\))? # PROTO |
8811
|
302
|
|
|
|
|
1132
|
my $saw_opening_paren = $input_line =~ /\G\s*\(/; |
8812
|
302
|
100
|
100
|
|
|
3075
|
if ( |
|
|
|
66
|
|
|
|
|
8813
|
|
|
|
|
|
|
$input_line =~ m{\G(\s*\([^\)\(\}\{\,#A-Za-z]*\))? # PROTO |
8814
|
|
|
|
|
|
|
(\s*:)? # ATTRS leading ':' |
8815
|
|
|
|
|
|
|
}gcx |
8816
|
|
|
|
|
|
|
&& ( $1 || $2 ) |
8817
|
|
|
|
|
|
|
) |
8818
|
|
|
|
|
|
|
{ |
8819
|
45
|
|
|
|
|
148
|
$proto = $1; |
8820
|
45
|
|
|
|
|
111
|
$attrs = $2; |
8821
|
|
|
|
|
|
|
|
8822
|
|
|
|
|
|
|
# Append the prototype to the starting token if it is 'sub' or |
8823
|
|
|
|
|
|
|
# 'prototype'. This is not necessary but for compatibility with |
8824
|
|
|
|
|
|
|
# previous versions when the -csc flag is used: |
8825
|
45
|
100
|
100
|
|
|
299
|
if ( $proto && ( $match || $call_type == PROTOTYPE_CALL ) ) { |
|
|
100
|
100
|
|
|
|
|
8826
|
24
|
|
|
|
|
63
|
$tok .= $proto; |
8827
|
|
|
|
|
|
|
} |
8828
|
|
|
|
|
|
|
|
8829
|
|
|
|
|
|
|
# If we just entered the sub at an opening paren on this call, not |
8830
|
|
|
|
|
|
|
# a following :prototype, label it with the previous token. This is |
8831
|
|
|
|
|
|
|
# necessary to propagate the sub name to its opening block. |
8832
|
|
|
|
|
|
|
elsif ( $call_type == PAREN_CALL ) { |
8833
|
2
|
|
|
|
|
6
|
$tok = $last_nonblank_token; |
8834
|
|
|
|
|
|
|
} |
8835
|
|
|
|
|
|
|
else { |
8836
|
|
|
|
|
|
|
} |
8837
|
|
|
|
|
|
|
|
8838
|
45
|
|
100
|
|
|
164
|
$match ||= 1; |
8839
|
|
|
|
|
|
|
|
8840
|
|
|
|
|
|
|
# Patch part #1 to fixes cases b994 and b1053: |
8841
|
|
|
|
|
|
|
# Mark an anonymous sub keyword without prototype as type 'k', i.e. |
8842
|
|
|
|
|
|
|
# 'sub : lvalue { ...' |
8843
|
45
|
|
|
|
|
89
|
$type = 'S'; ## C250, was 'i'; |
8844
|
45
|
100
|
100
|
|
|
205
|
if ( $tok eq 'sub' && !$proto ) { $type = 'k' } |
|
2
|
|
|
|
|
4
|
|
8845
|
|
|
|
|
|
|
} |
8846
|
|
|
|
|
|
|
|
8847
|
302
|
100
|
|
|
|
937
|
if ($match) { |
8848
|
|
|
|
|
|
|
|
8849
|
|
|
|
|
|
|
# ATTRS: if there are attributes, back up and let the ':' be |
8850
|
|
|
|
|
|
|
# found later by the scanner. |
8851
|
138
|
|
|
|
|
319
|
my $pos = pos($input_line); |
8852
|
138
|
100
|
|
|
|
447
|
if ($attrs) { |
8853
|
15
|
|
|
|
|
31
|
$pos -= length($attrs); |
8854
|
|
|
|
|
|
|
} |
8855
|
|
|
|
|
|
|
|
8856
|
138
|
|
|
|
|
326
|
my $next_nonblank_token = $tok; |
8857
|
|
|
|
|
|
|
|
8858
|
|
|
|
|
|
|
# catch case of line with leading ATTR ':' after anonymous sub |
8859
|
138
|
100
|
100
|
|
|
605
|
if ( $pos == $pos_beg && $tok eq ':' ) { |
8860
|
1
|
|
|
|
|
2
|
$type = 'A'; |
8861
|
1
|
|
|
|
|
4
|
$self->[_in_attribute_list_] = 1; |
8862
|
|
|
|
|
|
|
} |
8863
|
|
|
|
|
|
|
|
8864
|
|
|
|
|
|
|
# Otherwise, if we found a match we must convert back from |
8865
|
|
|
|
|
|
|
# string position to the pre_token index for continued parsing. |
8866
|
|
|
|
|
|
|
else { |
8867
|
|
|
|
|
|
|
|
8868
|
|
|
|
|
|
|
# I don't think an error flag can occur here ..but ? |
8869
|
137
|
|
|
|
|
275
|
my $error; |
8870
|
137
|
|
|
|
|
636
|
( $i, $error ) = inverse_pretoken_map( $i, $pos, $rtoken_map, |
8871
|
|
|
|
|
|
|
$max_token_index ); |
8872
|
137
|
50
|
|
|
|
502
|
if ($error) { $self->warning("Possibly invalid sub\n") } |
|
0
|
|
|
|
|
0
|
|
8873
|
|
|
|
|
|
|
|
8874
|
|
|
|
|
|
|
# Patch part #2 to fixes cases b994 and b1053: |
8875
|
|
|
|
|
|
|
# Do not let spaces be part of the token of an anonymous sub |
8876
|
|
|
|
|
|
|
# keyword which we marked as type 'k' above...i.e. for |
8877
|
|
|
|
|
|
|
# something like: |
8878
|
|
|
|
|
|
|
# 'sub : lvalue { ...' |
8879
|
|
|
|
|
|
|
# Back up and let it be parsed as a blank |
8880
|
137
|
50
|
66
|
|
|
635
|
if ( $type eq 'k' |
|
|
|
66
|
|
|
|
|
|
|
|
33
|
|
|
|
|
8881
|
|
|
|
|
|
|
&& $attrs |
8882
|
|
|
|
|
|
|
&& $i > $i_entry |
8883
|
|
|
|
|
|
|
&& substr( $rtokens->[$i], 0, 1 ) =~ m/\s/ ) |
8884
|
|
|
|
|
|
|
{ |
8885
|
2
|
|
|
|
|
4
|
$i--; |
8886
|
|
|
|
|
|
|
} |
8887
|
|
|
|
|
|
|
|
8888
|
|
|
|
|
|
|
# check for multiple definitions of a sub |
8889
|
137
|
|
|
|
|
404
|
( $next_nonblank_token, my $i_next ) = |
8890
|
|
|
|
|
|
|
find_next_nonblank_token_on_this_line( $i, $rtokens, |
8891
|
|
|
|
|
|
|
$max_token_index ); |
8892
|
|
|
|
|
|
|
} |
8893
|
|
|
|
|
|
|
|
8894
|
138
|
100
|
|
|
|
812
|
if ( $next_nonblank_token =~ /^(\s*|#)$/ ) |
8895
|
|
|
|
|
|
|
{ # skip blank or side comment |
8896
|
7
|
|
|
|
|
81
|
my ( $rpre_tokens, $rpre_types ) = |
8897
|
|
|
|
|
|
|
$self->peek_ahead_for_n_nonblank_pre_tokens(1); |
8898
|
7
|
50
|
33
|
|
|
42
|
if ( defined($rpre_tokens) && @{$rpre_tokens} ) { |
|
7
|
|
|
|
|
30
|
|
8899
|
7
|
|
|
|
|
29
|
$next_nonblank_token = $rpre_tokens->[0]; |
8900
|
|
|
|
|
|
|
} |
8901
|
|
|
|
|
|
|
else { |
8902
|
0
|
|
|
|
|
0
|
$next_nonblank_token = '}'; |
8903
|
|
|
|
|
|
|
} |
8904
|
|
|
|
|
|
|
} |
8905
|
|
|
|
|
|
|
|
8906
|
|
|
|
|
|
|
# See what's next... |
8907
|
138
|
100
|
|
|
|
661
|
if ( $next_nonblank_token eq '{' ) { |
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
8908
|
106
|
100
|
|
|
|
426
|
if ($subname) { |
8909
|
|
|
|
|
|
|
|
8910
|
|
|
|
|
|
|
# Check for multiple definitions of a sub, but |
8911
|
|
|
|
|
|
|
# it is ok to have multiple sub BEGIN, etc, |
8912
|
|
|
|
|
|
|
# so we do not complain if name is all caps |
8913
|
96
|
50
|
33
|
|
|
603
|
if ( $rsaw_function_definition->{$subname}{$package} |
8914
|
|
|
|
|
|
|
&& $subname !~ /^[A-Z]+$/ ) |
8915
|
|
|
|
|
|
|
{ |
8916
|
|
|
|
|
|
|
my $lno = |
8917
|
0
|
|
|
|
|
0
|
$rsaw_function_definition->{$subname}{$package}; |
8918
|
0
|
0
|
|
|
|
0
|
if ( $package =~ /^\d/ ) { |
8919
|
0
|
|
|
|
|
0
|
$self->warning( |
8920
|
|
|
|
|
|
|
"already saw definition of lexical 'sub $subname' at line $lno\n" |
8921
|
|
|
|
|
|
|
); |
8922
|
|
|
|
|
|
|
|
8923
|
|
|
|
|
|
|
} |
8924
|
|
|
|
|
|
|
else { |
8925
|
0
|
|
|
|
|
0
|
if ( !DEVEL_MODE ) { |
8926
|
0
|
|
|
|
|
0
|
$self->warning( |
8927
|
|
|
|
|
|
|
"already saw definition of 'sub $subname' in package '$package' at line $lno\n" |
8928
|
|
|
|
|
|
|
); |
8929
|
|
|
|
|
|
|
} |
8930
|
|
|
|
|
|
|
} |
8931
|
|
|
|
|
|
|
} |
8932
|
96
|
|
|
|
|
381
|
$rsaw_function_definition->{$subname}{$package} = |
8933
|
|
|
|
|
|
|
$self->[_last_line_number_]; |
8934
|
|
|
|
|
|
|
} |
8935
|
|
|
|
|
|
|
} |
8936
|
|
|
|
|
|
|
elsif ( $next_nonblank_token eq ';' ) { |
8937
|
|
|
|
|
|
|
} |
8938
|
|
|
|
|
|
|
elsif ( $next_nonblank_token eq '}' ) { |
8939
|
|
|
|
|
|
|
} |
8940
|
|
|
|
|
|
|
|
8941
|
|
|
|
|
|
|
# ATTRS - if an attribute list follows, remember the name |
8942
|
|
|
|
|
|
|
# of the sub so the next opening brace can be labeled. |
8943
|
|
|
|
|
|
|
# Setting 'statement_type' causes any ':'s to introduce |
8944
|
|
|
|
|
|
|
# attributes. |
8945
|
|
|
|
|
|
|
elsif ( $next_nonblank_token eq ':' ) { |
8946
|
16
|
100
|
|
|
|
52
|
if ( $call_type == SUB_CALL ) { |
8947
|
14
|
100
|
|
|
|
74
|
$statement_type = |
8948
|
|
|
|
|
|
|
substr( $tok, 0, 3 ) eq 'sub' ? $tok : 'sub'; |
8949
|
|
|
|
|
|
|
} |
8950
|
|
|
|
|
|
|
} |
8951
|
|
|
|
|
|
|
|
8952
|
|
|
|
|
|
|
# if we stopped before an open paren ... |
8953
|
|
|
|
|
|
|
elsif ( $next_nonblank_token eq '(' ) { |
8954
|
|
|
|
|
|
|
|
8955
|
|
|
|
|
|
|
# If we DID NOT see this paren above then it must be on the |
8956
|
|
|
|
|
|
|
# next line so we will set a flag to come back here and see if |
8957
|
|
|
|
|
|
|
# it is a PROTOTYPE |
8958
|
|
|
|
|
|
|
|
8959
|
|
|
|
|
|
|
# Otherwise, we assume it is a SIGNATURE rather than a |
8960
|
|
|
|
|
|
|
# PROTOTYPE and let the normal tokenizer handle it as a list |
8961
|
15
|
100
|
|
|
|
47
|
if ( !$saw_opening_paren ) { |
8962
|
4
|
|
|
|
|
15
|
$id_scan_state = 'sub'; # we must come back to get proto |
8963
|
|
|
|
|
|
|
} |
8964
|
15
|
50
|
|
|
|
64
|
if ( $call_type == SUB_CALL ) { |
8965
|
15
|
50
|
|
|
|
61
|
$statement_type = |
8966
|
|
|
|
|
|
|
substr( $tok, 0, 3 ) eq 'sub' ? $tok : 'sub'; |
8967
|
|
|
|
|
|
|
} |
8968
|
|
|
|
|
|
|
} |
8969
|
|
|
|
|
|
|
|
8970
|
|
|
|
|
|
|
# something else.. |
8971
|
|
|
|
|
|
|
elsif ($next_nonblank_token) { |
8972
|
|
|
|
|
|
|
|
8973
|
0
|
0
|
0
|
|
|
0
|
if ( $rinput_hash->{tok} eq 'method' && $call_type == SUB_CALL ) |
8974
|
|
|
|
|
|
|
{ |
8975
|
|
|
|
|
|
|
# For a method call, silently ignore this error (rt145706) |
8976
|
|
|
|
|
|
|
# to avoid needless warnings. Example which can produce it: |
8977
|
|
|
|
|
|
|
# test(method Pack (), "method"); |
8978
|
|
|
|
|
|
|
|
8979
|
|
|
|
|
|
|
# TODO: scan for use feature 'class' and: |
8980
|
|
|
|
|
|
|
# - if we saw 'use feature 'class' then issue the warning. |
8981
|
|
|
|
|
|
|
# - if we did not see use feature 'class' then issue the |
8982
|
|
|
|
|
|
|
# warning and suggest turning off --use-feature=class |
8983
|
|
|
|
|
|
|
} |
8984
|
|
|
|
|
|
|
else { |
8985
|
0
|
0
|
|
|
|
0
|
$subname = EMPTY_STRING unless defined($subname); |
8986
|
0
|
|
|
|
|
0
|
$self->warning( |
8987
|
|
|
|
|
|
|
"expecting ':' or ';' or '{' after definition or declaration of sub '$subname' but saw '$next_nonblank_token'\n" |
8988
|
|
|
|
|
|
|
); |
8989
|
|
|
|
|
|
|
} |
8990
|
|
|
|
|
|
|
} |
8991
|
|
|
|
|
|
|
|
8992
|
|
|
|
|
|
|
# EOF technically ok |
8993
|
|
|
|
|
|
|
else { |
8994
|
|
|
|
|
|
|
} |
8995
|
|
|
|
|
|
|
|
8996
|
138
|
|
|
|
|
536
|
check_prototype( $proto, $package, $subname ); |
8997
|
|
|
|
|
|
|
} |
8998
|
|
|
|
|
|
|
|
8999
|
|
|
|
|
|
|
# no match to either sub name or prototype, but line not blank |
9000
|
|
|
|
|
|
|
else { |
9001
|
|
|
|
|
|
|
|
9002
|
|
|
|
|
|
|
} |
9003
|
302
|
|
|
|
|
1658
|
return ( $i, $tok, $type, $id_scan_state ); |
9004
|
|
|
|
|
|
|
} ## end sub do_scan_sub |
9005
|
|
|
|
|
|
|
} |
9006
|
|
|
|
|
|
|
|
9007
|
|
|
|
|
|
|
######################################################################### |
9008
|
|
|
|
|
|
|
# Tokenizer utility routines which may use CONSTANTS but no other GLOBALS |
9009
|
|
|
|
|
|
|
######################################################################### |
9010
|
|
|
|
|
|
|
|
9011
|
|
|
|
|
|
|
sub find_next_nonblank_token { |
9012
|
6166
|
|
|
6166
|
0
|
12204
|
my ( $self, $i, $rtokens, $max_token_index ) = @_; |
9013
|
|
|
|
|
|
|
|
9014
|
|
|
|
|
|
|
# Returns the next nonblank token after the token at index $i |
9015
|
|
|
|
|
|
|
# To skip past a side comment, and any subsequent block comments |
9016
|
|
|
|
|
|
|
# and blank lines, call with i=$max_token_index |
9017
|
|
|
|
|
|
|
|
9018
|
|
|
|
|
|
|
# Skip any ending blank (fix c258). It would be cleaner if caller passed |
9019
|
|
|
|
|
|
|
# $rtoken_map, so we could check for type 'b', and avoid a regex test, but |
9020
|
|
|
|
|
|
|
# benchmarking shows that this test does not take significant time. So |
9021
|
|
|
|
|
|
|
# that would be a nice update but not essential. Also note that ending |
9022
|
|
|
|
|
|
|
# blanks will not occur for text previously processed by perltidy. |
9023
|
6166
|
100
|
100
|
|
|
18973
|
if ( $i == $max_token_index - 1 |
9024
|
|
|
|
|
|
|
&& $rtokens->[$max_token_index] =~ /^\s+$/ ) |
9025
|
|
|
|
|
|
|
{ |
9026
|
9
|
|
|
|
|
31
|
$i++; |
9027
|
|
|
|
|
|
|
} |
9028
|
|
|
|
|
|
|
|
9029
|
6166
|
100
|
|
|
|
12845
|
if ( $i >= $max_token_index ) { |
9030
|
127
|
100
|
|
|
|
762
|
if ( !peeked_ahead() ) { |
9031
|
125
|
|
|
|
|
458
|
peeked_ahead(1); |
9032
|
125
|
|
|
|
|
727
|
$self->peek_ahead_for_nonblank_token( $rtokens, $max_token_index ); |
9033
|
|
|
|
|
|
|
} |
9034
|
|
|
|
|
|
|
} |
9035
|
|
|
|
|
|
|
|
9036
|
6166
|
|
|
|
|
11341
|
my $next_nonblank_token = $rtokens->[ ++$i ]; |
9037
|
6166
|
50
|
33
|
|
|
22160
|
return ( SPACE, $i ) |
9038
|
|
|
|
|
|
|
if ( !defined($next_nonblank_token) || !length($next_nonblank_token) ); |
9039
|
|
|
|
|
|
|
|
9040
|
|
|
|
|
|
|
# Quick test for nonblank ascii char. Note that we just have to |
9041
|
|
|
|
|
|
|
# examine the first character here. |
9042
|
6166
|
|
|
|
|
12547
|
my $ord = ord( substr( $next_nonblank_token, 0, 1 ) ); |
9043
|
6166
|
100
|
66
|
|
|
24940
|
if ( $ord >= ORD_PRINTABLE_MIN |
|
|
50
|
33
|
|
|
|
|
|
|
0
|
|
|
|
|
|
9044
|
|
|
|
|
|
|
&& $ord <= ORD_PRINTABLE_MAX ) |
9045
|
|
|
|
|
|
|
{ |
9046
|
2353
|
|
|
|
|
7780
|
return ( $next_nonblank_token, $i ); |
9047
|
|
|
|
|
|
|
} |
9048
|
|
|
|
|
|
|
|
9049
|
|
|
|
|
|
|
# Quick test to skip over an ascii space or tab |
9050
|
|
|
|
|
|
|
elsif ( $ord == ORD_SPACE || $ord == ORD_TAB ) { |
9051
|
3813
|
|
|
|
|
6795
|
$next_nonblank_token = $rtokens->[ ++$i ]; |
9052
|
3813
|
50
|
|
|
|
8084
|
return ( SPACE, $i ) unless defined($next_nonblank_token); |
9053
|
|
|
|
|
|
|
} |
9054
|
|
|
|
|
|
|
|
9055
|
|
|
|
|
|
|
# Slow test to skip over something else identified as whitespace |
9056
|
|
|
|
|
|
|
elsif ( $next_nonblank_token =~ /^\s*$/ ) { |
9057
|
0
|
|
|
|
|
0
|
$next_nonblank_token = $rtokens->[ ++$i ]; |
9058
|
0
|
0
|
|
|
|
0
|
return ( SPACE, $i ) unless defined($next_nonblank_token); |
9059
|
|
|
|
|
|
|
} |
9060
|
|
|
|
|
|
|
else { |
9061
|
|
|
|
|
|
|
## at nonblank |
9062
|
|
|
|
|
|
|
} |
9063
|
|
|
|
|
|
|
|
9064
|
|
|
|
|
|
|
# We should be at a nonblank now |
9065
|
3813
|
|
|
|
|
11494
|
return ( $next_nonblank_token, $i ); |
9066
|
|
|
|
|
|
|
} ## end sub find_next_nonblank_token |
9067
|
|
|
|
|
|
|
|
9068
|
|
|
|
|
|
|
sub find_next_noncomment_token { |
9069
|
98
|
|
|
98
|
0
|
400
|
my ( $self, $i, $rtokens, $max_token_index ) = @_; |
9070
|
|
|
|
|
|
|
|
9071
|
|
|
|
|
|
|
# Given the current character position, look ahead past any comments |
9072
|
|
|
|
|
|
|
# and blank lines and return the next token, including digraphs and |
9073
|
|
|
|
|
|
|
# trigraphs. |
9074
|
|
|
|
|
|
|
|
9075
|
98
|
|
|
|
|
350
|
my ( $next_nonblank_token, $i_next ) = |
9076
|
|
|
|
|
|
|
$self->find_next_nonblank_token( $i, $rtokens, $max_token_index ); |
9077
|
|
|
|
|
|
|
|
9078
|
|
|
|
|
|
|
# skip past any side comment |
9079
|
98
|
50
|
|
|
|
564
|
if ( $next_nonblank_token eq '#' ) { |
9080
|
0
|
|
|
|
|
0
|
( $next_nonblank_token, $i_next ) = |
9081
|
|
|
|
|
|
|
$self->find_next_nonblank_token( $i_next, $rtokens, |
9082
|
|
|
|
|
|
|
$max_token_index ); |
9083
|
|
|
|
|
|
|
} |
9084
|
|
|
|
|
|
|
|
9085
|
|
|
|
|
|
|
# check for a digraph |
9086
|
98
|
50
|
33
|
|
|
915
|
if ( $next_nonblank_token |
|
|
|
33
|
|
|
|
|
9087
|
|
|
|
|
|
|
&& $next_nonblank_token ne SPACE |
9088
|
|
|
|
|
|
|
&& defined( $rtokens->[ $i_next + 1 ] ) ) |
9089
|
|
|
|
|
|
|
{ |
9090
|
98
|
|
|
|
|
303
|
my $test2 = $next_nonblank_token . $rtokens->[ $i_next + 1 ]; |
9091
|
98
|
100
|
|
|
|
419
|
if ( $is_digraph{$test2} ) { |
9092
|
15
|
|
|
|
|
41
|
$next_nonblank_token = $test2; |
9093
|
15
|
|
|
|
|
37
|
$i_next = $i_next + 1; |
9094
|
|
|
|
|
|
|
|
9095
|
|
|
|
|
|
|
# check for a trigraph |
9096
|
15
|
50
|
|
|
|
67
|
if ( defined( $rtokens->[ $i_next + 1 ] ) ) { |
9097
|
15
|
|
|
|
|
52
|
my $test3 = $next_nonblank_token . $rtokens->[ $i_next + 1 ]; |
9098
|
15
|
50
|
|
|
|
85
|
if ( $is_trigraph{$test3} ) { |
9099
|
0
|
|
|
|
|
0
|
$next_nonblank_token = $test3; |
9100
|
0
|
|
|
|
|
0
|
$i_next = $i_next + 1; |
9101
|
|
|
|
|
|
|
} |
9102
|
|
|
|
|
|
|
} |
9103
|
|
|
|
|
|
|
} |
9104
|
|
|
|
|
|
|
} |
9105
|
|
|
|
|
|
|
|
9106
|
98
|
|
|
|
|
334
|
return ( $next_nonblank_token, $i_next ); |
9107
|
|
|
|
|
|
|
} ## end sub find_next_noncomment_token |
9108
|
|
|
|
|
|
|
|
9109
|
|
|
|
|
|
|
sub is_possible_numerator { |
9110
|
|
|
|
|
|
|
|
9111
|
|
|
|
|
|
|
# Look at the next non-comment character and decide if it could be a |
9112
|
|
|
|
|
|
|
# numerator. Return |
9113
|
|
|
|
|
|
|
# 1 - yes |
9114
|
|
|
|
|
|
|
# 0 - can't tell |
9115
|
|
|
|
|
|
|
# -1 - no |
9116
|
|
|
|
|
|
|
|
9117
|
0
|
|
|
0
|
0
|
0
|
my ( $self, $i, $rtokens, $max_token_index ) = @_; |
9118
|
0
|
|
|
|
|
0
|
my $is_possible_numerator = 0; |
9119
|
|
|
|
|
|
|
|
9120
|
0
|
|
|
|
|
0
|
my $next_token = $rtokens->[ $i + 1 ]; |
9121
|
0
|
0
|
|
|
|
0
|
if ( $next_token eq '=' ) { $i++; } # handle /= |
|
0
|
|
|
|
|
0
|
|
9122
|
0
|
|
|
|
|
0
|
my ( $next_nonblank_token, $i_next ) = |
9123
|
|
|
|
|
|
|
$self->find_next_nonblank_token( $i, $rtokens, $max_token_index ); |
9124
|
|
|
|
|
|
|
|
9125
|
0
|
0
|
|
|
|
0
|
if ( $next_nonblank_token eq '#' ) { |
9126
|
0
|
|
|
|
|
0
|
( $next_nonblank_token, $i_next ) = |
9127
|
|
|
|
|
|
|
$self->find_next_nonblank_token( $max_token_index, $rtokens, |
9128
|
|
|
|
|
|
|
$max_token_index ); |
9129
|
|
|
|
|
|
|
} |
9130
|
|
|
|
|
|
|
|
9131
|
0
|
0
|
|
|
|
0
|
if ( $next_nonblank_token =~ / [ \( \$ \w \. \@ ] /x ) { |
|
|
0
|
|
|
|
|
|
9132
|
0
|
|
|
|
|
0
|
$is_possible_numerator = 1; |
9133
|
|
|
|
|
|
|
} |
9134
|
|
|
|
|
|
|
elsif ( $next_nonblank_token =~ /^\s*$/ ) { |
9135
|
0
|
|
|
|
|
0
|
$is_possible_numerator = 0; |
9136
|
|
|
|
|
|
|
} |
9137
|
|
|
|
|
|
|
else { |
9138
|
0
|
|
|
|
|
0
|
$is_possible_numerator = -1; |
9139
|
|
|
|
|
|
|
} |
9140
|
|
|
|
|
|
|
|
9141
|
0
|
|
|
|
|
0
|
return $is_possible_numerator; |
9142
|
|
|
|
|
|
|
} ## end sub is_possible_numerator |
9143
|
|
|
|
|
|
|
|
9144
|
|
|
|
|
|
|
{ ## closure for sub pattern_expected |
9145
|
|
|
|
|
|
|
my %pattern_test; |
9146
|
|
|
|
|
|
|
|
9147
|
|
|
|
|
|
|
BEGIN { |
9148
|
|
|
|
|
|
|
|
9149
|
|
|
|
|
|
|
# List of tokens which may follow a pattern. Note that we will not |
9150
|
|
|
|
|
|
|
# have formed digraphs at this point, so we will see '&' instead of |
9151
|
|
|
|
|
|
|
# '&&' and '|' instead of '||' |
9152
|
|
|
|
|
|
|
|
9153
|
|
|
|
|
|
|
# /(\)|\}|\;|\&\&|\|\||and|or|while|if|unless)/ |
9154
|
39
|
|
|
39
|
|
295
|
my @q = qw( & && | || ? : + - * and or while if unless); |
9155
|
39
|
|
|
|
|
159
|
push @q, ')', '}', ']', '>', ',', ';'; |
9156
|
39
|
|
|
|
|
184768
|
@{pattern_test}{@q} = (1) x scalar(@q); |
9157
|
|
|
|
|
|
|
} ## end BEGIN |
9158
|
|
|
|
|
|
|
|
9159
|
|
|
|
|
|
|
sub pattern_expected { |
9160
|
|
|
|
|
|
|
|
9161
|
|
|
|
|
|
|
# This a filter for a possible pattern. |
9162
|
|
|
|
|
|
|
# It looks at the token after a possible pattern and tries to |
9163
|
|
|
|
|
|
|
# determine if that token could end a pattern. |
9164
|
|
|
|
|
|
|
# returns - |
9165
|
|
|
|
|
|
|
# 1 - yes |
9166
|
|
|
|
|
|
|
# 0 - can't tell |
9167
|
|
|
|
|
|
|
# -1 - no |
9168
|
0
|
|
|
0
|
0
|
0
|
my ( $self, $i, $rtokens, $max_token_index ) = @_; |
9169
|
0
|
|
|
|
|
0
|
my $is_pattern = 0; |
9170
|
|
|
|
|
|
|
|
9171
|
0
|
|
|
|
|
0
|
my $next_token = $rtokens->[ $i + 1 ]; |
9172
|
0
|
0
|
|
|
|
0
|
if ( $next_token =~ /^[msixpodualgc]/ ) { |
9173
|
0
|
|
|
|
|
0
|
$i++; |
9174
|
|
|
|
|
|
|
} # skip possible modifier |
9175
|
0
|
|
|
|
|
0
|
my ( $next_nonblank_token, $i_next ) = |
9176
|
|
|
|
|
|
|
$self->find_next_nonblank_token( $i, $rtokens, $max_token_index ); |
9177
|
|
|
|
|
|
|
|
9178
|
0
|
0
|
|
|
|
0
|
if ( $pattern_test{$next_nonblank_token} ) { |
9179
|
0
|
|
|
|
|
0
|
$is_pattern = 1; |
9180
|
|
|
|
|
|
|
} |
9181
|
|
|
|
|
|
|
else { |
9182
|
|
|
|
|
|
|
|
9183
|
|
|
|
|
|
|
# Added '#' to fix issue c044 |
9184
|
0
|
0
|
0
|
|
|
0
|
if ( $next_nonblank_token =~ /^\s*$/ |
9185
|
|
|
|
|
|
|
|| $next_nonblank_token eq '#' ) |
9186
|
|
|
|
|
|
|
{ |
9187
|
0
|
|
|
|
|
0
|
$is_pattern = 0; |
9188
|
|
|
|
|
|
|
} |
9189
|
|
|
|
|
|
|
else { |
9190
|
0
|
|
|
|
|
0
|
$is_pattern = -1; |
9191
|
|
|
|
|
|
|
} |
9192
|
|
|
|
|
|
|
} |
9193
|
0
|
|
|
|
|
0
|
return $is_pattern; |
9194
|
|
|
|
|
|
|
} ## end sub pattern_expected |
9195
|
|
|
|
|
|
|
} |
9196
|
|
|
|
|
|
|
|
9197
|
|
|
|
|
|
|
sub find_next_nonblank_token_on_this_line { |
9198
|
457
|
|
|
457
|
0
|
1097
|
my ( $i, $rtokens, $max_token_index ) = @_; |
9199
|
457
|
|
|
|
|
758
|
my $next_nonblank_token; |
9200
|
|
|
|
|
|
|
|
9201
|
457
|
100
|
|
|
|
1202
|
if ( $i < $max_token_index ) { |
9202
|
449
|
|
|
|
|
1087
|
$next_nonblank_token = $rtokens->[ ++$i ]; |
9203
|
|
|
|
|
|
|
|
9204
|
449
|
100
|
|
|
|
2035
|
if ( $next_nonblank_token =~ /^\s*$/ ) { |
9205
|
|
|
|
|
|
|
|
9206
|
121
|
100
|
|
|
|
569
|
if ( $i < $max_token_index ) { |
9207
|
119
|
|
|
|
|
352
|
$next_nonblank_token = $rtokens->[ ++$i ]; |
9208
|
|
|
|
|
|
|
} |
9209
|
|
|
|
|
|
|
} |
9210
|
|
|
|
|
|
|
} |
9211
|
|
|
|
|
|
|
else { |
9212
|
8
|
|
|
|
|
26
|
$next_nonblank_token = EMPTY_STRING; |
9213
|
|
|
|
|
|
|
} |
9214
|
457
|
|
|
|
|
1524
|
return ( $next_nonblank_token, $i ); |
9215
|
|
|
|
|
|
|
} ## end sub find_next_nonblank_token_on_this_line |
9216
|
|
|
|
|
|
|
|
9217
|
|
|
|
|
|
|
sub find_angle_operator_termination { |
9218
|
|
|
|
|
|
|
|
9219
|
|
|
|
|
|
|
# We are looking at a '<' and want to know if it is an angle operator. |
9220
|
|
|
|
|
|
|
# We are to return: |
9221
|
|
|
|
|
|
|
# $i = pretoken index of ending '>' if found, current $i otherwise |
9222
|
|
|
|
|
|
|
# $type = 'Q' if found, '>' otherwise |
9223
|
8
|
|
|
8
|
0
|
31
|
my ( $self, $input_line, $i_beg, $rtoken_map, $expecting, $max_token_index ) |
9224
|
|
|
|
|
|
|
= @_; |
9225
|
8
|
|
|
|
|
30
|
my $i = $i_beg; |
9226
|
8
|
|
|
|
|
22
|
my $type = '<'; |
9227
|
8
|
|
|
|
|
35
|
pos($input_line) = 1 + $rtoken_map->[$i]; |
9228
|
|
|
|
|
|
|
|
9229
|
8
|
|
|
|
|
22
|
my $filter; |
9230
|
|
|
|
|
|
|
|
9231
|
|
|
|
|
|
|
# we just have to find the next '>' if a term is expected |
9232
|
8
|
100
|
|
|
|
47
|
if ( $expecting == TERM ) { $filter = '[\>]' } |
|
6
|
50
|
|
|
|
17
|
|
9233
|
|
|
|
|
|
|
|
9234
|
|
|
|
|
|
|
# we have to guess if we don't know what is expected |
9235
|
2
|
|
|
|
|
7
|
elsif ( $expecting == UNKNOWN ) { $filter = '[\>\;\=\#\|\<]' } |
9236
|
|
|
|
|
|
|
|
9237
|
|
|
|
|
|
|
# shouldn't happen - we shouldn't be here if operator is expected |
9238
|
|
|
|
|
|
|
else { |
9239
|
0
|
|
|
|
|
0
|
if (DEVEL_MODE) { |
9240
|
|
|
|
|
|
|
$self->Fault(<<EOM); |
9241
|
|
|
|
|
|
|
Bad call to find_angle_operator_termination |
9242
|
|
|
|
|
|
|
EOM |
9243
|
|
|
|
|
|
|
} |
9244
|
0
|
|
|
|
|
0
|
return ( $i, $type ); |
9245
|
|
|
|
|
|
|
} |
9246
|
|
|
|
|
|
|
|
9247
|
|
|
|
|
|
|
# To illustrate what we might be looking at, in case we are |
9248
|
|
|
|
|
|
|
# guessing, here are some examples of valid angle operators |
9249
|
|
|
|
|
|
|
# (or file globs): |
9250
|
|
|
|
|
|
|
# <tmp_imp/*> |
9251
|
|
|
|
|
|
|
# <FH> |
9252
|
|
|
|
|
|
|
# <$fh> |
9253
|
|
|
|
|
|
|
# <*.c *.h> |
9254
|
|
|
|
|
|
|
# <_> |
9255
|
|
|
|
|
|
|
# <jskdfjskdfj* op/* jskdjfjkosvk*> ( glob.t) |
9256
|
|
|
|
|
|
|
# <${PREFIX}*img*.$IMAGE_TYPE> |
9257
|
|
|
|
|
|
|
# <img*.$IMAGE_TYPE> |
9258
|
|
|
|
|
|
|
# <Timg*.$IMAGE_TYPE> |
9259
|
|
|
|
|
|
|
# <$LATEX2HTMLVERSIONS${dd}html[1-9].[0-9].pl> |
9260
|
|
|
|
|
|
|
# |
9261
|
|
|
|
|
|
|
# Here are some examples of lines which do not have angle operators: |
9262
|
|
|
|
|
|
|
# return unless $self->[2]++ < $#{$self->[1]}; |
9263
|
|
|
|
|
|
|
# < 2 || @$t > |
9264
|
|
|
|
|
|
|
# |
9265
|
|
|
|
|
|
|
# the following line from dlister.pl caused trouble: |
9266
|
|
|
|
|
|
|
# print'~'x79,"\n",$D<1024?"0.$D":$D>>10,"K, $C files\n\n\n"; |
9267
|
|
|
|
|
|
|
# |
9268
|
|
|
|
|
|
|
# If the '<' starts an angle operator, it must end on this line and |
9269
|
|
|
|
|
|
|
# it must not have certain characters like ';' and '=' in it. I use |
9270
|
|
|
|
|
|
|
# this to limit the testing. This filter should be improved if |
9271
|
|
|
|
|
|
|
# possible. |
9272
|
|
|
|
|
|
|
|
9273
|
8
|
50
|
|
|
|
204
|
if ( $input_line =~ /($filter)/g ) { |
9274
|
|
|
|
|
|
|
|
9275
|
8
|
50
|
|
|
|
40
|
if ( $1 eq '>' ) { |
9276
|
|
|
|
|
|
|
|
9277
|
|
|
|
|
|
|
# We MAY have found an angle operator termination if we get |
9278
|
|
|
|
|
|
|
# here, but we need to do more to be sure we haven't been |
9279
|
|
|
|
|
|
|
# fooled. |
9280
|
8
|
|
|
|
|
18
|
my $pos = pos($input_line); |
9281
|
|
|
|
|
|
|
|
9282
|
8
|
|
|
|
|
23
|
my $pos_beg = $rtoken_map->[$i]; |
9283
|
8
|
|
|
|
|
27
|
my $str = substr( $input_line, $pos_beg, ( $pos - $pos_beg ) ); |
9284
|
|
|
|
|
|
|
|
9285
|
|
|
|
|
|
|
# Test for '<' after possible filehandle, issue c103 |
9286
|
|
|
|
|
|
|
# print $fh <>; # syntax error |
9287
|
|
|
|
|
|
|
# print $fh <DATA>; # ok |
9288
|
|
|
|
|
|
|
# print $fh < DATA>; # syntax error at '>' |
9289
|
|
|
|
|
|
|
# print STDERR < DATA>; # ok, prints word 'DATA' |
9290
|
|
|
|
|
|
|
# print BLABLA <DATA>; # ok; does nothing unless BLABLA is defined |
9291
|
8
|
100
|
|
|
|
44
|
if ( $last_nonblank_type eq 'Z' ) { |
9292
|
|
|
|
|
|
|
|
9293
|
|
|
|
|
|
|
# $str includes brackets; something like '<DATA>' |
9294
|
1
|
0
|
33
|
|
|
10
|
if ( substr( $last_nonblank_token, 0, 1 ) !~ /[A-Za-z_]/ |
9295
|
|
|
|
|
|
|
&& substr( $str, 1, 1 ) !~ /[A-Za-z_]/ ) |
9296
|
|
|
|
|
|
|
{ |
9297
|
0
|
|
|
|
|
0
|
return ( $i, $type ); |
9298
|
|
|
|
|
|
|
} |
9299
|
|
|
|
|
|
|
} |
9300
|
|
|
|
|
|
|
|
9301
|
|
|
|
|
|
|
# Reject if the closing '>' follows a '-' as in: |
9302
|
|
|
|
|
|
|
# if ( VERSION < 5.009 && $op-> name eq 'assign' ) { } |
9303
|
8
|
100
|
|
|
|
35
|
if ( $expecting eq UNKNOWN ) { |
9304
|
2
|
|
|
|
|
5
|
my $check = substr( $input_line, $pos - 2, 1 ); |
9305
|
2
|
100
|
|
|
|
7
|
if ( $check eq '-' ) { |
9306
|
1
|
|
|
|
|
7
|
return ( $i, $type ); |
9307
|
|
|
|
|
|
|
} |
9308
|
|
|
|
|
|
|
} |
9309
|
|
|
|
|
|
|
|
9310
|
|
|
|
|
|
|
######################################debug##### |
9311
|
|
|
|
|
|
|
#$self->write_diagnostics( "ANGLE? :$str\n"); |
9312
|
|
|
|
|
|
|
#print "ANGLE: found $1 at pos=$pos str=$str check=$check\n"; |
9313
|
|
|
|
|
|
|
######################################debug##### |
9314
|
7
|
|
|
|
|
18
|
$type = 'Q'; |
9315
|
7
|
|
|
|
|
21
|
my $error; |
9316
|
7
|
|
|
|
|
37
|
( $i, $error ) = |
9317
|
|
|
|
|
|
|
inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index ); |
9318
|
|
|
|
|
|
|
|
9319
|
|
|
|
|
|
|
# It may be possible that a quote ends midway in a pretoken. |
9320
|
|
|
|
|
|
|
# If this happens, it may be necessary to split the pretoken. |
9321
|
7
|
50
|
|
|
|
36
|
if ($error) { |
9322
|
0
|
|
|
|
|
0
|
if (DEVEL_MODE) { |
9323
|
|
|
|
|
|
|
$self->Fault(<<EOM); |
9324
|
|
|
|
|
|
|
unexpected error condition returned by inverse_pretoken_map |
9325
|
|
|
|
|
|
|
EOM |
9326
|
|
|
|
|
|
|
} |
9327
|
|
|
|
|
|
|
$self->warning( |
9328
|
0
|
|
|
|
|
0
|
"Possible tokinization error..please check this line\n"); |
9329
|
|
|
|
|
|
|
} |
9330
|
|
|
|
|
|
|
|
9331
|
|
|
|
|
|
|
# Check for accidental formatting of a markup language doc... |
9332
|
|
|
|
|
|
|
# Formatting will be skipped if we set _html_tag_count_ and |
9333
|
|
|
|
|
|
|
# also set a warning of any kind. |
9334
|
7
|
|
|
|
|
19
|
my $is_html_tag; |
9335
|
7
|
|
33
|
|
|
34
|
my $is_first_string = |
9336
|
|
|
|
|
|
|
$i_beg == 0 && $self->[_last_line_number_] == 1; |
9337
|
|
|
|
|
|
|
|
9338
|
|
|
|
|
|
|
# html comment '<!...' of any type |
9339
|
7
|
50
|
33
|
|
|
102
|
if ( $str =~ /^<\s*!/ ) { |
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
9340
|
0
|
|
|
|
|
0
|
$is_html_tag = 1; |
9341
|
0
|
0
|
|
|
|
0
|
if ($is_first_string) { |
9342
|
0
|
|
|
|
|
0
|
$self->warning( |
9343
|
|
|
|
|
|
|
"looks like a markup language, continuing error checks\n" |
9344
|
|
|
|
|
|
|
); |
9345
|
|
|
|
|
|
|
} |
9346
|
|
|
|
|
|
|
} |
9347
|
|
|
|
|
|
|
|
9348
|
|
|
|
|
|
|
# html end tag, something like </h1> |
9349
|
|
|
|
|
|
|
elsif ( $str =~ /^<\s*\/\w+\s*>$/ ) { |
9350
|
0
|
|
|
|
|
0
|
$is_html_tag = 1; |
9351
|
|
|
|
|
|
|
} |
9352
|
|
|
|
|
|
|
|
9353
|
|
|
|
|
|
|
# xml prolog? |
9354
|
|
|
|
|
|
|
elsif ( $str =~ /^<\?xml\s.*\?>$/i && $is_first_string ) { |
9355
|
0
|
|
|
|
|
0
|
$is_html_tag = 1; |
9356
|
0
|
|
|
|
|
0
|
$self->warning( |
9357
|
|
|
|
|
|
|
"looks like a markup language, continuing error checks\n"); |
9358
|
|
|
|
|
|
|
} |
9359
|
|
|
|
|
|
|
else { |
9360
|
|
|
|
|
|
|
## doesn't look like a markup tag |
9361
|
|
|
|
|
|
|
} |
9362
|
|
|
|
|
|
|
|
9363
|
7
|
50
|
|
|
|
29
|
if ($is_html_tag) { |
9364
|
0
|
|
|
|
|
0
|
$self->[_html_tag_count_]++; |
9365
|
|
|
|
|
|
|
} |
9366
|
|
|
|
|
|
|
|
9367
|
|
|
|
|
|
|
# count blanks on inside of brackets |
9368
|
7
|
|
|
|
|
17
|
my $blank_count = 0; |
9369
|
7
|
100
|
|
|
|
38
|
$blank_count++ if ( $str =~ /<\s+/ ); |
9370
|
7
|
100
|
|
|
|
36
|
$blank_count++ if ( $str =~ /\s+>/ ); |
9371
|
|
|
|
|
|
|
|
9372
|
|
|
|
|
|
|
# Now let's see where we stand.... |
9373
|
|
|
|
|
|
|
# OK if math op not possible |
9374
|
7
|
100
|
|
|
|
30
|
if ( $expecting == TERM ) { |
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
9375
|
|
|
|
|
|
|
} |
9376
|
|
|
|
|
|
|
|
9377
|
|
|
|
|
|
|
elsif ($is_html_tag) { |
9378
|
|
|
|
|
|
|
} |
9379
|
|
|
|
|
|
|
|
9380
|
|
|
|
|
|
|
# OK if there are no more than 2 non-blank pre-tokens inside |
9381
|
|
|
|
|
|
|
# (not possible to write 2 token math between < and >) |
9382
|
|
|
|
|
|
|
# This catches most common cases |
9383
|
|
|
|
|
|
|
elsif ( $i <= $i_beg + 3 + $blank_count ) { |
9384
|
|
|
|
|
|
|
|
9385
|
|
|
|
|
|
|
# No longer any need to document this common case |
9386
|
|
|
|
|
|
|
## $self->write_diagnostics("ANGLE(1 or 2 tokens): $str\n"); |
9387
|
|
|
|
|
|
|
} |
9388
|
|
|
|
|
|
|
|
9389
|
|
|
|
|
|
|
# OK if there is some kind of identifier inside |
9390
|
|
|
|
|
|
|
# print $fh <tvg::INPUT>; |
9391
|
|
|
|
|
|
|
elsif ( $str =~ /^<\s*\$?(\w|::|\s)+\s*>$/ ) { |
9392
|
0
|
|
|
|
|
0
|
$self->write_diagnostics("ANGLE (contains identifier): $str\n"); |
9393
|
|
|
|
|
|
|
} |
9394
|
|
|
|
|
|
|
|
9395
|
|
|
|
|
|
|
# Not sure.. |
9396
|
|
|
|
|
|
|
else { |
9397
|
|
|
|
|
|
|
|
9398
|
|
|
|
|
|
|
# Let's try a Brace Test: any braces inside must balance |
9399
|
0
|
|
|
|
|
0
|
my $br = 0; |
9400
|
0
|
|
|
|
|
0
|
while ( $str =~ /\{/g ) { $br++ } |
|
0
|
|
|
|
|
0
|
|
9401
|
0
|
|
|
|
|
0
|
while ( $str =~ /\}/g ) { $br-- } |
|
0
|
|
|
|
|
0
|
|
9402
|
0
|
|
|
|
|
0
|
my $sb = 0; |
9403
|
0
|
|
|
|
|
0
|
while ( $str =~ /\[/g ) { $sb++ } |
|
0
|
|
|
|
|
0
|
|
9404
|
0
|
|
|
|
|
0
|
while ( $str =~ /\]/g ) { $sb-- } |
|
0
|
|
|
|
|
0
|
|
9405
|
0
|
|
|
|
|
0
|
my $pr = 0; |
9406
|
0
|
|
|
|
|
0
|
while ( $str =~ /\(/g ) { $pr++ } |
|
0
|
|
|
|
|
0
|
|
9407
|
0
|
|
|
|
|
0
|
while ( $str =~ /\)/g ) { $pr-- } |
|
0
|
|
|
|
|
0
|
|
9408
|
|
|
|
|
|
|
|
9409
|
|
|
|
|
|
|
# if braces do not balance - not angle operator |
9410
|
0
|
0
|
0
|
|
|
0
|
if ( $br || $sb || $pr ) { |
|
|
|
0
|
|
|
|
|
9411
|
0
|
|
|
|
|
0
|
$i = $i_beg; |
9412
|
0
|
|
|
|
|
0
|
$type = '<'; |
9413
|
0
|
|
|
|
|
0
|
$self->write_diagnostics( |
9414
|
|
|
|
|
|
|
"NOT ANGLE (BRACE={$br ($pr [$sb ):$str\n"); |
9415
|
|
|
|
|
|
|
} |
9416
|
|
|
|
|
|
|
|
9417
|
|
|
|
|
|
|
# we should keep doing more checks here...to be continued |
9418
|
|
|
|
|
|
|
# Tentatively accepting this as a valid angle operator. |
9419
|
|
|
|
|
|
|
# There are lots more things that can be checked. |
9420
|
|
|
|
|
|
|
else { |
9421
|
0
|
|
|
|
|
0
|
$self->write_diagnostics( |
9422
|
|
|
|
|
|
|
"ANGLE-Guessing yes: $str expecting=$expecting\n"); |
9423
|
0
|
|
|
|
|
0
|
$self->write_logfile_entry( |
9424
|
|
|
|
|
|
|
"Guessing angle operator here: $str\n"); |
9425
|
|
|
|
|
|
|
} |
9426
|
|
|
|
|
|
|
} |
9427
|
|
|
|
|
|
|
} |
9428
|
|
|
|
|
|
|
|
9429
|
|
|
|
|
|
|
# didn't find ending > |
9430
|
|
|
|
|
|
|
else { |
9431
|
0
|
0
|
|
|
|
0
|
if ( $expecting == TERM ) { |
9432
|
0
|
|
|
|
|
0
|
$self->warning("No ending > for angle operator\n"); |
9433
|
|
|
|
|
|
|
} |
9434
|
|
|
|
|
|
|
} |
9435
|
|
|
|
|
|
|
} |
9436
|
7
|
|
|
|
|
36
|
return ( $i, $type ); |
9437
|
|
|
|
|
|
|
} ## end sub find_angle_operator_termination |
9438
|
|
|
|
|
|
|
|
9439
|
|
|
|
|
|
|
sub scan_number_do { |
9440
|
|
|
|
|
|
|
|
9441
|
|
|
|
|
|
|
# scan a number in any of the formats that Perl accepts |
9442
|
|
|
|
|
|
|
# Underbars (_) are allowed in decimal numbers. |
9443
|
|
|
|
|
|
|
# input parameters - |
9444
|
|
|
|
|
|
|
# $input_line - the string to scan |
9445
|
|
|
|
|
|
|
# $i - pre_token index to start scanning |
9446
|
|
|
|
|
|
|
# $rtoken_map - reference to the pre_token map giving starting |
9447
|
|
|
|
|
|
|
# character position in $input_line of token $i |
9448
|
|
|
|
|
|
|
# output parameters - |
9449
|
|
|
|
|
|
|
# $i - last pre_token index of the number just scanned |
9450
|
|
|
|
|
|
|
# number - the number (characters); or undef if not a number |
9451
|
|
|
|
|
|
|
|
9452
|
629
|
|
|
629
|
0
|
1661
|
my ( $self, $input_line, $i, $rtoken_map, $input_type, $max_token_index ) = |
9453
|
|
|
|
|
|
|
@_; |
9454
|
629
|
|
|
|
|
1156
|
my $pos_beg = $rtoken_map->[$i]; |
9455
|
629
|
|
|
|
|
959
|
my $pos; |
9456
|
629
|
|
|
|
|
982
|
my $i_begin = $i; |
9457
|
629
|
|
|
|
|
1062
|
my $number = undef; |
9458
|
629
|
|
|
|
|
1092
|
my $type = $input_type; |
9459
|
|
|
|
|
|
|
|
9460
|
629
|
|
|
|
|
1439
|
my $first_char = substr( $input_line, $pos_beg, 1 ); |
9461
|
|
|
|
|
|
|
|
9462
|
|
|
|
|
|
|
# Look for bad starting characters; Shouldn't happen.. |
9463
|
629
|
50
|
|
|
|
2807
|
if ( $first_char !~ /[\d\.\+\-Ee]/ ) { |
9464
|
0
|
|
|
|
|
0
|
if (DEVEL_MODE) { |
9465
|
|
|
|
|
|
|
$self->Fault(<<EOM); |
9466
|
|
|
|
|
|
|
Program bug - scan_number given bad first character = '$first_char' |
9467
|
|
|
|
|
|
|
EOM |
9468
|
|
|
|
|
|
|
} |
9469
|
0
|
|
|
|
|
0
|
return ( $i, $type, $number ); |
9470
|
|
|
|
|
|
|
} |
9471
|
|
|
|
|
|
|
|
9472
|
|
|
|
|
|
|
# handle v-string without leading 'v' character ('Two Dot' rule) |
9473
|
|
|
|
|
|
|
# (vstring.t) |
9474
|
|
|
|
|
|
|
# Here is the format prior to including underscores: |
9475
|
|
|
|
|
|
|
## if ( $input_line =~ /\G((\d+)?\.\d+(\.\d+)+)/g ) { |
9476
|
629
|
|
|
|
|
1994
|
pos($input_line) = $pos_beg; |
9477
|
629
|
50
|
|
|
|
3072
|
if ( $input_line =~ /\G((\d[_\d]*)?\.[\d_]+(\.[\d_]+)+)/g ) { |
9478
|
0
|
|
|
|
|
0
|
$pos = pos($input_line); |
9479
|
0
|
|
|
|
|
0
|
my $numc = $pos - $pos_beg; |
9480
|
0
|
|
|
|
|
0
|
$number = substr( $input_line, $pos_beg, $numc ); |
9481
|
0
|
|
|
|
|
0
|
$type = 'v'; |
9482
|
0
|
|
|
|
|
0
|
$self->report_v_string($number); |
9483
|
|
|
|
|
|
|
} |
9484
|
|
|
|
|
|
|
|
9485
|
|
|
|
|
|
|
# handle octal, hex, binary |
9486
|
629
|
50
|
|
|
|
1642
|
if ( !defined($number) ) { |
9487
|
629
|
|
|
|
|
2375
|
pos($input_line) = $pos_beg; |
9488
|
|
|
|
|
|
|
|
9489
|
|
|
|
|
|
|
# Perl 5.22 added floating point literals, like '0x0.b17217f7d1cf78p0' |
9490
|
|
|
|
|
|
|
# For reference, the format prior to hex floating point is: |
9491
|
|
|
|
|
|
|
# /\G[+-]?0(([xX][0-9a-fA-F_]+)|([0-7_]+)|([bB][01_]+))/g ) |
9492
|
|
|
|
|
|
|
# (hex) (octal) (binary) |
9493
|
629
|
100
|
|
|
|
2489
|
if ( |
9494
|
|
|
|
|
|
|
$input_line =~ m{ |
9495
|
|
|
|
|
|
|
|
9496
|
|
|
|
|
|
|
\G[+-]?0( # leading [signed] 0 |
9497
|
|
|
|
|
|
|
|
9498
|
|
|
|
|
|
|
# a hex float, i.e. '0x0.b17217f7d1cf78p0' |
9499
|
|
|
|
|
|
|
([xX][0-9a-fA-F_]* # X and optional leading digits |
9500
|
|
|
|
|
|
|
(\.([0-9a-fA-F][0-9a-fA-F_]*)?)? # optional decimal and fraction |
9501
|
|
|
|
|
|
|
[Pp][+-]?[0-9a-fA-F] # REQUIRED exponent with digit |
9502
|
|
|
|
|
|
|
[0-9a-fA-F_]*) # optional Additional exponent digits |
9503
|
|
|
|
|
|
|
|
9504
|
|
|
|
|
|
|
# or hex integer |
9505
|
|
|
|
|
|
|
|([xX][0-9a-fA-F_]+) |
9506
|
|
|
|
|
|
|
|
9507
|
|
|
|
|
|
|
# or octal fraction |
9508
|
|
|
|
|
|
|
|([oO]?[0-7_]+ # string of octal digits |
9509
|
|
|
|
|
|
|
(\.([0-7][0-7_]*)?)? # optional decimal and fraction |
9510
|
|
|
|
|
|
|
[Pp][+-]?[0-7] # REQUIRED exponent, no underscore |
9511
|
|
|
|
|
|
|
[0-7_]*) # Additional exponent digits with underscores |
9512
|
|
|
|
|
|
|
|
9513
|
|
|
|
|
|
|
# or octal integer |
9514
|
|
|
|
|
|
|
|([oO]?[0-7_]+) # string of octal digits |
9515
|
|
|
|
|
|
|
|
9516
|
|
|
|
|
|
|
# or a binary float |
9517
|
|
|
|
|
|
|
|([bB][01_]* # 'b' with string of binary digits |
9518
|
|
|
|
|
|
|
(\.([01][01_]*)?)? # optional decimal and fraction |
9519
|
|
|
|
|
|
|
[Pp][+-]?[01] # Required exponent indicator, no underscore |
9520
|
|
|
|
|
|
|
[01_]*) # additional exponent bits |
9521
|
|
|
|
|
|
|
|
9522
|
|
|
|
|
|
|
# or binary integer |
9523
|
|
|
|
|
|
|
|([bB][01_]+) # 'b' with string of binary digits |
9524
|
|
|
|
|
|
|
|
9525
|
|
|
|
|
|
|
)}gx |
9526
|
|
|
|
|
|
|
) |
9527
|
|
|
|
|
|
|
{ |
9528
|
72
|
|
|
|
|
132
|
$pos = pos($input_line); |
9529
|
72
|
|
|
|
|
125
|
my $numc = $pos - $pos_beg; |
9530
|
72
|
|
|
|
|
150
|
$number = substr( $input_line, $pos_beg, $numc ); |
9531
|
72
|
|
|
|
|
132
|
$type = 'n'; |
9532
|
|
|
|
|
|
|
} |
9533
|
|
|
|
|
|
|
} |
9534
|
|
|
|
|
|
|
|
9535
|
|
|
|
|
|
|
# handle decimal |
9536
|
629
|
100
|
|
|
|
1504
|
if ( !defined($number) ) { |
9537
|
557
|
|
|
|
|
1136
|
pos($input_line) = $pos_beg; |
9538
|
|
|
|
|
|
|
|
9539
|
557
|
50
|
|
|
|
2830
|
if ( $input_line =~ /\G([+-]?[\d_]*(\.[\d_]*)?([Ee][+-]?(\d+))?)/g ) { |
9540
|
557
|
|
|
|
|
1066
|
$pos = pos($input_line); |
9541
|
|
|
|
|
|
|
|
9542
|
|
|
|
|
|
|
# watch out for things like 0..40 which would give 0. by this; |
9543
|
557
|
100
|
100
|
|
|
2054
|
if ( ( substr( $input_line, $pos - 1, 1 ) eq '.' ) |
9544
|
|
|
|
|
|
|
&& ( substr( $input_line, $pos, 1 ) eq '.' ) ) |
9545
|
|
|
|
|
|
|
{ |
9546
|
37
|
|
|
|
|
76
|
$pos--; |
9547
|
|
|
|
|
|
|
} |
9548
|
557
|
|
|
|
|
1006
|
my $numc = $pos - $pos_beg; |
9549
|
557
|
|
|
|
|
1078
|
$number = substr( $input_line, $pos_beg, $numc ); |
9550
|
557
|
|
|
|
|
1063
|
$type = 'n'; |
9551
|
|
|
|
|
|
|
} |
9552
|
|
|
|
|
|
|
} |
9553
|
|
|
|
|
|
|
|
9554
|
|
|
|
|
|
|
# filter out non-numbers like e + - . e2 .e3 +e6 |
9555
|
|
|
|
|
|
|
# the rule: at least one digit, and any 'e' must be preceded by a digit |
9556
|
629
|
100
|
66
|
|
|
3397
|
if ( |
|
|
|
66
|
|
|
|
|
9557
|
|
|
|
|
|
|
$number !~ /\d/ # no digits |
9558
|
|
|
|
|
|
|
|| ( $number =~ /^(.*)[eE]/ |
9559
|
|
|
|
|
|
|
&& $1 !~ /\d/ ) # or no digits before the 'e' |
9560
|
|
|
|
|
|
|
) |
9561
|
|
|
|
|
|
|
{ |
9562
|
303
|
|
|
|
|
498
|
$number = undef; |
9563
|
303
|
|
|
|
|
540
|
$type = $input_type; |
9564
|
303
|
|
|
|
|
1269
|
return ( $i, $type, $number ); |
9565
|
|
|
|
|
|
|
} |
9566
|
|
|
|
|
|
|
|
9567
|
|
|
|
|
|
|
# Found a number; now we must convert back from character position |
9568
|
|
|
|
|
|
|
# to pre_token index. An error here implies user syntax error. |
9569
|
|
|
|
|
|
|
# An example would be an invalid octal number like '009'. |
9570
|
326
|
|
|
|
|
585
|
my $error; |
9571
|
326
|
|
|
|
|
861
|
( $i, $error ) = |
9572
|
|
|
|
|
|
|
inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index ); |
9573
|
326
|
50
|
|
|
|
989
|
if ($error) { $self->warning("Possibly invalid number\n") } |
|
0
|
|
|
|
|
0
|
|
9574
|
|
|
|
|
|
|
|
9575
|
326
|
|
|
|
|
1285
|
return ( $i, $type, $number ); |
9576
|
|
|
|
|
|
|
} ## end sub scan_number_do |
9577
|
|
|
|
|
|
|
|
9578
|
|
|
|
|
|
|
sub inverse_pretoken_map { |
9579
|
|
|
|
|
|
|
|
9580
|
|
|
|
|
|
|
# Starting with the current pre_token index $i, scan forward until |
9581
|
|
|
|
|
|
|
# finding the index of the next pre_token whose position is $pos. |
9582
|
2170
|
|
|
2170
|
0
|
5101
|
my ( $i, $pos, $rtoken_map, $max_token_index ) = @_; |
9583
|
2170
|
|
|
|
|
3561
|
my $error = 0; |
9584
|
|
|
|
|
|
|
|
9585
|
2170
|
|
|
|
|
5449
|
while ( ++$i <= $max_token_index ) { |
9586
|
|
|
|
|
|
|
|
9587
|
4040
|
100
|
|
|
|
9699
|
if ( $pos <= $rtoken_map->[$i] ) { |
9588
|
|
|
|
|
|
|
|
9589
|
|
|
|
|
|
|
# Let the calling routine handle errors in which we do not |
9590
|
|
|
|
|
|
|
# land on a pre-token boundary. It can happen by running |
9591
|
|
|
|
|
|
|
# perltidy on some non-perl scripts, for example. |
9592
|
2135
|
50
|
|
|
|
5495
|
if ( $pos < $rtoken_map->[$i] ) { $error = 1 } |
|
0
|
|
|
|
|
0
|
|
9593
|
2135
|
|
|
|
|
3655
|
$i--; |
9594
|
2135
|
|
|
|
|
4032
|
last; |
9595
|
|
|
|
|
|
|
} |
9596
|
|
|
|
|
|
|
} |
9597
|
2170
|
|
|
|
|
5339
|
return ( $i, $error ); |
9598
|
|
|
|
|
|
|
} ## end sub inverse_pretoken_map |
9599
|
|
|
|
|
|
|
|
9600
|
|
|
|
|
|
|
sub find_here_doc { |
9601
|
|
|
|
|
|
|
|
9602
|
|
|
|
|
|
|
# find the target of a here document, if any |
9603
|
|
|
|
|
|
|
# input parameters: |
9604
|
|
|
|
|
|
|
# $i - token index of the second < of << |
9605
|
|
|
|
|
|
|
# ($i must be less than the last token index if this is called) |
9606
|
|
|
|
|
|
|
# output parameters: |
9607
|
|
|
|
|
|
|
# $found_target = 0 didn't find target; =1 found target |
9608
|
|
|
|
|
|
|
# HERE_TARGET - the target string (may be empty string) |
9609
|
|
|
|
|
|
|
# $i - unchanged if not here doc, |
9610
|
|
|
|
|
|
|
# or index of the last token of the here target |
9611
|
|
|
|
|
|
|
# $saw_error - flag noting unbalanced quote on here target |
9612
|
9
|
|
|
9
|
0
|
44
|
my ( $self, $expecting, $i, $rtokens, $rtoken_map, $max_token_index ) = @_; |
9613
|
|
|
|
|
|
|
|
9614
|
9
|
|
|
|
|
22
|
my $ibeg = $i; |
9615
|
9
|
|
|
|
|
23
|
my $found_target = 0; |
9616
|
9
|
|
|
|
|
27
|
my $here_doc_target = EMPTY_STRING; |
9617
|
9
|
|
|
|
|
22
|
my $here_quote_character = EMPTY_STRING; |
9618
|
9
|
|
|
|
|
26
|
my $saw_error = 0; |
9619
|
9
|
|
|
|
|
29
|
my ( $next_nonblank_token, $i_next_nonblank, $next_token ); |
9620
|
9
|
|
|
|
|
26
|
$next_token = $rtokens->[ $i + 1 ]; |
9621
|
|
|
|
|
|
|
|
9622
|
|
|
|
|
|
|
# perl allows a backslash before the target string (heredoc.t) |
9623
|
9
|
|
|
|
|
24
|
my $backslash = 0; |
9624
|
9
|
50
|
|
|
|
40
|
if ( $next_token eq '\\' ) { |
9625
|
0
|
|
|
|
|
0
|
$backslash = 1; |
9626
|
0
|
|
|
|
|
0
|
$next_token = $rtokens->[ $i + 2 ]; |
9627
|
|
|
|
|
|
|
} |
9628
|
|
|
|
|
|
|
|
9629
|
9
|
|
|
|
|
50
|
( $next_nonblank_token, $i_next_nonblank ) = |
9630
|
|
|
|
|
|
|
find_next_nonblank_token_on_this_line( $i, $rtokens, $max_token_index ); |
9631
|
|
|
|
|
|
|
|
9632
|
9
|
100
|
33
|
|
|
88
|
if ( $next_nonblank_token =~ /[\'\"\`]/ ) { |
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
9633
|
|
|
|
|
|
|
|
9634
|
6
|
|
|
|
|
17
|
my $in_quote = 1; |
9635
|
6
|
|
|
|
|
13
|
my $quote_depth = 0; |
9636
|
6
|
|
|
|
|
15
|
my $quote_pos = 0; |
9637
|
6
|
|
|
|
|
14
|
my $quoted_string; |
9638
|
|
|
|
|
|
|
|
9639
|
|
|
|
|
|
|
( |
9640
|
6
|
|
|
|
|
38
|
$i, $in_quote, $here_quote_character, $quote_pos, $quote_depth, |
9641
|
|
|
|
|
|
|
$quoted_string |
9642
|
|
|
|
|
|
|
) |
9643
|
|
|
|
|
|
|
= $self->follow_quoted_string( $i_next_nonblank, $in_quote, $rtokens, |
9644
|
|
|
|
|
|
|
$here_quote_character, $quote_pos, $quote_depth, $max_token_index ); |
9645
|
|
|
|
|
|
|
|
9646
|
6
|
50
|
|
|
|
37
|
if ($in_quote) { # didn't find end of quote, so no target found |
9647
|
0
|
|
|
|
|
0
|
$i = $ibeg; |
9648
|
0
|
0
|
|
|
|
0
|
if ( $expecting == TERM ) { |
9649
|
0
|
|
|
|
|
0
|
$self->warning( |
9650
|
|
|
|
|
|
|
"Did not find here-doc string terminator ($here_quote_character) before end of line \n" |
9651
|
|
|
|
|
|
|
); |
9652
|
0
|
|
|
|
|
0
|
$saw_error = 1; |
9653
|
|
|
|
|
|
|
} |
9654
|
|
|
|
|
|
|
} |
9655
|
|
|
|
|
|
|
else { # found ending quote |
9656
|
6
|
|
|
|
|
19
|
$found_target = 1; |
9657
|
|
|
|
|
|
|
|
9658
|
6
|
|
|
|
|
11
|
my $tokj; |
9659
|
6
|
|
|
|
|
28
|
foreach my $j ( $i_next_nonblank + 1 .. $i - 1 ) { |
9660
|
6
|
|
|
|
|
20
|
$tokj = $rtokens->[$j]; |
9661
|
|
|
|
|
|
|
|
9662
|
|
|
|
|
|
|
# we have to remove any backslash before the quote character |
9663
|
|
|
|
|
|
|
# so that the here-doc-target exactly matches this string |
9664
|
|
|
|
|
|
|
next |
9665
|
6
|
0
|
33
|
|
|
37
|
if ( $tokj eq "\\" |
|
|
|
33
|
|
|
|
|
9666
|
|
|
|
|
|
|
&& $j < $i - 1 |
9667
|
|
|
|
|
|
|
&& $rtokens->[ $j + 1 ] eq $here_quote_character ); |
9668
|
6
|
|
|
|
|
25
|
$here_doc_target .= $tokj; |
9669
|
|
|
|
|
|
|
} |
9670
|
|
|
|
|
|
|
} |
9671
|
|
|
|
|
|
|
} |
9672
|
|
|
|
|
|
|
|
9673
|
|
|
|
|
|
|
elsif ( ( $next_token =~ /^\s*$/ ) and ( $expecting == TERM ) ) { |
9674
|
0
|
|
|
|
|
0
|
$found_target = 1; |
9675
|
0
|
|
|
|
|
0
|
$self->write_logfile_entry( |
9676
|
|
|
|
|
|
|
"found blank here-target after <<; suggest using \"\"\n"); |
9677
|
0
|
|
|
|
|
0
|
$i = $ibeg; |
9678
|
|
|
|
|
|
|
} |
9679
|
|
|
|
|
|
|
elsif ( $next_token =~ /^\w/ ) { # simple bareword or integer after << |
9680
|
|
|
|
|
|
|
|
9681
|
3
|
|
|
|
|
9
|
my $here_doc_expected; |
9682
|
3
|
50
|
|
|
|
20
|
if ( $expecting == UNKNOWN ) { |
9683
|
0
|
|
|
|
|
0
|
$here_doc_expected = $self->guess_if_here_doc($next_token); |
9684
|
|
|
|
|
|
|
} |
9685
|
|
|
|
|
|
|
else { |
9686
|
3
|
|
|
|
|
11
|
$here_doc_expected = 1; |
9687
|
|
|
|
|
|
|
} |
9688
|
|
|
|
|
|
|
|
9689
|
3
|
50
|
|
|
|
13
|
if ($here_doc_expected) { |
9690
|
3
|
|
|
|
|
11
|
$found_target = 1; |
9691
|
3
|
|
|
|
|
7
|
$here_doc_target = $next_token; |
9692
|
3
|
|
|
|
|
8
|
$i = $ibeg + 1; |
9693
|
|
|
|
|
|
|
} |
9694
|
|
|
|
|
|
|
|
9695
|
|
|
|
|
|
|
} |
9696
|
|
|
|
|
|
|
else { |
9697
|
|
|
|
|
|
|
|
9698
|
0
|
0
|
|
|
|
0
|
if ( $expecting == TERM ) { |
9699
|
0
|
|
|
|
|
0
|
$found_target = 1; |
9700
|
0
|
|
|
|
|
0
|
$self->write_logfile_entry("Note: bare here-doc operator <<\n"); |
9701
|
|
|
|
|
|
|
} |
9702
|
|
|
|
|
|
|
else { |
9703
|
0
|
|
|
|
|
0
|
$i = $ibeg; |
9704
|
|
|
|
|
|
|
} |
9705
|
|
|
|
|
|
|
} |
9706
|
|
|
|
|
|
|
|
9707
|
|
|
|
|
|
|
# patch to neglect any prepended backslash |
9708
|
9
|
50
|
33
|
|
|
68
|
if ( $found_target && $backslash ) { $i++ } |
|
0
|
|
|
|
|
0
|
|
9709
|
|
|
|
|
|
|
|
9710
|
9
|
|
|
|
|
53
|
return ( $found_target, $here_doc_target, $here_quote_character, $i, |
9711
|
|
|
|
|
|
|
$saw_error ); |
9712
|
|
|
|
|
|
|
} ## end sub find_here_doc |
9713
|
|
|
|
|
|
|
|
9714
|
|
|
|
|
|
|
sub do_quote { |
9715
|
|
|
|
|
|
|
|
9716
|
|
|
|
|
|
|
# follow (or continue following) quoted string(s) |
9717
|
|
|
|
|
|
|
# $in_quote return code: |
9718
|
|
|
|
|
|
|
# 0 - ok, found end |
9719
|
|
|
|
|
|
|
# 1 - still must find end of quote whose target is $quote_character |
9720
|
|
|
|
|
|
|
# 2 - still looking for end of first of two quotes |
9721
|
|
|
|
|
|
|
# |
9722
|
|
|
|
|
|
|
# Returns updated strings: |
9723
|
|
|
|
|
|
|
# $quoted_string_1 = quoted string seen while in_quote=1 |
9724
|
|
|
|
|
|
|
# $quoted_string_2 = quoted string seen while in_quote=2 |
9725
|
|
|
|
|
|
|
my ( |
9726
|
|
|
|
|
|
|
|
9727
|
2768
|
|
|
2768
|
0
|
7959
|
$self, |
9728
|
|
|
|
|
|
|
$i, |
9729
|
|
|
|
|
|
|
$in_quote, |
9730
|
|
|
|
|
|
|
$quote_character, |
9731
|
|
|
|
|
|
|
$quote_pos, |
9732
|
|
|
|
|
|
|
$quote_depth, |
9733
|
|
|
|
|
|
|
$quoted_string_1, |
9734
|
|
|
|
|
|
|
$quoted_string_2, |
9735
|
|
|
|
|
|
|
$rtokens, |
9736
|
|
|
|
|
|
|
$rtoken_map, |
9737
|
|
|
|
|
|
|
$max_token_index, |
9738
|
|
|
|
|
|
|
|
9739
|
|
|
|
|
|
|
) = @_; |
9740
|
|
|
|
|
|
|
|
9741
|
2768
|
|
|
|
|
4093
|
my $quoted_string; |
9742
|
2768
|
100
|
|
|
|
6353
|
if ( $in_quote == 2 ) { # two quotes/quoted_string_1s to follow |
9743
|
29
|
|
|
|
|
58
|
my $ibeg = $i; |
9744
|
|
|
|
|
|
|
( |
9745
|
29
|
|
|
|
|
154
|
$i, $in_quote, $quote_character, $quote_pos, $quote_depth, |
9746
|
|
|
|
|
|
|
$quoted_string |
9747
|
|
|
|
|
|
|
) |
9748
|
|
|
|
|
|
|
= $self->follow_quoted_string( $ibeg, $in_quote, $rtokens, |
9749
|
|
|
|
|
|
|
$quote_character, $quote_pos, $quote_depth, $max_token_index ); |
9750
|
29
|
|
|
|
|
86
|
$quoted_string_2 .= $quoted_string; |
9751
|
29
|
50
|
|
|
|
101
|
if ( $in_quote == 1 ) { |
9752
|
29
|
50
|
|
|
|
110
|
if ( $quote_character =~ /[\{\[\<\(]/ ) { $i++; } |
|
0
|
|
|
|
|
0
|
|
9753
|
29
|
|
|
|
|
71
|
$quote_character = EMPTY_STRING; |
9754
|
|
|
|
|
|
|
} |
9755
|
|
|
|
|
|
|
else { |
9756
|
0
|
|
|
|
|
0
|
$quoted_string_2 .= "\n"; |
9757
|
|
|
|
|
|
|
} |
9758
|
|
|
|
|
|
|
} |
9759
|
|
|
|
|
|
|
|
9760
|
2768
|
50
|
|
|
|
5979
|
if ( $in_quote == 1 ) { # one (more) quote to follow |
9761
|
2768
|
|
|
|
|
4425
|
my $ibeg = $i; |
9762
|
|
|
|
|
|
|
( |
9763
|
2768
|
|
|
|
|
7652
|
$i, $in_quote, $quote_character, $quote_pos, $quote_depth, |
9764
|
|
|
|
|
|
|
$quoted_string |
9765
|
|
|
|
|
|
|
) |
9766
|
|
|
|
|
|
|
= $self->follow_quoted_string( $ibeg, $in_quote, $rtokens, |
9767
|
|
|
|
|
|
|
$quote_character, $quote_pos, $quote_depth, $max_token_index ); |
9768
|
2768
|
|
|
|
|
5952
|
$quoted_string_1 .= $quoted_string; |
9769
|
2768
|
100
|
|
|
|
6210
|
if ( $in_quote == 1 ) { |
9770
|
183
|
|
|
|
|
375
|
$quoted_string_1 .= "\n"; |
9771
|
|
|
|
|
|
|
} |
9772
|
|
|
|
|
|
|
} |
9773
|
|
|
|
|
|
|
return ( |
9774
|
|
|
|
|
|
|
|
9775
|
2768
|
|
|
|
|
9569
|
$i, |
9776
|
|
|
|
|
|
|
$in_quote, |
9777
|
|
|
|
|
|
|
$quote_character, |
9778
|
|
|
|
|
|
|
$quote_pos, |
9779
|
|
|
|
|
|
|
$quote_depth, |
9780
|
|
|
|
|
|
|
$quoted_string_1, |
9781
|
|
|
|
|
|
|
$quoted_string_2, |
9782
|
|
|
|
|
|
|
|
9783
|
|
|
|
|
|
|
); |
9784
|
|
|
|
|
|
|
} ## end sub do_quote |
9785
|
|
|
|
|
|
|
|
9786
|
|
|
|
|
|
|
sub follow_quoted_string { |
9787
|
|
|
|
|
|
|
|
9788
|
|
|
|
|
|
|
# scan for a specific token, skipping escaped characters |
9789
|
|
|
|
|
|
|
# if the quote character is blank, use the first non-blank character |
9790
|
|
|
|
|
|
|
# input parameters: |
9791
|
|
|
|
|
|
|
# $rtokens = reference to the array of tokens |
9792
|
|
|
|
|
|
|
# $i = the token index of the first character to search |
9793
|
|
|
|
|
|
|
# $in_quote = number of quoted strings being followed |
9794
|
|
|
|
|
|
|
# $beginning_tok = the starting quote character |
9795
|
|
|
|
|
|
|
# $quote_pos = index to check next for alphanumeric delimiter |
9796
|
|
|
|
|
|
|
# output parameters: |
9797
|
|
|
|
|
|
|
# $i = the token index of the ending quote character |
9798
|
|
|
|
|
|
|
# $in_quote = decremented if found end, unchanged if not |
9799
|
|
|
|
|
|
|
# $beginning_tok = the starting quote character |
9800
|
|
|
|
|
|
|
# $quote_pos = index to check next for alphanumeric delimiter |
9801
|
|
|
|
|
|
|
# $quote_depth = nesting depth, since delimiters '{ ( [ <' can be nested. |
9802
|
|
|
|
|
|
|
# $quoted_string = the text of the quote (without quotation tokens) |
9803
|
|
|
|
|
|
|
my ( |
9804
|
|
|
|
|
|
|
|
9805
|
2814
|
|
|
2814
|
0
|
6539
|
$self, |
9806
|
|
|
|
|
|
|
$i_beg, |
9807
|
|
|
|
|
|
|
$in_quote, |
9808
|
|
|
|
|
|
|
$rtokens, |
9809
|
|
|
|
|
|
|
$beginning_tok, |
9810
|
|
|
|
|
|
|
$quote_pos, |
9811
|
|
|
|
|
|
|
$quote_depth, |
9812
|
|
|
|
|
|
|
$max_token_index, |
9813
|
|
|
|
|
|
|
|
9814
|
|
|
|
|
|
|
) = @_; |
9815
|
|
|
|
|
|
|
|
9816
|
2814
|
|
|
|
|
4447
|
my ( $tok, $end_tok ); |
9817
|
2814
|
|
|
|
|
4690
|
my $i = $i_beg - 1; |
9818
|
2814
|
|
|
|
|
4248
|
my $quoted_string = EMPTY_STRING; |
9819
|
|
|
|
|
|
|
|
9820
|
2814
|
|
|
|
|
4024
|
0 && do { |
9821
|
|
|
|
|
|
|
print {*STDOUT} |
9822
|
|
|
|
|
|
|
"QUOTE entering with quote_pos = $quote_pos i=$i beginning_tok =$beginning_tok\n"; |
9823
|
|
|
|
|
|
|
}; |
9824
|
|
|
|
|
|
|
|
9825
|
|
|
|
|
|
|
# get the corresponding end token |
9826
|
2814
|
100
|
|
|
|
13276
|
if ( $beginning_tok !~ /^\s*$/ ) { |
9827
|
183
|
|
|
|
|
560
|
$end_tok = matching_end_token($beginning_tok); |
9828
|
|
|
|
|
|
|
} |
9829
|
|
|
|
|
|
|
|
9830
|
|
|
|
|
|
|
# a blank token means we must find and use the first non-blank one |
9831
|
|
|
|
|
|
|
else { |
9832
|
2631
|
100
|
|
|
|
6567
|
my $allow_quote_comments = ( $i < 0 ) ? 1 : 0; # i<0 means we saw a <cr> |
9833
|
|
|
|
|
|
|
|
9834
|
2631
|
|
|
|
|
6092
|
while ( $i < $max_token_index ) { |
9835
|
2631
|
|
|
|
|
4996
|
$tok = $rtokens->[ ++$i ]; |
9836
|
|
|
|
|
|
|
|
9837
|
2631
|
50
|
|
|
|
8026
|
if ( $tok !~ /^\s*$/ ) { |
9838
|
|
|
|
|
|
|
|
9839
|
2631
|
50
|
66
|
|
|
7838
|
if ( ( $tok eq '#' ) && ($allow_quote_comments) ) { |
9840
|
0
|
|
|
|
|
0
|
$i = $max_token_index; |
9841
|
|
|
|
|
|
|
} |
9842
|
|
|
|
|
|
|
else { |
9843
|
|
|
|
|
|
|
|
9844
|
2631
|
100
|
|
|
|
5746
|
if ( length($tok) > 1 ) { |
9845
|
1
|
50
|
|
|
|
5
|
if ( $quote_pos <= 0 ) { $quote_pos = 1 } |
|
1
|
|
|
|
|
2
|
|
9846
|
1
|
|
|
|
|
5
|
$beginning_tok = substr( $tok, $quote_pos - 1, 1 ); |
9847
|
|
|
|
|
|
|
} |
9848
|
|
|
|
|
|
|
else { |
9849
|
2630
|
|
|
|
|
5328
|
$beginning_tok = $tok; |
9850
|
2630
|
|
|
|
|
4067
|
$quote_pos = 0; |
9851
|
|
|
|
|
|
|
} |
9852
|
2631
|
|
|
|
|
6289
|
$end_tok = matching_end_token($beginning_tok); |
9853
|
2631
|
|
|
|
|
4642
|
$quote_depth = 1; |
9854
|
2631
|
|
|
|
|
4694
|
last; |
9855
|
|
|
|
|
|
|
} |
9856
|
|
|
|
|
|
|
} |
9857
|
|
|
|
|
|
|
else { |
9858
|
0
|
|
|
|
|
0
|
$allow_quote_comments = 1; |
9859
|
|
|
|
|
|
|
} |
9860
|
|
|
|
|
|
|
} |
9861
|
|
|
|
|
|
|
} |
9862
|
|
|
|
|
|
|
|
9863
|
|
|
|
|
|
|
# There are two different loops which search for the ending quote |
9864
|
|
|
|
|
|
|
# character. In the rare case of an alphanumeric quote delimiter, we |
9865
|
|
|
|
|
|
|
# have to look through alphanumeric tokens character-by-character, since |
9866
|
|
|
|
|
|
|
# the pre-tokenization process combines multiple alphanumeric |
9867
|
|
|
|
|
|
|
# characters, whereas for a non-alphanumeric delimiter, only tokens of |
9868
|
|
|
|
|
|
|
# length 1 can match. |
9869
|
|
|
|
|
|
|
|
9870
|
|
|
|
|
|
|
#---------------------------------------------------------------- |
9871
|
|
|
|
|
|
|
# Case 1 (rare): loop for case of alphanumeric quote delimiter.. |
9872
|
|
|
|
|
|
|
# "quote_pos" is the position the current word to begin searching |
9873
|
|
|
|
|
|
|
#---------------------------------------------------------------- |
9874
|
2814
|
100
|
|
|
|
7448
|
if ( $beginning_tok =~ /\w/ ) { |
9875
|
|
|
|
|
|
|
|
9876
|
|
|
|
|
|
|
# Note this because it is not recommended practice except |
9877
|
|
|
|
|
|
|
# for obfuscated perl contests |
9878
|
1
|
50
|
|
|
|
5
|
if ( $in_quote == 1 ) { |
9879
|
1
|
|
|
|
|
7
|
$self->write_logfile_entry( |
9880
|
|
|
|
|
|
|
"Note: alphanumeric quote delimiter ($beginning_tok) \n"); |
9881
|
|
|
|
|
|
|
} |
9882
|
|
|
|
|
|
|
|
9883
|
|
|
|
|
|
|
# Note: changed < to <= here to fix c109. Relying on extra end blanks. |
9884
|
1
|
|
|
|
|
12
|
while ( $i <= $max_token_index ) { |
9885
|
|
|
|
|
|
|
|
9886
|
4
|
100
|
66
|
|
|
15
|
if ( $quote_pos == 0 || ( $i < 0 ) ) { |
9887
|
3
|
|
|
|
|
17
|
$tok = $rtokens->[ ++$i ]; |
9888
|
|
|
|
|
|
|
|
9889
|
3
|
100
|
|
|
|
10
|
if ( $tok eq '\\' ) { |
9890
|
|
|
|
|
|
|
|
9891
|
|
|
|
|
|
|
# retain backslash unless it hides the end token |
9892
|
1
|
50
|
|
|
|
26
|
$quoted_string .= $tok |
9893
|
|
|
|
|
|
|
unless $rtokens->[ $i + 1 ] eq $end_tok; |
9894
|
1
|
|
|
|
|
2
|
$quote_pos++; |
9895
|
1
|
50
|
|
|
|
6
|
last if ( $i >= $max_token_index ); |
9896
|
1
|
|
|
|
|
7
|
$tok = $rtokens->[ ++$i ]; |
9897
|
|
|
|
|
|
|
} |
9898
|
|
|
|
|
|
|
} |
9899
|
4
|
|
|
|
|
6
|
my $old_pos = $quote_pos; |
9900
|
|
|
|
|
|
|
|
9901
|
4
|
|
|
|
|
8
|
$quote_pos = 1 + index( $tok, $end_tok, $quote_pos ); |
9902
|
|
|
|
|
|
|
|
9903
|
4
|
100
|
|
|
|
11
|
if ( $quote_pos > 0 ) { |
9904
|
|
|
|
|
|
|
|
9905
|
1
|
|
|
|
|
5
|
$quoted_string .= |
9906
|
|
|
|
|
|
|
substr( $tok, $old_pos, $quote_pos - $old_pos - 1 ); |
9907
|
|
|
|
|
|
|
|
9908
|
|
|
|
|
|
|
# NOTE: any quote modifiers will be at the end of '$tok'. If we |
9909
|
|
|
|
|
|
|
# wanted to check them, this is the place to get them. But |
9910
|
|
|
|
|
|
|
# this quote form is rarely used in practice, so it isn't |
9911
|
|
|
|
|
|
|
# worthwhile. |
9912
|
|
|
|
|
|
|
|
9913
|
1
|
|
|
|
|
1
|
$quote_depth--; |
9914
|
|
|
|
|
|
|
|
9915
|
1
|
50
|
|
|
|
5
|
if ( $quote_depth == 0 ) { |
9916
|
1
|
|
|
|
|
2
|
$in_quote--; |
9917
|
1
|
|
|
|
|
2
|
last; |
9918
|
|
|
|
|
|
|
} |
9919
|
|
|
|
|
|
|
} |
9920
|
|
|
|
|
|
|
else { |
9921
|
3
|
50
|
|
|
|
8
|
if ( $old_pos <= length($tok) ) { |
9922
|
3
|
|
|
|
|
10
|
$quoted_string .= substr( $tok, $old_pos ); |
9923
|
|
|
|
|
|
|
} |
9924
|
|
|
|
|
|
|
} |
9925
|
|
|
|
|
|
|
} |
9926
|
|
|
|
|
|
|
} |
9927
|
|
|
|
|
|
|
|
9928
|
|
|
|
|
|
|
#----------------------------------------------------------------------- |
9929
|
|
|
|
|
|
|
# Case 2 (normal): loop for case of a non-alphanumeric quote delimiter.. |
9930
|
|
|
|
|
|
|
#----------------------------------------------------------------------- |
9931
|
|
|
|
|
|
|
else { |
9932
|
|
|
|
|
|
|
|
9933
|
2813
|
|
|
|
|
6776
|
while ( $i < $max_token_index ) { |
9934
|
10798
|
|
|
|
|
16451
|
$tok = $rtokens->[ ++$i ]; |
9935
|
|
|
|
|
|
|
|
9936
|
10798
|
100
|
|
|
|
24351
|
if ( $tok eq $end_tok ) { |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
9937
|
2620
|
|
|
|
|
4169
|
$quote_depth--; |
9938
|
|
|
|
|
|
|
|
9939
|
2620
|
100
|
|
|
|
6051
|
if ( $quote_depth == 0 ) { |
9940
|
2619
|
|
|
|
|
3745
|
$in_quote--; |
9941
|
2619
|
|
|
|
|
4151
|
last; |
9942
|
|
|
|
|
|
|
} |
9943
|
|
|
|
|
|
|
} |
9944
|
|
|
|
|
|
|
elsif ( $tok eq $beginning_tok ) { |
9945
|
1
|
|
|
|
|
12
|
$quote_depth++; |
9946
|
|
|
|
|
|
|
} |
9947
|
|
|
|
|
|
|
elsif ( $tok eq '\\' ) { |
9948
|
|
|
|
|
|
|
|
9949
|
|
|
|
|
|
|
# retain backslash unless it hides the beginning or end token |
9950
|
376
|
|
|
|
|
1003
|
$tok = $rtokens->[ ++$i ]; |
9951
|
376
|
100
|
100
|
|
|
2060
|
$quoted_string .= '\\' |
9952
|
|
|
|
|
|
|
if ( $tok ne $end_tok && $tok ne $beginning_tok ); |
9953
|
|
|
|
|
|
|
} |
9954
|
|
|
|
|
|
|
else { |
9955
|
|
|
|
|
|
|
## nothing special |
9956
|
|
|
|
|
|
|
} |
9957
|
8179
|
|
|
|
|
15531
|
$quoted_string .= $tok; |
9958
|
|
|
|
|
|
|
} |
9959
|
|
|
|
|
|
|
} |
9960
|
2814
|
50
|
|
|
|
6485
|
if ( $i > $max_token_index ) { $i = $max_token_index } |
|
0
|
|
|
|
|
0
|
|
9961
|
|
|
|
|
|
|
return ( |
9962
|
|
|
|
|
|
|
|
9963
|
2814
|
|
|
|
|
10866
|
$i, |
9964
|
|
|
|
|
|
|
$in_quote, |
9965
|
|
|
|
|
|
|
$beginning_tok, |
9966
|
|
|
|
|
|
|
$quote_pos, |
9967
|
|
|
|
|
|
|
$quote_depth, |
9968
|
|
|
|
|
|
|
$quoted_string, |
9969
|
|
|
|
|
|
|
|
9970
|
|
|
|
|
|
|
); |
9971
|
|
|
|
|
|
|
} ## end sub follow_quoted_string |
9972
|
|
|
|
|
|
|
|
9973
|
|
|
|
|
|
|
sub indicate_error { |
9974
|
0
|
|
|
0
|
0
|
0
|
my ( $self, $msg, $line_number, $input_line, $pos, $carrat ) = @_; |
9975
|
0
|
|
|
|
|
0
|
$self->interrupt_logfile(); |
9976
|
0
|
|
|
|
|
0
|
$self->warning($msg); |
9977
|
0
|
|
|
|
|
0
|
$self->write_error_indicator_pair( $line_number, $input_line, $pos, |
9978
|
|
|
|
|
|
|
$carrat ); |
9979
|
0
|
|
|
|
|
0
|
$self->resume_logfile(); |
9980
|
0
|
|
|
|
|
0
|
return; |
9981
|
|
|
|
|
|
|
} ## end sub indicate_error |
9982
|
|
|
|
|
|
|
|
9983
|
|
|
|
|
|
|
sub write_error_indicator_pair { |
9984
|
0
|
|
|
0
|
0
|
0
|
my ( $self, $line_number, $input_line, $pos, $carrat ) = @_; |
9985
|
0
|
|
|
|
|
0
|
my ( $offset, $numbered_line, $underline ) = |
9986
|
|
|
|
|
|
|
make_numbered_line( $line_number, $input_line, $pos ); |
9987
|
0
|
|
|
|
|
0
|
$underline = write_on_underline( $underline, $pos - $offset, $carrat ); |
9988
|
0
|
|
|
|
|
0
|
$self->warning( $numbered_line . "\n" ); |
9989
|
0
|
|
|
|
|
0
|
$underline =~ s/\s*$//; |
9990
|
0
|
|
|
|
|
0
|
$self->warning( $underline . "\n" ); |
9991
|
0
|
|
|
|
|
0
|
return; |
9992
|
|
|
|
|
|
|
} ## end sub write_error_indicator_pair |
9993
|
|
|
|
|
|
|
|
9994
|
|
|
|
|
|
|
sub make_numbered_line { |
9995
|
|
|
|
|
|
|
|
9996
|
|
|
|
|
|
|
# Given an input line, its line number, and a character position of |
9997
|
|
|
|
|
|
|
# interest, create a string not longer than 80 characters of the form |
9998
|
|
|
|
|
|
|
# $lineno: sub_string |
9999
|
|
|
|
|
|
|
# such that the sub_string of $str contains the position of interest |
10000
|
|
|
|
|
|
|
# |
10001
|
|
|
|
|
|
|
# Here is an example of what we want, in this case we add trailing |
10002
|
|
|
|
|
|
|
# '...' because the line is long. |
10003
|
|
|
|
|
|
|
# |
10004
|
|
|
|
|
|
|
# 2: (One of QAML 2.0's authors is a member of the World Wide Web Con ... |
10005
|
|
|
|
|
|
|
# |
10006
|
|
|
|
|
|
|
# Here is another example, this time in which we used leading '...' |
10007
|
|
|
|
|
|
|
# because of excessive length: |
10008
|
|
|
|
|
|
|
# |
10009
|
|
|
|
|
|
|
# 2: ... er of the World Wide Web Consortium's |
10010
|
|
|
|
|
|
|
# |
10011
|
|
|
|
|
|
|
# input parameters are: |
10012
|
|
|
|
|
|
|
# $lineno = line number |
10013
|
|
|
|
|
|
|
# $str = the text of the line |
10014
|
|
|
|
|
|
|
# $pos = position of interest (the error) : 0 = first character |
10015
|
|
|
|
|
|
|
# |
10016
|
|
|
|
|
|
|
# We return : |
10017
|
|
|
|
|
|
|
# - $offset = an offset which corrects the position in case we only |
10018
|
|
|
|
|
|
|
# display part of a line, such that $pos-$offset is the effective |
10019
|
|
|
|
|
|
|
# position from the start of the displayed line. |
10020
|
|
|
|
|
|
|
# - $numbered_line = the numbered line as above, |
10021
|
|
|
|
|
|
|
# - $underline = a blank 'underline' which is all spaces with the same |
10022
|
|
|
|
|
|
|
# number of characters as the numbered line. |
10023
|
|
|
|
|
|
|
|
10024
|
0
|
|
|
0
|
0
|
0
|
my ( $lineno, $str, $pos ) = @_; |
10025
|
0
|
0
|
|
|
|
0
|
my $offset = ( $pos < 60 ) ? 0 : $pos - 40; |
10026
|
0
|
|
|
|
|
0
|
my $excess = length($str) - $offset - 68; |
10027
|
0
|
0
|
|
|
|
0
|
my $numc = ( $excess > 0 ) ? 68 : undef; |
10028
|
|
|
|
|
|
|
|
10029
|
0
|
0
|
|
|
|
0
|
if ( defined($numc) ) { |
10030
|
0
|
0
|
|
|
|
0
|
if ( $offset == 0 ) { |
10031
|
0
|
|
|
|
|
0
|
$str = substr( $str, $offset, $numc - 4 ) . " ..."; |
10032
|
|
|
|
|
|
|
} |
10033
|
|
|
|
|
|
|
else { |
10034
|
0
|
|
|
|
|
0
|
$str = "... " . substr( $str, $offset + 4, $numc - 4 ) . " ..."; |
10035
|
|
|
|
|
|
|
} |
10036
|
|
|
|
|
|
|
} |
10037
|
|
|
|
|
|
|
else { |
10038
|
|
|
|
|
|
|
|
10039
|
0
|
0
|
|
|
|
0
|
if ( $offset == 0 ) { |
10040
|
|
|
|
|
|
|
} |
10041
|
|
|
|
|
|
|
else { |
10042
|
0
|
|
|
|
|
0
|
$str = "... " . substr( $str, $offset + 4 ); |
10043
|
|
|
|
|
|
|
} |
10044
|
|
|
|
|
|
|
} |
10045
|
|
|
|
|
|
|
|
10046
|
0
|
|
|
|
|
0
|
my $numbered_line = sprintf( "%d: ", $lineno ); |
10047
|
0
|
|
|
|
|
0
|
$offset -= length($numbered_line); |
10048
|
0
|
|
|
|
|
0
|
$numbered_line .= $str; |
10049
|
0
|
|
|
|
|
0
|
my $underline = SPACE x length($numbered_line); |
10050
|
0
|
|
|
|
|
0
|
return ( $offset, $numbered_line, $underline ); |
10051
|
|
|
|
|
|
|
} ## end sub make_numbered_line |
10052
|
|
|
|
|
|
|
|
10053
|
|
|
|
|
|
|
sub write_on_underline { |
10054
|
|
|
|
|
|
|
|
10055
|
|
|
|
|
|
|
# The "underline" is a string that shows where an error is; it starts |
10056
|
|
|
|
|
|
|
# out as a string of blanks with the same length as the numbered line of |
10057
|
|
|
|
|
|
|
# code above it, and we have to add marking to show where an error is. |
10058
|
|
|
|
|
|
|
# In the example below, we want to write the string '--^' just below |
10059
|
|
|
|
|
|
|
# the line of bad code: |
10060
|
|
|
|
|
|
|
# |
10061
|
|
|
|
|
|
|
# 2: (One of QAML 2.0's authors is a member of the World Wide Web Con ... |
10062
|
|
|
|
|
|
|
# ---^ |
10063
|
|
|
|
|
|
|
# We are given the current underline string, plus a position and a |
10064
|
|
|
|
|
|
|
# string to write on it. |
10065
|
|
|
|
|
|
|
# |
10066
|
|
|
|
|
|
|
# In the above example, there will be 2 calls to do this: |
10067
|
|
|
|
|
|
|
# First call: $pos=19, pos_chr=^ |
10068
|
|
|
|
|
|
|
# Second call: $pos=16, pos_chr=--- |
10069
|
|
|
|
|
|
|
# |
10070
|
|
|
|
|
|
|
# This is a trivial thing to do with substr, but there is some |
10071
|
|
|
|
|
|
|
# checking to do. |
10072
|
|
|
|
|
|
|
|
10073
|
0
|
|
|
0
|
0
|
0
|
my ( $underline, $pos, $pos_chr ) = @_; |
10074
|
|
|
|
|
|
|
|
10075
|
|
|
|
|
|
|
# check for error..shouldn't happen |
10076
|
0
|
0
|
0
|
|
|
0
|
if ( $pos < 0 || $pos > length($underline) ) { |
10077
|
0
|
|
|
|
|
0
|
return $underline; |
10078
|
|
|
|
|
|
|
} |
10079
|
0
|
|
|
|
|
0
|
my $excess = length($pos_chr) + $pos - length($underline); |
10080
|
0
|
0
|
|
|
|
0
|
if ( $excess > 0 ) { |
10081
|
0
|
|
|
|
|
0
|
$pos_chr = substr( $pos_chr, 0, length($pos_chr) - $excess ); |
10082
|
|
|
|
|
|
|
} |
10083
|
0
|
|
|
|
|
0
|
substr( $underline, $pos, length($pos_chr), $pos_chr ); |
10084
|
0
|
|
|
|
|
0
|
return ($underline); |
10085
|
|
|
|
|
|
|
} ## end sub write_on_underline |
10086
|
|
|
|
|
|
|
|
10087
|
|
|
|
|
|
|
sub pre_tokenize { |
10088
|
|
|
|
|
|
|
|
10089
|
6195
|
|
|
6195
|
0
|
12861
|
my ( $str, $max_tokens_wanted ) = @_; |
10090
|
|
|
|
|
|
|
|
10091
|
|
|
|
|
|
|
# Input parameters: |
10092
|
|
|
|
|
|
|
# $str = string to be parsed |
10093
|
|
|
|
|
|
|
# $max_tokens_wanted > 0 to stop on reaching this many tokens. |
10094
|
|
|
|
|
|
|
# = undef or 0 means get all tokens |
10095
|
|
|
|
|
|
|
|
10096
|
|
|
|
|
|
|
# Break a string, $str, into a sequence of preliminary tokens (pre-tokens). |
10097
|
|
|
|
|
|
|
# We look for these types of tokens: |
10098
|
|
|
|
|
|
|
# words (type='w'), example: 'max_tokens_wanted' |
10099
|
|
|
|
|
|
|
# digits (type = 'd'), example: '0755' |
10100
|
|
|
|
|
|
|
# whitespace (type = 'b'), example: ' ' |
10101
|
|
|
|
|
|
|
# single character punct (type = char) example: '=' |
10102
|
|
|
|
|
|
|
|
10103
|
|
|
|
|
|
|
# Later operations will combine one or more of these pre-tokens into final |
10104
|
|
|
|
|
|
|
# tokens. We cannot do better than this yet because we might be in a |
10105
|
|
|
|
|
|
|
# quoted string or pattern. |
10106
|
|
|
|
|
|
|
|
10107
|
|
|
|
|
|
|
# An advantage of doing this pre-tokenization step is that it keeps almost |
10108
|
|
|
|
|
|
|
# all of the regex parsing very simple and localized right here. A |
10109
|
|
|
|
|
|
|
# disadvantage is that in some extremely rare instances we will have to go |
10110
|
|
|
|
|
|
|
# back and split a pre-token. |
10111
|
|
|
|
|
|
|
|
10112
|
|
|
|
|
|
|
# Return parameters: |
10113
|
6195
|
|
|
|
|
10273
|
my @tokens = (); # array of the tokens themselves |
10114
|
6195
|
|
|
|
|
12682
|
my @token_map = (0); # string position of start of each token |
10115
|
6195
|
|
|
|
|
10091
|
my @type = (); # 'b'=whitespace, 'd'=digits, 'w'=alpha, or punct |
10116
|
|
|
|
|
|
|
|
10117
|
6195
|
100
|
|
|
|
13121
|
if ( !$max_tokens_wanted ) { $max_tokens_wanted = -1 } |
|
5910
|
|
|
|
|
9137
|
|
10118
|
|
|
|
|
|
|
|
10119
|
6195
|
|
|
|
|
14214
|
while ( $max_tokens_wanted-- ) { |
10120
|
|
|
|
|
|
|
|
10121
|
82230
|
100
|
|
|
|
229074
|
if ( |
10122
|
|
|
|
|
|
|
$str =~ m{ |
10123
|
|
|
|
|
|
|
\G( |
10124
|
|
|
|
|
|
|
(\s+) # type 'b' = whitespace - this must come before \W |
10125
|
|
|
|
|
|
|
| (\W) # or type=char = single-character, non-whitespace punct |
10126
|
|
|
|
|
|
|
| (\d+) # or type 'd' = sequence of digits - must come before \w |
10127
|
|
|
|
|
|
|
| (\w+) # or type 'w' = words not starting with a digit |
10128
|
|
|
|
|
|
|
) |
10129
|
|
|
|
|
|
|
}gcx |
10130
|
|
|
|
|
|
|
) |
10131
|
|
|
|
|
|
|
{ |
10132
|
76179
|
|
|
|
|
161064
|
push @tokens, $1; |
10133
|
76179
|
100
|
|
|
|
186872
|
push @type, |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
10134
|
|
|
|
|
|
|
defined($2) ? 'b' : defined($3) ? $1 : defined($4) ? 'd' : 'w'; |
10135
|
76179
|
|
|
|
|
150863
|
push @token_map, pos($str); |
10136
|
|
|
|
|
|
|
} |
10137
|
|
|
|
|
|
|
|
10138
|
|
|
|
|
|
|
# that's all.. |
10139
|
|
|
|
|
|
|
else { |
10140
|
6051
|
|
|
|
|
37545
|
return ( \@tokens, \@token_map, \@type ); |
10141
|
|
|
|
|
|
|
} |
10142
|
|
|
|
|
|
|
} |
10143
|
|
|
|
|
|
|
|
10144
|
144
|
|
|
|
|
917
|
return ( \@tokens, \@token_map, \@type ); |
10145
|
|
|
|
|
|
|
} ## end sub pre_tokenize |
10146
|
|
|
|
|
|
|
|
10147
|
|
|
|
|
|
|
sub show_tokens { |
10148
|
|
|
|
|
|
|
|
10149
|
|
|
|
|
|
|
# this is an old debug routine |
10150
|
|
|
|
|
|
|
# not called, but saved for reference |
10151
|
0
|
|
|
0
|
0
|
0
|
my ( $rtokens, $rtoken_map ) = @_; |
10152
|
0
|
|
|
|
|
0
|
my $num = scalar( @{$rtokens} ); |
|
0
|
|
|
|
|
0
|
|
10153
|
|
|
|
|
|
|
|
10154
|
0
|
|
|
|
|
0
|
foreach my $i ( 0 .. $num - 1 ) { |
10155
|
0
|
|
|
|
|
0
|
my $len = length( $rtokens->[$i] ); |
10156
|
0
|
|
|
|
|
0
|
print {*STDOUT} "$i:$len:$rtoken_map->[$i]:$rtokens->[$i]:\n"; |
|
0
|
|
|
|
|
0
|
|
10157
|
|
|
|
|
|
|
} |
10158
|
0
|
|
|
|
|
0
|
return; |
10159
|
|
|
|
|
|
|
} ## end sub show_tokens |
10160
|
|
|
|
|
|
|
|
10161
|
|
|
|
|
|
|
{ ## closure for sub matching end token |
10162
|
|
|
|
|
|
|
my %matching_end_token; |
10163
|
|
|
|
|
|
|
|
10164
|
|
|
|
|
|
|
BEGIN { |
10165
|
39
|
|
|
39
|
|
62235
|
%matching_end_token = ( |
10166
|
|
|
|
|
|
|
'{' => '}', |
10167
|
|
|
|
|
|
|
'(' => ')', |
10168
|
|
|
|
|
|
|
'[' => ']', |
10169
|
|
|
|
|
|
|
'<' => '>', |
10170
|
|
|
|
|
|
|
); |
10171
|
|
|
|
|
|
|
} ## end BEGIN |
10172
|
|
|
|
|
|
|
|
10173
|
|
|
|
|
|
|
sub matching_end_token { |
10174
|
|
|
|
|
|
|
|
10175
|
|
|
|
|
|
|
# return closing character for a pattern |
10176
|
2998
|
|
|
2998
|
0
|
4991
|
my $beginning_token = shift; |
10177
|
2998
|
100
|
|
|
|
7656
|
if ( $matching_end_token{$beginning_token} ) { |
10178
|
373
|
|
|
|
|
882
|
return $matching_end_token{$beginning_token}; |
10179
|
|
|
|
|
|
|
} |
10180
|
2625
|
|
|
|
|
5924
|
return ($beginning_token); |
10181
|
|
|
|
|
|
|
} ## end sub matching_end_token |
10182
|
|
|
|
|
|
|
} |
10183
|
|
|
|
|
|
|
|
10184
|
|
|
|
|
|
|
sub dump_token_types { |
10185
|
0
|
|
|
0
|
0
|
|
my ( $class, $fh ) = @_; |
10186
|
|
|
|
|
|
|
|
10187
|
|
|
|
|
|
|
# This should be the latest list of token types in use |
10188
|
|
|
|
|
|
|
# adding NEW_TOKENS: add a comment here |
10189
|
0
|
|
|
|
|
|
$fh->print(<<'END_OF_LIST'); |
10190
|
|
|
|
|
|
|
|
10191
|
|
|
|
|
|
|
Here is a list of the token types currently used for lines of type 'CODE'. |
10192
|
|
|
|
|
|
|
For the following tokens, the "type" of a token is just the token itself. |
10193
|
|
|
|
|
|
|
|
10194
|
|
|
|
|
|
|
.. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <> |
10195
|
|
|
|
|
|
|
( ) <= >= == =~ !~ != ++ -- /= x= |
10196
|
|
|
|
|
|
|
... **= <<= >>= &&= ||= //= <=> |
10197
|
|
|
|
|
|
|
, + - / * | % ! x ~ = \ ? : . < > ^ & |
10198
|
|
|
|
|
|
|
|
10199
|
|
|
|
|
|
|
The following additional token types are defined: |
10200
|
|
|
|
|
|
|
|
10201
|
|
|
|
|
|
|
type meaning |
10202
|
|
|
|
|
|
|
b blank (white space) |
10203
|
|
|
|
|
|
|
{ indent: opening structural curly brace or square bracket or paren |
10204
|
|
|
|
|
|
|
(code block, anonymous hash reference, or anonymous array reference) |
10205
|
|
|
|
|
|
|
} outdent: right structural curly brace or square bracket or paren |
10206
|
|
|
|
|
|
|
[ left non-structural square bracket (enclosing an array index) |
10207
|
|
|
|
|
|
|
] right non-structural square bracket |
10208
|
|
|
|
|
|
|
( left non-structural paren (all but a list right of an =) |
10209
|
|
|
|
|
|
|
) right non-structural paren |
10210
|
|
|
|
|
|
|
L left non-structural curly brace (enclosing a key) |
10211
|
|
|
|
|
|
|
R right non-structural curly brace |
10212
|
|
|
|
|
|
|
; terminal semicolon |
10213
|
|
|
|
|
|
|
f indicates a semicolon in a "for" statement |
10214
|
|
|
|
|
|
|
h here_doc operator << |
10215
|
|
|
|
|
|
|
# a comment |
10216
|
|
|
|
|
|
|
Q indicates a quote or pattern |
10217
|
|
|
|
|
|
|
q indicates a qw quote block |
10218
|
|
|
|
|
|
|
k a perl keyword |
10219
|
|
|
|
|
|
|
C user-defined constant or constant function (with void prototype = ()) |
10220
|
|
|
|
|
|
|
U user-defined function taking parameters |
10221
|
|
|
|
|
|
|
G user-defined function taking block parameter (like grep/map/eval) |
10222
|
|
|
|
|
|
|
S sub definition (reported as type 'i' in older versions) |
10223
|
|
|
|
|
|
|
P package definition (reported as type 'i' in older versions) |
10224
|
|
|
|
|
|
|
t type indicater such as %,$,@,*,&,sub |
10225
|
|
|
|
|
|
|
w bare word (perhaps a subroutine call) |
10226
|
|
|
|
|
|
|
i identifier of some type (with leading %, $, @, *, &, sub, -> ) |
10227
|
|
|
|
|
|
|
n a number |
10228
|
|
|
|
|
|
|
v a v-string |
10229
|
|
|
|
|
|
|
F a file test operator (like -e) |
10230
|
|
|
|
|
|
|
Y File handle |
10231
|
|
|
|
|
|
|
Z identifier in indirect object slot: may be file handle, object |
10232
|
|
|
|
|
|
|
J LABEL: code block label |
10233
|
|
|
|
|
|
|
j LABEL after next, last, redo, goto |
10234
|
|
|
|
|
|
|
p unary + |
10235
|
|
|
|
|
|
|
m unary - |
10236
|
|
|
|
|
|
|
pp pre-increment operator ++ |
10237
|
|
|
|
|
|
|
mm pre-decrement operator -- |
10238
|
|
|
|
|
|
|
A : used as attribute separator |
10239
|
|
|
|
|
|
|
|
10240
|
|
|
|
|
|
|
Here are the '_line_type' codes used internally: |
10241
|
|
|
|
|
|
|
SYSTEM - system-specific code before hash-bang line |
10242
|
|
|
|
|
|
|
CODE - line of perl code (including comments) |
10243
|
|
|
|
|
|
|
POD_START - line starting pod, such as '=head' |
10244
|
|
|
|
|
|
|
POD - pod documentation text |
10245
|
|
|
|
|
|
|
POD_END - last line of pod section, '=cut' |
10246
|
|
|
|
|
|
|
HERE - text of here-document |
10247
|
|
|
|
|
|
|
HERE_END - last line of here-doc (target word) |
10248
|
|
|
|
|
|
|
FORMAT - format section |
10249
|
|
|
|
|
|
|
FORMAT_END - last line of format section, '.' |
10250
|
|
|
|
|
|
|
SKIP - code skipping section |
10251
|
|
|
|
|
|
|
SKIP_END - last line of code skipping section, '#>>V' |
10252
|
|
|
|
|
|
|
DATA_START - __DATA__ line |
10253
|
|
|
|
|
|
|
DATA - unidentified text following __DATA__ |
10254
|
|
|
|
|
|
|
END_START - __END__ line |
10255
|
|
|
|
|
|
|
END - unidentified text following __END__ |
10256
|
|
|
|
|
|
|
ERROR - we are in big trouble, probably not a perl script |
10257
|
|
|
|
|
|
|
END_OF_LIST |
10258
|
|
|
|
|
|
|
|
10259
|
0
|
|
|
|
|
|
return; |
10260
|
|
|
|
|
|
|
} ## end sub dump_token_types |
10261
|
|
|
|
|
|
|
|
10262
|
|
|
|
|
|
|
BEGIN { |
10263
|
|
|
|
|
|
|
|
10264
|
|
|
|
|
|
|
# These names are used in error messages |
10265
|
39
|
|
|
39
|
|
373
|
@opening_brace_names = qw# '{' '[' '(' '?' #; |
10266
|
39
|
|
|
|
|
294
|
@closing_brace_names = qw# '}' ']' ')' ':' #; |
10267
|
|
|
|
|
|
|
|
10268
|
39
|
|
|
|
|
161
|
my @q; |
10269
|
|
|
|
|
|
|
|
10270
|
39
|
|
|
|
|
262
|
my @digraphs = qw( |
10271
|
|
|
|
|
|
|
.. :: << >> ** && || // -> => += -= .= %= &= |= ^= *= <> |
10272
|
|
|
|
|
|
|
<= >= == =~ !~ != ++ -- /= x= ~~ ~. |. &. ^. |
10273
|
|
|
|
|
|
|
); |
10274
|
39
|
|
|
|
|
692
|
@is_digraph{@digraphs} = (1) x scalar(@digraphs); |
10275
|
|
|
|
|
|
|
|
10276
|
39
|
|
|
|
|
237
|
@q = qw( |
10277
|
|
|
|
|
|
|
. : < > * & | / - = + - % ^ ! x ~ |
10278
|
|
|
|
|
|
|
); |
10279
|
39
|
|
|
|
|
380
|
@can_start_digraph{@q} = (1) x scalar(@q); |
10280
|
|
|
|
|
|
|
|
10281
|
39
|
|
|
|
|
183
|
my @trigraphs = qw( ... **= <<= >>= &&= ||= //= <=> !~~ &.= |.= ^.= <<~); |
10282
|
39
|
|
|
|
|
294
|
@is_trigraph{@trigraphs} = (1) x scalar(@trigraphs); |
10283
|
|
|
|
|
|
|
|
10284
|
39
|
|
|
|
|
114
|
my @tetragraphs = qw( <<>> ); |
10285
|
39
|
|
|
|
|
194
|
@is_tetragraph{@tetragraphs} = (1) x scalar(@tetragraphs); |
10286
|
|
|
|
|
|
|
|
10287
|
|
|
|
|
|
|
# make a hash of all valid token types for self-checking the tokenizer |
10288
|
|
|
|
|
|
|
# (adding NEW_TOKENS : select a new character and add to this list) |
10289
|
|
|
|
|
|
|
# fix for c250: added new token type 'P' and 'S' |
10290
|
39
|
|
|
|
|
764
|
my @valid_token_types = qw# |
10291
|
|
|
|
|
|
|
A b C G L R f h Q k t w i q n p m F pp mm U j J Y Z v P S |
10292
|
|
|
|
|
|
|
{ } ( ) [ ] ; + - / * | % ! x ~ = \ ? : . < > ^ & |
10293
|
|
|
|
|
|
|
#; |
10294
|
39
|
|
|
|
|
313
|
push( @valid_token_types, @digraphs ); |
10295
|
39
|
|
|
|
|
228
|
push( @valid_token_types, @trigraphs ); |
10296
|
39
|
|
|
|
|
101
|
push( @valid_token_types, @tetragraphs ); |
10297
|
39
|
|
|
|
|
95
|
push( @valid_token_types, ( '#', ',', 'CORE::' ) ); |
10298
|
39
|
|
|
|
|
1734
|
@is_valid_token_type{@valid_token_types} = (1) x scalar(@valid_token_types); |
10299
|
|
|
|
|
|
|
|
10300
|
|
|
|
|
|
|
# a list of file test letters, as in -e (Table 3-4 of 'camel 3') |
10301
|
39
|
|
|
|
|
260
|
my @file_test_operators = |
10302
|
|
|
|
|
|
|
qw( A B C M O R S T W X b c d e f g k l o p r s t u w x z); |
10303
|
39
|
|
|
|
|
542
|
@is_file_test_operator{@file_test_operators} = |
10304
|
|
|
|
|
|
|
(1) x scalar(@file_test_operators); |
10305
|
|
|
|
|
|
|
|
10306
|
|
|
|
|
|
|
# these functions have prototypes of the form (&), so when they are |
10307
|
|
|
|
|
|
|
# followed by a block, that block MAY BE followed by an operator. |
10308
|
|
|
|
|
|
|
# Smartmatch operator ~~ may be followed by anonymous hash or array ref |
10309
|
39
|
|
|
|
|
178
|
@q = qw( do eval ); |
10310
|
39
|
|
|
|
|
186
|
@is_block_operator{@q} = (1) x scalar(@q); |
10311
|
|
|
|
|
|
|
|
10312
|
|
|
|
|
|
|
# these functions allow an identifier in the indirect object slot |
10313
|
39
|
|
|
|
|
112
|
@q = qw( print printf sort exec system say); |
10314
|
39
|
|
|
|
|
222
|
@is_indirect_object_taker{@q} = (1) x scalar(@q); |
10315
|
|
|
|
|
|
|
|
10316
|
|
|
|
|
|
|
# Note: 'field' will be added by sub check_options if --use-feature=class |
10317
|
39
|
|
|
|
|
128
|
@q = qw(my our state); |
10318
|
39
|
|
|
|
|
149
|
@is_my_our_state{@q} = (1) x scalar(@q); |
10319
|
|
|
|
|
|
|
|
10320
|
|
|
|
|
|
|
# These tokens may precede a code block |
10321
|
|
|
|
|
|
|
# patched for SWITCH/CASE/CATCH. Actually these could be removed |
10322
|
|
|
|
|
|
|
# now and we could let the extended-syntax coding handle them. |
10323
|
|
|
|
|
|
|
# Added 'default' for Switch::Plain. |
10324
|
|
|
|
|
|
|
# Note: 'ADJUST' will be added by sub check_options if --use-feature=class |
10325
|
39
|
|
|
|
|
255
|
@q = |
10326
|
|
|
|
|
|
|
qw( BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else |
10327
|
|
|
|
|
|
|
unless do while until eval for foreach map grep sort |
10328
|
|
|
|
|
|
|
switch case given when default catch try finally); |
10329
|
39
|
|
|
|
|
812
|
@is_code_block_token{@q} = (1) x scalar(@q); |
10330
|
|
|
|
|
|
|
|
10331
|
|
|
|
|
|
|
# Note: this hash was formerly named '%is_not_zero_continuation_block_type' |
10332
|
|
|
|
|
|
|
# to contrast it with the block types in '%is_zero_continuation_block_type' |
10333
|
39
|
|
|
|
|
208
|
@q = qw( sort map grep eval do ); |
10334
|
39
|
|
|
|
|
160
|
@is_sort_map_grep_eval_do{@q} = (1) x scalar(@q); |
10335
|
|
|
|
|
|
|
|
10336
|
39
|
|
|
|
|
99
|
@q = qw( sort map grep ); |
10337
|
39
|
|
|
|
|
154
|
@is_sort_map_grep{@q} = (1) x scalar(@q); |
10338
|
|
|
|
|
|
|
|
10339
|
39
|
|
|
|
|
85
|
%is_grep_alias = (); |
10340
|
|
|
|
|
|
|
|
10341
|
|
|
|
|
|
|
# I'll build the list of keywords incrementally |
10342
|
39
|
|
|
|
|
93
|
my @Keywords = (); |
10343
|
|
|
|
|
|
|
|
10344
|
|
|
|
|
|
|
# keywords and tokens after which a value or pattern is expected, |
10345
|
|
|
|
|
|
|
# but not an operator. In other words, these should consume terms |
10346
|
|
|
|
|
|
|
# to their right, or at least they are not expected to be followed |
10347
|
|
|
|
|
|
|
# immediately by operators. |
10348
|
39
|
|
|
|
|
1547
|
my @value_requestor = qw( |
10349
|
|
|
|
|
|
|
AUTOLOAD |
10350
|
|
|
|
|
|
|
BEGIN |
10351
|
|
|
|
|
|
|
CHECK |
10352
|
|
|
|
|
|
|
DESTROY |
10353
|
|
|
|
|
|
|
END |
10354
|
|
|
|
|
|
|
EQ |
10355
|
|
|
|
|
|
|
GE |
10356
|
|
|
|
|
|
|
GT |
10357
|
|
|
|
|
|
|
INIT |
10358
|
|
|
|
|
|
|
LE |
10359
|
|
|
|
|
|
|
LT |
10360
|
|
|
|
|
|
|
NE |
10361
|
|
|
|
|
|
|
UNITCHECK |
10362
|
|
|
|
|
|
|
abs |
10363
|
|
|
|
|
|
|
accept |
10364
|
|
|
|
|
|
|
alarm |
10365
|
|
|
|
|
|
|
and |
10366
|
|
|
|
|
|
|
atan2 |
10367
|
|
|
|
|
|
|
bind |
10368
|
|
|
|
|
|
|
binmode |
10369
|
|
|
|
|
|
|
bless |
10370
|
|
|
|
|
|
|
break |
10371
|
|
|
|
|
|
|
caller |
10372
|
|
|
|
|
|
|
chdir |
10373
|
|
|
|
|
|
|
chmod |
10374
|
|
|
|
|
|
|
chomp |
10375
|
|
|
|
|
|
|
chop |
10376
|
|
|
|
|
|
|
chown |
10377
|
|
|
|
|
|
|
chr |
10378
|
|
|
|
|
|
|
chroot |
10379
|
|
|
|
|
|
|
close |
10380
|
|
|
|
|
|
|
closedir |
10381
|
|
|
|
|
|
|
cmp |
10382
|
|
|
|
|
|
|
connect |
10383
|
|
|
|
|
|
|
continue |
10384
|
|
|
|
|
|
|
cos |
10385
|
|
|
|
|
|
|
crypt |
10386
|
|
|
|
|
|
|
dbmclose |
10387
|
|
|
|
|
|
|
dbmopen |
10388
|
|
|
|
|
|
|
defined |
10389
|
|
|
|
|
|
|
delete |
10390
|
|
|
|
|
|
|
die |
10391
|
|
|
|
|
|
|
dump |
10392
|
|
|
|
|
|
|
each |
10393
|
|
|
|
|
|
|
else |
10394
|
|
|
|
|
|
|
elsif |
10395
|
|
|
|
|
|
|
eof |
10396
|
|
|
|
|
|
|
eq |
10397
|
|
|
|
|
|
|
evalbytes |
10398
|
|
|
|
|
|
|
exec |
10399
|
|
|
|
|
|
|
exists |
10400
|
|
|
|
|
|
|
exit |
10401
|
|
|
|
|
|
|
exp |
10402
|
|
|
|
|
|
|
fc |
10403
|
|
|
|
|
|
|
fcntl |
10404
|
|
|
|
|
|
|
fileno |
10405
|
|
|
|
|
|
|
flock |
10406
|
|
|
|
|
|
|
for |
10407
|
|
|
|
|
|
|
foreach |
10408
|
|
|
|
|
|
|
formline |
10409
|
|
|
|
|
|
|
ge |
10410
|
|
|
|
|
|
|
getc |
10411
|
|
|
|
|
|
|
getgrgid |
10412
|
|
|
|
|
|
|
getgrnam |
10413
|
|
|
|
|
|
|
gethostbyaddr |
10414
|
|
|
|
|
|
|
gethostbyname |
10415
|
|
|
|
|
|
|
getnetbyaddr |
10416
|
|
|
|
|
|
|
getnetbyname |
10417
|
|
|
|
|
|
|
getpeername |
10418
|
|
|
|
|
|
|
getpgrp |
10419
|
|
|
|
|
|
|
getpriority |
10420
|
|
|
|
|
|
|
getprotobyname |
10421
|
|
|
|
|
|
|
getprotobynumber |
10422
|
|
|
|
|
|
|
getpwnam |
10423
|
|
|
|
|
|
|
getpwuid |
10424
|
|
|
|
|
|
|
getservbyname |
10425
|
|
|
|
|
|
|
getservbyport |
10426
|
|
|
|
|
|
|
getsockname |
10427
|
|
|
|
|
|
|
getsockopt |
10428
|
|
|
|
|
|
|
glob |
10429
|
|
|
|
|
|
|
gmtime |
10430
|
|
|
|
|
|
|
goto |
10431
|
|
|
|
|
|
|
grep |
10432
|
|
|
|
|
|
|
gt |
10433
|
|
|
|
|
|
|
hex |
10434
|
|
|
|
|
|
|
if |
10435
|
|
|
|
|
|
|
index |
10436
|
|
|
|
|
|
|
int |
10437
|
|
|
|
|
|
|
ioctl |
10438
|
|
|
|
|
|
|
join |
10439
|
|
|
|
|
|
|
keys |
10440
|
|
|
|
|
|
|
kill |
10441
|
|
|
|
|
|
|
last |
10442
|
|
|
|
|
|
|
lc |
10443
|
|
|
|
|
|
|
lcfirst |
10444
|
|
|
|
|
|
|
le |
10445
|
|
|
|
|
|
|
length |
10446
|
|
|
|
|
|
|
link |
10447
|
|
|
|
|
|
|
listen |
10448
|
|
|
|
|
|
|
local |
10449
|
|
|
|
|
|
|
localtime |
10450
|
|
|
|
|
|
|
lock |
10451
|
|
|
|
|
|
|
log |
10452
|
|
|
|
|
|
|
lstat |
10453
|
|
|
|
|
|
|
lt |
10454
|
|
|
|
|
|
|
map |
10455
|
|
|
|
|
|
|
mkdir |
10456
|
|
|
|
|
|
|
msgctl |
10457
|
|
|
|
|
|
|
msgget |
10458
|
|
|
|
|
|
|
msgrcv |
10459
|
|
|
|
|
|
|
msgsnd |
10460
|
|
|
|
|
|
|
my |
10461
|
|
|
|
|
|
|
ne |
10462
|
|
|
|
|
|
|
next |
10463
|
|
|
|
|
|
|
no |
10464
|
|
|
|
|
|
|
not |
10465
|
|
|
|
|
|
|
oct |
10466
|
|
|
|
|
|
|
open |
10467
|
|
|
|
|
|
|
opendir |
10468
|
|
|
|
|
|
|
or |
10469
|
|
|
|
|
|
|
ord |
10470
|
|
|
|
|
|
|
our |
10471
|
|
|
|
|
|
|
pack |
10472
|
|
|
|
|
|
|
pipe |
10473
|
|
|
|
|
|
|
pop |
10474
|
|
|
|
|
|
|
pos |
10475
|
|
|
|
|
|
|
print |
10476
|
|
|
|
|
|
|
printf |
10477
|
|
|
|
|
|
|
prototype |
10478
|
|
|
|
|
|
|
push |
10479
|
|
|
|
|
|
|
quotemeta |
10480
|
|
|
|
|
|
|
rand |
10481
|
|
|
|
|
|
|
read |
10482
|
|
|
|
|
|
|
readdir |
10483
|
|
|
|
|
|
|
readlink |
10484
|
|
|
|
|
|
|
readline |
10485
|
|
|
|
|
|
|
readpipe |
10486
|
|
|
|
|
|
|
recv |
10487
|
|
|
|
|
|
|
redo |
10488
|
|
|
|
|
|
|
ref |
10489
|
|
|
|
|
|
|
rename |
10490
|
|
|
|
|
|
|
require |
10491
|
|
|
|
|
|
|
reset |
10492
|
|
|
|
|
|
|
return |
10493
|
|
|
|
|
|
|
reverse |
10494
|
|
|
|
|
|
|
rewinddir |
10495
|
|
|
|
|
|
|
rindex |
10496
|
|
|
|
|
|
|
rmdir |
10497
|
|
|
|
|
|
|
scalar |
10498
|
|
|
|
|
|
|
seek |
10499
|
|
|
|
|
|
|
seekdir |
10500
|
|
|
|
|
|
|
select |
10501
|
|
|
|
|
|
|
semctl |
10502
|
|
|
|
|
|
|
semget |
10503
|
|
|
|
|
|
|
semop |
10504
|
|
|
|
|
|
|
send |
10505
|
|
|
|
|
|
|
sethostent |
10506
|
|
|
|
|
|
|
setnetent |
10507
|
|
|
|
|
|
|
setpgrp |
10508
|
|
|
|
|
|
|
setpriority |
10509
|
|
|
|
|
|
|
setprotoent |
10510
|
|
|
|
|
|
|
setservent |
10511
|
|
|
|
|
|
|
setsockopt |
10512
|
|
|
|
|
|
|
shift |
10513
|
|
|
|
|
|
|
shmctl |
10514
|
|
|
|
|
|
|
shmget |
10515
|
|
|
|
|
|
|
shmread |
10516
|
|
|
|
|
|
|
shmwrite |
10517
|
|
|
|
|
|
|
shutdown |
10518
|
|
|
|
|
|
|
sin |
10519
|
|
|
|
|
|
|
sleep |
10520
|
|
|
|
|
|
|
socket |
10521
|
|
|
|
|
|
|
socketpair |
10522
|
|
|
|
|
|
|
sort |
10523
|
|
|
|
|
|
|
splice |
10524
|
|
|
|
|
|
|
split |
10525
|
|
|
|
|
|
|
sprintf |
10526
|
|
|
|
|
|
|
sqrt |
10527
|
|
|
|
|
|
|
srand |
10528
|
|
|
|
|
|
|
stat |
10529
|
|
|
|
|
|
|
state |
10530
|
|
|
|
|
|
|
study |
10531
|
|
|
|
|
|
|
substr |
10532
|
|
|
|
|
|
|
symlink |
10533
|
|
|
|
|
|
|
syscall |
10534
|
|
|
|
|
|
|
sysopen |
10535
|
|
|
|
|
|
|
sysread |
10536
|
|
|
|
|
|
|
sysseek |
10537
|
|
|
|
|
|
|
system |
10538
|
|
|
|
|
|
|
syswrite |
10539
|
|
|
|
|
|
|
tell |
10540
|
|
|
|
|
|
|
telldir |
10541
|
|
|
|
|
|
|
tie |
10542
|
|
|
|
|
|
|
tied |
10543
|
|
|
|
|
|
|
truncate |
10544
|
|
|
|
|
|
|
uc |
10545
|
|
|
|
|
|
|
ucfirst |
10546
|
|
|
|
|
|
|
umask |
10547
|
|
|
|
|
|
|
undef |
10548
|
|
|
|
|
|
|
unless |
10549
|
|
|
|
|
|
|
unlink |
10550
|
|
|
|
|
|
|
unpack |
10551
|
|
|
|
|
|
|
unshift |
10552
|
|
|
|
|
|
|
untie |
10553
|
|
|
|
|
|
|
until |
10554
|
|
|
|
|
|
|
use |
10555
|
|
|
|
|
|
|
utime |
10556
|
|
|
|
|
|
|
values |
10557
|
|
|
|
|
|
|
vec |
10558
|
|
|
|
|
|
|
waitpid |
10559
|
|
|
|
|
|
|
warn |
10560
|
|
|
|
|
|
|
while |
10561
|
|
|
|
|
|
|
write |
10562
|
|
|
|
|
|
|
xor |
10563
|
|
|
|
|
|
|
|
10564
|
|
|
|
|
|
|
switch |
10565
|
|
|
|
|
|
|
case |
10566
|
|
|
|
|
|
|
default |
10567
|
|
|
|
|
|
|
given |
10568
|
|
|
|
|
|
|
when |
10569
|
|
|
|
|
|
|
err |
10570
|
|
|
|
|
|
|
say |
10571
|
|
|
|
|
|
|
isa |
10572
|
|
|
|
|
|
|
|
10573
|
|
|
|
|
|
|
catch |
10574
|
|
|
|
|
|
|
|
10575
|
|
|
|
|
|
|
); |
10576
|
|
|
|
|
|
|
|
10577
|
|
|
|
|
|
|
# Note: 'ADJUST', 'field' are added by sub check_options |
10578
|
|
|
|
|
|
|
# if --use-feature=class |
10579
|
|
|
|
|
|
|
|
10580
|
|
|
|
|
|
|
# patched above for SWITCH/CASE given/when err say |
10581
|
|
|
|
|
|
|
# 'err' is a fairly safe addition. |
10582
|
|
|
|
|
|
|
# Added 'default' for Switch::Plain. Note that we could also have |
10583
|
|
|
|
|
|
|
# a separate set of keywords to include if we see 'use Switch::Plain' |
10584
|
39
|
|
|
|
|
1627
|
push( @Keywords, @value_requestor ); |
10585
|
|
|
|
|
|
|
|
10586
|
|
|
|
|
|
|
# These are treated the same but are not keywords: |
10587
|
39
|
|
|
|
|
148
|
my @extra_vr = qw( |
10588
|
|
|
|
|
|
|
constant |
10589
|
|
|
|
|
|
|
vars |
10590
|
|
|
|
|
|
|
); |
10591
|
39
|
|
|
|
|
281
|
push( @value_requestor, @extra_vr ); |
10592
|
|
|
|
|
|
|
|
10593
|
39
|
|
|
|
|
4772
|
@expecting_term_token{@value_requestor} = (1) x scalar(@value_requestor); |
10594
|
|
|
|
|
|
|
|
10595
|
|
|
|
|
|
|
# this list contains keywords which do not look for arguments, |
10596
|
|
|
|
|
|
|
# so that they might be followed by an operator, or at least |
10597
|
|
|
|
|
|
|
# not a term. |
10598
|
39
|
|
|
|
|
372
|
my @operator_requestor = qw( |
10599
|
|
|
|
|
|
|
endgrent |
10600
|
|
|
|
|
|
|
endhostent |
10601
|
|
|
|
|
|
|
endnetent |
10602
|
|
|
|
|
|
|
endprotoent |
10603
|
|
|
|
|
|
|
endpwent |
10604
|
|
|
|
|
|
|
endservent |
10605
|
|
|
|
|
|
|
fork |
10606
|
|
|
|
|
|
|
getgrent |
10607
|
|
|
|
|
|
|
gethostent |
10608
|
|
|
|
|
|
|
getlogin |
10609
|
|
|
|
|
|
|
getnetent |
10610
|
|
|
|
|
|
|
getppid |
10611
|
|
|
|
|
|
|
getprotoent |
10612
|
|
|
|
|
|
|
getpwent |
10613
|
|
|
|
|
|
|
getservent |
10614
|
|
|
|
|
|
|
setgrent |
10615
|
|
|
|
|
|
|
setpwent |
10616
|
|
|
|
|
|
|
time |
10617
|
|
|
|
|
|
|
times |
10618
|
|
|
|
|
|
|
wait |
10619
|
|
|
|
|
|
|
wantarray |
10620
|
|
|
|
|
|
|
); |
10621
|
|
|
|
|
|
|
|
10622
|
39
|
|
|
|
|
151
|
push( @Keywords, @operator_requestor ); |
10623
|
|
|
|
|
|
|
|
10624
|
|
|
|
|
|
|
# These are treated the same but are not considered keywords: |
10625
|
39
|
|
|
|
|
93
|
my @extra_or = qw( |
10626
|
|
|
|
|
|
|
STDERR |
10627
|
|
|
|
|
|
|
STDIN |
10628
|
|
|
|
|
|
|
STDOUT |
10629
|
|
|
|
|
|
|
); |
10630
|
|
|
|
|
|
|
|
10631
|
39
|
|
|
|
|
121
|
push( @operator_requestor, @extra_or ); |
10632
|
|
|
|
|
|
|
|
10633
|
39
|
|
|
|
|
777
|
@expecting_operator_token{@operator_requestor} = |
10634
|
|
|
|
|
|
|
(1) x scalar(@operator_requestor); |
10635
|
|
|
|
|
|
|
|
10636
|
|
|
|
|
|
|
# these token TYPES expect trailing operator but not a term |
10637
|
|
|
|
|
|
|
# note: ++ and -- are post-increment and decrement, 'C' = constant |
10638
|
39
|
|
|
|
|
170
|
my @operator_requestor_types = qw( ++ -- C <> q ); |
10639
|
|
|
|
|
|
|
|
10640
|
|
|
|
|
|
|
# NOTE: This hash is available but not currently used |
10641
|
39
|
|
|
|
|
178
|
@expecting_operator_types{@operator_requestor_types} = |
10642
|
|
|
|
|
|
|
(1) x scalar(@operator_requestor_types); |
10643
|
|
|
|
|
|
|
|
10644
|
|
|
|
|
|
|
# these token TYPES consume values (terms) |
10645
|
|
|
|
|
|
|
# note: pp and mm are pre-increment and decrement |
10646
|
|
|
|
|
|
|
# f=semicolon in for, F=file test operator |
10647
|
39
|
|
|
|
|
768
|
my @value_requestor_type = qw# |
10648
|
|
|
|
|
|
|
L { ( [ ~ !~ =~ ; . .. ... A : && ! || // = + - x |
10649
|
|
|
|
|
|
|
**= += -= .= /= *= %= x= &= |= ^= <<= >>= &&= ||= //= |
10650
|
|
|
|
|
|
|
<= >= == != => \ > < % * / ? & | ** <=> ~~ !~~ <<~ |
10651
|
|
|
|
|
|
|
f F pp mm Y p m U J G j >> << ^ t |
10652
|
|
|
|
|
|
|
~. ^. |. &. ^.= |.= &.= |
10653
|
|
|
|
|
|
|
#; |
10654
|
39
|
|
|
|
|
281
|
push( @value_requestor_type, ',' ) |
10655
|
|
|
|
|
|
|
; # (perl doesn't like a ',' in a qw block) |
10656
|
|
|
|
|
|
|
|
10657
|
|
|
|
|
|
|
# NOTE: This hash is available but not currently used |
10658
|
39
|
|
|
|
|
1025
|
@expecting_term_types{@value_requestor_type} = |
10659
|
|
|
|
|
|
|
(1) x scalar(@value_requestor_type); |
10660
|
|
|
|
|
|
|
|
10661
|
|
|
|
|
|
|
# Note: the following valid token types are not assigned here to |
10662
|
|
|
|
|
|
|
# hashes requesting to be followed by values or terms, but are |
10663
|
|
|
|
|
|
|
# instead currently hard-coded into sub operator_expected: |
10664
|
|
|
|
|
|
|
# ) -> :: Q R Z ] b h i k n v w } # |
10665
|
|
|
|
|
|
|
|
10666
|
|
|
|
|
|
|
# For simple syntax checking, it is nice to have a list of operators which |
10667
|
|
|
|
|
|
|
# will really be unhappy if not followed by a term. This includes most |
10668
|
|
|
|
|
|
|
# of the above... |
10669
|
39
|
|
|
|
|
1292
|
@really_want_term{@value_requestor_type} = |
10670
|
|
|
|
|
|
|
(1) x scalar(@value_requestor_type); |
10671
|
|
|
|
|
|
|
|
10672
|
|
|
|
|
|
|
# with these exceptions... |
10673
|
39
|
|
|
|
|
182
|
delete $really_want_term{'U'}; # user sub, depends on prototype |
10674
|
39
|
|
|
|
|
105
|
delete $really_want_term{'F'}; # file test works on $_ if no following term |
10675
|
39
|
|
|
|
|
88
|
delete $really_want_term{'Y'}; # indirect object, too risky to check syntax; |
10676
|
|
|
|
|
|
|
# let perl do it |
10677
|
39
|
|
|
|
|
262
|
@q = qw(q qq qx qr s y tr m); |
10678
|
39
|
|
|
|
|
239
|
@is_q_qq_qx_qr_s_y_tr_m{@q} = (1) x scalar(@q); |
10679
|
|
|
|
|
|
|
|
10680
|
|
|
|
|
|
|
# Note added 'qw' here |
10681
|
39
|
|
|
|
|
153
|
@q = qw(q qq qw qx qr s y tr m); |
10682
|
39
|
|
|
|
|
194
|
@is_q_qq_qw_qx_qr_s_y_tr_m{@q} = (1) x scalar(@q); |
10683
|
|
|
|
|
|
|
|
10684
|
|
|
|
|
|
|
# Note: 'class' will be added by sub check_options if -use-feature=class |
10685
|
39
|
|
|
|
|
125
|
@q = qw(package); |
10686
|
39
|
|
|
|
|
164
|
@is_package{@q} = (1) x scalar(@q); |
10687
|
|
|
|
|
|
|
|
10688
|
39
|
|
|
|
|
101
|
@q = qw( if elsif unless ); |
10689
|
39
|
|
|
|
|
114
|
@is_if_elsif_unless{@q} = (1) x scalar(@q); |
10690
|
|
|
|
|
|
|
|
10691
|
39
|
|
|
|
|
108
|
@q = qw( ; t ); |
10692
|
39
|
|
|
|
|
158
|
@is_semicolon_or_t{@q} = (1) x scalar(@q); |
10693
|
|
|
|
|
|
|
|
10694
|
39
|
|
|
|
|
129
|
@q = qw( if elsif unless case when ); |
10695
|
39
|
|
|
|
|
168
|
@is_if_elsif_unless_case_when{@q} = (1) x scalar(@q); |
10696
|
|
|
|
|
|
|
|
10697
|
|
|
|
|
|
|
# Hash of other possible line endings which may occur. |
10698
|
|
|
|
|
|
|
# Keep these coordinated with the regex where this is used. |
10699
|
|
|
|
|
|
|
# Note: chr(13) = chr(015)="\r". |
10700
|
39
|
|
|
|
|
152
|
@q = ( chr(13), chr(29), chr(26) ); |
10701
|
39
|
|
|
|
|
189
|
@other_line_endings{@q} = (1) x scalar(@q); |
10702
|
|
|
|
|
|
|
|
10703
|
|
|
|
|
|
|
# These keywords are handled specially in the tokenizer code: |
10704
|
39
|
|
|
|
|
134
|
my @special_keywords = qw( |
10705
|
|
|
|
|
|
|
do |
10706
|
|
|
|
|
|
|
eval |
10707
|
|
|
|
|
|
|
format |
10708
|
|
|
|
|
|
|
m |
10709
|
|
|
|
|
|
|
package |
10710
|
|
|
|
|
|
|
q |
10711
|
|
|
|
|
|
|
qq |
10712
|
|
|
|
|
|
|
qr |
10713
|
|
|
|
|
|
|
qw |
10714
|
|
|
|
|
|
|
qx |
10715
|
|
|
|
|
|
|
s |
10716
|
|
|
|
|
|
|
sub |
10717
|
|
|
|
|
|
|
tr |
10718
|
|
|
|
|
|
|
y |
10719
|
|
|
|
|
|
|
); |
10720
|
39
|
|
|
|
|
387
|
push( @Keywords, @special_keywords ); |
10721
|
|
|
|
|
|
|
|
10722
|
|
|
|
|
|
|
# Keywords after which list formatting may be used |
10723
|
|
|
|
|
|
|
# WARNING: do not include |map|grep|eval or perl may die on |
10724
|
|
|
|
|
|
|
# syntax errors (map1.t). |
10725
|
39
|
|
|
|
|
623
|
my @keyword_taking_list = qw( |
10726
|
|
|
|
|
|
|
and |
10727
|
|
|
|
|
|
|
chmod |
10728
|
|
|
|
|
|
|
chomp |
10729
|
|
|
|
|
|
|
chop |
10730
|
|
|
|
|
|
|
chown |
10731
|
|
|
|
|
|
|
dbmopen |
10732
|
|
|
|
|
|
|
die |
10733
|
|
|
|
|
|
|
elsif |
10734
|
|
|
|
|
|
|
exec |
10735
|
|
|
|
|
|
|
fcntl |
10736
|
|
|
|
|
|
|
for |
10737
|
|
|
|
|
|
|
foreach |
10738
|
|
|
|
|
|
|
formline |
10739
|
|
|
|
|
|
|
getsockopt |
10740
|
|
|
|
|
|
|
if |
10741
|
|
|
|
|
|
|
index |
10742
|
|
|
|
|
|
|
ioctl |
10743
|
|
|
|
|
|
|
join |
10744
|
|
|
|
|
|
|
kill |
10745
|
|
|
|
|
|
|
local |
10746
|
|
|
|
|
|
|
msgctl |
10747
|
|
|
|
|
|
|
msgrcv |
10748
|
|
|
|
|
|
|
msgsnd |
10749
|
|
|
|
|
|
|
my |
10750
|
|
|
|
|
|
|
open |
10751
|
|
|
|
|
|
|
or |
10752
|
|
|
|
|
|
|
our |
10753
|
|
|
|
|
|
|
pack |
10754
|
|
|
|
|
|
|
print |
10755
|
|
|
|
|
|
|
printf |
10756
|
|
|
|
|
|
|
push |
10757
|
|
|
|
|
|
|
read |
10758
|
|
|
|
|
|
|
readpipe |
10759
|
|
|
|
|
|
|
recv |
10760
|
|
|
|
|
|
|
return |
10761
|
|
|
|
|
|
|
reverse |
10762
|
|
|
|
|
|
|
rindex |
10763
|
|
|
|
|
|
|
seek |
10764
|
|
|
|
|
|
|
select |
10765
|
|
|
|
|
|
|
semctl |
10766
|
|
|
|
|
|
|
semget |
10767
|
|
|
|
|
|
|
send |
10768
|
|
|
|
|
|
|
setpriority |
10769
|
|
|
|
|
|
|
setsockopt |
10770
|
|
|
|
|
|
|
shmctl |
10771
|
|
|
|
|
|
|
shmget |
10772
|
|
|
|
|
|
|
shmread |
10773
|
|
|
|
|
|
|
shmwrite |
10774
|
|
|
|
|
|
|
socket |
10775
|
|
|
|
|
|
|
socketpair |
10776
|
|
|
|
|
|
|
sort |
10777
|
|
|
|
|
|
|
splice |
10778
|
|
|
|
|
|
|
split |
10779
|
|
|
|
|
|
|
sprintf |
10780
|
|
|
|
|
|
|
state |
10781
|
|
|
|
|
|
|
substr |
10782
|
|
|
|
|
|
|
syscall |
10783
|
|
|
|
|
|
|
sysopen |
10784
|
|
|
|
|
|
|
sysread |
10785
|
|
|
|
|
|
|
sysseek |
10786
|
|
|
|
|
|
|
system |
10787
|
|
|
|
|
|
|
syswrite |
10788
|
|
|
|
|
|
|
tie |
10789
|
|
|
|
|
|
|
unless |
10790
|
|
|
|
|
|
|
unlink |
10791
|
|
|
|
|
|
|
unpack |
10792
|
|
|
|
|
|
|
unshift |
10793
|
|
|
|
|
|
|
until |
10794
|
|
|
|
|
|
|
vec |
10795
|
|
|
|
|
|
|
warn |
10796
|
|
|
|
|
|
|
while |
10797
|
|
|
|
|
|
|
given |
10798
|
|
|
|
|
|
|
when |
10799
|
|
|
|
|
|
|
); |
10800
|
|
|
|
|
|
|
|
10801
|
|
|
|
|
|
|
# NOTE: This hash is available but not currently used |
10802
|
39
|
|
|
|
|
1307
|
@is_keyword_taking_list{@keyword_taking_list} = |
10803
|
|
|
|
|
|
|
(1) x scalar(@keyword_taking_list); |
10804
|
|
|
|
|
|
|
|
10805
|
|
|
|
|
|
|
# perl functions which may be unary operators. |
10806
|
|
|
|
|
|
|
|
10807
|
|
|
|
|
|
|
# This list is used to decide if a pattern delimited by slashes, /pattern/, |
10808
|
|
|
|
|
|
|
# can follow one of these keywords. |
10809
|
39
|
|
|
|
|
218
|
@q = qw( |
10810
|
|
|
|
|
|
|
chomp eof eval fc lc pop shift uc undef |
10811
|
|
|
|
|
|
|
); |
10812
|
|
|
|
|
|
|
|
10813
|
39
|
|
|
|
|
269
|
@is_keyword_rejecting_slash_as_pattern_delimiter{@q} = |
10814
|
|
|
|
|
|
|
(1) x scalar(@q); |
10815
|
|
|
|
|
|
|
|
10816
|
|
|
|
|
|
|
# These are keywords for which an arg may optionally be omitted. They are |
10817
|
|
|
|
|
|
|
# currently only used to disambiguate a ? used as a ternary from one used |
10818
|
|
|
|
|
|
|
# as a (deprecated) pattern delimiter. In the future, they might be used |
10819
|
|
|
|
|
|
|
# to give a warning about ambiguous syntax before a /. |
10820
|
|
|
|
|
|
|
# Note: split has been omitted (see note below). |
10821
|
39
|
|
|
|
|
499
|
my @keywords_taking_optional_arg = qw( |
10822
|
|
|
|
|
|
|
abs |
10823
|
|
|
|
|
|
|
alarm |
10824
|
|
|
|
|
|
|
caller |
10825
|
|
|
|
|
|
|
chdir |
10826
|
|
|
|
|
|
|
chomp |
10827
|
|
|
|
|
|
|
chop |
10828
|
|
|
|
|
|
|
chr |
10829
|
|
|
|
|
|
|
chroot |
10830
|
|
|
|
|
|
|
close |
10831
|
|
|
|
|
|
|
cos |
10832
|
|
|
|
|
|
|
defined |
10833
|
|
|
|
|
|
|
die |
10834
|
|
|
|
|
|
|
eof |
10835
|
|
|
|
|
|
|
eval |
10836
|
|
|
|
|
|
|
evalbytes |
10837
|
|
|
|
|
|
|
exit |
10838
|
|
|
|
|
|
|
exp |
10839
|
|
|
|
|
|
|
fc |
10840
|
|
|
|
|
|
|
getc |
10841
|
|
|
|
|
|
|
glob |
10842
|
|
|
|
|
|
|
gmtime |
10843
|
|
|
|
|
|
|
hex |
10844
|
|
|
|
|
|
|
int |
10845
|
|
|
|
|
|
|
last |
10846
|
|
|
|
|
|
|
lc |
10847
|
|
|
|
|
|
|
lcfirst |
10848
|
|
|
|
|
|
|
length |
10849
|
|
|
|
|
|
|
localtime |
10850
|
|
|
|
|
|
|
log |
10851
|
|
|
|
|
|
|
lstat |
10852
|
|
|
|
|
|
|
mkdir |
10853
|
|
|
|
|
|
|
next |
10854
|
|
|
|
|
|
|
oct |
10855
|
|
|
|
|
|
|
ord |
10856
|
|
|
|
|
|
|
pop |
10857
|
|
|
|
|
|
|
pos |
10858
|
|
|
|
|
|
|
print |
10859
|
|
|
|
|
|
|
printf |
10860
|
|
|
|
|
|
|
prototype |
10861
|
|
|
|
|
|
|
quotemeta |
10862
|
|
|
|
|
|
|
rand |
10863
|
|
|
|
|
|
|
readline |
10864
|
|
|
|
|
|
|
readlink |
10865
|
|
|
|
|
|
|
readpipe |
10866
|
|
|
|
|
|
|
redo |
10867
|
|
|
|
|
|
|
ref |
10868
|
|
|
|
|
|
|
require |
10869
|
|
|
|
|
|
|
reset |
10870
|
|
|
|
|
|
|
reverse |
10871
|
|
|
|
|
|
|
rmdir |
10872
|
|
|
|
|
|
|
say |
10873
|
|
|
|
|
|
|
select |
10874
|
|
|
|
|
|
|
shift |
10875
|
|
|
|
|
|
|
sin |
10876
|
|
|
|
|
|
|
sleep |
10877
|
|
|
|
|
|
|
sqrt |
10878
|
|
|
|
|
|
|
srand |
10879
|
|
|
|
|
|
|
stat |
10880
|
|
|
|
|
|
|
study |
10881
|
|
|
|
|
|
|
tell |
10882
|
|
|
|
|
|
|
uc |
10883
|
|
|
|
|
|
|
ucfirst |
10884
|
|
|
|
|
|
|
umask |
10885
|
|
|
|
|
|
|
undef |
10886
|
|
|
|
|
|
|
unlink |
10887
|
|
|
|
|
|
|
warn |
10888
|
|
|
|
|
|
|
write |
10889
|
|
|
|
|
|
|
); |
10890
|
39
|
|
|
|
|
1074
|
@is_keyword_taking_optional_arg{@keywords_taking_optional_arg} = |
10891
|
|
|
|
|
|
|
(1) x scalar(@keywords_taking_optional_arg); |
10892
|
|
|
|
|
|
|
|
10893
|
|
|
|
|
|
|
# This list is used to decide if a pattern delimited by question marks, |
10894
|
|
|
|
|
|
|
# ?pattern?, can follow one of these keywords. Note that from perl 5.22 |
10895
|
|
|
|
|
|
|
# on, a ?pattern? is not recognized, so we can be much more strict than |
10896
|
|
|
|
|
|
|
# with a /pattern/. Note that 'split' is not in this list. In current |
10897
|
|
|
|
|
|
|
# versions of perl a question following split must be a ternary, but |
10898
|
|
|
|
|
|
|
# in older versions it could be a pattern. The guessing algorithm will |
10899
|
|
|
|
|
|
|
# decide. We are combining two lists here to simplify the test. |
10900
|
39
|
|
|
|
|
772
|
@q = ( @keywords_taking_optional_arg, @operator_requestor ); |
10901
|
39
|
|
|
|
|
1574
|
@is_keyword_rejecting_question_as_pattern_delimiter{@q} = |
10902
|
|
|
|
|
|
|
(1) x scalar(@q); |
10903
|
|
|
|
|
|
|
|
10904
|
|
|
|
|
|
|
# These are not used in any way yet |
10905
|
|
|
|
|
|
|
# my @unused_keywords = qw( |
10906
|
|
|
|
|
|
|
# __FILE__ |
10907
|
|
|
|
|
|
|
# __LINE__ |
10908
|
|
|
|
|
|
|
# __PACKAGE__ |
10909
|
|
|
|
|
|
|
# ); |
10910
|
|
|
|
|
|
|
|
10911
|
|
|
|
|
|
|
# The list of keywords was originally extracted from function 'keyword' in |
10912
|
|
|
|
|
|
|
# perl file toke.c version 5.005.03, using this utility, plus a |
10913
|
|
|
|
|
|
|
# little editing: (file getkwd.pl): |
10914
|
|
|
|
|
|
|
# while (<>) { while (/\"(.*)\"/g) { print "$1\n"; } } |
10915
|
|
|
|
|
|
|
# Add 'get' prefix where necessary, then split into the above lists. |
10916
|
|
|
|
|
|
|
# This list should be updated as necessary. |
10917
|
|
|
|
|
|
|
# The list should not contain these special variables: |
10918
|
|
|
|
|
|
|
# ARGV DATA ENV SIG STDERR STDIN STDOUT |
10919
|
|
|
|
|
|
|
# __DATA__ __END__ |
10920
|
|
|
|
|
|
|
|
10921
|
39
|
|
|
|
|
9059
|
@is_keyword{@Keywords} = (1) x scalar(@Keywords); |
10922
|
|
|
|
|
|
|
} ## end BEGIN |
10923
|
|
|
|
|
|
|
1; |