line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
##################################################################### |
2
|
|
|
|
|
|
|
# |
3
|
|
|
|
|
|
|
# The Perl::Tidy::Tokenizer package is essentially a filter which |
4
|
|
|
|
|
|
|
# reads lines of perl source code from a source object and provides |
5
|
|
|
|
|
|
|
# corresponding tokenized lines through its get_line() method. Lines |
6
|
|
|
|
|
|
|
# flow from the source_object to the caller like this: |
7
|
|
|
|
|
|
|
# |
8
|
|
|
|
|
|
|
# source_object --> Tokenizer --> calling routine |
9
|
|
|
|
|
|
|
# get_line() get_line() line_of_tokens |
10
|
|
|
|
|
|
|
# |
11
|
|
|
|
|
|
|
# The source object can be a STRING ref, an ARRAY ref, or an object with a |
12
|
|
|
|
|
|
|
# get_line() method which supplies one line (a character string) perl call. |
13
|
|
|
|
|
|
|
# The Tokenizer returns a reference to a data structure 'line_of_tokens' |
14
|
|
|
|
|
|
|
# containing one tokenized line for each call to its get_line() method. |
15
|
|
|
|
|
|
|
# |
16
|
|
|
|
|
|
|
# NOTE: This is not a real class. Only one tokenizer my be used. |
17
|
|
|
|
|
|
|
# |
18
|
|
|
|
|
|
|
######################################################################## |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
package Perl::Tidy::Tokenizer; |
21
|
39
|
|
|
39
|
|
306
|
use strict; |
|
39
|
|
|
|
|
109
|
|
|
39
|
|
|
|
|
1322
|
|
22
|
39
|
|
|
39
|
|
250
|
use warnings; |
|
39
|
|
|
|
|
105
|
|
|
39
|
|
|
|
|
1140
|
|
23
|
39
|
|
|
39
|
|
220
|
use English qw( -no_match_vars ); |
|
39
|
|
|
|
|
127
|
|
|
39
|
|
|
|
|
283
|
|
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
our $VERSION = '20230909'; |
26
|
|
|
|
|
|
|
|
27
|
39
|
|
|
39
|
|
14644
|
use Carp; |
|
39
|
|
|
|
|
116
|
|
|
39
|
|
|
|
|
2461
|
|
28
|
|
|
|
|
|
|
|
29
|
39
|
|
|
39
|
|
311
|
use constant DEVEL_MODE => 0; |
|
39
|
|
|
|
|
132
|
|
|
39
|
|
|
|
|
2491
|
|
30
|
39
|
|
|
39
|
|
275
|
use constant EMPTY_STRING => q{}; |
|
39
|
|
|
|
|
157
|
|
|
39
|
|
|
|
|
2267
|
|
31
|
39
|
|
|
39
|
|
289
|
use constant SPACE => q{ }; |
|
39
|
|
|
|
|
117
|
|
|
39
|
|
|
|
|
2522
|
|
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
# Decimal values of some ascii characters for quick checks |
34
|
39
|
|
|
39
|
|
286
|
use constant ORD_TAB => 9; |
|
39
|
|
|
|
|
98
|
|
|
39
|
|
|
|
|
2044
|
|
35
|
39
|
|
|
39
|
|
265
|
use constant ORD_SPACE => 32; |
|
39
|
|
|
|
|
127
|
|
|
39
|
|
|
|
|
2163
|
|
36
|
39
|
|
|
39
|
|
297
|
use constant ORD_PRINTABLE_MIN => 33; |
|
39
|
|
|
|
|
123
|
|
|
39
|
|
|
|
|
2033
|
|
37
|
39
|
|
|
39
|
|
279
|
use constant ORD_PRINTABLE_MAX => 126; |
|
39
|
|
|
|
|
100
|
|
|
39
|
|
|
|
|
7971
|
|
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
# GLOBAL VARIABLES which change during tokenization: |
40
|
|
|
|
|
|
|
# These could also be stored in $self but it is more convenient and |
41
|
|
|
|
|
|
|
# efficient to make them global lexical variables. |
42
|
|
|
|
|
|
|
# INITIALIZER: sub prepare_for_a_new_file |
43
|
|
|
|
|
|
|
my ( |
44
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
$brace_depth, |
46
|
|
|
|
|
|
|
$context, |
47
|
|
|
|
|
|
|
$current_package, |
48
|
|
|
|
|
|
|
$last_nonblank_block_type, |
49
|
|
|
|
|
|
|
$last_nonblank_token, |
50
|
|
|
|
|
|
|
$last_nonblank_type, |
51
|
|
|
|
|
|
|
$next_sequence_number, |
52
|
|
|
|
|
|
|
$paren_depth, |
53
|
|
|
|
|
|
|
$rbrace_context, |
54
|
|
|
|
|
|
|
$rbrace_package, |
55
|
|
|
|
|
|
|
$rbrace_structural_type, |
56
|
|
|
|
|
|
|
$rbrace_type, |
57
|
|
|
|
|
|
|
$rcurrent_depth, |
58
|
|
|
|
|
|
|
$rcurrent_sequence_number, |
59
|
|
|
|
|
|
|
$rdepth_array, |
60
|
|
|
|
|
|
|
$ris_block_function, |
61
|
|
|
|
|
|
|
$ris_block_list_function, |
62
|
|
|
|
|
|
|
$ris_constant, |
63
|
|
|
|
|
|
|
$ris_user_function, |
64
|
|
|
|
|
|
|
$rnested_statement_type, |
65
|
|
|
|
|
|
|
$rnested_ternary_flag, |
66
|
|
|
|
|
|
|
$rparen_semicolon_count, |
67
|
|
|
|
|
|
|
$rparen_vars, |
68
|
|
|
|
|
|
|
$rparen_type, |
69
|
|
|
|
|
|
|
$rsaw_function_definition, |
70
|
|
|
|
|
|
|
$rsaw_use_module, |
71
|
|
|
|
|
|
|
$rsquare_bracket_structural_type, |
72
|
|
|
|
|
|
|
$rsquare_bracket_type, |
73
|
|
|
|
|
|
|
$rstarting_line_of_current_depth, |
74
|
|
|
|
|
|
|
$rtotal_depth, |
75
|
|
|
|
|
|
|
$ruser_function_prototype, |
76
|
|
|
|
|
|
|
$square_bracket_depth, |
77
|
|
|
|
|
|
|
$statement_type, |
78
|
|
|
|
|
|
|
$total_depth, |
79
|
|
|
|
|
|
|
); |
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
my ( |
82
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
# GLOBAL CONSTANTS for routines in this package, |
84
|
|
|
|
|
|
|
# INITIALIZER: BEGIN block. |
85
|
|
|
|
|
|
|
%can_start_digraph, |
86
|
|
|
|
|
|
|
%expecting_operator_token, |
87
|
|
|
|
|
|
|
%expecting_operator_types, |
88
|
|
|
|
|
|
|
%expecting_term_token, |
89
|
|
|
|
|
|
|
%expecting_term_types, |
90
|
|
|
|
|
|
|
%is_block_operator, |
91
|
|
|
|
|
|
|
%is_digraph, |
92
|
|
|
|
|
|
|
%is_file_test_operator, |
93
|
|
|
|
|
|
|
%is_if_elsif_unless, |
94
|
|
|
|
|
|
|
%is_if_elsif_unless_case_when, |
95
|
|
|
|
|
|
|
%is_indirect_object_taker, |
96
|
|
|
|
|
|
|
%is_keyword_rejecting_question_as_pattern_delimiter, |
97
|
|
|
|
|
|
|
%is_keyword_rejecting_slash_as_pattern_delimiter, |
98
|
|
|
|
|
|
|
%is_keyword_taking_list, |
99
|
|
|
|
|
|
|
%is_keyword_taking_optional_arg, |
100
|
|
|
|
|
|
|
%is_q_qq_qw_qx_qr_s_y_tr_m, |
101
|
|
|
|
|
|
|
%is_q_qq_qx_qr_s_y_tr_m, |
102
|
|
|
|
|
|
|
%is_semicolon_or_t, |
103
|
|
|
|
|
|
|
%is_sort_map_grep, |
104
|
|
|
|
|
|
|
%is_sort_map_grep_eval_do, |
105
|
|
|
|
|
|
|
%is_tetragraph, |
106
|
|
|
|
|
|
|
%is_trigraph, |
107
|
|
|
|
|
|
|
%is_valid_token_type, |
108
|
|
|
|
|
|
|
%other_line_endings, |
109
|
|
|
|
|
|
|
%really_want_term, |
110
|
|
|
|
|
|
|
@closing_brace_names, |
111
|
|
|
|
|
|
|
@opening_brace_names, |
112
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
# GLOBAL CONSTANT hash lookup table of operator expected values |
114
|
|
|
|
|
|
|
# INITIALIZER: BEGIN block |
115
|
|
|
|
|
|
|
%op_expected_table, |
116
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
# GLOBAL VARIABLES which are constant after being configured. |
118
|
|
|
|
|
|
|
# INITIALIZER: BEGIN block and modified by sub check_options |
119
|
|
|
|
|
|
|
%is_code_block_token, |
120
|
|
|
|
|
|
|
%is_keyword, |
121
|
|
|
|
|
|
|
%is_my_our_state, |
122
|
|
|
|
|
|
|
%is_package, |
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
# INITIALIZER: sub check_options |
125
|
|
|
|
|
|
|
$code_skipping_pattern_begin, |
126
|
|
|
|
|
|
|
$code_skipping_pattern_end, |
127
|
|
|
|
|
|
|
$rOpts_code_skipping, |
128
|
|
|
|
|
|
|
$rOpts_code_skipping_begin, |
129
|
|
|
|
|
|
|
%is_END_DATA_format_sub, |
130
|
|
|
|
|
|
|
%is_grep_alias, |
131
|
|
|
|
|
|
|
%is_sub, |
132
|
|
|
|
|
|
|
$guess_if_method, |
133
|
|
|
|
|
|
|
); |
134
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
# possible values of operator_expected() |
136
|
39
|
|
|
39
|
|
301
|
use constant TERM => -1; |
|
39
|
|
|
|
|
1477
|
|
|
39
|
|
|
|
|
2274
|
|
137
|
39
|
|
|
39
|
|
265
|
use constant UNKNOWN => 0; |
|
39
|
|
|
|
|
138
|
|
|
39
|
|
|
|
|
2074
|
|
138
|
39
|
|
|
39
|
|
257
|
use constant OPERATOR => 1; |
|
39
|
|
|
|
|
138
|
|
|
39
|
|
|
|
|
2357
|
|
139
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
# possible values of context |
141
|
39
|
|
|
39
|
|
320
|
use constant SCALAR_CONTEXT => -1; |
|
39
|
|
|
|
|
156
|
|
|
39
|
|
|
|
|
2332
|
|
142
|
39
|
|
|
39
|
|
313
|
use constant UNKNOWN_CONTEXT => 0; |
|
39
|
|
|
|
|
100
|
|
|
39
|
|
|
|
|
2013
|
|
143
|
39
|
|
|
39
|
|
265
|
use constant LIST_CONTEXT => 1; |
|
39
|
|
|
|
|
111
|
|
|
39
|
|
|
|
|
2251
|
|
144
|
|
|
|
|
|
|
|
145
|
|
|
|
|
|
|
# Maximum number of little messages; probably need not be changed. |
146
|
39
|
|
|
39
|
|
339
|
use constant MAX_NAG_MESSAGES => 6; |
|
39
|
|
|
|
|
140
|
|
|
39
|
|
|
|
|
8660
|
|
147
|
|
|
|
|
|
|
|
148
|
0
|
|
|
|
|
0
|
BEGIN { |
149
|
|
|
|
|
|
|
|
150
|
|
|
|
|
|
|
# Array index names for $self. |
151
|
|
|
|
|
|
|
# Do not combine with other BEGIN blocks (c101). |
152
|
39
|
|
|
39
|
|
284081
|
my $i = 0; |
153
|
|
|
|
|
|
|
use constant { |
154
|
39
|
|
|
|
|
26373
|
_rhere_target_list_ => $i++, |
155
|
|
|
|
|
|
|
_in_here_doc_ => $i++, |
156
|
|
|
|
|
|
|
_here_doc_target_ => $i++, |
157
|
|
|
|
|
|
|
_here_quote_character_ => $i++, |
158
|
|
|
|
|
|
|
_in_data_ => $i++, |
159
|
|
|
|
|
|
|
_in_end_ => $i++, |
160
|
|
|
|
|
|
|
_in_format_ => $i++, |
161
|
|
|
|
|
|
|
_in_error_ => $i++, |
162
|
|
|
|
|
|
|
_in_trouble_ => $i++, |
163
|
|
|
|
|
|
|
_warning_count_ => $i++, |
164
|
|
|
|
|
|
|
_html_tag_count_ => $i++, |
165
|
|
|
|
|
|
|
_in_pod_ => $i++, |
166
|
|
|
|
|
|
|
_in_skipped_ => $i++, |
167
|
|
|
|
|
|
|
_in_attribute_list_ => $i++, |
168
|
|
|
|
|
|
|
_in_quote_ => $i++, |
169
|
|
|
|
|
|
|
_quote_target_ => $i++, |
170
|
|
|
|
|
|
|
_line_start_quote_ => $i++, |
171
|
|
|
|
|
|
|
_starting_level_ => $i++, |
172
|
|
|
|
|
|
|
_know_starting_level_ => $i++, |
173
|
|
|
|
|
|
|
_tabsize_ => $i++, |
174
|
|
|
|
|
|
|
_indent_columns_ => $i++, |
175
|
|
|
|
|
|
|
_look_for_hash_bang_ => $i++, |
176
|
|
|
|
|
|
|
_trim_qw_ => $i++, |
177
|
|
|
|
|
|
|
_continuation_indentation_ => $i++, |
178
|
|
|
|
|
|
|
_outdent_labels_ => $i++, |
179
|
|
|
|
|
|
|
_last_line_number_ => $i++, |
180
|
|
|
|
|
|
|
_saw_perl_dash_P_ => $i++, |
181
|
|
|
|
|
|
|
_saw_perl_dash_w_ => $i++, |
182
|
|
|
|
|
|
|
_saw_use_strict_ => $i++, |
183
|
|
|
|
|
|
|
_saw_v_string_ => $i++, |
184
|
|
|
|
|
|
|
_saw_brace_error_ => $i++, |
185
|
|
|
|
|
|
|
_hit_bug_ => $i++, |
186
|
|
|
|
|
|
|
_look_for_autoloader_ => $i++, |
187
|
|
|
|
|
|
|
_look_for_selfloader_ => $i++, |
188
|
|
|
|
|
|
|
_saw_autoloader_ => $i++, |
189
|
|
|
|
|
|
|
_saw_selfloader_ => $i++, |
190
|
|
|
|
|
|
|
_saw_hash_bang_ => $i++, |
191
|
|
|
|
|
|
|
_saw_end_ => $i++, |
192
|
|
|
|
|
|
|
_saw_data_ => $i++, |
193
|
|
|
|
|
|
|
_saw_negative_indentation_ => $i++, |
194
|
|
|
|
|
|
|
_started_tokenizing_ => $i++, |
195
|
|
|
|
|
|
|
_debugger_object_ => $i++, |
196
|
|
|
|
|
|
|
_diagnostics_object_ => $i++, |
197
|
|
|
|
|
|
|
_logger_object_ => $i++, |
198
|
|
|
|
|
|
|
_unexpected_error_count_ => $i++, |
199
|
|
|
|
|
|
|
_started_looking_for_here_target_at_ => $i++, |
200
|
|
|
|
|
|
|
_nearly_matched_here_target_at_ => $i++, |
201
|
|
|
|
|
|
|
_line_of_text_ => $i++, |
202
|
|
|
|
|
|
|
_rlower_case_labels_at_ => $i++, |
203
|
|
|
|
|
|
|
_extended_syntax_ => $i++, |
204
|
|
|
|
|
|
|
_maximum_level_ => $i++, |
205
|
|
|
|
|
|
|
_true_brace_error_count_ => $i++, |
206
|
|
|
|
|
|
|
_rOpts_maximum_level_errors_ => $i++, |
207
|
|
|
|
|
|
|
_rOpts_maximum_unexpected_errors_ => $i++, |
208
|
|
|
|
|
|
|
_rOpts_logfile_ => $i++, |
209
|
|
|
|
|
|
|
_rOpts_ => $i++, |
210
|
|
|
|
|
|
|
_rinput_lines_ => $i++, |
211
|
|
|
|
|
|
|
_input_line_index_next_ => $i++, |
212
|
39
|
|
|
39
|
|
294
|
}; |
|
39
|
|
|
|
|
111
|
|
213
|
|
|
|
|
|
|
} ## end BEGIN |
214
|
|
|
|
|
|
|
|
215
|
|
|
|
|
|
|
{ ## closure for subs to count instances |
216
|
|
|
|
|
|
|
|
217
|
|
|
|
|
|
|
# methods to count instances |
218
|
|
|
|
|
|
|
my $_count = 0; |
219
|
0
|
|
|
0
|
0
|
0
|
sub get_count { return $_count; } |
220
|
561
|
|
|
561
|
|
2473
|
sub _increment_count { return ++$_count } |
221
|
561
|
|
|
561
|
|
1217
|
sub _decrement_count { return --$_count } |
222
|
|
|
|
|
|
|
} |
223
|
|
|
|
|
|
|
|
224
|
|
|
|
|
|
|
sub DESTROY { |
225
|
561
|
|
|
561
|
|
1441
|
my $self = shift; |
226
|
561
|
|
|
|
|
3087
|
$self->_decrement_count(); |
227
|
561
|
|
|
|
|
8561
|
return; |
228
|
|
|
|
|
|
|
} |
229
|
|
|
|
|
|
|
|
230
|
|
|
|
|
|
|
sub AUTOLOAD { |
231
|
|
|
|
|
|
|
|
232
|
|
|
|
|
|
|
# Catch any undefined sub calls so that we are sure to get |
233
|
|
|
|
|
|
|
# some diagnostic information. This sub should never be called |
234
|
|
|
|
|
|
|
# except for a programming error. |
235
|
0
|
|
|
0
|
|
0
|
our $AUTOLOAD; |
236
|
0
|
0
|
|
|
|
0
|
return if ( $AUTOLOAD =~ /\bDESTROY$/ ); |
237
|
0
|
|
|
|
|
0
|
my ( $pkg, $fname, $lno ) = caller(); |
238
|
0
|
|
|
|
|
0
|
my $my_package = __PACKAGE__; |
239
|
0
|
|
|
|
|
0
|
print {*STDERR} <<EOM; |
|
0
|
|
|
|
|
0
|
|
240
|
|
|
|
|
|
|
====================================================================== |
241
|
|
|
|
|
|
|
Error detected in package '$my_package', version $VERSION |
242
|
|
|
|
|
|
|
Received unexpected AUTOLOAD call for sub '$AUTOLOAD' |
243
|
|
|
|
|
|
|
Called from package: '$pkg' |
244
|
|
|
|
|
|
|
Called from File '$fname' at line '$lno' |
245
|
|
|
|
|
|
|
This error is probably due to a recent programming change |
246
|
|
|
|
|
|
|
====================================================================== |
247
|
|
|
|
|
|
|
EOM |
248
|
0
|
|
|
|
|
0
|
exit 1; |
249
|
|
|
|
|
|
|
} ## end sub AUTOLOAD |
250
|
|
|
|
|
|
|
|
251
|
|
|
|
|
|
|
sub Die { |
252
|
0
|
|
|
0
|
0
|
0
|
my ($msg) = @_; |
253
|
0
|
|
|
|
|
0
|
Perl::Tidy::Die($msg); |
254
|
0
|
|
|
|
|
0
|
croak "unexpected return from Perl::Tidy::Die"; |
255
|
|
|
|
|
|
|
} |
256
|
|
|
|
|
|
|
|
257
|
|
|
|
|
|
|
sub Fault { |
258
|
0
|
|
|
0
|
0
|
0
|
my ( $self, $msg ) = @_; |
259
|
|
|
|
|
|
|
|
260
|
|
|
|
|
|
|
# This routine is called for errors that really should not occur |
261
|
|
|
|
|
|
|
# except if there has been a bug introduced by a recent program change. |
262
|
|
|
|
|
|
|
# Please add comments at calls to Fault to explain why the call |
263
|
|
|
|
|
|
|
# should not occur, and where to look to fix it. |
264
|
0
|
|
|
|
|
0
|
my ( $package0, $filename0, $line0, $subroutine0 ) = caller(0); |
265
|
0
|
|
|
|
|
0
|
my ( $package1, $filename1, $line1, $subroutine1 ) = caller(1); |
266
|
0
|
|
|
|
|
0
|
my ( $package2, $filename2, $line2, $subroutine2 ) = caller(2); |
267
|
0
|
|
|
|
|
0
|
my $pkg = __PACKAGE__; |
268
|
|
|
|
|
|
|
|
269
|
|
|
|
|
|
|
# Catch potential error of Fault not called as a method |
270
|
0
|
|
|
|
|
0
|
my $input_stream_name; |
271
|
0
|
0
|
|
|
|
0
|
if ( !ref($self) ) { |
272
|
0
|
|
|
|
|
0
|
$msg = "Fault not called as a method - please fix\n"; |
273
|
0
|
0
|
0
|
|
|
0
|
if ( $self && length($self) < 200 ) { $msg .= $self } |
|
0
|
|
|
|
|
0
|
|
274
|
0
|
|
|
|
|
0
|
$self = undef; |
275
|
0
|
|
|
|
|
0
|
$input_stream_name = "(UNKNOWN)"; |
276
|
|
|
|
|
|
|
} |
277
|
|
|
|
|
|
|
else { |
278
|
0
|
|
|
|
|
0
|
$input_stream_name = $self->get_input_stream_name(); |
279
|
|
|
|
|
|
|
} |
280
|
|
|
|
|
|
|
|
281
|
0
|
|
|
|
|
0
|
Die(<<EOM); |
282
|
|
|
|
|
|
|
============================================================================== |
283
|
|
|
|
|
|
|
While operating on input stream with name: '$input_stream_name' |
284
|
|
|
|
|
|
|
A fault was detected at line $line0 of sub '$subroutine1' |
285
|
|
|
|
|
|
|
in file '$filename1' |
286
|
|
|
|
|
|
|
which was called from line $line1 of sub '$subroutine2' |
287
|
|
|
|
|
|
|
Message: '$msg' |
288
|
|
|
|
|
|
|
This is probably an error introduced by a recent programming change. |
289
|
|
|
|
|
|
|
$pkg reports VERSION='$VERSION'. |
290
|
|
|
|
|
|
|
============================================================================== |
291
|
|
|
|
|
|
|
EOM |
292
|
|
|
|
|
|
|
|
293
|
|
|
|
|
|
|
# We shouldn't get here, but this return is to keep Perl-Critic from |
294
|
|
|
|
|
|
|
# complaining. |
295
|
0
|
|
|
|
|
0
|
return; |
296
|
|
|
|
|
|
|
} ## end sub Fault |
297
|
|
|
|
|
|
|
|
298
|
|
|
|
|
|
|
sub make_code_skipping_pattern { |
299
|
1118
|
|
|
1118
|
0
|
3400
|
my ( $rOpts, $opt_name, $default ) = @_; |
300
|
1118
|
|
|
|
|
2594
|
my $param = $rOpts->{$opt_name}; |
301
|
1118
|
100
|
|
|
|
3016
|
if ( !$param ) { $param = $default } |
|
1116
|
|
|
|
|
2054
|
|
302
|
1118
|
|
|
|
|
4614
|
$param =~ s/^\s*//; # allow leading spaces to be like format-skipping |
303
|
1118
|
50
|
|
|
|
5200
|
if ( $param !~ /^#/ ) { |
304
|
0
|
|
|
|
|
0
|
Die("ERROR: the $opt_name parameter '$param' must begin with '#'\n"); |
305
|
|
|
|
|
|
|
} |
306
|
1118
|
|
|
|
|
3484
|
my $pattern = '^\s*' . $param . '\b'; |
307
|
1118
|
50
|
|
|
|
3342
|
if ( Perl::Tidy::Formatter::bad_pattern($pattern) ) { |
308
|
0
|
|
|
|
|
0
|
Die( |
309
|
|
|
|
|
|
|
"ERROR: the $opt_name parameter '$param' causes the invalid regex '$pattern'\n" |
310
|
|
|
|
|
|
|
); |
311
|
|
|
|
|
|
|
} |
312
|
1118
|
|
|
|
|
3777
|
return $pattern; |
313
|
|
|
|
|
|
|
} ## end sub make_code_skipping_pattern |
314
|
|
|
|
|
|
|
|
315
|
|
|
|
|
|
|
sub check_options { |
316
|
|
|
|
|
|
|
|
317
|
|
|
|
|
|
|
# Check Tokenizer parameters |
318
|
559
|
|
|
559
|
0
|
1751
|
my $rOpts = shift; |
319
|
|
|
|
|
|
|
|
320
|
559
|
|
|
|
|
2161
|
%is_sub = (); |
321
|
559
|
|
|
|
|
1809
|
$is_sub{'sub'} = 1; |
322
|
|
|
|
|
|
|
|
323
|
559
|
|
|
|
|
4658
|
%is_END_DATA_format_sub = ( |
324
|
|
|
|
|
|
|
'__END__' => 1, |
325
|
|
|
|
|
|
|
'__DATA__' => 1, |
326
|
|
|
|
|
|
|
'format' => 1, |
327
|
|
|
|
|
|
|
'sub' => 1, |
328
|
|
|
|
|
|
|
); |
329
|
|
|
|
|
|
|
|
330
|
|
|
|
|
|
|
# Install any aliases to 'sub' |
331
|
559
|
100
|
|
|
|
2355
|
if ( $rOpts->{'sub-alias-list'} ) { |
332
|
|
|
|
|
|
|
|
333
|
|
|
|
|
|
|
# Note that any 'sub-alias-list' has been preprocessed to |
334
|
|
|
|
|
|
|
# be a trimmed, space-separated list which includes 'sub' |
335
|
|
|
|
|
|
|
# for example, it might be 'sub method fun' |
336
|
3
|
|
|
|
|
32
|
my @sub_alias_list = split /\s+/, $rOpts->{'sub-alias-list'}; |
337
|
3
|
|
|
|
|
17
|
foreach my $word (@sub_alias_list) { |
338
|
11
|
|
|
|
|
24
|
$is_sub{$word} = 1; |
339
|
11
|
|
|
|
|
22
|
$is_END_DATA_format_sub{$word} = 1; |
340
|
|
|
|
|
|
|
} |
341
|
|
|
|
|
|
|
} |
342
|
|
|
|
|
|
|
|
343
|
|
|
|
|
|
|
# Set global flag to say if we have to guess if bareword 'method' is |
344
|
|
|
|
|
|
|
# a sub when 'method' is in %is_sub. This will be true unless: |
345
|
|
|
|
|
|
|
# (1) the user entered 'method' as sub alias, or |
346
|
|
|
|
|
|
|
# (2) the user set --use-feature=class |
347
|
|
|
|
|
|
|
# In these two cases we can assume that 'method' is a sub alias. |
348
|
559
|
|
|
|
|
1323
|
$guess_if_method = 1; |
349
|
559
|
100
|
|
|
|
2104
|
if ( $is_sub{'method'} ) { $guess_if_method = 0 } |
|
2
|
|
|
|
|
6
|
|
350
|
|
|
|
|
|
|
|
351
|
|
|
|
|
|
|
#------------------------------------------------ |
352
|
|
|
|
|
|
|
# Update hash values for any -use-feature options |
353
|
|
|
|
|
|
|
#------------------------------------------------ |
354
|
|
|
|
|
|
|
|
355
|
559
|
|
|
|
|
1381
|
my $use_feature_class = 1; |
356
|
559
|
50
|
|
|
|
2180
|
if ( $rOpts->{'use-feature'} ) { |
357
|
0
|
0
|
|
|
|
0
|
if ( $rOpts->{'use-feature'} =~ /\bnoclass\b/ ) { |
|
|
0
|
|
|
|
|
|
358
|
0
|
|
|
|
|
0
|
$use_feature_class = 0; |
359
|
|
|
|
|
|
|
} |
360
|
|
|
|
|
|
|
elsif ( $rOpts->{'use-feature'} =~ /\bclass\b/ ) { |
361
|
0
|
|
|
|
|
0
|
$guess_if_method = 0; |
362
|
|
|
|
|
|
|
} |
363
|
|
|
|
|
|
|
else { |
364
|
|
|
|
|
|
|
## neither 'class' nor 'noclass' seen so use default |
365
|
|
|
|
|
|
|
} |
366
|
|
|
|
|
|
|
} |
367
|
|
|
|
|
|
|
|
368
|
|
|
|
|
|
|
# These are the main updates for this option. There are additional |
369
|
|
|
|
|
|
|
# changes elsewhere, usually indicated with a comment 'rt145706' |
370
|
|
|
|
|
|
|
|
371
|
|
|
|
|
|
|
# Update hash values for use_feature=class, added for rt145706 |
372
|
|
|
|
|
|
|
# see 'perlclass.pod' |
373
|
|
|
|
|
|
|
|
374
|
|
|
|
|
|
|
# IMPORTANT: We are changing global hash values initially set in a BEGIN |
375
|
|
|
|
|
|
|
# block. Values must be defined (true or false) for each of these new |
376
|
|
|
|
|
|
|
# words whether true or false. Otherwise, programs using the module which |
377
|
|
|
|
|
|
|
# change options between runs (such as test code) will have |
378
|
|
|
|
|
|
|
# incorrect settings and fail. |
379
|
|
|
|
|
|
|
|
380
|
|
|
|
|
|
|
# There are 4 new keywords: |
381
|
|
|
|
|
|
|
|
382
|
|
|
|
|
|
|
# 'class' - treated specially as generalization of 'package' |
383
|
|
|
|
|
|
|
# Note: we must not set 'class' to be a keyword to avoid problems |
384
|
|
|
|
|
|
|
# with older uses. |
385
|
559
|
|
|
|
|
1828
|
$is_package{'class'} = $use_feature_class; |
386
|
|
|
|
|
|
|
|
387
|
|
|
|
|
|
|
# 'method' - treated like sub using the sub-alias-list option |
388
|
|
|
|
|
|
|
# Note: we must not set 'method' to be a keyword to avoid problems |
389
|
|
|
|
|
|
|
# with older uses. |
390
|
559
|
50
|
|
|
|
1851
|
if ($use_feature_class) { |
391
|
559
|
|
|
|
|
1649
|
$is_sub{'method'} = 1; |
392
|
559
|
|
|
|
|
1615
|
$is_END_DATA_format_sub{'method'} = 1; |
393
|
|
|
|
|
|
|
} |
394
|
|
|
|
|
|
|
|
395
|
|
|
|
|
|
|
# 'field' - added as a keyword, and works like 'my' |
396
|
559
|
|
|
|
|
1606
|
$is_keyword{'field'} = $use_feature_class; |
397
|
559
|
|
|
|
|
1614
|
$is_my_our_state{'field'} = $use_feature_class; |
398
|
|
|
|
|
|
|
|
399
|
|
|
|
|
|
|
# 'ADJUST' - added as a keyword and works like 'BEGIN' |
400
|
|
|
|
|
|
|
# TODO: if ADJUST gets a paren list, this will need to be updated |
401
|
559
|
|
|
|
|
1429
|
$is_keyword{'ADJUST'} = $use_feature_class; |
402
|
559
|
|
|
|
|
1478
|
$is_code_block_token{'ADJUST'} = $use_feature_class; |
403
|
|
|
|
|
|
|
|
404
|
559
|
|
|
|
|
2079
|
%is_grep_alias = (); |
405
|
559
|
50
|
|
|
|
2077
|
if ( $rOpts->{'grep-alias-list'} ) { |
406
|
|
|
|
|
|
|
|
407
|
|
|
|
|
|
|
# Note that 'grep-alias-list' has been preprocessed to be a trimmed, |
408
|
|
|
|
|
|
|
# space-separated list |
409
|
559
|
|
|
|
|
5709
|
my @q = split /\s+/, $rOpts->{'grep-alias-list'}; |
410
|
559
|
|
|
|
|
5057
|
@{is_grep_alias}{@q} = (1) x scalar(@q); |
411
|
|
|
|
|
|
|
} |
412
|
|
|
|
|
|
|
|
413
|
559
|
|
|
|
|
2000
|
$rOpts_code_skipping = $rOpts->{'code-skipping'}; |
414
|
559
|
|
|
|
|
1449
|
$rOpts_code_skipping_begin = $rOpts->{'code-skipping-begin'}; |
415
|
559
|
|
|
|
|
2862
|
$code_skipping_pattern_begin = |
416
|
|
|
|
|
|
|
make_code_skipping_pattern( $rOpts, 'code-skipping-begin', '#<<V' ); |
417
|
559
|
|
|
|
|
2116
|
$code_skipping_pattern_end = |
418
|
|
|
|
|
|
|
make_code_skipping_pattern( $rOpts, 'code-skipping-end', '#>>V' ); |
419
|
|
|
|
|
|
|
|
420
|
559
|
|
|
|
|
2161
|
return; |
421
|
|
|
|
|
|
|
} ## end sub check_options |
422
|
|
|
|
|
|
|
|
423
|
|
|
|
|
|
|
sub new { |
424
|
|
|
|
|
|
|
|
425
|
561
|
|
|
561
|
0
|
5686
|
my ( $class, @args ) = @_; |
426
|
|
|
|
|
|
|
|
427
|
|
|
|
|
|
|
# Note: 'tabs' and 'indent_columns' are temporary and should be |
428
|
|
|
|
|
|
|
# removed asap |
429
|
561
|
|
|
|
|
9472
|
my %defaults = ( |
430
|
|
|
|
|
|
|
source_object => undef, |
431
|
|
|
|
|
|
|
debugger_object => undef, |
432
|
|
|
|
|
|
|
diagnostics_object => undef, |
433
|
|
|
|
|
|
|
logger_object => undef, |
434
|
|
|
|
|
|
|
starting_level => undef, |
435
|
|
|
|
|
|
|
indent_columns => 4, |
436
|
|
|
|
|
|
|
tabsize => 8, |
437
|
|
|
|
|
|
|
look_for_hash_bang => 0, |
438
|
|
|
|
|
|
|
trim_qw => 1, |
439
|
|
|
|
|
|
|
look_for_autoloader => 1, |
440
|
|
|
|
|
|
|
look_for_selfloader => 1, |
441
|
|
|
|
|
|
|
starting_line_number => 1, |
442
|
|
|
|
|
|
|
extended_syntax => 0, |
443
|
|
|
|
|
|
|
rOpts => {}, |
444
|
|
|
|
|
|
|
); |
445
|
561
|
|
|
|
|
7570
|
my %args = ( %defaults, @args ); |
446
|
|
|
|
|
|
|
|
447
|
|
|
|
|
|
|
# we are given an object with a get_line() method to supply source lines |
448
|
561
|
|
|
|
|
2327
|
my $source_object = $args{source_object}; |
449
|
561
|
|
|
|
|
1952
|
my $rOpts = $args{rOpts}; |
450
|
|
|
|
|
|
|
|
451
|
|
|
|
|
|
|
# Check call args |
452
|
561
|
50
|
|
|
|
2165
|
if ( !defined($source_object) ) { |
453
|
0
|
|
|
|
|
0
|
Die( |
454
|
|
|
|
|
|
|
"Perl::Tidy::Tokenizer::new called without a 'source_object' parameter\n" |
455
|
|
|
|
|
|
|
); |
456
|
|
|
|
|
|
|
} |
457
|
561
|
50
|
|
|
|
2187
|
if ( !ref($source_object) ) { |
458
|
0
|
|
|
|
|
0
|
Die(<<EOM); |
459
|
|
|
|
|
|
|
sub Perl::Tidy::Tokenizer::new received a 'source_object' parameter which is not a reference; |
460
|
|
|
|
|
|
|
'source_object' must be a reference to a STRING, ARRAY, or object with a 'getline' method |
461
|
|
|
|
|
|
|
EOM |
462
|
|
|
|
|
|
|
} |
463
|
|
|
|
|
|
|
|
464
|
|
|
|
|
|
|
# Tokenizer state data is as follows: |
465
|
|
|
|
|
|
|
# _rhere_target_list_ reference to list of here-doc targets |
466
|
|
|
|
|
|
|
# _here_doc_target_ the target string for a here document |
467
|
|
|
|
|
|
|
# _here_quote_character_ the type of here-doc quoting (" ' ` or none) |
468
|
|
|
|
|
|
|
# to determine if interpolation is done |
469
|
|
|
|
|
|
|
# _quote_target_ character we seek if chasing a quote |
470
|
|
|
|
|
|
|
# _line_start_quote_ line where we started looking for a long quote |
471
|
|
|
|
|
|
|
# _in_here_doc_ flag indicating if we are in a here-doc |
472
|
|
|
|
|
|
|
# _in_pod_ flag set if we are in pod documentation |
473
|
|
|
|
|
|
|
# _in_skipped_ flag set if we are in a skipped section |
474
|
|
|
|
|
|
|
# _in_error_ flag set if we saw severe error (binary in script) |
475
|
|
|
|
|
|
|
# _in_trouble_ set if we saw a troublesome lexical like 'my sub s' |
476
|
|
|
|
|
|
|
# _warning_count_ number of calls to logger sub warning |
477
|
|
|
|
|
|
|
# _html_tag_count_ number of apparent html tags seen (indicates html) |
478
|
|
|
|
|
|
|
# _in_data_ flag set if we are in __DATA__ section |
479
|
|
|
|
|
|
|
# _in_end_ flag set if we are in __END__ section |
480
|
|
|
|
|
|
|
# _in_format_ flag set if we are in a format description |
481
|
|
|
|
|
|
|
# _in_attribute_list_ flag telling if we are looking for attributes |
482
|
|
|
|
|
|
|
# _in_quote_ flag telling if we are chasing a quote |
483
|
|
|
|
|
|
|
# _starting_level_ indentation level of first line |
484
|
|
|
|
|
|
|
# _diagnostics_object_ place to write debugging information |
485
|
|
|
|
|
|
|
# _unexpected_error_count_ error count used to limit output |
486
|
|
|
|
|
|
|
# _lower_case_labels_at_ line numbers where lower case labels seen |
487
|
|
|
|
|
|
|
# _hit_bug_ program bug detected |
488
|
|
|
|
|
|
|
|
489
|
561
|
|
|
|
|
1432
|
my $self = []; |
490
|
561
|
|
|
|
|
1775
|
$self->[_rhere_target_list_] = []; |
491
|
561
|
|
|
|
|
1427
|
$self->[_in_here_doc_] = 0; |
492
|
561
|
|
|
|
|
1651
|
$self->[_here_doc_target_] = EMPTY_STRING; |
493
|
561
|
|
|
|
|
1423
|
$self->[_here_quote_character_] = EMPTY_STRING; |
494
|
561
|
|
|
|
|
1480
|
$self->[_in_data_] = 0; |
495
|
561
|
|
|
|
|
1531
|
$self->[_in_end_] = 0; |
496
|
561
|
|
|
|
|
1549
|
$self->[_in_format_] = 0; |
497
|
561
|
|
|
|
|
1350
|
$self->[_in_error_] = 0; |
498
|
561
|
|
|
|
|
1376
|
$self->[_in_trouble_] = 0; |
499
|
561
|
|
|
|
|
1224
|
$self->[_warning_count_] = 0; |
500
|
561
|
|
|
|
|
1318
|
$self->[_html_tag_count_] = 0; |
501
|
561
|
|
|
|
|
1309
|
$self->[_in_pod_] = 0; |
502
|
561
|
|
|
|
|
1383
|
$self->[_in_skipped_] = 0; |
503
|
561
|
|
|
|
|
1221
|
$self->[_in_attribute_list_] = 0; |
504
|
561
|
|
|
|
|
1260
|
$self->[_in_quote_] = 0; |
505
|
561
|
|
|
|
|
1468
|
$self->[_quote_target_] = EMPTY_STRING; |
506
|
561
|
|
|
|
|
1348
|
$self->[_line_start_quote_] = -1; |
507
|
561
|
|
|
|
|
1436
|
$self->[_starting_level_] = $args{starting_level}; |
508
|
561
|
|
|
|
|
1839
|
$self->[_know_starting_level_] = defined( $args{starting_level} ); |
509
|
561
|
|
|
|
|
1548
|
$self->[_tabsize_] = $args{tabsize}; |
510
|
561
|
|
|
|
|
1871
|
$self->[_indent_columns_] = $args{indent_columns}; |
511
|
561
|
|
|
|
|
1347
|
$self->[_look_for_hash_bang_] = $args{look_for_hash_bang}; |
512
|
561
|
|
|
|
|
1461
|
$self->[_trim_qw_] = $args{trim_qw}; |
513
|
561
|
|
|
|
|
1461
|
$self->[_continuation_indentation_] = $args{continuation_indentation}; |
514
|
561
|
|
|
|
|
1390
|
$self->[_outdent_labels_] = $args{outdent_labels}; |
515
|
561
|
|
|
|
|
1515
|
$self->[_last_line_number_] = $args{starting_line_number} - 1; |
516
|
561
|
|
|
|
|
1329
|
$self->[_saw_perl_dash_P_] = 0; |
517
|
561
|
|
|
|
|
1475
|
$self->[_saw_perl_dash_w_] = 0; |
518
|
561
|
|
|
|
|
1195
|
$self->[_saw_use_strict_] = 0; |
519
|
561
|
|
|
|
|
1187
|
$self->[_saw_v_string_] = 0; |
520
|
561
|
|
|
|
|
1214
|
$self->[_saw_brace_error_] = 0; |
521
|
561
|
|
|
|
|
1221
|
$self->[_hit_bug_] = 0; |
522
|
561
|
|
|
|
|
1240
|
$self->[_look_for_autoloader_] = $args{look_for_autoloader}; |
523
|
561
|
|
|
|
|
1543
|
$self->[_look_for_selfloader_] = $args{look_for_selfloader}; |
524
|
561
|
|
|
|
|
1428
|
$self->[_saw_autoloader_] = 0; |
525
|
561
|
|
|
|
|
1190
|
$self->[_saw_selfloader_] = 0; |
526
|
561
|
|
|
|
|
1282
|
$self->[_saw_hash_bang_] = 0; |
527
|
561
|
|
|
|
|
1163
|
$self->[_saw_end_] = 0; |
528
|
561
|
|
|
|
|
1188
|
$self->[_saw_data_] = 0; |
529
|
561
|
|
|
|
|
1190
|
$self->[_saw_negative_indentation_] = 0; |
530
|
561
|
|
|
|
|
1438
|
$self->[_started_tokenizing_] = 0; |
531
|
561
|
|
|
|
|
1343
|
$self->[_debugger_object_] = $args{debugger_object}; |
532
|
561
|
|
|
|
|
1335
|
$self->[_diagnostics_object_] = $args{diagnostics_object}; |
533
|
561
|
|
|
|
|
1312
|
$self->[_logger_object_] = $args{logger_object}; |
534
|
561
|
|
|
|
|
1407
|
$self->[_unexpected_error_count_] = 0; |
535
|
561
|
|
|
|
|
1242
|
$self->[_started_looking_for_here_target_at_] = 0; |
536
|
561
|
|
|
|
|
1228
|
$self->[_nearly_matched_here_target_at_] = undef; |
537
|
561
|
|
|
|
|
1219
|
$self->[_line_of_text_] = EMPTY_STRING; |
538
|
561
|
|
|
|
|
1469
|
$self->[_rlower_case_labels_at_] = undef; |
539
|
561
|
|
|
|
|
1497
|
$self->[_extended_syntax_] = $args{extended_syntax}; |
540
|
561
|
|
|
|
|
1515
|
$self->[_maximum_level_] = 0; |
541
|
561
|
|
|
|
|
1234
|
$self->[_true_brace_error_count_] = 0; |
542
|
561
|
|
|
|
|
1425
|
$self->[_rOpts_maximum_level_errors_] = $rOpts->{'maximum-level-errors'}; |
543
|
|
|
|
|
|
|
$self->[_rOpts_maximum_unexpected_errors_] = |
544
|
561
|
|
|
|
|
1486
|
$rOpts->{'maximum-unexpected-errors'}; |
545
|
561
|
|
|
|
|
1572
|
$self->[_rOpts_logfile_] = $rOpts->{'logfile'}; |
546
|
561
|
|
|
|
|
1573
|
$self->[_rOpts_] = $rOpts; |
547
|
|
|
|
|
|
|
|
548
|
|
|
|
|
|
|
# These vars are used for guessing indentation and must be positive |
549
|
561
|
50
|
|
|
|
2116
|
$self->[_tabsize_] = 8 if ( !$self->[_tabsize_] ); |
550
|
561
|
100
|
|
|
|
2111
|
$self->[_indent_columns_] = 4 if ( !$self->[_indent_columns_] ); |
551
|
|
|
|
|
|
|
|
552
|
561
|
|
|
|
|
1480
|
bless $self, $class; |
553
|
|
|
|
|
|
|
|
554
|
561
|
|
|
|
|
3744
|
$self->prepare_for_a_new_file($source_object); |
555
|
561
|
|
|
|
|
3477
|
$self->find_starting_indentation_level(); |
556
|
|
|
|
|
|
|
|
557
|
|
|
|
|
|
|
# This is not a full class yet, so die if an attempt is made to |
558
|
|
|
|
|
|
|
# create more than one object. |
559
|
|
|
|
|
|
|
|
560
|
561
|
50
|
|
|
|
2747
|
if ( _increment_count() > 1 ) { |
561
|
0
|
|
|
|
|
0
|
confess |
562
|
|
|
|
|
|
|
"Attempt to create more than 1 object in $class, which is not a true class yet\n"; |
563
|
|
|
|
|
|
|
} |
564
|
|
|
|
|
|
|
|
565
|
561
|
|
|
|
|
6418
|
return $self; |
566
|
|
|
|
|
|
|
|
567
|
|
|
|
|
|
|
} ## end sub new |
568
|
|
|
|
|
|
|
|
569
|
|
|
|
|
|
|
# Called externally |
570
|
|
|
|
|
|
|
sub get_unexpected_error_count { |
571
|
4
|
|
|
4
|
0
|
17
|
my ($self) = @_; |
572
|
4
|
|
|
|
|
18
|
return $self->[_unexpected_error_count_]; |
573
|
|
|
|
|
|
|
} |
574
|
|
|
|
|
|
|
|
575
|
|
|
|
|
|
|
# Called externally |
576
|
|
|
|
|
|
|
sub is_keyword { |
577
|
2791
|
|
|
2791
|
0
|
5126
|
my ($str) = @_; |
578
|
2791
|
|
|
|
|
10244
|
return $is_keyword{$str}; |
579
|
|
|
|
|
|
|
} |
580
|
|
|
|
|
|
|
|
581
|
|
|
|
|
|
|
#---------------------------------------------------------------- |
582
|
|
|
|
|
|
|
# Line input routines, previously handled by the LineBuffer class |
583
|
|
|
|
|
|
|
#---------------------------------------------------------------- |
584
|
|
|
|
|
|
|
sub make_source_array { |
585
|
|
|
|
|
|
|
|
586
|
561
|
|
|
561
|
0
|
1788
|
my ( $self, $line_source_object ) = @_; |
587
|
|
|
|
|
|
|
|
588
|
|
|
|
|
|
|
# Convert the source into an array of lines |
589
|
561
|
|
|
|
|
1332
|
my $rinput_lines = []; |
590
|
|
|
|
|
|
|
|
591
|
561
|
|
|
|
|
1529
|
my $rsource = ref($line_source_object); |
592
|
|
|
|
|
|
|
|
593
|
561
|
50
|
|
|
|
3719
|
if ( !$rsource ) { |
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
594
|
|
|
|
|
|
|
|
595
|
|
|
|
|
|
|
# shouldn't happen: this should have been checked in sub new |
596
|
0
|
|
|
|
|
0
|
$self->Fault(<<EOM); |
597
|
|
|
|
|
|
|
sub Perl::Tidy::Tokenizer::new received a 'source_object' parameter which is not a reference; |
598
|
|
|
|
|
|
|
'source_object' must be a reference to a STRING, ARRAY, or object with a 'getline' method |
599
|
|
|
|
|
|
|
EOM |
600
|
|
|
|
|
|
|
} |
601
|
|
|
|
|
|
|
|
602
|
|
|
|
|
|
|
# handle an ARRAY ref |
603
|
|
|
|
|
|
|
elsif ( $rsource eq 'ARRAY' ) { |
604
|
0
|
|
|
|
|
0
|
$rinput_lines = $line_source_object; |
605
|
|
|
|
|
|
|
} |
606
|
|
|
|
|
|
|
|
607
|
|
|
|
|
|
|
# handle a SCALAR ref |
608
|
|
|
|
|
|
|
elsif ( $rsource eq 'SCALAR' ) { |
609
|
561
|
|
|
|
|
974
|
my @lines = split /^/, ${$line_source_object}; |
|
561
|
|
|
|
|
9070
|
|
610
|
561
|
|
|
|
|
2442
|
$rinput_lines = \@lines; |
611
|
|
|
|
|
|
|
} |
612
|
|
|
|
|
|
|
|
613
|
|
|
|
|
|
|
# handle an object - must have a get_line method |
614
|
|
|
|
|
|
|
else { |
615
|
0
|
|
|
|
|
0
|
while ( my $line = $line_source_object->get_line() ) { |
616
|
0
|
|
|
|
|
0
|
push( @{$rinput_lines}, $line ); |
|
0
|
|
|
|
|
0
|
|
617
|
|
|
|
|
|
|
} |
618
|
|
|
|
|
|
|
} |
619
|
|
|
|
|
|
|
|
620
|
561
|
|
|
|
|
2245
|
$self->[_rinput_lines_] = $rinput_lines; |
621
|
561
|
|
|
|
|
1574
|
$self->[_input_line_index_next_] = 0; |
622
|
561
|
|
|
|
|
1310
|
return; |
623
|
|
|
|
|
|
|
} ## end sub make_source_array |
624
|
|
|
|
|
|
|
|
625
|
|
|
|
|
|
|
sub peek_ahead { |
626
|
1234
|
|
|
1234
|
0
|
2926
|
my ( $self, $buffer_index ) = @_; |
627
|
|
|
|
|
|
|
|
628
|
|
|
|
|
|
|
# look $buffer_index lines ahead of the current location without disturbing |
629
|
|
|
|
|
|
|
# the input |
630
|
1234
|
|
|
|
|
2014
|
my $line; |
631
|
1234
|
|
|
|
|
2429
|
my $rinput_lines = $self->[_rinput_lines_]; |
632
|
1234
|
|
|
|
|
2534
|
my $line_index = $buffer_index + $self->[_input_line_index_next_]; |
633
|
1234
|
100
|
|
|
|
2091
|
if ( $line_index < @{$rinput_lines} ) { |
|
1234
|
|
|
|
|
3595
|
|
634
|
1222
|
|
|
|
|
2607
|
$line = $rinput_lines->[$line_index]; |
635
|
|
|
|
|
|
|
} |
636
|
1234
|
|
|
|
|
4657
|
return $line; |
637
|
|
|
|
|
|
|
} ## end sub peek_ahead |
638
|
|
|
|
|
|
|
|
639
|
|
|
|
|
|
|
#----------------------------------------- |
640
|
|
|
|
|
|
|
# interface to Perl::Tidy::Logger routines |
641
|
|
|
|
|
|
|
#----------------------------------------- |
642
|
|
|
|
|
|
|
sub warning { |
643
|
|
|
|
|
|
|
|
644
|
0
|
|
|
0
|
0
|
0
|
my ( $self, $msg ) = @_; |
645
|
|
|
|
|
|
|
|
646
|
0
|
|
|
|
|
0
|
my $logger_object = $self->[_logger_object_]; |
647
|
0
|
|
|
|
|
0
|
$self->[_warning_count_]++; |
648
|
0
|
0
|
|
|
|
0
|
if ($logger_object) { |
649
|
0
|
|
|
|
|
0
|
my $msg_line_number = $self->[_last_line_number_]; |
650
|
0
|
|
|
|
|
0
|
$logger_object->warning( $msg, $msg_line_number ); |
651
|
|
|
|
|
|
|
} |
652
|
0
|
|
|
|
|
0
|
return; |
653
|
|
|
|
|
|
|
} ## end sub warning |
654
|
|
|
|
|
|
|
|
655
|
|
|
|
|
|
|
sub get_input_stream_name { |
656
|
|
|
|
|
|
|
|
657
|
0
|
|
|
0
|
0
|
0
|
my $self = shift; |
658
|
|
|
|
|
|
|
|
659
|
0
|
|
|
|
|
0
|
my $input_stream_name = EMPTY_STRING; |
660
|
0
|
|
|
|
|
0
|
my $logger_object = $self->[_logger_object_]; |
661
|
0
|
0
|
|
|
|
0
|
if ($logger_object) { |
662
|
0
|
|
|
|
|
0
|
$input_stream_name = $logger_object->get_input_stream_name(); |
663
|
|
|
|
|
|
|
} |
664
|
0
|
|
|
|
|
0
|
return $input_stream_name; |
665
|
|
|
|
|
|
|
} ## end sub get_input_stream_name |
666
|
|
|
|
|
|
|
|
667
|
|
|
|
|
|
|
sub complain { |
668
|
|
|
|
|
|
|
|
669
|
32
|
|
|
32
|
0
|
112
|
my ( $self, $msg ) = @_; |
670
|
|
|
|
|
|
|
|
671
|
32
|
|
|
|
|
80
|
my $logger_object = $self->[_logger_object_]; |
672
|
32
|
50
|
|
|
|
101
|
if ($logger_object) { |
673
|
32
|
|
|
|
|
72
|
my $input_line_number = $self->[_last_line_number_]; |
674
|
32
|
|
|
|
|
185
|
$logger_object->complain( $msg, $input_line_number ); |
675
|
|
|
|
|
|
|
} |
676
|
32
|
|
|
|
|
82
|
return; |
677
|
|
|
|
|
|
|
} ## end sub complain |
678
|
|
|
|
|
|
|
|
679
|
|
|
|
|
|
|
sub write_logfile_entry { |
680
|
|
|
|
|
|
|
|
681
|
1857
|
|
|
1857
|
0
|
4409
|
my ( $self, $msg ) = @_; |
682
|
|
|
|
|
|
|
|
683
|
1857
|
|
|
|
|
3529
|
my $logger_object = $self->[_logger_object_]; |
684
|
1857
|
100
|
|
|
|
4484
|
if ($logger_object) { |
685
|
1851
|
|
|
|
|
5579
|
$logger_object->write_logfile_entry($msg); |
686
|
|
|
|
|
|
|
} |
687
|
1857
|
|
|
|
|
5683
|
return; |
688
|
|
|
|
|
|
|
} ## end sub write_logfile_entry |
689
|
|
|
|
|
|
|
|
690
|
|
|
|
|
|
|
sub interrupt_logfile { |
691
|
|
|
|
|
|
|
|
692
|
0
|
|
|
0
|
0
|
0
|
my $self = shift; |
693
|
|
|
|
|
|
|
|
694
|
0
|
|
|
|
|
0
|
my $logger_object = $self->[_logger_object_]; |
695
|
0
|
0
|
|
|
|
0
|
if ($logger_object) { |
696
|
0
|
|
|
|
|
0
|
$logger_object->interrupt_logfile(); |
697
|
|
|
|
|
|
|
} |
698
|
0
|
|
|
|
|
0
|
return; |
699
|
|
|
|
|
|
|
} ## end sub interrupt_logfile |
700
|
|
|
|
|
|
|
|
701
|
|
|
|
|
|
|
sub resume_logfile { |
702
|
|
|
|
|
|
|
|
703
|
0
|
|
|
0
|
0
|
0
|
my $self = shift; |
704
|
|
|
|
|
|
|
|
705
|
0
|
|
|
|
|
0
|
my $logger_object = $self->[_logger_object_]; |
706
|
0
|
0
|
|
|
|
0
|
if ($logger_object) { |
707
|
0
|
|
|
|
|
0
|
$logger_object->resume_logfile(); |
708
|
|
|
|
|
|
|
} |
709
|
0
|
|
|
|
|
0
|
return; |
710
|
|
|
|
|
|
|
} ## end sub resume_logfile |
711
|
|
|
|
|
|
|
|
712
|
|
|
|
|
|
|
sub brace_warning { |
713
|
0
|
|
|
0
|
0
|
0
|
my ( $self, $msg ) = @_; |
714
|
0
|
|
|
|
|
0
|
$self->[_saw_brace_error_]++; |
715
|
|
|
|
|
|
|
|
716
|
0
|
|
|
|
|
0
|
my $logger_object = $self->[_logger_object_]; |
717
|
0
|
0
|
|
|
|
0
|
if ($logger_object) { |
718
|
0
|
|
|
|
|
0
|
my $msg_line_number = $self->[_last_line_number_]; |
719
|
0
|
|
|
|
|
0
|
$logger_object->brace_warning( $msg, $msg_line_number ); |
720
|
|
|
|
|
|
|
} |
721
|
0
|
|
|
|
|
0
|
return; |
722
|
|
|
|
|
|
|
} ## end sub brace_warning |
723
|
|
|
|
|
|
|
|
724
|
|
|
|
|
|
|
sub increment_brace_error { |
725
|
|
|
|
|
|
|
|
726
|
|
|
|
|
|
|
# This is same as sub brace_warning but without a message |
727
|
0
|
|
|
0
|
0
|
0
|
my $self = shift; |
728
|
0
|
|
|
|
|
0
|
$self->[_saw_brace_error_]++; |
729
|
|
|
|
|
|
|
|
730
|
0
|
|
|
|
|
0
|
my $logger_object = $self->[_logger_object_]; |
731
|
0
|
0
|
|
|
|
0
|
if ($logger_object) { |
732
|
0
|
|
|
|
|
0
|
$logger_object->increment_brace_error(); |
733
|
|
|
|
|
|
|
} |
734
|
0
|
|
|
|
|
0
|
return; |
735
|
|
|
|
|
|
|
} ## end sub increment_brace_error |
736
|
|
|
|
|
|
|
|
737
|
|
|
|
|
|
|
sub get_saw_brace_error { |
738
|
0
|
|
|
0
|
0
|
0
|
my $self = shift; |
739
|
0
|
|
|
|
|
0
|
return $self->[_saw_brace_error_]; |
740
|
|
|
|
|
|
|
} ## end sub get_saw_brace_error |
741
|
|
|
|
|
|
|
|
742
|
|
|
|
|
|
|
sub report_definite_bug { |
743
|
0
|
|
|
0
|
0
|
0
|
my $self = shift; |
744
|
0
|
|
|
|
|
0
|
$self->[_hit_bug_] = 1; |
745
|
0
|
|
|
|
|
0
|
my $logger_object = $self->[_logger_object_]; |
746
|
0
|
0
|
|
|
|
0
|
if ($logger_object) { |
747
|
0
|
|
|
|
|
0
|
$logger_object->report_definite_bug(); |
748
|
|
|
|
|
|
|
} |
749
|
0
|
|
|
|
|
0
|
return; |
750
|
|
|
|
|
|
|
} ## end sub report_definite_bug |
751
|
|
|
|
|
|
|
|
752
|
|
|
|
|
|
|
#------------------------------------- |
753
|
|
|
|
|
|
|
# Interface to Perl::Tidy::Diagnostics |
754
|
|
|
|
|
|
|
#------------------------------------- |
755
|
|
|
|
|
|
|
sub write_diagnostics { |
756
|
0
|
|
|
0
|
0
|
0
|
my ( $self, $msg ) = @_; |
757
|
0
|
|
|
|
|
0
|
my $input_line_number = $self->[_last_line_number_]; |
758
|
0
|
|
|
|
|
0
|
my $diagnostics_object = $self->[_diagnostics_object_]; |
759
|
0
|
0
|
|
|
|
0
|
if ($diagnostics_object) { |
760
|
0
|
|
|
|
|
0
|
$diagnostics_object->write_diagnostics( $msg, $input_line_number ); |
761
|
|
|
|
|
|
|
} |
762
|
0
|
|
|
|
|
0
|
return; |
763
|
|
|
|
|
|
|
} ## end sub write_diagnostics |
764
|
|
|
|
|
|
|
|
765
|
|
|
|
|
|
|
sub report_tokenization_errors { |
766
|
|
|
|
|
|
|
|
767
|
561
|
|
|
561
|
0
|
1990
|
my ($self) = @_; |
768
|
|
|
|
|
|
|
|
769
|
|
|
|
|
|
|
# Report any tokenization errors and return a flag '$severe_error'. |
770
|
|
|
|
|
|
|
# Set $severe_error = 1 if the tokenization errors are so severe that |
771
|
|
|
|
|
|
|
# the formatter should not attempt to format the file. Instead, it will |
772
|
|
|
|
|
|
|
# just output the file verbatim. |
773
|
|
|
|
|
|
|
|
774
|
|
|
|
|
|
|
# set severe error flag if tokenizer has encountered file reading problems |
775
|
|
|
|
|
|
|
# (i.e. unexpected binary characters) |
776
|
|
|
|
|
|
|
# or code which may not be formatted correctly (such as 'my sub q') |
777
|
|
|
|
|
|
|
# The difference between _in_error_ and _in_trouble_ is that |
778
|
|
|
|
|
|
|
# _in_error_ stops the tokenizer immediately whereas |
779
|
|
|
|
|
|
|
# _in_trouble_ lets the tokenizer finish so that all errors are seen |
780
|
|
|
|
|
|
|
# Both block formatting and cause the input stream to be output verbatim. |
781
|
561
|
|
33
|
|
|
3403
|
my $severe_error = $self->[_in_error_] || $self->[_in_trouble_]; |
782
|
|
|
|
|
|
|
|
783
|
|
|
|
|
|
|
# And do not format if it looks like an html file (c209) |
784
|
561
|
|
33
|
|
|
4248
|
$severe_error ||= $self->[_html_tag_count_] && $self->[_warning_count_]; |
|
|
|
33
|
|
|
|
|
785
|
|
|
|
|
|
|
|
786
|
|
|
|
|
|
|
# Inform the logger object on length of input stream |
787
|
561
|
|
|
|
|
1544
|
my $logger_object = $self->[_logger_object_]; |
788
|
561
|
100
|
|
|
|
1985
|
if ($logger_object) { |
789
|
559
|
|
|
|
|
1486
|
my $last_line_number = $self->[_last_line_number_]; |
790
|
559
|
|
|
|
|
2955
|
$logger_object->set_last_input_line_number($last_line_number); |
791
|
|
|
|
|
|
|
} |
792
|
|
|
|
|
|
|
|
793
|
561
|
|
|
|
|
1645
|
my $maxle = $self->[_rOpts_maximum_level_errors_]; |
794
|
561
|
|
|
|
|
1410
|
my $maxue = $self->[_rOpts_maximum_unexpected_errors_]; |
795
|
561
|
50
|
|
|
|
1835
|
$maxle = 1 unless defined($maxle); |
796
|
561
|
50
|
|
|
|
1898
|
$maxue = 0 unless defined($maxue); |
797
|
|
|
|
|
|
|
|
798
|
561
|
|
|
|
|
2396
|
my $level = get_indentation_level(); |
799
|
561
|
50
|
|
|
|
2263
|
if ( $level != $self->[_starting_level_] ) { |
800
|
0
|
|
|
|
|
0
|
$self->warning("final indentation level: $level\n"); |
801
|
0
|
|
|
|
|
0
|
my $level_diff = $self->[_starting_level_] - $level; |
802
|
0
|
0
|
|
|
|
0
|
if ( $level_diff < 0 ) { $level_diff = -$level_diff } |
|
0
|
|
|
|
|
0
|
|
803
|
|
|
|
|
|
|
|
804
|
|
|
|
|
|
|
# Set severe error flag if the level error is greater than 1. |
805
|
|
|
|
|
|
|
# The formatter can function for any level error but it is probably |
806
|
|
|
|
|
|
|
# best not to attempt formatting for a high level error. |
807
|
0
|
0
|
0
|
|
|
0
|
if ( $maxle >= 0 && $level_diff > $maxle ) { |
808
|
0
|
|
|
|
|
0
|
$severe_error = 1; |
809
|
0
|
|
|
|
|
0
|
$self->warning(<<EOM); |
810
|
|
|
|
|
|
|
Formatting will be skipped because level error '$level_diff' exceeds -maxle=$maxle; use -maxle=-1 to force formatting |
811
|
|
|
|
|
|
|
EOM |
812
|
|
|
|
|
|
|
} |
813
|
|
|
|
|
|
|
} |
814
|
|
|
|
|
|
|
|
815
|
561
|
|
|
|
|
3127
|
$self->check_final_nesting_depths(); |
816
|
|
|
|
|
|
|
|
817
|
|
|
|
|
|
|
# Likewise, large numbers of brace errors usually indicate non-perl |
818
|
|
|
|
|
|
|
# scripts, so set the severe error flag at a low number. This is similar |
819
|
|
|
|
|
|
|
# to the level check, but different because braces may balance but be |
820
|
|
|
|
|
|
|
# incorrectly interlaced. |
821
|
561
|
50
|
|
|
|
2903
|
if ( $self->[_true_brace_error_count_] > 2 ) { |
822
|
0
|
|
|
|
|
0
|
$severe_error = 1; |
823
|
|
|
|
|
|
|
} |
824
|
|
|
|
|
|
|
|
825
|
561
|
50
|
66
|
|
|
2606
|
if ( $self->[_look_for_hash_bang_] |
826
|
|
|
|
|
|
|
&& !$self->[_saw_hash_bang_] ) |
827
|
|
|
|
|
|
|
{ |
828
|
0
|
|
|
|
|
0
|
$self->warning( |
829
|
|
|
|
|
|
|
"hit EOF without seeing hash-bang line; maybe don't need -x?\n"); |
830
|
|
|
|
|
|
|
} |
831
|
|
|
|
|
|
|
|
832
|
561
|
50
|
|
|
|
1969
|
if ( $self->[_in_format_] ) { |
833
|
0
|
|
|
|
|
0
|
$self->warning("hit EOF while in format description\n"); |
834
|
|
|
|
|
|
|
} |
835
|
|
|
|
|
|
|
|
836
|
561
|
50
|
|
|
|
2105
|
if ( $self->[_in_skipped_] ) { |
837
|
0
|
|
|
|
|
0
|
$self->write_logfile_entry( |
838
|
|
|
|
|
|
|
"hit EOF while in lines skipped with --code-skipping\n"); |
839
|
|
|
|
|
|
|
} |
840
|
|
|
|
|
|
|
|
841
|
561
|
50
|
|
|
|
2028
|
if ( $self->[_in_pod_] ) { |
842
|
|
|
|
|
|
|
|
843
|
|
|
|
|
|
|
# Just write log entry if this is after __END__ or __DATA__ |
844
|
|
|
|
|
|
|
# because this happens to often, and it is not likely to be |
845
|
|
|
|
|
|
|
# a parsing error. |
846
|
0
|
0
|
0
|
|
|
0
|
if ( $self->[_saw_data_] || $self->[_saw_end_] ) { |
847
|
0
|
|
|
|
|
0
|
$self->write_logfile_entry( |
848
|
|
|
|
|
|
|
"hit eof while in pod documentation (no =cut seen)\n\tthis can cause trouble with some pod utilities\n" |
849
|
|
|
|
|
|
|
); |
850
|
|
|
|
|
|
|
} |
851
|
|
|
|
|
|
|
|
852
|
|
|
|
|
|
|
else { |
853
|
0
|
|
|
|
|
0
|
$self->complain( |
854
|
|
|
|
|
|
|
"hit eof while in pod documentation (no =cut seen)\n\tthis can cause trouble with some pod utilities\n" |
855
|
|
|
|
|
|
|
); |
856
|
|
|
|
|
|
|
} |
857
|
|
|
|
|
|
|
|
858
|
|
|
|
|
|
|
} |
859
|
|
|
|
|
|
|
|
860
|
561
|
50
|
|
|
|
1933
|
if ( $self->[_in_here_doc_] ) { |
861
|
0
|
|
|
|
|
0
|
$severe_error = 1; |
862
|
0
|
|
|
|
|
0
|
my $here_doc_target = $self->[_here_doc_target_]; |
863
|
0
|
|
|
|
|
0
|
my $started_looking_for_here_target_at = |
864
|
|
|
|
|
|
|
$self->[_started_looking_for_here_target_at_]; |
865
|
0
|
0
|
|
|
|
0
|
if ($here_doc_target) { |
866
|
0
|
|
|
|
|
0
|
$self->warning( |
867
|
|
|
|
|
|
|
"hit EOF in here document starting at line $started_looking_for_here_target_at with target: $here_doc_target\n" |
868
|
|
|
|
|
|
|
); |
869
|
|
|
|
|
|
|
} |
870
|
|
|
|
|
|
|
else { |
871
|
0
|
|
|
|
|
0
|
$self->warning(<<EOM); |
872
|
|
|
|
|
|
|
Hit EOF in here document starting at line $started_looking_for_here_target_at with empty target string. |
873
|
|
|
|
|
|
|
(Perl will match to the end of file but this may not be intended). |
874
|
|
|
|
|
|
|
EOM |
875
|
|
|
|
|
|
|
} |
876
|
0
|
|
|
|
|
0
|
my $nearly_matched_here_target_at = |
877
|
|
|
|
|
|
|
$self->[_nearly_matched_here_target_at_]; |
878
|
0
|
0
|
|
|
|
0
|
if ($nearly_matched_here_target_at) { |
879
|
0
|
|
|
|
|
0
|
$self->warning( |
880
|
|
|
|
|
|
|
"NOTE: almost matched at input line $nearly_matched_here_target_at except for whitespace\n" |
881
|
|
|
|
|
|
|
); |
882
|
|
|
|
|
|
|
} |
883
|
|
|
|
|
|
|
} |
884
|
|
|
|
|
|
|
|
885
|
|
|
|
|
|
|
# Something is seriously wrong if we ended inside a quote |
886
|
561
|
50
|
|
|
|
2337
|
if ( $self->[_in_quote_] ) { |
887
|
0
|
|
|
|
|
0
|
$severe_error = 1; |
888
|
0
|
|
|
|
|
0
|
my $line_start_quote = $self->[_line_start_quote_]; |
889
|
0
|
|
|
|
|
0
|
my $quote_target = $self->[_quote_target_]; |
890
|
0
|
0
|
|
|
|
0
|
my $what = |
891
|
|
|
|
|
|
|
( $self->[_in_attribute_list_] ) |
892
|
|
|
|
|
|
|
? "attribute list" |
893
|
|
|
|
|
|
|
: "quote/pattern"; |
894
|
0
|
|
|
|
|
0
|
$self->warning( |
895
|
|
|
|
|
|
|
"hit EOF seeking end of $what starting at line $line_start_quote ending in $quote_target\n" |
896
|
|
|
|
|
|
|
); |
897
|
|
|
|
|
|
|
} |
898
|
|
|
|
|
|
|
|
899
|
561
|
50
|
|
|
|
1995
|
if ( $self->[_hit_bug_] ) { |
900
|
0
|
|
|
|
|
0
|
$severe_error = 1; |
901
|
|
|
|
|
|
|
} |
902
|
|
|
|
|
|
|
|
903
|
|
|
|
|
|
|
# Multiple "unexpected" type tokenization errors usually indicate parsing |
904
|
|
|
|
|
|
|
# non-perl scripts, or that something is seriously wrong, so we should |
905
|
|
|
|
|
|
|
# avoid formatting them. This can happen for example if we run perltidy on |
906
|
|
|
|
|
|
|
# a shell script or an html file. But unfortunately this check can |
907
|
|
|
|
|
|
|
# interfere with some extended syntaxes, such as RPerl, so it has to be off |
908
|
|
|
|
|
|
|
# by default. |
909
|
561
|
|
|
|
|
1463
|
my $ue_count = $self->[_unexpected_error_count_]; |
910
|
561
|
50
|
33
|
|
|
2529
|
if ( $maxue > 0 && $ue_count > $maxue ) { |
911
|
0
|
|
|
|
|
0
|
$self->warning(<<EOM); |
912
|
|
|
|
|
|
|
Formatting will be skipped since unexpected token count = $ue_count > -maxue=$maxue; use -maxue=0 to force formatting |
913
|
|
|
|
|
|
|
EOM |
914
|
0
|
|
|
|
|
0
|
$severe_error = 1; |
915
|
|
|
|
|
|
|
} |
916
|
|
|
|
|
|
|
|
917
|
561
|
100
|
|
|
|
1927
|
if ( !$self->[_saw_perl_dash_w_] ) { |
918
|
545
|
50
|
|
|
|
3621
|
if ( $] < 5.006 ) { |
919
|
0
|
|
|
|
|
0
|
$self->write_logfile_entry("Suggest including '-w parameter'\n"); |
920
|
|
|
|
|
|
|
} |
921
|
|
|
|
|
|
|
else { |
922
|
545
|
|
|
|
|
1894
|
$self->write_logfile_entry("Suggest including 'use warnings;'\n"); |
923
|
|
|
|
|
|
|
} |
924
|
|
|
|
|
|
|
} |
925
|
|
|
|
|
|
|
|
926
|
561
|
50
|
|
|
|
5161
|
if ( $self->[_saw_perl_dash_P_] ) { |
927
|
0
|
|
|
|
|
0
|
$self->write_logfile_entry( |
928
|
|
|
|
|
|
|
"Use of -P parameter for defines is discouraged\n"); |
929
|
|
|
|
|
|
|
} |
930
|
|
|
|
|
|
|
|
931
|
561
|
100
|
|
|
|
2472
|
if ( !$self->[_saw_use_strict_] ) { |
932
|
547
|
|
|
|
|
1675
|
$self->write_logfile_entry("Suggest including 'use strict;'\n"); |
933
|
|
|
|
|
|
|
} |
934
|
|
|
|
|
|
|
|
935
|
|
|
|
|
|
|
# it is suggested that labels have at least one upper case character |
936
|
|
|
|
|
|
|
# for legibility and to avoid code breakage as new keywords are introduced |
937
|
561
|
100
|
|
|
|
3932
|
if ( $self->[_rlower_case_labels_at_] ) { |
938
|
12
|
|
|
|
|
44
|
my @lower_case_labels_at = @{ $self->[_rlower_case_labels_at_] }; |
|
12
|
|
|
|
|
46
|
|
939
|
12
|
|
|
|
|
48
|
$self->write_logfile_entry( |
940
|
|
|
|
|
|
|
"Suggest using upper case characters in label(s)\n"); |
941
|
12
|
|
|
|
|
54
|
local $LIST_SEPARATOR = ')('; |
942
|
12
|
|
|
|
|
112
|
$self->write_logfile_entry( |
943
|
|
|
|
|
|
|
" defined at line(s): (@lower_case_labels_at)\n"); |
944
|
|
|
|
|
|
|
} |
945
|
561
|
|
|
|
|
2365
|
return $severe_error; |
946
|
|
|
|
|
|
|
} ## end sub report_tokenization_errors |
947
|
|
|
|
|
|
|
|
948
|
|
|
|
|
|
|
sub report_v_string { |
949
|
|
|
|
|
|
|
|
950
|
|
|
|
|
|
|
# warn if this version can't handle v-strings |
951
|
2
|
|
|
2
|
0
|
9
|
my ( $self, $tok ) = @_; |
952
|
2
|
50
|
|
|
|
8
|
if ( !$self->[_saw_v_string_] ) { |
953
|
2
|
|
|
|
|
6
|
$self->[_saw_v_string_] = $self->[_last_line_number_]; |
954
|
|
|
|
|
|
|
} |
955
|
2
|
50
|
|
|
|
10
|
if ( $] < 5.006 ) { |
956
|
0
|
|
|
|
|
0
|
$self->warning( |
957
|
|
|
|
|
|
|
"Found v-string '$tok' but v-strings are not implemented in your version of perl; see Camel 3 book ch 2\n" |
958
|
|
|
|
|
|
|
); |
959
|
|
|
|
|
|
|
} |
960
|
2
|
|
|
|
|
7
|
return; |
961
|
|
|
|
|
|
|
} ## end sub report_v_string |
962
|
|
|
|
|
|
|
|
963
|
|
|
|
|
|
|
sub is_valid_token_type { |
964
|
3
|
|
|
3
|
0
|
7
|
my ($type) = @_; |
965
|
3
|
|
|
|
|
15
|
return $is_valid_token_type{$type}; |
966
|
|
|
|
|
|
|
} |
967
|
|
|
|
|
|
|
|
968
|
|
|
|
|
|
|
sub log_numbered_msg { |
969
|
167
|
|
|
167
|
0
|
480
|
my ( $self, $msg ) = @_; |
970
|
|
|
|
|
|
|
|
971
|
|
|
|
|
|
|
# write input line number + message to logfile |
972
|
167
|
|
|
|
|
350
|
my $input_line_number = $self->[_last_line_number_]; |
973
|
167
|
|
|
|
|
896
|
$self->write_logfile_entry("Line $input_line_number: $msg"); |
974
|
167
|
|
|
|
|
359
|
return; |
975
|
|
|
|
|
|
|
} ## end sub log_numbered_msg |
976
|
|
|
|
|
|
|
|
977
|
|
|
|
|
|
|
sub get_line { |
978
|
|
|
|
|
|
|
|
979
|
8221
|
|
|
8221
|
0
|
15270
|
my $self = shift; |
980
|
|
|
|
|
|
|
|
981
|
|
|
|
|
|
|
# Read the next input line and tokenize it |
982
|
|
|
|
|
|
|
# Returns: |
983
|
|
|
|
|
|
|
# $line_of_tokens = ref to hash of info for the tokenized line |
984
|
|
|
|
|
|
|
|
985
|
|
|
|
|
|
|
# USES GLOBAL VARIABLES: |
986
|
|
|
|
|
|
|
# $brace_depth, $square_bracket_depth, $paren_depth |
987
|
|
|
|
|
|
|
|
988
|
|
|
|
|
|
|
# get the next line from the input array |
989
|
8221
|
|
|
|
|
12202
|
my $input_line; |
990
|
8221
|
|
|
|
|
14231
|
my $rinput_lines = $self->[_rinput_lines_]; |
991
|
8221
|
|
|
|
|
13022
|
my $line_index = $self->[_input_line_index_next_]; |
992
|
8221
|
100
|
|
|
|
11827
|
if ( $line_index < @{$rinput_lines} ) { |
|
8221
|
|
|
|
|
19167
|
|
993
|
7660
|
|
|
|
|
16116
|
$input_line = $rinput_lines->[ $line_index++ ]; |
994
|
7660
|
|
|
|
|
12420
|
$self->[_input_line_index_next_] = $line_index; |
995
|
|
|
|
|
|
|
} |
996
|
|
|
|
|
|
|
|
997
|
8221
|
|
|
|
|
14154
|
$self->[_line_of_text_] = $input_line; |
998
|
|
|
|
|
|
|
|
999
|
8221
|
100
|
|
|
|
19934
|
return if ( !defined($input_line) ); |
1000
|
|
|
|
|
|
|
|
1001
|
7660
|
|
|
|
|
12922
|
my $input_line_number = ++$self->[_last_line_number_]; |
1002
|
|
|
|
|
|
|
|
1003
|
|
|
|
|
|
|
# Find and remove what characters terminate this line, including any |
1004
|
|
|
|
|
|
|
# control r |
1005
|
7660
|
|
|
|
|
12011
|
my $input_line_separator = EMPTY_STRING; |
1006
|
7660
|
100
|
|
|
|
20193
|
if ( chomp($input_line) ) { |
1007
|
7659
|
|
|
|
|
17040
|
$input_line_separator = $INPUT_RECORD_SEPARATOR; |
1008
|
|
|
|
|
|
|
} |
1009
|
|
|
|
|
|
|
|
1010
|
|
|
|
|
|
|
# The first test here very significantly speeds things up, but be sure to |
1011
|
|
|
|
|
|
|
# keep the regex and hash %other_line_endings the same. |
1012
|
7660
|
100
|
|
|
|
23205
|
if ( $other_line_endings{ substr( $input_line, -1 ) } ) { |
1013
|
24
|
50
|
|
|
|
218
|
if ( $input_line =~ s/([\r\035\032])+$// ) { |
1014
|
24
|
|
|
|
|
71
|
$input_line_separator = $1 . $input_line_separator; |
1015
|
|
|
|
|
|
|
} |
1016
|
|
|
|
|
|
|
} |
1017
|
|
|
|
|
|
|
|
1018
|
|
|
|
|
|
|
# for backwards compatibility we keep the line text terminated with |
1019
|
|
|
|
|
|
|
# a newline character |
1020
|
7660
|
|
|
|
|
15945
|
$input_line .= "\n"; |
1021
|
7660
|
|
|
|
|
13464
|
$self->[_line_of_text_] = $input_line; |
1022
|
|
|
|
|
|
|
|
1023
|
|
|
|
|
|
|
# create a data structure describing this line which will be |
1024
|
|
|
|
|
|
|
# returned to the caller. |
1025
|
|
|
|
|
|
|
|
1026
|
|
|
|
|
|
|
# _line_type codes are: |
1027
|
|
|
|
|
|
|
# SYSTEM - system-specific code before hash-bang line |
1028
|
|
|
|
|
|
|
# CODE - line of perl code (including comments) |
1029
|
|
|
|
|
|
|
# POD_START - line starting pod, such as '=head' |
1030
|
|
|
|
|
|
|
# POD - pod documentation text |
1031
|
|
|
|
|
|
|
# POD_END - last line of pod section, '=cut' |
1032
|
|
|
|
|
|
|
# HERE - text of here-document |
1033
|
|
|
|
|
|
|
# HERE_END - last line of here-doc (target word) |
1034
|
|
|
|
|
|
|
# FORMAT - format section |
1035
|
|
|
|
|
|
|
# FORMAT_END - last line of format section, '.' |
1036
|
|
|
|
|
|
|
# SKIP - code skipping section |
1037
|
|
|
|
|
|
|
# SKIP_END - last line of code skipping section, '#>>V' |
1038
|
|
|
|
|
|
|
# DATA_START - __DATA__ line |
1039
|
|
|
|
|
|
|
# DATA - unidentified text following __DATA__ |
1040
|
|
|
|
|
|
|
# END_START - __END__ line |
1041
|
|
|
|
|
|
|
# END - unidentified text following __END__ |
1042
|
|
|
|
|
|
|
# ERROR - we are in big trouble, probably not a perl script |
1043
|
|
|
|
|
|
|
|
1044
|
|
|
|
|
|
|
# Other variables: |
1045
|
|
|
|
|
|
|
# _curly_brace_depth - depth of curly braces at start of line |
1046
|
|
|
|
|
|
|
# _square_bracket_depth - depth of square brackets at start of line |
1047
|
|
|
|
|
|
|
# _paren_depth - depth of parens at start of line |
1048
|
|
|
|
|
|
|
# _starting_in_quote - this line continues a multi-line quote |
1049
|
|
|
|
|
|
|
# (so don't trim leading blanks!) |
1050
|
|
|
|
|
|
|
# _ending_in_quote - this line ends in a multi-line quote |
1051
|
|
|
|
|
|
|
# (so don't trim trailing blanks!) |
1052
|
7660
|
|
|
|
|
42656
|
my $line_of_tokens = { |
1053
|
|
|
|
|
|
|
_line_type => 'EOF', |
1054
|
|
|
|
|
|
|
_line_text => $input_line, |
1055
|
|
|
|
|
|
|
_line_number => $input_line_number, |
1056
|
|
|
|
|
|
|
_guessed_indentation_level => 0, |
1057
|
|
|
|
|
|
|
_curly_brace_depth => $brace_depth, |
1058
|
|
|
|
|
|
|
_square_bracket_depth => $square_bracket_depth, |
1059
|
|
|
|
|
|
|
_paren_depth => $paren_depth, |
1060
|
|
|
|
|
|
|
_quote_character => EMPTY_STRING, |
1061
|
|
|
|
|
|
|
## Skip these needless initializations for efficiency: |
1062
|
|
|
|
|
|
|
## _rtoken_type => undef, |
1063
|
|
|
|
|
|
|
## _rtokens => undef, |
1064
|
|
|
|
|
|
|
## _rlevels => undef, |
1065
|
|
|
|
|
|
|
## _rblock_type => undef, |
1066
|
|
|
|
|
|
|
## _rtype_sequence => undef, |
1067
|
|
|
|
|
|
|
## _rci_levels => undef, |
1068
|
|
|
|
|
|
|
## _starting_in_quote => 0, |
1069
|
|
|
|
|
|
|
## _ending_in_quote => 0, |
1070
|
|
|
|
|
|
|
}; |
1071
|
|
|
|
|
|
|
|
1072
|
|
|
|
|
|
|
# must print line unchanged if we are in a here document |
1073
|
7660
|
100
|
|
|
|
38934
|
if ( $self->[_in_here_doc_] ) { |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
1074
|
|
|
|
|
|
|
|
1075
|
24
|
|
|
|
|
86
|
$line_of_tokens->{_line_type} = 'HERE'; |
1076
|
24
|
|
|
|
|
63
|
my $here_doc_target = $self->[_here_doc_target_]; |
1077
|
24
|
|
|
|
|
58
|
my $here_quote_character = $self->[_here_quote_character_]; |
1078
|
24
|
|
|
|
|
40
|
my $candidate_target = $input_line; |
1079
|
24
|
|
|
|
|
53
|
chomp $candidate_target; |
1080
|
|
|
|
|
|
|
|
1081
|
|
|
|
|
|
|
# Handle <<~ targets, which are indicated here by a leading space on |
1082
|
|
|
|
|
|
|
# the here quote character |
1083
|
24
|
100
|
|
|
|
94
|
if ( $here_quote_character =~ /^\s/ ) { |
1084
|
4
|
|
|
|
|
18
|
$candidate_target =~ s/^\s*//; |
1085
|
|
|
|
|
|
|
} |
1086
|
24
|
100
|
|
|
|
67
|
if ( $candidate_target eq $here_doc_target ) { |
1087
|
9
|
|
|
|
|
47
|
$self->[_nearly_matched_here_target_at_] = undef; |
1088
|
9
|
|
|
|
|
30
|
$line_of_tokens->{_line_type} = 'HERE_END'; |
1089
|
9
|
|
|
|
|
53
|
$self->log_numbered_msg("Exiting HERE document $here_doc_target\n"); |
1090
|
|
|
|
|
|
|
|
1091
|
9
|
|
|
|
|
54
|
my $rhere_target_list = $self->[_rhere_target_list_]; |
1092
|
9
|
50
|
|
|
|
24
|
if ( @{$rhere_target_list} ) { # there can be multiple here targets |
|
9
|
|
|
|
|
48
|
|
1093
|
|
|
|
|
|
|
( $here_doc_target, $here_quote_character ) = |
1094
|
0
|
|
|
|
|
0
|
@{ shift @{$rhere_target_list} }; |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
1095
|
0
|
|
|
|
|
0
|
$self->[_here_doc_target_] = $here_doc_target; |
1096
|
0
|
|
|
|
|
0
|
$self->[_here_quote_character_] = $here_quote_character; |
1097
|
0
|
|
|
|
|
0
|
$self->log_numbered_msg( |
1098
|
|
|
|
|
|
|
"Entering HERE document $here_doc_target\n"); |
1099
|
0
|
|
|
|
|
0
|
$self->[_nearly_matched_here_target_at_] = undef; |
1100
|
0
|
|
|
|
|
0
|
$self->[_started_looking_for_here_target_at_] = |
1101
|
|
|
|
|
|
|
$input_line_number; |
1102
|
|
|
|
|
|
|
} |
1103
|
|
|
|
|
|
|
else { |
1104
|
9
|
|
|
|
|
33
|
$self->[_in_here_doc_] = 0; |
1105
|
9
|
|
|
|
|
26
|
$self->[_here_doc_target_] = EMPTY_STRING; |
1106
|
9
|
|
|
|
|
30
|
$self->[_here_quote_character_] = EMPTY_STRING; |
1107
|
|
|
|
|
|
|
} |
1108
|
|
|
|
|
|
|
} |
1109
|
|
|
|
|
|
|
|
1110
|
|
|
|
|
|
|
# check for error of extra whitespace |
1111
|
|
|
|
|
|
|
# note for PERL6: leading whitespace is allowed |
1112
|
|
|
|
|
|
|
else { |
1113
|
15
|
|
|
|
|
142
|
$candidate_target =~ s/\s*$//; |
1114
|
15
|
|
|
|
|
66
|
$candidate_target =~ s/^\s*//; |
1115
|
15
|
50
|
|
|
|
60
|
if ( $candidate_target eq $here_doc_target ) { |
1116
|
0
|
|
|
|
|
0
|
$self->[_nearly_matched_here_target_at_] = $input_line_number; |
1117
|
|
|
|
|
|
|
} |
1118
|
|
|
|
|
|
|
} |
1119
|
24
|
|
|
|
|
99
|
return $line_of_tokens; |
1120
|
|
|
|
|
|
|
} |
1121
|
|
|
|
|
|
|
|
1122
|
|
|
|
|
|
|
# Print line unchanged if we are in a format section |
1123
|
|
|
|
|
|
|
elsif ( $self->[_in_format_] ) { |
1124
|
|
|
|
|
|
|
|
1125
|
3
|
100
|
|
|
|
23
|
if ( $input_line =~ /^\.[\s#]*$/ ) { |
1126
|
|
|
|
|
|
|
|
1127
|
|
|
|
|
|
|
# Decrement format depth count at a '.' after a 'format' |
1128
|
1
|
|
|
|
|
3
|
$self->[_in_format_]--; |
1129
|
|
|
|
|
|
|
|
1130
|
|
|
|
|
|
|
# This is the end when count reaches 0 |
1131
|
1
|
50
|
|
|
|
7
|
if ( !$self->[_in_format_] ) { |
1132
|
1
|
|
|
|
|
5
|
$self->log_numbered_msg("Exiting format section\n"); |
1133
|
1
|
|
|
|
|
7
|
$line_of_tokens->{_line_type} = 'FORMAT_END'; |
1134
|
|
|
|
|
|
|
|
1135
|
|
|
|
|
|
|
# Make the tokenizer mark an opening brace which follows |
1136
|
|
|
|
|
|
|
# as a code block. Fixes issue c202/t032. |
1137
|
1
|
|
|
|
|
5
|
$last_nonblank_token = ';'; |
1138
|
1
|
|
|
|
|
3
|
$last_nonblank_type = ';'; |
1139
|
|
|
|
|
|
|
} |
1140
|
|
|
|
|
|
|
} |
1141
|
|
|
|
|
|
|
else { |
1142
|
2
|
|
|
|
|
5
|
$line_of_tokens->{_line_type} = 'FORMAT'; |
1143
|
2
|
50
|
|
|
|
11
|
if ( $input_line =~ /^\s*format\s+\w+/ ) { |
1144
|
|
|
|
|
|
|
|
1145
|
|
|
|
|
|
|
# Increment format depth count at a 'format' within a 'format' |
1146
|
|
|
|
|
|
|
# This is a simple way to handle nested formats (issue c019). |
1147
|
0
|
|
|
|
|
0
|
$self->[_in_format_]++; |
1148
|
|
|
|
|
|
|
} |
1149
|
|
|
|
|
|
|
} |
1150
|
3
|
|
|
|
|
11
|
return $line_of_tokens; |
1151
|
|
|
|
|
|
|
} |
1152
|
|
|
|
|
|
|
|
1153
|
|
|
|
|
|
|
# must print line unchanged if we are in pod documentation |
1154
|
|
|
|
|
|
|
elsif ( $self->[_in_pod_] ) { |
1155
|
|
|
|
|
|
|
|
1156
|
47
|
|
|
|
|
164
|
$line_of_tokens->{_line_type} = 'POD'; |
1157
|
47
|
100
|
|
|
|
225
|
if ( $input_line =~ /^=cut/ ) { |
1158
|
20
|
|
|
|
|
95
|
$line_of_tokens->{_line_type} = 'POD_END'; |
1159
|
20
|
|
|
|
|
104
|
$self->log_numbered_msg("Exiting POD section\n"); |
1160
|
20
|
|
|
|
|
92
|
$self->[_in_pod_] = 0; |
1161
|
|
|
|
|
|
|
} |
1162
|
47
|
50
|
33
|
|
|
150
|
if ( $input_line =~ /^\#\!.*perl\b/ && !$self->[_in_end_] ) { |
1163
|
0
|
|
|
|
|
0
|
$self->warning( |
1164
|
|
|
|
|
|
|
"Hash-bang in pod can cause older versions of perl to fail! \n" |
1165
|
|
|
|
|
|
|
); |
1166
|
|
|
|
|
|
|
} |
1167
|
|
|
|
|
|
|
|
1168
|
47
|
|
|
|
|
165
|
return $line_of_tokens; |
1169
|
|
|
|
|
|
|
} |
1170
|
|
|
|
|
|
|
|
1171
|
|
|
|
|
|
|
# print line unchanged if in skipped section |
1172
|
|
|
|
|
|
|
elsif ( $self->[_in_skipped_] ) { |
1173
|
|
|
|
|
|
|
|
1174
|
8
|
|
|
|
|
18
|
$line_of_tokens->{_line_type} = 'SKIP'; |
1175
|
8
|
100
|
|
|
|
90
|
if ( $input_line =~ /$code_skipping_pattern_end/ ) { |
|
|
50
|
|
|
|
|
|
1176
|
2
|
|
|
|
|
7
|
$line_of_tokens->{_line_type} = 'SKIP_END'; |
1177
|
2
|
|
|
|
|
10
|
$self->log_numbered_msg("Exiting code-skipping section\n"); |
1178
|
2
|
|
|
|
|
5
|
$self->[_in_skipped_] = 0; |
1179
|
|
|
|
|
|
|
} |
1180
|
|
|
|
|
|
|
elsif ( $input_line =~ /$code_skipping_pattern_begin/ ) { |
1181
|
|
|
|
|
|
|
|
1182
|
|
|
|
|
|
|
# warn of duplicate starting comment lines, git #118 |
1183
|
0
|
|
|
|
|
0
|
my $lno = $self->[_in_skipped_]; |
1184
|
0
|
|
|
|
|
0
|
$self->warning( |
1185
|
|
|
|
|
|
|
"Already in code-skipping section which started at line $lno\n" |
1186
|
|
|
|
|
|
|
); |
1187
|
|
|
|
|
|
|
} |
1188
|
|
|
|
|
|
|
else { |
1189
|
|
|
|
|
|
|
## ok - not a code-skipping control line |
1190
|
|
|
|
|
|
|
} |
1191
|
8
|
|
|
|
|
31
|
return $line_of_tokens; |
1192
|
|
|
|
|
|
|
} |
1193
|
|
|
|
|
|
|
|
1194
|
|
|
|
|
|
|
# must print line unchanged if we have seen a severe error (i.e., we |
1195
|
|
|
|
|
|
|
# are seeing illegal tokens and cannot continue. Syntax errors do |
1196
|
|
|
|
|
|
|
# not pass this route). Calling routine can decide what to do, but |
1197
|
|
|
|
|
|
|
# the default can be to just pass all lines as if they were after __END__ |
1198
|
|
|
|
|
|
|
elsif ( $self->[_in_error_] ) { |
1199
|
0
|
|
|
|
|
0
|
$line_of_tokens->{_line_type} = 'ERROR'; |
1200
|
0
|
|
|
|
|
0
|
return $line_of_tokens; |
1201
|
|
|
|
|
|
|
} |
1202
|
|
|
|
|
|
|
|
1203
|
|
|
|
|
|
|
# print line unchanged if we are __DATA__ section |
1204
|
|
|
|
|
|
|
elsif ( $self->[_in_data_] ) { |
1205
|
|
|
|
|
|
|
|
1206
|
|
|
|
|
|
|
# ...but look for POD |
1207
|
|
|
|
|
|
|
# Note that the _in_data and _in_end flags remain set |
1208
|
|
|
|
|
|
|
# so that we return to that state after seeing the |
1209
|
|
|
|
|
|
|
# end of a pod section |
1210
|
1
|
50
|
33
|
|
|
8
|
if ( $input_line =~ /^=(\w+)\b/ && $1 ne 'cut' ) { |
1211
|
0
|
|
|
|
|
0
|
$line_of_tokens->{_line_type} = 'POD_START'; |
1212
|
0
|
|
|
|
|
0
|
$self->log_numbered_msg("Entering POD section\n"); |
1213
|
0
|
|
|
|
|
0
|
$self->[_in_pod_] = 1; |
1214
|
0
|
|
|
|
|
0
|
return $line_of_tokens; |
1215
|
|
|
|
|
|
|
} |
1216
|
|
|
|
|
|
|
else { |
1217
|
1
|
|
|
|
|
4
|
$line_of_tokens->{_line_type} = 'DATA'; |
1218
|
1
|
|
|
|
|
4
|
return $line_of_tokens; |
1219
|
|
|
|
|
|
|
} |
1220
|
|
|
|
|
|
|
} |
1221
|
|
|
|
|
|
|
|
1222
|
|
|
|
|
|
|
# print line unchanged if we are in __END__ section |
1223
|
|
|
|
|
|
|
elsif ( $self->[_in_end_] ) { |
1224
|
|
|
|
|
|
|
|
1225
|
|
|
|
|
|
|
# ...but look for POD |
1226
|
|
|
|
|
|
|
# Note that the _in_data and _in_end flags remain set |
1227
|
|
|
|
|
|
|
# so that we return to that state after seeing the |
1228
|
|
|
|
|
|
|
# end of a pod section |
1229
|
48
|
100
|
66
|
|
|
347
|
if ( $input_line =~ /^=(\w+)\b/ && $1 ne 'cut' ) { |
1230
|
6
|
|
|
|
|
29
|
$line_of_tokens->{_line_type} = 'POD_START'; |
1231
|
6
|
|
|
|
|
31
|
$self->log_numbered_msg("Entering POD section\n"); |
1232
|
6
|
|
|
|
|
34
|
$self->[_in_pod_] = 1; |
1233
|
6
|
|
|
|
|
69
|
return $line_of_tokens; |
1234
|
|
|
|
|
|
|
} |
1235
|
|
|
|
|
|
|
else { |
1236
|
42
|
|
|
|
|
73
|
$line_of_tokens->{_line_type} = 'END'; |
1237
|
42
|
|
|
|
|
139
|
return $line_of_tokens; |
1238
|
|
|
|
|
|
|
} |
1239
|
|
|
|
|
|
|
} |
1240
|
|
|
|
|
|
|
else { |
1241
|
|
|
|
|
|
|
## ok |
1242
|
|
|
|
|
|
|
} |
1243
|
|
|
|
|
|
|
|
1244
|
|
|
|
|
|
|
# check for a hash-bang line if we haven't seen one |
1245
|
7529
|
100
|
100
|
|
|
33761
|
if ( !$self->[_saw_hash_bang_] |
|
|
|
66
|
|
|
|
|
1246
|
|
|
|
|
|
|
&& substr( $input_line, 0, 2 ) eq '#!' |
1247
|
|
|
|
|
|
|
&& $input_line =~ /^\#\!.*perl\b/ ) |
1248
|
|
|
|
|
|
|
{ |
1249
|
15
|
|
|
|
|
53
|
$self->[_saw_hash_bang_] = $input_line_number; |
1250
|
|
|
|
|
|
|
|
1251
|
|
|
|
|
|
|
# check for -w and -P flags |
1252
|
15
|
50
|
|
|
|
102
|
if ( $input_line =~ /^\#\!.*perl\s.*-.*P/ ) { |
1253
|
0
|
|
|
|
|
0
|
$self->[_saw_perl_dash_P_] = 1; |
1254
|
|
|
|
|
|
|
} |
1255
|
|
|
|
|
|
|
|
1256
|
15
|
100
|
|
|
|
97
|
if ( $input_line =~ /^\#\!.*perl\s.*-.*w/ ) { |
1257
|
8
|
|
|
|
|
26
|
$self->[_saw_perl_dash_w_] = 1; |
1258
|
|
|
|
|
|
|
} |
1259
|
|
|
|
|
|
|
|
1260
|
15
|
100
|
33
|
|
|
137
|
if ( |
|
|
|
66
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
66
|
|
|
|
|
1261
|
|
|
|
|
|
|
$input_line_number > 1 |
1262
|
|
|
|
|
|
|
|
1263
|
|
|
|
|
|
|
# leave any hash bang in a BEGIN block alone |
1264
|
|
|
|
|
|
|
# i.e. see 'debugger-duck_type.t' |
1265
|
|
|
|
|
|
|
&& !( |
1266
|
|
|
|
|
|
|
$last_nonblank_block_type |
1267
|
|
|
|
|
|
|
&& $last_nonblank_block_type eq 'BEGIN' |
1268
|
|
|
|
|
|
|
) |
1269
|
|
|
|
|
|
|
&& !$self->[_look_for_hash_bang_] |
1270
|
|
|
|
|
|
|
|
1271
|
|
|
|
|
|
|
# Try to avoid giving a false alarm at a simple comment. |
1272
|
|
|
|
|
|
|
# These look like valid hash-bang lines: |
1273
|
|
|
|
|
|
|
|
1274
|
|
|
|
|
|
|
#!/usr/bin/perl -w |
1275
|
|
|
|
|
|
|
#! /usr/bin/perl -w |
1276
|
|
|
|
|
|
|
#!c:\perl\bin\perl.exe |
1277
|
|
|
|
|
|
|
|
1278
|
|
|
|
|
|
|
# These are comments: |
1279
|
|
|
|
|
|
|
#! I love perl |
1280
|
|
|
|
|
|
|
#! sunos does not yet provide a /usr/bin/perl |
1281
|
|
|
|
|
|
|
|
1282
|
|
|
|
|
|
|
# Comments typically have multiple spaces, which suggests |
1283
|
|
|
|
|
|
|
# the filter |
1284
|
|
|
|
|
|
|
&& $input_line =~ /^\#\!(\s+)?(\S+)?perl/ |
1285
|
|
|
|
|
|
|
) |
1286
|
|
|
|
|
|
|
{ |
1287
|
|
|
|
|
|
|
|
1288
|
|
|
|
|
|
|
# this is helpful for VMS systems; we may have accidentally |
1289
|
|
|
|
|
|
|
# tokenized some DCL commands |
1290
|
1
|
50
|
|
|
|
5
|
if ( $self->[_started_tokenizing_] ) { |
1291
|
0
|
|
|
|
|
0
|
$self->warning( |
1292
|
|
|
|
|
|
|
"There seems to be a hash-bang after line 1; do you need to run with -x ?\n" |
1293
|
|
|
|
|
|
|
); |
1294
|
|
|
|
|
|
|
} |
1295
|
|
|
|
|
|
|
else { |
1296
|
1
|
|
|
|
|
6
|
$self->complain("Useless hash-bang after line 1\n"); |
1297
|
|
|
|
|
|
|
} |
1298
|
|
|
|
|
|
|
} |
1299
|
|
|
|
|
|
|
|
1300
|
|
|
|
|
|
|
# Report the leading hash-bang as a system line |
1301
|
|
|
|
|
|
|
# This will prevent -dac from deleting it |
1302
|
|
|
|
|
|
|
else { |
1303
|
14
|
|
|
|
|
51
|
$line_of_tokens->{_line_type} = 'SYSTEM'; |
1304
|
14
|
|
|
|
|
79
|
return $line_of_tokens; |
1305
|
|
|
|
|
|
|
} |
1306
|
|
|
|
|
|
|
} |
1307
|
|
|
|
|
|
|
|
1308
|
|
|
|
|
|
|
# wait for a hash-bang before parsing if the user invoked us with -x |
1309
|
7515
|
100
|
100
|
|
|
18601
|
if ( $self->[_look_for_hash_bang_] |
1310
|
|
|
|
|
|
|
&& !$self->[_saw_hash_bang_] ) |
1311
|
|
|
|
|
|
|
{ |
1312
|
5
|
|
|
|
|
13
|
$line_of_tokens->{_line_type} = 'SYSTEM'; |
1313
|
5
|
|
|
|
|
19
|
return $line_of_tokens; |
1314
|
|
|
|
|
|
|
} |
1315
|
|
|
|
|
|
|
|
1316
|
|
|
|
|
|
|
# a first line of the form ': #' will be marked as SYSTEM |
1317
|
|
|
|
|
|
|
# since lines of this form may be used by tcsh |
1318
|
7510
|
50
|
66
|
|
|
19881
|
if ( $input_line_number == 1 && $input_line =~ /^\s*\:\s*\#/ ) { |
1319
|
0
|
|
|
|
|
0
|
$line_of_tokens->{_line_type} = 'SYSTEM'; |
1320
|
0
|
|
|
|
|
0
|
return $line_of_tokens; |
1321
|
|
|
|
|
|
|
} |
1322
|
|
|
|
|
|
|
|
1323
|
|
|
|
|
|
|
# now we know that it is ok to tokenize the line... |
1324
|
|
|
|
|
|
|
# the line tokenizer will modify any of these private variables: |
1325
|
|
|
|
|
|
|
# _rhere_target_list_ |
1326
|
|
|
|
|
|
|
# _in_data_ |
1327
|
|
|
|
|
|
|
# _in_end_ |
1328
|
|
|
|
|
|
|
# _in_format_ |
1329
|
|
|
|
|
|
|
# _in_error_ |
1330
|
|
|
|
|
|
|
# _in_skipped_ |
1331
|
|
|
|
|
|
|
# _in_pod_ |
1332
|
|
|
|
|
|
|
# _in_quote_ |
1333
|
7510
|
|
|
|
|
23419
|
$self->tokenize_this_line($line_of_tokens); |
1334
|
|
|
|
|
|
|
|
1335
|
|
|
|
|
|
|
# Now finish defining the return structure and return it |
1336
|
7510
|
|
|
|
|
15892
|
$line_of_tokens->{_ending_in_quote} = $self->[_in_quote_]; |
1337
|
|
|
|
|
|
|
|
1338
|
|
|
|
|
|
|
# handle severe error (binary data in script) |
1339
|
7510
|
50
|
|
|
|
16987
|
if ( $self->[_in_error_] ) { |
1340
|
0
|
|
|
|
|
0
|
$self->[_in_quote_] = 0; # to avoid any more messages |
1341
|
0
|
|
|
|
|
0
|
$self->warning("Giving up after error\n"); |
1342
|
0
|
|
|
|
|
0
|
$line_of_tokens->{_line_type} = 'ERROR'; |
1343
|
0
|
|
|
|
|
0
|
reset_indentation_level(0); # avoid error messages |
1344
|
0
|
|
|
|
|
0
|
return $line_of_tokens; |
1345
|
|
|
|
|
|
|
} |
1346
|
|
|
|
|
|
|
|
1347
|
|
|
|
|
|
|
# handle start of pod documentation |
1348
|
7510
|
100
|
|
|
|
15976
|
if ( $self->[_in_pod_] ) { |
1349
|
|
|
|
|
|
|
|
1350
|
|
|
|
|
|
|
# This gets tricky..above a __DATA__ or __END__ section, perl |
1351
|
|
|
|
|
|
|
# accepts '=cut' as the start of pod section. But afterwards, |
1352
|
|
|
|
|
|
|
# only pod utilities see it and they may ignore an =cut without |
1353
|
|
|
|
|
|
|
# leading =head. In any case, this isn't good. |
1354
|
14
|
50
|
|
|
|
84
|
if ( $input_line =~ /^=cut\b/ ) { |
1355
|
0
|
0
|
0
|
|
|
0
|
if ( $self->[_saw_data_] || $self->[_saw_end_] ) { |
1356
|
0
|
|
|
|
|
0
|
$self->complain("=cut while not in pod ignored\n"); |
1357
|
0
|
|
|
|
|
0
|
$self->[_in_pod_] = 0; |
1358
|
0
|
|
|
|
|
0
|
$line_of_tokens->{_line_type} = 'POD_END'; |
1359
|
|
|
|
|
|
|
} |
1360
|
|
|
|
|
|
|
else { |
1361
|
0
|
|
|
|
|
0
|
$line_of_tokens->{_line_type} = 'POD_START'; |
1362
|
0
|
|
|
|
|
0
|
if ( !DEVEL_MODE ) { |
1363
|
0
|
|
|
|
|
0
|
$self->warning( |
1364
|
|
|
|
|
|
|
"=cut starts a pod section .. this can fool pod utilities.\n" |
1365
|
|
|
|
|
|
|
); |
1366
|
|
|
|
|
|
|
} |
1367
|
0
|
|
|
|
|
0
|
$self->log_numbered_msg("Entering POD section\n"); |
1368
|
|
|
|
|
|
|
} |
1369
|
|
|
|
|
|
|
} |
1370
|
|
|
|
|
|
|
|
1371
|
|
|
|
|
|
|
else { |
1372
|
14
|
|
|
|
|
70
|
$line_of_tokens->{_line_type} = 'POD_START'; |
1373
|
14
|
|
|
|
|
76
|
$self->log_numbered_msg("Entering POD section\n"); |
1374
|
|
|
|
|
|
|
} |
1375
|
|
|
|
|
|
|
|
1376
|
14
|
|
|
|
|
94
|
return $line_of_tokens; |
1377
|
|
|
|
|
|
|
} |
1378
|
|
|
|
|
|
|
|
1379
|
|
|
|
|
|
|
# handle start of skipped section |
1380
|
7496
|
100
|
|
|
|
14876
|
if ( $self->[_in_skipped_] ) { |
1381
|
|
|
|
|
|
|
|
1382
|
2
|
|
|
|
|
8
|
$line_of_tokens->{_line_type} = 'SKIP'; |
1383
|
2
|
|
|
|
|
14
|
$self->log_numbered_msg("Entering code-skipping section\n"); |
1384
|
2
|
|
|
|
|
10
|
return $line_of_tokens; |
1385
|
|
|
|
|
|
|
} |
1386
|
|
|
|
|
|
|
|
1387
|
|
|
|
|
|
|
# see if this line contains here doc targets |
1388
|
7494
|
|
|
|
|
12459
|
my $rhere_target_list = $self->[_rhere_target_list_]; |
1389
|
7494
|
100
|
|
|
|
10782
|
if ( @{$rhere_target_list} ) { |
|
7494
|
|
|
|
|
17331
|
|
1390
|
|
|
|
|
|
|
|
1391
|
|
|
|
|
|
|
my ( $here_doc_target, $here_quote_character ) = |
1392
|
9
|
|
|
|
|
52
|
@{ shift @{$rhere_target_list} }; |
|
9
|
|
|
|
|
25
|
|
|
9
|
|
|
|
|
43
|
|
1393
|
9
|
|
|
|
|
32
|
$self->[_in_here_doc_] = 1; |
1394
|
9
|
|
|
|
|
31
|
$self->[_here_doc_target_] = $here_doc_target; |
1395
|
9
|
|
|
|
|
24
|
$self->[_here_quote_character_] = $here_quote_character; |
1396
|
9
|
|
|
|
|
69
|
$self->log_numbered_msg("Entering HERE document $here_doc_target\n"); |
1397
|
9
|
|
|
|
|
34
|
$self->[_started_looking_for_here_target_at_] = $input_line_number; |
1398
|
|
|
|
|
|
|
} |
1399
|
|
|
|
|
|
|
|
1400
|
|
|
|
|
|
|
# NOTE: __END__ and __DATA__ statements are written unformatted |
1401
|
|
|
|
|
|
|
# because they can theoretically contain additional characters |
1402
|
|
|
|
|
|
|
# which are not tokenized (and cannot be read with <DATA> either!). |
1403
|
7494
|
100
|
|
|
|
20350
|
if ( $self->[_in_data_] ) { |
|
|
100
|
|
|
|
|
|
1404
|
1
|
|
|
|
|
4
|
$line_of_tokens->{_line_type} = 'DATA_START'; |
1405
|
1
|
|
|
|
|
6
|
$self->log_numbered_msg("Starting __DATA__ section\n"); |
1406
|
1
|
|
|
|
|
4
|
$self->[_saw_data_] = 1; |
1407
|
|
|
|
|
|
|
|
1408
|
|
|
|
|
|
|
# keep parsing after __DATA__ if use SelfLoader was seen |
1409
|
1
|
50
|
|
|
|
5
|
if ( $self->[_saw_selfloader_] ) { |
1410
|
0
|
|
|
|
|
0
|
$self->[_in_data_] = 0; |
1411
|
0
|
|
|
|
|
0
|
$self->log_numbered_msg( |
1412
|
|
|
|
|
|
|
"SelfLoader seen, continuing; -nlsl deactivates\n"); |
1413
|
|
|
|
|
|
|
} |
1414
|
|
|
|
|
|
|
|
1415
|
1
|
|
|
|
|
5
|
return $line_of_tokens; |
1416
|
|
|
|
|
|
|
} |
1417
|
|
|
|
|
|
|
|
1418
|
|
|
|
|
|
|
elsif ( $self->[_in_end_] ) { |
1419
|
6
|
|
|
|
|
50
|
$line_of_tokens->{_line_type} = 'END_START'; |
1420
|
6
|
|
|
|
|
31
|
$self->log_numbered_msg("Starting __END__ section\n"); |
1421
|
6
|
|
|
|
|
22
|
$self->[_saw_end_] = 1; |
1422
|
|
|
|
|
|
|
|
1423
|
|
|
|
|
|
|
# keep parsing after __END__ if use AutoLoader was seen |
1424
|
6
|
50
|
|
|
|
71
|
if ( $self->[_saw_autoloader_] ) { |
1425
|
0
|
|
|
|
|
0
|
$self->[_in_end_] = 0; |
1426
|
0
|
|
|
|
|
0
|
$self->log_numbered_msg( |
1427
|
|
|
|
|
|
|
"AutoLoader seen, continuing; -nlal deactivates\n"); |
1428
|
|
|
|
|
|
|
} |
1429
|
6
|
|
|
|
|
28
|
return $line_of_tokens; |
1430
|
|
|
|
|
|
|
} |
1431
|
|
|
|
|
|
|
else { |
1432
|
|
|
|
|
|
|
## ok: not in __END__ or __DATA__ |
1433
|
|
|
|
|
|
|
} |
1434
|
|
|
|
|
|
|
|
1435
|
|
|
|
|
|
|
# now, finally, we know that this line is type 'CODE' |
1436
|
7487
|
|
|
|
|
14137
|
$line_of_tokens->{_line_type} = 'CODE'; |
1437
|
|
|
|
|
|
|
|
1438
|
|
|
|
|
|
|
# remember if we have seen any real code |
1439
|
7487
|
100
|
100
|
|
|
23773
|
if ( !$self->[_started_tokenizing_] |
|
|
|
100
|
|
|
|
|
1440
|
|
|
|
|
|
|
&& $input_line !~ /^\s*$/ |
1441
|
|
|
|
|
|
|
&& $input_line !~ /^\s*#/ ) |
1442
|
|
|
|
|
|
|
{ |
1443
|
557
|
|
|
|
|
2855
|
$self->[_started_tokenizing_] = 1; |
1444
|
|
|
|
|
|
|
} |
1445
|
|
|
|
|
|
|
|
1446
|
7487
|
100
|
|
|
|
16073
|
if ( $self->[_debugger_object_] ) { |
1447
|
7
|
|
|
|
|
36
|
$self->[_debugger_object_]->write_debug_entry($line_of_tokens); |
1448
|
|
|
|
|
|
|
} |
1449
|
|
|
|
|
|
|
|
1450
|
|
|
|
|
|
|
# Note: if keyword 'format' occurs in this line code, it is still CODE |
1451
|
|
|
|
|
|
|
# (keyword 'format' need not start a line) |
1452
|
7487
|
100
|
|
|
|
15537
|
if ( $self->[_in_format_] ) { |
1453
|
1
|
|
|
|
|
7
|
$self->log_numbered_msg("Entering format section\n"); |
1454
|
|
|
|
|
|
|
} |
1455
|
|
|
|
|
|
|
|
1456
|
7487
|
100
|
100
|
|
|
29299
|
if ( $self->[_in_quote_] |
|
|
100
|
100
|
|
|
|
|
1457
|
|
|
|
|
|
|
and ( $self->[_line_start_quote_] < 0 ) ) |
1458
|
|
|
|
|
|
|
{ |
1459
|
|
|
|
|
|
|
|
1460
|
|
|
|
|
|
|
#if ( ( my $quote_target = get_quote_target() ) !~ /^\s*$/ ) { |
1461
|
49
|
100
|
|
|
|
431
|
if ( ( my $quote_target = $self->[_quote_target_] ) !~ /^\s*$/ ) { |
1462
|
48
|
|
|
|
|
140
|
$self->[_line_start_quote_] = $input_line_number; |
1463
|
48
|
|
|
|
|
361
|
$self->log_numbered_msg( |
1464
|
|
|
|
|
|
|
"Start multi-line quote or pattern ending in $quote_target\n"); |
1465
|
|
|
|
|
|
|
} |
1466
|
|
|
|
|
|
|
} |
1467
|
|
|
|
|
|
|
elsif ( ( $self->[_line_start_quote_] >= 0 ) |
1468
|
|
|
|
|
|
|
&& !$self->[_in_quote_] ) |
1469
|
|
|
|
|
|
|
{ |
1470
|
48
|
|
|
|
|
175
|
$self->[_line_start_quote_] = -1; |
1471
|
48
|
|
|
|
|
200
|
$self->log_numbered_msg("End of multi-line quote or pattern\n"); |
1472
|
|
|
|
|
|
|
} |
1473
|
|
|
|
|
|
|
else { |
1474
|
|
|
|
|
|
|
## ok |
1475
|
|
|
|
|
|
|
} |
1476
|
|
|
|
|
|
|
|
1477
|
|
|
|
|
|
|
# we are returning a line of CODE |
1478
|
7487
|
|
|
|
|
29946
|
return $line_of_tokens; |
1479
|
|
|
|
|
|
|
} ## end sub get_line |
1480
|
|
|
|
|
|
|
|
1481
|
|
|
|
|
|
|
sub find_starting_indentation_level { |
1482
|
|
|
|
|
|
|
|
1483
|
|
|
|
|
|
|
# We need to find the indentation level of the first line of the |
1484
|
|
|
|
|
|
|
# script being formatted. Often it will be zero for an entire file, |
1485
|
|
|
|
|
|
|
# but if we are formatting a local block of code (within an editor for |
1486
|
|
|
|
|
|
|
# example) it may not be zero. The user may specify this with the |
1487
|
|
|
|
|
|
|
# -sil=n parameter but normally doesn't so we have to guess. |
1488
|
|
|
|
|
|
|
# |
1489
|
561
|
|
|
561
|
0
|
1796
|
my ($self) = @_; |
1490
|
561
|
|
|
|
|
1370
|
my $starting_level = 0; |
1491
|
|
|
|
|
|
|
|
1492
|
|
|
|
|
|
|
# use value if given as parameter |
1493
|
561
|
100
|
|
|
|
2942
|
if ( $self->[_know_starting_level_] ) { |
|
|
100
|
|
|
|
|
|
1494
|
1
|
|
|
|
|
4
|
$starting_level = $self->[_starting_level_]; |
1495
|
|
|
|
|
|
|
} |
1496
|
|
|
|
|
|
|
|
1497
|
|
|
|
|
|
|
# if we know there is a hash_bang line, the level must be zero |
1498
|
|
|
|
|
|
|
elsif ( $self->[_look_for_hash_bang_] ) { |
1499
|
1
|
|
|
|
|
4
|
$self->[_know_starting_level_] = 1; |
1500
|
|
|
|
|
|
|
} |
1501
|
|
|
|
|
|
|
|
1502
|
|
|
|
|
|
|
# otherwise figure it out from the input file |
1503
|
|
|
|
|
|
|
else { |
1504
|
559
|
|
|
|
|
1222
|
my $line; |
1505
|
559
|
|
|
|
|
1209
|
my $i = 0; |
1506
|
|
|
|
|
|
|
|
1507
|
|
|
|
|
|
|
# keep looking at lines until we find a hash bang or piece of code |
1508
|
|
|
|
|
|
|
# ( or, for now, an =pod line) |
1509
|
559
|
|
|
|
|
1298
|
my $msg = EMPTY_STRING; |
1510
|
559
|
|
|
|
|
1143
|
my $in_code_skipping; |
1511
|
559
|
|
|
|
|
3067
|
while ( $line = $self->peek_ahead( $i++ ) ) { |
1512
|
|
|
|
|
|
|
|
1513
|
|
|
|
|
|
|
# if first line is #! then assume starting level is zero |
1514
|
870
|
100
|
100
|
|
|
5504
|
if ( $i == 1 && $line =~ /^\#\!/ ) { |
1515
|
13
|
|
|
|
|
42
|
$starting_level = 0; |
1516
|
13
|
|
|
|
|
51
|
last; |
1517
|
|
|
|
|
|
|
} |
1518
|
|
|
|
|
|
|
|
1519
|
|
|
|
|
|
|
# ignore lines fenced off with code-skipping comments |
1520
|
857
|
100
|
|
|
|
5043
|
if ( $line =~ /^\s*#/ ) { |
1521
|
296
|
50
|
|
|
|
1103
|
if ( !$in_code_skipping ) { |
1522
|
296
|
50
|
33
|
|
|
3102
|
if ( $rOpts_code_skipping |
1523
|
|
|
|
|
|
|
&& $line =~ /$code_skipping_pattern_begin/ ) |
1524
|
|
|
|
|
|
|
{ |
1525
|
0
|
|
|
|
|
0
|
$in_code_skipping = 1; |
1526
|
|
|
|
|
|
|
} |
1527
|
|
|
|
|
|
|
} |
1528
|
|
|
|
|
|
|
else { |
1529
|
0
|
0
|
|
|
|
0
|
if ( $line =~ /$code_skipping_pattern_end/ ) { |
1530
|
0
|
|
|
|
|
0
|
$in_code_skipping = 0; |
1531
|
|
|
|
|
|
|
} |
1532
|
|
|
|
|
|
|
} |
1533
|
296
|
|
|
|
|
1054
|
next; |
1534
|
|
|
|
|
|
|
} |
1535
|
561
|
50
|
|
|
|
2583
|
next if ($in_code_skipping); |
1536
|
|
|
|
|
|
|
|
1537
|
561
|
100
|
|
|
|
3613
|
next if ( $line =~ /^\s*$/ ); # skip past blank lines |
1538
|
|
|
|
|
|
|
|
1539
|
543
|
|
|
|
|
2793
|
$starting_level = $self->guess_old_indentation_level($line); |
1540
|
543
|
|
|
|
|
1485
|
last; |
1541
|
|
|
|
|
|
|
} |
1542
|
559
|
|
|
|
|
2559
|
$msg = "Line $i implies starting-indentation-level = $starting_level\n"; |
1543
|
559
|
|
|
|
|
3140
|
$self->write_logfile_entry("$msg"); |
1544
|
|
|
|
|
|
|
} |
1545
|
561
|
|
|
|
|
2207
|
$self->[_starting_level_] = $starting_level; |
1546
|
561
|
|
|
|
|
3775
|
reset_indentation_level($starting_level); |
1547
|
561
|
|
|
|
|
1082
|
return; |
1548
|
|
|
|
|
|
|
} ## end sub find_starting_indentation_level |
1549
|
|
|
|
|
|
|
|
1550
|
|
|
|
|
|
|
sub guess_old_indentation_level { |
1551
|
543
|
|
|
543
|
0
|
1812
|
my ( $self, $line ) = @_; |
1552
|
|
|
|
|
|
|
|
1553
|
|
|
|
|
|
|
# Guess the indentation level of an input line. |
1554
|
|
|
|
|
|
|
# |
1555
|
|
|
|
|
|
|
# For the first line of code this result will define the starting |
1556
|
|
|
|
|
|
|
# indentation level. It will mainly be non-zero when perltidy is applied |
1557
|
|
|
|
|
|
|
# within an editor to a local block of code. |
1558
|
|
|
|
|
|
|
# |
1559
|
|
|
|
|
|
|
# This is an impossible task in general because we can't know what tabs |
1560
|
|
|
|
|
|
|
# meant for the old script and how many spaces were used for one |
1561
|
|
|
|
|
|
|
# indentation level in the given input script. For example it may have |
1562
|
|
|
|
|
|
|
# been previously formatted with -i=7 -et=3. But we can at least try to |
1563
|
|
|
|
|
|
|
# make sure that perltidy guesses correctly if it is applied repeatedly to |
1564
|
|
|
|
|
|
|
# a block of code within an editor, so that the block stays at the same |
1565
|
|
|
|
|
|
|
# level when perltidy is applied repeatedly. |
1566
|
|
|
|
|
|
|
# |
1567
|
|
|
|
|
|
|
# USES GLOBAL VARIABLES: (none) |
1568
|
543
|
|
|
|
|
1315
|
my $level = 0; |
1569
|
|
|
|
|
|
|
|
1570
|
|
|
|
|
|
|
# find leading tabs, spaces, and any statement label |
1571
|
543
|
|
|
|
|
1094
|
my $spaces = 0; |
1572
|
543
|
50
|
|
|
|
4279
|
if ( $line =~ /^(\t+)?(\s+)?(\w+:[^:])?/ ) { |
1573
|
|
|
|
|
|
|
|
1574
|
|
|
|
|
|
|
# If there are leading tabs, we use the tab scheme for this run, if |
1575
|
|
|
|
|
|
|
# any, so that the code will remain stable when editing. |
1576
|
543
|
100
|
|
|
|
2518
|
if ($1) { $spaces += length($1) * $self->[_tabsize_] } |
|
2
|
|
|
|
|
10
|
|
1577
|
|
|
|
|
|
|
|
1578
|
543
|
100
|
|
|
|
2143
|
if ($2) { $spaces += length($2) } |
|
79
|
|
|
|
|
294
|
|
1579
|
|
|
|
|
|
|
|
1580
|
|
|
|
|
|
|
# correct for outdented labels |
1581
|
543
|
50
|
66
|
|
|
2345
|
if ( $3 && $self->[_outdent_labels_] ) { |
1582
|
1
|
|
|
|
|
3
|
$spaces += $self->[_continuation_indentation_]; |
1583
|
|
|
|
|
|
|
} |
1584
|
|
|
|
|
|
|
} |
1585
|
|
|
|
|
|
|
|
1586
|
|
|
|
|
|
|
# compute indentation using the value of -i for this run. |
1587
|
|
|
|
|
|
|
# If -i=0 is used for this run (which is possible) it doesn't matter |
1588
|
|
|
|
|
|
|
# what we do here but we'll guess that the old run used 4 spaces per level. |
1589
|
543
|
|
|
|
|
1612
|
my $indent_columns = $self->[_indent_columns_]; |
1590
|
543
|
50
|
|
|
|
1830
|
$indent_columns = 4 if ( !$indent_columns ); |
1591
|
543
|
|
|
|
|
1961
|
$level = int( $spaces / $indent_columns ); |
1592
|
543
|
|
|
|
|
1430
|
return ($level); |
1593
|
|
|
|
|
|
|
} ## end sub guess_old_indentation_level |
1594
|
|
|
|
|
|
|
|
1595
|
|
|
|
|
|
|
# This is a currently unused debug routine |
1596
|
|
|
|
|
|
|
sub dump_functions { |
1597
|
|
|
|
|
|
|
|
1598
|
0
|
|
|
0
|
0
|
0
|
my $fh = *STDOUT; |
1599
|
0
|
|
|
|
|
0
|
foreach my $pkg ( keys %{$ris_user_function} ) { |
|
0
|
|
|
|
|
0
|
|
1600
|
0
|
|
|
|
|
0
|
$fh->print("\nnon-constant subs in package $pkg\n"); |
1601
|
|
|
|
|
|
|
|
1602
|
0
|
|
|
|
|
0
|
foreach my $sub ( keys %{ $ris_user_function->{$pkg} } ) { |
|
0
|
|
|
|
|
0
|
|
1603
|
0
|
|
|
|
|
0
|
my $msg = EMPTY_STRING; |
1604
|
0
|
0
|
|
|
|
0
|
if ( $ris_block_list_function->{$pkg}{$sub} ) { |
1605
|
0
|
|
|
|
|
0
|
$msg = 'block_list'; |
1606
|
|
|
|
|
|
|
} |
1607
|
|
|
|
|
|
|
|
1608
|
0
|
0
|
|
|
|
0
|
if ( $ris_block_function->{$pkg}{$sub} ) { |
1609
|
0
|
|
|
|
|
0
|
$msg = 'block'; |
1610
|
|
|
|
|
|
|
} |
1611
|
0
|
|
|
|
|
0
|
$fh->print("$sub $msg\n"); |
1612
|
|
|
|
|
|
|
} |
1613
|
|
|
|
|
|
|
} |
1614
|
|
|
|
|
|
|
|
1615
|
0
|
|
|
|
|
0
|
foreach my $pkg ( keys %{$ris_constant} ) { |
|
0
|
|
|
|
|
0
|
|
1616
|
0
|
|
|
|
|
0
|
$fh->print("\nconstants and constant subs in package $pkg\n"); |
1617
|
|
|
|
|
|
|
|
1618
|
0
|
|
|
|
|
0
|
foreach my $sub ( keys %{ $ris_constant->{$pkg} } ) { |
|
0
|
|
|
|
|
0
|
|
1619
|
0
|
|
|
|
|
0
|
$fh->print("$sub\n"); |
1620
|
|
|
|
|
|
|
} |
1621
|
|
|
|
|
|
|
} |
1622
|
0
|
|
|
|
|
0
|
return; |
1623
|
|
|
|
|
|
|
} ## end sub dump_functions |
1624
|
|
|
|
|
|
|
|
1625
|
|
|
|
|
|
|
sub prepare_for_a_new_file { |
1626
|
|
|
|
|
|
|
|
1627
|
561
|
|
|
561
|
0
|
1975
|
my ( $self, $source_object ) = @_; |
1628
|
|
|
|
|
|
|
|
1629
|
|
|
|
|
|
|
# copy the source object lines to an array of lines |
1630
|
561
|
|
|
|
|
3055
|
$self->make_source_array($source_object); |
1631
|
|
|
|
|
|
|
|
1632
|
|
|
|
|
|
|
# previous tokens needed to determine what to expect next |
1633
|
561
|
|
|
|
|
1786
|
$last_nonblank_token = ';'; # the only possible starting state which |
1634
|
561
|
|
|
|
|
1384
|
$last_nonblank_type = ';'; # will make a leading brace a code block |
1635
|
561
|
|
|
|
|
1328
|
$last_nonblank_block_type = EMPTY_STRING; |
1636
|
|
|
|
|
|
|
|
1637
|
|
|
|
|
|
|
# scalars for remembering statement types across multiple lines |
1638
|
561
|
|
|
|
|
1348
|
$statement_type = EMPTY_STRING; # '' or 'use' or 'sub..' or 'case..' |
1639
|
|
|
|
|
|
|
|
1640
|
|
|
|
|
|
|
# scalars for remembering where we are in the file |
1641
|
561
|
|
|
|
|
1355
|
$current_package = "main"; |
1642
|
561
|
|
|
|
|
1265
|
$context = UNKNOWN_CONTEXT; |
1643
|
|
|
|
|
|
|
|
1644
|
|
|
|
|
|
|
# hashes used to remember function information |
1645
|
561
|
|
|
|
|
1932
|
$ris_constant = {}; # user-defined constants |
1646
|
561
|
|
|
|
|
1856
|
$ris_user_function = {}; # user-defined functions |
1647
|
561
|
|
|
|
|
1611
|
$ruser_function_prototype = {}; # their prototypes |
1648
|
561
|
|
|
|
|
1591
|
$ris_block_function = {}; |
1649
|
561
|
|
|
|
|
1531
|
$ris_block_list_function = {}; |
1650
|
561
|
|
|
|
|
1419
|
$rsaw_function_definition = {}; |
1651
|
561
|
|
|
|
|
1323
|
$rsaw_use_module = {}; |
1652
|
|
|
|
|
|
|
|
1653
|
|
|
|
|
|
|
# variables used to track depths of various containers |
1654
|
|
|
|
|
|
|
# and report nesting errors |
1655
|
561
|
|
|
|
|
1137
|
$paren_depth = 0; |
1656
|
561
|
|
|
|
|
1179
|
$brace_depth = 0; |
1657
|
561
|
|
|
|
|
1090
|
$square_bracket_depth = 0; |
1658
|
561
|
|
|
|
|
2693
|
$rcurrent_depth = [ (0) x scalar @closing_brace_names ]; |
1659
|
561
|
|
|
|
|
1323
|
$total_depth = 0; |
1660
|
561
|
|
|
|
|
2316
|
$rtotal_depth = []; |
1661
|
561
|
|
|
|
|
2114
|
$rcurrent_sequence_number = []; |
1662
|
561
|
|
|
|
|
1240
|
$next_sequence_number = 2; # The value 1 is reserved for SEQ_ROOT |
1663
|
|
|
|
|
|
|
|
1664
|
561
|
|
|
|
|
1882
|
$rparen_type = []; |
1665
|
561
|
|
|
|
|
1629
|
$rparen_semicolon_count = []; |
1666
|
561
|
|
|
|
|
2379
|
$rparen_vars = []; |
1667
|
561
|
|
|
|
|
1862
|
$rbrace_type = []; |
1668
|
561
|
|
|
|
|
1711
|
$rbrace_structural_type = []; |
1669
|
561
|
|
|
|
|
1517
|
$rbrace_context = []; |
1670
|
561
|
|
|
|
|
1707
|
$rbrace_package = []; |
1671
|
561
|
|
|
|
|
1596
|
$rsquare_bracket_type = []; |
1672
|
561
|
|
|
|
|
1460
|
$rsquare_bracket_structural_type = []; |
1673
|
561
|
|
|
|
|
3510
|
$rdepth_array = []; |
1674
|
561
|
|
|
|
|
1235
|
$rnested_ternary_flag = []; |
1675
|
561
|
|
|
|
|
3692
|
$rnested_statement_type = []; |
1676
|
561
|
|
|
|
|
3116
|
$rstarting_line_of_current_depth = []; |
1677
|
|
|
|
|
|
|
|
1678
|
561
|
|
|
|
|
1773
|
$rparen_type->[$paren_depth] = EMPTY_STRING; |
1679
|
561
|
|
|
|
|
1583
|
$rparen_semicolon_count->[$paren_depth] = 0; |
1680
|
561
|
|
|
|
|
1488
|
$rparen_vars->[$paren_depth] = []; |
1681
|
561
|
|
|
|
|
1579
|
$rbrace_type->[$brace_depth] = ';'; # identify opening brace as code block |
1682
|
561
|
|
|
|
|
1535
|
$rbrace_structural_type->[$brace_depth] = EMPTY_STRING; |
1683
|
561
|
|
|
|
|
1413
|
$rbrace_context->[$brace_depth] = UNKNOWN_CONTEXT; |
1684
|
561
|
|
|
|
|
1376
|
$rbrace_package->[$paren_depth] = $current_package; |
1685
|
561
|
|
|
|
|
1366
|
$rsquare_bracket_type->[$square_bracket_depth] = EMPTY_STRING; |
1686
|
561
|
|
|
|
|
1309
|
$rsquare_bracket_structural_type->[$square_bracket_depth] = EMPTY_STRING; |
1687
|
|
|
|
|
|
|
|
1688
|
561
|
|
|
|
|
2697
|
initialize_tokenizer_state(); |
1689
|
561
|
|
|
|
|
1134
|
return; |
1690
|
|
|
|
|
|
|
} ## end sub prepare_for_a_new_file |
1691
|
|
|
|
|
|
|
|
1692
|
|
|
|
|
|
|
{ ## closure for sub tokenize_this_line |
1693
|
|
|
|
|
|
|
|
1694
|
39
|
|
|
39
|
|
397
|
use constant BRACE => 0; |
|
39
|
|
|
|
|
98
|
|
|
39
|
|
|
|
|
2686
|
|
1695
|
39
|
|
|
39
|
|
307
|
use constant SQUARE_BRACKET => 1; |
|
39
|
|
|
|
|
109
|
|
|
39
|
|
|
|
|
2316
|
|
1696
|
39
|
|
|
39
|
|
288
|
use constant PAREN => 2; |
|
39
|
|
|
|
|
97
|
|
|
39
|
|
|
|
|
2227
|
|
1697
|
39
|
|
|
39
|
|
296
|
use constant QUESTION_COLON => 3; |
|
39
|
|
|
|
|
87
|
|
|
39
|
|
|
|
|
86024
|
|
1698
|
|
|
|
|
|
|
|
1699
|
|
|
|
|
|
|
# TV1: scalars for processing one LINE. |
1700
|
|
|
|
|
|
|
# Re-initialized on each entry to sub tokenize_this_line. |
1701
|
|
|
|
|
|
|
my ( |
1702
|
|
|
|
|
|
|
$block_type, $container_type, $expecting, |
1703
|
|
|
|
|
|
|
$i, $i_tok, $input_line, |
1704
|
|
|
|
|
|
|
$input_line_number, $last_nonblank_i, $max_token_index, |
1705
|
|
|
|
|
|
|
$next_tok, $next_type, $peeked_ahead, |
1706
|
|
|
|
|
|
|
$prototype, $rhere_target_list, $rtoken_map, |
1707
|
|
|
|
|
|
|
$rtoken_type, $rtokens, $tok, |
1708
|
|
|
|
|
|
|
$type, $type_sequence, $indent_flag, |
1709
|
|
|
|
|
|
|
); |
1710
|
|
|
|
|
|
|
|
1711
|
|
|
|
|
|
|
# TV2: refs to ARRAYS for processing one LINE |
1712
|
|
|
|
|
|
|
# Re-initialized on each call. |
1713
|
|
|
|
|
|
|
my $routput_token_list = []; # stack of output token indexes |
1714
|
|
|
|
|
|
|
my $routput_token_type = []; # token types |
1715
|
|
|
|
|
|
|
my $routput_block_type = []; # types of code block |
1716
|
|
|
|
|
|
|
my $routput_container_type = []; # paren types, such as if, elsif, .. |
1717
|
|
|
|
|
|
|
my $routput_type_sequence = []; # nesting sequential number |
1718
|
|
|
|
|
|
|
my $routput_indent_flag = []; # |
1719
|
|
|
|
|
|
|
|
1720
|
|
|
|
|
|
|
# TV3: SCALARS for quote variables. These are initialized with a |
1721
|
|
|
|
|
|
|
# subroutine call and continually updated as lines are processed. |
1722
|
|
|
|
|
|
|
my ( $in_quote, $quote_type, $quote_character, $quote_pos, $quote_depth, |
1723
|
|
|
|
|
|
|
$quoted_string_1, $quoted_string_2, $allowed_quote_modifiers, ); |
1724
|
|
|
|
|
|
|
|
1725
|
|
|
|
|
|
|
# TV4: SCALARS for multi-line identifiers and |
1726
|
|
|
|
|
|
|
# statements. These are initialized with a subroutine call |
1727
|
|
|
|
|
|
|
# and continually updated as lines are processed. |
1728
|
|
|
|
|
|
|
my ( $id_scan_state, $identifier, $want_paren ); |
1729
|
|
|
|
|
|
|
|
1730
|
|
|
|
|
|
|
# TV5: SCALARS for tracking indentation level. |
1731
|
|
|
|
|
|
|
# Initialized once and continually updated as lines are |
1732
|
|
|
|
|
|
|
# processed. |
1733
|
|
|
|
|
|
|
my ( |
1734
|
|
|
|
|
|
|
$nesting_token_string, $nesting_block_string, |
1735
|
|
|
|
|
|
|
$nesting_block_flag, $level_in_tokenizer, |
1736
|
|
|
|
|
|
|
); |
1737
|
|
|
|
|
|
|
|
1738
|
|
|
|
|
|
|
# TV6: SCALARS for remembering several previous |
1739
|
|
|
|
|
|
|
# tokens. Initialized once and continually updated as |
1740
|
|
|
|
|
|
|
# lines are processed. |
1741
|
|
|
|
|
|
|
my ( |
1742
|
|
|
|
|
|
|
$last_nonblank_container_type, $last_nonblank_type_sequence, |
1743
|
|
|
|
|
|
|
$last_last_nonblank_token, $last_last_nonblank_type, |
1744
|
|
|
|
|
|
|
$last_nonblank_prototype, |
1745
|
|
|
|
|
|
|
); |
1746
|
|
|
|
|
|
|
|
1747
|
|
|
|
|
|
|
# ---------------------------------------------------------------- |
1748
|
|
|
|
|
|
|
# beginning of tokenizer variable access and manipulation routines |
1749
|
|
|
|
|
|
|
# ---------------------------------------------------------------- |
1750
|
|
|
|
|
|
|
|
1751
|
|
|
|
|
|
|
sub initialize_tokenizer_state { |
1752
|
|
|
|
|
|
|
|
1753
|
|
|
|
|
|
|
# GV1: initialized once |
1754
|
|
|
|
|
|
|
# TV1: initialized on each call |
1755
|
|
|
|
|
|
|
# TV2: initialized on each call |
1756
|
|
|
|
|
|
|
# TV3: |
1757
|
561
|
|
|
561
|
0
|
1251
|
$in_quote = 0; |
1758
|
561
|
|
|
|
|
1371
|
$quote_type = 'Q'; |
1759
|
561
|
|
|
|
|
1218
|
$quote_character = EMPTY_STRING; |
1760
|
561
|
|
|
|
|
1224
|
$quote_pos = 0; |
1761
|
561
|
|
|
|
|
1177
|
$quote_depth = 0; |
1762
|
561
|
|
|
|
|
1247
|
$quoted_string_1 = EMPTY_STRING; |
1763
|
561
|
|
|
|
|
1247
|
$quoted_string_2 = EMPTY_STRING; |
1764
|
561
|
|
|
|
|
1183
|
$allowed_quote_modifiers = EMPTY_STRING; |
1765
|
|
|
|
|
|
|
|
1766
|
|
|
|
|
|
|
# TV4: |
1767
|
561
|
|
|
|
|
1144
|
$id_scan_state = EMPTY_STRING; |
1768
|
561
|
|
|
|
|
1186
|
$identifier = EMPTY_STRING; |
1769
|
561
|
|
|
|
|
1194
|
$want_paren = EMPTY_STRING; |
1770
|
|
|
|
|
|
|
|
1771
|
|
|
|
|
|
|
# TV5: |
1772
|
561
|
|
|
|
|
1361
|
$nesting_token_string = EMPTY_STRING; |
1773
|
561
|
|
|
|
|
1229
|
$nesting_block_string = '1'; # initially in a block |
1774
|
561
|
|
|
|
|
1069
|
$nesting_block_flag = 1; |
1775
|
561
|
|
|
|
|
1107
|
$level_in_tokenizer = 0; |
1776
|
|
|
|
|
|
|
|
1777
|
|
|
|
|
|
|
# TV6: |
1778
|
561
|
|
|
|
|
1232
|
$last_nonblank_container_type = EMPTY_STRING; |
1779
|
561
|
|
|
|
|
1155
|
$last_nonblank_type_sequence = EMPTY_STRING; |
1780
|
561
|
|
|
|
|
1275
|
$last_last_nonblank_token = ';'; |
1781
|
561
|
|
|
|
|
1163
|
$last_last_nonblank_type = ';'; |
1782
|
561
|
|
|
|
|
1181
|
$last_nonblank_prototype = EMPTY_STRING; |
1783
|
561
|
|
|
|
|
1154
|
return; |
1784
|
|
|
|
|
|
|
} ## end sub initialize_tokenizer_state |
1785
|
|
|
|
|
|
|
|
1786
|
|
|
|
|
|
|
sub save_tokenizer_state { |
1787
|
|
|
|
|
|
|
|
1788
|
|
|
|
|
|
|
# Global variables: |
1789
|
0
|
|
|
0
|
0
|
0
|
my $rGV1 = [ |
1790
|
|
|
|
|
|
|
$brace_depth, |
1791
|
|
|
|
|
|
|
$context, |
1792
|
|
|
|
|
|
|
$current_package, |
1793
|
|
|
|
|
|
|
$last_nonblank_block_type, |
1794
|
|
|
|
|
|
|
$last_nonblank_token, |
1795
|
|
|
|
|
|
|
$last_nonblank_type, |
1796
|
|
|
|
|
|
|
$next_sequence_number, |
1797
|
|
|
|
|
|
|
$paren_depth, |
1798
|
|
|
|
|
|
|
$rbrace_context, |
1799
|
|
|
|
|
|
|
$rbrace_package, |
1800
|
|
|
|
|
|
|
$rbrace_structural_type, |
1801
|
|
|
|
|
|
|
$rbrace_type, |
1802
|
|
|
|
|
|
|
$rcurrent_depth, |
1803
|
|
|
|
|
|
|
$rcurrent_sequence_number, |
1804
|
|
|
|
|
|
|
$rdepth_array, |
1805
|
|
|
|
|
|
|
$ris_block_function, |
1806
|
|
|
|
|
|
|
$ris_block_list_function, |
1807
|
|
|
|
|
|
|
$ris_constant, |
1808
|
|
|
|
|
|
|
$ris_user_function, |
1809
|
|
|
|
|
|
|
$rnested_statement_type, |
1810
|
|
|
|
|
|
|
$rnested_ternary_flag, |
1811
|
|
|
|
|
|
|
$rparen_semicolon_count, |
1812
|
|
|
|
|
|
|
$rparen_vars, |
1813
|
|
|
|
|
|
|
$rparen_type, |
1814
|
|
|
|
|
|
|
$rsaw_function_definition, |
1815
|
|
|
|
|
|
|
$rsaw_use_module, |
1816
|
|
|
|
|
|
|
$rsquare_bracket_structural_type, |
1817
|
|
|
|
|
|
|
$rsquare_bracket_type, |
1818
|
|
|
|
|
|
|
$rstarting_line_of_current_depth, |
1819
|
|
|
|
|
|
|
$rtotal_depth, |
1820
|
|
|
|
|
|
|
$ruser_function_prototype, |
1821
|
|
|
|
|
|
|
$square_bracket_depth, |
1822
|
|
|
|
|
|
|
$statement_type, |
1823
|
|
|
|
|
|
|
$total_depth, |
1824
|
|
|
|
|
|
|
|
1825
|
|
|
|
|
|
|
]; |
1826
|
|
|
|
|
|
|
|
1827
|
|
|
|
|
|
|
# Tokenizer closure variables: |
1828
|
0
|
|
|
|
|
0
|
my $rTV1 = [ |
1829
|
|
|
|
|
|
|
$block_type, $container_type, $expecting, |
1830
|
|
|
|
|
|
|
$i, $i_tok, $input_line, |
1831
|
|
|
|
|
|
|
$input_line_number, $last_nonblank_i, $max_token_index, |
1832
|
|
|
|
|
|
|
$next_tok, $next_type, $peeked_ahead, |
1833
|
|
|
|
|
|
|
$prototype, $rhere_target_list, $rtoken_map, |
1834
|
|
|
|
|
|
|
$rtoken_type, $rtokens, $tok, |
1835
|
|
|
|
|
|
|
$type, $type_sequence, $indent_flag, |
1836
|
|
|
|
|
|
|
]; |
1837
|
|
|
|
|
|
|
|
1838
|
0
|
|
|
|
|
0
|
my $rTV2 = [ |
1839
|
|
|
|
|
|
|
$routput_token_list, $routput_token_type, |
1840
|
|
|
|
|
|
|
$routput_block_type, $routput_container_type, |
1841
|
|
|
|
|
|
|
$routput_type_sequence, $routput_indent_flag, |
1842
|
|
|
|
|
|
|
]; |
1843
|
|
|
|
|
|
|
|
1844
|
0
|
|
|
|
|
0
|
my $rTV3 = [ |
1845
|
|
|
|
|
|
|
$in_quote, $quote_type, |
1846
|
|
|
|
|
|
|
$quote_character, $quote_pos, |
1847
|
|
|
|
|
|
|
$quote_depth, $quoted_string_1, |
1848
|
|
|
|
|
|
|
$quoted_string_2, $allowed_quote_modifiers, |
1849
|
|
|
|
|
|
|
]; |
1850
|
|
|
|
|
|
|
|
1851
|
0
|
|
|
|
|
0
|
my $rTV4 = [ $id_scan_state, $identifier, $want_paren ]; |
1852
|
|
|
|
|
|
|
|
1853
|
0
|
|
|
|
|
0
|
my $rTV5 = [ |
1854
|
|
|
|
|
|
|
$nesting_token_string, $nesting_block_string, |
1855
|
|
|
|
|
|
|
$nesting_block_flag, $level_in_tokenizer, |
1856
|
|
|
|
|
|
|
]; |
1857
|
|
|
|
|
|
|
|
1858
|
0
|
|
|
|
|
0
|
my $rTV6 = [ |
1859
|
|
|
|
|
|
|
$last_nonblank_container_type, $last_nonblank_type_sequence, |
1860
|
|
|
|
|
|
|
$last_last_nonblank_token, $last_last_nonblank_type, |
1861
|
|
|
|
|
|
|
$last_nonblank_prototype, |
1862
|
|
|
|
|
|
|
]; |
1863
|
0
|
|
|
|
|
0
|
return [ $rGV1, $rTV1, $rTV2, $rTV3, $rTV4, $rTV5, $rTV6 ]; |
1864
|
|
|
|
|
|
|
} ## end sub save_tokenizer_state |
1865
|
|
|
|
|
|
|
|
1866
|
|
|
|
|
|
|
sub restore_tokenizer_state { |
1867
|
0
|
|
|
0
|
0
|
0
|
my ($rstate) = @_; |
1868
|
0
|
|
|
|
|
0
|
my ( $rGV1, $rTV1, $rTV2, $rTV3, $rTV4, $rTV5, $rTV6 ) = @{$rstate}; |
|
0
|
|
|
|
|
0
|
|
1869
|
|
|
|
|
|
|
|
1870
|
|
|
|
|
|
|
( |
1871
|
|
|
|
|
|
|
$brace_depth, |
1872
|
|
|
|
|
|
|
$context, |
1873
|
|
|
|
|
|
|
$current_package, |
1874
|
|
|
|
|
|
|
$last_nonblank_block_type, |
1875
|
|
|
|
|
|
|
$last_nonblank_token, |
1876
|
|
|
|
|
|
|
$last_nonblank_type, |
1877
|
|
|
|
|
|
|
$next_sequence_number, |
1878
|
|
|
|
|
|
|
$paren_depth, |
1879
|
|
|
|
|
|
|
$rbrace_context, |
1880
|
|
|
|
|
|
|
$rbrace_package, |
1881
|
|
|
|
|
|
|
$rbrace_structural_type, |
1882
|
|
|
|
|
|
|
$rbrace_type, |
1883
|
|
|
|
|
|
|
$rcurrent_depth, |
1884
|
|
|
|
|
|
|
$rcurrent_sequence_number, |
1885
|
|
|
|
|
|
|
$rdepth_array, |
1886
|
|
|
|
|
|
|
$ris_block_function, |
1887
|
|
|
|
|
|
|
$ris_block_list_function, |
1888
|
|
|
|
|
|
|
$ris_constant, |
1889
|
|
|
|
|
|
|
$ris_user_function, |
1890
|
|
|
|
|
|
|
$rnested_statement_type, |
1891
|
|
|
|
|
|
|
$rnested_ternary_flag, |
1892
|
|
|
|
|
|
|
$rparen_semicolon_count, |
1893
|
|
|
|
|
|
|
$rparen_vars, |
1894
|
|
|
|
|
|
|
$rparen_type, |
1895
|
|
|
|
|
|
|
$rsaw_function_definition, |
1896
|
|
|
|
|
|
|
$rsaw_use_module, |
1897
|
|
|
|
|
|
|
$rsquare_bracket_structural_type, |
1898
|
|
|
|
|
|
|
$rsquare_bracket_type, |
1899
|
|
|
|
|
|
|
$rstarting_line_of_current_depth, |
1900
|
|
|
|
|
|
|
$rtotal_depth, |
1901
|
|
|
|
|
|
|
$ruser_function_prototype, |
1902
|
|
|
|
|
|
|
$square_bracket_depth, |
1903
|
|
|
|
|
|
|
$statement_type, |
1904
|
|
|
|
|
|
|
$total_depth, |
1905
|
|
|
|
|
|
|
|
1906
|
0
|
|
|
|
|
0
|
) = @{$rGV1}; |
|
0
|
|
|
|
|
0
|
|
1907
|
|
|
|
|
|
|
|
1908
|
|
|
|
|
|
|
( |
1909
|
|
|
|
|
|
|
$block_type, $container_type, $expecting, |
1910
|
|
|
|
|
|
|
$i, $i_tok, $input_line, |
1911
|
|
|
|
|
|
|
$input_line_number, $last_nonblank_i, $max_token_index, |
1912
|
|
|
|
|
|
|
$next_tok, $next_type, $peeked_ahead, |
1913
|
|
|
|
|
|
|
$prototype, $rhere_target_list, $rtoken_map, |
1914
|
|
|
|
|
|
|
$rtoken_type, $rtokens, $tok, |
1915
|
|
|
|
|
|
|
$type, $type_sequence, $indent_flag, |
1916
|
0
|
|
|
|
|
0
|
) = @{$rTV1}; |
|
0
|
|
|
|
|
0
|
|
1917
|
|
|
|
|
|
|
|
1918
|
|
|
|
|
|
|
( |
1919
|
|
|
|
|
|
|
$routput_token_list, $routput_token_type, |
1920
|
|
|
|
|
|
|
$routput_block_type, $routput_container_type, |
1921
|
|
|
|
|
|
|
$routput_type_sequence, $routput_indent_flag, |
1922
|
0
|
|
|
|
|
0
|
) = @{$rTV2}; |
|
0
|
|
|
|
|
0
|
|
1923
|
|
|
|
|
|
|
|
1924
|
|
|
|
|
|
|
( |
1925
|
|
|
|
|
|
|
$in_quote, $quote_type, $quote_character, $quote_pos, $quote_depth, |
1926
|
|
|
|
|
|
|
$quoted_string_1, $quoted_string_2, $allowed_quote_modifiers, |
1927
|
0
|
|
|
|
|
0
|
) = @{$rTV3}; |
|
0
|
|
|
|
|
0
|
|
1928
|
|
|
|
|
|
|
|
1929
|
0
|
|
|
|
|
0
|
( $id_scan_state, $identifier, $want_paren ) = @{$rTV4}; |
|
0
|
|
|
|
|
0
|
|
1930
|
|
|
|
|
|
|
|
1931
|
|
|
|
|
|
|
( |
1932
|
|
|
|
|
|
|
$nesting_token_string, $nesting_block_string, |
1933
|
|
|
|
|
|
|
$nesting_block_flag, $level_in_tokenizer, |
1934
|
0
|
|
|
|
|
0
|
) = @{$rTV5}; |
|
0
|
|
|
|
|
0
|
|
1935
|
|
|
|
|
|
|
|
1936
|
|
|
|
|
|
|
( |
1937
|
|
|
|
|
|
|
$last_nonblank_container_type, $last_nonblank_type_sequence, |
1938
|
|
|
|
|
|
|
$last_last_nonblank_token, $last_last_nonblank_type, |
1939
|
|
|
|
|
|
|
$last_nonblank_prototype, |
1940
|
0
|
|
|
|
|
0
|
) = @{$rTV6}; |
|
0
|
|
|
|
|
0
|
|
1941
|
0
|
|
|
|
|
0
|
return; |
1942
|
|
|
|
|
|
|
} ## end sub restore_tokenizer_state |
1943
|
|
|
|
|
|
|
|
1944
|
|
|
|
|
|
|
sub split_pretoken { |
1945
|
|
|
|
|
|
|
|
1946
|
8
|
|
|
8
|
0
|
19
|
my ( $self, $numc ) = @_; |
1947
|
|
|
|
|
|
|
|
1948
|
|
|
|
|
|
|
# Split the leading $numc characters from the current token (at index=$i) |
1949
|
|
|
|
|
|
|
# which is pre-type 'w' and insert the remainder back into the pretoken |
1950
|
|
|
|
|
|
|
# stream with appropriate settings. Since we are splitting a pre-type 'w', |
1951
|
|
|
|
|
|
|
# there are three cases, depending on if the remainder starts with a digit: |
1952
|
|
|
|
|
|
|
# Case 1: remainder is type 'd', all digits |
1953
|
|
|
|
|
|
|
# Case 2: remainder is type 'd' and type 'w': digits and other characters |
1954
|
|
|
|
|
|
|
# Case 3: remainder is type 'w' |
1955
|
|
|
|
|
|
|
|
1956
|
|
|
|
|
|
|
# Examples, for $numc=1: |
1957
|
|
|
|
|
|
|
# $tok => $tok_0 $tok_1 $tok_2 |
1958
|
|
|
|
|
|
|
# 'x10' => 'x' '10' # case 1 |
1959
|
|
|
|
|
|
|
# 'x10if' => 'x' '10' 'if' # case 2 |
1960
|
|
|
|
|
|
|
# '0ne => 'O' 'ne' # case 3 |
1961
|
|
|
|
|
|
|
|
1962
|
|
|
|
|
|
|
# where: |
1963
|
|
|
|
|
|
|
# $tok_1 is a possible string of digits (pre-type 'd') |
1964
|
|
|
|
|
|
|
# $tok_2 is a possible word (pre-type 'w') |
1965
|
|
|
|
|
|
|
|
1966
|
|
|
|
|
|
|
# return 1 if successful |
1967
|
|
|
|
|
|
|
# return undef if error (shouldn't happen) |
1968
|
|
|
|
|
|
|
|
1969
|
|
|
|
|
|
|
# Calling routine should update '$type' and '$tok' if successful. |
1970
|
|
|
|
|
|
|
|
1971
|
8
|
|
|
|
|
18
|
my $pretoken = $rtokens->[$i]; |
1972
|
8
|
50
|
33
|
|
|
80
|
if ( $pretoken |
|
|
|
33
|
|
|
|
|
1973
|
|
|
|
|
|
|
&& length($pretoken) > $numc |
1974
|
|
|
|
|
|
|
&& substr( $pretoken, $numc ) =~ /^(\d*)(.*)$/ ) |
1975
|
|
|
|
|
|
|
{ |
1976
|
|
|
|
|
|
|
|
1977
|
|
|
|
|
|
|
# Split $tok into up to 3 tokens: |
1978
|
8
|
|
|
|
|
21
|
my $tok_0 = substr( $pretoken, 0, $numc ); |
1979
|
8
|
50
|
|
|
|
28
|
my $tok_1 = defined($1) ? $1 : EMPTY_STRING; |
1980
|
8
|
50
|
|
|
|
27
|
my $tok_2 = defined($2) ? $2 : EMPTY_STRING; |
1981
|
|
|
|
|
|
|
|
1982
|
8
|
|
|
|
|
12
|
my $len_0 = length($tok_0); |
1983
|
8
|
|
|
|
|
13
|
my $len_1 = length($tok_1); |
1984
|
8
|
|
|
|
|
14
|
my $len_2 = length($tok_2); |
1985
|
|
|
|
|
|
|
|
1986
|
8
|
|
|
|
|
16
|
my $pre_type_0 = 'w'; |
1987
|
8
|
|
|
|
|
13
|
my $pre_type_1 = 'd'; |
1988
|
8
|
|
|
|
|
14
|
my $pre_type_2 = 'w'; |
1989
|
|
|
|
|
|
|
|
1990
|
8
|
|
|
|
|
16
|
my $pos_0 = $rtoken_map->[$i]; |
1991
|
8
|
|
|
|
|
14
|
my $pos_1 = $pos_0 + $len_0; |
1992
|
8
|
|
|
|
|
13
|
my $pos_2 = $pos_1 + $len_1; |
1993
|
|
|
|
|
|
|
|
1994
|
8
|
|
|
|
|
13
|
my $isplice = $i + 1; |
1995
|
|
|
|
|
|
|
|
1996
|
|
|
|
|
|
|
# Splice in any digits |
1997
|
8
|
100
|
|
|
|
24
|
if ($len_1) { |
1998
|
5
|
|
|
|
|
12
|
splice @{$rtoken_map}, $isplice, 0, $pos_1; |
|
5
|
|
|
|
|
22
|
|
1999
|
5
|
|
|
|
|
18
|
splice @{$rtokens}, $isplice, 0, $tok_1; |
|
5
|
|
|
|
|
12
|
|
2000
|
5
|
|
|
|
|
11
|
splice @{$rtoken_type}, $isplice, 0, $pre_type_1; |
|
5
|
|
|
|
|
11
|
|
2001
|
5
|
|
|
|
|
11
|
$max_token_index++; |
2002
|
5
|
|
|
|
|
10
|
$isplice++; |
2003
|
|
|
|
|
|
|
} |
2004
|
|
|
|
|
|
|
|
2005
|
|
|
|
|
|
|
# Splice in any trailing word |
2006
|
8
|
100
|
|
|
|
20
|
if ($len_2) { |
2007
|
4
|
|
|
|
|
5
|
splice @{$rtoken_map}, $isplice, 0, $pos_2; |
|
4
|
|
|
|
|
11
|
|
2008
|
4
|
|
|
|
|
11
|
splice @{$rtokens}, $isplice, 0, $tok_2; |
|
4
|
|
|
|
|
10
|
|
2009
|
4
|
|
|
|
|
6
|
splice @{$rtoken_type}, $isplice, 0, $pre_type_2; |
|
4
|
|
|
|
|
12
|
|
2010
|
4
|
|
|
|
|
6
|
$max_token_index++; |
2011
|
|
|
|
|
|
|
} |
2012
|
|
|
|
|
|
|
|
2013
|
8
|
|
|
|
|
18
|
$rtokens->[$i] = $tok_0; |
2014
|
8
|
|
|
|
|
28
|
return 1; |
2015
|
|
|
|
|
|
|
} |
2016
|
|
|
|
|
|
|
else { |
2017
|
|
|
|
|
|
|
|
2018
|
|
|
|
|
|
|
# Shouldn't get here |
2019
|
0
|
|
|
|
|
0
|
if (DEVEL_MODE) { |
2020
|
|
|
|
|
|
|
$self->Fault(<<EOM); |
2021
|
|
|
|
|
|
|
While working near line number $input_line_number, bad arg '$tok' passed to sub split_pretoken() |
2022
|
|
|
|
|
|
|
EOM |
2023
|
|
|
|
|
|
|
} |
2024
|
|
|
|
|
|
|
} |
2025
|
0
|
|
|
|
|
0
|
return; |
2026
|
|
|
|
|
|
|
} ## end sub split_pretoken |
2027
|
|
|
|
|
|
|
|
2028
|
|
|
|
|
|
|
sub get_indentation_level { |
2029
|
561
|
|
|
561
|
0
|
1551
|
return $level_in_tokenizer; |
2030
|
|
|
|
|
|
|
} |
2031
|
|
|
|
|
|
|
|
2032
|
|
|
|
|
|
|
sub reset_indentation_level { |
2033
|
561
|
|
|
561
|
0
|
1458
|
$level_in_tokenizer = shift; |
2034
|
561
|
|
|
|
|
1135
|
return; |
2035
|
|
|
|
|
|
|
} |
2036
|
|
|
|
|
|
|
|
2037
|
|
|
|
|
|
|
sub peeked_ahead { |
2038
|
252
|
|
|
252
|
0
|
490
|
my $flag = shift; |
2039
|
252
|
100
|
|
|
|
680
|
$peeked_ahead = defined($flag) ? $flag : $peeked_ahead; |
2040
|
252
|
|
|
|
|
606
|
return $peeked_ahead; |
2041
|
|
|
|
|
|
|
} |
2042
|
|
|
|
|
|
|
|
2043
|
|
|
|
|
|
|
# ------------------------------------------------------------ |
2044
|
|
|
|
|
|
|
# end of tokenizer variable access and manipulation routines |
2045
|
|
|
|
|
|
|
# ------------------------------------------------------------ |
2046
|
|
|
|
|
|
|
|
2047
|
|
|
|
|
|
|
#------------------------------ |
2048
|
|
|
|
|
|
|
# beginning of tokenizer hashes |
2049
|
|
|
|
|
|
|
#------------------------------ |
2050
|
|
|
|
|
|
|
|
2051
|
|
|
|
|
|
|
my %matching_start_token = ( '}' => '{', ']' => '[', ')' => '(' ); |
2052
|
|
|
|
|
|
|
|
2053
|
|
|
|
|
|
|
# These block types terminate statements and do not need a trailing |
2054
|
|
|
|
|
|
|
# semicolon |
2055
|
|
|
|
|
|
|
# patched for SWITCH/CASE/ |
2056
|
|
|
|
|
|
|
# NOTE: not currently used but may be used in the future |
2057
|
|
|
|
|
|
|
my %is_zero_continuation_block_type; |
2058
|
|
|
|
|
|
|
my @q; |
2059
|
|
|
|
|
|
|
@q = qw( } { BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue ; |
2060
|
|
|
|
|
|
|
if elsif else unless while until for foreach switch case given when); |
2061
|
|
|
|
|
|
|
@is_zero_continuation_block_type{@q} = (1) x scalar(@q); |
2062
|
|
|
|
|
|
|
|
2063
|
|
|
|
|
|
|
my %is_logical_container; |
2064
|
|
|
|
|
|
|
@q = qw(if elsif unless while and or err not && ! || for foreach); |
2065
|
|
|
|
|
|
|
@is_logical_container{@q} = (1) x scalar(@q); |
2066
|
|
|
|
|
|
|
|
2067
|
|
|
|
|
|
|
my %is_binary_type; |
2068
|
|
|
|
|
|
|
@q = qw(|| &&); |
2069
|
|
|
|
|
|
|
@is_binary_type{@q} = (1) x scalar(@q); |
2070
|
|
|
|
|
|
|
|
2071
|
|
|
|
|
|
|
my %is_binary_keyword; |
2072
|
|
|
|
|
|
|
@q = qw(and or err eq ne cmp); |
2073
|
|
|
|
|
|
|
@is_binary_keyword{@q} = (1) x scalar(@q); |
2074
|
|
|
|
|
|
|
|
2075
|
|
|
|
|
|
|
# 'L' is token for opening { at hash key |
2076
|
|
|
|
|
|
|
my %is_opening_type; |
2077
|
|
|
|
|
|
|
@q = qw< L { ( [ >; |
2078
|
|
|
|
|
|
|
@is_opening_type{@q} = (1) x scalar(@q); |
2079
|
|
|
|
|
|
|
|
2080
|
|
|
|
|
|
|
my %is_opening_or_ternary_type; |
2081
|
|
|
|
|
|
|
push @q, '?'; |
2082
|
|
|
|
|
|
|
@is_opening_or_ternary_type{@q} = (1) x scalar(@q); |
2083
|
|
|
|
|
|
|
|
2084
|
|
|
|
|
|
|
# 'R' is token for closing } at hash key |
2085
|
|
|
|
|
|
|
my %is_closing_type; |
2086
|
|
|
|
|
|
|
@q = qw< R } ) ] >; |
2087
|
|
|
|
|
|
|
@is_closing_type{@q} = (1) x scalar(@q); |
2088
|
|
|
|
|
|
|
|
2089
|
|
|
|
|
|
|
my %is_closing_or_ternary_type; |
2090
|
|
|
|
|
|
|
push @q, ':'; |
2091
|
|
|
|
|
|
|
@is_closing_or_ternary_type{@q} = (1) x scalar(@q); |
2092
|
|
|
|
|
|
|
|
2093
|
|
|
|
|
|
|
my %is_redo_last_next_goto; |
2094
|
|
|
|
|
|
|
@q = qw(redo last next goto); |
2095
|
|
|
|
|
|
|
@is_redo_last_next_goto{@q} = (1) x scalar(@q); |
2096
|
|
|
|
|
|
|
|
2097
|
|
|
|
|
|
|
my %is_use_require; |
2098
|
|
|
|
|
|
|
@q = qw(use require); |
2099
|
|
|
|
|
|
|
@is_use_require{@q} = (1) x scalar(@q); |
2100
|
|
|
|
|
|
|
|
2101
|
|
|
|
|
|
|
# This hash holds the array index in $self for these keywords: |
2102
|
|
|
|
|
|
|
# Fix for issue c035: removed 'format' from this hash |
2103
|
|
|
|
|
|
|
my %is_END_DATA = ( |
2104
|
|
|
|
|
|
|
'__END__' => _in_end_, |
2105
|
|
|
|
|
|
|
'__DATA__' => _in_data_, |
2106
|
|
|
|
|
|
|
); |
2107
|
|
|
|
|
|
|
|
2108
|
|
|
|
|
|
|
my %is_list_end_type; |
2109
|
|
|
|
|
|
|
@q = qw( ; { } ); |
2110
|
|
|
|
|
|
|
push @q, ','; |
2111
|
|
|
|
|
|
|
@is_list_end_type{@q} = (1) x scalar(@q); |
2112
|
|
|
|
|
|
|
|
2113
|
|
|
|
|
|
|
# original ref: camel 3 p 147, |
2114
|
|
|
|
|
|
|
# but perl may accept undocumented flags |
2115
|
|
|
|
|
|
|
# perl 5.10 adds 'p' (preserve) |
2116
|
|
|
|
|
|
|
# Perl version 5.22 added 'n' |
2117
|
|
|
|
|
|
|
# From http://perldoc.perl.org/perlop.html we have |
2118
|
|
|
|
|
|
|
# /PATTERN/msixpodualngc or m?PATTERN?msixpodualngc |
2119
|
|
|
|
|
|
|
# s/PATTERN/REPLACEMENT/msixpodualngcer |
2120
|
|
|
|
|
|
|
# y/SEARCHLIST/REPLACEMENTLIST/cdsr |
2121
|
|
|
|
|
|
|
# tr/SEARCHLIST/REPLACEMENTLIST/cdsr |
2122
|
|
|
|
|
|
|
# qr/STRING/msixpodualn |
2123
|
|
|
|
|
|
|
my %quote_modifiers = ( |
2124
|
|
|
|
|
|
|
's' => '[msixpodualngcer]', |
2125
|
|
|
|
|
|
|
'y' => '[cdsr]', |
2126
|
|
|
|
|
|
|
'tr' => '[cdsr]', |
2127
|
|
|
|
|
|
|
'm' => '[msixpodualngc]', |
2128
|
|
|
|
|
|
|
'qr' => '[msixpodualn]', |
2129
|
|
|
|
|
|
|
'q' => EMPTY_STRING, |
2130
|
|
|
|
|
|
|
'qq' => EMPTY_STRING, |
2131
|
|
|
|
|
|
|
'qw' => EMPTY_STRING, |
2132
|
|
|
|
|
|
|
'qx' => EMPTY_STRING, |
2133
|
|
|
|
|
|
|
); |
2134
|
|
|
|
|
|
|
|
2135
|
|
|
|
|
|
|
# table showing how many quoted things to look for after quote operator.. |
2136
|
|
|
|
|
|
|
# s, y, tr have 2 (pattern and replacement) |
2137
|
|
|
|
|
|
|
# others have 1 (pattern only) |
2138
|
|
|
|
|
|
|
my %quote_items = ( |
2139
|
|
|
|
|
|
|
's' => 2, |
2140
|
|
|
|
|
|
|
'y' => 2, |
2141
|
|
|
|
|
|
|
'tr' => 2, |
2142
|
|
|
|
|
|
|
'm' => 1, |
2143
|
|
|
|
|
|
|
'qr' => 1, |
2144
|
|
|
|
|
|
|
'q' => 1, |
2145
|
|
|
|
|
|
|
'qq' => 1, |
2146
|
|
|
|
|
|
|
'qw' => 1, |
2147
|
|
|
|
|
|
|
'qx' => 1, |
2148
|
|
|
|
|
|
|
); |
2149
|
|
|
|
|
|
|
|
2150
|
|
|
|
|
|
|
my %is_for_foreach; |
2151
|
|
|
|
|
|
|
@q = qw(for foreach); |
2152
|
|
|
|
|
|
|
@is_for_foreach{@q} = (1) x scalar(@q); |
2153
|
|
|
|
|
|
|
|
2154
|
|
|
|
|
|
|
# These keywords may introduce blocks after parenthesized expressions, |
2155
|
|
|
|
|
|
|
# in the form: |
2156
|
|
|
|
|
|
|
# keyword ( .... ) { BLOCK } |
2157
|
|
|
|
|
|
|
# patch for SWITCH/CASE: added 'switch' 'case' 'given' 'when' |
2158
|
|
|
|
|
|
|
# NOTE for --use-feature=class: if ADJUST blocks eventually take a |
2159
|
|
|
|
|
|
|
# parameter list, then ADJUST might need to be added to this list (see |
2160
|
|
|
|
|
|
|
# perlclass.pod) |
2161
|
|
|
|
|
|
|
my %is_blocktype_with_paren; |
2162
|
|
|
|
|
|
|
@q = |
2163
|
|
|
|
|
|
|
qw(if elsif unless while until for foreach switch case given when catch); |
2164
|
|
|
|
|
|
|
@is_blocktype_with_paren{@q} = (1) x scalar(@q); |
2165
|
|
|
|
|
|
|
|
2166
|
|
|
|
|
|
|
my %is_case_default; |
2167
|
|
|
|
|
|
|
@q = qw(case default); |
2168
|
|
|
|
|
|
|
@is_case_default{@q} = (1) x scalar(@q); |
2169
|
|
|
|
|
|
|
|
2170
|
|
|
|
|
|
|
#------------------------ |
2171
|
|
|
|
|
|
|
# end of tokenizer hashes |
2172
|
|
|
|
|
|
|
#------------------------ |
2173
|
|
|
|
|
|
|
|
2174
|
|
|
|
|
|
|
# ------------------------------------------------------------ |
2175
|
|
|
|
|
|
|
# beginning of various scanner interface routines |
2176
|
|
|
|
|
|
|
# ------------------------------------------------------------ |
2177
|
|
|
|
|
|
|
sub scan_replacement_text { |
2178
|
|
|
|
|
|
|
|
2179
|
|
|
|
|
|
|
# check for here-docs in replacement text invoked by |
2180
|
|
|
|
|
|
|
# a substitution operator with executable modifier 'e'. |
2181
|
|
|
|
|
|
|
# |
2182
|
|
|
|
|
|
|
# given: |
2183
|
|
|
|
|
|
|
# $replacement_text |
2184
|
|
|
|
|
|
|
# return: |
2185
|
|
|
|
|
|
|
# $rht = reference to any here-doc targets |
2186
|
0
|
|
|
0
|
0
|
0
|
my ( $self, $replacement_text ) = @_; |
2187
|
|
|
|
|
|
|
|
2188
|
|
|
|
|
|
|
# quick check |
2189
|
0
|
0
|
|
|
|
0
|
return if ( $replacement_text !~ /<</ ); |
2190
|
|
|
|
|
|
|
|
2191
|
0
|
|
|
|
|
0
|
$self->write_logfile_entry( |
2192
|
|
|
|
|
|
|
"scanning replacement text for here-doc targets\n"); |
2193
|
|
|
|
|
|
|
|
2194
|
|
|
|
|
|
|
# save the logger object for error messages |
2195
|
0
|
|
|
|
|
0
|
my $logger_object = $self->[_logger_object_]; |
2196
|
|
|
|
|
|
|
|
2197
|
|
|
|
|
|
|
# save all lexical variables |
2198
|
0
|
|
|
|
|
0
|
my $rstate = save_tokenizer_state(); |
2199
|
0
|
|
|
|
|
0
|
_decrement_count(); # avoid error check for multiple tokenizers |
2200
|
|
|
|
|
|
|
|
2201
|
|
|
|
|
|
|
# make a new tokenizer |
2202
|
0
|
|
|
|
|
0
|
my $tokenizer = Perl::Tidy::Tokenizer->new( |
2203
|
|
|
|
|
|
|
source_object => \$replacement_text, |
2204
|
|
|
|
|
|
|
logger_object => $logger_object, |
2205
|
|
|
|
|
|
|
starting_line_number => $input_line_number, |
2206
|
|
|
|
|
|
|
); |
2207
|
|
|
|
|
|
|
|
2208
|
|
|
|
|
|
|
# scan the replacement text |
2209
|
0
|
|
|
|
|
0
|
while ( $tokenizer->get_line() ) { } |
2210
|
|
|
|
|
|
|
|
2211
|
|
|
|
|
|
|
# remove any here doc targets |
2212
|
0
|
|
|
|
|
0
|
my $rht = undef; |
2213
|
0
|
0
|
|
|
|
0
|
if ( $tokenizer->[_in_here_doc_] ) { |
2214
|
0
|
|
|
|
|
0
|
$rht = []; |
2215
|
0
|
|
|
|
|
0
|
push @{$rht}, |
|
0
|
|
|
|
|
0
|
|
2216
|
|
|
|
|
|
|
[ |
2217
|
|
|
|
|
|
|
$tokenizer->[_here_doc_target_], |
2218
|
|
|
|
|
|
|
$tokenizer->[_here_quote_character_] |
2219
|
|
|
|
|
|
|
]; |
2220
|
0
|
0
|
|
|
|
0
|
if ( $tokenizer->[_rhere_target_list_] ) { |
2221
|
0
|
|
|
|
|
0
|
push @{$rht}, @{ $tokenizer->[_rhere_target_list_] }; |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
2222
|
0
|
|
|
|
|
0
|
$tokenizer->[_rhere_target_list_] = undef; |
2223
|
|
|
|
|
|
|
} |
2224
|
0
|
|
|
|
|
0
|
$tokenizer->[_in_here_doc_] = undef; |
2225
|
|
|
|
|
|
|
} |
2226
|
|
|
|
|
|
|
|
2227
|
|
|
|
|
|
|
# now its safe to report errors |
2228
|
0
|
|
|
|
|
0
|
my $severe_error = $tokenizer->report_tokenization_errors(); |
2229
|
|
|
|
|
|
|
|
2230
|
|
|
|
|
|
|
# TODO: Could propagate a severe error up |
2231
|
|
|
|
|
|
|
|
2232
|
|
|
|
|
|
|
# restore all tokenizer lexical variables |
2233
|
0
|
|
|
|
|
0
|
restore_tokenizer_state($rstate); |
2234
|
|
|
|
|
|
|
|
2235
|
|
|
|
|
|
|
# return the here doc targets |
2236
|
0
|
|
|
|
|
0
|
return $rht; |
2237
|
|
|
|
|
|
|
} ## end sub scan_replacement_text |
2238
|
|
|
|
|
|
|
|
2239
|
|
|
|
|
|
|
sub scan_bare_identifier { |
2240
|
1672
|
|
|
1672
|
0
|
2959
|
my $self = shift; |
2241
|
1672
|
|
|
|
|
5151
|
( $i, $tok, $type, $prototype ) = |
2242
|
|
|
|
|
|
|
$self->scan_bare_identifier_do( $input_line, $i, $tok, $type, |
2243
|
|
|
|
|
|
|
$prototype, $rtoken_map, $max_token_index ); |
2244
|
1672
|
|
|
|
|
3466
|
return; |
2245
|
|
|
|
|
|
|
} ## end sub scan_bare_identifier |
2246
|
|
|
|
|
|
|
|
2247
|
|
|
|
|
|
|
sub scan_identifier { |
2248
|
|
|
|
|
|
|
|
2249
|
486
|
|
|
486
|
0
|
861
|
my $self = shift; |
2250
|
|
|
|
|
|
|
|
2251
|
|
|
|
|
|
|
( |
2252
|
486
|
|
|
|
|
1952
|
$i, $tok, $type, $id_scan_state, $identifier, |
2253
|
|
|
|
|
|
|
my $split_pretoken_flag |
2254
|
|
|
|
|
|
|
) |
2255
|
|
|
|
|
|
|
= $self->scan_complex_identifier( $i, $id_scan_state, $identifier, |
2256
|
|
|
|
|
|
|
$rtokens, $max_token_index, $expecting, |
2257
|
|
|
|
|
|
|
$rparen_type->[$paren_depth] ); |
2258
|
|
|
|
|
|
|
|
2259
|
|
|
|
|
|
|
# Check for signal to fix a special variable adjacent to a keyword, |
2260
|
|
|
|
|
|
|
# such as '$^One$0'. |
2261
|
486
|
100
|
|
|
|
1467
|
if ($split_pretoken_flag) { |
2262
|
|
|
|
|
|
|
|
2263
|
|
|
|
|
|
|
# Try to fix it by splitting the pretoken |
2264
|
3
|
50
|
33
|
|
|
23
|
if ( $i > 0 |
|
|
|
33
|
|
|
|
|
2265
|
|
|
|
|
|
|
&& $rtokens->[ $i - 1 ] eq '^' |
2266
|
|
|
|
|
|
|
&& $self->split_pretoken(1) ) |
2267
|
|
|
|
|
|
|
{ |
2268
|
3
|
|
|
|
|
10
|
$identifier = substr( $identifier, 0, 3 ); |
2269
|
3
|
|
|
|
|
5
|
$tok = $identifier; |
2270
|
|
|
|
|
|
|
} |
2271
|
|
|
|
|
|
|
else { |
2272
|
|
|
|
|
|
|
|
2273
|
|
|
|
|
|
|
# This shouldn't happen ... |
2274
|
0
|
|
|
|
|
0
|
my $var = substr( $tok, 0, 3 ); |
2275
|
0
|
|
|
|
|
0
|
my $excess = substr( $tok, 3 ); |
2276
|
0
|
|
|
|
|
0
|
$self->interrupt_logfile(); |
2277
|
0
|
|
|
|
|
0
|
$self->warning(<<EOM); |
2278
|
|
|
|
|
|
|
$input_line_number: Trouble parsing at characters '$excess' after special variable '$var'. |
2279
|
|
|
|
|
|
|
A space may be needed after '$var'. |
2280
|
|
|
|
|
|
|
EOM |
2281
|
0
|
|
|
|
|
0
|
$self->resume_logfile(); |
2282
|
|
|
|
|
|
|
} |
2283
|
|
|
|
|
|
|
} |
2284
|
486
|
|
|
|
|
883
|
return; |
2285
|
|
|
|
|
|
|
} ## end sub scan_identifier |
2286
|
|
|
|
|
|
|
|
2287
|
39
|
|
|
39
|
|
359
|
use constant VERIFY_FASTSCAN => 0; |
|
39
|
|
|
|
|
104
|
|
|
39
|
|
|
|
|
3977
|
|
2288
|
|
|
|
|
|
|
my %fast_scan_context; |
2289
|
|
|
|
|
|
|
|
2290
|
|
|
|
|
|
|
BEGIN { |
2291
|
39
|
|
|
39
|
|
50174
|
%fast_scan_context = ( |
2292
|
|
|
|
|
|
|
'$' => SCALAR_CONTEXT, |
2293
|
|
|
|
|
|
|
'*' => SCALAR_CONTEXT, |
2294
|
|
|
|
|
|
|
'@' => LIST_CONTEXT, |
2295
|
|
|
|
|
|
|
'%' => LIST_CONTEXT, |
2296
|
|
|
|
|
|
|
'&' => UNKNOWN_CONTEXT, |
2297
|
|
|
|
|
|
|
); |
2298
|
|
|
|
|
|
|
} ## end BEGIN |
2299
|
|
|
|
|
|
|
|
2300
|
|
|
|
|
|
|
sub scan_simple_identifier { |
2301
|
|
|
|
|
|
|
|
2302
|
|
|
|
|
|
|
# This is a wrapper for sub scan_identifier. It does a fast preliminary |
2303
|
|
|
|
|
|
|
# scan for certain common identifiers: |
2304
|
|
|
|
|
|
|
# '$var', '@var', %var, *var, &var, '@{...}', '%{...}' |
2305
|
|
|
|
|
|
|
# If it does not find one of these, or this is a restart, it calls the |
2306
|
|
|
|
|
|
|
# original scanner directly. |
2307
|
|
|
|
|
|
|
|
2308
|
|
|
|
|
|
|
# This gives the same results as the full scanner in about 1/4 the |
2309
|
|
|
|
|
|
|
# total runtime for a typical input stream. |
2310
|
|
|
|
|
|
|
|
2311
|
|
|
|
|
|
|
# Notation: |
2312
|
|
|
|
|
|
|
# $var * 2 |
2313
|
|
|
|
|
|
|
# ^^ ^ |
2314
|
|
|
|
|
|
|
# || | |
2315
|
|
|
|
|
|
|
# || ---- $i_next [= next nonblank pretoken ] |
2316
|
|
|
|
|
|
|
# |----$i_plus_1 [= a bareword ] |
2317
|
|
|
|
|
|
|
# ---$i_begin [= a sigil] |
2318
|
|
|
|
|
|
|
|
2319
|
4791
|
|
|
4791
|
0
|
7125
|
my $self = shift; |
2320
|
|
|
|
|
|
|
|
2321
|
4791
|
|
|
|
|
6836
|
my $i_begin = $i; |
2322
|
4791
|
|
|
|
|
7241
|
my $tok_begin = $tok; |
2323
|
4791
|
|
|
|
|
7307
|
my $i_plus_1 = $i + 1; |
2324
|
4791
|
|
|
|
|
6793
|
my $fast_scan_type; |
2325
|
|
|
|
|
|
|
|
2326
|
|
|
|
|
|
|
#------------------------------------------------------- |
2327
|
|
|
|
|
|
|
# Do full scan for anything following a pointer, such as |
2328
|
|
|
|
|
|
|
# $cref->&*; # a postderef |
2329
|
|
|
|
|
|
|
#------------------------------------------------------- |
2330
|
4791
|
100
|
66
|
|
|
27369
|
if ( $last_nonblank_token eq '->' ) { |
|
|
100
|
66
|
|
|
|
|
|
|
50
|
33
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
33
|
|
|
|
|
2331
|
|
|
|
|
|
|
|
2332
|
|
|
|
|
|
|
} |
2333
|
|
|
|
|
|
|
|
2334
|
|
|
|
|
|
|
#------------------------------ |
2335
|
|
|
|
|
|
|
# quick scan with leading sigil |
2336
|
|
|
|
|
|
|
#------------------------------ |
2337
|
|
|
|
|
|
|
elsif ( !$id_scan_state |
2338
|
|
|
|
|
|
|
&& $i_plus_1 <= $max_token_index |
2339
|
|
|
|
|
|
|
&& $fast_scan_context{$tok} ) |
2340
|
|
|
|
|
|
|
{ |
2341
|
4678
|
|
|
|
|
8438
|
$context = $fast_scan_context{$tok}; |
2342
|
|
|
|
|
|
|
|
2343
|
|
|
|
|
|
|
# look for $var, @var, ... |
2344
|
4678
|
100
|
100
|
|
|
11163
|
if ( $rtoken_type->[$i_plus_1] eq 'w' ) { |
|
|
100
|
66
|
|
|
|
|
2345
|
4390
|
|
|
|
|
6957
|
my $pretype_next = EMPTY_STRING; |
2346
|
4390
|
100
|
|
|
|
9434
|
if ( $i_plus_1 < $max_token_index ) { |
2347
|
4274
|
|
|
|
|
6657
|
my $i_next = $i_plus_1 + 1; |
2348
|
4274
|
100
|
100
|
|
|
13230
|
if ( $rtoken_type->[$i_next] eq 'b' |
2349
|
|
|
|
|
|
|
&& $i_next < $max_token_index ) |
2350
|
|
|
|
|
|
|
{ |
2351
|
1707
|
|
|
|
|
3085
|
$i_next += 1; |
2352
|
|
|
|
|
|
|
} |
2353
|
4274
|
|
|
|
|
7308
|
$pretype_next = $rtoken_type->[$i_next]; |
2354
|
|
|
|
|
|
|
} |
2355
|
4390
|
100
|
100
|
|
|
16207
|
if ( $pretype_next ne ':' && $pretype_next ne "'" ) { |
2356
|
|
|
|
|
|
|
|
2357
|
|
|
|
|
|
|
# Found type 'i' like '$var', '@var', or '%var' |
2358
|
4282
|
|
|
|
|
8389
|
$identifier = $tok . $rtokens->[$i_plus_1]; |
2359
|
4282
|
|
|
|
|
6509
|
$tok = $identifier; |
2360
|
4282
|
|
|
|
|
6973
|
$type = 'i'; |
2361
|
4282
|
|
|
|
|
6136
|
$i = $i_plus_1; |
2362
|
4282
|
|
|
|
|
7289
|
$fast_scan_type = $type; |
2363
|
|
|
|
|
|
|
} |
2364
|
|
|
|
|
|
|
} |
2365
|
|
|
|
|
|
|
|
2366
|
|
|
|
|
|
|
# Look for @{ or %{ . |
2367
|
|
|
|
|
|
|
# But we must let the full scanner handle things ${ because it may |
2368
|
|
|
|
|
|
|
# keep going to get a complete identifier like '${#}' . |
2369
|
|
|
|
|
|
|
elsif ( |
2370
|
|
|
|
|
|
|
$rtoken_type->[$i_plus_1] eq '{' |
2371
|
|
|
|
|
|
|
&& ( $tok_begin eq '@' |
2372
|
|
|
|
|
|
|
|| $tok_begin eq '%' ) |
2373
|
|
|
|
|
|
|
) |
2374
|
|
|
|
|
|
|
{ |
2375
|
|
|
|
|
|
|
|
2376
|
30
|
|
|
|
|
70
|
$identifier = $tok; |
2377
|
30
|
|
|
|
|
64
|
$type = 't'; |
2378
|
30
|
|
|
|
|
58
|
$fast_scan_type = $type; |
2379
|
|
|
|
|
|
|
} |
2380
|
|
|
|
|
|
|
else { |
2381
|
|
|
|
|
|
|
## out of tricks |
2382
|
|
|
|
|
|
|
} |
2383
|
|
|
|
|
|
|
} |
2384
|
|
|
|
|
|
|
|
2385
|
|
|
|
|
|
|
#--------------------------- |
2386
|
|
|
|
|
|
|
# Quick scan with leading -> |
2387
|
|
|
|
|
|
|
# Look for ->[ and ->{ |
2388
|
|
|
|
|
|
|
#--------------------------- |
2389
|
|
|
|
|
|
|
elsif ( |
2390
|
|
|
|
|
|
|
$tok eq '->' |
2391
|
|
|
|
|
|
|
&& $i < $max_token_index |
2392
|
|
|
|
|
|
|
&& ( $rtokens->[$i_plus_1] eq '{' |
2393
|
|
|
|
|
|
|
|| $rtokens->[$i_plus_1] eq '[' ) |
2394
|
|
|
|
|
|
|
) |
2395
|
|
|
|
|
|
|
{ |
2396
|
0
|
|
|
|
|
0
|
$type = $tok; |
2397
|
0
|
|
|
|
|
0
|
$fast_scan_type = $type; |
2398
|
0
|
|
|
|
|
0
|
$identifier = $tok; |
2399
|
0
|
|
|
|
|
0
|
$context = UNKNOWN_CONTEXT; |
2400
|
|
|
|
|
|
|
} |
2401
|
|
|
|
|
|
|
else { |
2402
|
|
|
|
|
|
|
## out of tricks |
2403
|
|
|
|
|
|
|
} |
2404
|
|
|
|
|
|
|
|
2405
|
|
|
|
|
|
|
#-------------------------------------- |
2406
|
|
|
|
|
|
|
# Verify correctness during development |
2407
|
|
|
|
|
|
|
#-------------------------------------- |
2408
|
4791
|
|
|
|
|
6827
|
if ( VERIFY_FASTSCAN && $fast_scan_type ) { |
2409
|
|
|
|
|
|
|
|
2410
|
|
|
|
|
|
|
# We will call the full method |
2411
|
|
|
|
|
|
|
my $identifier_simple = $identifier; |
2412
|
|
|
|
|
|
|
my $tok_simple = $tok; |
2413
|
|
|
|
|
|
|
my $i_simple = $i; |
2414
|
|
|
|
|
|
|
my $context_simple = $context; |
2415
|
|
|
|
|
|
|
|
2416
|
|
|
|
|
|
|
$tok = $tok_begin; |
2417
|
|
|
|
|
|
|
$i = $i_begin; |
2418
|
|
|
|
|
|
|
$self->scan_identifier(); |
2419
|
|
|
|
|
|
|
|
2420
|
|
|
|
|
|
|
if ( $tok ne $tok_simple |
2421
|
|
|
|
|
|
|
|| $type ne $fast_scan_type |
2422
|
|
|
|
|
|
|
|| $i != $i_simple |
2423
|
|
|
|
|
|
|
|| $identifier ne $identifier_simple |
2424
|
|
|
|
|
|
|
|| $id_scan_state |
2425
|
|
|
|
|
|
|
|| $context ne $context_simple ) |
2426
|
|
|
|
|
|
|
{ |
2427
|
|
|
|
|
|
|
print {*STDERR} <<EOM; |
2428
|
|
|
|
|
|
|
scan_simple_identifier differs from scan_identifier: |
2429
|
|
|
|
|
|
|
simple: i=$i_simple, tok=$tok_simple, type=$fast_scan_type, ident=$identifier_simple, context='$context_simple |
2430
|
|
|
|
|
|
|
full: i=$i, tok=$tok, type=$type, ident=$identifier, context='$context state=$id_scan_state |
2431
|
|
|
|
|
|
|
EOM |
2432
|
|
|
|
|
|
|
} |
2433
|
|
|
|
|
|
|
} |
2434
|
|
|
|
|
|
|
|
2435
|
|
|
|
|
|
|
#------------------------------------------------- |
2436
|
|
|
|
|
|
|
# call full scanner if fast method did not succeed |
2437
|
|
|
|
|
|
|
#------------------------------------------------- |
2438
|
4791
|
100
|
|
|
|
10064
|
if ( !$fast_scan_type ) { |
2439
|
479
|
|
|
|
|
1641
|
$self->scan_identifier(); |
2440
|
|
|
|
|
|
|
} |
2441
|
4791
|
|
|
|
|
8394
|
return; |
2442
|
|
|
|
|
|
|
} ## end sub scan_simple_identifier |
2443
|
|
|
|
|
|
|
|
2444
|
|
|
|
|
|
|
sub method_ok_here { |
2445
|
|
|
|
|
|
|
|
2446
|
|
|
|
|
|
|
# Return: |
2447
|
|
|
|
|
|
|
# false if this is definitely an invalid method declaration |
2448
|
|
|
|
|
|
|
# true otherwise (even if not sure) |
2449
|
|
|
|
|
|
|
|
2450
|
|
|
|
|
|
|
# We are trying to avoid problems with old uses of 'method' |
2451
|
|
|
|
|
|
|
# when --use-feature=class is set (rt145706). |
2452
|
|
|
|
|
|
|
# For example, this should cause a return of 'false': |
2453
|
|
|
|
|
|
|
|
2454
|
|
|
|
|
|
|
# method paint => sub { |
2455
|
|
|
|
|
|
|
# return; |
2456
|
|
|
|
|
|
|
# }; |
2457
|
|
|
|
|
|
|
|
2458
|
6
|
|
|
6
|
0
|
15
|
my $self = shift; |
2459
|
|
|
|
|
|
|
|
2460
|
|
|
|
|
|
|
# from do_scan_sub: |
2461
|
6
|
|
|
|
|
16
|
my $i_beg = $i + 1; |
2462
|
6
|
|
|
|
|
13
|
my $pos_beg = $rtoken_map->[$i_beg]; |
2463
|
6
|
|
|
|
|
21
|
pos($input_line) = $pos_beg; |
2464
|
|
|
|
|
|
|
|
2465
|
|
|
|
|
|
|
# TEST 1: look a valid sub NAME |
2466
|
6
|
50
|
|
|
|
43
|
if ( |
2467
|
|
|
|
|
|
|
$input_line =~ m{\G\s* |
2468
|
|
|
|
|
|
|
((?:\w*(?:'|::))*) # package - something that ends in :: or ' |
2469
|
|
|
|
|
|
|
(\w+) # NAME - required |
2470
|
|
|
|
|
|
|
}gcx |
2471
|
|
|
|
|
|
|
) |
2472
|
|
|
|
|
|
|
{ |
2473
|
|
|
|
|
|
|
# For possible future use.. |
2474
|
6
|
|
|
|
|
21
|
my $subname = $2; |
2475
|
6
|
50
|
|
|
|
22
|
my $package = $1 ? $1 : EMPTY_STRING; |
2476
|
|
|
|
|
|
|
} |
2477
|
|
|
|
|
|
|
else { |
2478
|
0
|
|
|
|
|
0
|
return; |
2479
|
|
|
|
|
|
|
} |
2480
|
|
|
|
|
|
|
|
2481
|
|
|
|
|
|
|
# TEST 2: look for invalid characters after name, such as here: |
2482
|
|
|
|
|
|
|
# method paint => sub { |
2483
|
|
|
|
|
|
|
# ... |
2484
|
|
|
|
|
|
|
# } |
2485
|
6
|
|
|
|
|
15
|
my $next_char = EMPTY_STRING; |
2486
|
6
|
100
|
|
|
|
30
|
if ( $input_line =~ m/\s*(\S)/gcx ) { $next_char = $1 } |
|
5
|
|
|
|
|
13
|
|
2487
|
6
|
100
|
66
|
|
|
37
|
if ( !$next_char || $next_char eq '#' ) { |
2488
|
1
|
|
|
|
|
7
|
( $next_char, my $i_next ) = |
2489
|
|
|
|
|
|
|
$self->find_next_nonblank_token( $max_token_index, |
2490
|
|
|
|
|
|
|
$rtokens, $max_token_index ); |
2491
|
|
|
|
|
|
|
} |
2492
|
|
|
|
|
|
|
|
2493
|
6
|
50
|
|
|
|
22
|
if ( !$next_char ) { |
2494
|
|
|
|
|
|
|
|
2495
|
|
|
|
|
|
|
# out of characters - give up |
2496
|
0
|
|
|
|
|
0
|
return; |
2497
|
|
|
|
|
|
|
} |
2498
|
|
|
|
|
|
|
|
2499
|
|
|
|
|
|
|
# Possibly valid next token types: |
2500
|
|
|
|
|
|
|
# '(' could start prototype or signature |
2501
|
|
|
|
|
|
|
# ':' could start ATTRIBUTE |
2502
|
|
|
|
|
|
|
# '{' cold start BLOCK |
2503
|
|
|
|
|
|
|
# ';' or '}' could end a statement |
2504
|
6
|
100
|
|
|
|
28
|
if ( $next_char !~ /^[\(\:\{\;\}]/ ) { |
2505
|
|
|
|
|
|
|
|
2506
|
|
|
|
|
|
|
# This does not match use feature 'class' syntax |
2507
|
3
|
|
|
|
|
21
|
return; |
2508
|
|
|
|
|
|
|
} |
2509
|
|
|
|
|
|
|
|
2510
|
|
|
|
|
|
|
# We will stop here and assume that this is valid syntax for |
2511
|
|
|
|
|
|
|
# use feature 'class'. |
2512
|
3
|
|
|
|
|
15
|
return 1; |
2513
|
|
|
|
|
|
|
} ## end sub method_ok_here |
2514
|
|
|
|
|
|
|
|
2515
|
|
|
|
|
|
|
sub class_ok_here { |
2516
|
|
|
|
|
|
|
|
2517
|
|
|
|
|
|
|
# Return: |
2518
|
|
|
|
|
|
|
# false if this is definitely an invalid class declaration |
2519
|
|
|
|
|
|
|
# true otherwise (even if not sure) |
2520
|
|
|
|
|
|
|
|
2521
|
|
|
|
|
|
|
# We are trying to avoid problems with old uses of 'class' |
2522
|
|
|
|
|
|
|
# when --use-feature=class is set (rt145706). We look ahead |
2523
|
|
|
|
|
|
|
# see if this use of 'class' is obviously inconsistent with |
2524
|
|
|
|
|
|
|
# the syntax of use feature 'class'. This allows the default |
2525
|
|
|
|
|
|
|
# setting --use-feature=class to work for old syntax too. |
2526
|
|
|
|
|
|
|
|
2527
|
|
|
|
|
|
|
# Valid class declarations look like |
2528
|
|
|
|
|
|
|
# class NAME ?ATTRS ?VERSION ?BLOCK |
2529
|
|
|
|
|
|
|
# where ATTRS VERSION and BLOCK are optional |
2530
|
|
|
|
|
|
|
|
2531
|
|
|
|
|
|
|
# For example, this should produce a return of 'false': |
2532
|
|
|
|
|
|
|
# |
2533
|
|
|
|
|
|
|
# class ExtendsBasicAttributes is BasicAttributes{ |
2534
|
|
|
|
|
|
|
|
2535
|
6
|
|
|
6
|
0
|
12
|
my $self = shift; |
2536
|
|
|
|
|
|
|
|
2537
|
|
|
|
|
|
|
# TEST 1: class stmt can only go where a new statment can start |
2538
|
6
|
50
|
|
|
|
16
|
if ( !new_statement_ok() ) { return } |
|
0
|
|
|
|
|
0
|
|
2539
|
|
|
|
|
|
|
|
2540
|
6
|
|
|
|
|
17
|
my $i_beg = $i + 1; |
2541
|
6
|
|
|
|
|
29
|
my $pos_beg = $rtoken_map->[$i_beg]; |
2542
|
6
|
|
|
|
|
18
|
pos($input_line) = $pos_beg; |
2543
|
|
|
|
|
|
|
|
2544
|
|
|
|
|
|
|
# TEST 2: look for a valid NAME |
2545
|
6
|
50
|
|
|
|
39
|
if ( |
2546
|
|
|
|
|
|
|
$input_line =~ m{\G\s* |
2547
|
|
|
|
|
|
|
((?:\w*(?:'|::))*) # package - something that ends in :: or ' |
2548
|
|
|
|
|
|
|
(\w+) # NAME - required |
2549
|
|
|
|
|
|
|
}gcx |
2550
|
|
|
|
|
|
|
) |
2551
|
|
|
|
|
|
|
{ |
2552
|
|
|
|
|
|
|
# For possible future use.. |
2553
|
6
|
|
|
|
|
17
|
my $subname = $2; |
2554
|
6
|
100
|
|
|
|
21
|
my $package = $1 ? $1 : EMPTY_STRING; |
2555
|
|
|
|
|
|
|
} |
2556
|
|
|
|
|
|
|
else { |
2557
|
0
|
|
|
|
|
0
|
return; |
2558
|
|
|
|
|
|
|
} |
2559
|
|
|
|
|
|
|
|
2560
|
|
|
|
|
|
|
# TEST 3: look for valid characters after NAME |
2561
|
6
|
|
|
|
|
12
|
my $next_char = EMPTY_STRING; |
2562
|
6
|
100
|
|
|
|
22
|
if ( $input_line =~ m/\s*(\S)/gcx ) { $next_char = $1 } |
|
5
|
|
|
|
|
11
|
|
2563
|
6
|
100
|
66
|
|
|
32
|
if ( !$next_char || $next_char eq '#' ) { |
2564
|
1
|
|
|
|
|
4
|
( $next_char, my $i_next ) = |
2565
|
|
|
|
|
|
|
$self->find_next_nonblank_token( $max_token_index, |
2566
|
|
|
|
|
|
|
$rtokens, $max_token_index ); |
2567
|
|
|
|
|
|
|
} |
2568
|
6
|
50
|
|
|
|
15
|
if ( !$next_char ) { |
2569
|
|
|
|
|
|
|
|
2570
|
|
|
|
|
|
|
# out of characters - give up |
2571
|
0
|
|
|
|
|
0
|
return; |
2572
|
|
|
|
|
|
|
} |
2573
|
|
|
|
|
|
|
|
2574
|
|
|
|
|
|
|
# Must see one of: ATTRIBUTE, VERSION, BLOCK, or end stmt |
2575
|
|
|
|
|
|
|
|
2576
|
|
|
|
|
|
|
# Possibly valid next token types: |
2577
|
|
|
|
|
|
|
# ':' could start ATTRIBUTE |
2578
|
|
|
|
|
|
|
# '\d' could start VERSION |
2579
|
|
|
|
|
|
|
# '{' cold start BLOCK |
2580
|
|
|
|
|
|
|
# ';' could end a statement |
2581
|
|
|
|
|
|
|
# '}' could end statement but would be strange |
2582
|
|
|
|
|
|
|
|
2583
|
6
|
100
|
|
|
|
19
|
if ( $next_char !~ /^[\:\d\{\;\}]/ ) { |
2584
|
|
|
|
|
|
|
|
2585
|
|
|
|
|
|
|
# This does not match use feature 'class' syntax |
2586
|
2
|
|
|
|
|
9
|
return; |
2587
|
|
|
|
|
|
|
} |
2588
|
|
|
|
|
|
|
|
2589
|
|
|
|
|
|
|
# We will stop here and assume that this is valid syntax for |
2590
|
|
|
|
|
|
|
# use feature 'class'. |
2591
|
4
|
|
|
|
|
20
|
return 1; |
2592
|
|
|
|
|
|
|
} ## end sub class_ok_here |
2593
|
|
|
|
|
|
|
|
2594
|
|
|
|
|
|
|
sub scan_id { |
2595
|
331
|
|
|
331
|
0
|
704
|
my $self = shift; |
2596
|
331
|
|
|
|
|
1302
|
( $i, $tok, $type, $id_scan_state ) = |
2597
|
|
|
|
|
|
|
$self->scan_id_do( $input_line, $i, $tok, $rtokens, $rtoken_map, |
2598
|
|
|
|
|
|
|
$id_scan_state, $max_token_index ); |
2599
|
331
|
|
|
|
|
832
|
return; |
2600
|
|
|
|
|
|
|
} ## end sub scan_id |
2601
|
|
|
|
|
|
|
|
2602
|
|
|
|
|
|
|
sub scan_number { |
2603
|
629
|
|
|
629
|
0
|
1053
|
my $self = shift; |
2604
|
629
|
|
|
|
|
964
|
my $number; |
2605
|
629
|
|
|
|
|
1770
|
( $i, $type, $number ) = |
2606
|
|
|
|
|
|
|
$self->scan_number_do( $input_line, $i, $rtoken_map, $type, |
2607
|
|
|
|
|
|
|
$max_token_index ); |
2608
|
629
|
|
|
|
|
1424
|
return $number; |
2609
|
|
|
|
|
|
|
} ## end sub scan_number |
2610
|
|
|
|
|
|
|
|
2611
|
39
|
|
|
39
|
|
359
|
use constant VERIFY_FASTNUM => 0; |
|
39
|
|
|
|
|
131
|
|
|
39
|
|
|
|
|
32266
|
|
2612
|
|
|
|
|
|
|
|
2613
|
|
|
|
|
|
|
sub scan_number_fast { |
2614
|
|
|
|
|
|
|
|
2615
|
|
|
|
|
|
|
# This is a wrapper for sub scan_number. It does a fast preliminary |
2616
|
|
|
|
|
|
|
# scan for a simple integer. It calls the original scan_number if it |
2617
|
|
|
|
|
|
|
# does not find one. |
2618
|
|
|
|
|
|
|
|
2619
|
2277
|
|
|
2277
|
0
|
3469
|
my $self = shift; |
2620
|
2277
|
|
|
|
|
3559
|
my $i_begin = $i; |
2621
|
2277
|
|
|
|
|
3709
|
my $tok_begin = $tok; |
2622
|
2277
|
|
|
|
|
3147
|
my $number; |
2623
|
|
|
|
|
|
|
|
2624
|
|
|
|
|
|
|
#--------------------------------- |
2625
|
|
|
|
|
|
|
# Quick check for (signed) integer |
2626
|
|
|
|
|
|
|
#--------------------------------- |
2627
|
|
|
|
|
|
|
|
2628
|
|
|
|
|
|
|
# This will be the string of digits: |
2629
|
2277
|
|
|
|
|
3369
|
my $i_d = $i; |
2630
|
2277
|
|
|
|
|
3822
|
my $tok_d = $tok; |
2631
|
2277
|
|
|
|
|
3912
|
my $typ_d = $rtoken_type->[$i_d]; |
2632
|
|
|
|
|
|
|
|
2633
|
|
|
|
|
|
|
# check for signed integer |
2634
|
2277
|
|
|
|
|
3636
|
my $sign = EMPTY_STRING; |
2635
|
2277
|
50
|
66
|
|
|
8699
|
if ( $typ_d ne 'd' |
|
|
|
66
|
|
|
|
|
|
|
|
33
|
|
|
|
|
2636
|
|
|
|
|
|
|
&& ( $typ_d eq '+' || $typ_d eq '-' ) |
2637
|
|
|
|
|
|
|
&& $i_d < $max_token_index ) |
2638
|
|
|
|
|
|
|
{ |
2639
|
343
|
|
|
|
|
585
|
$sign = $tok_d; |
2640
|
343
|
|
|
|
|
583
|
$i_d++; |
2641
|
343
|
|
|
|
|
606
|
$tok_d = $rtokens->[$i_d]; |
2642
|
343
|
|
|
|
|
599
|
$typ_d = $rtoken_type->[$i_d]; |
2643
|
|
|
|
|
|
|
} |
2644
|
|
|
|
|
|
|
|
2645
|
|
|
|
|
|
|
# Handle integers |
2646
|
2277
|
100
|
100
|
|
|
16884
|
if ( |
|
|
|
100
|
|
|
|
|
2647
|
|
|
|
|
|
|
$typ_d eq 'd' |
2648
|
|
|
|
|
|
|
&& ( |
2649
|
|
|
|
|
|
|
$i_d == $max_token_index |
2650
|
|
|
|
|
|
|
|| ( $i_d < $max_token_index |
2651
|
|
|
|
|
|
|
&& $rtoken_type->[ $i_d + 1 ] ne '.' |
2652
|
|
|
|
|
|
|
&& $rtoken_type->[ $i_d + 1 ] ne 'w' ) |
2653
|
|
|
|
|
|
|
) |
2654
|
|
|
|
|
|
|
) |
2655
|
|
|
|
|
|
|
{ |
2656
|
|
|
|
|
|
|
# Let let full scanner handle multi-digit integers beginning with |
2657
|
|
|
|
|
|
|
# '0' because there could be error messages. For example, '009' is |
2658
|
|
|
|
|
|
|
# not a valid number. |
2659
|
|
|
|
|
|
|
|
2660
|
1715
|
100
|
100
|
|
|
7269
|
if ( $tok_d eq '0' || substr( $tok_d, 0, 1 ) ne '0' ) { |
2661
|
1658
|
|
|
|
|
3208
|
$number = $sign . $tok_d; |
2662
|
1658
|
|
|
|
|
2561
|
$type = 'n'; |
2663
|
1658
|
|
|
|
|
2802
|
$i = $i_d; |
2664
|
|
|
|
|
|
|
} |
2665
|
|
|
|
|
|
|
} |
2666
|
|
|
|
|
|
|
|
2667
|
|
|
|
|
|
|
#-------------------------------------- |
2668
|
|
|
|
|
|
|
# Verify correctness during development |
2669
|
|
|
|
|
|
|
#-------------------------------------- |
2670
|
2277
|
|
|
|
|
3241
|
if ( VERIFY_FASTNUM && defined($number) ) { |
2671
|
|
|
|
|
|
|
|
2672
|
|
|
|
|
|
|
# We will call the full method |
2673
|
|
|
|
|
|
|
my $type_simple = $type; |
2674
|
|
|
|
|
|
|
my $i_simple = $i; |
2675
|
|
|
|
|
|
|
my $number_simple = $number; |
2676
|
|
|
|
|
|
|
|
2677
|
|
|
|
|
|
|
$tok = $tok_begin; |
2678
|
|
|
|
|
|
|
$i = $i_begin; |
2679
|
|
|
|
|
|
|
$number = $self->scan_number(); |
2680
|
|
|
|
|
|
|
|
2681
|
|
|
|
|
|
|
if ( $type ne $type_simple |
2682
|
|
|
|
|
|
|
|| ( $i != $i_simple && $i <= $max_token_index ) |
2683
|
|
|
|
|
|
|
|| $number ne $number_simple ) |
2684
|
|
|
|
|
|
|
{ |
2685
|
|
|
|
|
|
|
print {*STDERR} <<EOM; |
2686
|
|
|
|
|
|
|
scan_number_fast differs from scan_number: |
2687
|
|
|
|
|
|
|
simple: i=$i_simple, type=$type_simple, number=$number_simple |
2688
|
|
|
|
|
|
|
full: i=$i, type=$type, number=$number |
2689
|
|
|
|
|
|
|
EOM |
2690
|
|
|
|
|
|
|
} |
2691
|
|
|
|
|
|
|
} |
2692
|
|
|
|
|
|
|
|
2693
|
|
|
|
|
|
|
#---------------------------------------- |
2694
|
|
|
|
|
|
|
# call full scanner if may not be integer |
2695
|
|
|
|
|
|
|
#---------------------------------------- |
2696
|
2277
|
100
|
|
|
|
5066
|
if ( !defined($number) ) { |
2697
|
619
|
|
|
|
|
1692
|
$number = $self->scan_number(); |
2698
|
|
|
|
|
|
|
} |
2699
|
2277
|
|
|
|
|
5488
|
return $number; |
2700
|
|
|
|
|
|
|
} ## end sub scan_number_fast |
2701
|
|
|
|
|
|
|
|
2702
|
|
|
|
|
|
|
# a sub to warn if token found where term expected |
2703
|
|
|
|
|
|
|
sub error_if_expecting_TERM { |
2704
|
9
|
|
|
9
|
0
|
23
|
my $self = shift; |
2705
|
9
|
50
|
|
|
|
31
|
if ( $expecting == TERM ) { |
2706
|
9
|
50
|
|
|
|
32
|
if ( $really_want_term{$last_nonblank_type} ) { |
2707
|
0
|
|
|
|
|
0
|
$self->report_unexpected( $tok, "term", $i_tok, |
2708
|
|
|
|
|
|
|
$last_nonblank_i, $rtoken_map, $rtoken_type, $input_line ); |
2709
|
0
|
|
|
|
|
0
|
return 1; |
2710
|
|
|
|
|
|
|
} |
2711
|
|
|
|
|
|
|
} |
2712
|
9
|
|
|
|
|
20
|
return; |
2713
|
|
|
|
|
|
|
} ## end sub error_if_expecting_TERM |
2714
|
|
|
|
|
|
|
|
2715
|
|
|
|
|
|
|
# a sub to warn if token found where operator expected |
2716
|
|
|
|
|
|
|
sub error_if_expecting_OPERATOR { |
2717
|
769
|
|
|
769
|
0
|
1577
|
my ( $self, $thing ) = @_; |
2718
|
769
|
50
|
|
|
|
1859
|
if ( $expecting == OPERATOR ) { |
2719
|
0
|
0
|
|
|
|
0
|
if ( !defined($thing) ) { $thing = $tok } |
|
0
|
|
|
|
|
0
|
|
2720
|
0
|
|
|
|
|
0
|
$self->report_unexpected( $thing, "operator", $i_tok, |
2721
|
|
|
|
|
|
|
$last_nonblank_i, $rtoken_map, $rtoken_type, $input_line ); |
2722
|
0
|
0
|
|
|
|
0
|
if ( $i_tok == 0 ) { |
2723
|
0
|
|
|
|
|
0
|
$self->interrupt_logfile(); |
2724
|
0
|
|
|
|
|
0
|
$self->warning("Missing ';' or ',' above?\n"); |
2725
|
0
|
|
|
|
|
0
|
$self->resume_logfile(); |
2726
|
|
|
|
|
|
|
} |
2727
|
0
|
|
|
|
|
0
|
return 1; |
2728
|
|
|
|
|
|
|
} |
2729
|
769
|
|
|
|
|
1404
|
return; |
2730
|
|
|
|
|
|
|
} ## end sub error_if_expecting_OPERATOR |
2731
|
|
|
|
|
|
|
|
2732
|
|
|
|
|
|
|
# ------------------------------------------------------------ |
2733
|
|
|
|
|
|
|
# end scanner interfaces |
2734
|
|
|
|
|
|
|
# ------------------------------------------------------------ |
2735
|
|
|
|
|
|
|
|
2736
|
|
|
|
|
|
|
#------------------ |
2737
|
|
|
|
|
|
|
# Tokenization subs |
2738
|
|
|
|
|
|
|
#------------------ |
2739
|
|
|
|
|
|
|
sub do_GREATER_THAN_SIGN { |
2740
|
|
|
|
|
|
|
|
2741
|
31
|
|
|
31
|
0
|
100
|
my $self = shift; |
2742
|
|
|
|
|
|
|
|
2743
|
|
|
|
|
|
|
# '>' |
2744
|
31
|
50
|
|
|
|
120
|
$self->error_if_expecting_TERM() |
2745
|
|
|
|
|
|
|
if ( $expecting == TERM ); |
2746
|
31
|
|
|
|
|
73
|
return; |
2747
|
|
|
|
|
|
|
} ## end sub do_GREATER_THAN_SIGN |
2748
|
|
|
|
|
|
|
|
2749
|
|
|
|
|
|
|
sub do_VERTICAL_LINE { |
2750
|
|
|
|
|
|
|
|
2751
|
4
|
|
|
4
|
0
|
9
|
my $self = shift; |
2752
|
|
|
|
|
|
|
|
2753
|
|
|
|
|
|
|
# '|' |
2754
|
4
|
50
|
|
|
|
13
|
$self->error_if_expecting_TERM() |
2755
|
|
|
|
|
|
|
if ( $expecting == TERM ); |
2756
|
4
|
|
|
|
|
9
|
return; |
2757
|
|
|
|
|
|
|
} ## end sub do_VERTICAL_LINE |
2758
|
|
|
|
|
|
|
|
2759
|
|
|
|
|
|
|
# An identifier in possible indirect object location followed by any of |
2760
|
|
|
|
|
|
|
# these tokens: -> , ; } (plus others) is not an indirect object. Fix c257. |
2761
|
|
|
|
|
|
|
my %Z_test_hash; |
2762
|
|
|
|
|
|
|
|
2763
|
|
|
|
|
|
|
BEGIN { |
2764
|
39
|
|
|
39
|
|
384
|
my @qZ = qw# |
2765
|
|
|
|
|
|
|
-> ; } ) ] |
2766
|
|
|
|
|
|
|
=> =~ = == !~ || >= != *= .. && |= .= -= += <= %= |
2767
|
|
|
|
|
|
|
^= &&= ||= //= <=> |
2768
|
|
|
|
|
|
|
#; |
2769
|
39
|
|
|
|
|
168
|
push @qZ, ','; |
2770
|
39
|
|
|
|
|
352195
|
@{Z_test_hash}{@qZ} = (1) x scalar(@qZ); |
2771
|
|
|
|
|
|
|
} |
2772
|
|
|
|
|
|
|
|
2773
|
|
|
|
|
|
|
sub do_DOLLAR_SIGN { |
2774
|
|
|
|
|
|
|
|
2775
|
4036
|
|
|
4036
|
0
|
6699
|
my $self = shift; |
2776
|
|
|
|
|
|
|
|
2777
|
|
|
|
|
|
|
# '$' |
2778
|
|
|
|
|
|
|
# start looking for a scalar |
2779
|
4036
|
50
|
|
|
|
8890
|
$self->error_if_expecting_OPERATOR("Scalar") |
2780
|
|
|
|
|
|
|
if ( $expecting == OPERATOR ); |
2781
|
4036
|
|
|
|
|
12581
|
$self->scan_simple_identifier(); |
2782
|
|
|
|
|
|
|
|
2783
|
4036
|
100
|
|
|
|
9110
|
if ( $identifier eq '$^W' ) { |
2784
|
1
|
|
|
|
|
3
|
$self->[_saw_perl_dash_w_] = 1; |
2785
|
|
|
|
|
|
|
} |
2786
|
|
|
|
|
|
|
|
2787
|
|
|
|
|
|
|
# Check for identifier in indirect object slot |
2788
|
|
|
|
|
|
|
# (vorboard.pl, sort.t). Something like: |
2789
|
|
|
|
|
|
|
# /^(print|printf|sort|exec|system)$/ |
2790
|
4036
|
100
|
66
|
|
|
32229
|
if ( |
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
|
66
|
|
|
|
|
2791
|
|
|
|
|
|
|
$is_indirect_object_taker{$last_nonblank_token} |
2792
|
|
|
|
|
|
|
&& $last_nonblank_type eq 'k' |
2793
|
|
|
|
|
|
|
|| ( ( $last_nonblank_token eq '(' ) |
2794
|
|
|
|
|
|
|
&& $is_indirect_object_taker{ $rparen_type->[$paren_depth] } ) |
2795
|
|
|
|
|
|
|
|| ( $last_nonblank_type eq 'w' |
2796
|
|
|
|
|
|
|
|| $last_nonblank_type eq 'U' ) # possible object |
2797
|
|
|
|
|
|
|
) |
2798
|
|
|
|
|
|
|
{ |
2799
|
|
|
|
|
|
|
|
2800
|
|
|
|
|
|
|
# An identifier followed by '->' is not indirect object; |
2801
|
|
|
|
|
|
|
# fixes b1175, b1176. Fix c257: Likewise for other tokens like |
2802
|
|
|
|
|
|
|
# comma, semicolon, closing brace, and single space. |
2803
|
98
|
|
|
|
|
640
|
my ( $next_nonblank_token, $i_next ) = |
2804
|
|
|
|
|
|
|
$self->find_next_noncomment_token( $i, $rtokens, |
2805
|
|
|
|
|
|
|
$max_token_index ); |
2806
|
98
|
100
|
|
|
|
396
|
$type = 'Z' if ( !$Z_test_hash{$next_nonblank_token} ); |
2807
|
|
|
|
|
|
|
} |
2808
|
4036
|
|
|
|
|
6659
|
return; |
2809
|
|
|
|
|
|
|
} ## end sub do_DOLLAR_SIGN |
2810
|
|
|
|
|
|
|
|
2811
|
|
|
|
|
|
|
sub do_LEFT_PARENTHESIS { |
2812
|
|
|
|
|
|
|
|
2813
|
2125
|
|
|
2125
|
0
|
3962
|
my $self = shift; |
2814
|
|
|
|
|
|
|
|
2815
|
|
|
|
|
|
|
# '(' |
2816
|
2125
|
|
|
|
|
3452
|
++$paren_depth; |
2817
|
|
|
|
|
|
|
|
2818
|
|
|
|
|
|
|
# variable to enable check for brace after closing paren (c230) |
2819
|
2125
|
|
|
|
|
3802
|
my $want_brace = EMPTY_STRING; |
2820
|
|
|
|
|
|
|
|
2821
|
2125
|
100
|
|
|
|
6374
|
if ($want_paren) { |
|
|
100
|
|
|
|
|
|
2822
|
240
|
|
|
|
|
564
|
$container_type = $want_paren; |
2823
|
240
|
|
|
|
|
502
|
$want_brace = $want_paren; |
2824
|
240
|
|
|
|
|
501
|
$want_paren = EMPTY_STRING; |
2825
|
|
|
|
|
|
|
} |
2826
|
|
|
|
|
|
|
elsif ( $statement_type =~ /^sub\b/ ) { |
2827
|
14
|
|
|
|
|
39
|
$container_type = $statement_type; |
2828
|
|
|
|
|
|
|
} |
2829
|
|
|
|
|
|
|
else { |
2830
|
1871
|
|
|
|
|
3217
|
$container_type = $last_nonblank_token; |
2831
|
|
|
|
|
|
|
|
2832
|
|
|
|
|
|
|
# We can check for a syntax error here of unexpected '(', |
2833
|
|
|
|
|
|
|
# but this is going to get messy... |
2834
|
1871
|
100
|
100
|
|
|
7703
|
if ( |
2835
|
|
|
|
|
|
|
$expecting == OPERATOR |
2836
|
|
|
|
|
|
|
|
2837
|
|
|
|
|
|
|
# Be sure this is not a method call of the form |
2838
|
|
|
|
|
|
|
# &method(...), $method->(..), &{method}(...), |
2839
|
|
|
|
|
|
|
# $ref[2](list) is ok & short for $ref[2]->(list) |
2840
|
|
|
|
|
|
|
# NOTE: at present, braces in something like &{ xxx } |
2841
|
|
|
|
|
|
|
# are not marked as a block, we might have a method call. |
2842
|
|
|
|
|
|
|
# Added ')' to fix case c017, something like ()()() |
2843
|
|
|
|
|
|
|
&& $last_nonblank_token !~ /^(?:[\]\}\)\&]|\-\>)/ |
2844
|
|
|
|
|
|
|
) |
2845
|
|
|
|
|
|
|
{ |
2846
|
|
|
|
|
|
|
|
2847
|
|
|
|
|
|
|
# ref: camel 3 p 703. |
2848
|
3
|
50
|
|
|
|
10
|
if ( $last_last_nonblank_token eq 'do' ) { |
2849
|
0
|
|
|
|
|
0
|
$self->complain( |
2850
|
|
|
|
|
|
|
"do SUBROUTINE is deprecated; consider & or -> notation\n" |
2851
|
|
|
|
|
|
|
); |
2852
|
|
|
|
|
|
|
} |
2853
|
|
|
|
|
|
|
else { |
2854
|
|
|
|
|
|
|
|
2855
|
|
|
|
|
|
|
# if this is an empty list, (), then it is not an |
2856
|
|
|
|
|
|
|
# error; for example, we might have a constant pi and |
2857
|
|
|
|
|
|
|
# invoke it with pi() or just pi; |
2858
|
3
|
|
|
|
|
8
|
my ( $next_nonblank_token, $i_next ) = |
2859
|
|
|
|
|
|
|
$self->find_next_nonblank_token( $i, $rtokens, |
2860
|
|
|
|
|
|
|
$max_token_index ); |
2861
|
|
|
|
|
|
|
|
2862
|
|
|
|
|
|
|
# Patch for c029: give up error check if |
2863
|
|
|
|
|
|
|
# a side comment follows |
2864
|
3
|
50
|
33
|
|
|
13
|
if ( $next_nonblank_token ne ')' |
2865
|
|
|
|
|
|
|
&& $next_nonblank_token ne '#' ) |
2866
|
|
|
|
|
|
|
{ |
2867
|
0
|
|
|
|
|
0
|
my $hint; |
2868
|
|
|
|
|
|
|
|
2869
|
0
|
|
|
|
|
0
|
$self->error_if_expecting_OPERATOR('('); |
2870
|
|
|
|
|
|
|
|
2871
|
0
|
0
|
|
|
|
0
|
if ( $last_nonblank_type eq 'C' ) { |
|
|
0
|
|
|
|
|
|
2872
|
0
|
|
|
|
|
0
|
$hint = |
2873
|
|
|
|
|
|
|
"$last_nonblank_token has a void prototype\n"; |
2874
|
|
|
|
|
|
|
} |
2875
|
|
|
|
|
|
|
elsif ( $last_nonblank_type eq 'i' ) { |
2876
|
0
|
0
|
0
|
|
|
0
|
if ( $i_tok > 0 |
2877
|
|
|
|
|
|
|
&& $last_nonblank_token =~ /^\$/ ) |
2878
|
|
|
|
|
|
|
{ |
2879
|
0
|
|
|
|
|
0
|
$hint = |
2880
|
|
|
|
|
|
|
"Do you mean '$last_nonblank_token->(' ?\n"; |
2881
|
|
|
|
|
|
|
} |
2882
|
|
|
|
|
|
|
} |
2883
|
|
|
|
|
|
|
else { |
2884
|
|
|
|
|
|
|
## no hint |
2885
|
|
|
|
|
|
|
} |
2886
|
0
|
0
|
|
|
|
0
|
if ($hint) { |
2887
|
0
|
|
|
|
|
0
|
$self->interrupt_logfile(); |
2888
|
0
|
|
|
|
|
0
|
$self->warning($hint); |
2889
|
0
|
|
|
|
|
0
|
$self->resume_logfile(); |
2890
|
|
|
|
|
|
|
} |
2891
|
|
|
|
|
|
|
} ## end if ( $next_nonblank_token... |
2892
|
|
|
|
|
|
|
} ## end else [ if ( $last_last_nonblank_token... |
2893
|
|
|
|
|
|
|
} ## end if ( $expecting == OPERATOR... |
2894
|
|
|
|
|
|
|
} |
2895
|
|
|
|
|
|
|
|
2896
|
2125
|
|
|
|
|
7273
|
( $type_sequence, $indent_flag ) = |
2897
|
|
|
|
|
|
|
$self->increase_nesting_depth( PAREN, $rtoken_map->[$i_tok] ); |
2898
|
|
|
|
|
|
|
|
2899
|
|
|
|
|
|
|
# propagate types down through nested parens |
2900
|
|
|
|
|
|
|
# for example: the second paren in 'if ((' would be structural |
2901
|
|
|
|
|
|
|
# since the first is. |
2902
|
|
|
|
|
|
|
|
2903
|
2125
|
100
|
|
|
|
5491
|
if ( $last_nonblank_token eq '(' ) { |
2904
|
61
|
|
|
|
|
178
|
$type = $last_nonblank_type; |
2905
|
|
|
|
|
|
|
} |
2906
|
|
|
|
|
|
|
|
2907
|
|
|
|
|
|
|
# We exclude parens as structural after a ',' because it |
2908
|
|
|
|
|
|
|
# causes subtle problems with continuation indentation for |
2909
|
|
|
|
|
|
|
# something like this, where the first 'or' will not get |
2910
|
|
|
|
|
|
|
# indented. |
2911
|
|
|
|
|
|
|
# |
2912
|
|
|
|
|
|
|
# assert( |
2913
|
|
|
|
|
|
|
# __LINE__, |
2914
|
|
|
|
|
|
|
# ( not defined $check ) |
2915
|
|
|
|
|
|
|
# or ref $check |
2916
|
|
|
|
|
|
|
# or $check eq "new" |
2917
|
|
|
|
|
|
|
# or $check eq "old", |
2918
|
|
|
|
|
|
|
# ); |
2919
|
|
|
|
|
|
|
# |
2920
|
|
|
|
|
|
|
# Likewise, we exclude parens where a statement can start |
2921
|
|
|
|
|
|
|
# because of problems with continuation indentation, like |
2922
|
|
|
|
|
|
|
# these: |
2923
|
|
|
|
|
|
|
# |
2924
|
|
|
|
|
|
|
# ($firstline =~ /^#\!.*perl/) |
2925
|
|
|
|
|
|
|
# and (print $File::Find::name, "\n") |
2926
|
|
|
|
|
|
|
# and (return 1); |
2927
|
|
|
|
|
|
|
# |
2928
|
|
|
|
|
|
|
# (ref($usage_fref) =~ /CODE/) |
2929
|
|
|
|
|
|
|
# ? &$usage_fref |
2930
|
|
|
|
|
|
|
# : (&blast_usage, &blast_params, &blast_general_params); |
2931
|
|
|
|
|
|
|
|
2932
|
|
|
|
|
|
|
else { |
2933
|
2064
|
|
|
|
|
3480
|
$type = '{'; |
2934
|
|
|
|
|
|
|
} |
2935
|
|
|
|
|
|
|
|
2936
|
2125
|
50
|
|
|
|
5224
|
if ( $last_nonblank_type eq ')' ) { |
2937
|
0
|
|
|
|
|
0
|
$self->warning( |
2938
|
|
|
|
|
|
|
"Syntax error? found token '$last_nonblank_type' then '('\n"); |
2939
|
|
|
|
|
|
|
} |
2940
|
|
|
|
|
|
|
|
2941
|
|
|
|
|
|
|
# git #105: Copy container type and want-brace flag at ') ('; |
2942
|
|
|
|
|
|
|
# propagate the container type onward so that any subsequent brace gets |
2943
|
|
|
|
|
|
|
# correctly marked. I have implemented this as a general rule, which |
2944
|
|
|
|
|
|
|
# should be safe, but if necessary it could be restricted to certain |
2945
|
|
|
|
|
|
|
# container statement types such as 'for'. |
2946
|
2125
|
100
|
|
|
|
5036
|
if ( $last_nonblank_token eq ')' ) { |
2947
|
1
|
|
|
|
|
2
|
my $rvars = $rparen_vars->[$paren_depth]; |
2948
|
1
|
50
|
|
|
|
7
|
if ( defined($rvars) ) { |
2949
|
1
|
|
|
|
|
4
|
$container_type = $rparen_type->[$paren_depth]; |
2950
|
1
|
|
|
|
|
4
|
( my $type_lp, $want_brace ) = @{$rvars}; |
|
1
|
|
|
|
|
5
|
|
2951
|
|
|
|
|
|
|
} |
2952
|
|
|
|
|
|
|
} |
2953
|
|
|
|
|
|
|
|
2954
|
2125
|
|
|
|
|
4171
|
$rparen_type->[$paren_depth] = $container_type; |
2955
|
2125
|
|
|
|
|
6030
|
$rparen_vars->[$paren_depth] = [ $type, $want_brace ]; |
2956
|
2125
|
|
|
|
|
3998
|
$rparen_semicolon_count->[$paren_depth] = 0; |
2957
|
|
|
|
|
|
|
|
2958
|
2125
|
|
|
|
|
3791
|
return; |
2959
|
|
|
|
|
|
|
|
2960
|
|
|
|
|
|
|
} ## end sub do_LEFT_PARENTHESIS |
2961
|
|
|
|
|
|
|
|
2962
|
|
|
|
|
|
|
sub do_RIGHT_PARENTHESIS { |
2963
|
|
|
|
|
|
|
|
2964
|
2125
|
|
|
2125
|
0
|
4387
|
my $self = shift; |
2965
|
|
|
|
|
|
|
|
2966
|
|
|
|
|
|
|
# ')' |
2967
|
2125
|
|
|
|
|
7368
|
( $type_sequence, $indent_flag ) = |
2968
|
|
|
|
|
|
|
$self->decrease_nesting_depth( PAREN, $rtoken_map->[$i_tok] ); |
2969
|
|
|
|
|
|
|
|
2970
|
2125
|
|
|
|
|
4669
|
my $rvars = $rparen_vars->[$paren_depth]; |
2971
|
2125
|
50
|
|
|
|
5248
|
if ( defined($rvars) ) { |
2972
|
2125
|
|
|
|
|
3442
|
my ( $type_lp, $want_brace ) = @{$rvars}; |
|
2125
|
|
|
|
|
4750
|
|
2973
|
2125
|
50
|
33
|
|
|
8903
|
if ( $type_lp && $type_lp eq '{' ) { |
2974
|
2125
|
|
|
|
|
4131
|
$type = '}'; |
2975
|
|
|
|
|
|
|
} |
2976
|
|
|
|
|
|
|
} |
2977
|
|
|
|
|
|
|
|
2978
|
2125
|
|
|
|
|
4221
|
$container_type = $rparen_type->[$paren_depth]; |
2979
|
|
|
|
|
|
|
|
2980
|
|
|
|
|
|
|
# restore statement type as 'sub' at closing paren of a signature |
2981
|
|
|
|
|
|
|
# so that a subsequent ':' is identified as an attribute |
2982
|
2125
|
100
|
|
|
|
6106
|
if ( $container_type =~ /^sub\b/ ) { |
2983
|
24
|
|
|
|
|
61
|
$statement_type = $container_type; |
2984
|
|
|
|
|
|
|
} |
2985
|
|
|
|
|
|
|
|
2986
|
|
|
|
|
|
|
# /^(for|foreach)$/ |
2987
|
2125
|
100
|
|
|
|
6147
|
if ( $is_for_foreach{ $rparen_type->[$paren_depth] } ) { |
2988
|
69
|
|
|
|
|
187
|
my $num_sc = $rparen_semicolon_count->[$paren_depth]; |
2989
|
69
|
50
|
66
|
|
|
448
|
if ( $num_sc > 0 && $num_sc != 2 ) { |
2990
|
0
|
|
|
|
|
0
|
$self->warning("Expected 2 ';' in 'for(;;)' but saw $num_sc\n"); |
2991
|
|
|
|
|
|
|
} |
2992
|
|
|
|
|
|
|
} |
2993
|
|
|
|
|
|
|
|
2994
|
2125
|
50
|
|
|
|
4939
|
if ( $paren_depth > 0 ) { $paren_depth-- } |
|
2125
|
|
|
|
|
3254
|
|
2995
|
2125
|
|
|
|
|
3617
|
return; |
2996
|
|
|
|
|
|
|
} ## end sub do_RIGHT_PARENTHESIS |
2997
|
|
|
|
|
|
|
|
2998
|
|
|
|
|
|
|
sub do_COMMA { |
2999
|
|
|
|
|
|
|
|
3000
|
3075
|
|
|
3075
|
0
|
5176
|
my $self = shift; |
3001
|
|
|
|
|
|
|
|
3002
|
|
|
|
|
|
|
# ',' |
3003
|
3075
|
100
|
33
|
|
|
10478
|
if ( $last_nonblank_type eq ',' ) { |
|
|
50
|
|
|
|
|
|
3004
|
10
|
|
|
|
|
49
|
$self->complain("Repeated ','s \n"); |
3005
|
|
|
|
|
|
|
} |
3006
|
|
|
|
|
|
|
|
3007
|
|
|
|
|
|
|
# Note that we have to check both token and type here because a |
3008
|
|
|
|
|
|
|
# comma following a qw list can have last token='(' but type = 'q' |
3009
|
|
|
|
|
|
|
elsif ( $last_nonblank_token eq '(' && $last_nonblank_type eq '{' ) { |
3010
|
0
|
|
|
|
|
0
|
$self->warning("Unexpected leading ',' after a '('\n"); |
3011
|
|
|
|
|
|
|
} |
3012
|
|
|
|
|
|
|
else { |
3013
|
|
|
|
|
|
|
## ok: no complaints needed |
3014
|
|
|
|
|
|
|
} |
3015
|
|
|
|
|
|
|
|
3016
|
|
|
|
|
|
|
# patch for operator_expected: note if we are in the list (use.t) |
3017
|
3075
|
100
|
|
|
|
6466
|
if ( $statement_type eq 'use' ) { $statement_type = '_use' } |
|
6
|
|
|
|
|
12
|
|
3018
|
3075
|
|
|
|
|
4636
|
return; |
3019
|
|
|
|
|
|
|
|
3020
|
|
|
|
|
|
|
} ## end sub do_COMMA |
3021
|
|
|
|
|
|
|
|
3022
|
|
|
|
|
|
|
sub do_SEMICOLON { |
3023
|
|
|
|
|
|
|
|
3024
|
2448
|
|
|
2448
|
0
|
4603
|
my $self = shift; |
3025
|
|
|
|
|
|
|
|
3026
|
|
|
|
|
|
|
# ';' |
3027
|
2448
|
|
|
|
|
4116
|
$context = UNKNOWN_CONTEXT; |
3028
|
2448
|
|
|
|
|
3929
|
$statement_type = EMPTY_STRING; |
3029
|
2448
|
|
|
|
|
4511
|
$want_paren = EMPTY_STRING; |
3030
|
|
|
|
|
|
|
|
3031
|
|
|
|
|
|
|
# /^(for|foreach)$/ |
3032
|
2448
|
100
|
|
|
|
6758
|
if ( $is_for_foreach{ $rparen_type->[$paren_depth] } ) |
3033
|
|
|
|
|
|
|
{ # mark ; in for loop |
3034
|
|
|
|
|
|
|
|
3035
|
|
|
|
|
|
|
# Be careful: we do not want a semicolon such as the |
3036
|
|
|
|
|
|
|
# following to be included: |
3037
|
|
|
|
|
|
|
# |
3038
|
|
|
|
|
|
|
# for (sort {strcoll($a,$b);} keys %investments) { |
3039
|
|
|
|
|
|
|
|
3040
|
35
|
100
|
66
|
|
|
250
|
if ( $brace_depth == $rdepth_array->[PAREN][BRACE][$paren_depth] |
3041
|
|
|
|
|
|
|
&& $square_bracket_depth == |
3042
|
|
|
|
|
|
|
$rdepth_array->[PAREN][SQUARE_BRACKET][$paren_depth] ) |
3043
|
|
|
|
|
|
|
{ |
3044
|
|
|
|
|
|
|
|
3045
|
34
|
|
|
|
|
68
|
$type = 'f'; |
3046
|
34
|
|
|
|
|
72
|
$rparen_semicolon_count->[$paren_depth]++; |
3047
|
|
|
|
|
|
|
} |
3048
|
|
|
|
|
|
|
} |
3049
|
2448
|
|
|
|
|
4045
|
return; |
3050
|
|
|
|
|
|
|
} ## end sub do_SEMICOLON |
3051
|
|
|
|
|
|
|
|
3052
|
|
|
|
|
|
|
sub do_QUOTATION_MARK { |
3053
|
|
|
|
|
|
|
|
3054
|
1125
|
|
|
1125
|
0
|
2189
|
my $self = shift; |
3055
|
|
|
|
|
|
|
|
3056
|
|
|
|
|
|
|
# '"' |
3057
|
1125
|
50
|
|
|
|
2920
|
$self->error_if_expecting_OPERATOR("String") |
3058
|
|
|
|
|
|
|
if ( $expecting == OPERATOR ); |
3059
|
1125
|
|
|
|
|
1874
|
$in_quote = 1; |
3060
|
1125
|
|
|
|
|
1958
|
$type = 'Q'; |
3061
|
1125
|
|
|
|
|
1832
|
$allowed_quote_modifiers = EMPTY_STRING; |
3062
|
1125
|
|
|
|
|
1911
|
return; |
3063
|
|
|
|
|
|
|
} ## end sub do_QUOTATION_MARK |
3064
|
|
|
|
|
|
|
|
3065
|
|
|
|
|
|
|
sub do_APOSTROPHE { |
3066
|
|
|
|
|
|
|
|
3067
|
1164
|
|
|
1164
|
0
|
2158
|
my $self = shift; |
3068
|
|
|
|
|
|
|
|
3069
|
|
|
|
|
|
|
# "'" |
3070
|
1164
|
50
|
|
|
|
2813
|
$self->error_if_expecting_OPERATOR("String") |
3071
|
|
|
|
|
|
|
if ( $expecting == OPERATOR ); |
3072
|
1164
|
|
|
|
|
1847
|
$in_quote = 1; |
3073
|
1164
|
|
|
|
|
1922
|
$type = 'Q'; |
3074
|
1164
|
|
|
|
|
1918
|
$allowed_quote_modifiers = EMPTY_STRING; |
3075
|
1164
|
|
|
|
|
1898
|
return; |
3076
|
|
|
|
|
|
|
} ## end sub do_APOSTROPHE |
3077
|
|
|
|
|
|
|
|
3078
|
|
|
|
|
|
|
sub do_BACKTICK { |
3079
|
|
|
|
|
|
|
|
3080
|
0
|
|
|
0
|
0
|
0
|
my $self = shift; |
3081
|
|
|
|
|
|
|
|
3082
|
|
|
|
|
|
|
# '`' |
3083
|
0
|
0
|
|
|
|
0
|
$self->error_if_expecting_OPERATOR("String") |
3084
|
|
|
|
|
|
|
if ( $expecting == OPERATOR ); |
3085
|
0
|
|
|
|
|
0
|
$in_quote = 1; |
3086
|
0
|
|
|
|
|
0
|
$type = 'Q'; |
3087
|
0
|
|
|
|
|
0
|
$allowed_quote_modifiers = EMPTY_STRING; |
3088
|
0
|
|
|
|
|
0
|
return; |
3089
|
|
|
|
|
|
|
} ## end sub do_BACKTICK |
3090
|
|
|
|
|
|
|
|
3091
|
|
|
|
|
|
|
sub do_SLASH { |
3092
|
|
|
|
|
|
|
|
3093
|
207
|
|
|
207
|
0
|
450
|
my $self = shift; |
3094
|
|
|
|
|
|
|
|
3095
|
|
|
|
|
|
|
# '/' |
3096
|
207
|
|
|
|
|
365
|
my $is_pattern; |
3097
|
|
|
|
|
|
|
|
3098
|
|
|
|
|
|
|
# a pattern cannot follow certain keywords which take optional |
3099
|
|
|
|
|
|
|
# arguments, like 'shift' and 'pop'. See also '?'. |
3100
|
207
|
50
|
66
|
|
|
1036
|
if ( |
|
|
50
|
|
|
|
|
|
3101
|
|
|
|
|
|
|
$last_nonblank_type eq 'k' |
3102
|
|
|
|
|
|
|
&& $is_keyword_rejecting_slash_as_pattern_delimiter{ |
3103
|
|
|
|
|
|
|
$last_nonblank_token} |
3104
|
|
|
|
|
|
|
) |
3105
|
|
|
|
|
|
|
{ |
3106
|
0
|
|
|
|
|
0
|
$is_pattern = 0; |
3107
|
|
|
|
|
|
|
} |
3108
|
|
|
|
|
|
|
elsif ( $expecting == UNKNOWN ) { # indeterminate, must guess.. |
3109
|
0
|
|
|
|
|
0
|
my $msg; |
3110
|
0
|
|
|
|
|
0
|
( $is_pattern, $msg ) = |
3111
|
|
|
|
|
|
|
$self->guess_if_pattern_or_division( $i, $rtokens, $rtoken_map, |
3112
|
|
|
|
|
|
|
$max_token_index ); |
3113
|
|
|
|
|
|
|
|
3114
|
0
|
0
|
|
|
|
0
|
if ($msg) { |
3115
|
0
|
|
|
|
|
0
|
$self->write_diagnostics("DIVIDE:$msg\n"); |
3116
|
0
|
|
|
|
|
0
|
$self->write_logfile_entry($msg); |
3117
|
|
|
|
|
|
|
} |
3118
|
|
|
|
|
|
|
} |
3119
|
207
|
|
|
|
|
435
|
else { $is_pattern = ( $expecting == TERM ) } |
3120
|
|
|
|
|
|
|
|
3121
|
207
|
100
|
|
|
|
504
|
if ($is_pattern) { |
3122
|
78
|
|
|
|
|
171
|
$in_quote = 1; |
3123
|
78
|
|
|
|
|
159
|
$type = 'Q'; |
3124
|
78
|
|
|
|
|
148
|
$allowed_quote_modifiers = '[msixpodualngc]'; |
3125
|
|
|
|
|
|
|
} |
3126
|
|
|
|
|
|
|
else { # not a pattern; check for a /= token |
3127
|
|
|
|
|
|
|
|
3128
|
129
|
50
|
|
|
|
382
|
if ( $rtokens->[ $i + 1 ] eq '=' ) { # form token /= |
3129
|
0
|
|
|
|
|
0
|
$i++; |
3130
|
0
|
|
|
|
|
0
|
$tok = '/='; |
3131
|
0
|
|
|
|
|
0
|
$type = $tok; |
3132
|
|
|
|
|
|
|
} |
3133
|
|
|
|
|
|
|
|
3134
|
|
|
|
|
|
|
#DEBUG - collecting info on what tokens follow a divide |
3135
|
|
|
|
|
|
|
# for development of guessing algorithm |
3136
|
|
|
|
|
|
|
## if ( |
3137
|
|
|
|
|
|
|
## $self->is_possible_numerator( $i, $rtokens, |
3138
|
|
|
|
|
|
|
## $max_token_index ) < 0 |
3139
|
|
|
|
|
|
|
## ) |
3140
|
|
|
|
|
|
|
## { |
3141
|
|
|
|
|
|
|
## $self->write_diagnostics("DIVIDE? $input_line\n"); |
3142
|
|
|
|
|
|
|
## } |
3143
|
|
|
|
|
|
|
} |
3144
|
207
|
|
|
|
|
393
|
return; |
3145
|
|
|
|
|
|
|
} ## end sub do_SLASH |
3146
|
|
|
|
|
|
|
|
3147
|
|
|
|
|
|
|
sub do_LEFT_CURLY_BRACKET { |
3148
|
|
|
|
|
|
|
|
3149
|
1668
|
|
|
1668
|
0
|
3237
|
my $self = shift; |
3150
|
|
|
|
|
|
|
|
3151
|
|
|
|
|
|
|
# '{' |
3152
|
|
|
|
|
|
|
# if we just saw a ')', we will label this block with |
3153
|
|
|
|
|
|
|
# its type. We need to do this to allow sub |
3154
|
|
|
|
|
|
|
# code_block_type to determine if this brace starts a |
3155
|
|
|
|
|
|
|
# code block or anonymous hash. (The type of a paren |
3156
|
|
|
|
|
|
|
# pair is the preceding token, such as 'if', 'else', |
3157
|
|
|
|
|
|
|
# etc). |
3158
|
1668
|
|
|
|
|
3248
|
$container_type = EMPTY_STRING; |
3159
|
|
|
|
|
|
|
|
3160
|
|
|
|
|
|
|
# ATTRS: for a '{' following an attribute list, reset |
3161
|
|
|
|
|
|
|
# things to look like we just saw a sub name |
3162
|
|
|
|
|
|
|
# Added 'package' (can be 'class') for --use-feature=class (rt145706) |
3163
|
1668
|
100
|
100
|
|
|
15477
|
if ( substr( $statement_type, 0, 3 ) eq 'sub' ) { |
|
|
100
|
66
|
|
|
|
|
|
|
50
|
33
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
3164
|
34
|
|
|
|
|
93
|
$last_nonblank_token = $statement_type; |
3165
|
34
|
|
|
|
|
75
|
$last_nonblank_type = 'S'; # c250 change |
3166
|
34
|
|
|
|
|
71
|
$statement_type = EMPTY_STRING; |
3167
|
|
|
|
|
|
|
} |
3168
|
|
|
|
|
|
|
elsif ( substr( $statement_type, 0, 7 ) eq 'package' ) { |
3169
|
4
|
|
|
|
|
9
|
$last_nonblank_token = $statement_type; |
3170
|
4
|
|
|
|
|
7
|
$last_nonblank_type = 'P'; # c250 change |
3171
|
4
|
|
|
|
|
8
|
$statement_type = EMPTY_STRING; |
3172
|
|
|
|
|
|
|
} |
3173
|
|
|
|
|
|
|
|
3174
|
|
|
|
|
|
|
# patch for SWITCH/CASE: hide these keywords from an immediately |
3175
|
|
|
|
|
|
|
# following opening brace |
3176
|
|
|
|
|
|
|
elsif ( ( $statement_type eq 'case' || $statement_type eq 'when' ) |
3177
|
|
|
|
|
|
|
&& $statement_type eq $last_nonblank_token ) |
3178
|
|
|
|
|
|
|
{ |
3179
|
0
|
|
|
|
|
0
|
$last_nonblank_token = ";"; |
3180
|
|
|
|
|
|
|
} |
3181
|
|
|
|
|
|
|
|
3182
|
|
|
|
|
|
|
elsif ( $last_nonblank_token eq ')' ) { |
3183
|
241
|
|
|
|
|
694
|
$last_nonblank_token = $rparen_type->[ $paren_depth + 1 ]; |
3184
|
|
|
|
|
|
|
|
3185
|
|
|
|
|
|
|
# defensive move in case of a nesting error (pbug.t) |
3186
|
|
|
|
|
|
|
# in which this ')' had no previous '(' |
3187
|
|
|
|
|
|
|
# this nesting error will have been caught |
3188
|
241
|
50
|
|
|
|
757
|
if ( !defined($last_nonblank_token) ) { |
3189
|
0
|
|
|
|
|
0
|
$last_nonblank_token = 'if'; |
3190
|
|
|
|
|
|
|
} |
3191
|
|
|
|
|
|
|
|
3192
|
|
|
|
|
|
|
# Syntax check at '){' |
3193
|
241
|
100
|
|
|
|
1095
|
if ( $is_blocktype_with_paren{$last_nonblank_token} ) { |
3194
|
|
|
|
|
|
|
|
3195
|
227
|
|
|
|
|
564
|
my $rvars = $rparen_vars->[ $paren_depth + 1 ]; |
3196
|
227
|
50
|
|
|
|
766
|
if ( defined($rvars) ) { |
3197
|
227
|
|
|
|
|
430
|
my ( $type_lp, $want_brace ) = @{$rvars}; |
|
227
|
|
|
|
|
593
|
|
3198
|
|
|
|
|
|
|
|
3199
|
|
|
|
|
|
|
# Now verify that this is not a trailing form |
3200
|
227
|
50
|
|
|
|
765
|
if ( !$want_brace ) { |
3201
|
0
|
|
|
|
|
0
|
$self->warning( |
3202
|
|
|
|
|
|
|
"syntax error at ') {', unexpected '{' after closing ')' of a trailing '$last_nonblank_token'\n" |
3203
|
|
|
|
|
|
|
); |
3204
|
|
|
|
|
|
|
} |
3205
|
|
|
|
|
|
|
} |
3206
|
|
|
|
|
|
|
} |
3207
|
|
|
|
|
|
|
else { |
3208
|
14
|
50
|
|
|
|
75
|
if ( $self->[_extended_syntax_] ) { |
3209
|
|
|
|
|
|
|
|
3210
|
|
|
|
|
|
|
# we append a trailing () to mark this as an unknown |
3211
|
|
|
|
|
|
|
# block type. This allows perltidy to format some |
3212
|
|
|
|
|
|
|
# common extensions of perl syntax. |
3213
|
|
|
|
|
|
|
# This is used by sub code_block_type |
3214
|
14
|
|
|
|
|
49
|
$last_nonblank_token .= '()'; |
3215
|
|
|
|
|
|
|
} |
3216
|
|
|
|
|
|
|
else { |
3217
|
0
|
|
|
|
|
0
|
my $list = |
3218
|
|
|
|
|
|
|
join( SPACE, sort keys %is_blocktype_with_paren ); |
3219
|
0
|
|
|
|
|
0
|
$self->warning( |
3220
|
|
|
|
|
|
|
"syntax error at ') {', didn't see one of: <<$list>>; If this code is okay try using the -xs flag\n" |
3221
|
|
|
|
|
|
|
); |
3222
|
|
|
|
|
|
|
} |
3223
|
|
|
|
|
|
|
} |
3224
|
|
|
|
|
|
|
} |
3225
|
|
|
|
|
|
|
|
3226
|
|
|
|
|
|
|
# patch for paren-less for/foreach glitch, part 2. |
3227
|
|
|
|
|
|
|
# see note below under 'qw' |
3228
|
|
|
|
|
|
|
elsif ($last_nonblank_token eq 'qw' |
3229
|
|
|
|
|
|
|
&& $is_for_foreach{$want_paren} ) |
3230
|
|
|
|
|
|
|
{ |
3231
|
0
|
|
|
|
|
0
|
$last_nonblank_token = $want_paren; |
3232
|
0
|
0
|
|
|
|
0
|
if ( $last_last_nonblank_token eq $want_paren ) { |
3233
|
0
|
|
|
|
|
0
|
$self->warning( |
3234
|
|
|
|
|
|
|
"syntax error at '$want_paren .. {' -- missing \$ loop variable\n" |
3235
|
|
|
|
|
|
|
); |
3236
|
|
|
|
|
|
|
|
3237
|
|
|
|
|
|
|
} |
3238
|
0
|
|
|
|
|
0
|
$want_paren = EMPTY_STRING; |
3239
|
|
|
|
|
|
|
} |
3240
|
|
|
|
|
|
|
else { |
3241
|
|
|
|
|
|
|
## ok: not special |
3242
|
|
|
|
|
|
|
} |
3243
|
|
|
|
|
|
|
|
3244
|
|
|
|
|
|
|
# now identify which of the three possible types of |
3245
|
|
|
|
|
|
|
# curly braces we have: hash index container, anonymous |
3246
|
|
|
|
|
|
|
# hash reference, or code block. |
3247
|
|
|
|
|
|
|
|
3248
|
|
|
|
|
|
|
# non-structural (hash index) curly brace pair |
3249
|
|
|
|
|
|
|
# get marked 'L' and 'R' |
3250
|
1668
|
100
|
|
|
|
4981
|
if ( is_non_structural_brace() ) { |
3251
|
367
|
|
|
|
|
767
|
$type = 'L'; |
3252
|
|
|
|
|
|
|
|
3253
|
|
|
|
|
|
|
# patch for SWITCH/CASE: |
3254
|
|
|
|
|
|
|
# allow paren-less identifier after 'when' |
3255
|
|
|
|
|
|
|
# if the brace is preceded by a space |
3256
|
367
|
0
|
33
|
|
|
1346
|
if ( $statement_type eq 'when' |
|
|
|
33
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
3257
|
|
|
|
|
|
|
&& $last_nonblank_type eq 'i' |
3258
|
|
|
|
|
|
|
&& $last_last_nonblank_type eq 'k' |
3259
|
|
|
|
|
|
|
&& ( $i_tok == 0 || $rtoken_type->[ $i_tok - 1 ] eq 'b' ) ) |
3260
|
|
|
|
|
|
|
{ |
3261
|
0
|
|
|
|
|
0
|
$type = '{'; |
3262
|
0
|
|
|
|
|
0
|
$block_type = $statement_type; |
3263
|
|
|
|
|
|
|
} |
3264
|
|
|
|
|
|
|
} |
3265
|
|
|
|
|
|
|
|
3266
|
|
|
|
|
|
|
# code and anonymous hash have the same type, '{', but are |
3267
|
|
|
|
|
|
|
# distinguished by 'block_type', |
3268
|
|
|
|
|
|
|
# which will be blank for an anonymous hash |
3269
|
|
|
|
|
|
|
else { |
3270
|
|
|
|
|
|
|
|
3271
|
1301
|
|
|
|
|
4506
|
$block_type = |
3272
|
|
|
|
|
|
|
$self->code_block_type( $i_tok, $rtokens, $rtoken_type, |
3273
|
|
|
|
|
|
|
$max_token_index ); |
3274
|
|
|
|
|
|
|
|
3275
|
|
|
|
|
|
|
# patch to promote bareword type to function taking block |
3276
|
1301
|
100
|
100
|
|
|
6023
|
if ( $block_type |
|
|
|
66
|
|
|
|
|
3277
|
|
|
|
|
|
|
&& $last_nonblank_type eq 'w' |
3278
|
|
|
|
|
|
|
&& $last_nonblank_i >= 0 ) |
3279
|
|
|
|
|
|
|
{ |
3280
|
34
|
50
|
|
|
|
144
|
if ( $routput_token_type->[$last_nonblank_i] eq 'w' ) { |
3281
|
|
|
|
|
|
|
$routput_token_type->[$last_nonblank_i] = |
3282
|
34
|
100
|
|
|
|
166
|
$is_grep_alias{$block_type} ? 'k' : 'G'; |
3283
|
|
|
|
|
|
|
} |
3284
|
|
|
|
|
|
|
} |
3285
|
|
|
|
|
|
|
|
3286
|
|
|
|
|
|
|
# patch for SWITCH/CASE: if we find a stray opening block brace |
3287
|
|
|
|
|
|
|
# where we might accept a 'case' or 'when' block, then take it |
3288
|
1301
|
100
|
100
|
|
|
5285
|
if ( $statement_type eq 'case' |
3289
|
|
|
|
|
|
|
|| $statement_type eq 'when' ) |
3290
|
|
|
|
|
|
|
{ |
3291
|
38
|
100
|
66
|
|
|
237
|
if ( !$block_type || $block_type eq '}' ) { |
3292
|
4
|
|
|
|
|
9
|
$block_type = $statement_type; |
3293
|
|
|
|
|
|
|
} |
3294
|
|
|
|
|
|
|
} |
3295
|
|
|
|
|
|
|
} |
3296
|
|
|
|
|
|
|
|
3297
|
1668
|
|
|
|
|
3905
|
$rbrace_type->[ ++$brace_depth ] = $block_type; |
3298
|
|
|
|
|
|
|
|
3299
|
|
|
|
|
|
|
# Patch for CLASS BLOCK definitions: do not update the package for the |
3300
|
|
|
|
|
|
|
# current depth if this is a BLOCK type definition. |
3301
|
|
|
|
|
|
|
# TODO: should make 'class' separate from 'package' and only do |
3302
|
|
|
|
|
|
|
# this for 'class' |
3303
|
1668
|
100
|
|
|
|
5418
|
$rbrace_package->[$brace_depth] = $current_package |
3304
|
|
|
|
|
|
|
if ( substr( $block_type, 0, 8 ) ne 'package ' ); |
3305
|
|
|
|
|
|
|
|
3306
|
1668
|
|
|
|
|
3744
|
$rbrace_structural_type->[$brace_depth] = $type; |
3307
|
1668
|
|
|
|
|
2981
|
$rbrace_context->[$brace_depth] = $context; |
3308
|
1668
|
|
|
|
|
4984
|
( $type_sequence, $indent_flag ) = |
3309
|
|
|
|
|
|
|
$self->increase_nesting_depth( BRACE, $rtoken_map->[$i_tok] ); |
3310
|
1668
|
|
|
|
|
3542
|
return; |
3311
|
|
|
|
|
|
|
} ## end sub do_LEFT_CURLY_BRACKET |
3312
|
|
|
|
|
|
|
|
3313
|
|
|
|
|
|
|
sub do_RIGHT_CURLY_BRACKET { |
3314
|
|
|
|
|
|
|
|
3315
|
1668
|
|
|
1668
|
0
|
3525
|
my $self = shift; |
3316
|
|
|
|
|
|
|
|
3317
|
|
|
|
|
|
|
# '}' |
3318
|
1668
|
|
|
|
|
3575
|
$block_type = $rbrace_type->[$brace_depth]; |
3319
|
1668
|
100
|
|
|
|
4247
|
if ($block_type) { $statement_type = EMPTY_STRING } |
|
972
|
|
|
|
|
1811
|
|
3320
|
1668
|
100
|
|
|
|
4020
|
if ( defined( $rbrace_package->[$brace_depth] ) ) { |
3321
|
1664
|
|
|
|
|
3245
|
$current_package = $rbrace_package->[$brace_depth]; |
3322
|
|
|
|
|
|
|
} |
3323
|
|
|
|
|
|
|
|
3324
|
|
|
|
|
|
|
# can happen on brace error (caught elsewhere) |
3325
|
|
|
|
|
|
|
else { |
3326
|
|
|
|
|
|
|
} |
3327
|
1668
|
|
|
|
|
5599
|
( $type_sequence, $indent_flag ) = |
3328
|
|
|
|
|
|
|
$self->decrease_nesting_depth( BRACE, $rtoken_map->[$i_tok] ); |
3329
|
|
|
|
|
|
|
|
3330
|
1668
|
100
|
|
|
|
5515
|
if ( $rbrace_structural_type->[$brace_depth] eq 'L' ) { |
3331
|
367
|
|
|
|
|
792
|
$type = 'R'; |
3332
|
|
|
|
|
|
|
} |
3333
|
|
|
|
|
|
|
|
3334
|
|
|
|
|
|
|
# propagate type information for 'do' and 'eval' blocks, and also |
3335
|
|
|
|
|
|
|
# for smartmatch operator. This is necessary to enable us to know |
3336
|
|
|
|
|
|
|
# if an operator or term is expected next. |
3337
|
1668
|
100
|
|
|
|
4690
|
if ( $is_block_operator{$block_type} ) { |
3338
|
83
|
|
|
|
|
252
|
$tok = $block_type; |
3339
|
|
|
|
|
|
|
} |
3340
|
|
|
|
|
|
|
|
3341
|
1668
|
|
|
|
|
2960
|
$context = $rbrace_context->[$brace_depth]; |
3342
|
1668
|
50
|
|
|
|
4002
|
if ( $brace_depth > 0 ) { $brace_depth--; } |
|
1668
|
|
|
|
|
2573
|
|
3343
|
1668
|
|
|
|
|
2840
|
return; |
3344
|
|
|
|
|
|
|
} ## end sub do_RIGHT_CURLY_BRACKET |
3345
|
|
|
|
|
|
|
|
3346
|
|
|
|
|
|
|
sub do_AMPERSAND { |
3347
|
|
|
|
|
|
|
|
3348
|
126
|
|
|
126
|
0
|
361
|
my $self = shift; |
3349
|
|
|
|
|
|
|
|
3350
|
|
|
|
|
|
|
# '&' = maybe sub call? start looking |
3351
|
|
|
|
|
|
|
# We have to check for sub call unless we are sure we |
3352
|
|
|
|
|
|
|
# are expecting an operator. This example from s2p |
3353
|
|
|
|
|
|
|
# got mistaken as a q operator in an early version: |
3354
|
|
|
|
|
|
|
# print BODY &q(<<'EOT'); |
3355
|
126
|
100
|
|
|
|
447
|
if ( $expecting != OPERATOR ) { |
3356
|
|
|
|
|
|
|
|
3357
|
|
|
|
|
|
|
# But only look for a sub call if we are expecting a term or |
3358
|
|
|
|
|
|
|
# if there is no existing space after the &. |
3359
|
|
|
|
|
|
|
# For example we probably don't want & as sub call here: |
3360
|
|
|
|
|
|
|
# Fcntl::S_IRUSR & $mode; |
3361
|
107
|
100
|
66
|
|
|
484
|
if ( $expecting == TERM || $next_type ne 'b' ) { |
3362
|
104
|
|
|
|
|
337
|
$self->scan_simple_identifier(); |
3363
|
|
|
|
|
|
|
} |
3364
|
|
|
|
|
|
|
} |
3365
|
|
|
|
|
|
|
else { |
3366
|
|
|
|
|
|
|
} |
3367
|
126
|
|
|
|
|
279
|
return; |
3368
|
|
|
|
|
|
|
} ## end sub do_AMPERSAND |
3369
|
|
|
|
|
|
|
|
3370
|
|
|
|
|
|
|
sub do_LESS_THAN_SIGN { |
3371
|
|
|
|
|
|
|
|
3372
|
29
|
|
|
29
|
0
|
104
|
my $self = shift; |
3373
|
|
|
|
|
|
|
|
3374
|
|
|
|
|
|
|
# '<' - angle operator or less than? |
3375
|
29
|
100
|
|
|
|
138
|
if ( $expecting != OPERATOR ) { |
3376
|
8
|
|
|
|
|
57
|
( $i, $type ) = |
3377
|
|
|
|
|
|
|
$self->find_angle_operator_termination( $input_line, $i, |
3378
|
|
|
|
|
|
|
$rtoken_map, $expecting, $max_token_index ); |
3379
|
|
|
|
|
|
|
|
3380
|
|
|
|
|
|
|
## This message is not very helpful and quite confusing if the above |
3381
|
|
|
|
|
|
|
## routine decided not to write a message with the line number. |
3382
|
|
|
|
|
|
|
## if ( $type eq '<' && $expecting == TERM ) { |
3383
|
|
|
|
|
|
|
## $self->error_if_expecting_TERM(); |
3384
|
|
|
|
|
|
|
## $self->interrupt_logfile(); |
3385
|
|
|
|
|
|
|
## $self->warning("Unterminated <> operator?\n"); |
3386
|
|
|
|
|
|
|
## $self->resume_logfile(); |
3387
|
|
|
|
|
|
|
## } |
3388
|
|
|
|
|
|
|
|
3389
|
|
|
|
|
|
|
} |
3390
|
|
|
|
|
|
|
else { |
3391
|
|
|
|
|
|
|
} |
3392
|
29
|
|
|
|
|
69
|
return; |
3393
|
|
|
|
|
|
|
} ## end sub do_LESS_THAN_SIGN |
3394
|
|
|
|
|
|
|
|
3395
|
|
|
|
|
|
|
sub do_QUESTION_MARK { |
3396
|
|
|
|
|
|
|
|
3397
|
187
|
|
|
187
|
0
|
538
|
my $self = shift; |
3398
|
|
|
|
|
|
|
|
3399
|
|
|
|
|
|
|
# '?' = conditional or starting pattern? |
3400
|
187
|
|
|
|
|
418
|
my $is_pattern; |
3401
|
|
|
|
|
|
|
|
3402
|
|
|
|
|
|
|
# Patch for rt #126965 |
3403
|
|
|
|
|
|
|
# a pattern cannot follow certain keywords which take optional |
3404
|
|
|
|
|
|
|
# arguments, like 'shift' and 'pop'. See also '/'. |
3405
|
187
|
100
|
66
|
|
|
1759
|
if ( |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
3406
|
|
|
|
|
|
|
$last_nonblank_type eq 'k' |
3407
|
|
|
|
|
|
|
&& $is_keyword_rejecting_question_as_pattern_delimiter{ |
3408
|
|
|
|
|
|
|
$last_nonblank_token} |
3409
|
|
|
|
|
|
|
) |
3410
|
|
|
|
|
|
|
{ |
3411
|
1
|
|
|
|
|
4
|
$is_pattern = 0; |
3412
|
|
|
|
|
|
|
} |
3413
|
|
|
|
|
|
|
|
3414
|
|
|
|
|
|
|
# patch for RT#131288, user constant function without prototype |
3415
|
|
|
|
|
|
|
# last type is 'U' followed by ?. |
3416
|
|
|
|
|
|
|
elsif ( $last_nonblank_type =~ /^[FUY]$/ ) { |
3417
|
1
|
|
|
|
|
4
|
$is_pattern = 0; |
3418
|
|
|
|
|
|
|
} |
3419
|
|
|
|
|
|
|
elsif ( $expecting == UNKNOWN ) { |
3420
|
|
|
|
|
|
|
|
3421
|
|
|
|
|
|
|
# In older versions of Perl, a bare ? can be a pattern |
3422
|
|
|
|
|
|
|
# delimiter. In perl version 5.22 this was |
3423
|
|
|
|
|
|
|
# dropped, but we have to support it in order to format |
3424
|
|
|
|
|
|
|
# older programs. See: |
3425
|
|
|
|
|
|
|
## https://perl.developpez.com/documentations/en/5.22.0/perl5211delta.html |
3426
|
|
|
|
|
|
|
# For example, the following line worked |
3427
|
|
|
|
|
|
|
# at one time: |
3428
|
|
|
|
|
|
|
# ?(.*)? && (print $1,"\n"); |
3429
|
|
|
|
|
|
|
# In current versions it would have to be written with slashes: |
3430
|
|
|
|
|
|
|
# /(.*)/ && (print $1,"\n"); |
3431
|
11
|
|
|
|
|
25
|
my $msg; |
3432
|
11
|
|
|
|
|
76
|
( $is_pattern, $msg ) = |
3433
|
|
|
|
|
|
|
$self->guess_if_pattern_or_conditional( $i, $rtokens, $rtoken_map, |
3434
|
|
|
|
|
|
|
$max_token_index ); |
3435
|
|
|
|
|
|
|
|
3436
|
11
|
50
|
|
|
|
46
|
if ($msg) { $self->write_logfile_entry($msg) } |
|
11
|
|
|
|
|
43
|
|
3437
|
|
|
|
|
|
|
} |
3438
|
174
|
|
|
|
|
522
|
else { $is_pattern = ( $expecting == TERM ) } |
3439
|
|
|
|
|
|
|
|
3440
|
187
|
50
|
|
|
|
547
|
if ($is_pattern) { |
3441
|
0
|
|
|
|
|
0
|
$in_quote = 1; |
3442
|
0
|
|
|
|
|
0
|
$type = 'Q'; |
3443
|
0
|
|
|
|
|
0
|
$allowed_quote_modifiers = '[msixpodualngc]'; |
3444
|
|
|
|
|
|
|
} |
3445
|
|
|
|
|
|
|
else { |
3446
|
187
|
|
|
|
|
723
|
( $type_sequence, $indent_flag ) = |
3447
|
|
|
|
|
|
|
$self->increase_nesting_depth( QUESTION_COLON, |
3448
|
|
|
|
|
|
|
$rtoken_map->[$i_tok] ); |
3449
|
|
|
|
|
|
|
} |
3450
|
187
|
|
|
|
|
488
|
return; |
3451
|
|
|
|
|
|
|
} ## end sub do_QUESTION_MARK |
3452
|
|
|
|
|
|
|
|
3453
|
|
|
|
|
|
|
sub do_STAR { |
3454
|
|
|
|
|
|
|
|
3455
|
238
|
|
|
238
|
0
|
512
|
my $self = shift; |
3456
|
|
|
|
|
|
|
|
3457
|
|
|
|
|
|
|
# '*' = typeglob, or multiply? |
3458
|
238
|
50
|
66
|
|
|
898
|
if ( $expecting == UNKNOWN && $last_nonblank_type eq 'Z' ) { |
3459
|
0
|
0
|
0
|
|
|
0
|
if ( $next_type ne 'b' |
|
|
|
0
|
|
|
|
|
3460
|
|
|
|
|
|
|
&& $next_type ne '(' |
3461
|
|
|
|
|
|
|
&& $next_type ne '#' ) # Fix c036 |
3462
|
|
|
|
|
|
|
{ |
3463
|
0
|
|
|
|
|
0
|
$expecting = TERM; |
3464
|
|
|
|
|
|
|
} |
3465
|
|
|
|
|
|
|
} |
3466
|
238
|
100
|
|
|
|
629
|
if ( $expecting == TERM ) { |
3467
|
21
|
|
|
|
|
91
|
$self->scan_simple_identifier(); |
3468
|
|
|
|
|
|
|
} |
3469
|
|
|
|
|
|
|
else { |
3470
|
|
|
|
|
|
|
|
3471
|
217
|
50
|
|
|
|
824
|
if ( $rtokens->[ $i + 1 ] eq '=' ) { |
|
|
100
|
|
|
|
|
|
3472
|
0
|
|
|
|
|
0
|
$tok = '*='; |
3473
|
0
|
|
|
|
|
0
|
$type = $tok; |
3474
|
0
|
|
|
|
|
0
|
$i++; |
3475
|
|
|
|
|
|
|
} |
3476
|
|
|
|
|
|
|
elsif ( $rtokens->[ $i + 1 ] eq '*' ) { |
3477
|
36
|
|
|
|
|
103
|
$tok = '**'; |
3478
|
36
|
|
|
|
|
84
|
$type = $tok; |
3479
|
36
|
|
|
|
|
68
|
$i++; |
3480
|
36
|
50
|
|
|
|
127
|
if ( $rtokens->[ $i + 1 ] eq '=' ) { |
3481
|
0
|
|
|
|
|
0
|
$tok = '**='; |
3482
|
0
|
|
|
|
|
0
|
$type = $tok; |
3483
|
0
|
|
|
|
|
0
|
$i++; |
3484
|
|
|
|
|
|
|
} |
3485
|
|
|
|
|
|
|
} |
3486
|
|
|
|
|
|
|
else { |
3487
|
|
|
|
|
|
|
## not multiple characters |
3488
|
|
|
|
|
|
|
} |
3489
|
|
|
|
|
|
|
} |
3490
|
238
|
|
|
|
|
438
|
return; |
3491
|
|
|
|
|
|
|
} ## end sub do_STAR |
3492
|
|
|
|
|
|
|
|
3493
|
|
|
|
|
|
|
sub do_DOT { |
3494
|
|
|
|
|
|
|
|
3495
|
150
|
|
|
150
|
0
|
333
|
my $self = shift; |
3496
|
|
|
|
|
|
|
|
3497
|
|
|
|
|
|
|
# '.' = what kind of . ? |
3498
|
150
|
100
|
|
|
|
463
|
if ( $expecting != OPERATOR ) { |
3499
|
10
|
|
|
|
|
51
|
$self->scan_number(); |
3500
|
10
|
100
|
|
|
|
43
|
if ( $type eq '.' ) { |
3501
|
2
|
50
|
|
|
|
8
|
$self->error_if_expecting_TERM() |
3502
|
|
|
|
|
|
|
if ( $expecting == TERM ); |
3503
|
|
|
|
|
|
|
} |
3504
|
|
|
|
|
|
|
} |
3505
|
|
|
|
|
|
|
else { |
3506
|
|
|
|
|
|
|
} |
3507
|
150
|
|
|
|
|
289
|
return; |
3508
|
|
|
|
|
|
|
} ## end sub do_DOT |
3509
|
|
|
|
|
|
|
|
3510
|
|
|
|
|
|
|
sub do_COLON { |
3511
|
|
|
|
|
|
|
|
3512
|
271
|
|
|
271
|
0
|
810
|
my $self = shift; |
3513
|
|
|
|
|
|
|
|
3514
|
|
|
|
|
|
|
# ':' = label, ternary, attribute, ? |
3515
|
|
|
|
|
|
|
|
3516
|
|
|
|
|
|
|
# if this is the first nonblank character, call it a label |
3517
|
|
|
|
|
|
|
# since perl seems to just swallow it |
3518
|
271
|
50
|
66
|
|
|
3691
|
if ( $input_line_number == 1 && $last_nonblank_i == -1 ) { |
|
|
100
|
66
|
|
|
|
|
|
|
100
|
66
|
|
|
|
|
|
|
100
|
66
|
|
|
|
|
|
|
100
|
|
|
|
|
|
3519
|
0
|
|
|
|
|
0
|
$type = 'J'; |
3520
|
|
|
|
|
|
|
} |
3521
|
|
|
|
|
|
|
|
3522
|
|
|
|
|
|
|
# ATTRS: check for a ':' which introduces an attribute list |
3523
|
|
|
|
|
|
|
# either after a 'sub' keyword or within a paren list |
3524
|
|
|
|
|
|
|
# Added 'package' (can be 'class') for --use-feature=class (rt145706) |
3525
|
|
|
|
|
|
|
elsif ( $statement_type =~ /^(sub|package)\b/ ) { |
3526
|
22
|
|
|
|
|
51
|
$type = 'A'; |
3527
|
22
|
|
|
|
|
53
|
$self->[_in_attribute_list_] = 1; |
3528
|
|
|
|
|
|
|
} |
3529
|
|
|
|
|
|
|
|
3530
|
|
|
|
|
|
|
# Within a signature, unless we are in a ternary. For example, |
3531
|
|
|
|
|
|
|
# from 't/filter_example.t': |
3532
|
|
|
|
|
|
|
# method foo4 ( $class: $bar ) { $class->bar($bar) } |
3533
|
|
|
|
|
|
|
elsif ( $rparen_type->[$paren_depth] =~ /^sub\b/ |
3534
|
|
|
|
|
|
|
&& !is_balanced_closing_container(QUESTION_COLON) ) |
3535
|
|
|
|
|
|
|
{ |
3536
|
1
|
|
|
|
|
5
|
$type = 'A'; |
3537
|
1
|
|
|
|
|
3
|
$self->[_in_attribute_list_] = 1; |
3538
|
|
|
|
|
|
|
} |
3539
|
|
|
|
|
|
|
|
3540
|
|
|
|
|
|
|
# check for scalar attribute, such as |
3541
|
|
|
|
|
|
|
# my $foo : shared = 1; |
3542
|
|
|
|
|
|
|
elsif ($is_my_our_state{$statement_type} |
3543
|
|
|
|
|
|
|
&& $rcurrent_depth->[QUESTION_COLON] == 0 ) |
3544
|
|
|
|
|
|
|
{ |
3545
|
15
|
|
|
|
|
38
|
$type = 'A'; |
3546
|
15
|
|
|
|
|
42
|
$self->[_in_attribute_list_] = 1; |
3547
|
|
|
|
|
|
|
} |
3548
|
|
|
|
|
|
|
|
3549
|
|
|
|
|
|
|
# Look for Switch::Plain syntax if an error would otherwise occur |
3550
|
|
|
|
|
|
|
# here. Note that we do not need to check if the extended syntax |
3551
|
|
|
|
|
|
|
# flag is set because otherwise an error would occur, and we would |
3552
|
|
|
|
|
|
|
# then have to output a message telling the user to set the |
3553
|
|
|
|
|
|
|
# extended syntax flag to avoid the error. |
3554
|
|
|
|
|
|
|
# case 1: { |
3555
|
|
|
|
|
|
|
# default: { |
3556
|
|
|
|
|
|
|
# default: |
3557
|
|
|
|
|
|
|
# Note that the line 'default:' will be parsed as a label elsewhere. |
3558
|
|
|
|
|
|
|
elsif ( $is_case_default{$statement_type} |
3559
|
|
|
|
|
|
|
&& !is_balanced_closing_container(QUESTION_COLON) ) |
3560
|
|
|
|
|
|
|
{ |
3561
|
|
|
|
|
|
|
# mark it as a perltidy label type |
3562
|
46
|
|
|
|
|
103
|
$type = 'J'; |
3563
|
|
|
|
|
|
|
} |
3564
|
|
|
|
|
|
|
|
3565
|
|
|
|
|
|
|
# otherwise, it should be part of a ?/: operator |
3566
|
|
|
|
|
|
|
else { |
3567
|
187
|
|
|
|
|
834
|
( $type_sequence, $indent_flag ) = |
3568
|
|
|
|
|
|
|
$self->decrease_nesting_depth( QUESTION_COLON, |
3569
|
|
|
|
|
|
|
$rtoken_map->[$i_tok] ); |
3570
|
187
|
50
|
|
|
|
1093
|
if ( $last_nonblank_token eq '?' ) { |
3571
|
0
|
|
|
|
|
0
|
$self->warning("Syntax error near ? :\n"); |
3572
|
|
|
|
|
|
|
} |
3573
|
|
|
|
|
|
|
} |
3574
|
271
|
|
|
|
|
541
|
return; |
3575
|
|
|
|
|
|
|
} ## end sub do_COLON |
3576
|
|
|
|
|
|
|
|
3577
|
|
|
|
|
|
|
sub do_PLUS_SIGN { |
3578
|
|
|
|
|
|
|
|
3579
|
227
|
|
|
227
|
0
|
428
|
my $self = shift; |
3580
|
|
|
|
|
|
|
|
3581
|
|
|
|
|
|
|
# '+' = what kind of plus? |
3582
|
227
|
100
|
|
|
|
851
|
if ( $expecting == TERM ) { |
|
|
100
|
|
|
|
|
|
3583
|
13
|
|
|
|
|
49
|
my $number = $self->scan_number_fast(); |
3584
|
|
|
|
|
|
|
|
3585
|
|
|
|
|
|
|
# unary plus is safest assumption if not a number |
3586
|
13
|
50
|
|
|
|
42
|
if ( !defined($number) ) { $type = 'p'; } |
|
13
|
|
|
|
|
41
|
|
3587
|
|
|
|
|
|
|
} |
3588
|
|
|
|
|
|
|
elsif ( $expecting == OPERATOR ) { |
3589
|
|
|
|
|
|
|
} |
3590
|
|
|
|
|
|
|
else { |
3591
|
3
|
100
|
|
|
|
14
|
if ( $next_type eq 'w' ) { $type = 'p' } |
|
2
|
|
|
|
|
3
|
|
3592
|
|
|
|
|
|
|
} |
3593
|
227
|
|
|
|
|
389
|
return; |
3594
|
|
|
|
|
|
|
} ## end sub do_PLUS_SIGN |
3595
|
|
|
|
|
|
|
|
3596
|
|
|
|
|
|
|
sub do_AT_SIGN { |
3597
|
|
|
|
|
|
|
|
3598
|
438
|
|
|
438
|
0
|
1153
|
my $self = shift; |
3599
|
|
|
|
|
|
|
|
3600
|
|
|
|
|
|
|
# '@' = sigil for array? |
3601
|
438
|
50
|
|
|
|
1478
|
$self->error_if_expecting_OPERATOR("Array") |
3602
|
|
|
|
|
|
|
if ( $expecting == OPERATOR ); |
3603
|
438
|
|
|
|
|
1791
|
$self->scan_simple_identifier(); |
3604
|
438
|
|
|
|
|
918
|
return; |
3605
|
|
|
|
|
|
|
} ## end sub do_AT_SIGN |
3606
|
|
|
|
|
|
|
|
3607
|
|
|
|
|
|
|
sub do_PERCENT_SIGN { |
3608
|
|
|
|
|
|
|
|
3609
|
202
|
|
|
202
|
0
|
576
|
my $self = shift; |
3610
|
|
|
|
|
|
|
|
3611
|
|
|
|
|
|
|
# '%' = hash or modulo? |
3612
|
|
|
|
|
|
|
# first guess is hash if no following blank or paren |
3613
|
202
|
50
|
|
|
|
711
|
if ( $expecting == UNKNOWN ) { |
3614
|
0
|
0
|
0
|
|
|
0
|
if ( $next_type ne 'b' && $next_type ne '(' ) { |
3615
|
0
|
|
|
|
|
0
|
$expecting = TERM; |
3616
|
|
|
|
|
|
|
} |
3617
|
|
|
|
|
|
|
} |
3618
|
202
|
100
|
|
|
|
655
|
if ( $expecting == TERM ) { |
3619
|
192
|
|
|
|
|
794
|
$self->scan_simple_identifier(); |
3620
|
|
|
|
|
|
|
} |
3621
|
202
|
|
|
|
|
503
|
return; |
3622
|
|
|
|
|
|
|
} ## end sub do_PERCENT_SIGN |
3623
|
|
|
|
|
|
|
|
3624
|
|
|
|
|
|
|
sub do_LEFT_SQUARE_BRACKET { |
3625
|
|
|
|
|
|
|
|
3626
|
594
|
|
|
594
|
0
|
1188
|
my $self = shift; |
3627
|
|
|
|
|
|
|
|
3628
|
|
|
|
|
|
|
# '[' |
3629
|
594
|
|
|
|
|
2453
|
$rsquare_bracket_type->[ ++$square_bracket_depth ] = |
3630
|
|
|
|
|
|
|
$last_nonblank_token; |
3631
|
594
|
|
|
|
|
2168
|
( $type_sequence, $indent_flag ) = |
3632
|
|
|
|
|
|
|
$self->increase_nesting_depth( SQUARE_BRACKET, |
3633
|
|
|
|
|
|
|
$rtoken_map->[$i_tok] ); |
3634
|
|
|
|
|
|
|
|
3635
|
|
|
|
|
|
|
# It may seem odd, but structural square brackets have |
3636
|
|
|
|
|
|
|
# type '{' and '}'. This simplifies the indentation logic. |
3637
|
594
|
100
|
|
|
|
1841
|
if ( !is_non_structural_brace() ) { |
3638
|
287
|
|
|
|
|
733
|
$type = '{'; |
3639
|
|
|
|
|
|
|
} |
3640
|
594
|
|
|
|
|
1440
|
$rsquare_bracket_structural_type->[$square_bracket_depth] = $type; |
3641
|
594
|
|
|
|
|
1105
|
return; |
3642
|
|
|
|
|
|
|
} ## end sub do_LEFT_SQUARE_BRACKET |
3643
|
|
|
|
|
|
|
|
3644
|
|
|
|
|
|
|
sub do_RIGHT_SQUARE_BRACKET { |
3645
|
|
|
|
|
|
|
|
3646
|
594
|
|
|
594
|
0
|
2351
|
my $self = shift; |
3647
|
|
|
|
|
|
|
|
3648
|
|
|
|
|
|
|
# ']' |
3649
|
594
|
|
|
|
|
2166
|
( $type_sequence, $indent_flag ) = |
3650
|
|
|
|
|
|
|
$self->decrease_nesting_depth( SQUARE_BRACKET, |
3651
|
|
|
|
|
|
|
$rtoken_map->[$i_tok] ); |
3652
|
|
|
|
|
|
|
|
3653
|
594
|
100
|
|
|
|
2061
|
if ( $rsquare_bracket_structural_type->[$square_bracket_depth] eq '{' ) |
3654
|
|
|
|
|
|
|
{ |
3655
|
287
|
|
|
|
|
1810
|
$type = '}'; |
3656
|
|
|
|
|
|
|
} |
3657
|
|
|
|
|
|
|
|
3658
|
|
|
|
|
|
|
# propagate type information for smartmatch operator. This is |
3659
|
|
|
|
|
|
|
# necessary to enable us to know if an operator or term is expected |
3660
|
|
|
|
|
|
|
# next. |
3661
|
594
|
100
|
|
|
|
1710
|
if ( $rsquare_bracket_type->[$square_bracket_depth] eq '~~' ) { |
3662
|
20
|
|
|
|
|
42
|
$tok = $rsquare_bracket_type->[$square_bracket_depth]; |
3663
|
|
|
|
|
|
|
} |
3664
|
|
|
|
|
|
|
|
3665
|
594
|
50
|
|
|
|
1567
|
if ( $square_bracket_depth > 0 ) { $square_bracket_depth--; } |
|
594
|
|
|
|
|
956
|
|
3666
|
594
|
|
|
|
|
989
|
return; |
3667
|
|
|
|
|
|
|
} ## end sub do_RIGHT_SQUARE_BRACKET |
3668
|
|
|
|
|
|
|
|
3669
|
|
|
|
|
|
|
sub do_MINUS_SIGN { |
3670
|
|
|
|
|
|
|
|
3671
|
441
|
|
|
441
|
0
|
902
|
my $self = shift; |
3672
|
|
|
|
|
|
|
|
3673
|
|
|
|
|
|
|
# '-' = what kind of minus? |
3674
|
441
|
100
|
100
|
|
|
2843
|
if ( ( $expecting != OPERATOR ) |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
3675
|
|
|
|
|
|
|
&& $is_file_test_operator{$next_tok} ) |
3676
|
|
|
|
|
|
|
{ |
3677
|
10
|
|
|
|
|
44
|
my ( $next_nonblank_token, $i_next ) = |
3678
|
|
|
|
|
|
|
$self->find_next_nonblank_token( $i + 1, $rtokens, |
3679
|
|
|
|
|
|
|
$max_token_index ); |
3680
|
|
|
|
|
|
|
|
3681
|
|
|
|
|
|
|
# check for a quoted word like "-w=>xx"; |
3682
|
|
|
|
|
|
|
# it is sufficient to just check for a following '=' |
3683
|
10
|
50
|
|
|
|
82
|
if ( $next_nonblank_token eq '=' ) { |
3684
|
0
|
|
|
|
|
0
|
$type = 'm'; |
3685
|
|
|
|
|
|
|
} |
3686
|
|
|
|
|
|
|
else { |
3687
|
10
|
|
|
|
|
23
|
$i++; |
3688
|
10
|
|
|
|
|
29
|
$tok .= $next_tok; |
3689
|
10
|
|
|
|
|
24
|
$type = 'F'; |
3690
|
|
|
|
|
|
|
} |
3691
|
|
|
|
|
|
|
} |
3692
|
|
|
|
|
|
|
elsif ( $expecting == TERM ) { |
3693
|
330
|
|
|
|
|
1051
|
my $number = $self->scan_number_fast(); |
3694
|
|
|
|
|
|
|
|
3695
|
|
|
|
|
|
|
# maybe part of bareword token? unary is safest |
3696
|
330
|
100
|
|
|
|
877
|
if ( !defined($number) ) { $type = 'm'; } |
|
288
|
|
|
|
|
519
|
|
3697
|
|
|
|
|
|
|
|
3698
|
|
|
|
|
|
|
} |
3699
|
|
|
|
|
|
|
elsif ( $expecting == OPERATOR ) { |
3700
|
|
|
|
|
|
|
} |
3701
|
|
|
|
|
|
|
else { |
3702
|
|
|
|
|
|
|
|
3703
|
4
|
50
|
|
|
|
14
|
if ( $next_type eq 'w' ) { |
3704
|
4
|
|
|
|
|
20
|
$type = 'm'; |
3705
|
|
|
|
|
|
|
} |
3706
|
|
|
|
|
|
|
} |
3707
|
441
|
|
|
|
|
763
|
return; |
3708
|
|
|
|
|
|
|
} ## end sub do_MINUS_SIGN |
3709
|
|
|
|
|
|
|
|
3710
|
|
|
|
|
|
|
sub do_CARAT_SIGN { |
3711
|
|
|
|
|
|
|
|
3712
|
12
|
|
|
12
|
0
|
25
|
my $self = shift; |
3713
|
|
|
|
|
|
|
|
3714
|
|
|
|
|
|
|
# '^' |
3715
|
|
|
|
|
|
|
# check for special variables like ${^WARNING_BITS} |
3716
|
12
|
100
|
|
|
|
44
|
if ( $expecting == TERM ) { |
3717
|
|
|
|
|
|
|
|
3718
|
5
|
50
|
33
|
|
|
58
|
if ( $last_nonblank_token eq '{' |
|
|
|
33
|
|
|
|
|
3719
|
|
|
|
|
|
|
&& ( $next_tok !~ /^\d/ ) |
3720
|
|
|
|
|
|
|
&& ( $next_tok =~ /^\w/ ) ) |
3721
|
|
|
|
|
|
|
{ |
3722
|
|
|
|
|
|
|
|
3723
|
5
|
100
|
|
|
|
19
|
if ( $next_tok eq 'W' ) { |
3724
|
1
|
|
|
|
|
4
|
$self->[_saw_perl_dash_w_] = 1; |
3725
|
|
|
|
|
|
|
} |
3726
|
5
|
|
|
|
|
12
|
$tok = $tok . $next_tok; |
3727
|
5
|
|
|
|
|
10
|
$i = $i + 1; |
3728
|
5
|
|
|
|
|
11
|
$type = 'w'; |
3729
|
|
|
|
|
|
|
|
3730
|
|
|
|
|
|
|
# Optional coding to try to catch syntax errors. This can |
3731
|
|
|
|
|
|
|
# be removed if it ever causes incorrect warning messages. |
3732
|
|
|
|
|
|
|
# The '{^' should be preceded by either by a type or '$#' |
3733
|
|
|
|
|
|
|
# Examples: |
3734
|
|
|
|
|
|
|
# $#{^CAPTURE} ok |
3735
|
|
|
|
|
|
|
# *${^LAST_FH}{NAME} ok |
3736
|
|
|
|
|
|
|
# @{^HOWDY} ok |
3737
|
|
|
|
|
|
|
# $hash{^HOWDY} error |
3738
|
|
|
|
|
|
|
|
3739
|
|
|
|
|
|
|
# Note that a type sigil '$' may be tokenized as 'Z' |
3740
|
|
|
|
|
|
|
# after something like 'print', so allow type 'Z' |
3741
|
5
|
0
|
33
|
|
|
23
|
if ( $last_last_nonblank_type ne 't' |
|
|
|
33
|
|
|
|
|
3742
|
|
|
|
|
|
|
&& $last_last_nonblank_type ne 'Z' |
3743
|
|
|
|
|
|
|
&& $last_last_nonblank_token ne '$#' ) |
3744
|
|
|
|
|
|
|
{ |
3745
|
0
|
|
|
|
|
0
|
$self->warning("Possible syntax error near '{^'\n"); |
3746
|
|
|
|
|
|
|
} |
3747
|
|
|
|
|
|
|
} |
3748
|
|
|
|
|
|
|
|
3749
|
|
|
|
|
|
|
else { |
3750
|
0
|
0
|
|
|
|
0
|
if ( !$self->error_if_expecting_TERM() ) { |
3751
|
|
|
|
|
|
|
|
3752
|
|
|
|
|
|
|
# Something like this is valid but strange: |
3753
|
|
|
|
|
|
|
# undef ^I; |
3754
|
0
|
|
|
|
|
0
|
$self->complain("The '^' seems unusual here\n"); |
3755
|
|
|
|
|
|
|
} |
3756
|
|
|
|
|
|
|
} |
3757
|
|
|
|
|
|
|
} |
3758
|
12
|
|
|
|
|
26
|
return; |
3759
|
|
|
|
|
|
|
} ## end sub do_CARAT_SIGN |
3760
|
|
|
|
|
|
|
|
3761
|
|
|
|
|
|
|
sub do_DOUBLE_COLON { |
3762
|
|
|
|
|
|
|
|
3763
|
9
|
|
|
9
|
0
|
16
|
my $self = shift; |
3764
|
|
|
|
|
|
|
|
3765
|
|
|
|
|
|
|
# '::' = probably a sub call |
3766
|
9
|
|
|
|
|
35
|
$self->scan_bare_identifier(); |
3767
|
9
|
|
|
|
|
19
|
return; |
3768
|
|
|
|
|
|
|
} ## end sub do_DOUBLE_COLON |
3769
|
|
|
|
|
|
|
|
3770
|
|
|
|
|
|
|
sub do_LEFT_SHIFT { |
3771
|
|
|
|
|
|
|
|
3772
|
7
|
|
|
7
|
0
|
30
|
my $self = shift; |
3773
|
|
|
|
|
|
|
|
3774
|
|
|
|
|
|
|
# '<<' = maybe a here-doc? |
3775
|
7
|
50
|
|
|
|
33
|
if ( $expecting != OPERATOR ) { |
3776
|
7
|
|
|
|
|
24
|
my ( $found_target, $here_doc_target, $here_quote_character, |
3777
|
|
|
|
|
|
|
$saw_error ); |
3778
|
|
|
|
|
|
|
( |
3779
|
7
|
|
|
|
|
54
|
$found_target, $here_doc_target, $here_quote_character, $i, |
3780
|
|
|
|
|
|
|
$saw_error |
3781
|
|
|
|
|
|
|
) |
3782
|
|
|
|
|
|
|
= $self->find_here_doc( $expecting, $i, $rtokens, $rtoken_map, |
3783
|
|
|
|
|
|
|
$max_token_index ); |
3784
|
|
|
|
|
|
|
|
3785
|
7
|
50
|
|
|
|
30
|
if ($found_target) { |
|
|
0
|
|
|
|
|
|
3786
|
7
|
|
|
|
|
16
|
push @{$rhere_target_list}, |
|
7
|
|
|
|
|
34
|
|
3787
|
|
|
|
|
|
|
[ $here_doc_target, $here_quote_character ]; |
3788
|
7
|
|
|
|
|
26
|
$type = 'h'; |
3789
|
7
|
50
|
|
|
|
69
|
if ( length($here_doc_target) > 80 ) { |
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
3790
|
0
|
|
|
|
|
0
|
my $truncated = substr( $here_doc_target, 0, 80 ); |
3791
|
0
|
|
|
|
|
0
|
$self->complain("Long here-target: '$truncated' ...\n"); |
3792
|
|
|
|
|
|
|
} |
3793
|
|
|
|
|
|
|
elsif ( !$here_doc_target ) { |
3794
|
0
|
0
|
|
|
|
0
|
$self->warning( |
3795
|
|
|
|
|
|
|
'Use of bare << to mean <<"" is deprecated' . "\n" ) |
3796
|
|
|
|
|
|
|
if ( !$here_quote_character ); |
3797
|
|
|
|
|
|
|
} |
3798
|
|
|
|
|
|
|
elsif ( $here_doc_target !~ /^[A-Z_]\w+$/ ) { |
3799
|
2
|
|
|
|
|
13
|
$self->complain( |
3800
|
|
|
|
|
|
|
"Unconventional here-target: '$here_doc_target'\n"); |
3801
|
|
|
|
|
|
|
} |
3802
|
|
|
|
|
|
|
else { |
3803
|
|
|
|
|
|
|
## ok: nothing to complain about |
3804
|
|
|
|
|
|
|
} |
3805
|
|
|
|
|
|
|
} |
3806
|
|
|
|
|
|
|
elsif ( $expecting == TERM ) { |
3807
|
0
|
0
|
|
|
|
0
|
if ( !$saw_error ) { |
3808
|
|
|
|
|
|
|
|
3809
|
|
|
|
|
|
|
# shouldn't happen..arriving here implies an error in |
3810
|
|
|
|
|
|
|
# the logic in sub 'find_here_doc' |
3811
|
0
|
|
|
|
|
0
|
if (DEVEL_MODE) { |
3812
|
|
|
|
|
|
|
$self->Fault(<<EOM); |
3813
|
|
|
|
|
|
|
Program bug; didn't find here doc target |
3814
|
|
|
|
|
|
|
EOM |
3815
|
|
|
|
|
|
|
} |
3816
|
|
|
|
|
|
|
$self->warning( |
3817
|
0
|
|
|
|
|
0
|
"Possible program error: didn't find here doc target\n" |
3818
|
|
|
|
|
|
|
); |
3819
|
0
|
|
|
|
|
0
|
$self->report_definite_bug(); |
3820
|
|
|
|
|
|
|
} |
3821
|
|
|
|
|
|
|
} |
3822
|
|
|
|
|
|
|
|
3823
|
|
|
|
|
|
|
# target not found, expecting == UNKNOWN |
3824
|
|
|
|
|
|
|
else { |
3825
|
|
|
|
|
|
|
# assume it is a shift |
3826
|
|
|
|
|
|
|
} |
3827
|
|
|
|
|
|
|
} |
3828
|
|
|
|
|
|
|
else { |
3829
|
|
|
|
|
|
|
} |
3830
|
7
|
|
|
|
|
20
|
return; |
3831
|
|
|
|
|
|
|
} ## end sub do_LEFT_SHIFT |
3832
|
|
|
|
|
|
|
|
3833
|
|
|
|
|
|
|
sub do_NEW_HERE_DOC { |
3834
|
|
|
|
|
|
|
|
3835
|
|
|
|
|
|
|
# '<<~' = a here-doc, new type added in v26 |
3836
|
|
|
|
|
|
|
|
3837
|
2
|
|
|
2
|
0
|
9
|
my $self = shift; |
3838
|
|
|
|
|
|
|
|
3839
|
|
|
|
|
|
|
return |
3840
|
2
|
50
|
|
|
|
10
|
if ( $i >= $max_token_index ); # here-doc not possible if end of line |
3841
|
2
|
50
|
|
|
|
11
|
if ( $expecting != OPERATOR ) { |
3842
|
2
|
|
|
|
|
8
|
my ( $found_target, $here_doc_target, $here_quote_character, |
3843
|
|
|
|
|
|
|
$saw_error ); |
3844
|
|
|
|
|
|
|
( |
3845
|
2
|
|
|
|
|
14
|
$found_target, $here_doc_target, $here_quote_character, $i, |
3846
|
|
|
|
|
|
|
$saw_error |
3847
|
|
|
|
|
|
|
) |
3848
|
|
|
|
|
|
|
= $self->find_here_doc( $expecting, $i, $rtokens, $rtoken_map, |
3849
|
|
|
|
|
|
|
$max_token_index ); |
3850
|
|
|
|
|
|
|
|
3851
|
2
|
50
|
|
|
|
9
|
if ($found_target) { |
|
|
0
|
|
|
|
|
|
3852
|
|
|
|
|
|
|
|
3853
|
2
|
50
|
|
|
|
18
|
if ( length($here_doc_target) > 80 ) { |
|
|
50
|
|
|
|
|
|
3854
|
0
|
|
|
|
|
0
|
my $truncated = substr( $here_doc_target, 0, 80 ); |
3855
|
0
|
|
|
|
|
0
|
$self->complain("Long here-target: '$truncated' ...\n"); |
3856
|
|
|
|
|
|
|
} |
3857
|
|
|
|
|
|
|
elsif ( $here_doc_target !~ /^[A-Z_]\w+$/ ) { |
3858
|
0
|
|
|
|
|
0
|
$self->complain( |
3859
|
|
|
|
|
|
|
"Unconventional here-target: '$here_doc_target'\n"); |
3860
|
|
|
|
|
|
|
} |
3861
|
|
|
|
|
|
|
else { |
3862
|
|
|
|
|
|
|
## ok: nothing to complain about |
3863
|
|
|
|
|
|
|
} |
3864
|
|
|
|
|
|
|
|
3865
|
|
|
|
|
|
|
# Note that we put a leading space on the here quote |
3866
|
|
|
|
|
|
|
# character indicate that it may be preceded by spaces |
3867
|
2
|
|
|
|
|
8
|
$here_quote_character = SPACE . $here_quote_character; |
3868
|
2
|
|
|
|
|
5
|
push @{$rhere_target_list}, |
|
2
|
|
|
|
|
9
|
|
3869
|
|
|
|
|
|
|
[ $here_doc_target, $here_quote_character ]; |
3870
|
2
|
|
|
|
|
5
|
$type = 'h'; |
3871
|
|
|
|
|
|
|
} |
3872
|
|
|
|
|
|
|
|
3873
|
|
|
|
|
|
|
# target not found .. |
3874
|
|
|
|
|
|
|
elsif ( $expecting == TERM ) { |
3875
|
0
|
0
|
|
|
|
0
|
if ( !$saw_error ) { |
3876
|
|
|
|
|
|
|
|
3877
|
|
|
|
|
|
|
# shouldn't happen..arriving here implies an error in |
3878
|
|
|
|
|
|
|
# the logic in sub 'find_here_doc' |
3879
|
0
|
|
|
|
|
0
|
if (DEVEL_MODE) { |
3880
|
|
|
|
|
|
|
$self->Fault(<<EOM); |
3881
|
|
|
|
|
|
|
Program bug; didn't find here doc target |
3882
|
|
|
|
|
|
|
EOM |
3883
|
|
|
|
|
|
|
} |
3884
|
|
|
|
|
|
|
$self->warning( |
3885
|
0
|
|
|
|
|
0
|
"Possible program error: didn't find here doc target\n" |
3886
|
|
|
|
|
|
|
); |
3887
|
0
|
|
|
|
|
0
|
$self->report_definite_bug(); |
3888
|
|
|
|
|
|
|
} |
3889
|
|
|
|
|
|
|
} |
3890
|
|
|
|
|
|
|
|
3891
|
|
|
|
|
|
|
# Target not found, expecting==UNKNOWN |
3892
|
|
|
|
|
|
|
else { |
3893
|
0
|
|
|
|
|
0
|
$self->warning("didn't find here doc target after '<<~'\n"); |
3894
|
|
|
|
|
|
|
} |
3895
|
|
|
|
|
|
|
} |
3896
|
|
|
|
|
|
|
else { |
3897
|
0
|
|
|
|
|
0
|
$self->error_if_expecting_OPERATOR(); |
3898
|
|
|
|
|
|
|
} |
3899
|
2
|
|
|
|
|
6
|
return; |
3900
|
|
|
|
|
|
|
} ## end sub do_NEW_HERE_DOC |
3901
|
|
|
|
|
|
|
|
3902
|
|
|
|
|
|
|
sub do_POINTER { |
3903
|
|
|
|
|
|
|
|
3904
|
|
|
|
|
|
|
# '->' |
3905
|
886
|
|
|
886
|
0
|
1568
|
return; |
3906
|
|
|
|
|
|
|
} |
3907
|
|
|
|
|
|
|
|
3908
|
|
|
|
|
|
|
sub do_PLUS_PLUS { |
3909
|
|
|
|
|
|
|
|
3910
|
46
|
|
|
46
|
0
|
182
|
my $self = shift; |
3911
|
|
|
|
|
|
|
|
3912
|
|
|
|
|
|
|
# '++' |
3913
|
|
|
|
|
|
|
# type = 'pp' for pre-increment, '++' for post-increment |
3914
|
46
|
100
|
|
|
|
264
|
if ( $expecting == OPERATOR ) { $type = '++' } |
|
37
|
100
|
|
|
|
144
|
|
3915
|
7
|
|
|
|
|
22
|
elsif ( $expecting == TERM ) { $type = 'pp' } |
3916
|
|
|
|
|
|
|
|
3917
|
|
|
|
|
|
|
# handle ( $expecting == UNKNOWN ) |
3918
|
|
|
|
|
|
|
else { |
3919
|
|
|
|
|
|
|
|
3920
|
|
|
|
|
|
|
# look ahead .. |
3921
|
2
|
|
|
|
|
8
|
my ( $next_nonblank_token, $i_next ) = |
3922
|
|
|
|
|
|
|
$self->find_next_nonblank_token( $i, $rtokens, $max_token_index ); |
3923
|
|
|
|
|
|
|
|
3924
|
|
|
|
|
|
|
# Fix for c042: look past a side comment |
3925
|
2
|
50
|
|
|
|
22
|
if ( $next_nonblank_token eq '#' ) { |
3926
|
0
|
|
|
|
|
0
|
( $next_nonblank_token, $i_next ) = |
3927
|
|
|
|
|
|
|
$self->find_next_nonblank_token( $max_token_index, |
3928
|
|
|
|
|
|
|
$rtokens, $max_token_index ); |
3929
|
|
|
|
|
|
|
} |
3930
|
|
|
|
|
|
|
|
3931
|
2
|
50
|
|
|
|
11
|
if ( $next_nonblank_token eq '$' ) { $type = 'pp' } |
|
0
|
|
|
|
|
0
|
|
3932
|
|
|
|
|
|
|
} |
3933
|
46
|
|
|
|
|
113
|
return; |
3934
|
|
|
|
|
|
|
} ## end sub do_PLUS_PLUS |
3935
|
|
|
|
|
|
|
|
3936
|
|
|
|
|
|
|
sub do_FAT_COMMA { |
3937
|
|
|
|
|
|
|
|
3938
|
1025
|
|
|
1025
|
0
|
1809
|
my $self = shift; |
3939
|
|
|
|
|
|
|
|
3940
|
|
|
|
|
|
|
# '=>' |
3941
|
1025
|
50
|
|
|
|
2465
|
if ( $last_nonblank_type eq $tok ) { |
3942
|
0
|
|
|
|
|
0
|
$self->complain("Repeated '=>'s \n"); |
3943
|
|
|
|
|
|
|
} |
3944
|
|
|
|
|
|
|
|
3945
|
|
|
|
|
|
|
# patch for operator_expected: note if we are in the list (use.t) |
3946
|
|
|
|
|
|
|
# TODO: make version numbers a new token type |
3947
|
1025
|
100
|
|
|
|
2364
|
if ( $statement_type eq 'use' ) { $statement_type = '_use' } |
|
18
|
|
|
|
|
42
|
|
3948
|
1025
|
|
|
|
|
1717
|
return; |
3949
|
|
|
|
|
|
|
} ## end sub do_FAT_COMMA |
3950
|
|
|
|
|
|
|
|
3951
|
|
|
|
|
|
|
sub do_MINUS_MINUS { |
3952
|
|
|
|
|
|
|
|
3953
|
2
|
|
|
2
|
0
|
5
|
my $self = shift; |
3954
|
|
|
|
|
|
|
|
3955
|
|
|
|
|
|
|
# '--' |
3956
|
|
|
|
|
|
|
# type = 'mm' for pre-decrement, '--' for post-decrement |
3957
|
|
|
|
|
|
|
|
3958
|
2
|
50
|
|
|
|
15
|
if ( $expecting == OPERATOR ) { $type = '--' } |
|
0
|
50
|
|
|
|
0
|
|
3959
|
2
|
|
|
|
|
7
|
elsif ( $expecting == TERM ) { $type = 'mm' } |
3960
|
|
|
|
|
|
|
|
3961
|
|
|
|
|
|
|
# handle ( $expecting == UNKNOWN ) |
3962
|
|
|
|
|
|
|
else { |
3963
|
|
|
|
|
|
|
|
3964
|
|
|
|
|
|
|
# look ahead .. |
3965
|
0
|
|
|
|
|
0
|
my ( $next_nonblank_token, $i_next ) = |
3966
|
|
|
|
|
|
|
$self->find_next_nonblank_token( $i, $rtokens, $max_token_index ); |
3967
|
|
|
|
|
|
|
|
3968
|
|
|
|
|
|
|
# Fix for c042: look past a side comment |
3969
|
0
|
0
|
|
|
|
0
|
if ( $next_nonblank_token eq '#' ) { |
3970
|
0
|
|
|
|
|
0
|
( $next_nonblank_token, $i_next ) = |
3971
|
|
|
|
|
|
|
$self->find_next_nonblank_token( $max_token_index, |
3972
|
|
|
|
|
|
|
$rtokens, $max_token_index ); |
3973
|
|
|
|
|
|
|
} |
3974
|
|
|
|
|
|
|
|
3975
|
0
|
0
|
|
|
|
0
|
if ( $next_nonblank_token eq '$' ) { $type = 'mm' } |
|
0
|
|
|
|
|
0
|
|
3976
|
|
|
|
|
|
|
} |
3977
|
|
|
|
|
|
|
|
3978
|
2
|
|
|
|
|
5
|
return; |
3979
|
|
|
|
|
|
|
} ## end sub do_MINUS_MINUS |
3980
|
|
|
|
|
|
|
|
3981
|
|
|
|
|
|
|
sub do_LOGICAL_AND { |
3982
|
|
|
|
|
|
|
|
3983
|
58
|
|
|
58
|
0
|
136
|
my $self = shift; |
3984
|
|
|
|
|
|
|
|
3985
|
|
|
|
|
|
|
# '&&' |
3986
|
58
|
50
|
33
|
|
|
247
|
$self->error_if_expecting_TERM() |
3987
|
|
|
|
|
|
|
if ( $expecting == TERM && $last_nonblank_token ne ',' ); #c015 |
3988
|
58
|
|
|
|
|
121
|
return; |
3989
|
|
|
|
|
|
|
} ## end sub do_LOGICAL_AND |
3990
|
|
|
|
|
|
|
|
3991
|
|
|
|
|
|
|
sub do_LOGICAL_OR { |
3992
|
|
|
|
|
|
|
|
3993
|
74
|
|
|
74
|
0
|
170
|
my $self = shift; |
3994
|
|
|
|
|
|
|
|
3995
|
|
|
|
|
|
|
# '||' |
3996
|
74
|
100
|
66
|
|
|
378
|
$self->error_if_expecting_TERM() |
3997
|
|
|
|
|
|
|
if ( $expecting == TERM && $last_nonblank_token ne ',' ); #c015 |
3998
|
74
|
|
|
|
|
133
|
return; |
3999
|
|
|
|
|
|
|
} ## end sub do_LOGICAL_OR |
4000
|
|
|
|
|
|
|
|
4001
|
|
|
|
|
|
|
sub do_SLASH_SLASH { |
4002
|
|
|
|
|
|
|
|
4003
|
10
|
|
|
10
|
0
|
22
|
my $self = shift; |
4004
|
|
|
|
|
|
|
|
4005
|
|
|
|
|
|
|
# '//' |
4006
|
10
|
100
|
|
|
|
31
|
$self->error_if_expecting_TERM() |
4007
|
|
|
|
|
|
|
if ( $expecting == TERM ); |
4008
|
10
|
|
|
|
|
20
|
return; |
4009
|
|
|
|
|
|
|
} ## end sub do_SLASH_SLASH |
4010
|
|
|
|
|
|
|
|
4011
|
|
|
|
|
|
|
sub do_DIGITS { |
4012
|
|
|
|
|
|
|
|
4013
|
1934
|
|
|
1934
|
0
|
3195
|
my $self = shift; |
4014
|
|
|
|
|
|
|
|
4015
|
|
|
|
|
|
|
# 'd' = string of digits |
4016
|
1934
|
50
|
|
|
|
4255
|
$self->error_if_expecting_OPERATOR("Number") |
4017
|
|
|
|
|
|
|
if ( $expecting == OPERATOR ); |
4018
|
|
|
|
|
|
|
|
4019
|
1934
|
|
|
|
|
5147
|
my $number = $self->scan_number_fast(); |
4020
|
1934
|
50
|
|
|
|
4490
|
if ( !defined($number) ) { |
4021
|
|
|
|
|
|
|
|
4022
|
|
|
|
|
|
|
# shouldn't happen - we should always get a number |
4023
|
0
|
|
|
|
|
0
|
if (DEVEL_MODE) { |
4024
|
|
|
|
|
|
|
$self->Fault(<<EOM); |
4025
|
|
|
|
|
|
|
non-number beginning with digit--program bug |
4026
|
|
|
|
|
|
|
EOM |
4027
|
|
|
|
|
|
|
} |
4028
|
|
|
|
|
|
|
$self->warning( |
4029
|
0
|
|
|
|
|
0
|
"Unexpected error condition: non-number beginning with digit\n" |
4030
|
|
|
|
|
|
|
); |
4031
|
0
|
|
|
|
|
0
|
$self->report_definite_bug(); |
4032
|
|
|
|
|
|
|
} |
4033
|
1934
|
|
|
|
|
3108
|
return; |
4034
|
|
|
|
|
|
|
} ## end sub do_DIGITS |
4035
|
|
|
|
|
|
|
|
4036
|
|
|
|
|
|
|
sub do_ATTRIBUTE_LIST { |
4037
|
|
|
|
|
|
|
|
4038
|
39
|
|
|
39
|
0
|
121
|
my ( $self, $next_nonblank_token ) = @_; |
4039
|
|
|
|
|
|
|
|
4040
|
|
|
|
|
|
|
# Called at a bareword encountered while in an attribute list |
4041
|
|
|
|
|
|
|
# returns 'is_attribute': |
4042
|
|
|
|
|
|
|
# true if attribute found |
4043
|
|
|
|
|
|
|
# false if an attribute (continue parsing bareword) |
4044
|
|
|
|
|
|
|
|
4045
|
|
|
|
|
|
|
# treat bare word followed by open paren like qw( |
4046
|
39
|
100
|
|
|
|
120
|
if ( $next_nonblank_token eq '(' ) { |
4047
|
|
|
|
|
|
|
|
4048
|
|
|
|
|
|
|
# For something like: |
4049
|
|
|
|
|
|
|
# : prototype($$) |
4050
|
|
|
|
|
|
|
# we should let do_scan_sub see it so that it can see |
4051
|
|
|
|
|
|
|
# the prototype. All other attributes get parsed as a |
4052
|
|
|
|
|
|
|
# quoted string. |
4053
|
18
|
100
|
|
|
|
85
|
if ( $tok eq 'prototype' ) { |
4054
|
2
|
|
|
|
|
6
|
$id_scan_state = 'prototype'; |
4055
|
|
|
|
|
|
|
|
4056
|
|
|
|
|
|
|
# start just after the word 'prototype' |
4057
|
2
|
|
|
|
|
8
|
my $i_beg = $i + 1; |
4058
|
2
|
|
|
|
|
23
|
( $i, $tok, $type, $id_scan_state ) = $self->do_scan_sub( |
4059
|
|
|
|
|
|
|
{ |
4060
|
|
|
|
|
|
|
input_line => $input_line, |
4061
|
|
|
|
|
|
|
i => $i, |
4062
|
|
|
|
|
|
|
i_beg => $i_beg, |
4063
|
|
|
|
|
|
|
tok => $tok, |
4064
|
|
|
|
|
|
|
type => $type, |
4065
|
|
|
|
|
|
|
rtokens => $rtokens, |
4066
|
|
|
|
|
|
|
rtoken_map => $rtoken_map, |
4067
|
|
|
|
|
|
|
id_scan_state => $id_scan_state, |
4068
|
|
|
|
|
|
|
max_token_index => $max_token_index, |
4069
|
|
|
|
|
|
|
} |
4070
|
|
|
|
|
|
|
); |
4071
|
|
|
|
|
|
|
|
4072
|
|
|
|
|
|
|
# If successful, mark as type 'q' to be consistent |
4073
|
|
|
|
|
|
|
# with other attributes. Type 'w' would also work. |
4074
|
2
|
50
|
|
|
|
12
|
if ( $i > $i_beg ) { |
4075
|
2
|
|
|
|
|
5
|
$type = 'q'; |
4076
|
2
|
|
|
|
|
10
|
return 1; |
4077
|
|
|
|
|
|
|
} |
4078
|
|
|
|
|
|
|
|
4079
|
|
|
|
|
|
|
# If not successful, continue and parse as a quote. |
4080
|
|
|
|
|
|
|
} |
4081
|
|
|
|
|
|
|
|
4082
|
|
|
|
|
|
|
# All other attribute lists must be parsed as quotes |
4083
|
|
|
|
|
|
|
# (see 'signatures.t' for good examples) |
4084
|
16
|
|
|
|
|
58
|
$in_quote = $quote_items{'q'}; |
4085
|
16
|
|
|
|
|
47
|
$allowed_quote_modifiers = $quote_modifiers{'q'}; |
4086
|
16
|
|
|
|
|
35
|
$type = 'q'; |
4087
|
16
|
|
|
|
|
30
|
$quote_type = 'q'; |
4088
|
16
|
|
|
|
|
40
|
return 1; |
4089
|
|
|
|
|
|
|
} |
4090
|
|
|
|
|
|
|
|
4091
|
|
|
|
|
|
|
# handle bareword not followed by open paren |
4092
|
|
|
|
|
|
|
else { |
4093
|
21
|
|
|
|
|
66
|
$type = 'w'; |
4094
|
21
|
|
|
|
|
55
|
return 1; |
4095
|
|
|
|
|
|
|
} |
4096
|
|
|
|
|
|
|
|
4097
|
|
|
|
|
|
|
# attribute not found |
4098
|
0
|
|
|
|
|
0
|
return; |
4099
|
|
|
|
|
|
|
} ## end sub do_ATTRIBUTE_LIST |
4100
|
|
|
|
|
|
|
|
4101
|
|
|
|
|
|
|
sub do_QUOTED_BAREWORD { |
4102
|
|
|
|
|
|
|
|
4103
|
786
|
|
|
786
|
0
|
1475
|
my $self = shift; |
4104
|
|
|
|
|
|
|
|
4105
|
|
|
|
|
|
|
# find type of a bareword followed by a '=>' |
4106
|
786
|
100
|
|
|
|
4092
|
if ( $ris_constant->{$current_package}{$tok} ) { |
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
4107
|
14
|
|
|
|
|
37
|
$type = 'C'; |
4108
|
|
|
|
|
|
|
} |
4109
|
|
|
|
|
|
|
elsif ( $ris_user_function->{$current_package}{$tok} ) { |
4110
|
0
|
|
|
|
|
0
|
$type = 'U'; |
4111
|
0
|
|
|
|
|
0
|
$prototype = $ruser_function_prototype->{$current_package}{$tok}; |
4112
|
|
|
|
|
|
|
} |
4113
|
|
|
|
|
|
|
elsif ( $tok =~ /^v\d+$/ ) { |
4114
|
0
|
|
|
|
|
0
|
$type = 'v'; |
4115
|
0
|
|
|
|
|
0
|
$self->report_v_string($tok); |
4116
|
|
|
|
|
|
|
} |
4117
|
|
|
|
|
|
|
else { |
4118
|
|
|
|
|
|
|
|
4119
|
|
|
|
|
|
|
# Bareword followed by a fat comma - see 'git18.in' |
4120
|
|
|
|
|
|
|
# If tok is something like 'x17' then it could |
4121
|
|
|
|
|
|
|
# actually be operator x followed by number 17. |
4122
|
|
|
|
|
|
|
# For example, here: |
4123
|
|
|
|
|
|
|
# 123x17 => [ 792, 1224 ], |
4124
|
|
|
|
|
|
|
# (a key of 123 repeated 17 times, perhaps not |
4125
|
|
|
|
|
|
|
# what was intended). We will mark x17 as type |
4126
|
|
|
|
|
|
|
# 'n' and it will be split. If the previous token |
4127
|
|
|
|
|
|
|
# was also a bareword then it is not very clear is |
4128
|
|
|
|
|
|
|
# going on. In this case we will not be sure that |
4129
|
|
|
|
|
|
|
# an operator is expected, so we just mark it as a |
4130
|
|
|
|
|
|
|
# bareword. Perl is a little murky in what it does |
4131
|
|
|
|
|
|
|
# with stuff like this, and its behavior can change |
4132
|
|
|
|
|
|
|
# over time. Something like |
4133
|
|
|
|
|
|
|
# a x18 => [792, 1224], will compile as |
4134
|
|
|
|
|
|
|
# a key with 18 a's. But something like |
4135
|
|
|
|
|
|
|
# push @array, a x18; |
4136
|
|
|
|
|
|
|
# is a syntax error. |
4137
|
772
|
100
|
66
|
|
|
2813
|
if ( |
|
|
|
33
|
|
|
|
|
|
|
|
66
|
|
|
|
|
4138
|
|
|
|
|
|
|
$expecting == OPERATOR |
4139
|
|
|
|
|
|
|
&& substr( $tok, 0, 1 ) eq 'x' |
4140
|
|
|
|
|
|
|
&& ( length($tok) == 1 |
4141
|
|
|
|
|
|
|
|| substr( $tok, 1, 1 ) =~ /^\d/ ) |
4142
|
|
|
|
|
|
|
) |
4143
|
|
|
|
|
|
|
{ |
4144
|
3
|
|
|
|
|
6
|
$type = 'n'; |
4145
|
3
|
50
|
|
|
|
11
|
if ( $self->split_pretoken(1) ) { |
4146
|
3
|
|
|
|
|
6
|
$type = 'x'; |
4147
|
3
|
|
|
|
|
5
|
$tok = 'x'; |
4148
|
|
|
|
|
|
|
} |
4149
|
|
|
|
|
|
|
} |
4150
|
|
|
|
|
|
|
else { |
4151
|
|
|
|
|
|
|
|
4152
|
|
|
|
|
|
|
# git #18 |
4153
|
769
|
|
|
|
|
1334
|
$type = 'w'; |
4154
|
769
|
|
|
|
|
2034
|
$self->error_if_expecting_OPERATOR(); |
4155
|
|
|
|
|
|
|
} |
4156
|
|
|
|
|
|
|
} |
4157
|
786
|
|
|
|
|
1276
|
return; |
4158
|
|
|
|
|
|
|
} ## end sub do_QUOTED_BAREWORD |
4159
|
|
|
|
|
|
|
|
4160
|
|
|
|
|
|
|
sub do_X_OPERATOR { |
4161
|
|
|
|
|
|
|
|
4162
|
17
|
|
|
17
|
0
|
41
|
my $self = shift; |
4163
|
|
|
|
|
|
|
|
4164
|
17
|
100
|
|
|
|
73
|
if ( $tok eq 'x' ) { |
4165
|
15
|
50
|
|
|
|
86
|
if ( $rtokens->[ $i + 1 ] eq '=' ) { # x= |
4166
|
0
|
|
|
|
|
0
|
$tok = 'x='; |
4167
|
0
|
|
|
|
|
0
|
$type = $tok; |
4168
|
0
|
|
|
|
|
0
|
$i++; |
4169
|
|
|
|
|
|
|
} |
4170
|
|
|
|
|
|
|
else { |
4171
|
15
|
|
|
|
|
40
|
$type = 'x'; |
4172
|
|
|
|
|
|
|
} |
4173
|
|
|
|
|
|
|
} |
4174
|
|
|
|
|
|
|
else { |
4175
|
|
|
|
|
|
|
|
4176
|
|
|
|
|
|
|
# Split a pretoken like 'x10' into 'x' and '10'. |
4177
|
|
|
|
|
|
|
# Note: In previous versions of perltidy it was marked |
4178
|
|
|
|
|
|
|
# as a number, $type = 'n', and fixed downstream by the |
4179
|
|
|
|
|
|
|
# Formatter. |
4180
|
2
|
|
|
|
|
4
|
$type = 'n'; |
4181
|
2
|
50
|
|
|
|
8
|
if ( $self->split_pretoken(1) ) { |
4182
|
2
|
|
|
|
|
5
|
$type = 'x'; |
4183
|
2
|
|
|
|
|
5
|
$tok = 'x'; |
4184
|
|
|
|
|
|
|
} |
4185
|
|
|
|
|
|
|
} |
4186
|
17
|
|
|
|
|
39
|
return; |
4187
|
|
|
|
|
|
|
} ## end sub do_X_OPERATOR |
4188
|
|
|
|
|
|
|
|
4189
|
|
|
|
|
|
|
sub do_USE_CONSTANT { |
4190
|
|
|
|
|
|
|
|
4191
|
16
|
|
|
16
|
0
|
47
|
my $self = shift; |
4192
|
|
|
|
|
|
|
|
4193
|
16
|
|
|
|
|
64
|
$self->scan_bare_identifier(); |
4194
|
16
|
|
|
|
|
85
|
my ( $next_nonblank_tok2, $i_next2 ) = |
4195
|
|
|
|
|
|
|
$self->find_next_nonblank_token( $i, $rtokens, $max_token_index ); |
4196
|
|
|
|
|
|
|
|
4197
|
16
|
50
|
|
|
|
89
|
if ($next_nonblank_tok2) { |
4198
|
|
|
|
|
|
|
|
4199
|
16
|
100
|
|
|
|
59
|
if ( $is_keyword{$next_nonblank_tok2} ) { |
4200
|
|
|
|
|
|
|
|
4201
|
|
|
|
|
|
|
# Assume qw is used as a quote and okay, as in: |
4202
|
|
|
|
|
|
|
# use constant qw{ DEBUG 0 }; |
4203
|
|
|
|
|
|
|
# Not worth trying to parse for just a warning |
4204
|
|
|
|
|
|
|
|
4205
|
|
|
|
|
|
|
# NOTE: This warning is deactivated because recent |
4206
|
|
|
|
|
|
|
# versions of perl do not complain here, but |
4207
|
|
|
|
|
|
|
# the coding is retained for reference. |
4208
|
1
|
|
|
|
|
2
|
if ( 0 && $next_nonblank_tok2 ne 'qw' ) { |
4209
|
|
|
|
|
|
|
$self->warning( |
4210
|
|
|
|
|
|
|
"Attempting to define constant '$next_nonblank_tok2' which is a perl keyword\n" |
4211
|
|
|
|
|
|
|
); |
4212
|
|
|
|
|
|
|
} |
4213
|
|
|
|
|
|
|
} |
4214
|
|
|
|
|
|
|
|
4215
|
|
|
|
|
|
|
else { |
4216
|
15
|
|
|
|
|
336
|
$ris_constant->{$current_package}{$next_nonblank_tok2} = 1; |
4217
|
|
|
|
|
|
|
} |
4218
|
|
|
|
|
|
|
} |
4219
|
16
|
|
|
|
|
50
|
return; |
4220
|
|
|
|
|
|
|
} ## end sub do_USE_CONSTANT |
4221
|
|
|
|
|
|
|
|
4222
|
|
|
|
|
|
|
sub do_KEYWORD { |
4223
|
|
|
|
|
|
|
|
4224
|
2641
|
|
|
2641
|
0
|
4493
|
my $self = shift; |
4225
|
|
|
|
|
|
|
|
4226
|
|
|
|
|
|
|
# found a keyword - set any associated flags |
4227
|
2641
|
|
|
|
|
4460
|
$type = 'k'; |
4228
|
|
|
|
|
|
|
|
4229
|
|
|
|
|
|
|
# Since for and foreach may not be followed immediately |
4230
|
|
|
|
|
|
|
# by an opening paren, we have to remember which keyword |
4231
|
|
|
|
|
|
|
# is associated with the next '(' |
4232
|
|
|
|
|
|
|
# Previously, before update c230 : if ( $is_for_foreach{$tok} ) { |
4233
|
|
|
|
|
|
|
##(if elsif unless while until for foreach switch case given when catch) |
4234
|
2641
|
100
|
|
|
|
7473
|
if ( $is_blocktype_with_paren{$tok} ) { |
4235
|
395
|
100
|
|
|
|
1381
|
if ( new_statement_ok() ) { |
4236
|
309
|
|
|
|
|
714
|
$want_paren = $tok; |
4237
|
|
|
|
|
|
|
} |
4238
|
|
|
|
|
|
|
} |
4239
|
|
|
|
|
|
|
|
4240
|
|
|
|
|
|
|
# recognize 'use' statements, which are special |
4241
|
2641
|
100
|
100
|
|
|
18746
|
if ( $is_use_require{$tok} ) { |
|
|
100
|
66
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
4242
|
175
|
|
|
|
|
349
|
$statement_type = $tok; |
4243
|
175
|
50
|
|
|
|
577
|
$self->error_if_expecting_OPERATOR() |
4244
|
|
|
|
|
|
|
if ( $expecting == OPERATOR ); |
4245
|
|
|
|
|
|
|
} |
4246
|
|
|
|
|
|
|
|
4247
|
|
|
|
|
|
|
# remember my and our to check for trailing ": shared" |
4248
|
|
|
|
|
|
|
elsif ( $is_my_our_state{$tok} ) { |
4249
|
628
|
|
|
|
|
1360
|
$statement_type = $tok; |
4250
|
|
|
|
|
|
|
} |
4251
|
|
|
|
|
|
|
|
4252
|
|
|
|
|
|
|
# Check for misplaced 'elsif' and 'else', but allow isolated |
4253
|
|
|
|
|
|
|
# else or elsif blocks to be formatted. This is indicated |
4254
|
|
|
|
|
|
|
# by a last noblank token of ';' |
4255
|
|
|
|
|
|
|
elsif ( $tok eq 'elsif' ) { |
4256
|
29
|
50
|
66
|
|
|
208
|
if ( |
4257
|
|
|
|
|
|
|
$last_nonblank_token ne ';' |
4258
|
|
|
|
|
|
|
|
4259
|
|
|
|
|
|
|
## !~ /^(if|elsif|unless)$/ |
4260
|
|
|
|
|
|
|
&& !$is_if_elsif_unless{$last_nonblank_block_type} |
4261
|
|
|
|
|
|
|
) |
4262
|
|
|
|
|
|
|
{ |
4263
|
0
|
|
|
|
|
0
|
$self->warning( |
4264
|
|
|
|
|
|
|
"expecting '$tok' to follow one of 'if|elsif|unless'\n"); |
4265
|
|
|
|
|
|
|
} |
4266
|
|
|
|
|
|
|
} |
4267
|
|
|
|
|
|
|
elsif ( $tok eq 'else' ) { |
4268
|
|
|
|
|
|
|
|
4269
|
|
|
|
|
|
|
# patched for SWITCH/CASE |
4270
|
44
|
50
|
66
|
|
|
646
|
if ( |
|
|
|
66
|
|
|
|
|
4271
|
|
|
|
|
|
|
$last_nonblank_token ne ';' |
4272
|
|
|
|
|
|
|
|
4273
|
|
|
|
|
|
|
## !~ /^(if|elsif|unless|case|when)$/ |
4274
|
|
|
|
|
|
|
&& !$is_if_elsif_unless_case_when{$last_nonblank_block_type} |
4275
|
|
|
|
|
|
|
|
4276
|
|
|
|
|
|
|
# patch to avoid an unwanted error message for |
4277
|
|
|
|
|
|
|
# the case of a parenless 'case' (RT 105484): |
4278
|
|
|
|
|
|
|
# switch ( 1 ) { case x { 2 } else { } } |
4279
|
|
|
|
|
|
|
## !~ /^(if|elsif|unless|case|when)$/ |
4280
|
|
|
|
|
|
|
&& !$is_if_elsif_unless_case_when{$statement_type} |
4281
|
|
|
|
|
|
|
) |
4282
|
|
|
|
|
|
|
{ |
4283
|
0
|
|
|
|
|
0
|
$self->warning( |
4284
|
|
|
|
|
|
|
"expecting '$tok' to follow one of 'if|elsif|unless|case|when'\n" |
4285
|
|
|
|
|
|
|
); |
4286
|
|
|
|
|
|
|
} |
4287
|
|
|
|
|
|
|
} |
4288
|
|
|
|
|
|
|
|
4289
|
|
|
|
|
|
|
# patch for SWITCH/CASE if 'case' and 'when are |
4290
|
|
|
|
|
|
|
# treated as keywords. Also 'default' for Switch::Plain |
4291
|
|
|
|
|
|
|
elsif ($tok eq 'when' |
4292
|
|
|
|
|
|
|
|| $tok eq 'case' |
4293
|
|
|
|
|
|
|
|| $tok eq 'default' ) |
4294
|
|
|
|
|
|
|
{ |
4295
|
56
|
|
|
|
|
115
|
$statement_type = $tok; # next '{' is block |
4296
|
|
|
|
|
|
|
} |
4297
|
|
|
|
|
|
|
|
4298
|
|
|
|
|
|
|
# feature 'err' was removed in Perl 5.10. So mark this as |
4299
|
|
|
|
|
|
|
# a bareword unless an operator is expected (see c158). |
4300
|
|
|
|
|
|
|
elsif ( $tok eq 'err' ) { |
4301
|
1
|
50
|
|
|
|
6
|
if ( $expecting != OPERATOR ) { $type = 'w' } |
|
1
|
|
|
|
|
2
|
|
4302
|
|
|
|
|
|
|
} |
4303
|
|
|
|
|
|
|
else { |
4304
|
|
|
|
|
|
|
## no special treatment needed |
4305
|
|
|
|
|
|
|
} |
4306
|
|
|
|
|
|
|
|
4307
|
2641
|
|
|
|
|
4748
|
return; |
4308
|
|
|
|
|
|
|
} ## end sub do_KEYWORD |
4309
|
|
|
|
|
|
|
|
4310
|
|
|
|
|
|
|
sub do_QUOTE_OPERATOR { |
4311
|
|
|
|
|
|
|
|
4312
|
202
|
|
|
202
|
0
|
471
|
my $self = shift; |
4313
|
|
|
|
|
|
|
|
4314
|
202
|
50
|
|
|
|
724
|
if ( $expecting == OPERATOR ) { |
4315
|
|
|
|
|
|
|
|
4316
|
|
|
|
|
|
|
# Be careful not to call an error for a qw quote |
4317
|
|
|
|
|
|
|
# where a parenthesized list is allowed. For example, |
4318
|
|
|
|
|
|
|
# it could also be a for/foreach construct such as |
4319
|
|
|
|
|
|
|
# |
4320
|
|
|
|
|
|
|
# foreach my $key qw\Uno Due Tres Quadro\ { |
4321
|
|
|
|
|
|
|
# print "Set $key\n"; |
4322
|
|
|
|
|
|
|
# } |
4323
|
|
|
|
|
|
|
# |
4324
|
|
|
|
|
|
|
|
4325
|
|
|
|
|
|
|
# Or it could be a function call. |
4326
|
|
|
|
|
|
|
# NOTE: Braces in something like &{ xxx } are not |
4327
|
|
|
|
|
|
|
# marked as a block, we might have a method call. |
4328
|
|
|
|
|
|
|
# &method(...), $method->(..), &{method}(...), |
4329
|
|
|
|
|
|
|
# $ref[2](list) is ok & short for $ref[2]->(list) |
4330
|
|
|
|
|
|
|
# |
4331
|
|
|
|
|
|
|
# See notes in 'sub code_block_type' and |
4332
|
|
|
|
|
|
|
# 'sub is_non_structural_brace' |
4333
|
|
|
|
|
|
|
|
4334
|
|
|
|
|
|
|
my $paren_list_possible = $tok eq 'qw' |
4335
|
|
|
|
|
|
|
&& ( $last_nonblank_token =~ /^([\]\}\&]|\-\>)/ |
4336
|
0
|
|
0
|
|
|
0
|
|| $is_for_foreach{$want_paren} ); |
4337
|
|
|
|
|
|
|
|
4338
|
0
|
0
|
|
|
|
0
|
if ( !$paren_list_possible ) { |
4339
|
0
|
|
|
|
|
0
|
$self->error_if_expecting_OPERATOR(); |
4340
|
|
|
|
|
|
|
} |
4341
|
|
|
|
|
|
|
} |
4342
|
202
|
|
|
|
|
553
|
$in_quote = $quote_items{$tok}; |
4343
|
202
|
|
|
|
|
533
|
$allowed_quote_modifiers = $quote_modifiers{$tok}; |
4344
|
|
|
|
|
|
|
|
4345
|
|
|
|
|
|
|
# All quote types are 'Q' except possibly qw quotes. |
4346
|
|
|
|
|
|
|
# qw quotes are special in that they may generally be trimmed |
4347
|
|
|
|
|
|
|
# of leading and trailing whitespace. So they are given a |
4348
|
|
|
|
|
|
|
# separate type, 'q', unless requested otherwise. |
4349
|
202
|
100
|
66
|
|
|
1078
|
$type = |
4350
|
|
|
|
|
|
|
( $tok eq 'qw' && $self->[_trim_qw_] ) |
4351
|
|
|
|
|
|
|
? 'q' |
4352
|
|
|
|
|
|
|
: 'Q'; |
4353
|
202
|
|
|
|
|
424
|
$quote_type = $type; |
4354
|
202
|
|
|
|
|
390
|
return; |
4355
|
|
|
|
|
|
|
} ## end sub do_QUOTE_OPERATOR |
4356
|
|
|
|
|
|
|
|
4357
|
|
|
|
|
|
|
sub do_UNKNOWN_BAREWORD { |
4358
|
|
|
|
|
|
|
|
4359
|
957
|
|
|
957
|
0
|
2249
|
my ( $self, $next_nonblank_token ) = @_; |
4360
|
|
|
|
|
|
|
|
4361
|
957
|
|
|
|
|
3352
|
$self->scan_bare_identifier(); |
4362
|
|
|
|
|
|
|
|
4363
|
957
|
100
|
100
|
|
|
3732
|
if ( $statement_type eq 'use' |
4364
|
|
|
|
|
|
|
&& $last_nonblank_token eq 'use' ) |
4365
|
|
|
|
|
|
|
{ |
4366
|
108
|
|
|
|
|
517
|
$rsaw_use_module->{$current_package}->{$tok} = 1; |
4367
|
|
|
|
|
|
|
} |
4368
|
|
|
|
|
|
|
|
4369
|
957
|
100
|
|
|
|
2534
|
if ( $type eq 'w' ) { |
4370
|
|
|
|
|
|
|
|
4371
|
932
|
50
|
|
|
|
2556
|
if ( $expecting == OPERATOR ) { |
4372
|
|
|
|
|
|
|
|
4373
|
|
|
|
|
|
|
# Patch to avoid error message for RPerl overloaded |
4374
|
|
|
|
|
|
|
# operator functions: use overload |
4375
|
|
|
|
|
|
|
# '+' => \&sse_add, |
4376
|
|
|
|
|
|
|
# '-' => \&sse_sub, |
4377
|
|
|
|
|
|
|
# '*' => \&sse_mul, |
4378
|
|
|
|
|
|
|
# '/' => \&sse_div; |
4379
|
|
|
|
|
|
|
# TODO: this could eventually be generalized |
4380
|
0
|
0
|
0
|
|
|
0
|
if ( $rsaw_use_module->{$current_package}->{'RPerl'} |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
4381
|
|
|
|
|
|
|
&& $tok =~ /^sse_(mul|div|add|sub)$/ ) |
4382
|
|
|
|
|
|
|
{ |
4383
|
|
|
|
|
|
|
|
4384
|
|
|
|
|
|
|
} |
4385
|
|
|
|
|
|
|
|
4386
|
|
|
|
|
|
|
# Fix part 1 for git #63 in which a comment falls |
4387
|
|
|
|
|
|
|
# between an -> and the following word. An |
4388
|
|
|
|
|
|
|
# alternate fix would be to change operator_expected |
4389
|
|
|
|
|
|
|
# to return an UNKNOWN for this type. |
4390
|
|
|
|
|
|
|
elsif ( $last_nonblank_type eq '->' ) { |
4391
|
|
|
|
|
|
|
|
4392
|
|
|
|
|
|
|
} |
4393
|
|
|
|
|
|
|
|
4394
|
|
|
|
|
|
|
# don't complain about possible indirect object |
4395
|
|
|
|
|
|
|
# notation. |
4396
|
|
|
|
|
|
|
# For example: |
4397
|
|
|
|
|
|
|
# package main; |
4398
|
|
|
|
|
|
|
# sub new($) { ... } |
4399
|
|
|
|
|
|
|
# $b = new A::; # calls A::new |
4400
|
|
|
|
|
|
|
# $c = new A; # same thing but suspicious |
4401
|
|
|
|
|
|
|
# This will call A::new but we have a 'new' in |
4402
|
|
|
|
|
|
|
# main:: which looks like a constant. |
4403
|
|
|
|
|
|
|
# |
4404
|
|
|
|
|
|
|
elsif ( $last_nonblank_type eq 'C' ) { |
4405
|
0
|
0
|
|
|
|
0
|
if ( $tok !~ /::$/ ) { |
4406
|
0
|
|
|
|
|
0
|
$self->complain(<<EOM); |
4407
|
|
|
|
|
|
|
Expecting operator after '$last_nonblank_token' but found bare word '$tok' |
4408
|
|
|
|
|
|
|
Maybe indirectet object notation? |
4409
|
|
|
|
|
|
|
EOM |
4410
|
|
|
|
|
|
|
} |
4411
|
|
|
|
|
|
|
} |
4412
|
|
|
|
|
|
|
else { |
4413
|
0
|
|
|
|
|
0
|
$self->error_if_expecting_OPERATOR("bareword"); |
4414
|
|
|
|
|
|
|
} |
4415
|
|
|
|
|
|
|
} |
4416
|
|
|
|
|
|
|
|
4417
|
|
|
|
|
|
|
# mark bare words immediately followed by a paren as |
4418
|
|
|
|
|
|
|
# functions |
4419
|
932
|
|
|
|
|
2563
|
$next_tok = $rtokens->[ $i + 1 ]; |
4420
|
932
|
100
|
|
|
|
2623
|
if ( $next_tok eq '(' ) { |
4421
|
|
|
|
|
|
|
|
4422
|
|
|
|
|
|
|
# Patch for issue c151, where we are processing a snippet and |
4423
|
|
|
|
|
|
|
# have not seen that SPACE is a constant. In this case 'x' is |
4424
|
|
|
|
|
|
|
# probably an operator. The only disadvantage with an incorrect |
4425
|
|
|
|
|
|
|
# guess is that the space after it may be incorrect. For example |
4426
|
|
|
|
|
|
|
# $str .= SPACE x ( 16 - length($str) ); See also b1410. |
4427
|
276
|
50
|
33
|
|
|
1554
|
if ( $tok eq 'x' && $last_nonblank_type eq 'w' ) { $type = 'x' } |
|
0
|
50
|
|
|
|
0
|
|
4428
|
|
|
|
|
|
|
|
4429
|
|
|
|
|
|
|
# Fix part 2 for git #63. Leave type as 'w' to keep |
4430
|
|
|
|
|
|
|
# the type the same as if the -> were not separated |
4431
|
276
|
|
|
|
|
580
|
elsif ( $last_nonblank_type ne '->' ) { $type = 'U' } |
4432
|
|
|
|
|
|
|
|
4433
|
|
|
|
|
|
|
# not a special case |
4434
|
|
|
|
|
|
|
else { } |
4435
|
|
|
|
|
|
|
|
4436
|
|
|
|
|
|
|
} |
4437
|
|
|
|
|
|
|
|
4438
|
|
|
|
|
|
|
# underscore after file test operator is file handle |
4439
|
932
|
50
|
66
|
|
|
2723
|
if ( $tok eq '_' && $last_nonblank_type eq 'F' ) { |
4440
|
0
|
|
|
|
|
0
|
$type = 'Z'; |
4441
|
|
|
|
|
|
|
} |
4442
|
|
|
|
|
|
|
|
4443
|
|
|
|
|
|
|
# patch for SWITCH/CASE if 'case' and 'when are |
4444
|
|
|
|
|
|
|
# not treated as keywords: |
4445
|
932
|
50
|
33
|
|
|
4849
|
if ( |
|
|
|
33
|
|
|
|
|
|
|
|
33
|
|
|
|
|
4446
|
|
|
|
|
|
|
( $tok eq 'case' && $rbrace_type->[$brace_depth] eq 'switch' ) |
4447
|
|
|
|
|
|
|
|| ( $tok eq 'when' |
4448
|
|
|
|
|
|
|
&& $rbrace_type->[$brace_depth] eq 'given' ) |
4449
|
|
|
|
|
|
|
) |
4450
|
|
|
|
|
|
|
{ |
4451
|
0
|
|
|
|
|
0
|
$statement_type = $tok; # next '{' is block |
4452
|
0
|
|
|
|
|
0
|
$type = 'k'; # for keyword syntax coloring |
4453
|
|
|
|
|
|
|
} |
4454
|
932
|
100
|
|
|
|
2708
|
if ( $next_nonblank_token eq '(' ) { |
4455
|
|
|
|
|
|
|
|
4456
|
|
|
|
|
|
|
# patch for SWITCH/CASE if switch and given not keywords |
4457
|
|
|
|
|
|
|
# Switch is not a perl 5 keyword, but we will gamble |
4458
|
|
|
|
|
|
|
# and mark switch followed by paren as a keyword. This |
4459
|
|
|
|
|
|
|
# is only necessary to get html syntax coloring nice, |
4460
|
|
|
|
|
|
|
# and does not commit this as being a switch/case. |
4461
|
241
|
50
|
33
|
|
|
1877
|
if ( $tok eq 'switch' || $tok eq 'given' ) { |
|
|
50
|
33
|
|
|
|
|
4462
|
0
|
|
|
|
|
0
|
$type = 'k'; # for keyword syntax coloring |
4463
|
|
|
|
|
|
|
} |
4464
|
|
|
|
|
|
|
|
4465
|
|
|
|
|
|
|
# mark 'x' as operator for something like this (see b1410) |
4466
|
|
|
|
|
|
|
# my $line = join( LD_X, map { LD_H x ( $_ + 2 ) } @$widths ); |
4467
|
|
|
|
|
|
|
elsif ( $tok eq 'x' && $last_nonblank_type eq 'w' ) { |
4468
|
0
|
|
|
|
|
0
|
$type = 'x'; |
4469
|
|
|
|
|
|
|
} |
4470
|
|
|
|
|
|
|
else { |
4471
|
|
|
|
|
|
|
## not a special case |
4472
|
|
|
|
|
|
|
} |
4473
|
|
|
|
|
|
|
} |
4474
|
|
|
|
|
|
|
} |
4475
|
957
|
|
|
|
|
1795
|
return; |
4476
|
|
|
|
|
|
|
} ## end sub do_UNKNOWN_BAREWORD |
4477
|
|
|
|
|
|
|
|
4478
|
|
|
|
|
|
|
sub sub_attribute_ok_here { |
4479
|
|
|
|
|
|
|
|
4480
|
35
|
|
|
35
|
0
|
142
|
my ( $self, $tok_kw, $next_nonblank_token, $i_next ) = @_; |
4481
|
|
|
|
|
|
|
|
4482
|
|
|
|
|
|
|
# Decide if 'sub :' can be the start of a sub attribute list. |
4483
|
|
|
|
|
|
|
# We will decide based on if the colon is followed by a |
4484
|
|
|
|
|
|
|
# bareword which is not a keyword. |
4485
|
|
|
|
|
|
|
# Changed inext+1 to inext to fixed case b1190. |
4486
|
35
|
|
|
|
|
78
|
my $sub_attribute_ok_here; |
4487
|
35
|
50
|
66
|
|
|
197
|
if ( $is_sub{$tok_kw} |
|
|
|
66
|
|
|
|
|
4488
|
|
|
|
|
|
|
&& $expecting != OPERATOR |
4489
|
|
|
|
|
|
|
&& $next_nonblank_token eq ':' ) |
4490
|
|
|
|
|
|
|
{ |
4491
|
3
|
|
|
|
|
17
|
my ( $nn_nonblank_token, $i_nn ) = |
4492
|
|
|
|
|
|
|
$self->find_next_nonblank_token( $i_next, $rtokens, |
4493
|
|
|
|
|
|
|
$max_token_index ); |
4494
|
|
|
|
|
|
|
$sub_attribute_ok_here = |
4495
|
|
|
|
|
|
|
$nn_nonblank_token =~ /^\w/ |
4496
|
|
|
|
|
|
|
&& $nn_nonblank_token !~ /^\d/ |
4497
|
3
|
|
66
|
|
|
50
|
&& !$is_keyword{$nn_nonblank_token}; |
4498
|
|
|
|
|
|
|
} |
4499
|
35
|
|
|
|
|
222
|
return $sub_attribute_ok_here; |
4500
|
|
|
|
|
|
|
} ## end sub sub_attribute_ok_here |
4501
|
|
|
|
|
|
|
|
4502
|
|
|
|
|
|
|
sub do_BAREWORD { |
4503
|
|
|
|
|
|
|
|
4504
|
5832
|
|
|
5832
|
0
|
10830
|
my ( $self, $is_END_or_DATA ) = @_; |
4505
|
|
|
|
|
|
|
|
4506
|
|
|
|
|
|
|
# handle a bareword token: |
4507
|
|
|
|
|
|
|
# returns |
4508
|
|
|
|
|
|
|
# true if this token ends the current line |
4509
|
|
|
|
|
|
|
# false otherwise |
4510
|
|
|
|
|
|
|
|
4511
|
5832
|
|
|
|
|
16261
|
my ( $next_nonblank_token, $i_next ) = |
4512
|
|
|
|
|
|
|
$self->find_next_nonblank_token( $i, $rtokens, $max_token_index ); |
4513
|
|
|
|
|
|
|
|
4514
|
|
|
|
|
|
|
# a bare word immediately followed by :: is not a keyword; |
4515
|
|
|
|
|
|
|
# use $tok_kw when testing for keywords to avoid a mistake |
4516
|
5832
|
|
|
|
|
11025
|
my $tok_kw = $tok; |
4517
|
5832
|
100
|
100
|
|
|
17319
|
if ( $rtokens->[ $i + 1 ] eq ':' |
4518
|
|
|
|
|
|
|
&& $rtokens->[ $i + 2 ] eq ':' ) |
4519
|
|
|
|
|
|
|
{ |
4520
|
266
|
|
|
|
|
617
|
$tok_kw .= '::'; |
4521
|
|
|
|
|
|
|
} |
4522
|
|
|
|
|
|
|
|
4523
|
5832
|
100
|
|
|
|
13183
|
if ( $self->[_in_attribute_list_] ) { |
4524
|
39
|
|
|
|
|
206
|
my $is_attribute = $self->do_ATTRIBUTE_LIST($next_nonblank_token); |
4525
|
39
|
50
|
|
|
|
148
|
return if ($is_attribute); |
4526
|
|
|
|
|
|
|
} |
4527
|
|
|
|
|
|
|
|
4528
|
|
|
|
|
|
|
#---------------------------------------- |
4529
|
|
|
|
|
|
|
# Starting final if-elsif- chain of tests |
4530
|
|
|
|
|
|
|
#---------------------------------------- |
4531
|
|
|
|
|
|
|
|
4532
|
|
|
|
|
|
|
# This is the return flag: |
4533
|
|
|
|
|
|
|
# true => this is the last token on the line |
4534
|
|
|
|
|
|
|
# false => keep tokenizing the line |
4535
|
5793
|
|
|
|
|
8750
|
my $is_last; |
4536
|
|
|
|
|
|
|
|
4537
|
|
|
|
|
|
|
# The following blocks of code must update these vars: |
4538
|
|
|
|
|
|
|
# $type - the final token type, must always be set |
4539
|
|
|
|
|
|
|
|
4540
|
|
|
|
|
|
|
# In addition, if additional pretokens are added: |
4541
|
|
|
|
|
|
|
# $tok - the final token |
4542
|
|
|
|
|
|
|
# $i - the index of the last pretoken |
4543
|
|
|
|
|
|
|
|
4544
|
|
|
|
|
|
|
# They may also need to check and set various flags |
4545
|
|
|
|
|
|
|
|
4546
|
|
|
|
|
|
|
# Scan a bare word following a -> as an identifier; it could |
4547
|
|
|
|
|
|
|
# have a long package name. Fixes c037, c041. |
4548
|
5793
|
100
|
100
|
|
|
94991
|
if ( $last_nonblank_token eq '->' ) { |
|
|
100
|
100
|
|
|
|
|
|
|
100
|
66
|
|
|
|
|
|
|
100
|
66
|
|
|
|
|
|
|
100
|
100
|
|
|
|
|
|
|
100
|
66
|
|
|
|
|
|
|
100
|
66
|
|
|
|
|
|
|
50
|
100
|
|
|
|
|
|
|
50
|
100
|
|
|
|
|
|
|
100
|
33
|
|
|
|
|
|
|
100
|
0
|
|
|
|
|
|
|
100
|
0
|
|
|
|
|
|
|
100
|
33
|
|
|
|
|
|
|
100
|
0
|
|
|
|
|
|
|
100
|
0
|
|
|
|
|
|
|
100
|
66
|
|
|
|
|
|
|
100
|
100
|
|
|
|
|
|
|
100
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
|
66
|
|
|
|
|
4549
|
669
|
|
|
|
|
2500
|
$self->scan_bare_identifier(); |
4550
|
|
|
|
|
|
|
|
4551
|
|
|
|
|
|
|
# a bareward after '->' gets type 'i' |
4552
|
669
|
|
|
|
|
1243
|
$type = 'i'; |
4553
|
|
|
|
|
|
|
} |
4554
|
|
|
|
|
|
|
|
4555
|
|
|
|
|
|
|
# Quote a word followed by => operator |
4556
|
|
|
|
|
|
|
# unless the word __END__ or __DATA__ and the only word on |
4557
|
|
|
|
|
|
|
# the line. |
4558
|
|
|
|
|
|
|
elsif ( !$is_END_or_DATA |
4559
|
|
|
|
|
|
|
&& $next_nonblank_token eq '=' |
4560
|
|
|
|
|
|
|
&& $rtokens->[ $i_next + 1 ] eq '>' ) |
4561
|
|
|
|
|
|
|
{ |
4562
|
786
|
|
|
|
|
2500
|
$self->do_QUOTED_BAREWORD(); |
4563
|
|
|
|
|
|
|
} |
4564
|
|
|
|
|
|
|
|
4565
|
|
|
|
|
|
|
# quote a bare word within braces..like xxx->{s}; note that we |
4566
|
|
|
|
|
|
|
# must be sure this is not a structural brace, to avoid |
4567
|
|
|
|
|
|
|
# mistaking {s} in the following for a quoted bare word: |
4568
|
|
|
|
|
|
|
# for(@[){s}bla}BLA} |
4569
|
|
|
|
|
|
|
# Also treat q in something like var{-q} as a bare word, not |
4570
|
|
|
|
|
|
|
# a quote operator |
4571
|
|
|
|
|
|
|
elsif ( |
4572
|
|
|
|
|
|
|
$next_nonblank_token eq '}' |
4573
|
|
|
|
|
|
|
&& ( |
4574
|
|
|
|
|
|
|
$last_nonblank_type eq 'L' |
4575
|
|
|
|
|
|
|
|| ( $last_nonblank_type eq 'm' |
4576
|
|
|
|
|
|
|
&& $last_last_nonblank_type eq 'L' ) |
4577
|
|
|
|
|
|
|
) |
4578
|
|
|
|
|
|
|
) |
4579
|
|
|
|
|
|
|
{ |
4580
|
100
|
|
|
|
|
263
|
$type = 'w'; |
4581
|
|
|
|
|
|
|
} |
4582
|
|
|
|
|
|
|
|
4583
|
|
|
|
|
|
|
# handle operator x (now we know it isn't $x=) |
4584
|
|
|
|
|
|
|
elsif ( |
4585
|
|
|
|
|
|
|
$expecting == OPERATOR |
4586
|
|
|
|
|
|
|
&& substr( $tok, 0, 1 ) eq 'x' |
4587
|
|
|
|
|
|
|
&& ( length($tok) == 1 |
4588
|
|
|
|
|
|
|
|| substr( $tok, 1, 1 ) =~ /^\d/ ) |
4589
|
|
|
|
|
|
|
) |
4590
|
|
|
|
|
|
|
{ |
4591
|
17
|
|
|
|
|
137
|
$self->do_X_OPERATOR(); |
4592
|
|
|
|
|
|
|
} |
4593
|
|
|
|
|
|
|
elsif ( $tok_kw eq 'CORE::' ) { |
4594
|
3
|
|
|
|
|
9
|
$type = $tok = $tok_kw; |
4595
|
3
|
|
|
|
|
6
|
$i += 2; |
4596
|
|
|
|
|
|
|
} |
4597
|
|
|
|
|
|
|
elsif ( ( $tok eq 'strict' ) |
4598
|
|
|
|
|
|
|
and ( $last_nonblank_token eq 'use' ) ) |
4599
|
|
|
|
|
|
|
{ |
4600
|
14
|
|
|
|
|
40
|
$self->[_saw_use_strict_] = 1; |
4601
|
14
|
|
|
|
|
83
|
$self->scan_bare_identifier(); |
4602
|
|
|
|
|
|
|
} |
4603
|
|
|
|
|
|
|
|
4604
|
|
|
|
|
|
|
elsif ( ( $tok eq 'warnings' ) |
4605
|
|
|
|
|
|
|
and ( $last_nonblank_token eq 'use' ) ) |
4606
|
|
|
|
|
|
|
{ |
4607
|
7
|
|
|
|
|
22
|
$self->[_saw_perl_dash_w_] = 1; |
4608
|
|
|
|
|
|
|
|
4609
|
|
|
|
|
|
|
# scan as identifier, so that we pick up something like: |
4610
|
|
|
|
|
|
|
# use warnings::register |
4611
|
7
|
|
|
|
|
28
|
$self->scan_bare_identifier(); |
4612
|
|
|
|
|
|
|
} |
4613
|
|
|
|
|
|
|
|
4614
|
|
|
|
|
|
|
elsif ( |
4615
|
|
|
|
|
|
|
$tok eq 'AutoLoader' |
4616
|
|
|
|
|
|
|
&& $self->[_look_for_autoloader_] |
4617
|
|
|
|
|
|
|
&& ( |
4618
|
|
|
|
|
|
|
$last_nonblank_token eq 'use' |
4619
|
|
|
|
|
|
|
|
4620
|
|
|
|
|
|
|
# these regexes are from AutoSplit.pm, which we want |
4621
|
|
|
|
|
|
|
# to mimic |
4622
|
|
|
|
|
|
|
|| $input_line =~ /^\s*(use|require)\s+AutoLoader\b/ |
4623
|
|
|
|
|
|
|
|| $input_line =~ /\bISA\s*=.*\bAutoLoader\b/ |
4624
|
|
|
|
|
|
|
) |
4625
|
|
|
|
|
|
|
) |
4626
|
|
|
|
|
|
|
{ |
4627
|
0
|
|
|
|
|
0
|
$self->write_logfile_entry("AutoLoader seen, -nlal deactivates\n"); |
4628
|
0
|
|
|
|
|
0
|
$self->[_saw_autoloader_] = 1; |
4629
|
0
|
|
|
|
|
0
|
$self->[_look_for_autoloader_] = 0; |
4630
|
0
|
|
|
|
|
0
|
$self->scan_bare_identifier(); |
4631
|
|
|
|
|
|
|
} |
4632
|
|
|
|
|
|
|
|
4633
|
|
|
|
|
|
|
elsif ( |
4634
|
|
|
|
|
|
|
$tok eq 'SelfLoader' |
4635
|
|
|
|
|
|
|
&& $self->[_look_for_selfloader_] |
4636
|
|
|
|
|
|
|
&& ( $last_nonblank_token eq 'use' |
4637
|
|
|
|
|
|
|
|| $input_line =~ /^\s*(use|require)\s+SelfLoader\b/ |
4638
|
|
|
|
|
|
|
|| $input_line =~ /\bISA\s*=.*\bSelfLoader\b/ ) |
4639
|
|
|
|
|
|
|
) |
4640
|
|
|
|
|
|
|
{ |
4641
|
0
|
|
|
|
|
0
|
$self->write_logfile_entry("SelfLoader seen, -nlsl deactivates\n"); |
4642
|
0
|
|
|
|
|
0
|
$self->[_saw_selfloader_] = 1; |
4643
|
0
|
|
|
|
|
0
|
$self->[_look_for_selfloader_] = 0; |
4644
|
0
|
|
|
|
|
0
|
$self->scan_bare_identifier(); |
4645
|
|
|
|
|
|
|
} |
4646
|
|
|
|
|
|
|
|
4647
|
|
|
|
|
|
|
elsif ( ( $tok eq 'constant' ) |
4648
|
|
|
|
|
|
|
and ( $last_nonblank_token eq 'use' ) ) |
4649
|
|
|
|
|
|
|
{ |
4650
|
16
|
|
|
|
|
88
|
$self->do_USE_CONSTANT(); |
4651
|
|
|
|
|
|
|
} |
4652
|
|
|
|
|
|
|
|
4653
|
|
|
|
|
|
|
# various quote operators |
4654
|
|
|
|
|
|
|
elsif ( $is_q_qq_qw_qx_qr_s_y_tr_m{$tok} ) { |
4655
|
202
|
|
|
|
|
993
|
$self->do_QUOTE_OPERATOR(); |
4656
|
|
|
|
|
|
|
} |
4657
|
|
|
|
|
|
|
|
4658
|
|
|
|
|
|
|
# check for a statement label |
4659
|
|
|
|
|
|
|
elsif ( |
4660
|
|
|
|
|
|
|
( $next_nonblank_token eq ':' ) |
4661
|
|
|
|
|
|
|
&& ( $rtokens->[ $i_next + 1 ] ne ':' ) |
4662
|
|
|
|
|
|
|
&& ( $i_next <= $max_token_index ) # colon on same line |
4663
|
|
|
|
|
|
|
|
4664
|
|
|
|
|
|
|
# like 'sub : lvalue' ? |
4665
|
|
|
|
|
|
|
&& !$self->sub_attribute_ok_here( $tok_kw, $next_nonblank_token, |
4666
|
|
|
|
|
|
|
$i_next ) |
4667
|
|
|
|
|
|
|
&& new_statement_ok() |
4668
|
|
|
|
|
|
|
) |
4669
|
|
|
|
|
|
|
{ |
4670
|
33
|
100
|
|
|
|
181
|
if ( $tok !~ /[A-Z]/ ) { |
4671
|
15
|
|
|
|
|
32
|
push @{ $self->[_rlower_case_labels_at_] }, $input_line_number; |
|
15
|
|
|
|
|
58
|
|
4672
|
|
|
|
|
|
|
} |
4673
|
33
|
|
|
|
|
117
|
$type = 'J'; |
4674
|
33
|
|
|
|
|
104
|
$tok .= ':'; |
4675
|
33
|
|
|
|
|
68
|
$i = $i_next; |
4676
|
|
|
|
|
|
|
} |
4677
|
|
|
|
|
|
|
|
4678
|
|
|
|
|
|
|
# 'sub' or other sub alias |
4679
|
|
|
|
|
|
|
elsif ( $is_sub{$tok_kw} ) { |
4680
|
|
|
|
|
|
|
|
4681
|
|
|
|
|
|
|
# Update for --use-feature=class (rt145706): |
4682
|
|
|
|
|
|
|
# We have to be extra careful to avoid misparsing other uses of |
4683
|
|
|
|
|
|
|
# 'method' in older scripts. |
4684
|
302
|
100
|
100
|
|
|
1459
|
if ( $tok_kw eq 'method' && $guess_if_method ) { |
4685
|
10
|
100
|
66
|
|
|
102
|
if ( $expecting == OPERATOR |
|
|
|
100
|
|
|
|
|
4686
|
|
|
|
|
|
|
|| $next_nonblank_token !~ /^[\w\:]/ |
4687
|
|
|
|
|
|
|
|| !$self->method_ok_here() ) |
4688
|
|
|
|
|
|
|
{ |
4689
|
7
|
|
|
|
|
28
|
$self->do_UNKNOWN_BAREWORD($next_nonblank_token); |
4690
|
|
|
|
|
|
|
} |
4691
|
|
|
|
|
|
|
else { |
4692
|
3
|
|
|
|
|
182
|
initialize_subname(); |
4693
|
3
|
|
|
|
|
18
|
$self->scan_id(); |
4694
|
|
|
|
|
|
|
} |
4695
|
|
|
|
|
|
|
} |
4696
|
|
|
|
|
|
|
else { |
4697
|
292
|
50
|
|
|
|
895
|
$self->error_if_expecting_OPERATOR() |
4698
|
|
|
|
|
|
|
if ( $expecting == OPERATOR ); |
4699
|
292
|
|
|
|
|
1143
|
initialize_subname(); |
4700
|
292
|
|
|
|
|
1063
|
$self->scan_id(); |
4701
|
|
|
|
|
|
|
} |
4702
|
|
|
|
|
|
|
} |
4703
|
|
|
|
|
|
|
|
4704
|
|
|
|
|
|
|
# 'package' |
4705
|
|
|
|
|
|
|
elsif ( $is_package{$tok_kw} ) { |
4706
|
|
|
|
|
|
|
|
4707
|
|
|
|
|
|
|
# Update for --use-feature=class (rt145706): |
4708
|
|
|
|
|
|
|
# We have to be extra careful because 'class' may be used for other |
4709
|
|
|
|
|
|
|
# purposes on older code; i.e. |
4710
|
|
|
|
|
|
|
# class($x) - valid sub call |
4711
|
|
|
|
|
|
|
# package($x) - error |
4712
|
30
|
100
|
|
|
|
95
|
if ( $tok_kw eq 'class' ) { |
4713
|
8
|
100
|
66
|
|
|
68
|
if ( $expecting == OPERATOR |
|
|
|
100
|
|
|
|
|
4714
|
|
|
|
|
|
|
|| $next_nonblank_token !~ /^[\w\:]/ |
4715
|
|
|
|
|
|
|
|| !$self->class_ok_here() ) |
4716
|
|
|
|
|
|
|
{ |
4717
|
4
|
|
|
|
|
16
|
$self->do_UNKNOWN_BAREWORD($next_nonblank_token); |
4718
|
|
|
|
|
|
|
} |
4719
|
4
|
|
|
|
|
12
|
else { $self->scan_id() } |
4720
|
|
|
|
|
|
|
} |
4721
|
|
|
|
|
|
|
else { |
4722
|
22
|
50
|
|
|
|
65
|
$self->error_if_expecting_OPERATOR() |
4723
|
|
|
|
|
|
|
if ( $expecting == OPERATOR ); |
4724
|
22
|
|
|
|
|
86
|
$self->scan_id(); |
4725
|
|
|
|
|
|
|
} |
4726
|
|
|
|
|
|
|
} |
4727
|
|
|
|
|
|
|
|
4728
|
|
|
|
|
|
|
# Fix for c035: split 'format' from 'is_format_END_DATA' to be |
4729
|
|
|
|
|
|
|
# more restrictive. Require a new statement to be ok here. |
4730
|
|
|
|
|
|
|
elsif ( $tok_kw eq 'format' && new_statement_ok() ) { |
4731
|
1
|
|
|
|
|
3
|
$type = ';'; # make tokenizer look for TERM next |
4732
|
1
|
|
|
|
|
4
|
$self->[_in_format_] = 1; |
4733
|
1
|
|
|
|
|
2
|
$is_last = 1; ## is last token on this line |
4734
|
|
|
|
|
|
|
} |
4735
|
|
|
|
|
|
|
|
4736
|
|
|
|
|
|
|
# Note on token types for format, __DATA__, __END__: |
4737
|
|
|
|
|
|
|
# It simplifies things to give these type ';', so that when we |
4738
|
|
|
|
|
|
|
# start rescanning we will be expecting a token of type TERM. |
4739
|
|
|
|
|
|
|
# We will switch to type 'k' before outputting the tokens. |
4740
|
|
|
|
|
|
|
elsif ( $is_END_DATA{$tok_kw} ) { |
4741
|
7
|
|
|
|
|
28
|
$type = ';'; # make tokenizer look for TERM next |
4742
|
|
|
|
|
|
|
|
4743
|
|
|
|
|
|
|
# Remember that we are in one of these three sections |
4744
|
7
|
|
|
|
|
26
|
$self->[ $is_END_DATA{$tok_kw} ] = 1; |
4745
|
7
|
|
|
|
|
12
|
$is_last = 1; ## is last token on this line |
4746
|
|
|
|
|
|
|
} |
4747
|
|
|
|
|
|
|
|
4748
|
|
|
|
|
|
|
elsif ( $is_keyword{$tok_kw} ) { |
4749
|
2641
|
|
|
|
|
8749
|
$self->do_KEYWORD(); |
4750
|
|
|
|
|
|
|
} |
4751
|
|
|
|
|
|
|
|
4752
|
|
|
|
|
|
|
# check for inline label following |
4753
|
|
|
|
|
|
|
# /^(redo|last|next|goto)$/ |
4754
|
|
|
|
|
|
|
elsif (( $last_nonblank_type eq 'k' ) |
4755
|
|
|
|
|
|
|
&& ( $is_redo_last_next_goto{$last_nonblank_token} ) ) |
4756
|
|
|
|
|
|
|
{ |
4757
|
19
|
|
|
|
|
54
|
$type = 'j'; |
4758
|
|
|
|
|
|
|
} |
4759
|
|
|
|
|
|
|
|
4760
|
|
|
|
|
|
|
# something else -- |
4761
|
|
|
|
|
|
|
else { |
4762
|
946
|
|
|
|
|
3254
|
$self->do_UNKNOWN_BAREWORD($next_nonblank_token); |
4763
|
|
|
|
|
|
|
} |
4764
|
|
|
|
|
|
|
|
4765
|
5793
|
|
|
|
|
11682
|
return $is_last; |
4766
|
|
|
|
|
|
|
|
4767
|
|
|
|
|
|
|
} ## end sub do_BAREWORD |
4768
|
|
|
|
|
|
|
|
4769
|
|
|
|
|
|
|
sub do_FOLLOW_QUOTE { |
4770
|
|
|
|
|
|
|
|
4771
|
2768
|
|
|
2768
|
0
|
4389
|
my $self = shift; |
4772
|
|
|
|
|
|
|
|
4773
|
|
|
|
|
|
|
# Continue following a quote on a new line |
4774
|
2768
|
|
|
|
|
4571
|
$type = $quote_type; |
4775
|
|
|
|
|
|
|
|
4776
|
2768
|
100
|
|
|
|
4066
|
if ( !@{$routput_token_list} ) { # initialize if continuation line |
|
2768
|
|
|
|
|
7093
|
|
4777
|
184
|
|
|
|
|
384
|
push( @{$routput_token_list}, $i ); |
|
184
|
|
|
|
|
398
|
|
4778
|
184
|
|
|
|
|
475
|
$routput_token_type->[$i] = $type; |
4779
|
|
|
|
|
|
|
|
4780
|
|
|
|
|
|
|
} |
4781
|
|
|
|
|
|
|
|
4782
|
|
|
|
|
|
|
# scan for the end of the quote or pattern |
4783
|
|
|
|
|
|
|
( |
4784
|
2768
|
|
|
|
|
8854
|
$i, |
4785
|
|
|
|
|
|
|
$in_quote, |
4786
|
|
|
|
|
|
|
$quote_character, |
4787
|
|
|
|
|
|
|
$quote_pos, |
4788
|
|
|
|
|
|
|
$quote_depth, |
4789
|
|
|
|
|
|
|
$quoted_string_1, |
4790
|
|
|
|
|
|
|
$quoted_string_2, |
4791
|
|
|
|
|
|
|
|
4792
|
|
|
|
|
|
|
) = $self->do_quote( |
4793
|
|
|
|
|
|
|
|
4794
|
|
|
|
|
|
|
$i, |
4795
|
|
|
|
|
|
|
$in_quote, |
4796
|
|
|
|
|
|
|
$quote_character, |
4797
|
|
|
|
|
|
|
$quote_pos, |
4798
|
|
|
|
|
|
|
$quote_depth, |
4799
|
|
|
|
|
|
|
$quoted_string_1, |
4800
|
|
|
|
|
|
|
$quoted_string_2, |
4801
|
|
|
|
|
|
|
$rtokens, |
4802
|
|
|
|
|
|
|
$rtoken_map, |
4803
|
|
|
|
|
|
|
$max_token_index, |
4804
|
|
|
|
|
|
|
|
4805
|
|
|
|
|
|
|
); |
4806
|
|
|
|
|
|
|
|
4807
|
|
|
|
|
|
|
# all done if we didn't find it |
4808
|
2768
|
100
|
|
|
|
6930
|
if ($in_quote) { return } |
|
183
|
|
|
|
|
398
|
|
4809
|
|
|
|
|
|
|
|
4810
|
|
|
|
|
|
|
# save pattern and replacement text for rescanning |
4811
|
2585
|
|
|
|
|
4350
|
my $qs1 = $quoted_string_1; |
4812
|
|
|
|
|
|
|
|
4813
|
|
|
|
|
|
|
# re-initialize for next search |
4814
|
2585
|
|
|
|
|
4052
|
$quote_character = EMPTY_STRING; |
4815
|
2585
|
|
|
|
|
3704
|
$quote_pos = 0; |
4816
|
2585
|
|
|
|
|
3974
|
$quote_type = 'Q'; |
4817
|
2585
|
|
|
|
|
3831
|
$quoted_string_1 = EMPTY_STRING; |
4818
|
2585
|
|
|
|
|
3911
|
$quoted_string_2 = EMPTY_STRING; |
4819
|
2585
|
100
|
|
|
|
6397
|
if ( ++$i > $max_token_index ) { return } |
|
116
|
|
|
|
|
376
|
|
4820
|
|
|
|
|
|
|
|
4821
|
|
|
|
|
|
|
# look for any modifiers |
4822
|
2469
|
100
|
|
|
|
5335
|
if ($allowed_quote_modifiers) { |
4823
|
|
|
|
|
|
|
|
4824
|
|
|
|
|
|
|
# check for exact quote modifiers |
4825
|
144
|
100
|
|
|
|
750
|
if ( $rtokens->[$i] =~ /^[A-Za-z_]/ ) { |
4826
|
30
|
|
|
|
|
63
|
my $str = $rtokens->[$i]; |
4827
|
30
|
|
|
|
|
55
|
my $saw_modifier_e; |
4828
|
30
|
|
|
|
|
521
|
while ( $str =~ /\G$allowed_quote_modifiers/gc ) { |
4829
|
47
|
|
|
|
|
110
|
my $pos = pos($str); |
4830
|
47
|
|
|
|
|
111
|
my $char = substr( $str, $pos - 1, 1 ); |
4831
|
47
|
|
66
|
|
|
301
|
$saw_modifier_e ||= ( $char eq 'e' ); |
4832
|
|
|
|
|
|
|
} |
4833
|
|
|
|
|
|
|
|
4834
|
|
|
|
|
|
|
# For an 'e' quote modifier we must scan the replacement |
4835
|
|
|
|
|
|
|
# text for here-doc targets... |
4836
|
|
|
|
|
|
|
# but if the modifier starts a new line we can skip |
4837
|
|
|
|
|
|
|
# this because either the here doc will be fully |
4838
|
|
|
|
|
|
|
# contained in the replacement text (so we can |
4839
|
|
|
|
|
|
|
# ignore it) or Perl will not find it. |
4840
|
|
|
|
|
|
|
# See test 'here2.in'. |
4841
|
30
|
50
|
66
|
|
|
160
|
if ( $saw_modifier_e && $i_tok >= 0 ) { |
4842
|
|
|
|
|
|
|
|
4843
|
0
|
|
|
|
|
0
|
my $rht = $self->scan_replacement_text($qs1); |
4844
|
|
|
|
|
|
|
|
4845
|
|
|
|
|
|
|
# Change type from 'Q' to 'h' for quotes with |
4846
|
|
|
|
|
|
|
# here-doc targets so that the formatter (see sub |
4847
|
|
|
|
|
|
|
# process_line_of_CODE) will not make any line |
4848
|
|
|
|
|
|
|
# breaks after this point. |
4849
|
0
|
0
|
|
|
|
0
|
if ($rht) { |
4850
|
0
|
|
|
|
|
0
|
push @{$rhere_target_list}, @{$rht}; |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
4851
|
0
|
|
|
|
|
0
|
$type = 'h'; |
4852
|
0
|
0
|
|
|
|
0
|
if ( $i_tok < 0 ) { |
4853
|
0
|
|
|
|
|
0
|
my $ilast = $routput_token_list->[-1]; |
4854
|
0
|
|
|
|
|
0
|
$routput_token_type->[$ilast] = $type; |
4855
|
|
|
|
|
|
|
} |
4856
|
|
|
|
|
|
|
} |
4857
|
|
|
|
|
|
|
} |
4858
|
|
|
|
|
|
|
|
4859
|
30
|
50
|
|
|
|
110
|
if ( defined( pos($str) ) ) { |
4860
|
|
|
|
|
|
|
|
4861
|
|
|
|
|
|
|
# matched |
4862
|
30
|
50
|
|
|
|
97
|
if ( pos($str) == length($str) ) { |
4863
|
30
|
50
|
|
|
|
128
|
if ( ++$i > $max_token_index ) { return } |
|
0
|
|
|
|
|
0
|
|
4864
|
|
|
|
|
|
|
} |
4865
|
|
|
|
|
|
|
|
4866
|
|
|
|
|
|
|
# Looks like a joined quote modifier |
4867
|
|
|
|
|
|
|
# and keyword, maybe something like |
4868
|
|
|
|
|
|
|
# s/xxx/yyy/gefor @k=... |
4869
|
|
|
|
|
|
|
# Example is "galgen.pl". Would have to split |
4870
|
|
|
|
|
|
|
# the word and insert a new token in the |
4871
|
|
|
|
|
|
|
# pre-token list. This is so rare that I haven't |
4872
|
|
|
|
|
|
|
# done it. Will just issue a warning citation. |
4873
|
|
|
|
|
|
|
|
4874
|
|
|
|
|
|
|
# This error might also be triggered if my quote |
4875
|
|
|
|
|
|
|
# modifier characters are incomplete |
4876
|
|
|
|
|
|
|
else { |
4877
|
0
|
|
|
|
|
0
|
$self->warning(<<EOM); |
4878
|
|
|
|
|
|
|
|
4879
|
|
|
|
|
|
|
Partial match to quote modifier $allowed_quote_modifiers at word: '$str' |
4880
|
|
|
|
|
|
|
Please put a space between quote modifiers and trailing keywords. |
4881
|
|
|
|
|
|
|
EOM |
4882
|
|
|
|
|
|
|
|
4883
|
|
|
|
|
|
|
# print "token $rtokens->[$i]\n"; |
4884
|
|
|
|
|
|
|
# my $num = length($str) - pos($str); |
4885
|
|
|
|
|
|
|
# $rtokens->[$i]=substr($rtokens->[$i],pos($str),$num); |
4886
|
|
|
|
|
|
|
# print "continuing with new token $rtokens->[$i]\n"; |
4887
|
|
|
|
|
|
|
|
4888
|
|
|
|
|
|
|
# skipping past this token does least damage |
4889
|
0
|
0
|
|
|
|
0
|
if ( ++$i > $max_token_index ) { return } |
|
0
|
|
|
|
|
0
|
|
4890
|
|
|
|
|
|
|
} |
4891
|
|
|
|
|
|
|
} |
4892
|
|
|
|
|
|
|
else { |
4893
|
|
|
|
|
|
|
|
4894
|
|
|
|
|
|
|
# example file: rokicki4.pl |
4895
|
|
|
|
|
|
|
# This error might also be triggered if my quote |
4896
|
|
|
|
|
|
|
# modifier characters are incomplete |
4897
|
0
|
|
|
|
|
0
|
$self->write_logfile_entry( |
4898
|
|
|
|
|
|
|
"Note: found word $str at quote modifier location\n"); |
4899
|
|
|
|
|
|
|
} |
4900
|
|
|
|
|
|
|
} |
4901
|
|
|
|
|
|
|
|
4902
|
|
|
|
|
|
|
# re-initialize |
4903
|
144
|
|
|
|
|
298
|
$allowed_quote_modifiers = EMPTY_STRING; |
4904
|
|
|
|
|
|
|
} |
4905
|
2469
|
|
|
|
|
4483
|
return; |
4906
|
|
|
|
|
|
|
} ## end sub do_FOLLOW_QUOTE |
4907
|
|
|
|
|
|
|
|
4908
|
|
|
|
|
|
|
# ------------------------------------------------------------ |
4909
|
|
|
|
|
|
|
# begin hash of code for handling most token types |
4910
|
|
|
|
|
|
|
# ------------------------------------------------------------ |
4911
|
|
|
|
|
|
|
my $tokenization_code = { |
4912
|
|
|
|
|
|
|
|
4913
|
|
|
|
|
|
|
'>' => \&do_GREATER_THAN_SIGN, |
4914
|
|
|
|
|
|
|
'|' => \&do_VERTICAL_LINE, |
4915
|
|
|
|
|
|
|
'$' => \&do_DOLLAR_SIGN, |
4916
|
|
|
|
|
|
|
'(' => \&do_LEFT_PARENTHESIS, |
4917
|
|
|
|
|
|
|
')' => \&do_RIGHT_PARENTHESIS, |
4918
|
|
|
|
|
|
|
',' => \&do_COMMA, |
4919
|
|
|
|
|
|
|
';' => \&do_SEMICOLON, |
4920
|
|
|
|
|
|
|
'"' => \&do_QUOTATION_MARK, |
4921
|
|
|
|
|
|
|
"'" => \&do_APOSTROPHE, |
4922
|
|
|
|
|
|
|
'`' => \&do_BACKTICK, |
4923
|
|
|
|
|
|
|
'/' => \&do_SLASH, |
4924
|
|
|
|
|
|
|
'{' => \&do_LEFT_CURLY_BRACKET, |
4925
|
|
|
|
|
|
|
'}' => \&do_RIGHT_CURLY_BRACKET, |
4926
|
|
|
|
|
|
|
'&' => \&do_AMPERSAND, |
4927
|
|
|
|
|
|
|
'<' => \&do_LESS_THAN_SIGN, |
4928
|
|
|
|
|
|
|
'?' => \&do_QUESTION_MARK, |
4929
|
|
|
|
|
|
|
'*' => \&do_STAR, |
4930
|
|
|
|
|
|
|
'.' => \&do_DOT, |
4931
|
|
|
|
|
|
|
':' => \&do_COLON, |
4932
|
|
|
|
|
|
|
'+' => \&do_PLUS_SIGN, |
4933
|
|
|
|
|
|
|
'@' => \&do_AT_SIGN, |
4934
|
|
|
|
|
|
|
'%' => \&do_PERCENT_SIGN, |
4935
|
|
|
|
|
|
|
'[' => \&do_LEFT_SQUARE_BRACKET, |
4936
|
|
|
|
|
|
|
']' => \&do_RIGHT_SQUARE_BRACKET, |
4937
|
|
|
|
|
|
|
'-' => \&do_MINUS_SIGN, |
4938
|
|
|
|
|
|
|
'^' => \&do_CARAT_SIGN, |
4939
|
|
|
|
|
|
|
'::' => \&do_DOUBLE_COLON, |
4940
|
|
|
|
|
|
|
'<<' => \&do_LEFT_SHIFT, |
4941
|
|
|
|
|
|
|
'<<~' => \&do_NEW_HERE_DOC, |
4942
|
|
|
|
|
|
|
'->' => \&do_POINTER, |
4943
|
|
|
|
|
|
|
'++' => \&do_PLUS_PLUS, |
4944
|
|
|
|
|
|
|
'=>' => \&do_FAT_COMMA, |
4945
|
|
|
|
|
|
|
'--' => \&do_MINUS_MINUS, |
4946
|
|
|
|
|
|
|
'&&' => \&do_LOGICAL_AND, |
4947
|
|
|
|
|
|
|
'||' => \&do_LOGICAL_OR, |
4948
|
|
|
|
|
|
|
'//' => \&do_SLASH_SLASH, |
4949
|
|
|
|
|
|
|
|
4950
|
|
|
|
|
|
|
# No special code for these types yet, but syntax checks |
4951
|
|
|
|
|
|
|
# could be added. |
4952
|
|
|
|
|
|
|
## '!' => undef, |
4953
|
|
|
|
|
|
|
## '!=' => undef, |
4954
|
|
|
|
|
|
|
## '!~' => undef, |
4955
|
|
|
|
|
|
|
## '%=' => undef, |
4956
|
|
|
|
|
|
|
## '&&=' => undef, |
4957
|
|
|
|
|
|
|
## '&=' => undef, |
4958
|
|
|
|
|
|
|
## '+=' => undef, |
4959
|
|
|
|
|
|
|
## '-=' => undef, |
4960
|
|
|
|
|
|
|
## '..' => undef, |
4961
|
|
|
|
|
|
|
## '..' => undef, |
4962
|
|
|
|
|
|
|
## '...' => undef, |
4963
|
|
|
|
|
|
|
## '.=' => undef, |
4964
|
|
|
|
|
|
|
## '<<=' => undef, |
4965
|
|
|
|
|
|
|
## '<=' => undef, |
4966
|
|
|
|
|
|
|
## '<=>' => undef, |
4967
|
|
|
|
|
|
|
## '<>' => undef, |
4968
|
|
|
|
|
|
|
## '=' => undef, |
4969
|
|
|
|
|
|
|
## '==' => undef, |
4970
|
|
|
|
|
|
|
## '=~' => undef, |
4971
|
|
|
|
|
|
|
## '>=' => undef, |
4972
|
|
|
|
|
|
|
## '>>' => undef, |
4973
|
|
|
|
|
|
|
## '>>=' => undef, |
4974
|
|
|
|
|
|
|
## '\\' => undef, |
4975
|
|
|
|
|
|
|
## '^=' => undef, |
4976
|
|
|
|
|
|
|
## '|=' => undef, |
4977
|
|
|
|
|
|
|
## '||=' => undef, |
4978
|
|
|
|
|
|
|
## '//=' => undef, |
4979
|
|
|
|
|
|
|
## '~' => undef, |
4980
|
|
|
|
|
|
|
## '~~' => undef, |
4981
|
|
|
|
|
|
|
## '!~~' => undef, |
4982
|
|
|
|
|
|
|
|
4983
|
|
|
|
|
|
|
}; |
4984
|
|
|
|
|
|
|
|
4985
|
|
|
|
|
|
|
# ------------------------------------------------------------ |
4986
|
|
|
|
|
|
|
# end hash of code for handling individual token types |
4987
|
|
|
|
|
|
|
# ------------------------------------------------------------ |
4988
|
|
|
|
|
|
|
|
4989
|
39
|
|
|
39
|
|
412
|
use constant DEBUG_TOKENIZE => 0; |
|
39
|
|
|
|
|
157
|
|
|
39
|
|
|
|
|
134030
|
|
4990
|
|
|
|
|
|
|
|
4991
|
|
|
|
|
|
|
sub tokenize_this_line { |
4992
|
|
|
|
|
|
|
|
4993
|
|
|
|
|
|
|
# This routine breaks a line of perl code into tokens which are of use in |
4994
|
|
|
|
|
|
|
# indentation and reformatting. One of my goals has been to define tokens |
4995
|
|
|
|
|
|
|
# such that a newline may be inserted between any pair of tokens without |
4996
|
|
|
|
|
|
|
# changing or invalidating the program. This version comes close to this, |
4997
|
|
|
|
|
|
|
# although there are necessarily a few exceptions which must be caught by |
4998
|
|
|
|
|
|
|
# the formatter. Many of these involve the treatment of bare words. |
4999
|
|
|
|
|
|
|
# |
5000
|
|
|
|
|
|
|
# The tokens and their types are returned in arrays. See previous |
5001
|
|
|
|
|
|
|
# routine for their names. |
5002
|
|
|
|
|
|
|
# |
5003
|
|
|
|
|
|
|
# See also the array "valid_token_types" in the BEGIN section for an |
5004
|
|
|
|
|
|
|
# up-to-date list. |
5005
|
|
|
|
|
|
|
# |
5006
|
|
|
|
|
|
|
# To simplify things, token types are either a single character, or they |
5007
|
|
|
|
|
|
|
# are identical to the tokens themselves. |
5008
|
|
|
|
|
|
|
# |
5009
|
|
|
|
|
|
|
# As a debugging aid, the -D flag creates a file containing a side-by-side |
5010
|
|
|
|
|
|
|
# comparison of the input string and its tokenization for each line of a file. |
5011
|
|
|
|
|
|
|
# This is an invaluable debugging aid. |
5012
|
|
|
|
|
|
|
# |
5013
|
|
|
|
|
|
|
# In addition to tokens, and some associated quantities, the tokenizer |
5014
|
|
|
|
|
|
|
# also returns flags indication any special line types. These include |
5015
|
|
|
|
|
|
|
# quotes, here_docs, formats. |
5016
|
|
|
|
|
|
|
# |
5017
|
|
|
|
|
|
|
# ----------------------------------------------------------------------- |
5018
|
|
|
|
|
|
|
# |
5019
|
|
|
|
|
|
|
# How to add NEW_TOKENS: |
5020
|
|
|
|
|
|
|
# |
5021
|
|
|
|
|
|
|
# New token types will undoubtedly be needed in the future both to keep up |
5022
|
|
|
|
|
|
|
# with changes in perl and to help adapt the tokenizer to other applications. |
5023
|
|
|
|
|
|
|
# |
5024
|
|
|
|
|
|
|
# Here are some notes on the minimal steps. I wrote these notes while |
5025
|
|
|
|
|
|
|
# adding the 'v' token type for v-strings, which are things like version |
5026
|
|
|
|
|
|
|
# numbers 5.6.0, and ip addresses, and will use that as an example. ( You |
5027
|
|
|
|
|
|
|
# can use your editor to search for the string "NEW_TOKENS" to find the |
5028
|
|
|
|
|
|
|
# appropriate sections to change): |
5029
|
|
|
|
|
|
|
# |
5030
|
|
|
|
|
|
|
# *. Try to talk somebody else into doing it! If not, .. |
5031
|
|
|
|
|
|
|
# |
5032
|
|
|
|
|
|
|
# *. Make a backup of your current version in case things don't work out! |
5033
|
|
|
|
|
|
|
# |
5034
|
|
|
|
|
|
|
# *. Think of a new, unused character for the token type, and add to |
5035
|
|
|
|
|
|
|
# the array @valid_token_types in the BEGIN section of this package. |
5036
|
|
|
|
|
|
|
# For example, I used 'v' for v-strings. |
5037
|
|
|
|
|
|
|
# |
5038
|
|
|
|
|
|
|
# *. Implement coding to recognize the $type of the token in this routine. |
5039
|
|
|
|
|
|
|
# This is the hardest part, and is best done by imitating or modifying |
5040
|
|
|
|
|
|
|
# some of the existing coding. For example, to recognize v-strings, I |
5041
|
|
|
|
|
|
|
# patched 'sub scan_bare_identifier' to recognize v-strings beginning with |
5042
|
|
|
|
|
|
|
# 'v' and 'sub scan_number' to recognize v-strings without the leading 'v'. |
5043
|
|
|
|
|
|
|
# |
5044
|
|
|
|
|
|
|
# *. Update sub operator_expected. This update is critically important but |
5045
|
|
|
|
|
|
|
# the coding is trivial. Look at the comments in that routine for help. |
5046
|
|
|
|
|
|
|
# For v-strings, which should behave like numbers, I just added 'v' to the |
5047
|
|
|
|
|
|
|
# regex used to handle numbers and strings (types 'n' and 'Q'). |
5048
|
|
|
|
|
|
|
# |
5049
|
|
|
|
|
|
|
# *. Implement a 'bond strength' rule in sub set_bond_strengths in |
5050
|
|
|
|
|
|
|
# Perl::Tidy::Formatter for breaking lines around this token type. You can |
5051
|
|
|
|
|
|
|
# skip this step and take the default at first, then adjust later to get |
5052
|
|
|
|
|
|
|
# desired results. For adding type 'v', I looked at sub bond_strength and |
5053
|
|
|
|
|
|
|
# saw that number type 'n' was using default strengths, so I didn't do |
5054
|
|
|
|
|
|
|
# anything. I may tune it up someday if I don't like the way line |
5055
|
|
|
|
|
|
|
# breaks with v-strings look. |
5056
|
|
|
|
|
|
|
# |
5057
|
|
|
|
|
|
|
# *. Implement a 'whitespace' rule in sub set_whitespace_flags in |
5058
|
|
|
|
|
|
|
# Perl::Tidy::Formatter. For adding type 'v', I looked at this routine |
5059
|
|
|
|
|
|
|
# and saw that type 'n' used spaces on both sides, so I just added 'v' |
5060
|
|
|
|
|
|
|
# to the array @spaces_both_sides. |
5061
|
|
|
|
|
|
|
# |
5062
|
|
|
|
|
|
|
# *. Update HtmlWriter package so that users can colorize the token as |
5063
|
|
|
|
|
|
|
# desired. This is quite easy; see comments identified by 'NEW_TOKENS' in |
5064
|
|
|
|
|
|
|
# that package. For v-strings, I initially chose to use a default color |
5065
|
|
|
|
|
|
|
# equal to the default for numbers, but it might be nice to change that |
5066
|
|
|
|
|
|
|
# eventually. |
5067
|
|
|
|
|
|
|
# |
5068
|
|
|
|
|
|
|
# *. Update comments in Perl::Tidy::Tokenizer::dump_token_types. |
5069
|
|
|
|
|
|
|
# |
5070
|
|
|
|
|
|
|
# *. Run lots and lots of debug tests. Start with special files designed |
5071
|
|
|
|
|
|
|
# to test the new token type. Run with the -D flag to create a .DEBUG |
5072
|
|
|
|
|
|
|
# file which shows the tokenization. When these work ok, test as many old |
5073
|
|
|
|
|
|
|
# scripts as possible. Start with all of the '.t' files in the 'test' |
5074
|
|
|
|
|
|
|
# directory of the distribution file. Compare .tdy output with previous |
5075
|
|
|
|
|
|
|
# version and updated version to see the differences. Then include as |
5076
|
|
|
|
|
|
|
# many more files as possible. My own technique has been to collect a huge |
5077
|
|
|
|
|
|
|
# number of perl scripts (thousands!) into one directory and run perltidy |
5078
|
|
|
|
|
|
|
# *, then run diff between the output of the previous version and the |
5079
|
|
|
|
|
|
|
# current version. |
5080
|
|
|
|
|
|
|
# |
5081
|
|
|
|
|
|
|
# *. For another example, search for the smartmatch operator '~~' |
5082
|
|
|
|
|
|
|
# with your editor to see where updates were made for it. |
5083
|
|
|
|
|
|
|
# |
5084
|
|
|
|
|
|
|
# ----------------------------------------------------------------------- |
5085
|
|
|
|
|
|
|
|
5086
|
7510
|
|
|
7510
|
0
|
15198
|
my ( $self, $line_of_tokens ) = @_; |
5087
|
7510
|
|
|
|
|
14684
|
my ($untrimmed_input_line) = $line_of_tokens->{_line_text}; |
5088
|
|
|
|
|
|
|
|
5089
|
|
|
|
|
|
|
# Extract line number for use in error messages |
5090
|
7510
|
|
|
|
|
12413
|
$input_line_number = $line_of_tokens->{_line_number}; |
5091
|
|
|
|
|
|
|
|
5092
|
|
|
|
|
|
|
# Check for pod documentation |
5093
|
7510
|
100
|
66
|
|
|
20124
|
if ( substr( $untrimmed_input_line, 0, 1 ) eq '=' |
5094
|
|
|
|
|
|
|
&& $untrimmed_input_line =~ /^=[A-Za-z_]/ ) |
5095
|
|
|
|
|
|
|
{ |
5096
|
|
|
|
|
|
|
|
5097
|
|
|
|
|
|
|
# Must not be in multi-line quote |
5098
|
|
|
|
|
|
|
# and must not be in an equation |
5099
|
14
|
|
|
|
|
38
|
my $blank_after_Z = 1; |
5100
|
14
|
50
|
33
|
|
|
109
|
if ( |
5101
|
|
|
|
|
|
|
!$in_quote |
5102
|
|
|
|
|
|
|
&& ( $self->operator_expected( '=', 'b', $blank_after_Z ) == |
5103
|
|
|
|
|
|
|
TERM ) |
5104
|
|
|
|
|
|
|
) |
5105
|
|
|
|
|
|
|
{ |
5106
|
14
|
|
|
|
|
38
|
$self->[_in_pod_] = 1; |
5107
|
14
|
|
|
|
|
34
|
return; |
5108
|
|
|
|
|
|
|
} |
5109
|
|
|
|
|
|
|
} |
5110
|
|
|
|
|
|
|
|
5111
|
7496
|
|
|
|
|
13911
|
$input_line = $untrimmed_input_line; |
5112
|
|
|
|
|
|
|
|
5113
|
7496
|
|
|
|
|
13780
|
chomp $input_line; |
5114
|
|
|
|
|
|
|
|
5115
|
|
|
|
|
|
|
# Set a flag to indicate if we might be at an __END__ or __DATA__ line |
5116
|
|
|
|
|
|
|
# This will be used below to avoid quoting a bare word followed by |
5117
|
|
|
|
|
|
|
# a fat comma. |
5118
|
7496
|
|
|
|
|
10905
|
my $is_END_or_DATA; |
5119
|
|
|
|
|
|
|
|
5120
|
|
|
|
|
|
|
# Reinitialize the multi-line quote flag |
5121
|
7496
|
100
|
100
|
|
|
19124
|
if ( $in_quote && $quote_type eq 'Q' ) { |
5122
|
47
|
|
|
|
|
148
|
$line_of_tokens->{_starting_in_quote} = 1; |
5123
|
|
|
|
|
|
|
} |
5124
|
|
|
|
|
|
|
else { |
5125
|
7449
|
|
|
|
|
12676
|
$line_of_tokens->{_starting_in_quote} = 0; |
5126
|
|
|
|
|
|
|
|
5127
|
|
|
|
|
|
|
# Trim start of this line unless we are continuing a quoted line. |
5128
|
|
|
|
|
|
|
# Do not trim end because we might end in a quote (test: deken4.pl) |
5129
|
|
|
|
|
|
|
# Perl::Tidy::Formatter will delete needless trailing blanks |
5130
|
7449
|
100
|
|
|
|
34073
|
if ( !length($input_line) ) { |
|
|
100
|
|
|
|
|
|
5131
|
|
|
|
|
|
|
|
5132
|
|
|
|
|
|
|
# line is empty |
5133
|
|
|
|
|
|
|
} |
5134
|
|
|
|
|
|
|
elsif ( $input_line =~ m/\S/g ) { |
5135
|
|
|
|
|
|
|
|
5136
|
|
|
|
|
|
|
# There are $spaces blank characters before a nonblank character |
5137
|
6643
|
|
|
|
|
13170
|
my $spaces = pos($input_line) - 1; |
5138
|
6643
|
100
|
|
|
|
15364
|
if ( $spaces > 0 ) { |
5139
|
|
|
|
|
|
|
|
5140
|
|
|
|
|
|
|
# Trim the leading spaces |
5141
|
3541
|
|
|
|
|
9010
|
$input_line = substr( $input_line, $spaces ); |
5142
|
|
|
|
|
|
|
|
5143
|
|
|
|
|
|
|
# Find actual space count if there are leading tabs |
5144
|
3541
|
100
|
66
|
|
|
12825
|
if ( |
5145
|
|
|
|
|
|
|
ord( substr( $untrimmed_input_line, 0, 1 ) ) == ORD_TAB |
5146
|
|
|
|
|
|
|
&& $untrimmed_input_line =~ /^(\t+)/ ) |
5147
|
|
|
|
|
|
|
{ |
5148
|
213
|
|
|
|
|
455
|
my $tabsize = $self->[_tabsize_]; |
5149
|
213
|
|
|
|
|
605
|
$spaces += length($1) * ( $tabsize - 1 ); |
5150
|
|
|
|
|
|
|
} |
5151
|
|
|
|
|
|
|
|
5152
|
|
|
|
|
|
|
# Calculate a guessed level for nonblank lines to avoid |
5153
|
|
|
|
|
|
|
# calls to sub guess_old_indentation_level() |
5154
|
3541
|
|
|
|
|
7194
|
my $indent_columns = $self->[_indent_columns_]; |
5155
|
|
|
|
|
|
|
$line_of_tokens->{_guessed_indentation_level} = |
5156
|
3541
|
|
|
|
|
10218
|
int( $spaces / $indent_columns ); |
5157
|
|
|
|
|
|
|
} |
5158
|
|
|
|
|
|
|
} |
5159
|
|
|
|
|
|
|
else { |
5160
|
|
|
|
|
|
|
|
5161
|
|
|
|
|
|
|
# line has all blank characters |
5162
|
9
|
|
|
|
|
37
|
$input_line = EMPTY_STRING; |
5163
|
|
|
|
|
|
|
} |
5164
|
|
|
|
|
|
|
|
5165
|
|
|
|
|
|
|
} |
5166
|
|
|
|
|
|
|
|
5167
|
7496
|
100
|
|
|
|
16380
|
if ( !$in_quote ) { |
5168
|
|
|
|
|
|
|
|
5169
|
|
|
|
|
|
|
# Optimize handling of a blank line |
5170
|
7312
|
100
|
|
|
|
16299
|
if ( !length($input_line) ) { |
5171
|
806
|
|
|
|
|
2275
|
$line_of_tokens->{_line_type} = 'CODE'; |
5172
|
806
|
|
|
|
|
1835
|
$line_of_tokens->{_rtokens} = []; |
5173
|
806
|
|
|
|
|
2061
|
$line_of_tokens->{_rtoken_type} = []; |
5174
|
806
|
|
|
|
|
1774
|
$line_of_tokens->{_rlevels} = []; |
5175
|
806
|
|
|
|
|
1679
|
$line_of_tokens->{_rci_levels} = []; |
5176
|
806
|
|
|
|
|
1670
|
$line_of_tokens->{_rblock_type} = []; |
5177
|
806
|
|
|
|
|
1703
|
$line_of_tokens->{_nesting_tokens_0} = $nesting_token_string; |
5178
|
806
|
|
|
|
|
2236
|
$line_of_tokens->{_nesting_blocks_0} = $nesting_block_string; |
5179
|
806
|
|
|
|
|
1694
|
return; |
5180
|
|
|
|
|
|
|
} |
5181
|
|
|
|
|
|
|
|
5182
|
|
|
|
|
|
|
# Check comments |
5183
|
6506
|
100
|
|
|
|
15435
|
if ( substr( $input_line, 0, 1 ) eq '#' ) { |
5184
|
|
|
|
|
|
|
|
5185
|
|
|
|
|
|
|
# and check for skipped section |
5186
|
788
|
50
|
66
|
|
|
4623
|
if ( |
|
|
|
66
|
|
|
|
|
|
|
|
66
|
|
|
|
|
5187
|
|
|
|
|
|
|
( |
5188
|
|
|
|
|
|
|
substr( $input_line, 0, 4 ) eq '#<<V' |
5189
|
|
|
|
|
|
|
|| $rOpts_code_skipping_begin |
5190
|
|
|
|
|
|
|
) |
5191
|
|
|
|
|
|
|
&& $rOpts_code_skipping |
5192
|
|
|
|
|
|
|
&& $input_line =~ /$code_skipping_pattern_begin/ |
5193
|
|
|
|
|
|
|
) |
5194
|
|
|
|
|
|
|
{ |
5195
|
2
|
|
|
|
|
9
|
$self->[_in_skipped_] = $self->[_last_line_number_]; |
5196
|
2
|
|
|
|
|
6
|
return; |
5197
|
|
|
|
|
|
|
} |
5198
|
|
|
|
|
|
|
|
5199
|
|
|
|
|
|
|
# Optional fast processing of a block comment |
5200
|
786
|
|
|
|
|
1766
|
$line_of_tokens->{_line_type} = 'CODE'; |
5201
|
786
|
|
|
|
|
3294
|
$line_of_tokens->{_rtokens} = [$input_line]; |
5202
|
786
|
|
|
|
|
2248
|
$line_of_tokens->{_rtoken_type} = ['#']; |
5203
|
786
|
|
|
|
|
2108
|
$line_of_tokens->{_rlevels} = [$level_in_tokenizer]; |
5204
|
786
|
|
|
|
|
1847
|
$line_of_tokens->{_rci_levels} = [0]; |
5205
|
786
|
|
|
|
|
2066
|
$line_of_tokens->{_rblock_type} = [EMPTY_STRING]; |
5206
|
786
|
|
|
|
|
1798
|
$line_of_tokens->{_nesting_tokens_0} = $nesting_token_string; |
5207
|
786
|
|
|
|
|
2612
|
$line_of_tokens->{_nesting_blocks_0} = $nesting_block_string; |
5208
|
786
|
|
|
|
|
1771
|
return; |
5209
|
|
|
|
|
|
|
} |
5210
|
|
|
|
|
|
|
|
5211
|
|
|
|
|
|
|
# Look for __END__ or __DATA__ lines |
5212
|
5718
|
100
|
100
|
|
|
16018
|
if ( substr( $input_line, 0, 1 ) eq '_' |
5213
|
|
|
|
|
|
|
&& $input_line =~ /^__(END|DATA)__\s*$/ ) |
5214
|
|
|
|
|
|
|
{ |
5215
|
7
|
|
|
|
|
18
|
$is_END_or_DATA = 1; |
5216
|
|
|
|
|
|
|
} |
5217
|
|
|
|
|
|
|
} |
5218
|
|
|
|
|
|
|
|
5219
|
|
|
|
|
|
|
# update the copy of the line for use in error messages |
5220
|
|
|
|
|
|
|
# This must be exactly what we give the pre_tokenizer |
5221
|
5902
|
|
|
|
|
10954
|
$self->[_line_of_text_] = $input_line; |
5222
|
|
|
|
|
|
|
|
5223
|
|
|
|
|
|
|
# re-initialize for the main loop |
5224
|
5902
|
|
|
|
|
14186
|
$routput_token_list = []; # stack of output token indexes |
5225
|
5902
|
|
|
|
|
17994
|
$routput_token_type = []; # token types |
5226
|
5902
|
|
|
|
|
16534
|
$routput_block_type = []; # types of code block |
5227
|
5902
|
|
|
|
|
15616
|
$routput_container_type = []; # paren types, such as if, elsif, .. |
5228
|
5902
|
|
|
|
|
14729
|
$routput_type_sequence = []; # nesting sequential number |
5229
|
|
|
|
|
|
|
|
5230
|
5902
|
|
|
|
|
9635
|
$rhere_target_list = []; |
5231
|
|
|
|
|
|
|
|
5232
|
5902
|
|
|
|
|
9766
|
$tok = $last_nonblank_token; |
5233
|
5902
|
|
|
|
|
8996
|
$type = $last_nonblank_type; |
5234
|
5902
|
|
|
|
|
9079
|
$prototype = $last_nonblank_prototype; |
5235
|
5902
|
|
|
|
|
8845
|
$last_nonblank_i = -1; |
5236
|
5902
|
|
|
|
|
8940
|
$block_type = $last_nonblank_block_type; |
5237
|
5902
|
|
|
|
|
8807
|
$container_type = $last_nonblank_container_type; |
5238
|
5902
|
|
|
|
|
8618
|
$type_sequence = $last_nonblank_type_sequence; |
5239
|
5902
|
|
|
|
|
8371
|
$indent_flag = 0; |
5240
|
5902
|
|
|
|
|
8496
|
$peeked_ahead = 0; |
5241
|
|
|
|
|
|
|
|
5242
|
5902
|
|
|
|
|
18210
|
$self->tokenizer_main_loop($is_END_or_DATA); |
5243
|
|
|
|
|
|
|
|
5244
|
|
|
|
|
|
|
#----------------------------------------------- |
5245
|
|
|
|
|
|
|
# all done tokenizing this line ... |
5246
|
|
|
|
|
|
|
# now prepare the final list of tokens and types |
5247
|
|
|
|
|
|
|
#----------------------------------------------- |
5248
|
5902
|
|
|
|
|
18260
|
$self->tokenizer_wrapup_line($line_of_tokens); |
5249
|
|
|
|
|
|
|
|
5250
|
5902
|
|
|
|
|
10832
|
return; |
5251
|
|
|
|
|
|
|
} ## end sub tokenize_this_line |
5252
|
|
|
|
|
|
|
|
5253
|
|
|
|
|
|
|
sub tokenizer_main_loop { |
5254
|
|
|
|
|
|
|
|
5255
|
5902
|
|
|
5902
|
0
|
11845
|
my ( $self, $is_END_or_DATA ) = @_; |
5256
|
|
|
|
|
|
|
|
5257
|
|
|
|
|
|
|
#--------------------------------- |
5258
|
|
|
|
|
|
|
# Break one input line into tokens |
5259
|
|
|
|
|
|
|
#--------------------------------- |
5260
|
|
|
|
|
|
|
|
5261
|
|
|
|
|
|
|
# Input parameter: |
5262
|
|
|
|
|
|
|
# $is_END_or_DATA is true for a __END__ or __DATA__ line |
5263
|
|
|
|
|
|
|
|
5264
|
|
|
|
|
|
|
# start by breaking the line into pre-tokens |
5265
|
5902
|
|
|
|
|
14524
|
( $rtokens, $rtoken_map, $rtoken_type ) = pre_tokenize($input_line); |
5266
|
|
|
|
|
|
|
|
5267
|
5902
|
|
|
|
|
24026
|
$max_token_index = scalar( @{$rtokens} ) - 1; |
|
5902
|
|
|
|
|
10827
|
|
5268
|
5902
|
|
|
|
|
8960
|
push( @{$rtokens}, SPACE, SPACE, SPACE ) |
|
5902
|
|
|
|
|
14800
|
|
5269
|
|
|
|
|
|
|
; # extra whitespace simplifies logic |
5270
|
5902
|
|
|
|
|
8787
|
push( @{$rtoken_map}, 0, 0, 0 ); # shouldn't be referenced |
|
5902
|
|
|
|
|
12660
|
|
5271
|
5902
|
|
|
|
|
8480
|
push( @{$rtoken_type}, 'b', 'b', 'b' ); |
|
5902
|
|
|
|
|
12400
|
|
5272
|
|
|
|
|
|
|
|
5273
|
|
|
|
|
|
|
# initialize for main loop |
5274
|
5902
|
|
|
|
|
8449
|
if (0) { #<<< this is not necessary |
5275
|
|
|
|
|
|
|
foreach my $ii ( 0 .. $max_token_index + 3 ) { |
5276
|
|
|
|
|
|
|
$routput_token_type->[$ii] = EMPTY_STRING; |
5277
|
|
|
|
|
|
|
$routput_block_type->[$ii] = EMPTY_STRING; |
5278
|
|
|
|
|
|
|
$routput_container_type->[$ii] = EMPTY_STRING; |
5279
|
|
|
|
|
|
|
$routput_type_sequence->[$ii] = EMPTY_STRING; |
5280
|
|
|
|
|
|
|
$routput_indent_flag->[$ii] = 0; |
5281
|
|
|
|
|
|
|
} |
5282
|
|
|
|
|
|
|
} |
5283
|
|
|
|
|
|
|
|
5284
|
5902
|
|
|
|
|
8715
|
$i = -1; |
5285
|
5902
|
|
|
|
|
8894
|
$i_tok = -1; |
5286
|
|
|
|
|
|
|
|
5287
|
|
|
|
|
|
|
#----------------------- |
5288
|
|
|
|
|
|
|
# main tokenization loop |
5289
|
|
|
|
|
|
|
#----------------------- |
5290
|
|
|
|
|
|
|
|
5291
|
|
|
|
|
|
|
# we are looking at each pre-token of one line and combining them |
5292
|
|
|
|
|
|
|
# into tokens |
5293
|
5902
|
|
|
|
|
13541
|
while ( ++$i <= $max_token_index ) { |
5294
|
|
|
|
|
|
|
|
5295
|
|
|
|
|
|
|
# continue looking for the end of a quote |
5296
|
50796
|
100
|
|
|
|
87393
|
if ($in_quote) { |
5297
|
2768
|
|
|
|
|
9005
|
$self->do_FOLLOW_QUOTE(); |
5298
|
2768
|
100
|
100
|
|
|
11289
|
last if ( $in_quote || $i > $max_token_index ); |
5299
|
|
|
|
|
|
|
} |
5300
|
|
|
|
|
|
|
|
5301
|
50497
|
100
|
100
|
|
|
137316
|
if ( $type ne 'b' && $type ne 'CORE::' ) { |
5302
|
|
|
|
|
|
|
|
5303
|
|
|
|
|
|
|
# try to catch some common errors |
5304
|
35283
|
100
|
100
|
|
|
75902
|
if ( ( $type eq 'n' ) && ( $tok ne '0' ) ) { |
5305
|
|
|
|
|
|
|
|
5306
|
1592
|
100
|
|
|
|
4839
|
if ( $last_nonblank_token eq 'eq' ) { |
|
|
50
|
|
|
|
|
|
5307
|
9
|
|
|
|
|
59
|
$self->complain("Should 'eq' be '==' here ?\n"); |
5308
|
|
|
|
|
|
|
} |
5309
|
|
|
|
|
|
|
elsif ( $last_nonblank_token eq 'ne' ) { |
5310
|
0
|
|
|
|
|
0
|
$self->complain("Should 'ne' be '!=' here ?\n"); |
5311
|
|
|
|
|
|
|
} |
5312
|
|
|
|
|
|
|
else { |
5313
|
|
|
|
|
|
|
# that's all |
5314
|
|
|
|
|
|
|
} |
5315
|
|
|
|
|
|
|
} |
5316
|
|
|
|
|
|
|
|
5317
|
|
|
|
|
|
|
# fix c090, only rotate vars if a new token will be stored |
5318
|
35283
|
100
|
|
|
|
62955
|
if ( $i_tok >= 0 ) { |
5319
|
|
|
|
|
|
|
|
5320
|
29519
|
|
|
|
|
44184
|
$last_last_nonblank_token = $last_nonblank_token; |
5321
|
29519
|
|
|
|
|
40593
|
$last_last_nonblank_type = $last_nonblank_type; |
5322
|
|
|
|
|
|
|
|
5323
|
29519
|
|
|
|
|
41735
|
$last_nonblank_prototype = $prototype; |
5324
|
29519
|
|
|
|
|
39544
|
$last_nonblank_block_type = $block_type; |
5325
|
29519
|
|
|
|
|
39689
|
$last_nonblank_container_type = $container_type; |
5326
|
29519
|
|
|
|
|
39763
|
$last_nonblank_type_sequence = $type_sequence; |
5327
|
29519
|
|
|
|
|
38589
|
$last_nonblank_i = $i_tok; |
5328
|
|
|
|
|
|
|
|
5329
|
|
|
|
|
|
|
# Fix part #3 for git82: propagate type 'Z' though L-R pair |
5330
|
29519
|
100
|
100
|
|
|
61846
|
if ( !( $type eq 'R' && $last_nonblank_type eq 'Z' ) ) { |
5331
|
29518
|
|
|
|
|
40827
|
$last_nonblank_token = $tok; |
5332
|
29518
|
|
|
|
|
40279
|
$last_nonblank_type = $type; |
5333
|
|
|
|
|
|
|
} |
5334
|
|
|
|
|
|
|
} |
5335
|
|
|
|
|
|
|
|
5336
|
|
|
|
|
|
|
# Patch for c030: Fix things in case a '->' got separated from |
5337
|
|
|
|
|
|
|
# the subsequent identifier by a side comment. We need the |
5338
|
|
|
|
|
|
|
# last_nonblank_token to have a leading -> to avoid triggering |
5339
|
|
|
|
|
|
|
# an operator expected error message at the next '('. See also |
5340
|
|
|
|
|
|
|
# fix for git #63. |
5341
|
35283
|
100
|
|
|
|
64498
|
if ( $last_last_nonblank_token eq '->' ) { |
5342
|
885
|
100
|
66
|
|
|
5104
|
if ( $last_nonblank_type eq 'w' |
5343
|
|
|
|
|
|
|
|| $last_nonblank_type eq 'i' ) |
5344
|
|
|
|
|
|
|
{ |
5345
|
674
|
|
|
|
|
1690
|
$last_nonblank_token = '->' . $last_nonblank_token; |
5346
|
674
|
|
|
|
|
1334
|
$last_nonblank_type = 'i'; |
5347
|
|
|
|
|
|
|
} |
5348
|
|
|
|
|
|
|
} |
5349
|
|
|
|
|
|
|
} |
5350
|
|
|
|
|
|
|
|
5351
|
|
|
|
|
|
|
# store previous token type |
5352
|
50497
|
100
|
|
|
|
86378
|
if ( $i_tok >= 0 ) { |
5353
|
44733
|
|
|
|
|
86656
|
$routput_token_type->[$i_tok] = $type; |
5354
|
44733
|
|
|
|
|
73265
|
$routput_block_type->[$i_tok] = $block_type; |
5355
|
44733
|
|
|
|
|
73133
|
$routput_container_type->[$i_tok] = $container_type; |
5356
|
44733
|
|
|
|
|
70103
|
$routput_type_sequence->[$i_tok] = $type_sequence; |
5357
|
44733
|
|
|
|
|
67550
|
$routput_indent_flag->[$i_tok] = $indent_flag; |
5358
|
|
|
|
|
|
|
} |
5359
|
|
|
|
|
|
|
|
5360
|
|
|
|
|
|
|
# get the next pre-token and type |
5361
|
|
|
|
|
|
|
# $tok and $type will be modified to make the output token |
5362
|
50497
|
|
|
|
|
79628
|
my $pre_tok = $tok = $rtokens->[$i]; # get the next pre-token |
5363
|
50497
|
|
|
|
|
75952
|
my $pre_type = $type = $rtoken_type->[$i]; # and type |
5364
|
|
|
|
|
|
|
|
5365
|
|
|
|
|
|
|
# remember the starting index of this token; we will be updating $i |
5366
|
50497
|
|
|
|
|
67464
|
$i_tok = $i; |
5367
|
|
|
|
|
|
|
|
5368
|
|
|
|
|
|
|
# re-initialize various flags for the next output token |
5369
|
|
|
|
|
|
|
( |
5370
|
|
|
|
|
|
|
|
5371
|
50497
|
|
|
|
|
89767
|
$block_type, |
5372
|
|
|
|
|
|
|
$container_type, |
5373
|
|
|
|
|
|
|
$type_sequence, |
5374
|
|
|
|
|
|
|
$indent_flag, |
5375
|
|
|
|
|
|
|
$prototype, |
5376
|
|
|
|
|
|
|
) |
5377
|
|
|
|
|
|
|
= ( |
5378
|
|
|
|
|
|
|
|
5379
|
|
|
|
|
|
|
EMPTY_STRING, |
5380
|
|
|
|
|
|
|
EMPTY_STRING, |
5381
|
|
|
|
|
|
|
EMPTY_STRING, |
5382
|
|
|
|
|
|
|
0, |
5383
|
|
|
|
|
|
|
EMPTY_STRING, |
5384
|
|
|
|
|
|
|
); |
5385
|
|
|
|
|
|
|
|
5386
|
|
|
|
|
|
|
# this pre-token will start an output token |
5387
|
50497
|
|
|
|
|
62928
|
push( @{$routput_token_list}, $i_tok ); |
|
50497
|
|
|
|
|
85383
|
|
5388
|
|
|
|
|
|
|
|
5389
|
|
|
|
|
|
|
# The search for the full token ends in one of 5 main END NODES: |
5390
|
|
|
|
|
|
|
|
5391
|
|
|
|
|
|
|
#----------------------- |
5392
|
|
|
|
|
|
|
# END NODE 1: whitespace |
5393
|
|
|
|
|
|
|
#----------------------- |
5394
|
50497
|
100
|
|
|
|
106731
|
next if ( $pre_type eq 'b' ); |
5395
|
|
|
|
|
|
|
|
5396
|
|
|
|
|
|
|
#---------------------- |
5397
|
|
|
|
|
|
|
# END NODE 2: a comment |
5398
|
|
|
|
|
|
|
#---------------------- |
5399
|
35141
|
100
|
|
|
|
60791
|
last if ( $pre_type eq '#' ); |
5400
|
|
|
|
|
|
|
|
5401
|
|
|
|
|
|
|
# continue gathering identifier if necessary |
5402
|
34813
|
100
|
|
|
|
61058
|
if ($id_scan_state) { |
5403
|
|
|
|
|
|
|
|
5404
|
17
|
100
|
66
|
|
|
144
|
if ( $is_sub{$id_scan_state} || $is_package{$id_scan_state} ) { |
5405
|
10
|
|
|
|
|
56
|
$self->scan_id(); |
5406
|
|
|
|
|
|
|
} |
5407
|
|
|
|
|
|
|
else { |
5408
|
7
|
|
|
|
|
36
|
$self->scan_identifier(); |
5409
|
|
|
|
|
|
|
} |
5410
|
|
|
|
|
|
|
|
5411
|
17
|
100
|
|
|
|
71
|
if ($id_scan_state) { |
5412
|
|
|
|
|
|
|
|
5413
|
|
|
|
|
|
|
# Still scanning ... |
5414
|
|
|
|
|
|
|
# Check for side comment between sub and prototype (c061) |
5415
|
|
|
|
|
|
|
|
5416
|
|
|
|
|
|
|
# done if nothing left to scan on this line |
5417
|
1
|
50
|
|
|
|
3
|
last if ( $i > $max_token_index ); |
5418
|
|
|
|
|
|
|
|
5419
|
1
|
|
|
|
|
5
|
my ( $next_nonblank_token, $i_next ) = |
5420
|
|
|
|
|
|
|
find_next_nonblank_token_on_this_line( $i, $rtokens, |
5421
|
|
|
|
|
|
|
$max_token_index ); |
5422
|
|
|
|
|
|
|
|
5423
|
|
|
|
|
|
|
# done if it was just some trailing space |
5424
|
1
|
50
|
|
|
|
4
|
last if ( $i_next > $max_token_index ); |
5425
|
|
|
|
|
|
|
|
5426
|
|
|
|
|
|
|
# something remains on the line ... must be a side comment |
5427
|
1
|
|
|
|
|
3
|
next; |
5428
|
|
|
|
|
|
|
} |
5429
|
|
|
|
|
|
|
|
5430
|
16
|
100
|
100
|
|
|
135
|
next if ( ( $i > 0 ) || $type ); |
5431
|
|
|
|
|
|
|
|
5432
|
|
|
|
|
|
|
# didn't find any token; start over |
5433
|
7
|
|
|
|
|
23
|
$type = $pre_type; |
5434
|
7
|
|
|
|
|
17
|
$tok = $pre_tok; |
5435
|
|
|
|
|
|
|
} |
5436
|
|
|
|
|
|
|
|
5437
|
|
|
|
|
|
|
#----------------------------------------------------------- |
5438
|
|
|
|
|
|
|
# Combine pre-tokens into digraphs and trigraphs if possible |
5439
|
|
|
|
|
|
|
#----------------------------------------------------------- |
5440
|
|
|
|
|
|
|
|
5441
|
|
|
|
|
|
|
# See if we can make a digraph... |
5442
|
|
|
|
|
|
|
# The following tokens are excluded and handled specially: |
5443
|
|
|
|
|
|
|
# '/=' is excluded because the / might start a pattern. |
5444
|
|
|
|
|
|
|
# 'x=' is excluded since it might be $x=, with $ on previous line |
5445
|
|
|
|
|
|
|
# '**' and *= might be typeglobs of punctuation variables |
5446
|
|
|
|
|
|
|
# I have allowed tokens starting with <, such as <=, |
5447
|
|
|
|
|
|
|
# because I don't think these could be valid angle operators. |
5448
|
|
|
|
|
|
|
# test file: storrs4.pl |
5449
|
34803
|
100
|
100
|
|
|
108213
|
if ( $can_start_digraph{$tok} |
|
|
|
100
|
|
|
|
|
5450
|
|
|
|
|
|
|
&& $i < $max_token_index |
5451
|
|
|
|
|
|
|
&& $is_digraph{ $tok . $rtokens->[ $i + 1 ] } ) |
5452
|
|
|
|
|
|
|
{ |
5453
|
|
|
|
|
|
|
|
5454
|
2559
|
|
|
|
|
4945
|
my $combine_ok = 1; |
5455
|
2559
|
|
|
|
|
5337
|
my $test_tok = $tok . $rtokens->[ $i + 1 ]; |
5456
|
|
|
|
|
|
|
|
5457
|
|
|
|
|
|
|
# check for special cases which cannot be combined |
5458
|
|
|
|
|
|
|
|
5459
|
|
|
|
|
|
|
# '//' must be defined_or operator if an operator is expected. |
5460
|
|
|
|
|
|
|
# TODO: Code for other ambiguous digraphs (/=, x=, **, *=) |
5461
|
|
|
|
|
|
|
# could be migrated here for clarity |
5462
|
|
|
|
|
|
|
|
5463
|
|
|
|
|
|
|
# Patch for RT#102371, misparsing a // in the following snippet: |
5464
|
|
|
|
|
|
|
# state $b //= ccc(); |
5465
|
|
|
|
|
|
|
# The solution is to always accept the digraph (or trigraph) |
5466
|
|
|
|
|
|
|
# after type 'Z' (possible file handle). The reason is that |
5467
|
|
|
|
|
|
|
# sub operator_expected gives TERM expected here, which is |
5468
|
|
|
|
|
|
|
# wrong in this case. |
5469
|
2559
|
100
|
66
|
|
|
7033
|
if ( $test_tok eq '//' && $last_nonblank_type ne 'Z' ) { |
5470
|
|
|
|
|
|
|
|
5471
|
|
|
|
|
|
|
# note that here $tok = '/' and the next tok and type is '/' |
5472
|
16
|
|
|
|
|
33
|
my $blank_after_Z; |
5473
|
16
|
|
|
|
|
56
|
$expecting = |
5474
|
|
|
|
|
|
|
$self->operator_expected( $tok, '/', $blank_after_Z ); |
5475
|
|
|
|
|
|
|
|
5476
|
|
|
|
|
|
|
# Patched for RT#101547, was 'unless ($expecting==OPERATOR)' |
5477
|
16
|
100
|
|
|
|
53
|
$combine_ok = 0 if ( $expecting == TERM ); |
5478
|
|
|
|
|
|
|
} |
5479
|
|
|
|
|
|
|
|
5480
|
|
|
|
|
|
|
# Patch for RT #114359: mis-parsing of "print $x ** 0.5; |
5481
|
|
|
|
|
|
|
# Accept the digraphs '**' only after type 'Z' |
5482
|
|
|
|
|
|
|
# Otherwise postpone the decision. |
5483
|
2559
|
100
|
|
|
|
6047
|
if ( $test_tok eq '**' ) { |
5484
|
39
|
100
|
|
|
|
126
|
if ( $last_nonblank_type ne 'Z' ) { $combine_ok = 0 } |
|
37
|
|
|
|
|
79
|
|
5485
|
|
|
|
|
|
|
} |
5486
|
|
|
|
|
|
|
|
5487
|
2559
|
50
|
66
|
|
|
16219
|
if ( |
|
|
|
66
|
|
|
|
|
|
|
|
33
|
|
|
|
|
5488
|
|
|
|
|
|
|
|
5489
|
|
|
|
|
|
|
# still ok to combine? |
5490
|
|
|
|
|
|
|
$combine_ok |
5491
|
|
|
|
|
|
|
|
5492
|
|
|
|
|
|
|
&& ( $test_tok ne '/=' ) # might be pattern |
5493
|
|
|
|
|
|
|
&& ( $test_tok ne 'x=' ) # might be $x |
5494
|
|
|
|
|
|
|
&& ( $test_tok ne '*=' ) # typeglob? |
5495
|
|
|
|
|
|
|
|
5496
|
|
|
|
|
|
|
# Moved above as part of fix for |
5497
|
|
|
|
|
|
|
# RT #114359: Missparsing of "print $x ** 0.5; |
5498
|
|
|
|
|
|
|
# && ( $test_tok ne '**' ) # typeglob? |
5499
|
|
|
|
|
|
|
) |
5500
|
|
|
|
|
|
|
{ |
5501
|
2518
|
|
|
|
|
4014
|
$tok = $test_tok; |
5502
|
2518
|
|
|
|
|
3621
|
$i++; |
5503
|
|
|
|
|
|
|
|
5504
|
|
|
|
|
|
|
# Now try to assemble trigraphs. Note that all possible |
5505
|
|
|
|
|
|
|
# perl trigraphs can be constructed by appending a character |
5506
|
|
|
|
|
|
|
# to a digraph. |
5507
|
2518
|
|
|
|
|
4293
|
$test_tok = $tok . $rtokens->[ $i + 1 ]; |
5508
|
|
|
|
|
|
|
|
5509
|
2518
|
100
|
|
|
|
6095
|
if ( $is_trigraph{$test_tok} ) { |
5510
|
76
|
|
|
|
|
186
|
$tok = $test_tok; |
5511
|
76
|
|
|
|
|
166
|
$i++; |
5512
|
|
|
|
|
|
|
} |
5513
|
|
|
|
|
|
|
|
5514
|
|
|
|
|
|
|
# The only current tetragraph is the double diamond operator |
5515
|
|
|
|
|
|
|
# and its first three characters are NOT a trigraph, so |
5516
|
|
|
|
|
|
|
# we do can do a special test for it |
5517
|
|
|
|
|
|
|
else { |
5518
|
2442
|
100
|
|
|
|
5773
|
if ( $test_tok eq '<<>' ) { |
5519
|
1
|
|
|
|
|
4
|
$test_tok .= $rtokens->[ $i + 2 ]; |
5520
|
1
|
50
|
|
|
|
4
|
if ( $is_tetragraph{$test_tok} ) { |
5521
|
1
|
|
|
|
|
2
|
$tok = $test_tok; |
5522
|
1
|
|
|
|
|
5
|
$i += 2; |
5523
|
|
|
|
|
|
|
} |
5524
|
|
|
|
|
|
|
} |
5525
|
|
|
|
|
|
|
} |
5526
|
|
|
|
|
|
|
} |
5527
|
|
|
|
|
|
|
} |
5528
|
|
|
|
|
|
|
|
5529
|
34803
|
|
|
|
|
49416
|
$type = $tok; |
5530
|
34803
|
|
|
|
|
57425
|
$next_tok = $rtokens->[ $i + 1 ]; |
5531
|
34803
|
|
|
|
|
51733
|
$next_type = $rtoken_type->[ $i + 1 ]; |
5532
|
|
|
|
|
|
|
|
5533
|
|
|
|
|
|
|
# expecting an operator here? first try table lookup, then function |
5534
|
34803
|
|
|
|
|
64390
|
$expecting = $op_expected_table{$last_nonblank_type}; |
5535
|
34803
|
100
|
|
|
|
67575
|
if ( !defined($expecting) ) { |
5536
|
8391
|
|
100
|
|
|
20217
|
my $blank_after_Z = $last_nonblank_type eq 'Z' |
5537
|
|
|
|
|
|
|
&& ( $i == 0 || $rtoken_type->[ $i - 1 ] eq 'b' ); |
5538
|
8391
|
|
|
|
|
22469
|
$expecting = |
5539
|
|
|
|
|
|
|
$self->operator_expected( $tok, $next_type, $blank_after_Z ); |
5540
|
|
|
|
|
|
|
} |
5541
|
|
|
|
|
|
|
|
5542
|
34803
|
|
|
|
|
45843
|
DEBUG_TOKENIZE && do { |
5543
|
|
|
|
|
|
|
local $LIST_SEPARATOR = ')('; |
5544
|
|
|
|
|
|
|
my @debug_list = ( |
5545
|
|
|
|
|
|
|
$last_nonblank_token, $tok, |
5546
|
|
|
|
|
|
|
$next_tok, $brace_depth, |
5547
|
|
|
|
|
|
|
$rbrace_type->[$brace_depth], $paren_depth, |
5548
|
|
|
|
|
|
|
$rparen_type->[$paren_depth], |
5549
|
|
|
|
|
|
|
); |
5550
|
|
|
|
|
|
|
print {*STDOUT} "TOKENIZE:(@debug_list)\n"; |
5551
|
|
|
|
|
|
|
}; |
5552
|
|
|
|
|
|
|
|
5553
|
|
|
|
|
|
|
# We have the next token, $tok. |
5554
|
|
|
|
|
|
|
# Now we have to examine this token and decide what it is |
5555
|
|
|
|
|
|
|
# and define its $type |
5556
|
|
|
|
|
|
|
|
5557
|
|
|
|
|
|
|
#------------------------ |
5558
|
|
|
|
|
|
|
# END NODE 3: a bare word |
5559
|
|
|
|
|
|
|
#------------------------ |
5560
|
34803
|
100
|
|
|
|
64558
|
if ( $pre_type eq 'w' ) { |
5561
|
5832
|
|
|
|
|
17547
|
my $is_last = $self->do_BAREWORD($is_END_or_DATA); |
5562
|
5832
|
100
|
|
|
|
12147
|
last if ($is_last); |
5563
|
5824
|
|
|
|
|
14552
|
next; |
5564
|
|
|
|
|
|
|
} |
5565
|
|
|
|
|
|
|
|
5566
|
|
|
|
|
|
|
# Turn off attribute list on first non-blank, non-bareword. |
5567
|
|
|
|
|
|
|
# Added '#' to fix c038 (later moved above). |
5568
|
28971
|
|
100
|
|
|
55711
|
$self->[_in_attribute_list_] &&= 0; |
5569
|
|
|
|
|
|
|
|
5570
|
|
|
|
|
|
|
#------------------------------- |
5571
|
|
|
|
|
|
|
# END NODE 4: a string of digits |
5572
|
|
|
|
|
|
|
#------------------------------- |
5573
|
28971
|
100
|
|
|
|
51696
|
if ( $pre_type eq 'd' ) { |
5574
|
1934
|
|
|
|
|
7387
|
$self->do_DIGITS(); |
5575
|
1934
|
|
|
|
|
4437
|
next; |
5576
|
|
|
|
|
|
|
} |
5577
|
|
|
|
|
|
|
|
5578
|
|
|
|
|
|
|
#------------------------------------------ |
5579
|
|
|
|
|
|
|
# END NODE 5: everything else (punctuation) |
5580
|
|
|
|
|
|
|
#------------------------------------------ |
5581
|
27037
|
|
|
|
|
54229
|
my $code = $tokenization_code->{$tok}; |
5582
|
27037
|
100
|
|
|
|
51112
|
if ($code) { |
5583
|
25304
|
|
|
|
|
76515
|
$code->($self); |
5584
|
25304
|
100
|
|
|
|
65819
|
redo if $in_quote; |
5585
|
|
|
|
|
|
|
} |
5586
|
|
|
|
|
|
|
} ## End main tokenizer loop |
5587
|
|
|
|
|
|
|
|
5588
|
|
|
|
|
|
|
# Store the final token |
5589
|
5902
|
100
|
|
|
|
13059
|
if ( $i_tok >= 0 ) { |
5590
|
5764
|
|
|
|
|
12319
|
$routput_token_type->[$i_tok] = $type; |
5591
|
5764
|
|
|
|
|
10529
|
$routput_block_type->[$i_tok] = $block_type; |
5592
|
5764
|
|
|
|
|
10587
|
$routput_container_type->[$i_tok] = $container_type; |
5593
|
5764
|
|
|
|
|
9909
|
$routput_type_sequence->[$i_tok] = $type_sequence; |
5594
|
5764
|
|
|
|
|
10314
|
$routput_indent_flag->[$i_tok] = $indent_flag; |
5595
|
|
|
|
|
|
|
} |
5596
|
|
|
|
|
|
|
|
5597
|
|
|
|
|
|
|
# Remember last nonblank values |
5598
|
5902
|
100
|
100
|
|
|
21637
|
if ( $type ne 'b' && $type ne '#' ) { |
5599
|
|
|
|
|
|
|
|
5600
|
5429
|
|
|
|
|
8824
|
$last_last_nonblank_token = $last_nonblank_token; |
5601
|
5429
|
|
|
|
|
8361
|
$last_last_nonblank_type = $last_nonblank_type; |
5602
|
|
|
|
|
|
|
|
5603
|
5429
|
|
|
|
|
8198
|
$last_nonblank_prototype = $prototype; |
5604
|
5429
|
|
|
|
|
8068
|
$last_nonblank_block_type = $block_type; |
5605
|
5429
|
|
|
|
|
7625
|
$last_nonblank_container_type = $container_type; |
5606
|
5429
|
|
|
|
|
8285
|
$last_nonblank_type_sequence = $type_sequence; |
5607
|
|
|
|
|
|
|
|
5608
|
5429
|
|
|
|
|
7533
|
$last_nonblank_token = $tok; |
5609
|
5429
|
|
|
|
|
8101
|
$last_nonblank_type = $type; |
5610
|
|
|
|
|
|
|
} |
5611
|
|
|
|
|
|
|
|
5612
|
|
|
|
|
|
|
# reset indentation level if necessary at a sub or package |
5613
|
|
|
|
|
|
|
# in an attempt to recover from a nesting error |
5614
|
5902
|
50
|
|
|
|
12236
|
if ( $level_in_tokenizer < 0 ) { |
5615
|
0
|
0
|
|
|
|
0
|
if ( $input_line =~ /^\s*(sub|package)\s+(\w+)/ ) { |
5616
|
0
|
|
|
|
|
0
|
reset_indentation_level(0); |
5617
|
0
|
|
|
|
|
0
|
$self->brace_warning("resetting level to 0 at $1 $2\n"); |
5618
|
|
|
|
|
|
|
} |
5619
|
|
|
|
|
|
|
} |
5620
|
|
|
|
|
|
|
|
5621
|
5902
|
|
|
|
|
9728
|
$self->[_in_quote_] = $in_quote; |
5622
|
5902
|
100
|
|
|
|
13400
|
$self->[_quote_target_] = |
5623
|
|
|
|
|
|
|
$in_quote ? matching_end_token($quote_character) : EMPTY_STRING; |
5624
|
5902
|
|
|
|
|
11102
|
$self->[_rhere_target_list_] = $rhere_target_list; |
5625
|
|
|
|
|
|
|
|
5626
|
5902
|
|
|
|
|
9941
|
return; |
5627
|
|
|
|
|
|
|
} ## end sub tokenizer_main_loop |
5628
|
|
|
|
|
|
|
|
5629
|
|
|
|
|
|
|
sub tokenizer_wrapup_line { |
5630
|
5902
|
|
|
5902
|
0
|
11199
|
my ( $self, $line_of_tokens ) = @_; |
5631
|
|
|
|
|
|
|
|
5632
|
|
|
|
|
|
|
#--------------------------------------------------------- |
5633
|
|
|
|
|
|
|
# Package a line of tokens for shipping back to the caller |
5634
|
|
|
|
|
|
|
#--------------------------------------------------------- |
5635
|
|
|
|
|
|
|
|
5636
|
|
|
|
|
|
|
# Arrays to hold token values for this line: |
5637
|
5902
|
|
|
|
|
10506
|
my ( @levels, @block_type, @type_sequence, @token_type, @tokens ); |
5638
|
|
|
|
|
|
|
|
5639
|
5902
|
|
|
|
|
14536
|
$line_of_tokens->{_nesting_tokens_0} = $nesting_token_string; |
5640
|
|
|
|
|
|
|
|
5641
|
|
|
|
|
|
|
# Remember starting nesting block string |
5642
|
5902
|
|
|
|
|
9907
|
my $nesting_block_string_0 = $nesting_block_string; |
5643
|
|
|
|
|
|
|
|
5644
|
|
|
|
|
|
|
#----------------- |
5645
|
|
|
|
|
|
|
# Loop over tokens |
5646
|
|
|
|
|
|
|
#----------------- |
5647
|
|
|
|
|
|
|
# $i is the index of the pretoken which starts this full token |
5648
|
5902
|
|
|
|
|
8535
|
foreach my $i ( @{$routput_token_list} ) { |
|
5902
|
|
|
|
|
12720
|
|
5649
|
|
|
|
|
|
|
|
5650
|
50681
|
|
|
|
|
73141
|
my $type_i = $routput_token_type->[$i]; |
5651
|
|
|
|
|
|
|
|
5652
|
|
|
|
|
|
|
#---------------------------------------- |
5653
|
|
|
|
|
|
|
# Section 1. Handle a non-sequenced token |
5654
|
|
|
|
|
|
|
#---------------------------------------- |
5655
|
50681
|
100
|
|
|
|
80911
|
if ( !$routput_type_sequence->[$i] ) { |
5656
|
|
|
|
|
|
|
|
5657
|
|
|
|
|
|
|
#------------------------------- |
5658
|
|
|
|
|
|
|
# Section 1.1. types ';' and 't' |
5659
|
|
|
|
|
|
|
#------------------------------- |
5660
|
|
|
|
|
|
|
# - output anonymous 'sub' as keyword (type 'k') |
5661
|
|
|
|
|
|
|
# - output __END__, __DATA__, and format as type 'k' instead |
5662
|
|
|
|
|
|
|
# of ';' to make html colors correct, etc. |
5663
|
41533
|
100
|
|
|
|
90328
|
if ( $is_semicolon_or_t{$type_i} ) { |
|
|
50
|
|
|
|
|
|
5664
|
2674
|
|
|
|
|
6026
|
my $tok_i = $rtokens->[$i]; |
5665
|
2674
|
100
|
|
|
|
7668
|
if ( $is_END_DATA_format_sub{$tok_i} ) { |
5666
|
172
|
|
|
|
|
461
|
$type_i = 'k'; |
5667
|
|
|
|
|
|
|
} |
5668
|
|
|
|
|
|
|
} |
5669
|
|
|
|
|
|
|
|
5670
|
|
|
|
|
|
|
#---------------------------------------------- |
5671
|
|
|
|
|
|
|
# Section 1.2. Check for an invalid token type. |
5672
|
|
|
|
|
|
|
#---------------------------------------------- |
5673
|
|
|
|
|
|
|
# This can happen by running perltidy on non-scripts although |
5674
|
|
|
|
|
|
|
# it could also be bug introduced by programming change. Perl |
5675
|
|
|
|
|
|
|
# silently accepts a 032 (^Z) and takes it as the end |
5676
|
|
|
|
|
|
|
elsif ( !$is_valid_token_type{$type_i} ) { |
5677
|
0
|
|
|
|
|
0
|
my $val = ord($type_i); |
5678
|
0
|
|
|
|
|
0
|
$self->warning( |
5679
|
|
|
|
|
|
|
"unexpected character decimal $val ($type_i) in script\n" |
5680
|
|
|
|
|
|
|
); |
5681
|
0
|
|
|
|
|
0
|
$self->[_in_error_] = 1; |
5682
|
|
|
|
|
|
|
} |
5683
|
|
|
|
|
|
|
else { |
5684
|
|
|
|
|
|
|
## ok - valid token type other than ; and t |
5685
|
|
|
|
|
|
|
} |
5686
|
|
|
|
|
|
|
|
5687
|
|
|
|
|
|
|
#---------------------------------------------------- |
5688
|
|
|
|
|
|
|
# Section 1.3. Store values for a non-sequenced token |
5689
|
|
|
|
|
|
|
#---------------------------------------------------- |
5690
|
41533
|
|
|
|
|
66315
|
push( @levels, $level_in_tokenizer ); |
5691
|
41533
|
|
|
|
|
61537
|
push( @block_type, EMPTY_STRING ); |
5692
|
41533
|
|
|
|
|
60375
|
push( @type_sequence, EMPTY_STRING ); |
5693
|
41533
|
|
|
|
|
77073
|
push( @token_type, $type_i ); |
5694
|
|
|
|
|
|
|
|
5695
|
|
|
|
|
|
|
} |
5696
|
|
|
|
|
|
|
|
5697
|
|
|
|
|
|
|
#------------------------------------ |
5698
|
|
|
|
|
|
|
# Section 2. Handle a sequenced token |
5699
|
|
|
|
|
|
|
# One of { [ ( ? : ) ] } |
5700
|
|
|
|
|
|
|
#------------------------------------ |
5701
|
|
|
|
|
|
|
else { |
5702
|
|
|
|
|
|
|
|
5703
|
|
|
|
|
|
|
# $level_i is the level we will store. Levels of braces are |
5704
|
|
|
|
|
|
|
# set so that the leading braces have a HIGHER level than their |
5705
|
|
|
|
|
|
|
# CONTENTS, which is convenient for indentation. |
5706
|
9148
|
|
|
|
|
13733
|
my $level_i = $level_in_tokenizer; |
5707
|
|
|
|
|
|
|
|
5708
|
|
|
|
|
|
|
# $tok_i is the PRE-token. It only equals the token for symbols |
5709
|
9148
|
|
|
|
|
14676
|
my $tok_i = $rtokens->[$i]; |
5710
|
|
|
|
|
|
|
|
5711
|
|
|
|
|
|
|
# $routput_indent_flag->[$i] indicates that we need a change |
5712
|
|
|
|
|
|
|
# in level at a nested ternary, as follows |
5713
|
|
|
|
|
|
|
# 1 => at a nested ternary ? |
5714
|
|
|
|
|
|
|
# -1 => at a nested ternary : |
5715
|
|
|
|
|
|
|
# 0 => otherwise |
5716
|
|
|
|
|
|
|
|
5717
|
|
|
|
|
|
|
#-------------------------------------------- |
5718
|
|
|
|
|
|
|
# Section 2.1 Handle a level-increasing token |
5719
|
|
|
|
|
|
|
#-------------------------------------------- |
5720
|
9148
|
100
|
|
|
|
24398
|
if ( $is_opening_or_ternary_type{$type_i} ) { |
|
|
50
|
|
|
|
|
|
5721
|
|
|
|
|
|
|
|
5722
|
4574
|
100
|
|
|
|
9431
|
if ( $type_i eq '?' ) { |
5723
|
|
|
|
|
|
|
|
5724
|
187
|
100
|
|
|
|
1010
|
if ( $routput_indent_flag->[$i] > 0 ) { |
5725
|
8
|
|
|
|
|
16
|
$level_in_tokenizer++; |
5726
|
|
|
|
|
|
|
|
5727
|
|
|
|
|
|
|
# break BEFORE '?' in a nested ternary |
5728
|
8
|
|
|
|
|
24
|
$level_i = $level_in_tokenizer; |
5729
|
8
|
|
|
|
|
21
|
$nesting_block_string .= "$nesting_block_flag"; |
5730
|
|
|
|
|
|
|
|
5731
|
|
|
|
|
|
|
} |
5732
|
|
|
|
|
|
|
} |
5733
|
|
|
|
|
|
|
else { |
5734
|
|
|
|
|
|
|
|
5735
|
4387
|
|
|
|
|
7363
|
$nesting_token_string .= $tok_i; |
5736
|
|
|
|
|
|
|
|
5737
|
4387
|
100
|
100
|
|
|
12162
|
if ( $type_i eq '{' || $type_i eq 'L' ) { |
5738
|
|
|
|
|
|
|
|
5739
|
4080
|
|
|
|
|
5925
|
$level_in_tokenizer++; |
5740
|
|
|
|
|
|
|
|
5741
|
4080
|
100
|
|
|
|
7935
|
if ( $routput_block_type->[$i] ) { |
5742
|
972
|
|
|
|
|
1905
|
$nesting_block_flag = 1; |
5743
|
972
|
|
|
|
|
1992
|
$nesting_block_string .= '1'; |
5744
|
|
|
|
|
|
|
} |
5745
|
|
|
|
|
|
|
else { |
5746
|
3108
|
|
|
|
|
4892
|
$nesting_block_flag = 0; |
5747
|
3108
|
|
|
|
|
5303
|
$nesting_block_string .= '0'; |
5748
|
|
|
|
|
|
|
} |
5749
|
|
|
|
|
|
|
} |
5750
|
|
|
|
|
|
|
} |
5751
|
|
|
|
|
|
|
} |
5752
|
|
|
|
|
|
|
|
5753
|
|
|
|
|
|
|
#--------------------------------------------- |
5754
|
|
|
|
|
|
|
# Section 2.2. Handle a level-decreasing token |
5755
|
|
|
|
|
|
|
#--------------------------------------------- |
5756
|
|
|
|
|
|
|
elsif ( $is_closing_or_ternary_type{$type_i} ) { |
5757
|
|
|
|
|
|
|
|
5758
|
4574
|
100
|
|
|
|
10311
|
if ( $type_i ne ':' ) { |
5759
|
4387
|
|
|
|
|
8285
|
my $char = chop $nesting_token_string; |
5760
|
4387
|
50
|
|
|
|
11603
|
if ( $char ne $matching_start_token{$tok_i} ) { |
5761
|
0
|
|
|
|
|
0
|
$nesting_token_string .= $char . $tok_i; |
5762
|
|
|
|
|
|
|
} |
5763
|
|
|
|
|
|
|
} |
5764
|
|
|
|
|
|
|
|
5765
|
4574
|
100
|
100
|
|
|
14693
|
if ( |
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
5766
|
|
|
|
|
|
|
$type_i eq '}' |
5767
|
|
|
|
|
|
|
|| $type_i eq 'R' |
5768
|
|
|
|
|
|
|
|
5769
|
|
|
|
|
|
|
# only the second and higher ? : have levels |
5770
|
|
|
|
|
|
|
|| $type_i eq ':' && $routput_indent_flag->[$i] < 0 |
5771
|
|
|
|
|
|
|
) |
5772
|
|
|
|
|
|
|
{ |
5773
|
|
|
|
|
|
|
|
5774
|
4088
|
|
|
|
|
6339
|
$level_i = --$level_in_tokenizer; |
5775
|
|
|
|
|
|
|
|
5776
|
4088
|
50
|
|
|
|
8200
|
if ( $level_in_tokenizer < 0 ) { |
5777
|
0
|
0
|
|
|
|
0
|
if ( !$self->[_saw_negative_indentation_] ) { |
5778
|
0
|
|
|
|
|
0
|
$self->[_saw_negative_indentation_] = 1; |
5779
|
0
|
|
|
|
|
0
|
$self->warning( |
5780
|
|
|
|
|
|
|
"Starting negative indentation\n"); |
5781
|
|
|
|
|
|
|
} |
5782
|
|
|
|
|
|
|
} |
5783
|
|
|
|
|
|
|
|
5784
|
|
|
|
|
|
|
# restore previous level values |
5785
|
4088
|
50
|
|
|
|
8381
|
if ( length($nesting_block_string) > 1 ) |
5786
|
|
|
|
|
|
|
{ # true for valid script |
5787
|
4088
|
|
|
|
|
6409
|
chop $nesting_block_string; |
5788
|
4088
|
|
|
|
|
8150
|
$nesting_block_flag = |
5789
|
|
|
|
|
|
|
substr( $nesting_block_string, -1 ) eq '1'; |
5790
|
|
|
|
|
|
|
} |
5791
|
|
|
|
|
|
|
|
5792
|
|
|
|
|
|
|
} |
5793
|
|
|
|
|
|
|
} |
5794
|
|
|
|
|
|
|
|
5795
|
|
|
|
|
|
|
#----------------------------------------------------- |
5796
|
|
|
|
|
|
|
# Section 2.3. Unexpected sequenced token type - error |
5797
|
|
|
|
|
|
|
#----------------------------------------------------- |
5798
|
|
|
|
|
|
|
else { |
5799
|
|
|
|
|
|
|
|
5800
|
|
|
|
|
|
|
# The tokenizer should only be assigning sequence numbers |
5801
|
|
|
|
|
|
|
# to types { [ ( ? ) ] } : |
5802
|
0
|
|
|
|
|
0
|
DEVEL_MODE && $self->Fault(<<EOM); |
5803
|
|
|
|
|
|
|
unexpected sequence number on token type $type_i with pre-tok=$tok_i |
5804
|
|
|
|
|
|
|
EOM |
5805
|
|
|
|
|
|
|
} |
5806
|
|
|
|
|
|
|
|
5807
|
|
|
|
|
|
|
#------------------------------------------------ |
5808
|
|
|
|
|
|
|
# Section 2.4. Store values for a sequenced token |
5809
|
|
|
|
|
|
|
#------------------------------------------------ |
5810
|
|
|
|
|
|
|
|
5811
|
|
|
|
|
|
|
# The starting nesting block string, which is used in any .LOG |
5812
|
|
|
|
|
|
|
# output, should include the first token of the line |
5813
|
9148
|
100
|
|
|
|
18805
|
if ( !@levels ) { |
5814
|
1574
|
|
|
|
|
2941
|
$nesting_block_string_0 = $nesting_block_string; |
5815
|
|
|
|
|
|
|
} |
5816
|
|
|
|
|
|
|
|
5817
|
|
|
|
|
|
|
# Store values for a sequenced token |
5818
|
9148
|
|
|
|
|
16779
|
push( @levels, $level_i ); |
5819
|
9148
|
|
|
|
|
17473
|
push( @block_type, $routput_block_type->[$i] ); |
5820
|
9148
|
|
|
|
|
15415
|
push( @type_sequence, $routput_type_sequence->[$i] ); |
5821
|
9148
|
|
|
|
|
19071
|
push( @token_type, $type_i ); |
5822
|
|
|
|
|
|
|
|
5823
|
|
|
|
|
|
|
} |
5824
|
|
|
|
|
|
|
} ## End loop to over tokens |
5825
|
|
|
|
|
|
|
|
5826
|
|
|
|
|
|
|
#--------------------- |
5827
|
|
|
|
|
|
|
# Post-loop operations |
5828
|
|
|
|
|
|
|
#--------------------- |
5829
|
|
|
|
|
|
|
|
5830
|
5902
|
|
|
|
|
14720
|
$line_of_tokens->{_nesting_blocks_0} = $nesting_block_string_0; |
5831
|
|
|
|
|
|
|
|
5832
|
|
|
|
|
|
|
# Form and store the tokens |
5833
|
5902
|
50
|
|
|
|
13591
|
if (@levels) { |
5834
|
|
|
|
|
|
|
|
5835
|
5902
|
|
|
|
|
8349
|
my $im = shift @{$routput_token_list}; |
|
5902
|
|
|
|
|
10793
|
|
5836
|
5902
|
|
|
|
|
10121
|
my $offset = $rtoken_map->[$im]; |
5837
|
5902
|
|
|
|
|
8476
|
foreach my $i ( @{$routput_token_list} ) { |
|
5902
|
|
|
|
|
10506
|
|
5838
|
44779
|
|
|
|
|
61730
|
my $numc = $rtoken_map->[$i] - $offset; |
5839
|
44779
|
|
|
|
|
79618
|
push( @tokens, substr( $input_line, $offset, $numc ) ); |
5840
|
44779
|
|
|
|
|
56961
|
$offset += $numc; |
5841
|
|
|
|
|
|
|
|
5842
|
44779
|
|
|
|
|
60753
|
if ( DEVEL_MODE && $numc <= 0 ) { |
5843
|
|
|
|
|
|
|
|
5844
|
|
|
|
|
|
|
# Should not happen unless @{$rtoken_map} is corrupted |
5845
|
|
|
|
|
|
|
$self->Fault( |
5846
|
|
|
|
|
|
|
"number of characters is '$numc' but should be >0\n"); |
5847
|
|
|
|
|
|
|
} |
5848
|
|
|
|
|
|
|
} |
5849
|
|
|
|
|
|
|
|
5850
|
|
|
|
|
|
|
# Form and store the final token of this line |
5851
|
5902
|
|
|
|
|
11997
|
my $numc = length($input_line) - $offset; |
5852
|
5902
|
|
|
|
|
12223
|
push( @tokens, substr( $input_line, $offset, $numc ) ); |
5853
|
|
|
|
|
|
|
|
5854
|
5902
|
|
|
|
|
9022
|
if ( DEVEL_MODE && $numc <= 0 ) { |
5855
|
|
|
|
|
|
|
$self->Fault( |
5856
|
|
|
|
|
|
|
"Number of Characters is '$numc' but should be >0\n"); |
5857
|
|
|
|
|
|
|
} |
5858
|
|
|
|
|
|
|
} |
5859
|
|
|
|
|
|
|
|
5860
|
|
|
|
|
|
|
# NOTE: This routine returns ci=0. Eventually '_rci_levels' can be |
5861
|
|
|
|
|
|
|
# removed. The ci values are computed later by sub Formatter::set_ci. |
5862
|
5902
|
|
|
|
|
19678
|
my @ci_levels = (0) x scalar(@levels); |
5863
|
|
|
|
|
|
|
|
5864
|
|
|
|
|
|
|
# Wrap up this line of tokens for shipping to the Formatter |
5865
|
5902
|
|
|
|
|
13822
|
$line_of_tokens->{_rtoken_type} = \@token_type; |
5866
|
5902
|
|
|
|
|
11320
|
$line_of_tokens->{_rtokens} = \@tokens; |
5867
|
5902
|
|
|
|
|
11345
|
$line_of_tokens->{_rblock_type} = \@block_type; |
5868
|
5902
|
|
|
|
|
11386
|
$line_of_tokens->{_rtype_sequence} = \@type_sequence; |
5869
|
5902
|
|
|
|
|
18923
|
$line_of_tokens->{_rlevels} = \@levels; |
5870
|
5902
|
|
|
|
|
11777
|
$line_of_tokens->{_rci_levels} = \@ci_levels; |
5871
|
|
|
|
|
|
|
|
5872
|
5902
|
|
|
|
|
15726
|
return; |
5873
|
|
|
|
|
|
|
} ## end sub tokenizer_wrapup_line |
5874
|
|
|
|
|
|
|
|
5875
|
|
|
|
|
|
|
} ## end tokenize_this_line |
5876
|
|
|
|
|
|
|
|
5877
|
|
|
|
|
|
|
####################################################################### |
5878
|
|
|
|
|
|
|
# Tokenizer routines which assist in identifying token types |
5879
|
|
|
|
|
|
|
####################################################################### |
5880
|
|
|
|
|
|
|
|
5881
|
|
|
|
|
|
|
# Define Global '%op_expected_table' |
5882
|
|
|
|
|
|
|
# = hash table of operator expected values based on last nonblank token |
5883
|
|
|
|
|
|
|
|
5884
|
|
|
|
|
|
|
# exceptions to perl's weird parsing rules after type 'Z' |
5885
|
|
|
|
|
|
|
my %is_weird_parsing_rule_exception; |
5886
|
|
|
|
|
|
|
|
5887
|
|
|
|
|
|
|
my %is_paren_dollar; |
5888
|
|
|
|
|
|
|
|
5889
|
|
|
|
|
|
|
my %is_n_v; |
5890
|
|
|
|
|
|
|
|
5891
|
|
|
|
|
|
|
BEGIN { |
5892
|
|
|
|
|
|
|
|
5893
|
|
|
|
|
|
|
# Always expecting TERM following these types: |
5894
|
|
|
|
|
|
|
# note: this is identical to '@value_requestor_type' defined later. |
5895
|
|
|
|
|
|
|
# Fix for c250: add new type 'P' for package (expecting VERSION or {} |
5896
|
|
|
|
|
|
|
# after package NAMESPACE, so expecting TERM) |
5897
|
|
|
|
|
|
|
# Fix for c250: add new type 'S' for sub (not expecting operator) |
5898
|
39
|
|
|
39
|
|
917
|
my @q = qw( |
5899
|
|
|
|
|
|
|
; ! + x & ? F J - p / Y : % f U ~ A G j L P S * . | ^ < = [ m { \ > t |
5900
|
|
|
|
|
|
|
|| >= != mm *= => .. !~ == && |= .= pp -= =~ += <= %= ^= x= ~~ ** << /= |
5901
|
|
|
|
|
|
|
&= // >> ~. &. |. ^. |
5902
|
|
|
|
|
|
|
... **= <<= >>= &&= ||= //= <=> !~~ &.= |.= ^.= <<~ |
5903
|
|
|
|
|
|
|
); |
5904
|
39
|
|
|
|
|
4593
|
push @q, ','; |
5905
|
39
|
|
|
|
|
133
|
push @q, '('; # for completeness, not currently a token type |
5906
|
39
|
|
|
|
|
75
|
push @q, '->'; # was previously in UNKNOWN |
5907
|
39
|
|
|
|
|
1325
|
@{op_expected_table}{@q} = (TERM) x scalar(@q); |
5908
|
|
|
|
|
|
|
|
5909
|
|
|
|
|
|
|
# Always UNKNOWN following these types; |
5910
|
|
|
|
|
|
|
# previously had '->' in this list for c030 |
5911
|
39
|
|
|
|
|
209
|
@q = qw( w ); |
5912
|
39
|
|
|
|
|
120
|
@{op_expected_table}{@q} = (UNKNOWN) x scalar(@q); |
5913
|
|
|
|
|
|
|
|
5914
|
|
|
|
|
|
|
# Always expecting OPERATOR ... |
5915
|
|
|
|
|
|
|
# 'n' and 'v' are currently excluded because they might be VERSION numbers |
5916
|
|
|
|
|
|
|
# 'i' is currently excluded because it might be a package |
5917
|
|
|
|
|
|
|
# 'q' is currently excluded because it might be a prototype |
5918
|
|
|
|
|
|
|
# Fix for c030: removed '->' from this list: |
5919
|
|
|
|
|
|
|
# Fix for c250: added 'i' because new type 'P' was added |
5920
|
39
|
|
|
|
|
115
|
@q = qw( -- C h R ++ ] Q <> i ); ## n v q ); |
5921
|
39
|
|
|
|
|
75
|
push @q, ')'; |
5922
|
39
|
|
|
|
|
255
|
@{op_expected_table}{@q} = (OPERATOR) x scalar(@q); |
5923
|
|
|
|
|
|
|
|
5924
|
|
|
|
|
|
|
# Fix for git #62: added '*' and '%' |
5925
|
39
|
|
|
|
|
98
|
@q = qw( < ? * % ); |
5926
|
39
|
|
|
|
|
139
|
@{is_weird_parsing_rule_exception}{@q} = (1) x scalar(@q); |
5927
|
|
|
|
|
|
|
|
5928
|
39
|
|
|
|
|
87
|
@q = qw<) $>; |
5929
|
39
|
|
|
|
|
101
|
@{is_paren_dollar}{@q} = (1) x scalar(@q); |
5930
|
|
|
|
|
|
|
|
5931
|
39
|
|
|
|
|
95
|
@q = qw( n v ); |
5932
|
39
|
|
|
|
|
1347
|
@{is_n_v}{@q} = (1) x scalar(@q); |
5933
|
|
|
|
|
|
|
|
5934
|
|
|
|
|
|
|
} ## end BEGIN |
5935
|
|
|
|
|
|
|
|
5936
|
39
|
|
|
39
|
|
334
|
use constant DEBUG_OPERATOR_EXPECTED => 0; |
|
39
|
|
|
|
|
125
|
|
|
39
|
|
|
|
|
86972
|
|
5937
|
|
|
|
|
|
|
|
5938
|
|
|
|
|
|
|
sub operator_expected { |
5939
|
|
|
|
|
|
|
|
5940
|
|
|
|
|
|
|
# Returns a parameter indicating what types of tokens can occur next |
5941
|
|
|
|
|
|
|
|
5942
|
|
|
|
|
|
|
# Call format: |
5943
|
|
|
|
|
|
|
# $op_expected = |
5944
|
|
|
|
|
|
|
# $self->operator_expected( $tok, $next_type, $blank_after_Z ); |
5945
|
|
|
|
|
|
|
# where |
5946
|
|
|
|
|
|
|
# $tok is the current token |
5947
|
|
|
|
|
|
|
# $next_type is the type of the next token (blank or not) |
5948
|
|
|
|
|
|
|
# $blank_after_Z = flag for guessing after a type 'Z': |
5949
|
|
|
|
|
|
|
# true if $tok follows type 'Z' with intermediate blank |
5950
|
|
|
|
|
|
|
# false if $tok follows type 'Z' with no intermediate blank |
5951
|
|
|
|
|
|
|
# ignored if $tok does not follow type 'Z' |
5952
|
|
|
|
|
|
|
|
5953
|
|
|
|
|
|
|
# Many perl symbols have two or more meanings. For example, '<<' |
5954
|
|
|
|
|
|
|
# can be a shift operator or a here-doc operator. The |
5955
|
|
|
|
|
|
|
# interpretation of these symbols depends on the current state of |
5956
|
|
|
|
|
|
|
# the tokenizer, which may either be expecting a term or an |
5957
|
|
|
|
|
|
|
# operator. For this example, a << would be a shift if an OPERATOR |
5958
|
|
|
|
|
|
|
# is expected, and a here-doc if a TERM is expected. This routine |
5959
|
|
|
|
|
|
|
# is called to make this decision for any current token. It returns |
5960
|
|
|
|
|
|
|
# one of three possible values: |
5961
|
|
|
|
|
|
|
# |
5962
|
|
|
|
|
|
|
# OPERATOR - operator expected (or at least, not a term) |
5963
|
|
|
|
|
|
|
# UNKNOWN - can't tell |
5964
|
|
|
|
|
|
|
# TERM - a term is expected (or at least, not an operator) |
5965
|
|
|
|
|
|
|
# |
5966
|
|
|
|
|
|
|
# The decision is based on what has been seen so far. This |
5967
|
|
|
|
|
|
|
# information is stored in the "$last_nonblank_type" and |
5968
|
|
|
|
|
|
|
# "$last_nonblank_token" variables. For example, if the |
5969
|
|
|
|
|
|
|
# $last_nonblank_type is '=~', then we are expecting a TERM, whereas |
5970
|
|
|
|
|
|
|
# if $last_nonblank_type is 'n' (numeric), we are expecting an |
5971
|
|
|
|
|
|
|
# OPERATOR. |
5972
|
|
|
|
|
|
|
# |
5973
|
|
|
|
|
|
|
# If a UNKNOWN is returned, the calling routine must guess. A major |
5974
|
|
|
|
|
|
|
# goal of this tokenizer is to minimize the possibility of returning |
5975
|
|
|
|
|
|
|
# UNKNOWN, because a wrong guess can spoil the formatting of a |
5976
|
|
|
|
|
|
|
# script. |
5977
|
|
|
|
|
|
|
# |
5978
|
|
|
|
|
|
|
# Adding NEW_TOKENS: it is critically important that this routine be |
5979
|
|
|
|
|
|
|
# updated to allow it to determine if an operator or term is to be |
5980
|
|
|
|
|
|
|
# expected after the new token. Doing this simply involves adding |
5981
|
|
|
|
|
|
|
# the new token character to one of the regexes in this routine or |
5982
|
|
|
|
|
|
|
# to one of the hash lists |
5983
|
|
|
|
|
|
|
# that it uses, which are initialized in the BEGIN section. |
5984
|
|
|
|
|
|
|
# USES GLOBAL VARIABLES: $last_nonblank_type, $last_nonblank_token, |
5985
|
|
|
|
|
|
|
# $statement_type |
5986
|
|
|
|
|
|
|
|
5987
|
|
|
|
|
|
|
# When possible, token types should be selected such that we can determine |
5988
|
|
|
|
|
|
|
# the 'operator_expected' value by a simple hash lookup. If there are |
5989
|
|
|
|
|
|
|
# exceptions, that is an indication that a new type is needed. |
5990
|
|
|
|
|
|
|
|
5991
|
8421
|
|
|
8421
|
0
|
17898
|
my ( $self, $tok, $next_type, $blank_after_Z ) = @_; |
5992
|
|
|
|
|
|
|
|
5993
|
|
|
|
|
|
|
#-------------------------------------------- |
5994
|
|
|
|
|
|
|
# Section 1: Table lookup will get most cases |
5995
|
|
|
|
|
|
|
#-------------------------------------------- |
5996
|
|
|
|
|
|
|
|
5997
|
|
|
|
|
|
|
# Many types are can be obtained by a table lookup. This typically handles |
5998
|
|
|
|
|
|
|
# more than half of the calls. For speed, the caller may try table lookup |
5999
|
|
|
|
|
|
|
# first before calling this sub. |
6000
|
8421
|
|
|
|
|
13581
|
my $op_expected = $op_expected_table{$last_nonblank_type}; |
6001
|
8421
|
100
|
|
|
|
17183
|
if ( defined($op_expected) ) { |
6002
|
|
|
|
|
|
|
DEBUG_OPERATOR_EXPECTED |
6003
|
24
|
|
|
|
|
47
|
&& print {*STDOUT} |
6004
|
|
|
|
|
|
|
"OPERATOR_EXPECTED: Table Lookup; returns $op_expected for last type $last_nonblank_type token $last_nonblank_token\n"; |
6005
|
24
|
|
|
|
|
89
|
return $op_expected; |
6006
|
|
|
|
|
|
|
} |
6007
|
|
|
|
|
|
|
|
6008
|
|
|
|
|
|
|
#--------------------------------------------- |
6009
|
|
|
|
|
|
|
# Section 2: Handle special cases if necessary |
6010
|
|
|
|
|
|
|
#--------------------------------------------- |
6011
|
|
|
|
|
|
|
|
6012
|
|
|
|
|
|
|
# Types 'k', '}' and 'Z' depend on context |
6013
|
|
|
|
|
|
|
# Types 'n', 'v', 'q' also depend on context. |
6014
|
|
|
|
|
|
|
|
6015
|
|
|
|
|
|
|
# identifier... |
6016
|
|
|
|
|
|
|
# Fix for c250: removed coding for type 'i' because 'i' and new type 'P' |
6017
|
|
|
|
|
|
|
# are now done by hash table lookup |
6018
|
|
|
|
|
|
|
|
6019
|
|
|
|
|
|
|
# keyword... |
6020
|
8397
|
100
|
|
|
|
25394
|
if ( $last_nonblank_type eq 'k' ) { |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
6021
|
|
|
|
|
|
|
|
6022
|
|
|
|
|
|
|
# keywords expecting OPERATOR: |
6023
|
2644
|
100
|
|
|
|
12562
|
if ( $expecting_operator_token{$last_nonblank_token} ) { |
|
|
100
|
|
|
|
|
|
6024
|
7
|
|
|
|
|
23
|
$op_expected = OPERATOR; |
6025
|
|
|
|
|
|
|
} |
6026
|
|
|
|
|
|
|
|
6027
|
|
|
|
|
|
|
# keywords expecting TERM: |
6028
|
|
|
|
|
|
|
elsif ( $expecting_term_token{$last_nonblank_token} ) { |
6029
|
|
|
|
|
|
|
|
6030
|
|
|
|
|
|
|
# Exceptions from TERM: |
6031
|
|
|
|
|
|
|
|
6032
|
|
|
|
|
|
|
# // may follow perl functions which may be unary operators |
6033
|
|
|
|
|
|
|
# see test file dor.t (defined or); |
6034
|
2538
|
100
|
100
|
|
|
10982
|
if ( |
|
|
50
|
100
|
|
|
|
|
|
|
|
66
|
|
|
|
|
6035
|
|
|
|
|
|
|
$tok eq '/' |
6036
|
|
|
|
|
|
|
&& $next_type eq '/' |
6037
|
|
|
|
|
|
|
&& $is_keyword_rejecting_slash_as_pattern_delimiter{ |
6038
|
|
|
|
|
|
|
$last_nonblank_token} |
6039
|
|
|
|
|
|
|
) |
6040
|
|
|
|
|
|
|
{ |
6041
|
1
|
|
|
|
|
4
|
$op_expected = OPERATOR; |
6042
|
|
|
|
|
|
|
} |
6043
|
|
|
|
|
|
|
|
6044
|
|
|
|
|
|
|
# Patch to allow a ? following 'split' to be a deprecated pattern |
6045
|
|
|
|
|
|
|
# delimiter. This patch is coordinated with the omission of split |
6046
|
|
|
|
|
|
|
# from the list |
6047
|
|
|
|
|
|
|
# %is_keyword_rejecting_question_as_pattern_delimiter. This patch |
6048
|
|
|
|
|
|
|
# will force perltidy to guess. |
6049
|
|
|
|
|
|
|
elsif ($tok eq '?' |
6050
|
|
|
|
|
|
|
&& $last_nonblank_token eq 'split' ) |
6051
|
|
|
|
|
|
|
{ |
6052
|
0
|
|
|
|
|
0
|
$op_expected = UNKNOWN; |
6053
|
|
|
|
|
|
|
} |
6054
|
|
|
|
|
|
|
else { |
6055
|
2537
|
|
|
|
|
4479
|
$op_expected = TERM; |
6056
|
|
|
|
|
|
|
} |
6057
|
|
|
|
|
|
|
} |
6058
|
|
|
|
|
|
|
else { |
6059
|
99
|
|
|
|
|
244
|
$op_expected = TERM; |
6060
|
|
|
|
|
|
|
} |
6061
|
|
|
|
|
|
|
} ## end type 'k' |
6062
|
|
|
|
|
|
|
|
6063
|
|
|
|
|
|
|
# closing container token... |
6064
|
|
|
|
|
|
|
|
6065
|
|
|
|
|
|
|
# Note that the actual token for type '}' may also be a ')'. |
6066
|
|
|
|
|
|
|
|
6067
|
|
|
|
|
|
|
# Also note that $last_nonblank_token is not the token corresponding to |
6068
|
|
|
|
|
|
|
# $last_nonblank_type when the type is a closing container. In that |
6069
|
|
|
|
|
|
|
# case it is the token before the corresponding opening container token. |
6070
|
|
|
|
|
|
|
# So for example, for this snippet |
6071
|
|
|
|
|
|
|
# $a = do { BLOCK } / 2; |
6072
|
|
|
|
|
|
|
# the $last_nonblank_token is 'do' when $last_nonblank_type eq '}'. |
6073
|
|
|
|
|
|
|
|
6074
|
|
|
|
|
|
|
elsif ( $last_nonblank_type eq '}' ) { |
6075
|
3590
|
|
|
|
|
6531
|
$op_expected = UNKNOWN; |
6076
|
|
|
|
|
|
|
|
6077
|
|
|
|
|
|
|
# handle something after 'do' and 'eval' |
6078
|
3590
|
100
|
66
|
|
|
19311
|
if ( $is_block_operator{$last_nonblank_token} ) { |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
6079
|
|
|
|
|
|
|
|
6080
|
|
|
|
|
|
|
# something like $a = do { BLOCK } / 2; |
6081
|
82
|
|
|
|
|
246
|
$op_expected = OPERATOR; # block mode following } |
6082
|
|
|
|
|
|
|
} |
6083
|
|
|
|
|
|
|
|
6084
|
|
|
|
|
|
|
# $last_nonblank_token =~ /^(\)|\$|\-\>)/ |
6085
|
|
|
|
|
|
|
elsif ( $is_paren_dollar{ substr( $last_nonblank_token, 0, 1 ) } |
6086
|
|
|
|
|
|
|
|| substr( $last_nonblank_token, 0, 2 ) eq '->' ) |
6087
|
|
|
|
|
|
|
{ |
6088
|
2122
|
|
|
|
|
3753
|
$op_expected = OPERATOR; |
6089
|
2122
|
50
|
|
|
|
5117
|
if ( $last_nonblank_token eq '$' ) { $op_expected = UNKNOWN } |
|
0
|
|
|
|
|
0
|
|
6090
|
|
|
|
|
|
|
} |
6091
|
|
|
|
|
|
|
|
6092
|
|
|
|
|
|
|
# Check for smartmatch operator before preceding brace or square |
6093
|
|
|
|
|
|
|
# bracket. For example, at the ? after the ] in the following |
6094
|
|
|
|
|
|
|
# expressions we are expecting an operator: |
6095
|
|
|
|
|
|
|
# |
6096
|
|
|
|
|
|
|
# qr/3/ ~~ ['1234'] ? 1 : 0; |
6097
|
|
|
|
|
|
|
# map { $_ ~~ [ '0', '1' ] ? 'x' : 'o' } @a; |
6098
|
|
|
|
|
|
|
elsif ( $last_nonblank_token eq '~~' ) { |
6099
|
20
|
|
|
|
|
47
|
$op_expected = OPERATOR; |
6100
|
|
|
|
|
|
|
} |
6101
|
|
|
|
|
|
|
|
6102
|
|
|
|
|
|
|
# A right brace here indicates the end of a simple block. All |
6103
|
|
|
|
|
|
|
# non-structural right braces have type 'R' all braces associated with |
6104
|
|
|
|
|
|
|
# block operator keywords have been given those keywords as |
6105
|
|
|
|
|
|
|
# "last_nonblank_token" and caught above. (This statement is order |
6106
|
|
|
|
|
|
|
# dependent, and must come after checking $last_nonblank_token). |
6107
|
|
|
|
|
|
|
else { |
6108
|
|
|
|
|
|
|
|
6109
|
|
|
|
|
|
|
# patch for dor.t (defined or). |
6110
|
1366
|
50
|
33
|
|
|
6277
|
if ( $tok eq '/' |
|
|
100
|
33
|
|
|
|
|
6111
|
|
|
|
|
|
|
&& $next_type eq '/' |
6112
|
|
|
|
|
|
|
&& $last_nonblank_token eq ']' ) |
6113
|
|
|
|
|
|
|
{ |
6114
|
0
|
|
|
|
|
0
|
$op_expected = OPERATOR; |
6115
|
|
|
|
|
|
|
} |
6116
|
|
|
|
|
|
|
|
6117
|
|
|
|
|
|
|
# Patch for RT #116344: misparse a ternary operator after an |
6118
|
|
|
|
|
|
|
# anonymous hash, like this: |
6119
|
|
|
|
|
|
|
# return ref {} ? 1 : 0; |
6120
|
|
|
|
|
|
|
# The right brace should really be marked type 'R' in this case, |
6121
|
|
|
|
|
|
|
# and it is safest to return an UNKNOWN here. Expecting a TERM will |
6122
|
|
|
|
|
|
|
# cause the '?' to always be interpreted as a pattern delimiter |
6123
|
|
|
|
|
|
|
# rather than introducing a ternary operator. |
6124
|
|
|
|
|
|
|
elsif ( $tok eq '?' ) { |
6125
|
1
|
|
|
|
|
3
|
$op_expected = UNKNOWN; |
6126
|
|
|
|
|
|
|
} |
6127
|
|
|
|
|
|
|
else { |
6128
|
1365
|
|
|
|
|
2320
|
$op_expected = TERM; |
6129
|
|
|
|
|
|
|
} |
6130
|
|
|
|
|
|
|
} |
6131
|
|
|
|
|
|
|
} ## end type '}' |
6132
|
|
|
|
|
|
|
|
6133
|
|
|
|
|
|
|
# number or v-string... |
6134
|
|
|
|
|
|
|
# An exception is for VERSION numbers a 'use' statement. It has the format |
6135
|
|
|
|
|
|
|
# use Module VERSION LIST |
6136
|
|
|
|
|
|
|
# We could avoid this exception by writing a special sub to parse 'use' |
6137
|
|
|
|
|
|
|
# statements and perhaps mark these numbers with a new type V (for VERSION) |
6138
|
|
|
|
|
|
|
##elsif ( $last_nonblank_type =~ /^[nv]$/ ) { |
6139
|
|
|
|
|
|
|
elsif ( $is_n_v{$last_nonblank_type} ) { |
6140
|
1985
|
|
|
|
|
3257
|
$op_expected = OPERATOR; |
6141
|
1985
|
100
|
|
|
|
4413
|
if ( $statement_type eq 'use' ) { |
6142
|
11
|
|
|
|
|
43
|
$op_expected = UNKNOWN; |
6143
|
|
|
|
|
|
|
} |
6144
|
|
|
|
|
|
|
} |
6145
|
|
|
|
|
|
|
|
6146
|
|
|
|
|
|
|
# quote... |
6147
|
|
|
|
|
|
|
# TODO: labeled prototype words would better be given type 'A' or maybe |
6148
|
|
|
|
|
|
|
# 'J'; not 'q'; or maybe mark as type 'Y'? |
6149
|
|
|
|
|
|
|
elsif ( $last_nonblank_type eq 'q' ) { |
6150
|
137
|
50
|
|
|
|
621
|
if ( $last_nonblank_token eq 'prototype' ) { |
|
|
100
|
|
|
|
|
|
6151
|
0
|
|
|
|
|
0
|
$op_expected = TERM; |
6152
|
|
|
|
|
|
|
} |
6153
|
|
|
|
|
|
|
|
6154
|
|
|
|
|
|
|
# update for --use-feature=class (rt145706): |
6155
|
|
|
|
|
|
|
# Look for class VERSION after possible attribute, as in |
6156
|
|
|
|
|
|
|
# class Example::Subclass : isa(Example::Base) 1.345 { ... } |
6157
|
|
|
|
|
|
|
elsif ( $statement_type =~ /^package\b/ ) { |
6158
|
3
|
|
|
|
|
6
|
$op_expected = TERM; |
6159
|
|
|
|
|
|
|
} |
6160
|
|
|
|
|
|
|
|
6161
|
|
|
|
|
|
|
# everything else |
6162
|
|
|
|
|
|
|
else { |
6163
|
134
|
|
|
|
|
343
|
$op_expected = OPERATOR; |
6164
|
|
|
|
|
|
|
} |
6165
|
|
|
|
|
|
|
} |
6166
|
|
|
|
|
|
|
|
6167
|
|
|
|
|
|
|
# file handle or similar |
6168
|
|
|
|
|
|
|
elsif ( $last_nonblank_type eq 'Z' ) { |
6169
|
|
|
|
|
|
|
|
6170
|
|
|
|
|
|
|
# angle.t |
6171
|
40
|
100
|
33
|
|
|
603
|
if ( $last_nonblank_token =~ /^\w/ ) { |
|
|
50
|
66
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
6172
|
2
|
|
|
|
|
5
|
$op_expected = UNKNOWN; |
6173
|
|
|
|
|
|
|
} |
6174
|
|
|
|
|
|
|
|
6175
|
|
|
|
|
|
|
# Exception to weird parsing rules for 'x(' ... see case b1205: |
6176
|
|
|
|
|
|
|
# In something like 'print $vv x(...' the x is an operator; |
6177
|
|
|
|
|
|
|
# Likewise in 'print $vv x$ww' the x is an operator (case b1207) |
6178
|
|
|
|
|
|
|
# otherwise x follows the weird parsing rules. |
6179
|
|
|
|
|
|
|
elsif ( $tok eq 'x' && $next_type =~ /^[\(\$\@\%]$/ ) { |
6180
|
0
|
|
|
|
|
0
|
$op_expected = OPERATOR; |
6181
|
|
|
|
|
|
|
} |
6182
|
|
|
|
|
|
|
|
6183
|
|
|
|
|
|
|
# The 'weird parsing rules' of next section do not work for '<' and '?' |
6184
|
|
|
|
|
|
|
# It is best to mark them as unknown. Test case: |
6185
|
|
|
|
|
|
|
# print $fh <DATA>; |
6186
|
|
|
|
|
|
|
elsif ( $is_weird_parsing_rule_exception{$tok} ) { |
6187
|
4
|
|
|
|
|
13
|
$op_expected = UNKNOWN; |
6188
|
|
|
|
|
|
|
} |
6189
|
|
|
|
|
|
|
|
6190
|
|
|
|
|
|
|
# For possible file handle like "$a", Perl uses weird parsing rules. |
6191
|
|
|
|
|
|
|
# For example: |
6192
|
|
|
|
|
|
|
# print $a/2,"/hi"; - division |
6193
|
|
|
|
|
|
|
# print $a / 2,"/hi"; - division |
6194
|
|
|
|
|
|
|
# print $a/ 2,"/hi"; - division |
6195
|
|
|
|
|
|
|
# print $a /2,"/hi"; - pattern (and error)! |
6196
|
|
|
|
|
|
|
# Some examples where this logic works okay, for '&','*','+': |
6197
|
|
|
|
|
|
|
# print $fh &xsi_protos(@mods); |
6198
|
|
|
|
|
|
|
# my $x = new $CompressClass *FH; |
6199
|
|
|
|
|
|
|
# print $OUT +( $count % 15 ? ", " : "\n\t" ); |
6200
|
|
|
|
|
|
|
elsif ($blank_after_Z |
6201
|
|
|
|
|
|
|
&& $next_type ne 'b' ) |
6202
|
|
|
|
|
|
|
{ |
6203
|
0
|
|
|
|
|
0
|
$op_expected = TERM; |
6204
|
|
|
|
|
|
|
} |
6205
|
|
|
|
|
|
|
|
6206
|
|
|
|
|
|
|
# Note that '?' and '<' have been moved above |
6207
|
|
|
|
|
|
|
# ( $tok =~ /^([x\/\+\-\*\%\&\.\?\<]|\>\>)$/ ) { |
6208
|
|
|
|
|
|
|
elsif ( $tok =~ /^([x\/\+\-\*\%\&\.]|\>\>)$/ ) { |
6209
|
|
|
|
|
|
|
|
6210
|
|
|
|
|
|
|
# Do not complain in 'use' statements, which have special syntax. |
6211
|
|
|
|
|
|
|
# For example, from RT#130344: |
6212
|
|
|
|
|
|
|
# use lib $FindBin::Bin . '/lib'; |
6213
|
9
|
50
|
|
|
|
29
|
if ( $statement_type ne 'use' ) { |
6214
|
9
|
|
|
|
|
37
|
$self->complain( |
6215
|
|
|
|
|
|
|
"operator in possible indirect object location not recommended\n" |
6216
|
|
|
|
|
|
|
); |
6217
|
|
|
|
|
|
|
} |
6218
|
9
|
|
|
|
|
19
|
$op_expected = OPERATOR; |
6219
|
|
|
|
|
|
|
} |
6220
|
|
|
|
|
|
|
|
6221
|
|
|
|
|
|
|
# all other cases |
6222
|
|
|
|
|
|
|
else { |
6223
|
25
|
|
|
|
|
77
|
$op_expected = UNKNOWN; |
6224
|
|
|
|
|
|
|
} |
6225
|
|
|
|
|
|
|
} |
6226
|
|
|
|
|
|
|
|
6227
|
|
|
|
|
|
|
# anything else... |
6228
|
|
|
|
|
|
|
else { |
6229
|
1
|
|
|
|
|
2
|
$op_expected = UNKNOWN; |
6230
|
|
|
|
|
|
|
} |
6231
|
|
|
|
|
|
|
|
6232
|
|
|
|
|
|
|
DEBUG_OPERATOR_EXPECTED |
6233
|
8397
|
|
|
|
|
11127
|
&& print {*STDOUT} |
6234
|
|
|
|
|
|
|
"OPERATOR_EXPECTED: returns $op_expected for last type $last_nonblank_type token $last_nonblank_token\n"; |
6235
|
|
|
|
|
|
|
|
6236
|
8397
|
|
|
|
|
15924
|
return $op_expected; |
6237
|
|
|
|
|
|
|
|
6238
|
|
|
|
|
|
|
} ## end sub operator_expected |
6239
|
|
|
|
|
|
|
|
6240
|
|
|
|
|
|
|
sub new_statement_ok { |
6241
|
|
|
|
|
|
|
|
6242
|
|
|
|
|
|
|
# Returns: |
6243
|
|
|
|
|
|
|
# true if a new statement can begin here |
6244
|
|
|
|
|
|
|
# false otherwise |
6245
|
|
|
|
|
|
|
|
6246
|
|
|
|
|
|
|
# USES GLOBAL VARIABLES: $last_nonblank_token, $last_nonblank_type, |
6247
|
|
|
|
|
|
|
# $brace_depth, $rbrace_type |
6248
|
|
|
|
|
|
|
|
6249
|
|
|
|
|
|
|
# Uses: |
6250
|
|
|
|
|
|
|
# - See if a 'class' statement can occur here |
6251
|
|
|
|
|
|
|
# - See if a keyword begins at a new statement; i.e. is an 'if' a |
6252
|
|
|
|
|
|
|
# block if or a trailing if? Also see if 'format' starts a statement. |
6253
|
|
|
|
|
|
|
# - Decide if a ':' is part of a statement label (not a ternary) |
6254
|
|
|
|
|
|
|
|
6255
|
|
|
|
|
|
|
# Curly braces are tricky because some small blocks do not get marked as |
6256
|
|
|
|
|
|
|
# blocks.. |
6257
|
|
|
|
|
|
|
|
6258
|
|
|
|
|
|
|
# if it follows an opening curly brace.. |
6259
|
435
|
100
|
66
|
435
|
0
|
2321
|
if ( $last_nonblank_token eq '{' ) { |
|
|
100
|
|
|
|
|
|
6260
|
|
|
|
|
|
|
|
6261
|
|
|
|
|
|
|
# The safe thing is to return true in all cases because: |
6262
|
|
|
|
|
|
|
# - a ternary ':' cannot occur here |
6263
|
|
|
|
|
|
|
# - an 'if' here, for example, cannot be a trailing if |
6264
|
|
|
|
|
|
|
# See test case c231 for an example. |
6265
|
|
|
|
|
|
|
# This works but could be improved, if necessary, by returning |
6266
|
|
|
|
|
|
|
# 'false' at obvious non-blocks. |
6267
|
59
|
|
|
|
|
220
|
return 1; |
6268
|
|
|
|
|
|
|
} |
6269
|
|
|
|
|
|
|
|
6270
|
|
|
|
|
|
|
# if it follows a closing code block curly brace.. |
6271
|
|
|
|
|
|
|
elsif ($last_nonblank_token eq '}' |
6272
|
|
|
|
|
|
|
&& $last_nonblank_type eq $last_nonblank_token ) |
6273
|
|
|
|
|
|
|
{ |
6274
|
|
|
|
|
|
|
|
6275
|
|
|
|
|
|
|
# a new statement can follow certain closing block braces ... |
6276
|
|
|
|
|
|
|
# FIXME: The following has worked well but returns true in some cases |
6277
|
|
|
|
|
|
|
# where it really should not. We could fix this by either excluding |
6278
|
|
|
|
|
|
|
# certain blocks, like sort/map/grep/eval/asub or by just including |
6279
|
|
|
|
|
|
|
# certain blocks. |
6280
|
99
|
|
|
|
|
377
|
return $rbrace_type->[$brace_depth]; |
6281
|
|
|
|
|
|
|
} |
6282
|
|
|
|
|
|
|
|
6283
|
|
|
|
|
|
|
# otherwise, it is a label if and only if it follows a ';' (real or fake) |
6284
|
|
|
|
|
|
|
# or another label |
6285
|
|
|
|
|
|
|
else { |
6286
|
277
|
|
100
|
|
|
1718
|
return ( $last_nonblank_type eq ';' || $last_nonblank_type eq 'J' ); |
6287
|
|
|
|
|
|
|
} |
6288
|
|
|
|
|
|
|
} ## end sub new_statement_ok |
6289
|
|
|
|
|
|
|
|
6290
|
|
|
|
|
|
|
sub code_block_type { |
6291
|
|
|
|
|
|
|
|
6292
|
|
|
|
|
|
|
# Decide if this is a block of code, and its type. |
6293
|
|
|
|
|
|
|
# Must be called only when $type = $token = '{' |
6294
|
|
|
|
|
|
|
# The problem is to distinguish between the start of a block of code |
6295
|
|
|
|
|
|
|
# and the start of an anonymous hash reference |
6296
|
|
|
|
|
|
|
# Returns "" if not code block, otherwise returns 'last_nonblank_token' |
6297
|
|
|
|
|
|
|
# to indicate the type of code block. (For example, 'last_nonblank_token' |
6298
|
|
|
|
|
|
|
# might be 'if' for an if block, 'else' for an else block, etc). |
6299
|
|
|
|
|
|
|
# USES GLOBAL VARIABLES: $last_nonblank_token, $last_nonblank_type, |
6300
|
|
|
|
|
|
|
# $last_nonblank_block_type, $brace_depth, $rbrace_type |
6301
|
|
|
|
|
|
|
|
6302
|
|
|
|
|
|
|
# handle case of multiple '{'s |
6303
|
|
|
|
|
|
|
|
6304
|
|
|
|
|
|
|
# print "BLOCK_TYPE EXAMINING: type=$last_nonblank_type tok=$last_nonblank_token\n"; |
6305
|
|
|
|
|
|
|
|
6306
|
1301
|
|
|
1301
|
0
|
3333
|
my ( $self, $i, $rtokens, $rtoken_type, $max_token_index ) = @_; |
6307
|
1301
|
100
|
66
|
|
|
16557
|
if ( $last_nonblank_token eq '{' |
|
|
100
|
66
|
|
|
|
|
|
|
100
|
66
|
|
|
|
|
|
|
100
|
100
|
|
|
|
|
|
|
100
|
66
|
|
|
|
|
|
|
100
|
66
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
6308
|
|
|
|
|
|
|
&& $last_nonblank_type eq $last_nonblank_token ) |
6309
|
|
|
|
|
|
|
{ |
6310
|
|
|
|
|
|
|
|
6311
|
|
|
|
|
|
|
# opening brace where a statement may appear is probably |
6312
|
|
|
|
|
|
|
# a code block but might be and anonymous hash reference |
6313
|
90
|
50
|
|
|
|
342
|
if ( $rbrace_type->[$brace_depth] ) { |
6314
|
90
|
|
|
|
|
314
|
return $self->decide_if_code_block( $i, $rtokens, $rtoken_type, |
6315
|
|
|
|
|
|
|
$max_token_index ); |
6316
|
|
|
|
|
|
|
} |
6317
|
|
|
|
|
|
|
|
6318
|
|
|
|
|
|
|
# cannot start a code block within an anonymous hash |
6319
|
|
|
|
|
|
|
else { |
6320
|
0
|
|
|
|
|
0
|
return EMPTY_STRING; |
6321
|
|
|
|
|
|
|
} |
6322
|
|
|
|
|
|
|
} |
6323
|
|
|
|
|
|
|
|
6324
|
|
|
|
|
|
|
elsif ( $last_nonblank_token eq ';' ) { |
6325
|
|
|
|
|
|
|
|
6326
|
|
|
|
|
|
|
# an opening brace where a statement may appear is probably |
6327
|
|
|
|
|
|
|
# a code block but might be and anonymous hash reference |
6328
|
48
|
|
|
|
|
276
|
return $self->decide_if_code_block( $i, $rtokens, $rtoken_type, |
6329
|
|
|
|
|
|
|
$max_token_index ); |
6330
|
|
|
|
|
|
|
} |
6331
|
|
|
|
|
|
|
|
6332
|
|
|
|
|
|
|
# handle case of '}{' |
6333
|
|
|
|
|
|
|
elsif ($last_nonblank_token eq '}' |
6334
|
|
|
|
|
|
|
&& $last_nonblank_type eq $last_nonblank_token ) |
6335
|
|
|
|
|
|
|
{ |
6336
|
|
|
|
|
|
|
|
6337
|
|
|
|
|
|
|
# a } { situation ... |
6338
|
|
|
|
|
|
|
# could be hash reference after code block..(blktype1.t) |
6339
|
9
|
50
|
|
|
|
28
|
if ($last_nonblank_block_type) { |
6340
|
9
|
|
|
|
|
44
|
return $self->decide_if_code_block( $i, $rtokens, $rtoken_type, |
6341
|
|
|
|
|
|
|
$max_token_index ); |
6342
|
|
|
|
|
|
|
} |
6343
|
|
|
|
|
|
|
|
6344
|
|
|
|
|
|
|
# must be a block if it follows a closing hash reference |
6345
|
|
|
|
|
|
|
else { |
6346
|
0
|
|
|
|
|
0
|
return $last_nonblank_token; |
6347
|
|
|
|
|
|
|
} |
6348
|
|
|
|
|
|
|
} |
6349
|
|
|
|
|
|
|
|
6350
|
|
|
|
|
|
|
#-------------------------------------------------------------- |
6351
|
|
|
|
|
|
|
# NOTE: braces after type characters start code blocks, but for |
6352
|
|
|
|
|
|
|
# simplicity these are not identified as such. See also |
6353
|
|
|
|
|
|
|
# sub is_non_structural_brace. |
6354
|
|
|
|
|
|
|
#-------------------------------------------------------------- |
6355
|
|
|
|
|
|
|
|
6356
|
|
|
|
|
|
|
## elsif ( $last_nonblank_type eq 't' ) { |
6357
|
|
|
|
|
|
|
## return $last_nonblank_token; |
6358
|
|
|
|
|
|
|
## } |
6359
|
|
|
|
|
|
|
|
6360
|
|
|
|
|
|
|
# brace after label: |
6361
|
|
|
|
|
|
|
elsif ( $last_nonblank_type eq 'J' ) { |
6362
|
34
|
|
|
|
|
113
|
return $last_nonblank_token; |
6363
|
|
|
|
|
|
|
} |
6364
|
|
|
|
|
|
|
|
6365
|
|
|
|
|
|
|
# otherwise, look at previous token. This must be a code block if |
6366
|
|
|
|
|
|
|
# it follows any of these: |
6367
|
|
|
|
|
|
|
# /^(BEGIN|END|CHECK|INIT|AUTOLOAD|DESTROY|UNITCHECK|continue|if|elsif|else|unless|do|while|until|eval|for|foreach|map|grep|sort)$/ |
6368
|
|
|
|
|
|
|
elsif ($is_code_block_token{$last_nonblank_token} |
6369
|
|
|
|
|
|
|
|| $is_grep_alias{$last_nonblank_token} ) |
6370
|
|
|
|
|
|
|
{ |
6371
|
|
|
|
|
|
|
|
6372
|
|
|
|
|
|
|
# Bug Patch: Note that the opening brace after the 'if' in the following |
6373
|
|
|
|
|
|
|
# snippet is an anonymous hash ref and not a code block! |
6374
|
|
|
|
|
|
|
# print 'hi' if { x => 1, }->{x}; |
6375
|
|
|
|
|
|
|
# We can identify this situation because the last nonblank type |
6376
|
|
|
|
|
|
|
# will be a keyword (instead of a closing paren) |
6377
|
480
|
50
|
33
|
|
|
2515
|
if ( |
|
|
|
66
|
|
|
|
|
6378
|
|
|
|
|
|
|
$last_nonblank_type eq 'k' |
6379
|
|
|
|
|
|
|
&& ( $last_nonblank_token eq 'if' |
6380
|
|
|
|
|
|
|
|| $last_nonblank_token eq 'unless' ) |
6381
|
|
|
|
|
|
|
) |
6382
|
|
|
|
|
|
|
{ |
6383
|
0
|
|
|
|
|
0
|
return EMPTY_STRING; |
6384
|
|
|
|
|
|
|
} |
6385
|
|
|
|
|
|
|
else { |
6386
|
480
|
|
|
|
|
1811
|
return $last_nonblank_token; |
6387
|
|
|
|
|
|
|
} |
6388
|
|
|
|
|
|
|
} |
6389
|
|
|
|
|
|
|
|
6390
|
|
|
|
|
|
|
# or a sub or package BLOCK |
6391
|
|
|
|
|
|
|
# Fixed for c250 to include new package type 'P', and change 'i' to 'S' |
6392
|
|
|
|
|
|
|
elsif ( |
6393
|
|
|
|
|
|
|
$last_nonblank_type eq 'P' |
6394
|
|
|
|
|
|
|
|| $last_nonblank_type eq 'S' |
6395
|
|
|
|
|
|
|
|| ( $last_nonblank_type eq 't' |
6396
|
|
|
|
|
|
|
&& substr( $last_nonblank_token, 0, 3 ) eq 'sub' ) |
6397
|
|
|
|
|
|
|
) |
6398
|
|
|
|
|
|
|
{ |
6399
|
294
|
|
|
|
|
929
|
return $last_nonblank_token; |
6400
|
|
|
|
|
|
|
} |
6401
|
|
|
|
|
|
|
|
6402
|
|
|
|
|
|
|
elsif ( $statement_type =~ /^(sub|package)\b/ ) { |
6403
|
0
|
|
|
|
|
0
|
return $statement_type; |
6404
|
|
|
|
|
|
|
} |
6405
|
|
|
|
|
|
|
|
6406
|
|
|
|
|
|
|
# user-defined subs with block parameters (like grep/map/eval) |
6407
|
|
|
|
|
|
|
elsif ( $last_nonblank_type eq 'G' ) { |
6408
|
0
|
|
|
|
|
0
|
return $last_nonblank_token; |
6409
|
|
|
|
|
|
|
} |
6410
|
|
|
|
|
|
|
|
6411
|
|
|
|
|
|
|
# check bareword |
6412
|
|
|
|
|
|
|
elsif ( $last_nonblank_type eq 'w' ) { |
6413
|
|
|
|
|
|
|
|
6414
|
|
|
|
|
|
|
# check for syntax 'use MODULE LIST' |
6415
|
|
|
|
|
|
|
# This fixes b1022 b1025 b1027 b1028 b1029 b1030 b1031 |
6416
|
22
|
100
|
|
|
|
89
|
return EMPTY_STRING if ( $statement_type eq 'use' ); |
6417
|
|
|
|
|
|
|
|
6418
|
21
|
|
|
|
|
122
|
return $self->decide_if_code_block( $i, $rtokens, $rtoken_type, |
6419
|
|
|
|
|
|
|
$max_token_index ); |
6420
|
|
|
|
|
|
|
} |
6421
|
|
|
|
|
|
|
|
6422
|
|
|
|
|
|
|
# Patch for bug # RT #94338 reported by Daniel Trizen |
6423
|
|
|
|
|
|
|
# for-loop in a parenthesized block-map triggering an error message: |
6424
|
|
|
|
|
|
|
# map( { foreach my $item ( '0', '1' ) { print $item} } qw(a b c) ); |
6425
|
|
|
|
|
|
|
# Check for a code block within a parenthesized function call |
6426
|
|
|
|
|
|
|
elsif ( $last_nonblank_token eq '(' ) { |
6427
|
81
|
|
|
|
|
215
|
my $paren_type = $rparen_type->[$paren_depth]; |
6428
|
|
|
|
|
|
|
|
6429
|
|
|
|
|
|
|
# /^(map|grep|sort)$/ |
6430
|
81
|
100
|
66
|
|
|
439
|
if ( $paren_type && $is_sort_map_grep{$paren_type} ) { |
6431
|
|
|
|
|
|
|
|
6432
|
|
|
|
|
|
|
# We will mark this as a code block but use type 't' instead |
6433
|
|
|
|
|
|
|
# of the name of the containing function. This will allow for |
6434
|
|
|
|
|
|
|
# correct parsing but will usually produce better formatting. |
6435
|
|
|
|
|
|
|
# Braces with block type 't' are not broken open automatically |
6436
|
|
|
|
|
|
|
# in the formatter as are other code block types, and this usually |
6437
|
|
|
|
|
|
|
# works best. |
6438
|
1
|
|
|
|
|
5
|
return 't'; # (Not $paren_type) |
6439
|
|
|
|
|
|
|
} |
6440
|
|
|
|
|
|
|
else { |
6441
|
80
|
|
|
|
|
264
|
return EMPTY_STRING; |
6442
|
|
|
|
|
|
|
} |
6443
|
|
|
|
|
|
|
} |
6444
|
|
|
|
|
|
|
|
6445
|
|
|
|
|
|
|
# handle unknown syntax ') {' |
6446
|
|
|
|
|
|
|
# we previously appended a '()' to mark this case |
6447
|
|
|
|
|
|
|
elsif ( $last_nonblank_token =~ /\(\)$/ ) { |
6448
|
14
|
|
|
|
|
117
|
return $last_nonblank_token; |
6449
|
|
|
|
|
|
|
} |
6450
|
|
|
|
|
|
|
|
6451
|
|
|
|
|
|
|
# anything else must be anonymous hash reference |
6452
|
|
|
|
|
|
|
else { |
6453
|
229
|
|
|
|
|
724
|
return EMPTY_STRING; |
6454
|
|
|
|
|
|
|
} |
6455
|
|
|
|
|
|
|
} ## end sub code_block_type |
6456
|
|
|
|
|
|
|
|
6457
|
|
|
|
|
|
|
sub decide_if_code_block { |
6458
|
|
|
|
|
|
|
|
6459
|
|
|
|
|
|
|
# USES GLOBAL VARIABLES: $last_nonblank_token |
6460
|
168
|
|
|
168
|
0
|
481
|
my ( $self, $i, $rtokens, $rtoken_type, $max_token_index ) = @_; |
6461
|
|
|
|
|
|
|
|
6462
|
168
|
|
|
|
|
580
|
my ( $next_nonblank_token, $i_next ) = |
6463
|
|
|
|
|
|
|
$self->find_next_nonblank_token( $i, $rtokens, $max_token_index ); |
6464
|
|
|
|
|
|
|
|
6465
|
|
|
|
|
|
|
# we are at a '{' where a statement may appear. |
6466
|
|
|
|
|
|
|
# We must decide if this brace starts an anonymous hash or a code |
6467
|
|
|
|
|
|
|
# block. |
6468
|
|
|
|
|
|
|
# return "" if anonymous hash, and $last_nonblank_token otherwise |
6469
|
|
|
|
|
|
|
|
6470
|
|
|
|
|
|
|
# initialize to be code BLOCK |
6471
|
168
|
|
|
|
|
498
|
my $code_block_type = $last_nonblank_token; |
6472
|
|
|
|
|
|
|
|
6473
|
|
|
|
|
|
|
# Check for the common case of an empty anonymous hash reference: |
6474
|
|
|
|
|
|
|
# Maybe something like sub { { } } |
6475
|
168
|
100
|
|
|
|
607
|
if ( $next_nonblank_token eq '}' ) { |
6476
|
5
|
|
|
|
|
11
|
$code_block_type = EMPTY_STRING; |
6477
|
|
|
|
|
|
|
} |
6478
|
|
|
|
|
|
|
|
6479
|
|
|
|
|
|
|
else { |
6480
|
|
|
|
|
|
|
|
6481
|
|
|
|
|
|
|
# To guess if this '{' is an anonymous hash reference, look ahead |
6482
|
|
|
|
|
|
|
# and test as follows: |
6483
|
|
|
|
|
|
|
# |
6484
|
|
|
|
|
|
|
# it is a hash reference if next come: |
6485
|
|
|
|
|
|
|
# - a string or digit followed by a comma or => |
6486
|
|
|
|
|
|
|
# - bareword followed by => |
6487
|
|
|
|
|
|
|
# otherwise it is a code block |
6488
|
|
|
|
|
|
|
# |
6489
|
|
|
|
|
|
|
# Examples of anonymous hash ref: |
6490
|
|
|
|
|
|
|
# {'aa',}; |
6491
|
|
|
|
|
|
|
# {1,2} |
6492
|
|
|
|
|
|
|
# |
6493
|
|
|
|
|
|
|
# Examples of code blocks: |
6494
|
|
|
|
|
|
|
# {1; print "hello\n", 1;} |
6495
|
|
|
|
|
|
|
# {$a,1}; |
6496
|
|
|
|
|
|
|
|
6497
|
|
|
|
|
|
|
# We are only going to look ahead one more (nonblank/comment) line. |
6498
|
|
|
|
|
|
|
# Strange formatting could cause a bad guess, but that's unlikely. |
6499
|
163
|
|
|
|
|
378
|
my @pre_types; |
6500
|
|
|
|
|
|
|
my @pre_tokens; |
6501
|
|
|
|
|
|
|
|
6502
|
|
|
|
|
|
|
# Ignore the rest of this line if it is a side comment |
6503
|
163
|
100
|
|
|
|
493
|
if ( $next_nonblank_token ne '#' ) { |
6504
|
139
|
|
|
|
|
585
|
@pre_types = @{$rtoken_type}[ $i + 1 .. $max_token_index ]; |
|
139
|
|
|
|
|
806
|
|
6505
|
139
|
|
|
|
|
418
|
@pre_tokens = @{$rtokens}[ $i + 1 .. $max_token_index ]; |
|
139
|
|
|
|
|
757
|
|
6506
|
|
|
|
|
|
|
} |
6507
|
|
|
|
|
|
|
|
6508
|
|
|
|
|
|
|
# Here 20 is arbitrary but generous, and prevents wasting lots of time |
6509
|
|
|
|
|
|
|
# in mangled files |
6510
|
163
|
|
|
|
|
708
|
my ( $rpre_tokens, $rpre_types ) = |
6511
|
|
|
|
|
|
|
$self->peek_ahead_for_n_nonblank_pre_tokens(20); |
6512
|
163
|
100
|
66
|
|
|
619
|
if ( defined($rpre_types) && @{$rpre_types} ) { |
|
155
|
|
|
|
|
1554
|
|
6513
|
155
|
|
|
|
|
294
|
push @pre_types, @{$rpre_types}; |
|
155
|
|
|
|
|
620
|
|
6514
|
155
|
|
|
|
|
333
|
push @pre_tokens, @{$rpre_tokens}; |
|
155
|
|
|
|
|
731
|
|
6515
|
|
|
|
|
|
|
} |
6516
|
|
|
|
|
|
|
|
6517
|
|
|
|
|
|
|
# put a sentinel token to simplify stopping the search |
6518
|
163
|
|
|
|
|
439
|
push @pre_types, '}'; |
6519
|
163
|
|
|
|
|
368
|
push @pre_types, '}'; |
6520
|
|
|
|
|
|
|
|
6521
|
163
|
|
|
|
|
320
|
my $jbeg = 0; |
6522
|
163
|
100
|
|
|
|
550
|
$jbeg = 1 if $pre_types[0] eq 'b'; |
6523
|
|
|
|
|
|
|
|
6524
|
|
|
|
|
|
|
# first look for one of these |
6525
|
|
|
|
|
|
|
# - bareword |
6526
|
|
|
|
|
|
|
# - bareword with leading - |
6527
|
|
|
|
|
|
|
# - digit |
6528
|
|
|
|
|
|
|
# - quoted string |
6529
|
163
|
|
|
|
|
303
|
my $j = $jbeg; |
6530
|
163
|
100
|
33
|
|
|
1165
|
if ( $pre_types[$j] =~ /^[\'\"]/ ) { |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
6531
|
|
|
|
|
|
|
|
6532
|
|
|
|
|
|
|
# find the closing quote; don't worry about escapes |
6533
|
1
|
|
|
|
|
3
|
my $quote_mark = $pre_types[$j]; |
6534
|
1
|
|
|
|
|
4
|
foreach my $k ( $j + 1 .. @pre_types - 2 ) { |
6535
|
1
|
50
|
|
|
|
6
|
if ( $pre_types[$k] eq $quote_mark ) { |
6536
|
1
|
|
|
|
|
3
|
$j = $k + 1; |
6537
|
|
|
|
|
|
|
##my $next = $pre_types[$j]; |
6538
|
1
|
|
|
|
|
3
|
last; |
6539
|
|
|
|
|
|
|
} |
6540
|
|
|
|
|
|
|
} |
6541
|
|
|
|
|
|
|
} |
6542
|
|
|
|
|
|
|
elsif ( $pre_types[$j] eq 'd' ) { |
6543
|
8
|
|
|
|
|
15
|
$j++; |
6544
|
|
|
|
|
|
|
} |
6545
|
|
|
|
|
|
|
elsif ( $pre_types[$j] eq 'w' ) { |
6546
|
71
|
|
|
|
|
180
|
$j++; |
6547
|
|
|
|
|
|
|
} |
6548
|
|
|
|
|
|
|
elsif ( $pre_types[$j] eq '-' && $pre_types[ ++$j ] eq 'w' ) { |
6549
|
0
|
|
|
|
|
0
|
$j++; |
6550
|
|
|
|
|
|
|
} |
6551
|
|
|
|
|
|
|
else { |
6552
|
|
|
|
|
|
|
# none of the above |
6553
|
|
|
|
|
|
|
} |
6554
|
163
|
100
|
|
|
|
509
|
if ( $j > $jbeg ) { |
6555
|
|
|
|
|
|
|
|
6556
|
80
|
100
|
|
|
|
329
|
$j++ if $pre_types[$j] eq 'b'; |
6557
|
|
|
|
|
|
|
|
6558
|
|
|
|
|
|
|
# Patched for RT #95708 |
6559
|
80
|
100
|
33
|
|
|
716
|
if ( |
|
|
|
66
|
|
|
|
|
|
|
|
66
|
|
|
|
|
6560
|
|
|
|
|
|
|
|
6561
|
|
|
|
|
|
|
# it is a comma which is not a pattern delimiter except for qw |
6562
|
|
|
|
|
|
|
( |
6563
|
|
|
|
|
|
|
$pre_types[$j] eq ',' |
6564
|
|
|
|
|
|
|
## !~ /^(s|m|y|tr|qr|q|qq|qx)$/ |
6565
|
|
|
|
|
|
|
&& !$is_q_qq_qx_qr_s_y_tr_m{ $pre_tokens[$jbeg] } |
6566
|
|
|
|
|
|
|
) |
6567
|
|
|
|
|
|
|
|
6568
|
|
|
|
|
|
|
# or a => |
6569
|
|
|
|
|
|
|
|| ( $pre_types[$j] eq '=' && $pre_types[ ++$j ] eq '>' ) |
6570
|
|
|
|
|
|
|
) |
6571
|
|
|
|
|
|
|
{ |
6572
|
18
|
|
|
|
|
45
|
$code_block_type = EMPTY_STRING; |
6573
|
|
|
|
|
|
|
} |
6574
|
|
|
|
|
|
|
} |
6575
|
|
|
|
|
|
|
|
6576
|
163
|
100
|
|
|
|
541
|
if ($code_block_type) { |
6577
|
|
|
|
|
|
|
|
6578
|
|
|
|
|
|
|
# Patch for cases b1085 b1128: It is uncertain if this is a block. |
6579
|
|
|
|
|
|
|
# If this brace follows a bareword, then append a space as a signal |
6580
|
|
|
|
|
|
|
# to the formatter that this may not be a block brace. To find the |
6581
|
|
|
|
|
|
|
# corresponding code in Formatter.pm search for 'b1085'. |
6582
|
145
|
100
|
|
|
|
1258
|
$code_block_type .= SPACE if ( $code_block_type =~ /^\w/ ); |
6583
|
|
|
|
|
|
|
} |
6584
|
|
|
|
|
|
|
} |
6585
|
|
|
|
|
|
|
|
6586
|
168
|
|
|
|
|
608
|
return $code_block_type; |
6587
|
|
|
|
|
|
|
} ## end sub decide_if_code_block |
6588
|
|
|
|
|
|
|
|
6589
|
|
|
|
|
|
|
sub report_unexpected { |
6590
|
|
|
|
|
|
|
|
6591
|
|
|
|
|
|
|
# report unexpected token type and show where it is |
6592
|
|
|
|
|
|
|
# USES GLOBAL VARIABLES: (none) |
6593
|
0
|
|
|
0
|
0
|
0
|
my ( $self, $found, $expecting, $i_tok, $last_nonblank_i, $rpretoken_map, |
6594
|
|
|
|
|
|
|
$rpretoken_type, $input_line ) |
6595
|
|
|
|
|
|
|
= @_; |
6596
|
|
|
|
|
|
|
|
6597
|
0
|
0
|
|
|
|
0
|
if ( ++$self->[_unexpected_error_count_] <= MAX_NAG_MESSAGES ) { |
6598
|
0
|
|
|
|
|
0
|
my $msg = "found $found where $expecting expected"; |
6599
|
0
|
|
|
|
|
0
|
my $pos = $rpretoken_map->[$i_tok]; |
6600
|
0
|
|
|
|
|
0
|
$self->interrupt_logfile(); |
6601
|
0
|
|
|
|
|
0
|
my $input_line_number = $self->[_last_line_number_]; |
6602
|
0
|
|
|
|
|
0
|
my ( $offset, $numbered_line, $underline ) = |
6603
|
|
|
|
|
|
|
make_numbered_line( $input_line_number, $input_line, $pos ); |
6604
|
0
|
|
|
|
|
0
|
$underline = write_on_underline( $underline, $pos - $offset, '^' ); |
6605
|
|
|
|
|
|
|
|
6606
|
0
|
|
|
|
|
0
|
my $trailer = EMPTY_STRING; |
6607
|
0
|
0
|
0
|
|
|
0
|
if ( ( $i_tok > 0 ) && ( $last_nonblank_i >= 0 ) ) { |
6608
|
0
|
|
|
|
|
0
|
my $pos_prev = $rpretoken_map->[$last_nonblank_i]; |
6609
|
0
|
|
|
|
|
0
|
my $num; |
6610
|
0
|
0
|
|
|
|
0
|
if ( $rpretoken_type->[ $i_tok - 1 ] eq 'b' ) { |
6611
|
0
|
|
|
|
|
0
|
$num = $rpretoken_map->[ $i_tok - 1 ] - $pos_prev; |
6612
|
|
|
|
|
|
|
} |
6613
|
|
|
|
|
|
|
else { |
6614
|
0
|
|
|
|
|
0
|
$num = $pos - $pos_prev; |
6615
|
|
|
|
|
|
|
} |
6616
|
0
|
0
|
|
|
|
0
|
if ( $num > 40 ) { $num = 40; $pos_prev = $pos - 40; } |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
6617
|
|
|
|
|
|
|
|
6618
|
|
|
|
|
|
|
$underline = |
6619
|
0
|
|
|
|
|
0
|
write_on_underline( $underline, $pos_prev - $offset, '-' x $num ); |
6620
|
0
|
|
|
|
|
0
|
$trailer = " (previous token underlined)"; |
6621
|
|
|
|
|
|
|
} |
6622
|
0
|
|
|
|
|
0
|
$underline =~ s/\s+$//; |
6623
|
0
|
|
|
|
|
0
|
$self->warning( $numbered_line . "\n" ); |
6624
|
0
|
|
|
|
|
0
|
$self->warning( $underline . "\n" ); |
6625
|
0
|
|
|
|
|
0
|
$self->warning( $msg . $trailer . "\n" ); |
6626
|
0
|
|
|
|
|
0
|
$self->resume_logfile(); |
6627
|
|
|
|
|
|
|
} |
6628
|
0
|
|
|
|
|
0
|
return; |
6629
|
|
|
|
|
|
|
} ## end sub report_unexpected |
6630
|
|
|
|
|
|
|
|
6631
|
|
|
|
|
|
|
my %is_sigil_or_paren; |
6632
|
|
|
|
|
|
|
my %is_R_closing_sb; |
6633
|
|
|
|
|
|
|
|
6634
|
|
|
|
|
|
|
BEGIN { |
6635
|
|
|
|
|
|
|
|
6636
|
39
|
|
|
39
|
|
308
|
my @q = qw< $ & % * @ ) >; |
6637
|
39
|
|
|
|
|
317
|
@{is_sigil_or_paren}{@q} = (1) x scalar(@q); |
6638
|
|
|
|
|
|
|
|
6639
|
39
|
|
|
|
|
153
|
@q = qw(R ]); |
6640
|
39
|
|
|
|
|
87832
|
@{is_R_closing_sb}{@q} = (1) x scalar(@q); |
6641
|
|
|
|
|
|
|
} ## end BEGIN |
6642
|
|
|
|
|
|
|
|
6643
|
|
|
|
|
|
|
sub is_non_structural_brace { |
6644
|
|
|
|
|
|
|
|
6645
|
|
|
|
|
|
|
# Decide if a brace or bracket is structural or non-structural |
6646
|
|
|
|
|
|
|
# by looking at the previous token and type |
6647
|
|
|
|
|
|
|
# USES GLOBAL VARIABLES: $last_nonblank_type, $last_nonblank_token |
6648
|
|
|
|
|
|
|
|
6649
|
|
|
|
|
|
|
# EXPERIMENTAL: Mark slices as structural; idea was to improve formatting. |
6650
|
|
|
|
|
|
|
# Tentatively deactivated because it caused the wrong operator expectation |
6651
|
|
|
|
|
|
|
# for this code: |
6652
|
|
|
|
|
|
|
# $user = @vars[1] / 100; |
6653
|
|
|
|
|
|
|
# Must update sub operator_expected before re-implementing. |
6654
|
|
|
|
|
|
|
# if ( $last_nonblank_type eq 'i' && $last_nonblank_token =~ /^@/ ) { |
6655
|
|
|
|
|
|
|
# return 0; |
6656
|
|
|
|
|
|
|
# } |
6657
|
|
|
|
|
|
|
|
6658
|
|
|
|
|
|
|
#-------------------------------------------------------------- |
6659
|
|
|
|
|
|
|
# NOTE: braces after type characters start code blocks, but for |
6660
|
|
|
|
|
|
|
# simplicity these are not identified as such. See also |
6661
|
|
|
|
|
|
|
# sub code_block_type |
6662
|
|
|
|
|
|
|
#-------------------------------------------------------------- |
6663
|
|
|
|
|
|
|
|
6664
|
|
|
|
|
|
|
##if ($last_nonblank_type eq 't') {return 0} |
6665
|
|
|
|
|
|
|
|
6666
|
|
|
|
|
|
|
# otherwise, it is non-structural if it is decorated |
6667
|
|
|
|
|
|
|
# by type information. |
6668
|
|
|
|
|
|
|
# For example, the '{' here is non-structural: ${xxx} |
6669
|
|
|
|
|
|
|
# Removed '::' to fix c074 |
6670
|
|
|
|
|
|
|
## $last_nonblank_token =~ /^([\$\@\*\&\%\)]|->|::)/ |
6671
|
|
|
|
|
|
|
return ( |
6672
|
|
|
|
|
|
|
## $last_nonblank_token =~ /^([\$\@\*\&\%\)]|->)/ |
6673
|
|
|
|
|
|
|
$is_sigil_or_paren{ substr( $last_nonblank_token, 0, 1 ) } |
6674
|
|
|
|
|
|
|
|| substr( $last_nonblank_token, 0, 2 ) eq '->' |
6675
|
|
|
|
|
|
|
|
6676
|
|
|
|
|
|
|
# or if we follow a hash or array closing curly brace or bracket |
6677
|
|
|
|
|
|
|
# For example, the second '{' in this is non-structural: $a{'x'}{'y'} |
6678
|
|
|
|
|
|
|
# because the first '}' would have been given type 'R' |
6679
|
|
|
|
|
|
|
##|| $last_nonblank_type =~ /^([R\]])$/ |
6680
|
2262
|
|
66
|
2262
|
0
|
14710
|
|| $is_R_closing_sb{$last_nonblank_type} |
6681
|
|
|
|
|
|
|
); |
6682
|
|
|
|
|
|
|
} ## end sub is_non_structural_brace |
6683
|
|
|
|
|
|
|
|
6684
|
|
|
|
|
|
|
####################################################################### |
6685
|
|
|
|
|
|
|
# Tokenizer routines for tracking container nesting depths |
6686
|
|
|
|
|
|
|
####################################################################### |
6687
|
|
|
|
|
|
|
|
6688
|
|
|
|
|
|
|
# The following routines keep track of nesting depths of the nesting |
6689
|
|
|
|
|
|
|
# types, ( [ { and ?. This is necessary for determining the indentation |
6690
|
|
|
|
|
|
|
# level, and also for debugging programs. Not only do they keep track of |
6691
|
|
|
|
|
|
|
# nesting depths of the individual brace types, but they check that each |
6692
|
|
|
|
|
|
|
# of the other brace types is balanced within matching pairs. For |
6693
|
|
|
|
|
|
|
# example, if the program sees this sequence: |
6694
|
|
|
|
|
|
|
# |
6695
|
|
|
|
|
|
|
# { ( ( ) } |
6696
|
|
|
|
|
|
|
# |
6697
|
|
|
|
|
|
|
# then it can determine that there is an extra left paren somewhere |
6698
|
|
|
|
|
|
|
# between the { and the }. And so on with every other possible |
6699
|
|
|
|
|
|
|
# combination of outer and inner brace types. For another |
6700
|
|
|
|
|
|
|
# example: |
6701
|
|
|
|
|
|
|
# |
6702
|
|
|
|
|
|
|
# ( [ ..... ] ] ) |
6703
|
|
|
|
|
|
|
# |
6704
|
|
|
|
|
|
|
# which has an extra ] within the parens. |
6705
|
|
|
|
|
|
|
# |
6706
|
|
|
|
|
|
|
# The brace types have indexes 0 .. 3 which are indexes into |
6707
|
|
|
|
|
|
|
# the matrices. |
6708
|
|
|
|
|
|
|
# |
6709
|
|
|
|
|
|
|
# The pair ? : are treated as just another nesting type, with ? acting |
6710
|
|
|
|
|
|
|
# as the opening brace and : acting as the closing brace. |
6711
|
|
|
|
|
|
|
# |
6712
|
|
|
|
|
|
|
# The matrix |
6713
|
|
|
|
|
|
|
# |
6714
|
|
|
|
|
|
|
# $rdepth_array->[$a][$b][ $rcurrent_depth->[$a] ] = $rcurrent_depth->[$b]; |
6715
|
|
|
|
|
|
|
# |
6716
|
|
|
|
|
|
|
# saves the nesting depth of brace type $b (where $b is either of the other |
6717
|
|
|
|
|
|
|
# nesting types) when brace type $a enters a new depth. When this depth |
6718
|
|
|
|
|
|
|
# decreases, a check is made that the current depth of brace types $b is |
6719
|
|
|
|
|
|
|
# unchanged, or otherwise there must have been an error. This can |
6720
|
|
|
|
|
|
|
# be very useful for localizing errors, particularly when perl runs to |
6721
|
|
|
|
|
|
|
# the end of a large file (such as this one) and announces that there |
6722
|
|
|
|
|
|
|
# is a problem somewhere. |
6723
|
|
|
|
|
|
|
# |
6724
|
|
|
|
|
|
|
# A numerical sequence number is maintained for every nesting type, |
6725
|
|
|
|
|
|
|
# so that each matching pair can be uniquely identified in a simple |
6726
|
|
|
|
|
|
|
# way. |
6727
|
|
|
|
|
|
|
|
6728
|
|
|
|
|
|
|
sub increase_nesting_depth { |
6729
|
4574
|
|
|
4574
|
0
|
9142
|
my ( $self, $aa, $pos ) = @_; |
6730
|
|
|
|
|
|
|
|
6731
|
|
|
|
|
|
|
# USES GLOBAL VARIABLES: $rcurrent_depth, |
6732
|
|
|
|
|
|
|
# $rcurrent_sequence_number, $rdepth_array, |
6733
|
|
|
|
|
|
|
# $rstarting_line_of_current_depth, $statement_type |
6734
|
4574
|
|
|
|
|
8234
|
my $cd_aa = ++$rcurrent_depth->[$aa]; |
6735
|
4574
|
|
|
|
|
6591
|
$total_depth++; |
6736
|
4574
|
|
|
|
|
9142
|
$rtotal_depth->[$aa][$cd_aa] = $total_depth; |
6737
|
4574
|
|
|
|
|
7334
|
my $input_line_number = $self->[_last_line_number_]; |
6738
|
4574
|
|
|
|
|
7766
|
my $input_line = $self->[_line_of_text_]; |
6739
|
|
|
|
|
|
|
|
6740
|
|
|
|
|
|
|
# Sequence numbers increment by number of items. This keeps |
6741
|
|
|
|
|
|
|
# a unique set of numbers but still allows the relative location |
6742
|
|
|
|
|
|
|
# of any type to be determined. |
6743
|
|
|
|
|
|
|
|
6744
|
|
|
|
|
|
|
# make a new unique sequence number |
6745
|
4574
|
|
|
|
|
7684
|
my $seqno = $next_sequence_number++; |
6746
|
|
|
|
|
|
|
|
6747
|
4574
|
|
|
|
|
8626
|
$rcurrent_sequence_number->[$aa][$cd_aa] = $seqno; |
6748
|
|
|
|
|
|
|
|
6749
|
4574
|
|
|
|
|
14863
|
$rstarting_line_of_current_depth->[$aa][$cd_aa] = |
6750
|
|
|
|
|
|
|
[ $input_line_number, $input_line, $pos ]; |
6751
|
|
|
|
|
|
|
|
6752
|
4574
|
|
|
|
|
14300
|
for my $bb ( 0 .. @closing_brace_names - 1 ) { |
6753
|
18296
|
100
|
|
|
|
33421
|
next if ( $bb == $aa ); |
6754
|
13722
|
|
|
|
|
26829
|
$rdepth_array->[$aa][$bb][$cd_aa] = $rcurrent_depth->[$bb]; |
6755
|
|
|
|
|
|
|
} |
6756
|
|
|
|
|
|
|
|
6757
|
|
|
|
|
|
|
# set a flag for indenting a nested ternary statement |
6758
|
4574
|
|
|
|
|
8023
|
my $indent = 0; |
6759
|
4574
|
100
|
|
|
|
10649
|
if ( $aa == QUESTION_COLON ) { |
6760
|
187
|
|
|
|
|
718
|
$rnested_ternary_flag->[$cd_aa] = 0; |
6761
|
187
|
100
|
|
|
|
723
|
if ( $cd_aa > 1 ) { |
6762
|
17
|
100
|
|
|
|
151
|
if ( $rnested_ternary_flag->[ $cd_aa - 1 ] == 0 ) { |
6763
|
16
|
|
|
|
|
66
|
my $pdepth = $rtotal_depth->[$aa][ $cd_aa - 1 ]; |
6764
|
16
|
100
|
|
|
|
68
|
if ( $pdepth == $total_depth - 1 ) { |
6765
|
8
|
|
|
|
|
18
|
$indent = 1; |
6766
|
8
|
|
|
|
|
22
|
$rnested_ternary_flag->[ $cd_aa - 1 ] = -1; |
6767
|
|
|
|
|
|
|
} |
6768
|
|
|
|
|
|
|
} |
6769
|
|
|
|
|
|
|
} |
6770
|
|
|
|
|
|
|
} |
6771
|
|
|
|
|
|
|
|
6772
|
|
|
|
|
|
|
# Fix part #1 for git82: save last token type for propagation of type 'Z' |
6773
|
4574
|
|
|
|
|
15681
|
$rnested_statement_type->[$aa][$cd_aa] = |
6774
|
|
|
|
|
|
|
[ $statement_type, $last_nonblank_type, $last_nonblank_token ]; |
6775
|
4574
|
|
|
|
|
7742
|
$statement_type = EMPTY_STRING; |
6776
|
4574
|
|
|
|
|
12467
|
return ( $seqno, $indent ); |
6777
|
|
|
|
|
|
|
} ## end sub increase_nesting_depth |
6778
|
|
|
|
|
|
|
|
6779
|
|
|
|
|
|
|
sub is_balanced_closing_container { |
6780
|
|
|
|
|
|
|
|
6781
|
|
|
|
|
|
|
# Return true if a closing container can go here without error |
6782
|
|
|
|
|
|
|
# Return false if not |
6783
|
47
|
|
|
47
|
0
|
150
|
my ($aa) = @_; |
6784
|
|
|
|
|
|
|
|
6785
|
|
|
|
|
|
|
# cannot close if there was no opening |
6786
|
47
|
|
|
|
|
90
|
my $cd_aa = $rcurrent_depth->[$aa]; |
6787
|
47
|
100
|
|
|
|
209
|
return if ( $cd_aa <= 0 ); |
6788
|
|
|
|
|
|
|
|
6789
|
|
|
|
|
|
|
# check that any other brace types $bb contained within would be balanced |
6790
|
8
|
|
|
|
|
26
|
for my $bb ( 0 .. @closing_brace_names - 1 ) { |
6791
|
8
|
50
|
|
|
|
25
|
next if ( $bb == $aa ); |
6792
|
|
|
|
|
|
|
return |
6793
|
8
|
50
|
|
|
|
60
|
if ( $rdepth_array->[$aa][$bb][$cd_aa] != $rcurrent_depth->[$bb] ); |
6794
|
|
|
|
|
|
|
} |
6795
|
|
|
|
|
|
|
|
6796
|
|
|
|
|
|
|
# OK, everything will be balanced |
6797
|
0
|
|
|
|
|
0
|
return 1; |
6798
|
|
|
|
|
|
|
} ## end sub is_balanced_closing_container |
6799
|
|
|
|
|
|
|
|
6800
|
|
|
|
|
|
|
sub decrease_nesting_depth { |
6801
|
|
|
|
|
|
|
|
6802
|
4574
|
|
|
4574
|
0
|
9174
|
my ( $self, $aa, $pos ) = @_; |
6803
|
|
|
|
|
|
|
|
6804
|
|
|
|
|
|
|
# USES GLOBAL VARIABLES: $rcurrent_depth, |
6805
|
|
|
|
|
|
|
# $rcurrent_sequence_number, $rdepth_array, $rstarting_line_of_current_depth |
6806
|
|
|
|
|
|
|
# $statement_type |
6807
|
4574
|
|
|
|
|
7043
|
my $seqno = 0; |
6808
|
4574
|
|
|
|
|
7210
|
my $input_line_number = $self->[_last_line_number_]; |
6809
|
4574
|
|
|
|
|
7971
|
my $input_line = $self->[_line_of_text_]; |
6810
|
|
|
|
|
|
|
|
6811
|
4574
|
|
|
|
|
6950
|
my $outdent = 0; |
6812
|
4574
|
|
|
|
|
6565
|
$total_depth--; |
6813
|
4574
|
|
|
|
|
7947
|
my $cd_aa = $rcurrent_depth->[$aa]; |
6814
|
4574
|
50
|
|
|
|
9273
|
if ( $cd_aa > 0 ) { |
6815
|
|
|
|
|
|
|
|
6816
|
|
|
|
|
|
|
# set a flag for un-indenting after seeing a nested ternary statement |
6817
|
4574
|
|
|
|
|
8396
|
$seqno = $rcurrent_sequence_number->[$aa][$cd_aa]; |
6818
|
4574
|
100
|
|
|
|
9598
|
if ( $aa == QUESTION_COLON ) { |
6819
|
187
|
|
|
|
|
608
|
$outdent = $rnested_ternary_flag->[$cd_aa]; |
6820
|
|
|
|
|
|
|
} |
6821
|
|
|
|
|
|
|
|
6822
|
|
|
|
|
|
|
# Fix part #2 for git82: use saved type for propagation of type 'Z' |
6823
|
|
|
|
|
|
|
# through type L-R braces. Perl seems to allow ${bareword} |
6824
|
|
|
|
|
|
|
# as an indirect object, but nothing much more complex than that. |
6825
|
|
|
|
|
|
|
( $statement_type, my $saved_type, my $saved_token ) = |
6826
|
4574
|
|
|
|
|
6905
|
@{ $rnested_statement_type->[$aa][ $rcurrent_depth->[$aa] ] }; |
|
4574
|
|
|
|
|
12872
|
|
6827
|
4574
|
50
|
100
|
|
|
16045
|
if ( $aa == BRACE |
|
|
|
66
|
|
|
|
|
|
|
|
66
|
|
|
|
|
6828
|
|
|
|
|
|
|
&& $saved_type eq 'Z' |
6829
|
|
|
|
|
|
|
&& $last_nonblank_type eq 'w' |
6830
|
|
|
|
|
|
|
&& $rbrace_structural_type->[$brace_depth] eq 'L' ) |
6831
|
|
|
|
|
|
|
{ |
6832
|
1
|
|
|
|
|
3
|
$last_nonblank_type = $saved_type; |
6833
|
|
|
|
|
|
|
} |
6834
|
|
|
|
|
|
|
|
6835
|
|
|
|
|
|
|
# check that any brace types $bb contained within are balanced |
6836
|
4574
|
|
|
|
|
12633
|
for my $bb ( 0 .. @closing_brace_names - 1 ) { |
6837
|
18296
|
100
|
|
|
|
32271
|
next if ( $bb == $aa ); |
6838
|
|
|
|
|
|
|
|
6839
|
13722
|
50
|
|
|
|
31984
|
if ( $rdepth_array->[$aa][$bb][$cd_aa] != $rcurrent_depth->[$bb] ) { |
6840
|
0
|
|
|
|
|
0
|
my $diff = |
6841
|
|
|
|
|
|
|
$rcurrent_depth->[$bb] - $rdepth_array->[$aa][$bb][$cd_aa]; |
6842
|
|
|
|
|
|
|
|
6843
|
|
|
|
|
|
|
# don't whine too many times |
6844
|
0
|
|
|
|
|
0
|
my $saw_brace_error = $self->get_saw_brace_error(); |
6845
|
0
|
0
|
0
|
|
|
0
|
if ( |
|
|
|
0
|
|
|
|
|
6846
|
|
|
|
|
|
|
$saw_brace_error <= MAX_NAG_MESSAGES |
6847
|
|
|
|
|
|
|
|
6848
|
|
|
|
|
|
|
# if too many closing types have occurred, we probably |
6849
|
|
|
|
|
|
|
# already caught this error |
6850
|
|
|
|
|
|
|
&& ( ( $diff > 0 ) || ( $saw_brace_error <= 0 ) ) |
6851
|
|
|
|
|
|
|
) |
6852
|
|
|
|
|
|
|
{ |
6853
|
0
|
|
|
|
|
0
|
$self->interrupt_logfile(); |
6854
|
0
|
|
|
|
|
0
|
my $rsl = $rstarting_line_of_current_depth->[$aa][$cd_aa]; |
6855
|
0
|
|
|
|
|
0
|
my $sl = $rsl->[0]; |
6856
|
0
|
|
|
|
|
0
|
my $rel = [ $input_line_number, $input_line, $pos ]; |
6857
|
0
|
|
|
|
|
0
|
my $el = $rel->[0]; |
6858
|
0
|
|
|
|
|
0
|
my ($ess); |
6859
|
|
|
|
|
|
|
|
6860
|
0
|
0
|
0
|
|
|
0
|
if ( $diff == 1 || $diff == -1 ) { |
6861
|
0
|
|
|
|
|
0
|
$ess = EMPTY_STRING; |
6862
|
|
|
|
|
|
|
} |
6863
|
|
|
|
|
|
|
else { |
6864
|
0
|
|
|
|
|
0
|
$ess = 's'; |
6865
|
|
|
|
|
|
|
} |
6866
|
0
|
0
|
|
|
|
0
|
my $bname = |
6867
|
|
|
|
|
|
|
( $diff > 0 ) |
6868
|
|
|
|
|
|
|
? $opening_brace_names[$bb] |
6869
|
|
|
|
|
|
|
: $closing_brace_names[$bb]; |
6870
|
0
|
|
|
|
|
0
|
$self->write_error_indicator_pair( @{$rsl}, '^' ); |
|
0
|
|
|
|
|
0
|
|
6871
|
0
|
|
|
|
|
0
|
my $msg = <<"EOM"; |
6872
|
|
|
|
|
|
|
Found $diff extra $bname$ess between $opening_brace_names[$aa] on line $sl and $closing_brace_names[$aa] on line $el |
6873
|
|
|
|
|
|
|
EOM |
6874
|
|
|
|
|
|
|
|
6875
|
0
|
0
|
|
|
|
0
|
if ( $diff > 0 ) { |
6876
|
0
|
|
|
|
|
0
|
my $rml = |
6877
|
|
|
|
|
|
|
$rstarting_line_of_current_depth->[$bb] |
6878
|
|
|
|
|
|
|
[ $rcurrent_depth->[$bb] ]; |
6879
|
0
|
|
|
|
|
0
|
my $ml = $rml->[0]; |
6880
|
0
|
|
|
|
|
0
|
$msg .= |
6881
|
|
|
|
|
|
|
" The most recent un-matched $bname is on line $ml\n"; |
6882
|
0
|
|
|
|
|
0
|
$self->write_error_indicator_pair( @{$rml}, '^' ); |
|
0
|
|
|
|
|
0
|
|
6883
|
|
|
|
|
|
|
} |
6884
|
0
|
|
|
|
|
0
|
$self->write_error_indicator_pair( @{$rel}, '^' ); |
|
0
|
|
|
|
|
0
|
|
6885
|
0
|
|
|
|
|
0
|
$self->warning($msg); |
6886
|
0
|
|
|
|
|
0
|
$self->resume_logfile(); |
6887
|
|
|
|
|
|
|
} |
6888
|
0
|
|
|
|
|
0
|
$self->increment_brace_error(); |
6889
|
|
|
|
|
|
|
} |
6890
|
|
|
|
|
|
|
} |
6891
|
4574
|
|
|
|
|
8439
|
$rcurrent_depth->[$aa]--; |
6892
|
|
|
|
|
|
|
} |
6893
|
|
|
|
|
|
|
else { |
6894
|
|
|
|
|
|
|
|
6895
|
0
|
|
|
|
|
0
|
my $saw_brace_error = $self->get_saw_brace_error(); |
6896
|
0
|
0
|
|
|
|
0
|
if ( $saw_brace_error <= MAX_NAG_MESSAGES ) { |
6897
|
0
|
|
|
|
|
0
|
my $msg = <<"EOM"; |
6898
|
|
|
|
|
|
|
There is no previous $opening_brace_names[$aa] to match a $closing_brace_names[$aa] on line $input_line_number |
6899
|
|
|
|
|
|
|
EOM |
6900
|
0
|
|
|
|
|
0
|
$self->indicate_error( $msg, $input_line_number, $input_line, $pos, |
6901
|
|
|
|
|
|
|
'^' ); |
6902
|
|
|
|
|
|
|
} |
6903
|
0
|
|
|
|
|
0
|
$self->increment_brace_error(); |
6904
|
|
|
|
|
|
|
|
6905
|
|
|
|
|
|
|
# keep track of errors in braces alone (ignoring ternary nesting errors) |
6906
|
0
|
0
|
|
|
|
0
|
$self->[_true_brace_error_count_]++ |
6907
|
|
|
|
|
|
|
if ( $closing_brace_names[$aa] ne "':'" ); |
6908
|
|
|
|
|
|
|
} |
6909
|
4574
|
|
|
|
|
12458
|
return ( $seqno, $outdent ); |
6910
|
|
|
|
|
|
|
} ## end sub decrease_nesting_depth |
6911
|
|
|
|
|
|
|
|
6912
|
|
|
|
|
|
|
sub check_final_nesting_depths { |
6913
|
|
|
|
|
|
|
|
6914
|
|
|
|
|
|
|
# USES GLOBAL VARIABLES: $rcurrent_depth, $rstarting_line_of_current_depth |
6915
|
561
|
|
|
561
|
0
|
1456
|
my $self = shift; |
6916
|
|
|
|
|
|
|
|
6917
|
561
|
|
|
|
|
2504
|
for my $aa ( 0 .. @closing_brace_names - 1 ) { |
6918
|
|
|
|
|
|
|
|
6919
|
2244
|
|
|
|
|
3897
|
my $cd_aa = $rcurrent_depth->[$aa]; |
6920
|
2244
|
50
|
|
|
|
5206
|
if ($cd_aa) { |
6921
|
0
|
|
|
|
|
0
|
my $rsl = $rstarting_line_of_current_depth->[$aa][$cd_aa]; |
6922
|
0
|
|
|
|
|
0
|
my $sl = $rsl->[0]; |
6923
|
0
|
|
|
|
|
0
|
my $msg = <<"EOM"; |
6924
|
|
|
|
|
|
|
Final nesting depth of $opening_brace_names[$aa]s is $cd_aa |
6925
|
|
|
|
|
|
|
The most recent un-matched $opening_brace_names[$aa] is on line $sl |
6926
|
|
|
|
|
|
|
EOM |
6927
|
0
|
|
|
|
|
0
|
$self->indicate_error( $msg, @{$rsl}, '^' ); |
|
0
|
|
|
|
|
0
|
|
6928
|
0
|
|
|
|
|
0
|
$self->increment_brace_error(); |
6929
|
|
|
|
|
|
|
} |
6930
|
|
|
|
|
|
|
} |
6931
|
561
|
|
|
|
|
1613
|
return; |
6932
|
|
|
|
|
|
|
} ## end sub check_final_nesting_depths |
6933
|
|
|
|
|
|
|
|
6934
|
|
|
|
|
|
|
####################################################################### |
6935
|
|
|
|
|
|
|
# Tokenizer routines for looking ahead in input stream |
6936
|
|
|
|
|
|
|
####################################################################### |
6937
|
|
|
|
|
|
|
|
6938
|
|
|
|
|
|
|
sub peek_ahead_for_n_nonblank_pre_tokens { |
6939
|
|
|
|
|
|
|
|
6940
|
|
|
|
|
|
|
# returns next n pretokens if they exist |
6941
|
|
|
|
|
|
|
# returns undef's if hits eof without seeing any pretokens |
6942
|
|
|
|
|
|
|
# USES GLOBAL VARIABLES: (none) |
6943
|
170
|
|
|
170
|
0
|
430
|
my ( $self, $max_pretokens ) = @_; |
6944
|
170
|
|
|
|
|
321
|
my $line; |
6945
|
170
|
|
|
|
|
310
|
my $i = 0; |
6946
|
170
|
|
|
|
|
405
|
my ( $rpre_tokens, $rmap, $rpre_types ); |
6947
|
|
|
|
|
|
|
|
6948
|
170
|
|
|
|
|
562
|
while ( $line = $self->peek_ahead( $i++ ) ) { |
6949
|
182
|
|
|
|
|
990
|
$line =~ s/^\s*//; # trim leading blanks |
6950
|
182
|
100
|
|
|
|
705
|
next if ( length($line) <= 0 ); # skip blank |
6951
|
176
|
100
|
|
|
|
642
|
next if ( $line =~ /^#/ ); # skip comment |
6952
|
162
|
|
|
|
|
514
|
( $rpre_tokens, $rmap, $rpre_types ) = |
6953
|
|
|
|
|
|
|
pre_tokenize( $line, $max_pretokens ); |
6954
|
162
|
|
|
|
|
469
|
last; |
6955
|
|
|
|
|
|
|
} |
6956
|
170
|
|
|
|
|
620
|
return ( $rpre_tokens, $rpre_types ); |
6957
|
|
|
|
|
|
|
} ## end sub peek_ahead_for_n_nonblank_pre_tokens |
6958
|
|
|
|
|
|
|
|
6959
|
|
|
|
|
|
|
# look ahead for next non-blank, non-comment line of code |
6960
|
|
|
|
|
|
|
sub peek_ahead_for_nonblank_token { |
6961
|
|
|
|
|
|
|
|
6962
|
|
|
|
|
|
|
# USES GLOBAL VARIABLES: (none) |
6963
|
125
|
|
|
125
|
0
|
386
|
my ( $self, $rtokens, $max_token_index ) = @_; |
6964
|
125
|
|
|
|
|
246
|
my $line; |
6965
|
125
|
|
|
|
|
262
|
my $i = 0; |
6966
|
|
|
|
|
|
|
|
6967
|
125
|
|
|
|
|
511
|
while ( $line = $self->peek_ahead( $i++ ) ) { |
6968
|
169
|
|
|
|
|
995
|
$line =~ s/^\s*//; # trim leading blanks |
6969
|
169
|
100
|
|
|
|
660
|
next if ( length($line) <= 0 ); # skip blank |
6970
|
144
|
100
|
|
|
|
682
|
next if ( $line =~ /^#/ ); # skip comment |
6971
|
|
|
|
|
|
|
|
6972
|
|
|
|
|
|
|
# Updated from 2 to 3 to get trigraphs, added for case b1175 |
6973
|
123
|
|
|
|
|
466
|
my ( $rtok, $rmap, $rtype ) = pre_tokenize( $line, 3 ); |
6974
|
123
|
|
|
|
|
429
|
my $j = $max_token_index + 1; |
6975
|
|
|
|
|
|
|
|
6976
|
123
|
|
|
|
|
297
|
foreach my $tok ( @{$rtok} ) { |
|
123
|
|
|
|
|
364
|
|
6977
|
355
|
100
|
|
|
|
1398
|
last if ( $tok =~ "\n" ); |
6978
|
320
|
|
|
|
|
835
|
$rtokens->[ ++$j ] = $tok; |
6979
|
|
|
|
|
|
|
} |
6980
|
123
|
|
|
|
|
491
|
last; |
6981
|
|
|
|
|
|
|
} |
6982
|
125
|
|
|
|
|
360
|
return; |
6983
|
|
|
|
|
|
|
} ## end sub peek_ahead_for_nonblank_token |
6984
|
|
|
|
|
|
|
|
6985
|
|
|
|
|
|
|
####################################################################### |
6986
|
|
|
|
|
|
|
# Tokenizer guessing routines for ambiguous situations |
6987
|
|
|
|
|
|
|
####################################################################### |
6988
|
|
|
|
|
|
|
|
6989
|
|
|
|
|
|
|
sub guess_if_pattern_or_conditional { |
6990
|
|
|
|
|
|
|
|
6991
|
|
|
|
|
|
|
# this routine is called when we have encountered a ? following an |
6992
|
|
|
|
|
|
|
# unknown bareword, and we must decide if it starts a pattern or not |
6993
|
|
|
|
|
|
|
# input parameters: |
6994
|
|
|
|
|
|
|
# $i - token index of the ? starting possible pattern |
6995
|
|
|
|
|
|
|
# output parameters: |
6996
|
|
|
|
|
|
|
# $is_pattern = 0 if probably not pattern, =1 if probably a pattern |
6997
|
|
|
|
|
|
|
# msg = a warning or diagnostic message |
6998
|
|
|
|
|
|
|
# USES GLOBAL VARIABLES: $last_nonblank_token |
6999
|
|
|
|
|
|
|
|
7000
|
11
|
|
|
11
|
0
|
51
|
my ( $self, $i, $rtokens, $rtoken_map, $max_token_index ) = @_; |
7001
|
11
|
|
|
|
|
31
|
my $is_pattern = 0; |
7002
|
11
|
|
|
|
|
55
|
my $msg = "guessing that ? after $last_nonblank_token starts a "; |
7003
|
|
|
|
|
|
|
|
7004
|
11
|
50
|
|
|
|
46
|
if ( $i >= $max_token_index ) { |
7005
|
0
|
|
|
|
|
0
|
$msg .= "conditional (no end to pattern found on the line)\n"; |
7006
|
|
|
|
|
|
|
} |
7007
|
|
|
|
|
|
|
else { |
7008
|
11
|
|
|
|
|
29
|
my $ibeg = $i; |
7009
|
11
|
|
|
|
|
29
|
$i = $ibeg + 1; |
7010
|
11
|
|
|
|
|
33
|
my $next_token = $rtokens->[$i]; # first token after ? |
7011
|
|
|
|
|
|
|
|
7012
|
|
|
|
|
|
|
# look for a possible ending ? on this line.. |
7013
|
11
|
|
|
|
|
27
|
my $in_quote = 1; |
7014
|
11
|
|
|
|
|
23
|
my $quote_depth = 0; |
7015
|
11
|
|
|
|
|
23
|
my $quote_character = EMPTY_STRING; |
7016
|
11
|
|
|
|
|
25
|
my $quote_pos = 0; |
7017
|
11
|
|
|
|
|
25
|
my $quoted_string; |
7018
|
|
|
|
|
|
|
( |
7019
|
|
|
|
|
|
|
|
7020
|
11
|
|
|
|
|
57
|
$i, |
7021
|
|
|
|
|
|
|
$in_quote, |
7022
|
|
|
|
|
|
|
$quote_character, |
7023
|
|
|
|
|
|
|
$quote_pos, |
7024
|
|
|
|
|
|
|
$quote_depth, |
7025
|
|
|
|
|
|
|
$quoted_string, |
7026
|
|
|
|
|
|
|
|
7027
|
|
|
|
|
|
|
) = $self->follow_quoted_string( |
7028
|
|
|
|
|
|
|
|
7029
|
|
|
|
|
|
|
$ibeg, |
7030
|
|
|
|
|
|
|
$in_quote, |
7031
|
|
|
|
|
|
|
$rtokens, |
7032
|
|
|
|
|
|
|
$quote_character, |
7033
|
|
|
|
|
|
|
$quote_pos, |
7034
|
|
|
|
|
|
|
$quote_depth, |
7035
|
|
|
|
|
|
|
$max_token_index, |
7036
|
|
|
|
|
|
|
|
7037
|
|
|
|
|
|
|
); |
7038
|
|
|
|
|
|
|
|
7039
|
11
|
50
|
|
|
|
71
|
if ($in_quote) { |
7040
|
|
|
|
|
|
|
|
7041
|
|
|
|
|
|
|
# we didn't find an ending ? on this line, |
7042
|
|
|
|
|
|
|
# so we bias towards conditional |
7043
|
11
|
|
|
|
|
32
|
$is_pattern = 0; |
7044
|
11
|
|
|
|
|
38
|
$msg .= "conditional (no ending ? on this line)\n"; |
7045
|
|
|
|
|
|
|
|
7046
|
|
|
|
|
|
|
# we found an ending ?, so we bias towards a pattern |
7047
|
|
|
|
|
|
|
} |
7048
|
|
|
|
|
|
|
else { |
7049
|
|
|
|
|
|
|
|
7050
|
|
|
|
|
|
|
# Watch out for an ending ? in quotes, like this |
7051
|
|
|
|
|
|
|
# my $case_flag = File::Spec->case_tolerant ? '(?i)' : ''; |
7052
|
0
|
|
|
|
|
0
|
my $s_quote = 0; |
7053
|
0
|
|
|
|
|
0
|
my $d_quote = 0; |
7054
|
0
|
|
|
|
|
0
|
my $colons = 0; |
7055
|
0
|
|
|
|
|
0
|
foreach my $ii ( $ibeg + 1 .. $i - 1 ) { |
7056
|
0
|
|
|
|
|
0
|
my $tok = $rtokens->[$ii]; |
7057
|
0
|
0
|
|
|
|
0
|
if ( $tok eq ":" ) { $colons++ } |
|
0
|
|
|
|
|
0
|
|
7058
|
0
|
0
|
|
|
|
0
|
if ( $tok eq "'" ) { $s_quote++ } |
|
0
|
|
|
|
|
0
|
|
7059
|
0
|
0
|
|
|
|
0
|
if ( $tok eq '"' ) { $d_quote++ } |
|
0
|
|
|
|
|
0
|
|
7060
|
|
|
|
|
|
|
} |
7061
|
0
|
0
|
0
|
|
|
0
|
if ( $s_quote % 2 || $d_quote % 2 || $colons ) { |
|
|
0
|
0
|
|
|
|
|
7062
|
0
|
|
|
|
|
0
|
$is_pattern = 0; |
7063
|
0
|
|
|
|
|
0
|
$msg .= "found ending ? but unbalanced quote chars\n"; |
7064
|
|
|
|
|
|
|
} |
7065
|
|
|
|
|
|
|
elsif ( |
7066
|
|
|
|
|
|
|
$self->pattern_expected( $i, $rtokens, $max_token_index ) >= 0 ) |
7067
|
|
|
|
|
|
|
{ |
7068
|
0
|
|
|
|
|
0
|
$is_pattern = 1; |
7069
|
0
|
|
|
|
|
0
|
$msg .= "pattern (found ending ? and pattern expected)\n"; |
7070
|
|
|
|
|
|
|
} |
7071
|
|
|
|
|
|
|
else { |
7072
|
0
|
|
|
|
|
0
|
$msg .= "pattern (uncertain, but found ending ?)\n"; |
7073
|
|
|
|
|
|
|
} |
7074
|
|
|
|
|
|
|
} |
7075
|
|
|
|
|
|
|
} |
7076
|
11
|
|
|
|
|
41
|
return ( $is_pattern, $msg ); |
7077
|
|
|
|
|
|
|
} ## end sub guess_if_pattern_or_conditional |
7078
|
|
|
|
|
|
|
|
7079
|
|
|
|
|
|
|
my %is_known_constant; |
7080
|
|
|
|
|
|
|
my %is_known_function; |
7081
|
|
|
|
|
|
|
|
7082
|
|
|
|
|
|
|
BEGIN { |
7083
|
|
|
|
|
|
|
|
7084
|
|
|
|
|
|
|
# Constants like 'pi' in Trig.pm are common |
7085
|
39
|
|
|
39
|
|
266
|
my @q = qw(pi pi2 pi4 pip2 pip4); |
7086
|
39
|
|
|
|
|
265
|
@{is_known_constant}{@q} = (1) x scalar(@q); |
7087
|
|
|
|
|
|
|
|
7088
|
|
|
|
|
|
|
# parenless calls of 'ok' are common |
7089
|
39
|
|
|
|
|
140
|
@q = qw( ok ); |
7090
|
39
|
|
|
|
|
70432
|
@{is_known_function}{@q} = (1) x scalar(@q); |
7091
|
|
|
|
|
|
|
} ## end BEGIN |
7092
|
|
|
|
|
|
|
|
7093
|
|
|
|
|
|
|
sub guess_if_pattern_or_division { |
7094
|
|
|
|
|
|
|
|
7095
|
|
|
|
|
|
|
# this routine is called when we have encountered a / following an |
7096
|
|
|
|
|
|
|
# unknown bareword, and we must decide if it starts a pattern or is a |
7097
|
|
|
|
|
|
|
# division |
7098
|
|
|
|
|
|
|
# input parameters: |
7099
|
|
|
|
|
|
|
# $i - token index of the / starting possible pattern |
7100
|
|
|
|
|
|
|
# output parameters: |
7101
|
|
|
|
|
|
|
# $is_pattern = 0 if probably division, =1 if probably a pattern |
7102
|
|
|
|
|
|
|
# msg = a warning or diagnostic message |
7103
|
|
|
|
|
|
|
# USES GLOBAL VARIABLES: $last_nonblank_token |
7104
|
0
|
|
|
0
|
0
|
0
|
my ( $self, $i, $rtokens, $rtoken_map, $max_token_index ) = @_; |
7105
|
0
|
|
|
|
|
0
|
my $is_pattern = 0; |
7106
|
0
|
|
|
|
|
0
|
my $msg = "guessing that / after $last_nonblank_token starts a "; |
7107
|
|
|
|
|
|
|
|
7108
|
0
|
0
|
|
|
|
0
|
if ( $i >= $max_token_index ) { |
7109
|
0
|
|
|
|
|
0
|
$msg .= "division (no end to pattern found on the line)\n"; |
7110
|
|
|
|
|
|
|
} |
7111
|
|
|
|
|
|
|
else { |
7112
|
0
|
|
|
|
|
0
|
my $ibeg = $i; |
7113
|
0
|
|
|
|
|
0
|
my $divide_possible = |
7114
|
|
|
|
|
|
|
$self->is_possible_numerator( $i, $rtokens, $max_token_index ); |
7115
|
|
|
|
|
|
|
|
7116
|
0
|
0
|
|
|
|
0
|
if ( $divide_possible < 0 ) { |
7117
|
0
|
|
|
|
|
0
|
$msg = "pattern (division not possible here)\n"; |
7118
|
0
|
|
|
|
|
0
|
$is_pattern = 1; |
7119
|
0
|
|
|
|
|
0
|
return ( $is_pattern, $msg ); |
7120
|
|
|
|
|
|
|
} |
7121
|
|
|
|
|
|
|
|
7122
|
0
|
|
|
|
|
0
|
$i = $ibeg + 1; |
7123
|
0
|
|
|
|
|
0
|
my $next_token = $rtokens->[$i]; # first token after slash |
7124
|
|
|
|
|
|
|
|
7125
|
|
|
|
|
|
|
# One of the things we can look at is the spacing around the slash. |
7126
|
|
|
|
|
|
|
# There # are four possible spacings around the first slash: |
7127
|
|
|
|
|
|
|
# |
7128
|
|
|
|
|
|
|
# return pi/two;#/; -/- |
7129
|
|
|
|
|
|
|
# return pi/ two;#/; -/+ |
7130
|
|
|
|
|
|
|
# return pi / two;#/; +/+ |
7131
|
|
|
|
|
|
|
# return pi /two;#/; +/- <-- possible pattern |
7132
|
|
|
|
|
|
|
# |
7133
|
|
|
|
|
|
|
# Spacing rule: a space before the slash but not after the slash |
7134
|
|
|
|
|
|
|
# usually indicates a pattern. We can use this to break ties. |
7135
|
|
|
|
|
|
|
|
7136
|
0
|
|
0
|
|
|
0
|
my $is_pattern_by_spacing = |
7137
|
|
|
|
|
|
|
( $i > 1 && $next_token !~ m/^\s/ && $rtokens->[ $i - 2 ] =~ m/^\s/ ); |
7138
|
|
|
|
|
|
|
|
7139
|
|
|
|
|
|
|
# look for a possible ending / on this line.. |
7140
|
0
|
|
|
|
|
0
|
my $in_quote = 1; |
7141
|
0
|
|
|
|
|
0
|
my $quote_depth = 0; |
7142
|
0
|
|
|
|
|
0
|
my $quote_character = EMPTY_STRING; |
7143
|
0
|
|
|
|
|
0
|
my $quote_pos = 0; |
7144
|
0
|
|
|
|
|
0
|
my $quoted_string; |
7145
|
|
|
|
|
|
|
( |
7146
|
0
|
|
|
|
|
0
|
$i, $in_quote, $quote_character, $quote_pos, $quote_depth, |
7147
|
|
|
|
|
|
|
$quoted_string |
7148
|
|
|
|
|
|
|
) |
7149
|
|
|
|
|
|
|
= $self->follow_quoted_string( $ibeg, $in_quote, $rtokens, |
7150
|
|
|
|
|
|
|
$quote_character, $quote_pos, $quote_depth, $max_token_index ); |
7151
|
|
|
|
|
|
|
|
7152
|
0
|
0
|
|
|
|
0
|
if ($in_quote) { |
7153
|
|
|
|
|
|
|
|
7154
|
|
|
|
|
|
|
# we didn't find an ending / on this line, so we bias towards |
7155
|
|
|
|
|
|
|
# division |
7156
|
0
|
0
|
|
|
|
0
|
if ( $divide_possible >= 0 ) { |
7157
|
0
|
|
|
|
|
0
|
$is_pattern = 0; |
7158
|
0
|
|
|
|
|
0
|
$msg .= "division (no ending / on this line)\n"; |
7159
|
|
|
|
|
|
|
} |
7160
|
|
|
|
|
|
|
else { |
7161
|
|
|
|
|
|
|
|
7162
|
|
|
|
|
|
|
# assuming a multi-line pattern ... this is risky, but division |
7163
|
|
|
|
|
|
|
# does not seem possible. If this fails, it would either be due |
7164
|
|
|
|
|
|
|
# to a syntax error in the code, or the division_expected logic |
7165
|
|
|
|
|
|
|
# needs to be fixed. |
7166
|
0
|
|
|
|
|
0
|
$msg = "multi-line pattern (division not possible)\n"; |
7167
|
0
|
|
|
|
|
0
|
$is_pattern = 1; |
7168
|
|
|
|
|
|
|
} |
7169
|
|
|
|
|
|
|
} |
7170
|
|
|
|
|
|
|
|
7171
|
|
|
|
|
|
|
# we found an ending /, so we bias slightly towards a pattern |
7172
|
|
|
|
|
|
|
else { |
7173
|
|
|
|
|
|
|
|
7174
|
0
|
|
|
|
|
0
|
my $pattern_expected = |
7175
|
|
|
|
|
|
|
$self->pattern_expected( $i, $rtokens, $max_token_index ); |
7176
|
|
|
|
|
|
|
|
7177
|
0
|
0
|
|
|
|
0
|
if ( $pattern_expected >= 0 ) { |
7178
|
|
|
|
|
|
|
|
7179
|
|
|
|
|
|
|
# pattern looks possible... |
7180
|
0
|
0
|
|
|
|
0
|
if ( $divide_possible >= 0 ) { |
7181
|
|
|
|
|
|
|
|
7182
|
|
|
|
|
|
|
# Both pattern and divide can work here... |
7183
|
|
|
|
|
|
|
|
7184
|
|
|
|
|
|
|
# Increase weight of divide if a pure number follows |
7185
|
0
|
|
|
|
|
0
|
$divide_possible += $next_token =~ /^\d+$/; |
7186
|
|
|
|
|
|
|
|
7187
|
|
|
|
|
|
|
# Check for known constants in the numerator, like 'pi' |
7188
|
0
|
0
|
|
|
|
0
|
if ( $is_known_constant{$last_nonblank_token} ) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
7189
|
0
|
|
|
|
|
0
|
$msg .= |
7190
|
|
|
|
|
|
|
"division (pattern works too but saw known constant '$last_nonblank_token')\n"; |
7191
|
0
|
|
|
|
|
0
|
$is_pattern = 0; |
7192
|
|
|
|
|
|
|
} |
7193
|
|
|
|
|
|
|
|
7194
|
|
|
|
|
|
|
# A very common bare word in pattern expressions is 'ok' |
7195
|
|
|
|
|
|
|
elsif ( $is_known_function{$last_nonblank_token} ) { |
7196
|
0
|
|
|
|
|
0
|
$msg .= |
7197
|
|
|
|
|
|
|
"pattern (division works too but saw '$last_nonblank_token')\n"; |
7198
|
0
|
|
|
|
|
0
|
$is_pattern = 1; |
7199
|
|
|
|
|
|
|
} |
7200
|
|
|
|
|
|
|
|
7201
|
|
|
|
|
|
|
# If one rule is more definite, use it |
7202
|
|
|
|
|
|
|
elsif ( $divide_possible > $pattern_expected ) { |
7203
|
0
|
|
|
|
|
0
|
$msg .= |
7204
|
|
|
|
|
|
|
"division (more likely based on following tokens)\n"; |
7205
|
0
|
|
|
|
|
0
|
$is_pattern = 0; |
7206
|
|
|
|
|
|
|
} |
7207
|
|
|
|
|
|
|
|
7208
|
|
|
|
|
|
|
# otherwise, use the spacing rule |
7209
|
|
|
|
|
|
|
elsif ($is_pattern_by_spacing) { |
7210
|
0
|
|
|
|
|
0
|
$msg .= |
7211
|
|
|
|
|
|
|
"pattern (guess on spacing, but division possible too)\n"; |
7212
|
0
|
|
|
|
|
0
|
$is_pattern = 1; |
7213
|
|
|
|
|
|
|
} |
7214
|
|
|
|
|
|
|
else { |
7215
|
0
|
|
|
|
|
0
|
$msg .= |
7216
|
|
|
|
|
|
|
"division (guess on spacing, but pattern is possible too)\n"; |
7217
|
0
|
|
|
|
|
0
|
$is_pattern = 0; |
7218
|
|
|
|
|
|
|
} |
7219
|
|
|
|
|
|
|
} |
7220
|
|
|
|
|
|
|
|
7221
|
|
|
|
|
|
|
# divide_possible < 0 means divide can not work here |
7222
|
|
|
|
|
|
|
else { |
7223
|
0
|
|
|
|
|
0
|
$is_pattern = 1; |
7224
|
0
|
|
|
|
|
0
|
$msg .= "pattern (division not possible)\n"; |
7225
|
|
|
|
|
|
|
} |
7226
|
|
|
|
|
|
|
} |
7227
|
|
|
|
|
|
|
|
7228
|
|
|
|
|
|
|
# pattern does not look possible... |
7229
|
|
|
|
|
|
|
else { |
7230
|
|
|
|
|
|
|
|
7231
|
0
|
0
|
|
|
|
0
|
if ( $divide_possible >= 0 ) { |
7232
|
0
|
|
|
|
|
0
|
$is_pattern = 0; |
7233
|
0
|
|
|
|
|
0
|
$msg .= "division (pattern not possible)\n"; |
7234
|
|
|
|
|
|
|
} |
7235
|
|
|
|
|
|
|
|
7236
|
|
|
|
|
|
|
# Neither pattern nor divide look possible...go by spacing |
7237
|
|
|
|
|
|
|
else { |
7238
|
0
|
0
|
|
|
|
0
|
if ($is_pattern_by_spacing) { |
7239
|
0
|
|
|
|
|
0
|
$msg .= "pattern (guess on spacing)\n"; |
7240
|
0
|
|
|
|
|
0
|
$is_pattern = 1; |
7241
|
|
|
|
|
|
|
} |
7242
|
|
|
|
|
|
|
else { |
7243
|
0
|
|
|
|
|
0
|
$msg .= "division (guess on spacing)\n"; |
7244
|
0
|
|
|
|
|
0
|
$is_pattern = 0; |
7245
|
|
|
|
|
|
|
} |
7246
|
|
|
|
|
|
|
} |
7247
|
|
|
|
|
|
|
} |
7248
|
|
|
|
|
|
|
} |
7249
|
|
|
|
|
|
|
} |
7250
|
0
|
|
|
|
|
0
|
return ( $is_pattern, $msg ); |
7251
|
|
|
|
|
|
|
} ## end sub guess_if_pattern_or_division |
7252
|
|
|
|
|
|
|
|
7253
|
|
|
|
|
|
|
# try to resolve here-doc vs. shift by looking ahead for |
7254
|
|
|
|
|
|
|
# non-code or the end token (currently only looks for end token) |
7255
|
|
|
|
|
|
|
# returns 1 if it is probably a here doc, 0 if not |
7256
|
|
|
|
|
|
|
sub guess_if_here_doc { |
7257
|
|
|
|
|
|
|
|
7258
|
0
|
|
|
0
|
0
|
0
|
my ( $self, $next_token ) = @_; |
7259
|
|
|
|
|
|
|
|
7260
|
|
|
|
|
|
|
# This is how many lines we will search for a target as part of the |
7261
|
|
|
|
|
|
|
# guessing strategy. It is a constant because there is probably |
7262
|
|
|
|
|
|
|
# little reason to change it. |
7263
|
|
|
|
|
|
|
# USES GLOBAL VARIABLES: $current_package $ris_constant, |
7264
|
0
|
|
|
|
|
0
|
my $HERE_DOC_WINDOW = 40; |
7265
|
|
|
|
|
|
|
|
7266
|
0
|
|
|
|
|
0
|
my $here_doc_expected = 0; |
7267
|
0
|
|
|
|
|
0
|
my $line; |
7268
|
0
|
|
|
|
|
0
|
my $k = 0; |
7269
|
0
|
|
|
|
|
0
|
my $msg = "checking <<"; |
7270
|
|
|
|
|
|
|
|
7271
|
0
|
|
|
|
|
0
|
while ( $line = $self->peek_ahead( $k++ ) ) { |
7272
|
0
|
|
|
|
|
0
|
chomp $line; |
7273
|
|
|
|
|
|
|
|
7274
|
0
|
0
|
|
|
|
0
|
if ( $line =~ /^$next_token$/ ) { |
7275
|
0
|
|
|
|
|
0
|
$msg .= " -- found target $next_token ahead $k lines\n"; |
7276
|
0
|
|
|
|
|
0
|
$here_doc_expected = 1; # got it |
7277
|
0
|
|
|
|
|
0
|
last; |
7278
|
|
|
|
|
|
|
} |
7279
|
0
|
0
|
|
|
|
0
|
last if ( $k >= $HERE_DOC_WINDOW ); |
7280
|
|
|
|
|
|
|
} |
7281
|
|
|
|
|
|
|
|
7282
|
0
|
0
|
|
|
|
0
|
if ( !$here_doc_expected ) { |
7283
|
|
|
|
|
|
|
|
7284
|
0
|
0
|
|
|
|
0
|
if ( !defined($line) ) { |
7285
|
0
|
|
|
|
|
0
|
$here_doc_expected = -1; # hit eof without seeing target |
7286
|
0
|
|
|
|
|
0
|
$msg .= " -- must be shift; target $next_token not in file\n"; |
7287
|
|
|
|
|
|
|
|
7288
|
|
|
|
|
|
|
} |
7289
|
|
|
|
|
|
|
else { # still unsure..taking a wild guess |
7290
|
|
|
|
|
|
|
|
7291
|
0
|
0
|
|
|
|
0
|
if ( !$ris_constant->{$current_package}{$next_token} ) { |
7292
|
0
|
|
|
|
|
0
|
$here_doc_expected = 1; |
7293
|
0
|
|
|
|
|
0
|
$msg .= |
7294
|
|
|
|
|
|
|
" -- guessing it's a here-doc ($next_token not a constant)\n"; |
7295
|
|
|
|
|
|
|
} |
7296
|
|
|
|
|
|
|
else { |
7297
|
0
|
|
|
|
|
0
|
$msg .= |
7298
|
|
|
|
|
|
|
" -- guessing it's a shift ($next_token is a constant)\n"; |
7299
|
|
|
|
|
|
|
} |
7300
|
|
|
|
|
|
|
} |
7301
|
|
|
|
|
|
|
} |
7302
|
0
|
|
|
|
|
0
|
$self->write_logfile_entry($msg); |
7303
|
0
|
|
|
|
|
0
|
return $here_doc_expected; |
7304
|
|
|
|
|
|
|
} ## end sub guess_if_here_doc |
7305
|
|
|
|
|
|
|
|
7306
|
|
|
|
|
|
|
####################################################################### |
7307
|
|
|
|
|
|
|
# Tokenizer Routines for scanning identifiers and related items |
7308
|
|
|
|
|
|
|
####################################################################### |
7309
|
|
|
|
|
|
|
|
7310
|
|
|
|
|
|
|
sub scan_bare_identifier_do { |
7311
|
|
|
|
|
|
|
|
7312
|
|
|
|
|
|
|
# this routine is called to scan a token starting with an alphanumeric |
7313
|
|
|
|
|
|
|
# variable or package separator, :: or '. |
7314
|
|
|
|
|
|
|
# USES GLOBAL VARIABLES: $current_package, $last_nonblank_token, |
7315
|
|
|
|
|
|
|
# $last_nonblank_type, $rparen_type, $paren_depth |
7316
|
|
|
|
|
|
|
|
7317
|
1672
|
|
|
1672
|
0
|
4993
|
my ( $self, $input_line, $i, $tok, $type, $prototype, $rtoken_map, |
7318
|
|
|
|
|
|
|
$max_token_index ) |
7319
|
|
|
|
|
|
|
= @_; |
7320
|
1672
|
|
|
|
|
2747
|
my $i_begin = $i; |
7321
|
1672
|
|
|
|
|
2911
|
my $package = undef; |
7322
|
|
|
|
|
|
|
|
7323
|
1672
|
|
|
|
|
2599
|
my $i_beg = $i; |
7324
|
|
|
|
|
|
|
|
7325
|
|
|
|
|
|
|
# we have to back up one pretoken at a :: since each : is one pretoken |
7326
|
1672
|
100
|
|
|
|
4507
|
if ( $tok eq '::' ) { $i_beg-- } |
|
9
|
|
|
|
|
15
|
|
7327
|
1672
|
50
|
|
|
|
3941
|
if ( $tok eq '->' ) { $i_beg-- } |
|
0
|
|
|
|
|
0
|
|
7328
|
1672
|
|
|
|
|
3076
|
my $pos_beg = $rtoken_map->[$i_beg]; |
7329
|
1672
|
|
|
|
|
5048
|
pos($input_line) = $pos_beg; |
7330
|
|
|
|
|
|
|
|
7331
|
|
|
|
|
|
|
# Examples: |
7332
|
|
|
|
|
|
|
# A::B::C |
7333
|
|
|
|
|
|
|
# A:: |
7334
|
|
|
|
|
|
|
# ::A |
7335
|
|
|
|
|
|
|
# A'B |
7336
|
1672
|
50
|
|
|
|
12109
|
if ( $input_line =~ m/\G\s*((?:\w*(?:'|::)))*(?:(?:->)?(\w+))?/gc ) { |
7337
|
|
|
|
|
|
|
|
7338
|
1672
|
|
|
|
|
3348
|
my $pos = pos($input_line); |
7339
|
1672
|
|
|
|
|
2832
|
my $numc = $pos - $pos_beg; |
7340
|
1672
|
|
|
|
|
3627
|
$tok = substr( $input_line, $pos_beg, $numc ); |
7341
|
|
|
|
|
|
|
|
7342
|
|
|
|
|
|
|
# type 'w' includes anything without leading type info |
7343
|
|
|
|
|
|
|
# ($,%,@,*) including something like abc::def::ghi |
7344
|
1672
|
|
|
|
|
2810
|
$type = 'w'; |
7345
|
|
|
|
|
|
|
|
7346
|
1672
|
|
|
|
|
2824
|
my $sub_name = EMPTY_STRING; |
7347
|
1672
|
100
|
|
|
|
4796
|
if ( defined($2) ) { $sub_name = $2; } |
|
1667
|
|
|
|
|
3207
|
|
7348
|
1672
|
100
|
|
|
|
4253
|
if ( defined($1) ) { |
7349
|
274
|
|
|
|
|
671
|
$package = $1; |
7350
|
|
|
|
|
|
|
|
7351
|
|
|
|
|
|
|
# patch: don't allow isolated package name which just ends |
7352
|
|
|
|
|
|
|
# in the old style package separator (single quote). Example: |
7353
|
|
|
|
|
|
|
# use CGI':all'; |
7354
|
274
|
50
|
66
|
|
|
1093
|
if ( !($sub_name) && substr( $package, -1, 1 ) eq '\'' ) { |
7355
|
0
|
|
|
|
|
0
|
$pos--; |
7356
|
|
|
|
|
|
|
} |
7357
|
|
|
|
|
|
|
|
7358
|
274
|
|
|
|
|
788
|
$package =~ s/\'/::/g; |
7359
|
274
|
100
|
|
|
|
808
|
if ( $package =~ /^\:/ ) { $package = 'main' . $package } |
|
9
|
|
|
|
|
25
|
|
7360
|
274
|
|
|
|
|
1213
|
$package =~ s/::$//; |
7361
|
|
|
|
|
|
|
} |
7362
|
|
|
|
|
|
|
else { |
7363
|
1398
|
|
|
|
|
2646
|
$package = $current_package; |
7364
|
|
|
|
|
|
|
|
7365
|
|
|
|
|
|
|
# patched for c043, part 1: keyword does not follow '->' |
7366
|
1398
|
50
|
66
|
|
|
5514
|
if ( $is_keyword{$tok} && $last_nonblank_type ne '->' ) { |
7367
|
0
|
|
|
|
|
0
|
$type = 'k'; |
7368
|
|
|
|
|
|
|
} |
7369
|
|
|
|
|
|
|
} |
7370
|
|
|
|
|
|
|
|
7371
|
|
|
|
|
|
|
# if it is a bareword.. patched for c043, part 2: not following '->' |
7372
|
1672
|
100
|
66
|
|
|
7677
|
if ( $type eq 'w' && $last_nonblank_type ne '->' ) { |
7373
|
|
|
|
|
|
|
|
7374
|
|
|
|
|
|
|
# check for v-string with leading 'v' type character |
7375
|
|
|
|
|
|
|
# (This seems to have precedence over filehandle, type 'Y') |
7376
|
1003
|
100
|
66
|
|
|
14705
|
if ( $tok =~ /^v\d[_\d]*$/ ) { |
|
|
100
|
66
|
|
|
|
|
|
|
100
|
66
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
7377
|
|
|
|
|
|
|
|
7378
|
|
|
|
|
|
|
# we only have the first part - something like 'v101' - |
7379
|
|
|
|
|
|
|
# look for more |
7380
|
2
|
50
|
|
|
|
17
|
if ( $input_line =~ m/\G(\.\d[_\d]*)+/gc ) { |
7381
|
2
|
|
|
|
|
8
|
$pos = pos($input_line); |
7382
|
2
|
|
|
|
|
4
|
$numc = $pos - $pos_beg; |
7383
|
2
|
|
|
|
|
6
|
$tok = substr( $input_line, $pos_beg, $numc ); |
7384
|
|
|
|
|
|
|
} |
7385
|
2
|
|
|
|
|
6
|
$type = 'v'; |
7386
|
|
|
|
|
|
|
|
7387
|
|
|
|
|
|
|
# warn if this version can't handle v-strings |
7388
|
2
|
|
|
|
|
12
|
$self->report_v_string($tok); |
7389
|
|
|
|
|
|
|
} |
7390
|
|
|
|
|
|
|
|
7391
|
|
|
|
|
|
|
elsif ( $ris_constant->{$package}{$sub_name} ) { |
7392
|
12
|
|
|
|
|
52
|
$type = 'C'; |
7393
|
|
|
|
|
|
|
} |
7394
|
|
|
|
|
|
|
|
7395
|
|
|
|
|
|
|
# bareword after sort has implied empty prototype; for example: |
7396
|
|
|
|
|
|
|
# @sorted = sort numerically ( 53, 29, 11, 32, 7 ); |
7397
|
|
|
|
|
|
|
# This has priority over whatever the user has specified. |
7398
|
|
|
|
|
|
|
elsif ($last_nonblank_token eq 'sort' |
7399
|
|
|
|
|
|
|
&& $last_nonblank_type eq 'k' ) |
7400
|
|
|
|
|
|
|
{ |
7401
|
1
|
|
|
|
|
4
|
$type = 'Z'; |
7402
|
|
|
|
|
|
|
} |
7403
|
|
|
|
|
|
|
|
7404
|
|
|
|
|
|
|
# Note: strangely, perl does not seem to really let you create |
7405
|
|
|
|
|
|
|
# functions which act like eval and do, in the sense that eval |
7406
|
|
|
|
|
|
|
# and do may have operators following the final }, but any operators |
7407
|
|
|
|
|
|
|
# that you create with prototype (&) apparently do not allow |
7408
|
|
|
|
|
|
|
# trailing operators, only terms. This seems strange. |
7409
|
|
|
|
|
|
|
# If this ever changes, here is the update |
7410
|
|
|
|
|
|
|
# to make perltidy behave accordingly: |
7411
|
|
|
|
|
|
|
|
7412
|
|
|
|
|
|
|
# elsif ( $ris_block_function->{$package}{$tok} ) { |
7413
|
|
|
|
|
|
|
# $tok='eval'; # patch to do braces like eval - doesn't work |
7414
|
|
|
|
|
|
|
# $type = 'k'; |
7415
|
|
|
|
|
|
|
#} |
7416
|
|
|
|
|
|
|
# TODO: This could become a separate type to allow for different |
7417
|
|
|
|
|
|
|
# future behavior: |
7418
|
|
|
|
|
|
|
elsif ( $ris_block_function->{$package}{$sub_name} ) { |
7419
|
0
|
|
|
|
|
0
|
$type = 'G'; |
7420
|
|
|
|
|
|
|
} |
7421
|
|
|
|
|
|
|
elsif ( $ris_block_list_function->{$package}{$sub_name} ) { |
7422
|
0
|
|
|
|
|
0
|
$type = 'G'; |
7423
|
|
|
|
|
|
|
} |
7424
|
|
|
|
|
|
|
elsif ( $ris_user_function->{$package}{$sub_name} ) { |
7425
|
6
|
|
|
|
|
16
|
$type = 'U'; |
7426
|
6
|
|
|
|
|
18
|
$prototype = $ruser_function_prototype->{$package}{$sub_name}; |
7427
|
|
|
|
|
|
|
} |
7428
|
|
|
|
|
|
|
|
7429
|
|
|
|
|
|
|
# check for indirect object |
7430
|
|
|
|
|
|
|
elsif ( |
7431
|
|
|
|
|
|
|
|
7432
|
|
|
|
|
|
|
# added 2001-03-27: must not be followed immediately by '(' |
7433
|
|
|
|
|
|
|
# see fhandle.t |
7434
|
|
|
|
|
|
|
( $input_line !~ m/\G\(/gc ) |
7435
|
|
|
|
|
|
|
|
7436
|
|
|
|
|
|
|
# and |
7437
|
|
|
|
|
|
|
&& ( |
7438
|
|
|
|
|
|
|
|
7439
|
|
|
|
|
|
|
# preceded by keyword like 'print', 'printf' and friends |
7440
|
|
|
|
|
|
|
$is_indirect_object_taker{$last_nonblank_token} |
7441
|
|
|
|
|
|
|
|
7442
|
|
|
|
|
|
|
# or preceded by something like 'print(' or 'printf(' |
7443
|
|
|
|
|
|
|
|| ( |
7444
|
|
|
|
|
|
|
( $last_nonblank_token eq '(' ) |
7445
|
|
|
|
|
|
|
&& $is_indirect_object_taker{ |
7446
|
|
|
|
|
|
|
$rparen_type->[$paren_depth] |
7447
|
|
|
|
|
|
|
} |
7448
|
|
|
|
|
|
|
|
7449
|
|
|
|
|
|
|
) |
7450
|
|
|
|
|
|
|
) |
7451
|
|
|
|
|
|
|
) |
7452
|
|
|
|
|
|
|
{ |
7453
|
|
|
|
|
|
|
|
7454
|
|
|
|
|
|
|
# may not be indirect object unless followed by a space; |
7455
|
|
|
|
|
|
|
# updated 2021-01-16 to consider newline to be a space. |
7456
|
|
|
|
|
|
|
# updated for case b990 to look for either ';' or space |
7457
|
4
|
50
|
33
|
|
|
130
|
if ( pos($input_line) == length($input_line) |
7458
|
|
|
|
|
|
|
|| $input_line =~ m/\G[;\s]/gc ) |
7459
|
|
|
|
|
|
|
{ |
7460
|
4
|
|
|
|
|
16
|
$type = 'Y'; |
7461
|
|
|
|
|
|
|
|
7462
|
|
|
|
|
|
|
# Abandon Hope ... |
7463
|
|
|
|
|
|
|
# Perl's indirect object notation is a very bad |
7464
|
|
|
|
|
|
|
# thing and can cause subtle bugs, especially for |
7465
|
|
|
|
|
|
|
# beginning programmers. And I haven't even been |
7466
|
|
|
|
|
|
|
# able to figure out a sane warning scheme which |
7467
|
|
|
|
|
|
|
# doesn't get in the way of good scripts. |
7468
|
|
|
|
|
|
|
|
7469
|
|
|
|
|
|
|
# Complain if a filehandle has any lower case |
7470
|
|
|
|
|
|
|
# letters. This is suggested good practice. |
7471
|
|
|
|
|
|
|
# Use 'sub_name' because something like |
7472
|
|
|
|
|
|
|
# main::MYHANDLE is ok for filehandle |
7473
|
4
|
100
|
|
|
|
22
|
if ( $sub_name =~ /[a-z]/ ) { |
7474
|
|
|
|
|
|
|
|
7475
|
|
|
|
|
|
|
# could be bug caused by older perltidy if |
7476
|
|
|
|
|
|
|
# followed by '(' |
7477
|
1
|
50
|
|
|
|
5
|
if ( $input_line =~ m/\G\s*\(/gc ) { |
7478
|
1
|
|
|
|
|
7
|
$self->complain( |
7479
|
|
|
|
|
|
|
"Caution: unknown word '$tok' in indirect object slot\n" |
7480
|
|
|
|
|
|
|
); |
7481
|
|
|
|
|
|
|
} |
7482
|
|
|
|
|
|
|
} |
7483
|
|
|
|
|
|
|
} |
7484
|
|
|
|
|
|
|
|
7485
|
|
|
|
|
|
|
# bareword not followed by a space -- may not be filehandle |
7486
|
|
|
|
|
|
|
# (may be function call defined in a 'use' statement) |
7487
|
|
|
|
|
|
|
else { |
7488
|
0
|
|
|
|
|
0
|
$type = 'Z'; |
7489
|
|
|
|
|
|
|
} |
7490
|
|
|
|
|
|
|
} |
7491
|
|
|
|
|
|
|
|
7492
|
|
|
|
|
|
|
# none of the above special types |
7493
|
|
|
|
|
|
|
else { |
7494
|
|
|
|
|
|
|
} |
7495
|
|
|
|
|
|
|
} |
7496
|
|
|
|
|
|
|
|
7497
|
|
|
|
|
|
|
# Now we must convert back from character position |
7498
|
|
|
|
|
|
|
# to pre_token index. |
7499
|
|
|
|
|
|
|
# I don't think an error flag can occur here ..but who knows |
7500
|
1672
|
|
|
|
|
2924
|
my $error; |
7501
|
1672
|
|
|
|
|
5133
|
( $i, $error ) = |
7502
|
|
|
|
|
|
|
inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index ); |
7503
|
1672
|
50
|
|
|
|
4632
|
if ($error) { |
7504
|
0
|
|
|
|
|
0
|
$self->warning( |
7505
|
|
|
|
|
|
|
"scan_bare_identifier: Possibly invalid tokenization\n"); |
7506
|
|
|
|
|
|
|
} |
7507
|
|
|
|
|
|
|
} |
7508
|
|
|
|
|
|
|
|
7509
|
|
|
|
|
|
|
# no match but line not blank - could be syntax error |
7510
|
|
|
|
|
|
|
# perl will take '::' alone without complaint |
7511
|
|
|
|
|
|
|
else { |
7512
|
0
|
|
|
|
|
0
|
$type = 'w'; |
7513
|
|
|
|
|
|
|
|
7514
|
|
|
|
|
|
|
# change this warning to log message if it becomes annoying |
7515
|
0
|
|
|
|
|
0
|
$self->warning("didn't find identifier after leading ::\n"); |
7516
|
|
|
|
|
|
|
} |
7517
|
1672
|
|
|
|
|
6874
|
return ( $i, $tok, $type, $prototype ); |
7518
|
|
|
|
|
|
|
} ## end sub scan_bare_identifier_do |
7519
|
|
|
|
|
|
|
|
7520
|
|
|
|
|
|
|
sub scan_id_do { |
7521
|
|
|
|
|
|
|
|
7522
|
|
|
|
|
|
|
# This is the new scanner and will eventually replace scan_identifier. |
7523
|
|
|
|
|
|
|
# Only type 'sub' and 'package' are implemented. |
7524
|
|
|
|
|
|
|
# Token types $ * % @ & -> are not yet implemented. |
7525
|
|
|
|
|
|
|
# |
7526
|
|
|
|
|
|
|
# Scan identifier following a type token. |
7527
|
|
|
|
|
|
|
# The type of call depends on $id_scan_state: $id_scan_state = '' |
7528
|
|
|
|
|
|
|
# for starting call, in which case $tok must be the token defining |
7529
|
|
|
|
|
|
|
# the type. |
7530
|
|
|
|
|
|
|
# |
7531
|
|
|
|
|
|
|
# If the type token is the last nonblank token on the line, a value |
7532
|
|
|
|
|
|
|
# of $id_scan_state = $tok is returned, indicating that further |
7533
|
|
|
|
|
|
|
# calls must be made to get the identifier. If the type token is |
7534
|
|
|
|
|
|
|
# not the last nonblank token on the line, the identifier is |
7535
|
|
|
|
|
|
|
# scanned and handled and a value of '' is returned. |
7536
|
|
|
|
|
|
|
|
7537
|
331
|
|
|
331
|
0
|
1229
|
my ( $self, $input_line, $i, $tok, $rtokens, $rtoken_map, $id_scan_state, |
7538
|
|
|
|
|
|
|
$max_token_index ) |
7539
|
|
|
|
|
|
|
= @_; |
7540
|
39
|
|
|
39
|
|
476
|
use constant DEBUG_NSCAN => 0; |
|
39
|
|
|
|
|
93
|
|
|
39
|
|
|
|
|
48307
|
|
7541
|
331
|
|
|
|
|
626
|
my $type = EMPTY_STRING; |
7542
|
331
|
|
|
|
|
725
|
my ( $i_beg, $pos_beg ); |
7543
|
|
|
|
|
|
|
|
7544
|
|
|
|
|
|
|
#print "NSCAN:entering i=$i, tok=$tok, type=$type, state=$id_scan_state\n"; |
7545
|
|
|
|
|
|
|
#my ($a,$b,$c) = caller; |
7546
|
|
|
|
|
|
|
#print "NSCAN: scan_id called with tok=$tok $a $b $c\n"; |
7547
|
|
|
|
|
|
|
|
7548
|
|
|
|
|
|
|
# on re-entry, start scanning at first token on the line |
7549
|
331
|
100
|
|
|
|
1002
|
if ($id_scan_state) { |
7550
|
10
|
|
|
|
|
40
|
$i_beg = $i; |
7551
|
10
|
|
|
|
|
26
|
$type = EMPTY_STRING; |
7552
|
|
|
|
|
|
|
} |
7553
|
|
|
|
|
|
|
|
7554
|
|
|
|
|
|
|
# on initial entry, start scanning just after type token |
7555
|
|
|
|
|
|
|
else { |
7556
|
321
|
|
|
|
|
606
|
$i_beg = $i + 1; |
7557
|
321
|
|
|
|
|
599
|
$id_scan_state = $tok; |
7558
|
321
|
|
|
|
|
653
|
$type = 't'; |
7559
|
|
|
|
|
|
|
} |
7560
|
|
|
|
|
|
|
|
7561
|
|
|
|
|
|
|
# find $i_beg = index of next nonblank token, |
7562
|
|
|
|
|
|
|
# and handle empty lines |
7563
|
331
|
|
|
|
|
586
|
my $blank_line = 0; |
7564
|
331
|
|
|
|
|
751
|
my $next_nonblank_token = $rtokens->[$i_beg]; |
7565
|
331
|
100
|
|
|
|
933
|
if ( $i_beg > $max_token_index ) { |
7566
|
2
|
|
|
|
|
8
|
$blank_line = 1; |
7567
|
|
|
|
|
|
|
} |
7568
|
|
|
|
|
|
|
else { |
7569
|
|
|
|
|
|
|
|
7570
|
|
|
|
|
|
|
# only a '#' immediately after a '$' is not a comment |
7571
|
329
|
50
|
|
|
|
983
|
if ( $next_nonblank_token eq '#' ) { |
7572
|
0
|
0
|
|
|
|
0
|
if ( $tok ne '$' ) { |
7573
|
0
|
|
|
|
|
0
|
$blank_line = 1; |
7574
|
|
|
|
|
|
|
} |
7575
|
|
|
|
|
|
|
} |
7576
|
|
|
|
|
|
|
|
7577
|
329
|
100
|
|
|
|
1505
|
if ( $next_nonblank_token =~ /^\s/ ) { |
7578
|
309
|
|
|
|
|
1327
|
( $next_nonblank_token, $i_beg ) = |
7579
|
|
|
|
|
|
|
find_next_nonblank_token_on_this_line( $i_beg, $rtokens, |
7580
|
|
|
|
|
|
|
$max_token_index ); |
7581
|
309
|
100
|
|
|
|
1728
|
if ( $next_nonblank_token =~ /(^#|^\s*$)/ ) { |
7582
|
4
|
|
|
|
|
12
|
$blank_line = 1; |
7583
|
|
|
|
|
|
|
} |
7584
|
|
|
|
|
|
|
} |
7585
|
|
|
|
|
|
|
} |
7586
|
|
|
|
|
|
|
|
7587
|
|
|
|
|
|
|
# handle non-blank line; identifier, if any, must follow |
7588
|
331
|
100
|
|
|
|
1041
|
if ( !$blank_line ) { |
7589
|
|
|
|
|
|
|
|
7590
|
325
|
100
|
|
|
|
1019
|
if ( $is_sub{$id_scan_state} ) { |
|
|
50
|
|
|
|
|
|
7591
|
299
|
|
|
|
|
3299
|
( $i, $tok, $type, $id_scan_state ) = $self->do_scan_sub( |
7592
|
|
|
|
|
|
|
{ |
7593
|
|
|
|
|
|
|
input_line => $input_line, |
7594
|
|
|
|
|
|
|
i => $i, |
7595
|
|
|
|
|
|
|
i_beg => $i_beg, |
7596
|
|
|
|
|
|
|
tok => $tok, |
7597
|
|
|
|
|
|
|
type => $type, |
7598
|
|
|
|
|
|
|
rtokens => $rtokens, |
7599
|
|
|
|
|
|
|
rtoken_map => $rtoken_map, |
7600
|
|
|
|
|
|
|
id_scan_state => $id_scan_state, |
7601
|
|
|
|
|
|
|
max_token_index => $max_token_index, |
7602
|
|
|
|
|
|
|
} |
7603
|
|
|
|
|
|
|
); |
7604
|
|
|
|
|
|
|
} |
7605
|
|
|
|
|
|
|
|
7606
|
|
|
|
|
|
|
elsif ( $is_package{$id_scan_state} ) { |
7607
|
26
|
|
|
|
|
102
|
( $i, $tok, $type ) = |
7608
|
|
|
|
|
|
|
$self->do_scan_package( $input_line, $i, $i_beg, $tok, $type, |
7609
|
|
|
|
|
|
|
$rtokens, $rtoken_map, $max_token_index ); |
7610
|
26
|
|
|
|
|
64
|
$id_scan_state = EMPTY_STRING; |
7611
|
|
|
|
|
|
|
} |
7612
|
|
|
|
|
|
|
|
7613
|
|
|
|
|
|
|
else { |
7614
|
0
|
|
|
|
|
0
|
$self->warning("invalid token in scan_id: $tok\n"); |
7615
|
0
|
|
|
|
|
0
|
$id_scan_state = EMPTY_STRING; |
7616
|
|
|
|
|
|
|
} |
7617
|
|
|
|
|
|
|
} |
7618
|
|
|
|
|
|
|
|
7619
|
331
|
50
|
33
|
|
|
1921
|
if ( $id_scan_state && ( !defined($type) || !$type ) ) { |
|
|
|
66
|
|
|
|
|
7620
|
|
|
|
|
|
|
|
7621
|
|
|
|
|
|
|
# shouldn't happen: |
7622
|
0
|
|
|
|
|
0
|
if (DEVEL_MODE) { |
7623
|
|
|
|
|
|
|
$self->Fault(<<EOM); |
7624
|
|
|
|
|
|
|
Program bug in scan_id: undefined type but scan_state=$id_scan_state |
7625
|
|
|
|
|
|
|
EOM |
7626
|
|
|
|
|
|
|
} |
7627
|
|
|
|
|
|
|
$self->warning( |
7628
|
0
|
|
|
|
|
0
|
"Possible program bug in sub scan_id: undefined type but scan_state=$id_scan_state\n" |
7629
|
|
|
|
|
|
|
); |
7630
|
0
|
|
|
|
|
0
|
$self->report_definite_bug(); |
7631
|
|
|
|
|
|
|
} |
7632
|
|
|
|
|
|
|
|
7633
|
331
|
|
|
|
|
541
|
DEBUG_NSCAN && do { |
7634
|
|
|
|
|
|
|
print {*STDOUT} |
7635
|
|
|
|
|
|
|
"NSCAN: returns i=$i, tok=$tok, type=$type, state=$id_scan_state\n"; |
7636
|
|
|
|
|
|
|
}; |
7637
|
331
|
|
|
|
|
1390
|
return ( $i, $tok, $type, $id_scan_state ); |
7638
|
|
|
|
|
|
|
} ## end sub scan_id_do |
7639
|
|
|
|
|
|
|
|
7640
|
|
|
|
|
|
|
sub check_prototype { |
7641
|
137
|
|
|
137
|
0
|
419
|
my ( $proto, $package, $subname ) = @_; |
7642
|
137
|
50
|
|
|
|
451
|
return if ( !defined($package) ); |
7643
|
137
|
50
|
|
|
|
442
|
return if ( !defined($subname) ); |
7644
|
137
|
100
|
|
|
|
393
|
if ( defined($proto) ) { |
7645
|
34
|
|
|
|
|
157
|
$proto =~ s/^\s*\(\s*//; |
7646
|
34
|
|
|
|
|
131
|
$proto =~ s/\s*\)$//; |
7647
|
34
|
100
|
|
|
|
96
|
if ($proto) { |
7648
|
5
|
|
|
|
|
20
|
$ris_user_function->{$package}{$subname} = 1; |
7649
|
5
|
|
|
|
|
18
|
$ruser_function_prototype->{$package}{$subname} = "($proto)"; |
7650
|
|
|
|
|
|
|
|
7651
|
|
|
|
|
|
|
# prototypes containing '&' must be treated specially.. |
7652
|
5
|
100
|
|
|
|
36
|
if ( $proto =~ /\&/ ) { |
7653
|
|
|
|
|
|
|
|
7654
|
|
|
|
|
|
|
# right curly braces of prototypes ending in |
7655
|
|
|
|
|
|
|
# '&' may be followed by an operator |
7656
|
1
|
50
|
|
|
|
22
|
if ( $proto =~ /\&$/ ) { |
7657
|
0
|
|
|
|
|
0
|
$ris_block_function->{$package}{$subname} = 1; |
7658
|
|
|
|
|
|
|
} |
7659
|
|
|
|
|
|
|
|
7660
|
|
|
|
|
|
|
# right curly braces of prototypes NOT ending in |
7661
|
|
|
|
|
|
|
# '&' may NOT be followed by an operator |
7662
|
|
|
|
|
|
|
else { |
7663
|
1
|
|
|
|
|
4
|
$ris_block_list_function->{$package}{$subname} = 1; |
7664
|
|
|
|
|
|
|
} |
7665
|
|
|
|
|
|
|
} |
7666
|
|
|
|
|
|
|
} |
7667
|
|
|
|
|
|
|
else { |
7668
|
29
|
|
|
|
|
107
|
$ris_constant->{$package}{$subname} = 1; |
7669
|
|
|
|
|
|
|
} |
7670
|
|
|
|
|
|
|
} |
7671
|
|
|
|
|
|
|
else { |
7672
|
103
|
|
|
|
|
392
|
$ris_user_function->{$package}{$subname} = 1; |
7673
|
|
|
|
|
|
|
} |
7674
|
137
|
|
|
|
|
324
|
return; |
7675
|
|
|
|
|
|
|
} ## end sub check_prototype |
7676
|
|
|
|
|
|
|
|
7677
|
|
|
|
|
|
|
sub do_scan_package { |
7678
|
|
|
|
|
|
|
|
7679
|
|
|
|
|
|
|
# do_scan_package parses a package name |
7680
|
|
|
|
|
|
|
# it is called with $i_beg equal to the index of the first nonblank |
7681
|
|
|
|
|
|
|
# token following a 'package' token. |
7682
|
|
|
|
|
|
|
# USES GLOBAL VARIABLES: $current_package, |
7683
|
|
|
|
|
|
|
|
7684
|
|
|
|
|
|
|
# package NAMESPACE |
7685
|
|
|
|
|
|
|
# package NAMESPACE VERSION |
7686
|
|
|
|
|
|
|
# package NAMESPACE BLOCK |
7687
|
|
|
|
|
|
|
# package NAMESPACE VERSION BLOCK |
7688
|
|
|
|
|
|
|
# |
7689
|
|
|
|
|
|
|
# If VERSION is provided, package sets the $VERSION variable in the given |
7690
|
|
|
|
|
|
|
# namespace to a version object with the VERSION provided. VERSION must be |
7691
|
|
|
|
|
|
|
# a "strict" style version number as defined by the version module: a |
7692
|
|
|
|
|
|
|
# positive decimal number (integer or decimal-fraction) without |
7693
|
|
|
|
|
|
|
# exponentiation or else a dotted-decimal v-string with a leading 'v' |
7694
|
|
|
|
|
|
|
# character and at least three components. |
7695
|
|
|
|
|
|
|
# reference http://perldoc.perl.org/functions/package.html |
7696
|
|
|
|
|
|
|
|
7697
|
|
|
|
|
|
|
my ( |
7698
|
26
|
|
|
26
|
0
|
83
|
$self, $input_line, $i, |
7699
|
|
|
|
|
|
|
$i_beg, $tok, $type, |
7700
|
|
|
|
|
|
|
$rtokens, $rtoken_map, $max_token_index |
7701
|
|
|
|
|
|
|
) = @_; |
7702
|
26
|
|
|
|
|
54
|
my $package = undef; |
7703
|
26
|
|
|
|
|
51
|
my $pos_beg = $rtoken_map->[$i_beg]; |
7704
|
26
|
|
|
|
|
78
|
pos($input_line) = $pos_beg; |
7705
|
|
|
|
|
|
|
|
7706
|
|
|
|
|
|
|
# handle non-blank line; package name, if any, must follow |
7707
|
26
|
50
|
|
|
|
193
|
if ( $input_line =~ m/\G\s*((?:\w*(?:'|::))*\w*)/gc ) { |
7708
|
26
|
|
|
|
|
73
|
$package = $1; |
7709
|
26
|
50
|
33
|
|
|
161
|
$package = ( defined($1) && $1 ) ? $1 : 'main'; |
7710
|
26
|
|
|
|
|
70
|
$package =~ s/\'/::/g; |
7711
|
26
|
50
|
|
|
|
78
|
if ( $package =~ /^\:/ ) { $package = 'main' . $package } |
|
0
|
|
|
|
|
0
|
|
7712
|
26
|
|
|
|
|
55
|
$package =~ s/::$//; |
7713
|
26
|
|
|
|
|
50
|
my $pos = pos($input_line); |
7714
|
26
|
|
|
|
|
54
|
my $numc = $pos - $pos_beg; |
7715
|
26
|
|
|
|
|
72
|
$tok = 'package ' . substr( $input_line, $pos_beg, $numc ); |
7716
|
26
|
|
|
|
|
45
|
$type = 'P'; # Fix for c250, previously 'i' |
7717
|
|
|
|
|
|
|
|
7718
|
|
|
|
|
|
|
# Now we must convert back from character position |
7719
|
|
|
|
|
|
|
# to pre_token index. |
7720
|
|
|
|
|
|
|
# I don't think an error flag can occur here ..but ? |
7721
|
26
|
|
|
|
|
47
|
my $error; |
7722
|
26
|
|
|
|
|
95
|
( $i, $error ) = |
7723
|
|
|
|
|
|
|
inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index ); |
7724
|
26
|
50
|
|
|
|
76
|
if ($error) { $self->warning("Possibly invalid package\n") } |
|
0
|
|
|
|
|
0
|
|
7725
|
26
|
|
|
|
|
64
|
$current_package = $package; |
7726
|
|
|
|
|
|
|
|
7727
|
|
|
|
|
|
|
# we should now have package NAMESPACE |
7728
|
|
|
|
|
|
|
# now expecting VERSION, BLOCK, or ; to follow ... |
7729
|
|
|
|
|
|
|
# package NAMESPACE VERSION |
7730
|
|
|
|
|
|
|
# package NAMESPACE BLOCK |
7731
|
|
|
|
|
|
|
# package NAMESPACE VERSION BLOCK |
7732
|
26
|
|
|
|
|
85
|
my ( $next_nonblank_token, $i_next ) = |
7733
|
|
|
|
|
|
|
$self->find_next_nonblank_token( $i, $rtokens, $max_token_index ); |
7734
|
|
|
|
|
|
|
|
7735
|
|
|
|
|
|
|
# check that something recognizable follows, but do not parse. |
7736
|
|
|
|
|
|
|
# A VERSION number will be parsed later as a number or v-string in the |
7737
|
|
|
|
|
|
|
# normal way. What is important is to set the statement type if |
7738
|
|
|
|
|
|
|
# everything looks okay so that the operator_expected() routine |
7739
|
|
|
|
|
|
|
# knows that the number is in a package statement. |
7740
|
|
|
|
|
|
|
# Examples of valid primitive tokens that might follow are: |
7741
|
|
|
|
|
|
|
# 1235 . ; { } v3 v |
7742
|
|
|
|
|
|
|
# FIX: added a '#' since a side comment may also follow |
7743
|
|
|
|
|
|
|
# Added ':' for class attributes (for --use-feature=class, rt145706) |
7744
|
26
|
50
|
|
|
|
111
|
if ( $next_nonblank_token =~ /^([v\.\d;\{\}\#\:])|v\d|\d+$/ ) { |
7745
|
26
|
|
|
|
|
66
|
$statement_type = $tok; |
7746
|
|
|
|
|
|
|
} |
7747
|
|
|
|
|
|
|
else { |
7748
|
0
|
|
|
|
|
0
|
$self->warning( |
7749
|
|
|
|
|
|
|
"Unexpected '$next_nonblank_token' after package name '$tok'\n" |
7750
|
|
|
|
|
|
|
); |
7751
|
|
|
|
|
|
|
} |
7752
|
|
|
|
|
|
|
} |
7753
|
|
|
|
|
|
|
|
7754
|
|
|
|
|
|
|
# no match but line not blank -- |
7755
|
|
|
|
|
|
|
# could be a label with name package, like package: , for example. |
7756
|
|
|
|
|
|
|
else { |
7757
|
0
|
|
|
|
|
0
|
$type = 'k'; |
7758
|
|
|
|
|
|
|
} |
7759
|
|
|
|
|
|
|
|
7760
|
26
|
|
|
|
|
96
|
return ( $i, $tok, $type ); |
7761
|
|
|
|
|
|
|
} ## end sub do_scan_package |
7762
|
|
|
|
|
|
|
|
7763
|
|
|
|
|
|
|
{ ## begin closure for sub scan_complex_identifier |
7764
|
|
|
|
|
|
|
|
7765
|
39
|
|
|
39
|
|
399
|
use constant DEBUG_SCAN_ID => 0; |
|
39
|
|
|
|
|
91
|
|
|
39
|
|
|
|
|
5113
|
|
7766
|
|
|
|
|
|
|
|
7767
|
|
|
|
|
|
|
# Constant hash: |
7768
|
|
|
|
|
|
|
my %is_special_variable_char; |
7769
|
|
|
|
|
|
|
|
7770
|
|
|
|
|
|
|
BEGIN { |
7771
|
|
|
|
|
|
|
|
7772
|
|
|
|
|
|
|
# These are the only characters which can (currently) form special |
7773
|
|
|
|
|
|
|
# variables, like $^W: (issue c066). |
7774
|
39
|
|
|
39
|
|
422
|
my @q = |
7775
|
|
|
|
|
|
|
qw{ ? A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ }; |
7776
|
39
|
|
|
|
|
146390
|
@{is_special_variable_char}{@q} = (1) x scalar(@q); |
7777
|
|
|
|
|
|
|
} ## end BEGIN |
7778
|
|
|
|
|
|
|
|
7779
|
|
|
|
|
|
|
# These are the possible states for this scanner: |
7780
|
|
|
|
|
|
|
my $scan_state_SIGIL = '$'; |
7781
|
|
|
|
|
|
|
my $scan_state_ALPHA = 'A'; |
7782
|
|
|
|
|
|
|
my $scan_state_COLON = ':'; |
7783
|
|
|
|
|
|
|
my $scan_state_LPAREN = '('; |
7784
|
|
|
|
|
|
|
my $scan_state_RPAREN = ')'; |
7785
|
|
|
|
|
|
|
my $scan_state_AMPERSAND = '&'; |
7786
|
|
|
|
|
|
|
my $scan_state_SPLIT = '^'; |
7787
|
|
|
|
|
|
|
|
7788
|
|
|
|
|
|
|
# Only these non-blank states may be returned to caller: |
7789
|
|
|
|
|
|
|
my %is_returnable_scan_state = ( |
7790
|
|
|
|
|
|
|
$scan_state_SIGIL => 1, |
7791
|
|
|
|
|
|
|
$scan_state_AMPERSAND => 1, |
7792
|
|
|
|
|
|
|
); |
7793
|
|
|
|
|
|
|
|
7794
|
|
|
|
|
|
|
# USES GLOBAL VARIABLES: |
7795
|
|
|
|
|
|
|
# $context, $last_nonblank_token, $last_nonblank_type |
7796
|
|
|
|
|
|
|
|
7797
|
|
|
|
|
|
|
#----------- |
7798
|
|
|
|
|
|
|
# call args: |
7799
|
|
|
|
|
|
|
#----------- |
7800
|
|
|
|
|
|
|
my ( $i, $id_scan_state, $identifier, $rtokens, $max_token_index, |
7801
|
|
|
|
|
|
|
$expecting, $container_type ); |
7802
|
|
|
|
|
|
|
|
7803
|
|
|
|
|
|
|
#------------------------------------------- |
7804
|
|
|
|
|
|
|
# my variables, re-initialized on each call: |
7805
|
|
|
|
|
|
|
#------------------------------------------- |
7806
|
|
|
|
|
|
|
my $i_begin; # starting index $i |
7807
|
|
|
|
|
|
|
my $type; # returned identifier type |
7808
|
|
|
|
|
|
|
my $tok_begin; # starting token |
7809
|
|
|
|
|
|
|
my $tok; # returned token |
7810
|
|
|
|
|
|
|
my $id_scan_state_begin; # starting scan state |
7811
|
|
|
|
|
|
|
my $identifier_begin; # starting identifier |
7812
|
|
|
|
|
|
|
my $i_save; # a last good index, in case of error |
7813
|
|
|
|
|
|
|
my $message; # hold error message for log file |
7814
|
|
|
|
|
|
|
my $tok_is_blank; |
7815
|
|
|
|
|
|
|
my $last_tok_is_blank; |
7816
|
|
|
|
|
|
|
my $in_prototype_or_signature; |
7817
|
|
|
|
|
|
|
my $saw_alpha; |
7818
|
|
|
|
|
|
|
my $saw_type; |
7819
|
|
|
|
|
|
|
my $allow_tick; |
7820
|
|
|
|
|
|
|
|
7821
|
|
|
|
|
|
|
sub initialize_my_scan_id_vars { |
7822
|
|
|
|
|
|
|
|
7823
|
|
|
|
|
|
|
# Initialize all 'my' vars on entry |
7824
|
486
|
|
|
486
|
0
|
873
|
$i_begin = $i; |
7825
|
486
|
|
|
|
|
1894
|
$type = EMPTY_STRING; |
7826
|
486
|
|
|
|
|
922
|
$tok_begin = $rtokens->[$i_begin]; |
7827
|
486
|
|
|
|
|
1126
|
$tok = $tok_begin; |
7828
|
486
|
50
|
|
|
|
1611
|
if ( $tok_begin eq ':' ) { $tok_begin = '::' } |
|
0
|
|
|
|
|
0
|
|
7829
|
486
|
|
|
|
|
849
|
$id_scan_state_begin = $id_scan_state; |
7830
|
486
|
|
|
|
|
821
|
$identifier_begin = $identifier; |
7831
|
486
|
|
|
|
|
831
|
$i_save = undef; |
7832
|
|
|
|
|
|
|
|
7833
|
486
|
|
|
|
|
855
|
$message = EMPTY_STRING; |
7834
|
486
|
|
|
|
|
822
|
$tok_is_blank = undef; # a flag to speed things up |
7835
|
486
|
|
|
|
|
738
|
$last_tok_is_blank = undef; |
7836
|
|
|
|
|
|
|
|
7837
|
486
|
|
100
|
|
|
1850
|
$in_prototype_or_signature = |
7838
|
|
|
|
|
|
|
$container_type && $container_type =~ /^sub\b/; |
7839
|
|
|
|
|
|
|
|
7840
|
|
|
|
|
|
|
# these flags will be used to help figure out the type: |
7841
|
486
|
|
|
|
|
755
|
$saw_alpha = undef; |
7842
|
486
|
|
|
|
|
751
|
$saw_type = undef; |
7843
|
|
|
|
|
|
|
|
7844
|
|
|
|
|
|
|
# allow old package separator (') except in 'use' statement |
7845
|
486
|
|
|
|
|
929
|
$allow_tick = ( $last_nonblank_token ne 'use' ); |
7846
|
486
|
|
|
|
|
824
|
return; |
7847
|
|
|
|
|
|
|
} ## end sub initialize_my_scan_id_vars |
7848
|
|
|
|
|
|
|
|
7849
|
|
|
|
|
|
|
#---------------------------------- |
7850
|
|
|
|
|
|
|
# Routines for handling scan states |
7851
|
|
|
|
|
|
|
#---------------------------------- |
7852
|
|
|
|
|
|
|
sub do_id_scan_state_dollar { |
7853
|
|
|
|
|
|
|
|
7854
|
514
|
|
|
514
|
0
|
882
|
my $self = shift; |
7855
|
|
|
|
|
|
|
|
7856
|
|
|
|
|
|
|
# We saw a sigil, now looking to start a variable name |
7857
|
514
|
100
|
66
|
|
|
4189
|
if ( $tok eq '$' ) { |
|
|
100
|
33
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
7858
|
|
|
|
|
|
|
|
7859
|
31
|
|
|
|
|
93
|
$identifier .= $tok; |
7860
|
|
|
|
|
|
|
|
7861
|
|
|
|
|
|
|
# we've got a punctuation variable if end of line (punct.t) |
7862
|
31
|
50
|
|
|
|
162
|
if ( $i == $max_token_index ) { |
7863
|
0
|
|
|
|
|
0
|
$type = 'i'; |
7864
|
0
|
|
|
|
|
0
|
$id_scan_state = EMPTY_STRING; |
7865
|
|
|
|
|
|
|
} |
7866
|
|
|
|
|
|
|
} |
7867
|
|
|
|
|
|
|
elsif ( $tok =~ /^\w/ ) { # alphanumeric .. |
7868
|
253
|
|
|
|
|
454
|
$saw_alpha = 1; |
7869
|
253
|
|
|
|
|
517
|
$identifier .= $tok; |
7870
|
|
|
|
|
|
|
|
7871
|
|
|
|
|
|
|
# now need :: except for special digit vars like '$1' (c208) |
7872
|
253
|
100
|
|
|
|
853
|
$id_scan_state = $tok =~ /^\d/ ? EMPTY_STRING : $scan_state_COLON; |
7873
|
|
|
|
|
|
|
} |
7874
|
|
|
|
|
|
|
elsif ( $tok eq '::' ) { |
7875
|
16
|
|
|
|
|
51
|
$id_scan_state = $scan_state_ALPHA; |
7876
|
16
|
|
|
|
|
40
|
$identifier .= $tok; |
7877
|
|
|
|
|
|
|
} |
7878
|
|
|
|
|
|
|
|
7879
|
|
|
|
|
|
|
# POSTDEFREF ->@ ->% ->& ->* |
7880
|
|
|
|
|
|
|
elsif ( ( $tok =~ /^[\@\%\&\*]$/ ) && $identifier =~ /\-\>$/ ) { |
7881
|
0
|
|
|
|
|
0
|
$identifier .= $tok; |
7882
|
|
|
|
|
|
|
} |
7883
|
|
|
|
|
|
|
elsif ( $tok eq "'" && $allow_tick ) { # alphanumeric .. |
7884
|
0
|
|
|
|
|
0
|
$saw_alpha = 1; |
7885
|
0
|
|
|
|
|
0
|
$id_scan_state = $scan_state_COLON; # now need :: |
7886
|
0
|
|
|
|
|
0
|
$identifier .= $tok; |
7887
|
|
|
|
|
|
|
|
7888
|
|
|
|
|
|
|
# Perl will accept leading digits in identifiers, |
7889
|
|
|
|
|
|
|
# although they may not always produce useful results. |
7890
|
|
|
|
|
|
|
# Something like $main::0 is ok. But this also works: |
7891
|
|
|
|
|
|
|
# |
7892
|
|
|
|
|
|
|
# sub howdy::123::bubba{ print "bubba $54321!\n" } |
7893
|
|
|
|
|
|
|
# howdy::123::bubba(); |
7894
|
|
|
|
|
|
|
# |
7895
|
|
|
|
|
|
|
} |
7896
|
|
|
|
|
|
|
elsif ( $tok eq '#' ) { |
7897
|
|
|
|
|
|
|
|
7898
|
99
|
|
|
|
|
259
|
my $is_punct_var = $identifier eq '$$'; |
7899
|
|
|
|
|
|
|
|
7900
|
|
|
|
|
|
|
# side comment or identifier? |
7901
|
99
|
100
|
66
|
|
|
959
|
if ( |
|
|
|
66
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
|
33
|
|
|
|
|
7902
|
|
|
|
|
|
|
|
7903
|
|
|
|
|
|
|
# A '#' starts a comment if it follows a space. For example, |
7904
|
|
|
|
|
|
|
# the following is equivalent to $ans=40. |
7905
|
|
|
|
|
|
|
# my $ # |
7906
|
|
|
|
|
|
|
# ans = 40; |
7907
|
|
|
|
|
|
|
!$last_tok_is_blank |
7908
|
|
|
|
|
|
|
|
7909
|
|
|
|
|
|
|
# a # inside a prototype or signature can only start a |
7910
|
|
|
|
|
|
|
# comment |
7911
|
|
|
|
|
|
|
&& !$in_prototype_or_signature |
7912
|
|
|
|
|
|
|
|
7913
|
|
|
|
|
|
|
# these are valid punctuation vars: *# %# @# $# |
7914
|
|
|
|
|
|
|
# May also be '$#array' or POSTDEFREF ->$# |
7915
|
|
|
|
|
|
|
&& ( $identifier =~ /^[\%\@\$\*]$/ |
7916
|
|
|
|
|
|
|
|| $identifier =~ /\$$/ ) |
7917
|
|
|
|
|
|
|
|
7918
|
|
|
|
|
|
|
# but a '#' after '$$' is a side comment; see c147 |
7919
|
|
|
|
|
|
|
&& !$is_punct_var |
7920
|
|
|
|
|
|
|
|
7921
|
|
|
|
|
|
|
) |
7922
|
|
|
|
|
|
|
{ |
7923
|
95
|
|
|
|
|
221
|
$identifier .= $tok; # keep same state, a $ could follow |
7924
|
|
|
|
|
|
|
} |
7925
|
|
|
|
|
|
|
else { |
7926
|
|
|
|
|
|
|
|
7927
|
|
|
|
|
|
|
# otherwise it is a side comment |
7928
|
4
|
50
|
|
|
|
20
|
if ( $identifier eq '->' ) { } |
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
7929
|
0
|
|
|
|
|
0
|
elsif ($is_punct_var) { $type = 'i' } |
7930
|
4
|
|
|
|
|
7
|
elsif ( $id_scan_state eq $scan_state_SIGIL ) { $type = 't' } |
7931
|
0
|
|
|
|
|
0
|
else { $type = 'i' } |
7932
|
4
|
|
|
|
|
7
|
$i = $i_save; |
7933
|
4
|
|
|
|
|
8
|
$id_scan_state = EMPTY_STRING; |
7934
|
|
|
|
|
|
|
} |
7935
|
|
|
|
|
|
|
} |
7936
|
|
|
|
|
|
|
|
7937
|
|
|
|
|
|
|
elsif ( $tok eq '{' ) { |
7938
|
|
|
|
|
|
|
|
7939
|
|
|
|
|
|
|
# check for something like ${#} or ${?}, where ? is a special char |
7940
|
38
|
100
|
100
|
|
|
596
|
if ( |
|
|
|
66
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
7941
|
|
|
|
|
|
|
( |
7942
|
|
|
|
|
|
|
$identifier eq '$' |
7943
|
|
|
|
|
|
|
|| $identifier eq '@' |
7944
|
|
|
|
|
|
|
|| $identifier eq '$#' |
7945
|
|
|
|
|
|
|
) |
7946
|
|
|
|
|
|
|
&& $i + 2 <= $max_token_index |
7947
|
|
|
|
|
|
|
&& $rtokens->[ $i + 2 ] eq '}' |
7948
|
|
|
|
|
|
|
&& $rtokens->[ $i + 1 ] !~ /[\s\w]/ |
7949
|
|
|
|
|
|
|
) |
7950
|
|
|
|
|
|
|
{ |
7951
|
1
|
|
|
|
|
4
|
my $next2 = $rtokens->[ $i + 2 ]; |
7952
|
1
|
|
|
|
|
6
|
my $next1 = $rtokens->[ $i + 1 ]; |
7953
|
1
|
|
|
|
|
6
|
$identifier .= $tok . $next1 . $next2; |
7954
|
1
|
|
|
|
|
3
|
$i += 2; |
7955
|
1
|
|
|
|
|
2
|
$id_scan_state = EMPTY_STRING; |
7956
|
|
|
|
|
|
|
} |
7957
|
|
|
|
|
|
|
else { |
7958
|
|
|
|
|
|
|
|
7959
|
|
|
|
|
|
|
# skip something like ${xxx} or ->{ |
7960
|
37
|
|
|
|
|
86
|
$id_scan_state = EMPTY_STRING; |
7961
|
|
|
|
|
|
|
|
7962
|
|
|
|
|
|
|
# if this is the first token of a line, any tokens for this |
7963
|
|
|
|
|
|
|
# identifier have already been accumulated |
7964
|
37
|
100
|
66
|
|
|
163
|
if ( $identifier eq '$' || $i == 0 ) { |
7965
|
26
|
|
|
|
|
49
|
$identifier = EMPTY_STRING; |
7966
|
|
|
|
|
|
|
} |
7967
|
37
|
|
|
|
|
77
|
$i = $i_save; |
7968
|
|
|
|
|
|
|
} |
7969
|
|
|
|
|
|
|
} |
7970
|
|
|
|
|
|
|
|
7971
|
|
|
|
|
|
|
# space ok after leading $ % * & @ |
7972
|
|
|
|
|
|
|
elsif ( $tok =~ /^\s*$/ ) { |
7973
|
|
|
|
|
|
|
|
7974
|
20
|
|
|
|
|
58
|
$tok_is_blank = 1; |
7975
|
|
|
|
|
|
|
|
7976
|
|
|
|
|
|
|
# note: an id with a leading '&' does not actually come this way |
7977
|
20
|
50
|
|
|
|
80
|
if ( $identifier =~ /^[\$\%\*\&\@]/ ) { |
|
|
0
|
|
|
|
|
|
7978
|
|
|
|
|
|
|
|
7979
|
20
|
100
|
|
|
|
69
|
if ( length($identifier) > 1 ) { |
7980
|
8
|
|
|
|
|
15
|
$id_scan_state = EMPTY_STRING; |
7981
|
8
|
|
|
|
|
16
|
$i = $i_save; |
7982
|
8
|
|
|
|
|
20
|
$type = 'i'; # probably punctuation variable |
7983
|
|
|
|
|
|
|
} |
7984
|
|
|
|
|
|
|
else { |
7985
|
|
|
|
|
|
|
|
7986
|
|
|
|
|
|
|
# fix c139: trim line-ending type 't' |
7987
|
12
|
100
|
|
|
|
60
|
if ( $i == $max_token_index ) { |
|
|
100
|
|
|
|
|
|
7988
|
1
|
|
|
|
|
3
|
$i = $i_save; |
7989
|
1
|
|
|
|
|
3
|
$type = 't'; |
7990
|
|
|
|
|
|
|
} |
7991
|
|
|
|
|
|
|
|
7992
|
|
|
|
|
|
|
# spaces after $'s are common, and space after @ |
7993
|
|
|
|
|
|
|
# is harmless, so only complain about space |
7994
|
|
|
|
|
|
|
# after other type characters. Space after $ and |
7995
|
|
|
|
|
|
|
# @ will be removed in formatting. Report space |
7996
|
|
|
|
|
|
|
# after % and * because they might indicate a |
7997
|
|
|
|
|
|
|
# parsing error. In other words '% ' might be a |
7998
|
|
|
|
|
|
|
# modulo operator. Delete this warning if it |
7999
|
|
|
|
|
|
|
# gets annoying. |
8000
|
|
|
|
|
|
|
elsif ( $identifier !~ /^[\@\$]$/ ) { |
8001
|
1
|
|
|
|
|
5
|
$message = |
8002
|
|
|
|
|
|
|
"Space in identifier, following $identifier\n"; |
8003
|
|
|
|
|
|
|
} |
8004
|
|
|
|
|
|
|
else { |
8005
|
|
|
|
|
|
|
## ok: silently accept space after '$' and '@' sigils |
8006
|
|
|
|
|
|
|
} |
8007
|
|
|
|
|
|
|
} |
8008
|
|
|
|
|
|
|
} |
8009
|
|
|
|
|
|
|
|
8010
|
|
|
|
|
|
|
elsif ( $identifier eq '->' ) { |
8011
|
|
|
|
|
|
|
|
8012
|
|
|
|
|
|
|
# space after '->' is ok except at line end .. |
8013
|
|
|
|
|
|
|
# so trim line-ending in type '->' (fixes c139) |
8014
|
0
|
0
|
|
|
|
0
|
if ( $i == $max_token_index ) { |
8015
|
0
|
|
|
|
|
0
|
$i = $i_save; |
8016
|
0
|
|
|
|
|
0
|
$type = '->'; |
8017
|
|
|
|
|
|
|
} |
8018
|
|
|
|
|
|
|
} |
8019
|
|
|
|
|
|
|
|
8020
|
|
|
|
|
|
|
# stop at space after something other than -> or sigil |
8021
|
|
|
|
|
|
|
# Example of what can arrive here: |
8022
|
|
|
|
|
|
|
# eval { $MyClass->$$ }; |
8023
|
|
|
|
|
|
|
else { |
8024
|
0
|
|
|
|
|
0
|
$id_scan_state = EMPTY_STRING; |
8025
|
0
|
|
|
|
|
0
|
$i = $i_save; |
8026
|
0
|
|
|
|
|
0
|
$type = 'i'; |
8027
|
|
|
|
|
|
|
} |
8028
|
|
|
|
|
|
|
} |
8029
|
|
|
|
|
|
|
elsif ( $tok eq '^' ) { |
8030
|
|
|
|
|
|
|
|
8031
|
|
|
|
|
|
|
# check for some special variables like $^ $^W |
8032
|
11
|
50
|
|
|
|
50
|
if ( $identifier =~ /^[\$\*\@\%]$/ ) { |
8033
|
11
|
|
|
|
|
32
|
$identifier .= $tok; |
8034
|
11
|
|
|
|
|
23
|
$type = 'i'; |
8035
|
|
|
|
|
|
|
|
8036
|
|
|
|
|
|
|
# There may be one more character, not a space, after the ^ |
8037
|
11
|
|
|
|
|
31
|
my $next1 = $rtokens->[ $i + 1 ]; |
8038
|
11
|
|
|
|
|
29
|
my $chr = substr( $next1, 0, 1 ); |
8039
|
11
|
100
|
|
|
|
45
|
if ( $is_special_variable_char{$chr} ) { |
8040
|
|
|
|
|
|
|
|
8041
|
|
|
|
|
|
|
# It is something like $^W |
8042
|
|
|
|
|
|
|
# Test case (c066) : $^Oeq'linux' |
8043
|
9
|
|
|
|
|
19
|
$i++; |
8044
|
9
|
|
|
|
|
19
|
$identifier .= $next1; |
8045
|
|
|
|
|
|
|
|
8046
|
|
|
|
|
|
|
# If pretoken $next1 is more than one character long, |
8047
|
|
|
|
|
|
|
# set a flag indicating that it needs to be split. |
8048
|
9
|
100
|
|
|
|
38
|
$id_scan_state = |
8049
|
|
|
|
|
|
|
( length($next1) > 1 ) ? $scan_state_SPLIT : EMPTY_STRING; |
8050
|
|
|
|
|
|
|
} |
8051
|
|
|
|
|
|
|
else { |
8052
|
|
|
|
|
|
|
|
8053
|
|
|
|
|
|
|
# it is just $^ |
8054
|
|
|
|
|
|
|
# Simple test case (c065): '$aa=$^if($bb)'; |
8055
|
2
|
|
|
|
|
4
|
$id_scan_state = EMPTY_STRING; |
8056
|
|
|
|
|
|
|
} |
8057
|
|
|
|
|
|
|
} |
8058
|
|
|
|
|
|
|
else { |
8059
|
0
|
|
|
|
|
0
|
$id_scan_state = EMPTY_STRING; |
8060
|
0
|
|
|
|
|
0
|
$i = $i_save; |
8061
|
|
|
|
|
|
|
} |
8062
|
|
|
|
|
|
|
} |
8063
|
|
|
|
|
|
|
else { # something else |
8064
|
|
|
|
|
|
|
|
8065
|
46
|
100
|
66
|
|
|
418
|
if ( $in_prototype_or_signature && $tok =~ /^[\),=#]/ ) { |
|
|
100
|
66
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
8066
|
|
|
|
|
|
|
|
8067
|
|
|
|
|
|
|
# We might be in an extrusion of |
8068
|
|
|
|
|
|
|
# sub foo2 ( $first, $, $third ) { |
8069
|
|
|
|
|
|
|
# looking at a line starting with a comma, like |
8070
|
|
|
|
|
|
|
# $ |
8071
|
|
|
|
|
|
|
# , |
8072
|
|
|
|
|
|
|
# in this case the comma ends the signature variable |
8073
|
|
|
|
|
|
|
# '$' which will have been previously marked type 't' |
8074
|
|
|
|
|
|
|
# rather than 'i'. |
8075
|
3
|
100
|
|
|
|
12
|
if ( $i == $i_begin ) { |
8076
|
1
|
|
|
|
|
4
|
$identifier = EMPTY_STRING; |
8077
|
1
|
|
|
|
|
12
|
$type = EMPTY_STRING; |
8078
|
|
|
|
|
|
|
} |
8079
|
|
|
|
|
|
|
|
8080
|
|
|
|
|
|
|
# at a # we have to mark as type 't' because more may |
8081
|
|
|
|
|
|
|
# follow, otherwise, in a signature we can let '$' be an |
8082
|
|
|
|
|
|
|
# identifier here for better formatting. |
8083
|
|
|
|
|
|
|
# See 'mangle4.in' for a test case. |
8084
|
|
|
|
|
|
|
else { |
8085
|
2
|
|
|
|
|
4
|
$type = 'i'; |
8086
|
2
|
50
|
33
|
|
|
14
|
if ( $id_scan_state eq $scan_state_SIGIL && $tok eq '#' ) { |
8087
|
0
|
|
|
|
|
0
|
$type = 't'; |
8088
|
|
|
|
|
|
|
} |
8089
|
2
|
|
|
|
|
42
|
$i = $i_save; |
8090
|
|
|
|
|
|
|
} |
8091
|
3
|
|
|
|
|
8
|
$id_scan_state = EMPTY_STRING; |
8092
|
|
|
|
|
|
|
} |
8093
|
|
|
|
|
|
|
|
8094
|
|
|
|
|
|
|
# check for various punctuation variables |
8095
|
|
|
|
|
|
|
elsif ( $identifier =~ /^[\$\*\@\%]$/ ) { |
8096
|
35
|
|
|
|
|
110
|
$identifier .= $tok; |
8097
|
|
|
|
|
|
|
} |
8098
|
|
|
|
|
|
|
|
8099
|
|
|
|
|
|
|
# POSTDEFREF: Postfix reference ->$* ->%* ->@* ->** ->&* ->$#* |
8100
|
|
|
|
|
|
|
elsif ($tok eq '*' |
8101
|
|
|
|
|
|
|
&& $identifier =~ /\-\>([\@\%\$\*\&]|\$\#)$/ ) |
8102
|
|
|
|
|
|
|
{ |
8103
|
6
|
|
|
|
|
14
|
$identifier .= $tok; |
8104
|
|
|
|
|
|
|
} |
8105
|
|
|
|
|
|
|
|
8106
|
|
|
|
|
|
|
elsif ( $identifier eq '$#' ) { |
8107
|
|
|
|
|
|
|
|
8108
|
2
|
50
|
|
|
|
12
|
if ( $tok eq '{' ) { $type = 'i'; $i = $i_save } |
|
0
|
50
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
8109
|
|
|
|
|
|
|
|
8110
|
|
|
|
|
|
|
# perl seems to allow just these: $#: $#- $#+ |
8111
|
|
|
|
|
|
|
elsif ( $tok =~ /^[\:\-\+]$/ ) { |
8112
|
0
|
|
|
|
|
0
|
$type = 'i'; |
8113
|
0
|
|
|
|
|
0
|
$identifier .= $tok; |
8114
|
|
|
|
|
|
|
} |
8115
|
|
|
|
|
|
|
else { |
8116
|
2
|
|
|
|
|
6
|
$i = $i_save; |
8117
|
2
|
|
|
|
|
10
|
$self->write_logfile_entry( |
8118
|
|
|
|
|
|
|
'Use of $# is deprecated' . "\n" ); |
8119
|
|
|
|
|
|
|
} |
8120
|
|
|
|
|
|
|
} |
8121
|
|
|
|
|
|
|
elsif ( $identifier eq '$$' ) { |
8122
|
|
|
|
|
|
|
|
8123
|
|
|
|
|
|
|
# perl does not allow references to punctuation |
8124
|
|
|
|
|
|
|
# variables without braces. For example, this |
8125
|
|
|
|
|
|
|
# won't work: |
8126
|
|
|
|
|
|
|
# $:=\4; |
8127
|
|
|
|
|
|
|
# $a = $$:; |
8128
|
|
|
|
|
|
|
# You would have to use |
8129
|
|
|
|
|
|
|
# $a = ${$:}; |
8130
|
|
|
|
|
|
|
|
8131
|
|
|
|
|
|
|
# '$$' alone is punctuation variable for PID |
8132
|
0
|
|
|
|
|
0
|
$i = $i_save; |
8133
|
0
|
0
|
|
|
|
0
|
if ( $tok eq '{' ) { $type = 't' } |
|
0
|
|
|
|
|
0
|
|
8134
|
0
|
|
|
|
|
0
|
else { $type = 'i' } |
8135
|
|
|
|
|
|
|
} |
8136
|
|
|
|
|
|
|
elsif ( $identifier eq '->' ) { |
8137
|
0
|
|
|
|
|
0
|
$i = $i_save; |
8138
|
|
|
|
|
|
|
} |
8139
|
|
|
|
|
|
|
else { |
8140
|
0
|
|
|
|
|
0
|
$i = $i_save; |
8141
|
0
|
0
|
|
|
|
0
|
if ( length($identifier) == 1 ) { |
8142
|
0
|
|
|
|
|
0
|
$identifier = EMPTY_STRING; |
8143
|
|
|
|
|
|
|
} |
8144
|
|
|
|
|
|
|
} |
8145
|
46
|
|
|
|
|
109
|
$id_scan_state = EMPTY_STRING; |
8146
|
|
|
|
|
|
|
} |
8147
|
514
|
|
|
|
|
910
|
return; |
8148
|
|
|
|
|
|
|
} ## end sub do_id_scan_state_dollar |
8149
|
|
|
|
|
|
|
|
8150
|
|
|
|
|
|
|
sub do_id_scan_state_alpha { |
8151
|
|
|
|
|
|
|
|
8152
|
113
|
|
|
113
|
0
|
226
|
my $self = shift; |
8153
|
|
|
|
|
|
|
|
8154
|
|
|
|
|
|
|
# looking for alphanumeric after :: |
8155
|
113
|
|
|
|
|
365
|
$tok_is_blank = $tok =~ /^\s*$/; |
8156
|
|
|
|
|
|
|
|
8157
|
113
|
100
|
33
|
|
|
472
|
if ( $tok =~ /^\w/ ) { # found it |
|
|
50
|
66
|
|
|
|
|
|
|
50
|
33
|
|
|
|
|
|
|
50
|
|
|
|
|
|
8158
|
100
|
|
|
|
|
199
|
$identifier .= $tok; |
8159
|
100
|
|
|
|
|
201
|
$id_scan_state = $scan_state_COLON; # now need :: |
8160
|
100
|
|
|
|
|
165
|
$saw_alpha = 1; |
8161
|
|
|
|
|
|
|
} |
8162
|
|
|
|
|
|
|
elsif ( $tok eq "'" && $allow_tick ) { |
8163
|
0
|
|
|
|
|
0
|
$identifier .= $tok; |
8164
|
0
|
|
|
|
|
0
|
$id_scan_state = $scan_state_COLON; # now need :: |
8165
|
0
|
|
|
|
|
0
|
$saw_alpha = 1; |
8166
|
|
|
|
|
|
|
} |
8167
|
|
|
|
|
|
|
elsif ( $tok_is_blank && $identifier =~ /^sub / ) { |
8168
|
0
|
|
|
|
|
0
|
$id_scan_state = $scan_state_LPAREN; |
8169
|
0
|
|
|
|
|
0
|
$identifier .= $tok; |
8170
|
|
|
|
|
|
|
} |
8171
|
|
|
|
|
|
|
elsif ( $tok eq '(' && $identifier =~ /^sub / ) { |
8172
|
0
|
|
|
|
|
0
|
$id_scan_state = $scan_state_RPAREN; |
8173
|
0
|
|
|
|
|
0
|
$identifier .= $tok; |
8174
|
|
|
|
|
|
|
} |
8175
|
|
|
|
|
|
|
else { |
8176
|
13
|
|
|
|
|
22
|
$id_scan_state = EMPTY_STRING; |
8177
|
13
|
|
|
|
|
25
|
$i = $i_save; |
8178
|
|
|
|
|
|
|
} |
8179
|
113
|
|
|
|
|
220
|
return; |
8180
|
|
|
|
|
|
|
} ## end sub do_id_scan_state_alpha |
8181
|
|
|
|
|
|
|
|
8182
|
|
|
|
|
|
|
sub do_id_scan_state_colon { |
8183
|
|
|
|
|
|
|
|
8184
|
434
|
|
|
434
|
0
|
792
|
my $self = shift; |
8185
|
|
|
|
|
|
|
|
8186
|
|
|
|
|
|
|
# looking for possible :: after alphanumeric |
8187
|
|
|
|
|
|
|
|
8188
|
434
|
|
|
|
|
1508
|
$tok_is_blank = $tok =~ /^\s*$/; |
8189
|
|
|
|
|
|
|
|
8190
|
434
|
100
|
66
|
|
|
3383
|
if ( $tok eq '::' ) { # got it |
|
|
100
|
66
|
|
|
|
|
|
|
100
|
66
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
8191
|
97
|
|
|
|
|
173
|
$identifier .= $tok; |
8192
|
97
|
|
|
|
|
196
|
$id_scan_state = $scan_state_ALPHA; # now require alpha |
8193
|
|
|
|
|
|
|
} |
8194
|
|
|
|
|
|
|
elsif ( $tok =~ /^\w/ ) { # more alphanumeric is ok here |
8195
|
20
|
|
|
|
|
53
|
$identifier .= $tok; |
8196
|
20
|
|
|
|
|
62
|
$id_scan_state = $scan_state_COLON; # now need :: |
8197
|
20
|
|
|
|
|
38
|
$saw_alpha = 1; |
8198
|
|
|
|
|
|
|
} |
8199
|
|
|
|
|
|
|
elsif ( $tok eq "'" && $allow_tick ) { # tick |
8200
|
|
|
|
|
|
|
|
8201
|
12
|
50
|
|
|
|
37
|
if ( $is_keyword{$identifier} ) { |
8202
|
0
|
|
|
|
|
0
|
$id_scan_state = EMPTY_STRING; # that's all |
8203
|
0
|
|
|
|
|
0
|
$i = $i_save; |
8204
|
|
|
|
|
|
|
} |
8205
|
|
|
|
|
|
|
else { |
8206
|
12
|
|
|
|
|
22
|
$identifier .= $tok; |
8207
|
|
|
|
|
|
|
} |
8208
|
|
|
|
|
|
|
} |
8209
|
|
|
|
|
|
|
elsif ( $tok_is_blank && $identifier =~ /^sub / ) { |
8210
|
0
|
|
|
|
|
0
|
$id_scan_state = $scan_state_LPAREN; |
8211
|
0
|
|
|
|
|
0
|
$identifier .= $tok; |
8212
|
|
|
|
|
|
|
} |
8213
|
|
|
|
|
|
|
elsif ( $tok eq '(' && $identifier =~ /^sub / ) { |
8214
|
0
|
|
|
|
|
0
|
$id_scan_state = $scan_state_RPAREN; |
8215
|
0
|
|
|
|
|
0
|
$identifier .= $tok; |
8216
|
|
|
|
|
|
|
} |
8217
|
|
|
|
|
|
|
else { |
8218
|
305
|
|
|
|
|
579
|
$id_scan_state = EMPTY_STRING; # that's all |
8219
|
305
|
|
|
|
|
526
|
$i = $i_save; |
8220
|
|
|
|
|
|
|
} |
8221
|
434
|
|
|
|
|
687
|
return; |
8222
|
|
|
|
|
|
|
} ## end sub do_id_scan_state_colon |
8223
|
|
|
|
|
|
|
|
8224
|
|
|
|
|
|
|
sub do_id_scan_state_left_paren { |
8225
|
|
|
|
|
|
|
|
8226
|
0
|
|
|
0
|
0
|
0
|
my $self = shift; |
8227
|
|
|
|
|
|
|
|
8228
|
|
|
|
|
|
|
# looking for possible '(' of a prototype |
8229
|
|
|
|
|
|
|
|
8230
|
0
|
0
|
|
|
|
0
|
if ( $tok eq '(' ) { # got it |
|
|
0
|
|
|
|
|
|
8231
|
0
|
|
|
|
|
0
|
$identifier .= $tok; |
8232
|
0
|
|
|
|
|
0
|
$id_scan_state = $scan_state_RPAREN; # now find the end of it |
8233
|
|
|
|
|
|
|
} |
8234
|
|
|
|
|
|
|
elsif ( $tok =~ /^\s*$/ ) { # blank - keep going |
8235
|
0
|
|
|
|
|
0
|
$identifier .= $tok; |
8236
|
0
|
|
|
|
|
0
|
$tok_is_blank = 1; |
8237
|
|
|
|
|
|
|
} |
8238
|
|
|
|
|
|
|
else { |
8239
|
0
|
|
|
|
|
0
|
$id_scan_state = EMPTY_STRING; # that's all - no prototype |
8240
|
0
|
|
|
|
|
0
|
$i = $i_save; |
8241
|
|
|
|
|
|
|
} |
8242
|
0
|
|
|
|
|
0
|
return; |
8243
|
|
|
|
|
|
|
} ## end sub do_id_scan_state_left_paren |
8244
|
|
|
|
|
|
|
|
8245
|
|
|
|
|
|
|
sub do_id_scan_state_right_paren { |
8246
|
|
|
|
|
|
|
|
8247
|
0
|
|
|
0
|
0
|
0
|
my $self = shift; |
8248
|
|
|
|
|
|
|
|
8249
|
|
|
|
|
|
|
# looking for a ')' of prototype to close a '(' |
8250
|
|
|
|
|
|
|
|
8251
|
0
|
|
|
|
|
0
|
$tok_is_blank = $tok =~ /^\s*$/; |
8252
|
|
|
|
|
|
|
|
8253
|
0
|
0
|
|
|
|
0
|
if ( $tok eq ')' ) { # got it |
|
|
0
|
|
|
|
|
|
8254
|
0
|
|
|
|
|
0
|
$identifier .= $tok; |
8255
|
0
|
|
|
|
|
0
|
$id_scan_state = EMPTY_STRING; # all done |
8256
|
|
|
|
|
|
|
} |
8257
|
|
|
|
|
|
|
elsif ( $tok =~ /^[\s\$\%\\\*\@\&\;]/ ) { |
8258
|
0
|
|
|
|
|
0
|
$identifier .= $tok; |
8259
|
|
|
|
|
|
|
} |
8260
|
|
|
|
|
|
|
else { # probable error in script, but keep going |
8261
|
0
|
|
|
|
|
0
|
warning("Unexpected '$tok' while seeking end of prototype\n"); |
8262
|
0
|
|
|
|
|
0
|
$identifier .= $tok; |
8263
|
|
|
|
|
|
|
} |
8264
|
0
|
|
|
|
|
0
|
return; |
8265
|
|
|
|
|
|
|
} ## end sub do_id_scan_state_right_paren |
8266
|
|
|
|
|
|
|
|
8267
|
|
|
|
|
|
|
sub do_id_scan_state_ampersand { |
8268
|
|
|
|
|
|
|
|
8269
|
105
|
|
|
105
|
0
|
267
|
my $self = shift; |
8270
|
|
|
|
|
|
|
|
8271
|
|
|
|
|
|
|
# Starting sub call after seeing an '&' |
8272
|
|
|
|
|
|
|
|
8273
|
105
|
100
|
33
|
|
|
705
|
if ( $tok =~ /^[\$\w]/ ) { # alphanumeric .. |
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
8274
|
88
|
|
|
|
|
209
|
$id_scan_state = $scan_state_COLON; # now need :: |
8275
|
88
|
|
|
|
|
164
|
$saw_alpha = 1; |
8276
|
88
|
|
|
|
|
200
|
$identifier .= $tok; |
8277
|
|
|
|
|
|
|
} |
8278
|
|
|
|
|
|
|
elsif ( $tok eq "'" && $allow_tick ) { # alphanumeric .. |
8279
|
0
|
|
|
|
|
0
|
$id_scan_state = $scan_state_COLON; # now need :: |
8280
|
0
|
|
|
|
|
0
|
$saw_alpha = 1; |
8281
|
0
|
|
|
|
|
0
|
$identifier .= $tok; |
8282
|
|
|
|
|
|
|
} |
8283
|
|
|
|
|
|
|
elsif ( $tok =~ /^\s*$/ ) { # allow space |
8284
|
2
|
|
|
|
|
5
|
$tok_is_blank = 1; |
8285
|
|
|
|
|
|
|
|
8286
|
|
|
|
|
|
|
# fix c139: trim line-ending type 't' |
8287
|
2
|
50
|
33
|
|
|
14
|
if ( length($identifier) == 1 && $i == $max_token_index ) { |
8288
|
2
|
|
|
|
|
3
|
$i = $i_save; |
8289
|
2
|
|
|
|
|
7
|
$type = 't'; |
8290
|
|
|
|
|
|
|
} |
8291
|
|
|
|
|
|
|
} |
8292
|
|
|
|
|
|
|
elsif ( $tok eq '::' ) { # leading :: |
8293
|
0
|
|
|
|
|
0
|
$id_scan_state = $scan_state_ALPHA; # accept alpha next |
8294
|
0
|
|
|
|
|
0
|
$identifier .= $tok; |
8295
|
|
|
|
|
|
|
} |
8296
|
|
|
|
|
|
|
elsif ( $tok eq '{' ) { |
8297
|
15
|
50
|
33
|
|
|
74
|
if ( $identifier eq '&' || $i == 0 ) { |
8298
|
15
|
|
|
|
|
33
|
$identifier = EMPTY_STRING; |
8299
|
|
|
|
|
|
|
} |
8300
|
15
|
|
|
|
|
39
|
$i = $i_save; |
8301
|
15
|
|
|
|
|
28
|
$id_scan_state = EMPTY_STRING; |
8302
|
|
|
|
|
|
|
} |
8303
|
|
|
|
|
|
|
elsif ( $tok eq '^' ) { |
8304
|
0
|
0
|
|
|
|
0
|
if ( $identifier eq '&' ) { |
8305
|
|
|
|
|
|
|
|
8306
|
|
|
|
|
|
|
# Special variable (c066) |
8307
|
0
|
|
|
|
|
0
|
$identifier .= $tok; |
8308
|
0
|
|
|
|
|
0
|
$type = '&'; |
8309
|
|
|
|
|
|
|
|
8310
|
|
|
|
|
|
|
# There may be one more character, not a space, after the ^ |
8311
|
0
|
|
|
|
|
0
|
my $next1 = $rtokens->[ $i + 1 ]; |
8312
|
0
|
|
|
|
|
0
|
my $chr = substr( $next1, 0, 1 ); |
8313
|
0
|
0
|
|
|
|
0
|
if ( $is_special_variable_char{$chr} ) { |
8314
|
|
|
|
|
|
|
|
8315
|
|
|
|
|
|
|
# It is something like &^O |
8316
|
0
|
|
|
|
|
0
|
$i++; |
8317
|
0
|
|
|
|
|
0
|
$identifier .= $next1; |
8318
|
|
|
|
|
|
|
|
8319
|
|
|
|
|
|
|
# If pretoken $next1 is more than one character long, |
8320
|
|
|
|
|
|
|
# set a flag indicating that it needs to be split. |
8321
|
0
|
0
|
|
|
|
0
|
$id_scan_state = |
8322
|
|
|
|
|
|
|
( length($next1) > 1 ) ? $scan_state_SPLIT : EMPTY_STRING; |
8323
|
|
|
|
|
|
|
} |
8324
|
|
|
|
|
|
|
else { |
8325
|
|
|
|
|
|
|
|
8326
|
|
|
|
|
|
|
# it is &^ |
8327
|
0
|
|
|
|
|
0
|
$id_scan_state = EMPTY_STRING; |
8328
|
|
|
|
|
|
|
} |
8329
|
|
|
|
|
|
|
} |
8330
|
|
|
|
|
|
|
else { |
8331
|
0
|
|
|
|
|
0
|
$identifier = EMPTY_STRING; |
8332
|
0
|
|
|
|
|
0
|
$i = $i_save; |
8333
|
|
|
|
|
|
|
} |
8334
|
|
|
|
|
|
|
} |
8335
|
|
|
|
|
|
|
else { |
8336
|
|
|
|
|
|
|
|
8337
|
|
|
|
|
|
|
# punctuation variable? |
8338
|
|
|
|
|
|
|
# testfile: cunningham4.pl |
8339
|
|
|
|
|
|
|
# |
8340
|
|
|
|
|
|
|
# We have to be careful here. If we are in an unknown state, |
8341
|
|
|
|
|
|
|
# we will reject the punctuation variable. In the following |
8342
|
|
|
|
|
|
|
# example the '&' is a binary operator but we are in an unknown |
8343
|
|
|
|
|
|
|
# state because there is no sigil on 'Prima', so we don't |
8344
|
|
|
|
|
|
|
# know what it is. But it is a bad guess that |
8345
|
|
|
|
|
|
|
# '&~' is a function variable. |
8346
|
|
|
|
|
|
|
# $self->{text}->{colorMap}->[ |
8347
|
|
|
|
|
|
|
# Prima::PodView::COLOR_CODE_FOREGROUND |
8348
|
|
|
|
|
|
|
# & ~tb::COLOR_INDEX ] = |
8349
|
|
|
|
|
|
|
# $sec->{ColorCode} |
8350
|
|
|
|
|
|
|
|
8351
|
|
|
|
|
|
|
# Fix for case c033: a '#' here starts a side comment |
8352
|
0
|
0
|
0
|
|
|
0
|
if ( $identifier eq '&' && $expecting && $tok ne '#' ) { |
|
|
|
0
|
|
|
|
|
8353
|
0
|
|
|
|
|
0
|
$identifier .= $tok; |
8354
|
|
|
|
|
|
|
} |
8355
|
|
|
|
|
|
|
else { |
8356
|
0
|
|
|
|
|
0
|
$identifier = EMPTY_STRING; |
8357
|
0
|
|
|
|
|
0
|
$i = $i_save; |
8358
|
0
|
|
|
|
|
0
|
$type = '&'; |
8359
|
|
|
|
|
|
|
} |
8360
|
0
|
|
|
|
|
0
|
$id_scan_state = EMPTY_STRING; |
8361
|
|
|
|
|
|
|
} |
8362
|
105
|
|
|
|
|
197
|
return; |
8363
|
|
|
|
|
|
|
} ## end sub do_id_scan_state_ampersand |
8364
|
|
|
|
|
|
|
|
8365
|
|
|
|
|
|
|
#------------------- |
8366
|
|
|
|
|
|
|
# hash of scanner subs |
8367
|
|
|
|
|
|
|
#------------------- |
8368
|
|
|
|
|
|
|
my $scan_identifier_code = { |
8369
|
|
|
|
|
|
|
$scan_state_SIGIL => \&do_id_scan_state_dollar, |
8370
|
|
|
|
|
|
|
$scan_state_ALPHA => \&do_id_scan_state_alpha, |
8371
|
|
|
|
|
|
|
$scan_state_COLON => \&do_id_scan_state_colon, |
8372
|
|
|
|
|
|
|
$scan_state_LPAREN => \&do_id_scan_state_left_paren, |
8373
|
|
|
|
|
|
|
$scan_state_RPAREN => \&do_id_scan_state_right_paren, |
8374
|
|
|
|
|
|
|
$scan_state_AMPERSAND => \&do_id_scan_state_ampersand, |
8375
|
|
|
|
|
|
|
}; |
8376
|
|
|
|
|
|
|
|
8377
|
|
|
|
|
|
|
sub scan_complex_identifier { |
8378
|
|
|
|
|
|
|
|
8379
|
|
|
|
|
|
|
# This routine assembles tokens into identifiers. It maintains a |
8380
|
|
|
|
|
|
|
# scan state, id_scan_state. It updates id_scan_state based upon |
8381
|
|
|
|
|
|
|
# current id_scan_state and token, and returns an updated |
8382
|
|
|
|
|
|
|
# id_scan_state and the next index after the identifier. |
8383
|
|
|
|
|
|
|
|
8384
|
|
|
|
|
|
|
# This routine now serves a a backup for sub scan_simple_identifier |
8385
|
|
|
|
|
|
|
# which handles most identifiers. |
8386
|
|
|
|
|
|
|
|
8387
|
|
|
|
|
|
|
# Note that $self must be a 'my' variable and not be a closure |
8388
|
|
|
|
|
|
|
# variables like the other args. Otherwise it will not get |
8389
|
|
|
|
|
|
|
# deleted by a DESTROY call at the end of a file. Then an |
8390
|
|
|
|
|
|
|
# attempt to create multiple tokenizers can occur when multiple |
8391
|
|
|
|
|
|
|
# files are processed, causing an error. |
8392
|
|
|
|
|
|
|
|
8393
|
|
|
|
|
|
|
( |
8394
|
486
|
|
|
486
|
0
|
1753
|
my $self, $i, $id_scan_state, $identifier, $rtokens, |
8395
|
|
|
|
|
|
|
$max_token_index, $expecting, $container_type |
8396
|
|
|
|
|
|
|
) = @_; |
8397
|
|
|
|
|
|
|
|
8398
|
|
|
|
|
|
|
# return flag telling caller to split the pretoken |
8399
|
486
|
|
|
|
|
2534
|
my $split_pretoken_flag; |
8400
|
|
|
|
|
|
|
|
8401
|
|
|
|
|
|
|
#------------------- |
8402
|
|
|
|
|
|
|
# Initialize my vars |
8403
|
|
|
|
|
|
|
#------------------- |
8404
|
|
|
|
|
|
|
|
8405
|
486
|
|
|
|
|
1742
|
initialize_my_scan_id_vars(); |
8406
|
|
|
|
|
|
|
|
8407
|
|
|
|
|
|
|
#-------------------------------------------------------- |
8408
|
|
|
|
|
|
|
# get started by defining a type and a state if necessary |
8409
|
|
|
|
|
|
|
#-------------------------------------------------------- |
8410
|
|
|
|
|
|
|
|
8411
|
486
|
100
|
|
|
|
1177
|
if ( !$id_scan_state ) { |
8412
|
479
|
|
|
|
|
776
|
$context = UNKNOWN_CONTEXT; |
8413
|
|
|
|
|
|
|
|
8414
|
|
|
|
|
|
|
# fixup for digraph |
8415
|
479
|
50
|
|
|
|
1314
|
if ( $tok eq '>' ) { |
8416
|
0
|
|
|
|
|
0
|
$tok = '->'; |
8417
|
0
|
|
|
|
|
0
|
$tok_begin = $tok; |
8418
|
|
|
|
|
|
|
} |
8419
|
479
|
|
|
|
|
826
|
$identifier = $tok; |
8420
|
|
|
|
|
|
|
|
8421
|
479
|
100
|
100
|
|
|
3072
|
if ( $last_nonblank_token eq '->' ) { |
|
|
100
|
100
|
|
|
|
|
|
|
100
|
0
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
8422
|
6
|
|
|
|
|
15
|
$identifier = '->' . $identifier; |
8423
|
6
|
|
|
|
|
13
|
$id_scan_state = $scan_state_SIGIL; |
8424
|
|
|
|
|
|
|
} |
8425
|
|
|
|
|
|
|
elsif ( $tok eq '$' || $tok eq '*' ) { |
8426
|
293
|
|
|
|
|
535
|
$id_scan_state = $scan_state_SIGIL; |
8427
|
293
|
|
|
|
|
571
|
$context = SCALAR_CONTEXT; |
8428
|
|
|
|
|
|
|
} |
8429
|
|
|
|
|
|
|
elsif ( $tok eq '%' || $tok eq '@' ) { |
8430
|
77
|
|
|
|
|
186
|
$id_scan_state = $scan_state_SIGIL; |
8431
|
77
|
|
|
|
|
159
|
$context = LIST_CONTEXT; |
8432
|
|
|
|
|
|
|
} |
8433
|
|
|
|
|
|
|
elsif ( $tok eq '&' ) { |
8434
|
103
|
|
|
|
|
241
|
$id_scan_state = $scan_state_AMPERSAND; |
8435
|
|
|
|
|
|
|
} |
8436
|
|
|
|
|
|
|
elsif ( $tok eq 'sub' or $tok eq 'package' ) { |
8437
|
0
|
|
|
|
|
0
|
$saw_alpha = 0; # 'sub' is considered type info here |
8438
|
0
|
|
|
|
|
0
|
$id_scan_state = $scan_state_SIGIL; |
8439
|
0
|
|
|
|
|
0
|
$identifier .= |
8440
|
|
|
|
|
|
|
SPACE; # need a space to separate sub from sub name |
8441
|
|
|
|
|
|
|
} |
8442
|
|
|
|
|
|
|
elsif ( $tok eq '::' ) { |
8443
|
0
|
|
|
|
|
0
|
$id_scan_state = $scan_state_ALPHA; |
8444
|
|
|
|
|
|
|
} |
8445
|
|
|
|
|
|
|
elsif ( $tok =~ /^\w/ ) { |
8446
|
0
|
|
|
|
|
0
|
$id_scan_state = $scan_state_COLON; |
8447
|
0
|
|
|
|
|
0
|
$saw_alpha = 1; |
8448
|
|
|
|
|
|
|
} |
8449
|
|
|
|
|
|
|
elsif ( $tok eq '->' ) { |
8450
|
0
|
|
|
|
|
0
|
$id_scan_state = $scan_state_SIGIL; |
8451
|
|
|
|
|
|
|
} |
8452
|
|
|
|
|
|
|
else { |
8453
|
|
|
|
|
|
|
|
8454
|
|
|
|
|
|
|
# shouldn't happen: bad call parameter |
8455
|
0
|
|
|
|
|
0
|
my $msg = |
8456
|
|
|
|
|
|
|
"Program bug detected: scan_complex_identifier received bad starting token = '$tok'\n"; |
8457
|
0
|
|
|
|
|
0
|
if (DEVEL_MODE) { $self->Fault($msg) } |
8458
|
0
|
0
|
|
|
|
0
|
if ( !$self->[_in_error_] ) { |
8459
|
0
|
|
|
|
|
0
|
warning($msg); |
8460
|
0
|
|
|
|
|
0
|
$self->[_in_error_] = 1; |
8461
|
|
|
|
|
|
|
} |
8462
|
0
|
|
|
|
|
0
|
$id_scan_state = EMPTY_STRING; |
8463
|
|
|
|
|
|
|
|
8464
|
|
|
|
|
|
|
# emergency return |
8465
|
0
|
|
|
|
|
0
|
goto RETURN; |
8466
|
|
|
|
|
|
|
} |
8467
|
479
|
|
|
|
|
908
|
$saw_type = !$saw_alpha; |
8468
|
|
|
|
|
|
|
} |
8469
|
|
|
|
|
|
|
else { |
8470
|
7
|
|
|
|
|
19
|
$i--; |
8471
|
7
|
|
|
|
|
34
|
$saw_alpha = ( $tok =~ /^\w/ ); |
8472
|
7
|
|
|
|
|
20
|
$saw_type = ( $tok =~ /([\$\%\@\*\&])/ ); |
8473
|
|
|
|
|
|
|
|
8474
|
|
|
|
|
|
|
# check for a valid starting state |
8475
|
7
|
|
|
|
|
11
|
if ( DEVEL_MODE && !$is_returnable_scan_state{$id_scan_state} ) { |
8476
|
|
|
|
|
|
|
$self->Fault(<<EOM); |
8477
|
|
|
|
|
|
|
Unexpected starting scan state in sub scan_complex_identifier: '$id_scan_state' |
8478
|
|
|
|
|
|
|
EOM |
8479
|
|
|
|
|
|
|
} |
8480
|
|
|
|
|
|
|
} |
8481
|
|
|
|
|
|
|
|
8482
|
|
|
|
|
|
|
#------------------------------ |
8483
|
|
|
|
|
|
|
# loop to gather the identifier |
8484
|
|
|
|
|
|
|
#------------------------------ |
8485
|
|
|
|
|
|
|
|
8486
|
486
|
|
|
|
|
844
|
$i_save = $i; |
8487
|
|
|
|
|
|
|
|
8488
|
486
|
|
100
|
|
|
2118
|
while ( $i < $max_token_index && $id_scan_state ) { |
8489
|
|
|
|
|
|
|
|
8490
|
|
|
|
|
|
|
# Be sure we have code to handle this state before we proceed |
8491
|
1169
|
|
|
|
|
2392
|
my $code = $scan_identifier_code->{$id_scan_state}; |
8492
|
1169
|
100
|
|
|
|
2391
|
if ( !$code ) { |
8493
|
|
|
|
|
|
|
|
8494
|
3
|
50
|
|
|
|
12
|
if ( $id_scan_state eq $scan_state_SPLIT ) { |
8495
|
|
|
|
|
|
|
## OK: this is the signal to exit and split the pretoken |
8496
|
|
|
|
|
|
|
} |
8497
|
|
|
|
|
|
|
|
8498
|
|
|
|
|
|
|
# unknown state - should not happen |
8499
|
|
|
|
|
|
|
else { |
8500
|
0
|
|
|
|
|
0
|
if (DEVEL_MODE) { |
8501
|
|
|
|
|
|
|
$self->Fault(<<EOM); |
8502
|
|
|
|
|
|
|
Unknown scan state in sub scan_complex_identifier: '$id_scan_state' |
8503
|
|
|
|
|
|
|
Scan state at sub entry was '$id_scan_state_begin' |
8504
|
|
|
|
|
|
|
EOM |
8505
|
|
|
|
|
|
|
} |
8506
|
0
|
|
|
|
|
0
|
$id_scan_state = EMPTY_STRING; |
8507
|
0
|
|
|
|
|
0
|
$i = $i_save; |
8508
|
|
|
|
|
|
|
} |
8509
|
3
|
|
|
|
|
5
|
last; |
8510
|
|
|
|
|
|
|
} |
8511
|
|
|
|
|
|
|
|
8512
|
|
|
|
|
|
|
# Remember the starting index for progress check below |
8513
|
1166
|
|
|
|
|
1627
|
my $i_start_loop = $i; |
8514
|
|
|
|
|
|
|
|
8515
|
1166
|
|
|
|
|
1613
|
$last_tok_is_blank = $tok_is_blank; |
8516
|
1166
|
100
|
|
|
|
1987
|
if ($tok_is_blank) { $tok_is_blank = undef } |
|
11
|
|
|
|
|
24
|
|
8517
|
1155
|
|
|
|
|
1579
|
else { $i_save = $i } |
8518
|
|
|
|
|
|
|
|
8519
|
1166
|
|
|
|
|
1947
|
$tok = $rtokens->[ ++$i ]; |
8520
|
|
|
|
|
|
|
|
8521
|
|
|
|
|
|
|
# patch to make digraph :: if necessary |
8522
|
1166
|
100
|
100
|
|
|
2942
|
if ( ( $tok eq ':' ) && ( $rtokens->[ $i + 1 ] eq ':' ) ) { |
8523
|
113
|
|
|
|
|
244
|
$tok = '::'; |
8524
|
113
|
|
|
|
|
203
|
$i++; |
8525
|
|
|
|
|
|
|
} |
8526
|
|
|
|
|
|
|
|
8527
|
1166
|
|
|
|
|
3480
|
$code->($self); |
8528
|
|
|
|
|
|
|
|
8529
|
|
|
|
|
|
|
# check for forward progress: a decrease in the index $i |
8530
|
|
|
|
|
|
|
# implies that scanning has finished |
8531
|
1166
|
100
|
|
|
|
3807
|
last if ( $i <= $i_start_loop ); |
8532
|
|
|
|
|
|
|
|
8533
|
|
|
|
|
|
|
} ## end of main loop |
8534
|
|
|
|
|
|
|
|
8535
|
|
|
|
|
|
|
#------------- |
8536
|
|
|
|
|
|
|
# Check result |
8537
|
|
|
|
|
|
|
#------------- |
8538
|
|
|
|
|
|
|
|
8539
|
|
|
|
|
|
|
# Be sure a valid state is returned |
8540
|
486
|
100
|
|
|
|
1367
|
if ($id_scan_state) { |
8541
|
|
|
|
|
|
|
|
8542
|
20
|
100
|
|
|
|
89
|
if ( !$is_returnable_scan_state{$id_scan_state} ) { |
8543
|
|
|
|
|
|
|
|
8544
|
13
|
100
|
|
|
|
56
|
if ( $id_scan_state eq $scan_state_SPLIT ) { |
8545
|
3
|
|
|
|
|
7
|
$split_pretoken_flag = 1; |
8546
|
|
|
|
|
|
|
} |
8547
|
|
|
|
|
|
|
|
8548
|
13
|
50
|
|
|
|
56
|
if ( $id_scan_state eq $scan_state_RPAREN ) { |
8549
|
0
|
|
|
|
|
0
|
warning( |
8550
|
|
|
|
|
|
|
"Hit end of line while seeking ) to end prototype\n"); |
8551
|
|
|
|
|
|
|
} |
8552
|
|
|
|
|
|
|
|
8553
|
13
|
|
|
|
|
29
|
$id_scan_state = EMPTY_STRING; |
8554
|
|
|
|
|
|
|
} |
8555
|
|
|
|
|
|
|
|
8556
|
|
|
|
|
|
|
# Patch: the deprecated variable $# does not combine with anything |
8557
|
|
|
|
|
|
|
# on the next line. |
8558
|
20
|
50
|
|
|
|
73
|
if ( $identifier eq '$#' ) { $id_scan_state = EMPTY_STRING } |
|
0
|
|
|
|
|
0
|
|
8559
|
|
|
|
|
|
|
} |
8560
|
|
|
|
|
|
|
|
8561
|
|
|
|
|
|
|
# Be sure the token index is valid |
8562
|
486
|
50
|
|
|
|
1331
|
if ( $i < 0 ) { $i = 0 } |
|
0
|
|
|
|
|
0
|
|
8563
|
|
|
|
|
|
|
|
8564
|
|
|
|
|
|
|
# Be sure a token type is defined |
8565
|
486
|
100
|
|
|
|
1246
|
if ( !$type ) { |
8566
|
|
|
|
|
|
|
|
8567
|
458
|
100
|
|
|
|
1088
|
if ($saw_type) { |
|
|
100
|
|
|
|
|
|
8568
|
|
|
|
|
|
|
|
8569
|
452
|
100
|
33
|
|
|
2118
|
if ($saw_alpha) { |
|
|
50
|
66
|
|
|
|
|
|
|
100
|
66
|
|
|
|
|
|
|
|
33
|
|
|
|
|
8570
|
|
|
|
|
|
|
|
8571
|
|
|
|
|
|
|
# The type without the -> should be the same as with the -> so |
8572
|
|
|
|
|
|
|
# that if they get separated we get the same bond strengths, |
8573
|
|
|
|
|
|
|
# etc. See b1234 |
8574
|
348
|
50
|
33
|
|
|
1359
|
if ( $identifier =~ /^->/ |
|
|
|
33
|
|
|
|
|
8575
|
|
|
|
|
|
|
&& $last_nonblank_type eq 'w' |
8576
|
|
|
|
|
|
|
&& substr( $identifier, 2, 1 ) =~ /^\w/ ) |
8577
|
|
|
|
|
|
|
{ |
8578
|
0
|
|
|
|
|
0
|
$type = 'w'; |
8579
|
|
|
|
|
|
|
} |
8580
|
348
|
|
|
|
|
746
|
else { $type = 'i' } |
8581
|
|
|
|
|
|
|
} |
8582
|
|
|
|
|
|
|
elsif ( $identifier eq '->' ) { |
8583
|
0
|
|
|
|
|
0
|
$type = '->'; |
8584
|
|
|
|
|
|
|
} |
8585
|
|
|
|
|
|
|
elsif ( |
8586
|
|
|
|
|
|
|
( length($identifier) > 1 ) |
8587
|
|
|
|
|
|
|
|
8588
|
|
|
|
|
|
|
# In something like '@$=' we have an identifier '@$' |
8589
|
|
|
|
|
|
|
# In something like '$${' we have type '$$' (and only |
8590
|
|
|
|
|
|
|
# part of an identifier) |
8591
|
|
|
|
|
|
|
&& !( $identifier =~ /\$$/ && $tok eq '{' ) |
8592
|
|
|
|
|
|
|
|
8593
|
|
|
|
|
|
|
## && ( $identifier !~ /^(sub |package )$/ ) |
8594
|
|
|
|
|
|
|
&& $identifier ne 'sub ' |
8595
|
|
|
|
|
|
|
&& $identifier ne 'package ' |
8596
|
|
|
|
|
|
|
) |
8597
|
|
|
|
|
|
|
{ |
8598
|
53
|
|
|
|
|
187
|
$type = 'i'; |
8599
|
|
|
|
|
|
|
} |
8600
|
51
|
|
|
|
|
104
|
else { $type = 't' } |
8601
|
|
|
|
|
|
|
} |
8602
|
|
|
|
|
|
|
elsif ($saw_alpha) { |
8603
|
|
|
|
|
|
|
|
8604
|
|
|
|
|
|
|
# type 'w' includes anything without leading type info |
8605
|
|
|
|
|
|
|
# ($,%,@,*) including something like abc::def::ghi |
8606
|
5
|
|
|
|
|
9
|
$type = 'w'; |
8607
|
|
|
|
|
|
|
|
8608
|
|
|
|
|
|
|
# Fix for b1337, if restarting scan after line break between |
8609
|
|
|
|
|
|
|
# '->' or sigil and identifier name, use type 'i' |
8610
|
5
|
50
|
33
|
|
|
35
|
if ( $id_scan_state_begin |
8611
|
|
|
|
|
|
|
&& $identifier =~ /^([\$\%\@\*\&]|->)/ ) |
8612
|
|
|
|
|
|
|
{ |
8613
|
5
|
|
|
|
|
10
|
$type = 'i'; |
8614
|
|
|
|
|
|
|
} |
8615
|
|
|
|
|
|
|
} |
8616
|
|
|
|
|
|
|
else { |
8617
|
1
|
|
|
|
|
3
|
$type = EMPTY_STRING; |
8618
|
|
|
|
|
|
|
} # this can happen on a restart |
8619
|
|
|
|
|
|
|
} |
8620
|
|
|
|
|
|
|
|
8621
|
|
|
|
|
|
|
# See if we formed an identifier... |
8622
|
486
|
100
|
|
|
|
1171
|
if ($identifier) { |
8623
|
444
|
|
|
|
|
796
|
$tok = $identifier; |
8624
|
444
|
100
|
|
|
|
1113
|
if ($message) { $self->write_logfile_entry($message) } |
|
1
|
|
|
|
|
8
|
|
8625
|
|
|
|
|
|
|
} |
8626
|
|
|
|
|
|
|
|
8627
|
|
|
|
|
|
|
# did not find an identifier, back up |
8628
|
|
|
|
|
|
|
else { |
8629
|
42
|
|
|
|
|
90
|
$tok = $tok_begin; |
8630
|
42
|
|
|
|
|
87
|
$i = $i_begin; |
8631
|
|
|
|
|
|
|
} |
8632
|
|
|
|
|
|
|
|
8633
|
|
|
|
|
|
|
RETURN: |
8634
|
|
|
|
|
|
|
|
8635
|
486
|
|
|
|
|
746
|
DEBUG_SCAN_ID && do { |
8636
|
|
|
|
|
|
|
my ( $a, $b, $c ) = caller; |
8637
|
|
|
|
|
|
|
print {*STDOUT} |
8638
|
|
|
|
|
|
|
"SCANID: called from $a $b $c with tok, i, state, identifier =$tok_begin, $i_begin, $id_scan_state_begin, $identifier_begin\n"; |
8639
|
|
|
|
|
|
|
print {*STDOUT} |
8640
|
|
|
|
|
|
|
"SCANID: returned with tok, i, state, identifier =$tok, $i, $id_scan_state, $identifier\n"; |
8641
|
|
|
|
|
|
|
}; |
8642
|
486
|
|
|
|
|
2283
|
return ( $i, $tok, $type, $id_scan_state, $identifier, |
8643
|
|
|
|
|
|
|
$split_pretoken_flag ); |
8644
|
|
|
|
|
|
|
} ## end sub scan_complex_identifier |
8645
|
|
|
|
|
|
|
} ## end closure for sub scan_complex_identifier |
8646
|
|
|
|
|
|
|
|
8647
|
|
|
|
|
|
|
{ ## closure for sub do_scan_sub |
8648
|
|
|
|
|
|
|
|
8649
|
|
|
|
|
|
|
my %warn_if_lexical; |
8650
|
|
|
|
|
|
|
|
8651
|
|
|
|
|
|
|
BEGIN { |
8652
|
|
|
|
|
|
|
|
8653
|
|
|
|
|
|
|
# lexical subs with these names can cause parsing errors in this version |
8654
|
39
|
|
|
39
|
|
324
|
my @q = qw( m q qq qr qw qx s tr y ); |
8655
|
39
|
|
|
|
|
3504
|
@{warn_if_lexical}{@q} = (1) x scalar(@q); |
8656
|
|
|
|
|
|
|
} ## end BEGIN |
8657
|
|
|
|
|
|
|
|
8658
|
|
|
|
|
|
|
# saved package and subnames in case prototype is on separate line |
8659
|
|
|
|
|
|
|
my ( $package_saved, $subname_saved ); |
8660
|
|
|
|
|
|
|
|
8661
|
|
|
|
|
|
|
# initialize subname each time a new 'sub' keyword is encountered |
8662
|
|
|
|
|
|
|
sub initialize_subname { |
8663
|
295
|
|
|
295
|
0
|
664
|
$package_saved = EMPTY_STRING; |
8664
|
295
|
|
|
|
|
597
|
$subname_saved = EMPTY_STRING; |
8665
|
295
|
|
|
|
|
521
|
return; |
8666
|
|
|
|
|
|
|
} |
8667
|
|
|
|
|
|
|
|
8668
|
|
|
|
|
|
|
use constant { |
8669
|
39
|
|
|
|
|
92987
|
SUB_CALL => 1, |
8670
|
|
|
|
|
|
|
PAREN_CALL => 2, |
8671
|
|
|
|
|
|
|
PROTOTYPE_CALL => 3, |
8672
|
39
|
|
|
39
|
|
461
|
}; |
|
39
|
|
|
|
|
128
|
|
8673
|
|
|
|
|
|
|
|
8674
|
|
|
|
|
|
|
sub do_scan_sub { |
8675
|
|
|
|
|
|
|
|
8676
|
|
|
|
|
|
|
# do_scan_sub parses a sub name and prototype. |
8677
|
|
|
|
|
|
|
|
8678
|
|
|
|
|
|
|
# At present there are three basic CALL TYPES which are |
8679
|
|
|
|
|
|
|
# distinguished by the starting value of '$tok': |
8680
|
|
|
|
|
|
|
# 1. $tok='sub', id_scan_state='sub' |
8681
|
|
|
|
|
|
|
# it is called with $i_beg equal to the index of the first nonblank |
8682
|
|
|
|
|
|
|
# token following a 'sub' token. |
8683
|
|
|
|
|
|
|
# 2. $tok='(', id_scan_state='sub', |
8684
|
|
|
|
|
|
|
# it is called with $i_beg equal to the index of a '(' which may |
8685
|
|
|
|
|
|
|
# start a prototype. |
8686
|
|
|
|
|
|
|
# 3. $tok='prototype', id_scan_state='prototype' |
8687
|
|
|
|
|
|
|
# it is called with $i_beg equal to the index of a '(' which is |
8688
|
|
|
|
|
|
|
# preceded by ': prototype' and has $id_scan_state eq 'prototype' |
8689
|
|
|
|
|
|
|
|
8690
|
|
|
|
|
|
|
# Examples: |
8691
|
|
|
|
|
|
|
|
8692
|
|
|
|
|
|
|
# A single type 1 call will get both the sub and prototype |
8693
|
|
|
|
|
|
|
# sub foo1 ( $$ ) { } |
8694
|
|
|
|
|
|
|
# ^ |
8695
|
|
|
|
|
|
|
|
8696
|
|
|
|
|
|
|
# The subname will be obtained with a 'sub' call |
8697
|
|
|
|
|
|
|
# The prototype on line 2 will be obtained with a '(' call |
8698
|
|
|
|
|
|
|
# sub foo1 |
8699
|
|
|
|
|
|
|
# ^ <---call type 1 |
8700
|
|
|
|
|
|
|
# ( $$ ) { } |
8701
|
|
|
|
|
|
|
# ^ <---call type 2 |
8702
|
|
|
|
|
|
|
|
8703
|
|
|
|
|
|
|
# The subname will be obtained with a 'sub' call |
8704
|
|
|
|
|
|
|
# The prototype will be obtained with a 'prototype' call |
8705
|
|
|
|
|
|
|
# sub foo1 ( $x, $y ) : prototype ( $$ ) { } |
8706
|
|
|
|
|
|
|
# ^ <---type 1 ^ <---type 3 |
8707
|
|
|
|
|
|
|
|
8708
|
|
|
|
|
|
|
# TODO: add future error checks to be sure we have a valid |
8709
|
|
|
|
|
|
|
# sub name. For example, 'sub &doit' is wrong. Also, be sure |
8710
|
|
|
|
|
|
|
# a name is given if and only if a non-anonymous sub is |
8711
|
|
|
|
|
|
|
# appropriate. |
8712
|
|
|
|
|
|
|
# USES GLOBAL VARS: $current_package, $last_nonblank_token, |
8713
|
|
|
|
|
|
|
# $rsaw_function_definition, |
8714
|
|
|
|
|
|
|
# $statement_type |
8715
|
|
|
|
|
|
|
|
8716
|
301
|
|
|
301
|
0
|
848
|
my ( $self, $rinput_hash ) = @_; |
8717
|
|
|
|
|
|
|
|
8718
|
301
|
|
|
|
|
708
|
my $input_line = $rinput_hash->{input_line}; |
8719
|
301
|
|
|
|
|
645
|
my $i = $rinput_hash->{i}; |
8720
|
301
|
|
|
|
|
577
|
my $i_beg = $rinput_hash->{i_beg}; |
8721
|
301
|
|
|
|
|
635
|
my $tok = $rinput_hash->{tok}; |
8722
|
301
|
|
|
|
|
631
|
my $type = $rinput_hash->{type}; |
8723
|
301
|
|
|
|
|
587
|
my $rtokens = $rinput_hash->{rtokens}; |
8724
|
301
|
|
|
|
|
548
|
my $rtoken_map = $rinput_hash->{rtoken_map}; |
8725
|
301
|
|
|
|
|
1567
|
my $id_scan_state = $rinput_hash->{id_scan_state}; |
8726
|
301
|
|
|
|
|
614
|
my $max_token_index = $rinput_hash->{max_token_index}; |
8727
|
|
|
|
|
|
|
|
8728
|
301
|
|
|
|
|
518
|
my $i_entry = $i; |
8729
|
|
|
|
|
|
|
|
8730
|
|
|
|
|
|
|
# Determine the CALL TYPE |
8731
|
|
|
|
|
|
|
# 1=sub |
8732
|
|
|
|
|
|
|
# 2=( |
8733
|
|
|
|
|
|
|
# 3=prototype |
8734
|
301
|
100
|
|
|
|
1085
|
my $call_type = |
|
|
100
|
|
|
|
|
|
8735
|
|
|
|
|
|
|
$tok eq 'prototype' ? PROTOTYPE_CALL |
8736
|
|
|
|
|
|
|
: $tok eq '(' ? PAREN_CALL |
8737
|
|
|
|
|
|
|
: SUB_CALL; |
8738
|
|
|
|
|
|
|
|
8739
|
301
|
|
|
|
|
530
|
$id_scan_state = EMPTY_STRING; # normally we get everything in one call |
8740
|
301
|
|
|
|
|
584
|
my $subname = $subname_saved; |
8741
|
301
|
|
|
|
|
535
|
my $package = $package_saved; |
8742
|
301
|
|
|
|
|
570
|
my $proto = undef; |
8743
|
301
|
|
|
|
|
522
|
my $attrs = undef; |
8744
|
301
|
|
|
|
|
518
|
my $match; |
8745
|
|
|
|
|
|
|
|
8746
|
301
|
|
|
|
|
603
|
my $pos_beg = $rtoken_map->[$i_beg]; |
8747
|
301
|
|
|
|
|
972
|
pos($input_line) = $pos_beg; |
8748
|
|
|
|
|
|
|
|
8749
|
|
|
|
|
|
|
# Look for the sub NAME if this is a SUB call |
8750
|
301
|
100
|
100
|
|
|
2785
|
if ( |
8751
|
|
|
|
|
|
|
$call_type == SUB_CALL |
8752
|
|
|
|
|
|
|
&& $input_line =~ m{\G\s* |
8753
|
|
|
|
|
|
|
((?:\w*(?:'|::))*) # package - something that ends in :: or ' |
8754
|
|
|
|
|
|
|
(\w+) # NAME - required |
8755
|
|
|
|
|
|
|
}gcx |
8756
|
|
|
|
|
|
|
) |
8757
|
|
|
|
|
|
|
{ |
8758
|
122
|
|
|
|
|
307
|
$match = 1; |
8759
|
122
|
|
|
|
|
336
|
$subname = $2; |
8760
|
|
|
|
|
|
|
|
8761
|
122
|
|
33
|
|
|
479
|
my $is_lexical_sub = |
8762
|
|
|
|
|
|
|
$last_nonblank_type eq 'k' && $last_nonblank_token eq 'my'; |
8763
|
122
|
0
|
33
|
|
|
434
|
if ( $is_lexical_sub && $1 ) { |
8764
|
0
|
|
|
|
|
0
|
$self->warning("'my' sub $subname cannot be in package '$1'\n"); |
8765
|
0
|
|
|
|
|
0
|
$is_lexical_sub = 0; |
8766
|
|
|
|
|
|
|
} |
8767
|
|
|
|
|
|
|
|
8768
|
122
|
50
|
|
|
|
362
|
if ($is_lexical_sub) { |
8769
|
|
|
|
|
|
|
|
8770
|
|
|
|
|
|
|
# lexical subs use the block sequence number as a package name |
8771
|
0
|
|
|
|
|
0
|
my $seqno = |
8772
|
|
|
|
|
|
|
$rcurrent_sequence_number->[BRACE] |
8773
|
|
|
|
|
|
|
[ $rcurrent_depth->[BRACE] ]; |
8774
|
0
|
0
|
|
|
|
0
|
$seqno = 1 if ( !defined($seqno) ); |
8775
|
0
|
|
|
|
|
0
|
$package = $seqno; |
8776
|
0
|
0
|
|
|
|
0
|
if ( $warn_if_lexical{$subname} ) { |
8777
|
0
|
|
|
|
|
0
|
$self->warning( |
8778
|
|
|
|
|
|
|
"'my' sub '$subname' matches a builtin name and may not be handled correctly in this perltidy version.\n" |
8779
|
|
|
|
|
|
|
); |
8780
|
|
|
|
|
|
|
|
8781
|
|
|
|
|
|
|
# This may end badly, it is safest to block formatting |
8782
|
|
|
|
|
|
|
# For an example, see perl527/lexsub.t (issue c203) |
8783
|
0
|
|
|
|
|
0
|
$self->[_in_trouble_] = 1; |
8784
|
|
|
|
|
|
|
} |
8785
|
|
|
|
|
|
|
} |
8786
|
|
|
|
|
|
|
else { |
8787
|
122
|
100
|
66
|
|
|
825
|
$package = ( defined($1) && $1 ) ? $1 : $current_package; |
8788
|
122
|
|
|
|
|
400
|
$package =~ s/\'/::/g; |
8789
|
122
|
50
|
|
|
|
440
|
if ( $package =~ /^\:/ ) { $package = 'main' . $package } |
|
0
|
|
|
|
|
0
|
|
8790
|
122
|
|
|
|
|
317
|
$package =~ s/::$//; |
8791
|
|
|
|
|
|
|
} |
8792
|
|
|
|
|
|
|
|
8793
|
122
|
|
|
|
|
284
|
my $pos = pos($input_line); |
8794
|
122
|
|
|
|
|
287
|
my $numc = $pos - $pos_beg; |
8795
|
122
|
|
|
|
|
403
|
$tok = 'sub ' . substr( $input_line, $pos_beg, $numc ); |
8796
|
122
|
|
|
|
|
300
|
$type = 'S'; ## Fix for c250, was 'i'; |
8797
|
|
|
|
|
|
|
|
8798
|
|
|
|
|
|
|
# remember the sub name in case another call is needed to |
8799
|
|
|
|
|
|
|
# get the prototype |
8800
|
122
|
|
|
|
|
251
|
$package_saved = $package; |
8801
|
122
|
|
|
|
|
247
|
$subname_saved = $subname; |
8802
|
|
|
|
|
|
|
} |
8803
|
|
|
|
|
|
|
|
8804
|
|
|
|
|
|
|
# Now look for PROTO ATTRS for all call types |
8805
|
|
|
|
|
|
|
# Look for prototype/attributes which are usually on the same |
8806
|
|
|
|
|
|
|
# line as the sub name but which might be on a separate line. |
8807
|
|
|
|
|
|
|
# For example, we might have an anonymous sub with attributes, |
8808
|
|
|
|
|
|
|
# or a prototype on a separate line from its sub name |
8809
|
|
|
|
|
|
|
|
8810
|
|
|
|
|
|
|
# NOTE: We only want to parse PROTOTYPES here. If we see anything that |
8811
|
|
|
|
|
|
|
# does not look like a prototype, we assume it is a SIGNATURE and we |
8812
|
|
|
|
|
|
|
# will stop and let the the standard tokenizer handle it. In |
8813
|
|
|
|
|
|
|
# particular, we stop if we see any nested parens, braces, or commas. |
8814
|
|
|
|
|
|
|
# Also note, a valid prototype cannot contain any alphabetic character |
8815
|
|
|
|
|
|
|
# -- see https://perldoc.perl.org/perlsub |
8816
|
|
|
|
|
|
|
# But it appears that an underscore is valid in a prototype, so the |
8817
|
|
|
|
|
|
|
# regex below uses [A-Za-z] rather than \w |
8818
|
|
|
|
|
|
|
# This is the old regex which has been replaced: |
8819
|
|
|
|
|
|
|
# $input_line =~ m/\G(\s*\([^\)\(\}\{\,#]*\))? # PROTO |
8820
|
301
|
|
|
|
|
1109
|
my $saw_opening_paren = $input_line =~ /\G\s*\(/; |
8821
|
301
|
100
|
100
|
|
|
2946
|
if ( |
|
|
|
66
|
|
|
|
|
8822
|
|
|
|
|
|
|
$input_line =~ m{\G(\s*\([^\)\(\}\{\,#A-Za-z]*\))? # PROTO |
8823
|
|
|
|
|
|
|
(\s*:)? # ATTRS leading ':' |
8824
|
|
|
|
|
|
|
}gcx |
8825
|
|
|
|
|
|
|
&& ( $1 || $2 ) |
8826
|
|
|
|
|
|
|
) |
8827
|
|
|
|
|
|
|
{ |
8828
|
45
|
|
|
|
|
117
|
$proto = $1; |
8829
|
45
|
|
|
|
|
89
|
$attrs = $2; |
8830
|
|
|
|
|
|
|
|
8831
|
|
|
|
|
|
|
# Append the prototype to the starting token if it is 'sub' or |
8832
|
|
|
|
|
|
|
# 'prototype'. This is not necessary but for compatibility with |
8833
|
|
|
|
|
|
|
# previous versions when the -csc flag is used: |
8834
|
45
|
100
|
100
|
|
|
326
|
if ( $proto && ( $match || $call_type == PROTOTYPE_CALL ) ) { |
|
|
100
|
100
|
|
|
|
|
8835
|
24
|
|
|
|
|
64
|
$tok .= $proto; |
8836
|
|
|
|
|
|
|
} |
8837
|
|
|
|
|
|
|
|
8838
|
|
|
|
|
|
|
# If we just entered the sub at an opening paren on this call, not |
8839
|
|
|
|
|
|
|
# a following :prototype, label it with the previous token. This is |
8840
|
|
|
|
|
|
|
# necessary to propagate the sub name to its opening block. |
8841
|
|
|
|
|
|
|
elsif ( $call_type == PAREN_CALL ) { |
8842
|
2
|
|
|
|
|
5
|
$tok = $last_nonblank_token; |
8843
|
|
|
|
|
|
|
} |
8844
|
|
|
|
|
|
|
else { |
8845
|
|
|
|
|
|
|
} |
8846
|
|
|
|
|
|
|
|
8847
|
45
|
|
100
|
|
|
167
|
$match ||= 1; |
8848
|
|
|
|
|
|
|
|
8849
|
|
|
|
|
|
|
# Patch part #1 to fixes cases b994 and b1053: |
8850
|
|
|
|
|
|
|
# Mark an anonymous sub keyword without prototype as type 'k', i.e. |
8851
|
|
|
|
|
|
|
# 'sub : lvalue { ...' |
8852
|
45
|
|
|
|
|
94
|
$type = 'S'; ## C250, was 'i'; |
8853
|
45
|
100
|
100
|
|
|
209
|
if ( $tok eq 'sub' && !$proto ) { $type = 'k' } |
|
2
|
|
|
|
|
6
|
|
8854
|
|
|
|
|
|
|
} |
8855
|
|
|
|
|
|
|
|
8856
|
301
|
100
|
|
|
|
866
|
if ($match) { |
8857
|
|
|
|
|
|
|
|
8858
|
|
|
|
|
|
|
# ATTRS: if there are attributes, back up and let the ':' be |
8859
|
|
|
|
|
|
|
# found later by the scanner. |
8860
|
137
|
|
|
|
|
299
|
my $pos = pos($input_line); |
8861
|
137
|
100
|
|
|
|
398
|
if ($attrs) { |
8862
|
15
|
|
|
|
|
33
|
$pos -= length($attrs); |
8863
|
|
|
|
|
|
|
} |
8864
|
|
|
|
|
|
|
|
8865
|
137
|
|
|
|
|
303
|
my $next_nonblank_token = $tok; |
8866
|
|
|
|
|
|
|
|
8867
|
|
|
|
|
|
|
# catch case of line with leading ATTR ':' after anonymous sub |
8868
|
137
|
100
|
100
|
|
|
545
|
if ( $pos == $pos_beg && $tok eq ':' ) { |
8869
|
1
|
|
|
|
|
2
|
$type = 'A'; |
8870
|
1
|
|
|
|
|
3
|
$self->[_in_attribute_list_] = 1; |
8871
|
|
|
|
|
|
|
} |
8872
|
|
|
|
|
|
|
|
8873
|
|
|
|
|
|
|
# Otherwise, if we found a match we must convert back from |
8874
|
|
|
|
|
|
|
# string position to the pre_token index for continued parsing. |
8875
|
|
|
|
|
|
|
else { |
8876
|
|
|
|
|
|
|
|
8877
|
|
|
|
|
|
|
# I don't think an error flag can occur here ..but ? |
8878
|
136
|
|
|
|
|
245
|
my $error; |
8879
|
136
|
|
|
|
|
539
|
( $i, $error ) = inverse_pretoken_map( $i, $pos, $rtoken_map, |
8880
|
|
|
|
|
|
|
$max_token_index ); |
8881
|
136
|
50
|
|
|
|
486
|
if ($error) { $self->warning("Possibly invalid sub\n") } |
|
0
|
|
|
|
|
0
|
|
8882
|
|
|
|
|
|
|
|
8883
|
|
|
|
|
|
|
# Patch part #2 to fixes cases b994 and b1053: |
8884
|
|
|
|
|
|
|
# Do not let spaces be part of the token of an anonymous sub |
8885
|
|
|
|
|
|
|
# keyword which we marked as type 'k' above...i.e. for |
8886
|
|
|
|
|
|
|
# something like: |
8887
|
|
|
|
|
|
|
# 'sub : lvalue { ...' |
8888
|
|
|
|
|
|
|
# Back up and let it be parsed as a blank |
8889
|
136
|
50
|
66
|
|
|
606
|
if ( $type eq 'k' |
|
|
|
66
|
|
|
|
|
|
|
|
33
|
|
|
|
|
8890
|
|
|
|
|
|
|
&& $attrs |
8891
|
|
|
|
|
|
|
&& $i > $i_entry |
8892
|
|
|
|
|
|
|
&& substr( $rtokens->[$i], 0, 1 ) =~ m/\s/ ) |
8893
|
|
|
|
|
|
|
{ |
8894
|
2
|
|
|
|
|
6
|
$i--; |
8895
|
|
|
|
|
|
|
} |
8896
|
|
|
|
|
|
|
|
8897
|
|
|
|
|
|
|
# check for multiple definitions of a sub |
8898
|
136
|
|
|
|
|
404
|
( $next_nonblank_token, my $i_next ) = |
8899
|
|
|
|
|
|
|
find_next_nonblank_token_on_this_line( $i, $rtokens, |
8900
|
|
|
|
|
|
|
$max_token_index ); |
8901
|
|
|
|
|
|
|
} |
8902
|
|
|
|
|
|
|
|
8903
|
137
|
100
|
|
|
|
785
|
if ( $next_nonblank_token =~ /^(\s*|#)$/ ) |
8904
|
|
|
|
|
|
|
{ # skip blank or side comment |
8905
|
7
|
|
|
|
|
79
|
my ( $rpre_tokens, $rpre_types ) = |
8906
|
|
|
|
|
|
|
$self->peek_ahead_for_n_nonblank_pre_tokens(1); |
8907
|
7
|
50
|
33
|
|
|
45
|
if ( defined($rpre_tokens) && @{$rpre_tokens} ) { |
|
7
|
|
|
|
|
34
|
|
8908
|
7
|
|
|
|
|
37
|
$next_nonblank_token = $rpre_tokens->[0]; |
8909
|
|
|
|
|
|
|
} |
8910
|
|
|
|
|
|
|
else { |
8911
|
0
|
|
|
|
|
0
|
$next_nonblank_token = '}'; |
8912
|
|
|
|
|
|
|
} |
8913
|
|
|
|
|
|
|
} |
8914
|
|
|
|
|
|
|
|
8915
|
|
|
|
|
|
|
# See what's next... |
8916
|
137
|
100
|
|
|
|
667
|
if ( $next_nonblank_token eq '{' ) { |
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
8917
|
105
|
100
|
|
|
|
325
|
if ($subname) { |
8918
|
|
|
|
|
|
|
|
8919
|
|
|
|
|
|
|
# Check for multiple definitions of a sub, but |
8920
|
|
|
|
|
|
|
# it is ok to have multiple sub BEGIN, etc, |
8921
|
|
|
|
|
|
|
# so we do not complain if name is all caps |
8922
|
95
|
50
|
33
|
|
|
556
|
if ( $rsaw_function_definition->{$subname}{$package} |
8923
|
|
|
|
|
|
|
&& $subname !~ /^[A-Z]+$/ ) |
8924
|
|
|
|
|
|
|
{ |
8925
|
|
|
|
|
|
|
my $lno = |
8926
|
0
|
|
|
|
|
0
|
$rsaw_function_definition->{$subname}{$package}; |
8927
|
0
|
0
|
|
|
|
0
|
if ( $package =~ /^\d/ ) { |
8928
|
0
|
|
|
|
|
0
|
$self->warning( |
8929
|
|
|
|
|
|
|
"already saw definition of lexical 'sub $subname' at line $lno\n" |
8930
|
|
|
|
|
|
|
); |
8931
|
|
|
|
|
|
|
|
8932
|
|
|
|
|
|
|
} |
8933
|
|
|
|
|
|
|
else { |
8934
|
0
|
|
|
|
|
0
|
if ( !DEVEL_MODE ) { |
8935
|
0
|
|
|
|
|
0
|
$self->warning( |
8936
|
|
|
|
|
|
|
"already saw definition of 'sub $subname' in package '$package' at line $lno\n" |
8937
|
|
|
|
|
|
|
); |
8938
|
|
|
|
|
|
|
} |
8939
|
|
|
|
|
|
|
} |
8940
|
|
|
|
|
|
|
} |
8941
|
95
|
|
|
|
|
380
|
$rsaw_function_definition->{$subname}{$package} = |
8942
|
|
|
|
|
|
|
$self->[_last_line_number_]; |
8943
|
|
|
|
|
|
|
} |
8944
|
|
|
|
|
|
|
} |
8945
|
|
|
|
|
|
|
elsif ( $next_nonblank_token eq ';' ) { |
8946
|
|
|
|
|
|
|
} |
8947
|
|
|
|
|
|
|
elsif ( $next_nonblank_token eq '}' ) { |
8948
|
|
|
|
|
|
|
} |
8949
|
|
|
|
|
|
|
|
8950
|
|
|
|
|
|
|
# ATTRS - if an attribute list follows, remember the name |
8951
|
|
|
|
|
|
|
# of the sub so the next opening brace can be labeled. |
8952
|
|
|
|
|
|
|
# Setting 'statement_type' causes any ':'s to introduce |
8953
|
|
|
|
|
|
|
# attributes. |
8954
|
|
|
|
|
|
|
elsif ( $next_nonblank_token eq ':' ) { |
8955
|
16
|
100
|
|
|
|
60
|
if ( $call_type == SUB_CALL ) { |
8956
|
14
|
100
|
|
|
|
70
|
$statement_type = |
8957
|
|
|
|
|
|
|
substr( $tok, 0, 3 ) eq 'sub' ? $tok : 'sub'; |
8958
|
|
|
|
|
|
|
} |
8959
|
|
|
|
|
|
|
} |
8960
|
|
|
|
|
|
|
|
8961
|
|
|
|
|
|
|
# if we stopped before an open paren ... |
8962
|
|
|
|
|
|
|
elsif ( $next_nonblank_token eq '(' ) { |
8963
|
|
|
|
|
|
|
|
8964
|
|
|
|
|
|
|
# If we DID NOT see this paren above then it must be on the |
8965
|
|
|
|
|
|
|
# next line so we will set a flag to come back here and see if |
8966
|
|
|
|
|
|
|
# it is a PROTOTYPE |
8967
|
|
|
|
|
|
|
|
8968
|
|
|
|
|
|
|
# Otherwise, we assume it is a SIGNATURE rather than a |
8969
|
|
|
|
|
|
|
# PROTOTYPE and let the normal tokenizer handle it as a list |
8970
|
15
|
100
|
|
|
|
53
|
if ( !$saw_opening_paren ) { |
8971
|
4
|
|
|
|
|
966
|
$id_scan_state = 'sub'; # we must come back to get proto |
8972
|
|
|
|
|
|
|
} |
8973
|
15
|
50
|
|
|
|
60
|
if ( $call_type == SUB_CALL ) { |
8974
|
15
|
50
|
|
|
|
64
|
$statement_type = |
8975
|
|
|
|
|
|
|
substr( $tok, 0, 3 ) eq 'sub' ? $tok : 'sub'; |
8976
|
|
|
|
|
|
|
} |
8977
|
|
|
|
|
|
|
} |
8978
|
|
|
|
|
|
|
|
8979
|
|
|
|
|
|
|
# something else.. |
8980
|
|
|
|
|
|
|
elsif ($next_nonblank_token) { |
8981
|
|
|
|
|
|
|
|
8982
|
0
|
0
|
0
|
|
|
0
|
if ( $rinput_hash->{tok} eq 'method' && $call_type == SUB_CALL ) |
8983
|
|
|
|
|
|
|
{ |
8984
|
|
|
|
|
|
|
# For a method call, silently ignore this error (rt145706) |
8985
|
|
|
|
|
|
|
# to avoid needless warnings. Example which can produce it: |
8986
|
|
|
|
|
|
|
# test(method Pack (), "method"); |
8987
|
|
|
|
|
|
|
|
8988
|
|
|
|
|
|
|
# TODO: scan for use feature 'class' and: |
8989
|
|
|
|
|
|
|
# - if we saw 'use feature 'class' then issue the warning. |
8990
|
|
|
|
|
|
|
# - if we did not see use feature 'class' then issue the |
8991
|
|
|
|
|
|
|
# warning and suggest turning off --use-feature=class |
8992
|
|
|
|
|
|
|
} |
8993
|
|
|
|
|
|
|
else { |
8994
|
0
|
0
|
|
|
|
0
|
$subname = EMPTY_STRING unless defined($subname); |
8995
|
0
|
|
|
|
|
0
|
$self->warning( |
8996
|
|
|
|
|
|
|
"expecting ':' or ';' or '{' after definition or declaration of sub '$subname' but saw '$next_nonblank_token'\n" |
8997
|
|
|
|
|
|
|
); |
8998
|
|
|
|
|
|
|
} |
8999
|
|
|
|
|
|
|
} |
9000
|
|
|
|
|
|
|
|
9001
|
|
|
|
|
|
|
# EOF technically ok |
9002
|
|
|
|
|
|
|
else { |
9003
|
|
|
|
|
|
|
} |
9004
|
|
|
|
|
|
|
|
9005
|
137
|
|
|
|
|
478
|
check_prototype( $proto, $package, $subname ); |
9006
|
|
|
|
|
|
|
} |
9007
|
|
|
|
|
|
|
|
9008
|
|
|
|
|
|
|
# no match to either sub name or prototype, but line not blank |
9009
|
|
|
|
|
|
|
else { |
9010
|
|
|
|
|
|
|
|
9011
|
|
|
|
|
|
|
} |
9012
|
301
|
|
|
|
|
1857
|
return ( $i, $tok, $type, $id_scan_state ); |
9013
|
|
|
|
|
|
|
} ## end sub do_scan_sub |
9014
|
|
|
|
|
|
|
} |
9015
|
|
|
|
|
|
|
|
9016
|
|
|
|
|
|
|
######################################################################### |
9017
|
|
|
|
|
|
|
# Tokenizer utility routines which may use CONSTANTS but no other GLOBALS |
9018
|
|
|
|
|
|
|
######################################################################### |
9019
|
|
|
|
|
|
|
|
9020
|
|
|
|
|
|
|
sub find_next_nonblank_token { |
9021
|
6160
|
|
|
6160
|
0
|
12190
|
my ( $self, $i, $rtokens, $max_token_index ) = @_; |
9022
|
|
|
|
|
|
|
|
9023
|
|
|
|
|
|
|
# Returns the next nonblank token after the token at index $i |
9024
|
|
|
|
|
|
|
# To skip past a side comment, and any subsequent block comments |
9025
|
|
|
|
|
|
|
# and blank lines, call with i=$max_token_index |
9026
|
|
|
|
|
|
|
|
9027
|
|
|
|
|
|
|
# Skip any ending blank (fix c258). It would be cleaner if caller passed |
9028
|
|
|
|
|
|
|
# $rtoken_map, so we could check for type 'b', and avoid a regex test, but |
9029
|
|
|
|
|
|
|
# benchmarking shows that this test does not take significant time. So |
9030
|
|
|
|
|
|
|
# that would be a nice update but not essential. Also note that ending |
9031
|
|
|
|
|
|
|
# blanks will not occur for text previously processed by perltidy. |
9032
|
6160
|
100
|
100
|
|
|
19364
|
if ( $i == $max_token_index - 1 |
9033
|
|
|
|
|
|
|
&& $rtokens->[$max_token_index] =~ /^\s+$/ ) |
9034
|
|
|
|
|
|
|
{ |
9035
|
9
|
|
|
|
|
26
|
$i++; |
9036
|
|
|
|
|
|
|
} |
9037
|
|
|
|
|
|
|
|
9038
|
6160
|
100
|
|
|
|
12811
|
if ( $i >= $max_token_index ) { |
9039
|
127
|
100
|
|
|
|
714
|
if ( !peeked_ahead() ) { |
9040
|
125
|
|
|
|
|
431
|
peeked_ahead(1); |
9041
|
125
|
|
|
|
|
796
|
$self->peek_ahead_for_nonblank_token( $rtokens, $max_token_index ); |
9042
|
|
|
|
|
|
|
} |
9043
|
|
|
|
|
|
|
} |
9044
|
|
|
|
|
|
|
|
9045
|
6160
|
|
|
|
|
10772
|
my $next_nonblank_token = $rtokens->[ ++$i ]; |
9046
|
6160
|
50
|
33
|
|
|
21916
|
return ( SPACE, $i ) |
9047
|
|
|
|
|
|
|
if ( !defined($next_nonblank_token) || !length($next_nonblank_token) ); |
9048
|
|
|
|
|
|
|
|
9049
|
|
|
|
|
|
|
# Quick test for nonblank ascii char. Note that we just have to |
9050
|
|
|
|
|
|
|
# examine the first character here. |
9051
|
6160
|
|
|
|
|
12367
|
my $ord = ord( substr( $next_nonblank_token, 0, 1 ) ); |
9052
|
6160
|
100
|
66
|
|
|
24256
|
if ( $ord >= ORD_PRINTABLE_MIN |
|
|
50
|
33
|
|
|
|
|
|
|
0
|
|
|
|
|
|
9053
|
|
|
|
|
|
|
&& $ord <= ORD_PRINTABLE_MAX ) |
9054
|
|
|
|
|
|
|
{ |
9055
|
2353
|
|
|
|
|
8029
|
return ( $next_nonblank_token, $i ); |
9056
|
|
|
|
|
|
|
} |
9057
|
|
|
|
|
|
|
|
9058
|
|
|
|
|
|
|
# Quick test to skip over an ascii space or tab |
9059
|
|
|
|
|
|
|
elsif ( $ord == ORD_SPACE || $ord == ORD_TAB ) { |
9060
|
3807
|
|
|
|
|
7029
|
$next_nonblank_token = $rtokens->[ ++$i ]; |
9061
|
3807
|
50
|
|
|
|
8178
|
return ( SPACE, $i ) unless defined($next_nonblank_token); |
9062
|
|
|
|
|
|
|
} |
9063
|
|
|
|
|
|
|
|
9064
|
|
|
|
|
|
|
# Slow test to skip over something else identified as whitespace |
9065
|
|
|
|
|
|
|
elsif ( $next_nonblank_token =~ /^\s*$/ ) { |
9066
|
0
|
|
|
|
|
0
|
$next_nonblank_token = $rtokens->[ ++$i ]; |
9067
|
0
|
0
|
|
|
|
0
|
return ( SPACE, $i ) unless defined($next_nonblank_token); |
9068
|
|
|
|
|
|
|
} |
9069
|
|
|
|
|
|
|
else { |
9070
|
|
|
|
|
|
|
## at nonblank |
9071
|
|
|
|
|
|
|
} |
9072
|
|
|
|
|
|
|
|
9073
|
|
|
|
|
|
|
# We should be at a nonblank now |
9074
|
3807
|
|
|
|
|
11932
|
return ( $next_nonblank_token, $i ); |
9075
|
|
|
|
|
|
|
} ## end sub find_next_nonblank_token |
9076
|
|
|
|
|
|
|
|
9077
|
|
|
|
|
|
|
sub find_next_noncomment_token { |
9078
|
98
|
|
|
98
|
0
|
351
|
my ( $self, $i, $rtokens, $max_token_index ) = @_; |
9079
|
|
|
|
|
|
|
|
9080
|
|
|
|
|
|
|
# Given the current character position, look ahead past any comments |
9081
|
|
|
|
|
|
|
# and blank lines and return the next token, including digraphs and |
9082
|
|
|
|
|
|
|
# trigraphs. |
9083
|
|
|
|
|
|
|
|
9084
|
98
|
|
|
|
|
369
|
my ( $next_nonblank_token, $i_next ) = |
9085
|
|
|
|
|
|
|
$self->find_next_nonblank_token( $i, $rtokens, $max_token_index ); |
9086
|
|
|
|
|
|
|
|
9087
|
|
|
|
|
|
|
# skip past any side comment |
9088
|
98
|
50
|
|
|
|
580
|
if ( $next_nonblank_token eq '#' ) { |
9089
|
0
|
|
|
|
|
0
|
( $next_nonblank_token, $i_next ) = |
9090
|
|
|
|
|
|
|
$self->find_next_nonblank_token( $i_next, $rtokens, |
9091
|
|
|
|
|
|
|
$max_token_index ); |
9092
|
|
|
|
|
|
|
} |
9093
|
|
|
|
|
|
|
|
9094
|
|
|
|
|
|
|
# check for a digraph |
9095
|
98
|
50
|
33
|
|
|
873
|
if ( $next_nonblank_token |
|
|
|
33
|
|
|
|
|
9096
|
|
|
|
|
|
|
&& $next_nonblank_token ne SPACE |
9097
|
|
|
|
|
|
|
&& defined( $rtokens->[ $i_next + 1 ] ) ) |
9098
|
|
|
|
|
|
|
{ |
9099
|
98
|
|
|
|
|
307
|
my $test2 = $next_nonblank_token . $rtokens->[ $i_next + 1 ]; |
9100
|
98
|
100
|
|
|
|
425
|
if ( $is_digraph{$test2} ) { |
9101
|
15
|
|
|
|
|
73
|
$next_nonblank_token = $test2; |
9102
|
15
|
|
|
|
|
51
|
$i_next = $i_next + 1; |
9103
|
|
|
|
|
|
|
|
9104
|
|
|
|
|
|
|
# check for a trigraph |
9105
|
15
|
50
|
|
|
|
72
|
if ( defined( $rtokens->[ $i_next + 1 ] ) ) { |
9106
|
15
|
|
|
|
|
52
|
my $test3 = $next_nonblank_token . $rtokens->[ $i_next + 1 ]; |
9107
|
15
|
50
|
|
|
|
69
|
if ( $is_trigraph{$test3} ) { |
9108
|
0
|
|
|
|
|
0
|
$next_nonblank_token = $test3; |
9109
|
0
|
|
|
|
|
0
|
$i_next = $i_next + 1; |
9110
|
|
|
|
|
|
|
} |
9111
|
|
|
|
|
|
|
} |
9112
|
|
|
|
|
|
|
} |
9113
|
|
|
|
|
|
|
} |
9114
|
|
|
|
|
|
|
|
9115
|
98
|
|
|
|
|
327
|
return ( $next_nonblank_token, $i_next ); |
9116
|
|
|
|
|
|
|
} ## end sub find_next_noncomment_token |
9117
|
|
|
|
|
|
|
|
9118
|
|
|
|
|
|
|
sub is_possible_numerator { |
9119
|
|
|
|
|
|
|
|
9120
|
|
|
|
|
|
|
# Look at the next non-comment character and decide if it could be a |
9121
|
|
|
|
|
|
|
# numerator. Return |
9122
|
|
|
|
|
|
|
# 1 - yes |
9123
|
|
|
|
|
|
|
# 0 - can't tell |
9124
|
|
|
|
|
|
|
# -1 - no |
9125
|
|
|
|
|
|
|
|
9126
|
0
|
|
|
0
|
0
|
0
|
my ( $self, $i, $rtokens, $max_token_index ) = @_; |
9127
|
0
|
|
|
|
|
0
|
my $is_possible_numerator = 0; |
9128
|
|
|
|
|
|
|
|
9129
|
0
|
|
|
|
|
0
|
my $next_token = $rtokens->[ $i + 1 ]; |
9130
|
0
|
0
|
|
|
|
0
|
if ( $next_token eq '=' ) { $i++; } # handle /= |
|
0
|
|
|
|
|
0
|
|
9131
|
0
|
|
|
|
|
0
|
my ( $next_nonblank_token, $i_next ) = |
9132
|
|
|
|
|
|
|
$self->find_next_nonblank_token( $i, $rtokens, $max_token_index ); |
9133
|
|
|
|
|
|
|
|
9134
|
0
|
0
|
|
|
|
0
|
if ( $next_nonblank_token eq '#' ) { |
9135
|
0
|
|
|
|
|
0
|
( $next_nonblank_token, $i_next ) = |
9136
|
|
|
|
|
|
|
$self->find_next_nonblank_token( $max_token_index, $rtokens, |
9137
|
|
|
|
|
|
|
$max_token_index ); |
9138
|
|
|
|
|
|
|
} |
9139
|
|
|
|
|
|
|
|
9140
|
0
|
0
|
|
|
|
0
|
if ( $next_nonblank_token =~ / [ \( \$ \w \. \@ ] /x ) { |
|
|
0
|
|
|
|
|
|
9141
|
0
|
|
|
|
|
0
|
$is_possible_numerator = 1; |
9142
|
|
|
|
|
|
|
} |
9143
|
|
|
|
|
|
|
elsif ( $next_nonblank_token =~ /^\s*$/ ) { |
9144
|
0
|
|
|
|
|
0
|
$is_possible_numerator = 0; |
9145
|
|
|
|
|
|
|
} |
9146
|
|
|
|
|
|
|
else { |
9147
|
0
|
|
|
|
|
0
|
$is_possible_numerator = -1; |
9148
|
|
|
|
|
|
|
} |
9149
|
|
|
|
|
|
|
|
9150
|
0
|
|
|
|
|
0
|
return $is_possible_numerator; |
9151
|
|
|
|
|
|
|
} ## end sub is_possible_numerator |
9152
|
|
|
|
|
|
|
|
9153
|
|
|
|
|
|
|
{ ## closure for sub pattern_expected |
9154
|
|
|
|
|
|
|
my %pattern_test; |
9155
|
|
|
|
|
|
|
|
9156
|
|
|
|
|
|
|
BEGIN { |
9157
|
|
|
|
|
|
|
|
9158
|
|
|
|
|
|
|
# List of tokens which may follow a pattern. Note that we will not |
9159
|
|
|
|
|
|
|
# have formed digraphs at this point, so we will see '&' instead of |
9160
|
|
|
|
|
|
|
# '&&' and '|' instead of '||' |
9161
|
|
|
|
|
|
|
|
9162
|
|
|
|
|
|
|
# /(\)|\}|\;|\&\&|\|\||and|or|while|if|unless)/ |
9163
|
39
|
|
|
39
|
|
307
|
my @q = qw( & && | || ? : + - * and or while if unless); |
9164
|
39
|
|
|
|
|
152
|
push @q, ')', '}', ']', '>', ',', ';'; |
9165
|
39
|
|
|
|
|
182620
|
@{pattern_test}{@q} = (1) x scalar(@q); |
9166
|
|
|
|
|
|
|
} ## end BEGIN |
9167
|
|
|
|
|
|
|
|
9168
|
|
|
|
|
|
|
sub pattern_expected { |
9169
|
|
|
|
|
|
|
|
9170
|
|
|
|
|
|
|
# This a filter for a possible pattern. |
9171
|
|
|
|
|
|
|
# It looks at the token after a possible pattern and tries to |
9172
|
|
|
|
|
|
|
# determine if that token could end a pattern. |
9173
|
|
|
|
|
|
|
# returns - |
9174
|
|
|
|
|
|
|
# 1 - yes |
9175
|
|
|
|
|
|
|
# 0 - can't tell |
9176
|
|
|
|
|
|
|
# -1 - no |
9177
|
0
|
|
|
0
|
0
|
0
|
my ( $self, $i, $rtokens, $max_token_index ) = @_; |
9178
|
0
|
|
|
|
|
0
|
my $is_pattern = 0; |
9179
|
|
|
|
|
|
|
|
9180
|
0
|
|
|
|
|
0
|
my $next_token = $rtokens->[ $i + 1 ]; |
9181
|
0
|
0
|
|
|
|
0
|
if ( $next_token =~ /^[msixpodualgc]/ ) { |
9182
|
0
|
|
|
|
|
0
|
$i++; |
9183
|
|
|
|
|
|
|
} # skip possible modifier |
9184
|
0
|
|
|
|
|
0
|
my ( $next_nonblank_token, $i_next ) = |
9185
|
|
|
|
|
|
|
$self->find_next_nonblank_token( $i, $rtokens, $max_token_index ); |
9186
|
|
|
|
|
|
|
|
9187
|
0
|
0
|
|
|
|
0
|
if ( $pattern_test{$next_nonblank_token} ) { |
9188
|
0
|
|
|
|
|
0
|
$is_pattern = 1; |
9189
|
|
|
|
|
|
|
} |
9190
|
|
|
|
|
|
|
else { |
9191
|
|
|
|
|
|
|
|
9192
|
|
|
|
|
|
|
# Added '#' to fix issue c044 |
9193
|
0
|
0
|
0
|
|
|
0
|
if ( $next_nonblank_token =~ /^\s*$/ |
9194
|
|
|
|
|
|
|
|| $next_nonblank_token eq '#' ) |
9195
|
|
|
|
|
|
|
{ |
9196
|
0
|
|
|
|
|
0
|
$is_pattern = 0; |
9197
|
|
|
|
|
|
|
} |
9198
|
|
|
|
|
|
|
else { |
9199
|
0
|
|
|
|
|
0
|
$is_pattern = -1; |
9200
|
|
|
|
|
|
|
} |
9201
|
|
|
|
|
|
|
} |
9202
|
0
|
|
|
|
|
0
|
return $is_pattern; |
9203
|
|
|
|
|
|
|
} ## end sub pattern_expected |
9204
|
|
|
|
|
|
|
} |
9205
|
|
|
|
|
|
|
|
9206
|
|
|
|
|
|
|
sub find_next_nonblank_token_on_this_line { |
9207
|
455
|
|
|
455
|
0
|
1081
|
my ( $i, $rtokens, $max_token_index ) = @_; |
9208
|
455
|
|
|
|
|
820
|
my $next_nonblank_token; |
9209
|
|
|
|
|
|
|
|
9210
|
455
|
100
|
|
|
|
1158
|
if ( $i < $max_token_index ) { |
9211
|
447
|
|
|
|
|
1060
|
$next_nonblank_token = $rtokens->[ ++$i ]; |
9212
|
|
|
|
|
|
|
|
9213
|
447
|
100
|
|
|
|
1987
|
if ( $next_nonblank_token =~ /^\s*$/ ) { |
9214
|
|
|
|
|
|
|
|
9215
|
120
|
100
|
|
|
|
522
|
if ( $i < $max_token_index ) { |
9216
|
118
|
|
|
|
|
321
|
$next_nonblank_token = $rtokens->[ ++$i ]; |
9217
|
|
|
|
|
|
|
} |
9218
|
|
|
|
|
|
|
} |
9219
|
|
|
|
|
|
|
} |
9220
|
|
|
|
|
|
|
else { |
9221
|
8
|
|
|
|
|
33
|
$next_nonblank_token = EMPTY_STRING; |
9222
|
|
|
|
|
|
|
} |
9223
|
455
|
|
|
|
|
1449
|
return ( $next_nonblank_token, $i ); |
9224
|
|
|
|
|
|
|
} ## end sub find_next_nonblank_token_on_this_line |
9225
|
|
|
|
|
|
|
|
9226
|
|
|
|
|
|
|
sub find_angle_operator_termination { |
9227
|
|
|
|
|
|
|
|
9228
|
|
|
|
|
|
|
# We are looking at a '<' and want to know if it is an angle operator. |
9229
|
|
|
|
|
|
|
# We are to return: |
9230
|
|
|
|
|
|
|
# $i = pretoken index of ending '>' if found, current $i otherwise |
9231
|
|
|
|
|
|
|
# $type = 'Q' if found, '>' otherwise |
9232
|
8
|
|
|
8
|
0
|
34
|
my ( $self, $input_line, $i_beg, $rtoken_map, $expecting, $max_token_index ) |
9233
|
|
|
|
|
|
|
= @_; |
9234
|
8
|
|
|
|
|
20
|
my $i = $i_beg; |
9235
|
8
|
|
|
|
|
17
|
my $type = '<'; |
9236
|
8
|
|
|
|
|
39
|
pos($input_line) = 1 + $rtoken_map->[$i]; |
9237
|
|
|
|
|
|
|
|
9238
|
8
|
|
|
|
|
20
|
my $filter; |
9239
|
|
|
|
|
|
|
|
9240
|
|
|
|
|
|
|
# we just have to find the next '>' if a term is expected |
9241
|
8
|
100
|
|
|
|
33
|
if ( $expecting == TERM ) { $filter = '[\>]' } |
|
6
|
50
|
|
|
|
18
|
|
9242
|
|
|
|
|
|
|
|
9243
|
|
|
|
|
|
|
# we have to guess if we don't know what is expected |
9244
|
2
|
|
|
|
|
5
|
elsif ( $expecting == UNKNOWN ) { $filter = '[\>\;\=\#\|\<]' } |
9245
|
|
|
|
|
|
|
|
9246
|
|
|
|
|
|
|
# shouldn't happen - we shouldn't be here if operator is expected |
9247
|
|
|
|
|
|
|
else { |
9248
|
0
|
|
|
|
|
0
|
if (DEVEL_MODE) { |
9249
|
|
|
|
|
|
|
$self->Fault(<<EOM); |
9250
|
|
|
|
|
|
|
Bad call to find_angle_operator_termination |
9251
|
|
|
|
|
|
|
EOM |
9252
|
|
|
|
|
|
|
} |
9253
|
0
|
|
|
|
|
0
|
return ( $i, $type ); |
9254
|
|
|
|
|
|
|
} |
9255
|
|
|
|
|
|
|
|
9256
|
|
|
|
|
|
|
# To illustrate what we might be looking at, in case we are |
9257
|
|
|
|
|
|
|
# guessing, here are some examples of valid angle operators |
9258
|
|
|
|
|
|
|
# (or file globs): |
9259
|
|
|
|
|
|
|
# <tmp_imp/*> |
9260
|
|
|
|
|
|
|
# <FH> |
9261
|
|
|
|
|
|
|
# <$fh> |
9262
|
|
|
|
|
|
|
# <*.c *.h> |
9263
|
|
|
|
|
|
|
# <_> |
9264
|
|
|
|
|
|
|
# <jskdfjskdfj* op/* jskdjfjkosvk*> ( glob.t) |
9265
|
|
|
|
|
|
|
# <${PREFIX}*img*.$IMAGE_TYPE> |
9266
|
|
|
|
|
|
|
# <img*.$IMAGE_TYPE> |
9267
|
|
|
|
|
|
|
# <Timg*.$IMAGE_TYPE> |
9268
|
|
|
|
|
|
|
# <$LATEX2HTMLVERSIONS${dd}html[1-9].[0-9].pl> |
9269
|
|
|
|
|
|
|
# |
9270
|
|
|
|
|
|
|
# Here are some examples of lines which do not have angle operators: |
9271
|
|
|
|
|
|
|
# return unless $self->[2]++ < $#{$self->[1]}; |
9272
|
|
|
|
|
|
|
# < 2 || @$t > |
9273
|
|
|
|
|
|
|
# |
9274
|
|
|
|
|
|
|
# the following line from dlister.pl caused trouble: |
9275
|
|
|
|
|
|
|
# print'~'x79,"\n",$D<1024?"0.$D":$D>>10,"K, $C files\n\n\n"; |
9276
|
|
|
|
|
|
|
# |
9277
|
|
|
|
|
|
|
# If the '<' starts an angle operator, it must end on this line and |
9278
|
|
|
|
|
|
|
# it must not have certain characters like ';' and '=' in it. I use |
9279
|
|
|
|
|
|
|
# this to limit the testing. This filter should be improved if |
9280
|
|
|
|
|
|
|
# possible. |
9281
|
|
|
|
|
|
|
|
9282
|
8
|
50
|
|
|
|
213
|
if ( $input_line =~ /($filter)/g ) { |
9283
|
|
|
|
|
|
|
|
9284
|
8
|
50
|
|
|
|
44
|
if ( $1 eq '>' ) { |
9285
|
|
|
|
|
|
|
|
9286
|
|
|
|
|
|
|
# We MAY have found an angle operator termination if we get |
9287
|
|
|
|
|
|
|
# here, but we need to do more to be sure we haven't been |
9288
|
|
|
|
|
|
|
# fooled. |
9289
|
8
|
|
|
|
|
21
|
my $pos = pos($input_line); |
9290
|
|
|
|
|
|
|
|
9291
|
8
|
|
|
|
|
24
|
my $pos_beg = $rtoken_map->[$i]; |
9292
|
8
|
|
|
|
|
33
|
my $str = substr( $input_line, $pos_beg, ( $pos - $pos_beg ) ); |
9293
|
|
|
|
|
|
|
|
9294
|
|
|
|
|
|
|
# Test for '<' after possible filehandle, issue c103 |
9295
|
|
|
|
|
|
|
# print $fh <>; # syntax error |
9296
|
|
|
|
|
|
|
# print $fh <DATA>; # ok |
9297
|
|
|
|
|
|
|
# print $fh < DATA>; # syntax error at '>' |
9298
|
|
|
|
|
|
|
# print STDERR < DATA>; # ok, prints word 'DATA' |
9299
|
|
|
|
|
|
|
# print BLABLA <DATA>; # ok; does nothing unless BLABLA is defined |
9300
|
8
|
100
|
|
|
|
38
|
if ( $last_nonblank_type eq 'Z' ) { |
9301
|
|
|
|
|
|
|
|
9302
|
|
|
|
|
|
|
# $str includes brackets; something like '<DATA>' |
9303
|
1
|
0
|
33
|
|
|
6
|
if ( substr( $last_nonblank_token, 0, 1 ) !~ /[A-Za-z_]/ |
9304
|
|
|
|
|
|
|
&& substr( $str, 1, 1 ) !~ /[A-Za-z_]/ ) |
9305
|
|
|
|
|
|
|
{ |
9306
|
0
|
|
|
|
|
0
|
return ( $i, $type ); |
9307
|
|
|
|
|
|
|
} |
9308
|
|
|
|
|
|
|
} |
9309
|
|
|
|
|
|
|
|
9310
|
|
|
|
|
|
|
# Reject if the closing '>' follows a '-' as in: |
9311
|
|
|
|
|
|
|
# if ( VERSION < 5.009 && $op-> name eq 'assign' ) { } |
9312
|
8
|
100
|
|
|
|
43
|
if ( $expecting eq UNKNOWN ) { |
9313
|
2
|
|
|
|
|
8
|
my $check = substr( $input_line, $pos - 2, 1 ); |
9314
|
2
|
100
|
|
|
|
7
|
if ( $check eq '-' ) { |
9315
|
1
|
|
|
|
|
19
|
return ( $i, $type ); |
9316
|
|
|
|
|
|
|
} |
9317
|
|
|
|
|
|
|
} |
9318
|
|
|
|
|
|
|
|
9319
|
|
|
|
|
|
|
######################################debug##### |
9320
|
|
|
|
|
|
|
#$self->write_diagnostics( "ANGLE? :$str\n"); |
9321
|
|
|
|
|
|
|
#print "ANGLE: found $1 at pos=$pos str=$str check=$check\n"; |
9322
|
|
|
|
|
|
|
######################################debug##### |
9323
|
7
|
|
|
|
|
22
|
$type = 'Q'; |
9324
|
7
|
|
|
|
|
64
|
my $error; |
9325
|
7
|
|
|
|
|
42
|
( $i, $error ) = |
9326
|
|
|
|
|
|
|
inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index ); |
9327
|
|
|
|
|
|
|
|
9328
|
|
|
|
|
|
|
# It may be possible that a quote ends midway in a pretoken. |
9329
|
|
|
|
|
|
|
# If this happens, it may be necessary to split the pretoken. |
9330
|
7
|
50
|
|
|
|
36
|
if ($error) { |
9331
|
0
|
|
|
|
|
0
|
if (DEVEL_MODE) { |
9332
|
|
|
|
|
|
|
$self->Fault(<<EOM); |
9333
|
|
|
|
|
|
|
unexpected error condition returned by inverse_pretoken_map |
9334
|
|
|
|
|
|
|
EOM |
9335
|
|
|
|
|
|
|
} |
9336
|
|
|
|
|
|
|
$self->warning( |
9337
|
0
|
|
|
|
|
0
|
"Possible tokinization error..please check this line\n"); |
9338
|
|
|
|
|
|
|
} |
9339
|
|
|
|
|
|
|
|
9340
|
|
|
|
|
|
|
# Check for accidental formatting of a markup language doc... |
9341
|
|
|
|
|
|
|
# Formatting will be skipped if we set _html_tag_count_ and |
9342
|
|
|
|
|
|
|
# also set a warning of any kind. |
9343
|
7
|
|
|
|
|
20
|
my $is_html_tag; |
9344
|
7
|
|
33
|
|
|
45
|
my $is_first_string = |
9345
|
|
|
|
|
|
|
$i_beg == 0 && $self->[_last_line_number_] == 1; |
9346
|
|
|
|
|
|
|
|
9347
|
|
|
|
|
|
|
# html comment '<!...' of any type |
9348
|
7
|
50
|
33
|
|
|
94
|
if ( $str =~ /^<\s*!/ ) { |
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
9349
|
0
|
|
|
|
|
0
|
$is_html_tag = 1; |
9350
|
0
|
0
|
|
|
|
0
|
if ($is_first_string) { |
9351
|
0
|
|
|
|
|
0
|
$self->warning( |
9352
|
|
|
|
|
|
|
"looks like a markup language, continuing error checks\n" |
9353
|
|
|
|
|
|
|
); |
9354
|
|
|
|
|
|
|
} |
9355
|
|
|
|
|
|
|
} |
9356
|
|
|
|
|
|
|
|
9357
|
|
|
|
|
|
|
# html end tag, something like </h1> |
9358
|
|
|
|
|
|
|
elsif ( $str =~ /^<\s*\/\w+\s*>$/ ) { |
9359
|
0
|
|
|
|
|
0
|
$is_html_tag = 1; |
9360
|
|
|
|
|
|
|
} |
9361
|
|
|
|
|
|
|
|
9362
|
|
|
|
|
|
|
# xml prolog? |
9363
|
|
|
|
|
|
|
elsif ( $str =~ /^<\?xml\s.*\?>$/i && $is_first_string ) { |
9364
|
0
|
|
|
|
|
0
|
$is_html_tag = 1; |
9365
|
0
|
|
|
|
|
0
|
$self->warning( |
9366
|
|
|
|
|
|
|
"looks like a markup language, continuing error checks\n"); |
9367
|
|
|
|
|
|
|
} |
9368
|
|
|
|
|
|
|
else { |
9369
|
|
|
|
|
|
|
## doesn't look like a markup tag |
9370
|
|
|
|
|
|
|
} |
9371
|
|
|
|
|
|
|
|
9372
|
7
|
50
|
|
|
|
29
|
if ($is_html_tag) { |
9373
|
0
|
|
|
|
|
0
|
$self->[_html_tag_count_]++; |
9374
|
|
|
|
|
|
|
} |
9375
|
|
|
|
|
|
|
|
9376
|
|
|
|
|
|
|
# count blanks on inside of brackets |
9377
|
7
|
|
|
|
|
17
|
my $blank_count = 0; |
9378
|
7
|
100
|
|
|
|
45
|
$blank_count++ if ( $str =~ /<\s+/ ); |
9379
|
7
|
100
|
|
|
|
39
|
$blank_count++ if ( $str =~ /\s+>/ ); |
9380
|
|
|
|
|
|
|
|
9381
|
|
|
|
|
|
|
# Now let's see where we stand.... |
9382
|
|
|
|
|
|
|
# OK if math op not possible |
9383
|
7
|
100
|
|
|
|
35
|
if ( $expecting == TERM ) { |
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
9384
|
|
|
|
|
|
|
} |
9385
|
|
|
|
|
|
|
|
9386
|
|
|
|
|
|
|
elsif ($is_html_tag) { |
9387
|
|
|
|
|
|
|
} |
9388
|
|
|
|
|
|
|
|
9389
|
|
|
|
|
|
|
# OK if there are no more than 2 non-blank pre-tokens inside |
9390
|
|
|
|
|
|
|
# (not possible to write 2 token math between < and >) |
9391
|
|
|
|
|
|
|
# This catches most common cases |
9392
|
|
|
|
|
|
|
elsif ( $i <= $i_beg + 3 + $blank_count ) { |
9393
|
|
|
|
|
|
|
|
9394
|
|
|
|
|
|
|
# No longer any need to document this common case |
9395
|
|
|
|
|
|
|
## $self->write_diagnostics("ANGLE(1 or 2 tokens): $str\n"); |
9396
|
|
|
|
|
|
|
} |
9397
|
|
|
|
|
|
|
|
9398
|
|
|
|
|
|
|
# OK if there is some kind of identifier inside |
9399
|
|
|
|
|
|
|
# print $fh <tvg::INPUT>; |
9400
|
|
|
|
|
|
|
elsif ( $str =~ /^<\s*\$?(\w|::|\s)+\s*>$/ ) { |
9401
|
0
|
|
|
|
|
0
|
$self->write_diagnostics("ANGLE (contains identifier): $str\n"); |
9402
|
|
|
|
|
|
|
} |
9403
|
|
|
|
|
|
|
|
9404
|
|
|
|
|
|
|
# Not sure.. |
9405
|
|
|
|
|
|
|
else { |
9406
|
|
|
|
|
|
|
|
9407
|
|
|
|
|
|
|
# Let's try a Brace Test: any braces inside must balance |
9408
|
0
|
|
|
|
|
0
|
my $br = 0; |
9409
|
0
|
|
|
|
|
0
|
while ( $str =~ /\{/g ) { $br++ } |
|
0
|
|
|
|
|
0
|
|
9410
|
0
|
|
|
|
|
0
|
while ( $str =~ /\}/g ) { $br-- } |
|
0
|
|
|
|
|
0
|
|
9411
|
0
|
|
|
|
|
0
|
my $sb = 0; |
9412
|
0
|
|
|
|
|
0
|
while ( $str =~ /\[/g ) { $sb++ } |
|
0
|
|
|
|
|
0
|
|
9413
|
0
|
|
|
|
|
0
|
while ( $str =~ /\]/g ) { $sb-- } |
|
0
|
|
|
|
|
0
|
|
9414
|
0
|
|
|
|
|
0
|
my $pr = 0; |
9415
|
0
|
|
|
|
|
0
|
while ( $str =~ /\(/g ) { $pr++ } |
|
0
|
|
|
|
|
0
|
|
9416
|
0
|
|
|
|
|
0
|
while ( $str =~ /\)/g ) { $pr-- } |
|
0
|
|
|
|
|
0
|
|
9417
|
|
|
|
|
|
|
|
9418
|
|
|
|
|
|
|
# if braces do not balance - not angle operator |
9419
|
0
|
0
|
0
|
|
|
0
|
if ( $br || $sb || $pr ) { |
|
|
|
0
|
|
|
|
|
9420
|
0
|
|
|
|
|
0
|
$i = $i_beg; |
9421
|
0
|
|
|
|
|
0
|
$type = '<'; |
9422
|
0
|
|
|
|
|
0
|
$self->write_diagnostics( |
9423
|
|
|
|
|
|
|
"NOT ANGLE (BRACE={$br ($pr [$sb ):$str\n"); |
9424
|
|
|
|
|
|
|
} |
9425
|
|
|
|
|
|
|
|
9426
|
|
|
|
|
|
|
# we should keep doing more checks here...to be continued |
9427
|
|
|
|
|
|
|
# Tentatively accepting this as a valid angle operator. |
9428
|
|
|
|
|
|
|
# There are lots more things that can be checked. |
9429
|
|
|
|
|
|
|
else { |
9430
|
0
|
|
|
|
|
0
|
$self->write_diagnostics( |
9431
|
|
|
|
|
|
|
"ANGLE-Guessing yes: $str expecting=$expecting\n"); |
9432
|
0
|
|
|
|
|
0
|
$self->write_logfile_entry( |
9433
|
|
|
|
|
|
|
"Guessing angle operator here: $str\n"); |
9434
|
|
|
|
|
|
|
} |
9435
|
|
|
|
|
|
|
} |
9436
|
|
|
|
|
|
|
} |
9437
|
|
|
|
|
|
|
|
9438
|
|
|
|
|
|
|
# didn't find ending > |
9439
|
|
|
|
|
|
|
else { |
9440
|
0
|
0
|
|
|
|
0
|
if ( $expecting == TERM ) { |
9441
|
0
|
|
|
|
|
0
|
$self->warning("No ending > for angle operator\n"); |
9442
|
|
|
|
|
|
|
} |
9443
|
|
|
|
|
|
|
} |
9444
|
|
|
|
|
|
|
} |
9445
|
7
|
|
|
|
|
37
|
return ( $i, $type ); |
9446
|
|
|
|
|
|
|
} ## end sub find_angle_operator_termination |
9447
|
|
|
|
|
|
|
|
9448
|
|
|
|
|
|
|
sub scan_number_do { |
9449
|
|
|
|
|
|
|
|
9450
|
|
|
|
|
|
|
# scan a number in any of the formats that Perl accepts |
9451
|
|
|
|
|
|
|
# Underbars (_) are allowed in decimal numbers. |
9452
|
|
|
|
|
|
|
# input parameters - |
9453
|
|
|
|
|
|
|
# $input_line - the string to scan |
9454
|
|
|
|
|
|
|
# $i - pre_token index to start scanning |
9455
|
|
|
|
|
|
|
# $rtoken_map - reference to the pre_token map giving starting |
9456
|
|
|
|
|
|
|
# character position in $input_line of token $i |
9457
|
|
|
|
|
|
|
# output parameters - |
9458
|
|
|
|
|
|
|
# $i - last pre_token index of the number just scanned |
9459
|
|
|
|
|
|
|
# number - the number (characters); or undef if not a number |
9460
|
|
|
|
|
|
|
|
9461
|
629
|
|
|
629
|
0
|
1994
|
my ( $self, $input_line, $i, $rtoken_map, $input_type, $max_token_index ) = |
9462
|
|
|
|
|
|
|
@_; |
9463
|
629
|
|
|
|
|
1157
|
my $pos_beg = $rtoken_map->[$i]; |
9464
|
629
|
|
|
|
|
996
|
my $pos; |
9465
|
629
|
|
|
|
|
985
|
my $i_begin = $i; |
9466
|
629
|
|
|
|
|
999
|
my $number = undef; |
9467
|
629
|
|
|
|
|
999
|
my $type = $input_type; |
9468
|
|
|
|
|
|
|
|
9469
|
629
|
|
|
|
|
2584
|
my $first_char = substr( $input_line, $pos_beg, 1 ); |
9470
|
|
|
|
|
|
|
|
9471
|
|
|
|
|
|
|
# Look for bad starting characters; Shouldn't happen.. |
9472
|
629
|
50
|
|
|
|
2759
|
if ( $first_char !~ /[\d\.\+\-Ee]/ ) { |
9473
|
0
|
|
|
|
|
0
|
if (DEVEL_MODE) { |
9474
|
|
|
|
|
|
|
$self->Fault(<<EOM); |
9475
|
|
|
|
|
|
|
Program bug - scan_number given bad first character = '$first_char' |
9476
|
|
|
|
|
|
|
EOM |
9477
|
|
|
|
|
|
|
} |
9478
|
0
|
|
|
|
|
0
|
return ( $i, $type, $number ); |
9479
|
|
|
|
|
|
|
} |
9480
|
|
|
|
|
|
|
|
9481
|
|
|
|
|
|
|
# handle v-string without leading 'v' character ('Two Dot' rule) |
9482
|
|
|
|
|
|
|
# (vstring.t) |
9483
|
|
|
|
|
|
|
# Here is the format prior to including underscores: |
9484
|
|
|
|
|
|
|
## if ( $input_line =~ /\G((\d+)?\.\d+(\.\d+)+)/g ) { |
9485
|
629
|
|
|
|
|
1871
|
pos($input_line) = $pos_beg; |
9486
|
629
|
50
|
|
|
|
3071
|
if ( $input_line =~ /\G((\d[_\d]*)?\.[\d_]+(\.[\d_]+)+)/g ) { |
9487
|
0
|
|
|
|
|
0
|
$pos = pos($input_line); |
9488
|
0
|
|
|
|
|
0
|
my $numc = $pos - $pos_beg; |
9489
|
0
|
|
|
|
|
0
|
$number = substr( $input_line, $pos_beg, $numc ); |
9490
|
0
|
|
|
|
|
0
|
$type = 'v'; |
9491
|
0
|
|
|
|
|
0
|
$self->report_v_string($number); |
9492
|
|
|
|
|
|
|
} |
9493
|
|
|
|
|
|
|
|
9494
|
|
|
|
|
|
|
# handle octal, hex, binary |
9495
|
629
|
50
|
|
|
|
1618
|
if ( !defined($number) ) { |
9496
|
629
|
|
|
|
|
1266
|
pos($input_line) = $pos_beg; |
9497
|
|
|
|
|
|
|
|
9498
|
|
|
|
|
|
|
# Perl 5.22 added floating point literals, like '0x0.b17217f7d1cf78p0' |
9499
|
|
|
|
|
|
|
# For reference, the format prior to hex floating point is: |
9500
|
|
|
|
|
|
|
# /\G[+-]?0(([xX][0-9a-fA-F_]+)|([0-7_]+)|([bB][01_]+))/g ) |
9501
|
|
|
|
|
|
|
# (hex) (octal) (binary) |
9502
|
629
|
100
|
|
|
|
2375
|
if ( |
9503
|
|
|
|
|
|
|
$input_line =~ m{ |
9504
|
|
|
|
|
|
|
|
9505
|
|
|
|
|
|
|
\G[+-]?0( # leading [signed] 0 |
9506
|
|
|
|
|
|
|
|
9507
|
|
|
|
|
|
|
# a hex float, i.e. '0x0.b17217f7d1cf78p0' |
9508
|
|
|
|
|
|
|
([xX][0-9a-fA-F_]* # X and optional leading digits |
9509
|
|
|
|
|
|
|
(\.([0-9a-fA-F][0-9a-fA-F_]*)?)? # optional decimal and fraction |
9510
|
|
|
|
|
|
|
[Pp][+-]?[0-9a-fA-F] # REQUIRED exponent with digit |
9511
|
|
|
|
|
|
|
[0-9a-fA-F_]*) # optional Additional exponent digits |
9512
|
|
|
|
|
|
|
|
9513
|
|
|
|
|
|
|
# or hex integer |
9514
|
|
|
|
|
|
|
|([xX][0-9a-fA-F_]+) |
9515
|
|
|
|
|
|
|
|
9516
|
|
|
|
|
|
|
# or octal fraction |
9517
|
|
|
|
|
|
|
|([oO]?[0-7_]+ # string of octal digits |
9518
|
|
|
|
|
|
|
(\.([0-7][0-7_]*)?)? # optional decimal and fraction |
9519
|
|
|
|
|
|
|
[Pp][+-]?[0-7] # REQUIRED exponent, no underscore |
9520
|
|
|
|
|
|
|
[0-7_]*) # Additional exponent digits with underscores |
9521
|
|
|
|
|
|
|
|
9522
|
|
|
|
|
|
|
# or octal integer |
9523
|
|
|
|
|
|
|
|([oO]?[0-7_]+) # string of octal digits |
9524
|
|
|
|
|
|
|
|
9525
|
|
|
|
|
|
|
# or a binary float |
9526
|
|
|
|
|
|
|
|([bB][01_]* # 'b' with string of binary digits |
9527
|
|
|
|
|
|
|
(\.([01][01_]*)?)? # optional decimal and fraction |
9528
|
|
|
|
|
|
|
[Pp][+-]?[01] # Required exponent indicator, no underscore |
9529
|
|
|
|
|
|
|
[01_]*) # additional exponent bits |
9530
|
|
|
|
|
|
|
|
9531
|
|
|
|
|
|
|
# or binary integer |
9532
|
|
|
|
|
|
|
|([bB][01_]+) # 'b' with string of binary digits |
9533
|
|
|
|
|
|
|
|
9534
|
|
|
|
|
|
|
)}gx |
9535
|
|
|
|
|
|
|
) |
9536
|
|
|
|
|
|
|
{ |
9537
|
72
|
|
|
|
|
150
|
$pos = pos($input_line); |
9538
|
72
|
|
|
|
|
120
|
my $numc = $pos - $pos_beg; |
9539
|
72
|
|
|
|
|
151
|
$number = substr( $input_line, $pos_beg, $numc ); |
9540
|
72
|
|
|
|
|
133
|
$type = 'n'; |
9541
|
|
|
|
|
|
|
} |
9542
|
|
|
|
|
|
|
} |
9543
|
|
|
|
|
|
|
|
9544
|
|
|
|
|
|
|
# handle decimal |
9545
|
629
|
100
|
|
|
|
1487
|
if ( !defined($number) ) { |
9546
|
557
|
|
|
|
|
1037
|
pos($input_line) = $pos_beg; |
9547
|
|
|
|
|
|
|
|
9548
|
557
|
50
|
|
|
|
2679
|
if ( $input_line =~ /\G([+-]?[\d_]*(\.[\d_]*)?([Ee][+-]?(\d+))?)/g ) { |
9549
|
557
|
|
|
|
|
1032
|
$pos = pos($input_line); |
9550
|
|
|
|
|
|
|
|
9551
|
|
|
|
|
|
|
# watch out for things like 0..40 which would give 0. by this; |
9552
|
557
|
100
|
100
|
|
|
1934
|
if ( ( substr( $input_line, $pos - 1, 1 ) eq '.' ) |
9553
|
|
|
|
|
|
|
&& ( substr( $input_line, $pos, 1 ) eq '.' ) ) |
9554
|
|
|
|
|
|
|
{ |
9555
|
37
|
|
|
|
|
70
|
$pos--; |
9556
|
|
|
|
|
|
|
} |
9557
|
557
|
|
|
|
|
983
|
my $numc = $pos - $pos_beg; |
9558
|
557
|
|
|
|
|
1041
|
$number = substr( $input_line, $pos_beg, $numc ); |
9559
|
557
|
|
|
|
|
981
|
$type = 'n'; |
9560
|
|
|
|
|
|
|
} |
9561
|
|
|
|
|
|
|
} |
9562
|
|
|
|
|
|
|
|
9563
|
|
|
|
|
|
|
# filter out non-numbers like e + - . e2 .e3 +e6 |
9564
|
|
|
|
|
|
|
# the rule: at least one digit, and any 'e' must be preceded by a digit |
9565
|
629
|
100
|
66
|
|
|
3254
|
if ( |
|
|
|
66
|
|
|
|
|
9566
|
|
|
|
|
|
|
$number !~ /\d/ # no digits |
9567
|
|
|
|
|
|
|
|| ( $number =~ /^(.*)[eE]/ |
9568
|
|
|
|
|
|
|
&& $1 !~ /\d/ ) # or no digits before the 'e' |
9569
|
|
|
|
|
|
|
) |
9570
|
|
|
|
|
|
|
{ |
9571
|
303
|
|
|
|
|
515
|
$number = undef; |
9572
|
303
|
|
|
|
|
526
|
$type = $input_type; |
9573
|
303
|
|
|
|
|
1253
|
return ( $i, $type, $number ); |
9574
|
|
|
|
|
|
|
} |
9575
|
|
|
|
|
|
|
|
9576
|
|
|
|
|
|
|
# Found a number; now we must convert back from character position |
9577
|
|
|
|
|
|
|
# to pre_token index. An error here implies user syntax error. |
9578
|
|
|
|
|
|
|
# An example would be an invalid octal number like '009'. |
9579
|
326
|
|
|
|
|
610
|
my $error; |
9580
|
326
|
|
|
|
|
872
|
( $i, $error ) = |
9581
|
|
|
|
|
|
|
inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index ); |
9582
|
326
|
50
|
|
|
|
916
|
if ($error) { $self->warning("Possibly invalid number\n") } |
|
0
|
|
|
|
|
0
|
|
9583
|
|
|
|
|
|
|
|
9584
|
326
|
|
|
|
|
1167
|
return ( $i, $type, $number ); |
9585
|
|
|
|
|
|
|
} ## end sub scan_number_do |
9586
|
|
|
|
|
|
|
|
9587
|
|
|
|
|
|
|
sub inverse_pretoken_map { |
9588
|
|
|
|
|
|
|
|
9589
|
|
|
|
|
|
|
# Starting with the current pre_token index $i, scan forward until |
9590
|
|
|
|
|
|
|
# finding the index of the next pre_token whose position is $pos. |
9591
|
2167
|
|
|
2167
|
0
|
5110
|
my ( $i, $pos, $rtoken_map, $max_token_index ) = @_; |
9592
|
2167
|
|
|
|
|
3867
|
my $error = 0; |
9593
|
|
|
|
|
|
|
|
9594
|
2167
|
|
|
|
|
5630
|
while ( ++$i <= $max_token_index ) { |
9595
|
|
|
|
|
|
|
|
9596
|
4035
|
100
|
|
|
|
9661
|
if ( $pos <= $rtoken_map->[$i] ) { |
9597
|
|
|
|
|
|
|
|
9598
|
|
|
|
|
|
|
# Let the calling routine handle errors in which we do not |
9599
|
|
|
|
|
|
|
# land on a pre-token boundary. It can happen by running |
9600
|
|
|
|
|
|
|
# perltidy on some non-perl scripts, for example. |
9601
|
2132
|
50
|
|
|
|
5297
|
if ( $pos < $rtoken_map->[$i] ) { $error = 1 } |
|
0
|
|
|
|
|
0
|
|
9602
|
2132
|
|
|
|
|
3128
|
$i--; |
9603
|
2132
|
|
|
|
|
3666
|
last; |
9604
|
|
|
|
|
|
|
} |
9605
|
|
|
|
|
|
|
} |
9606
|
2167
|
|
|
|
|
5445
|
return ( $i, $error ); |
9607
|
|
|
|
|
|
|
} ## end sub inverse_pretoken_map |
9608
|
|
|
|
|
|
|
|
9609
|
|
|
|
|
|
|
sub find_here_doc { |
9610
|
|
|
|
|
|
|
|
9611
|
|
|
|
|
|
|
# find the target of a here document, if any |
9612
|
|
|
|
|
|
|
# input parameters: |
9613
|
|
|
|
|
|
|
# $i - token index of the second < of << |
9614
|
|
|
|
|
|
|
# ($i must be less than the last token index if this is called) |
9615
|
|
|
|
|
|
|
# output parameters: |
9616
|
|
|
|
|
|
|
# $found_target = 0 didn't find target; =1 found target |
9617
|
|
|
|
|
|
|
# HERE_TARGET - the target string (may be empty string) |
9618
|
|
|
|
|
|
|
# $i - unchanged if not here doc, |
9619
|
|
|
|
|
|
|
# or index of the last token of the here target |
9620
|
|
|
|
|
|
|
# $saw_error - flag noting unbalanced quote on here target |
9621
|
9
|
|
|
9
|
0
|
42
|
my ( $self, $expecting, $i, $rtokens, $rtoken_map, $max_token_index ) = @_; |
9622
|
|
|
|
|
|
|
|
9623
|
9
|
|
|
|
|
24
|
my $ibeg = $i; |
9624
|
9
|
|
|
|
|
26
|
my $found_target = 0; |
9625
|
9
|
|
|
|
|
26
|
my $here_doc_target = EMPTY_STRING; |
9626
|
9
|
|
|
|
|
25
|
my $here_quote_character = EMPTY_STRING; |
9627
|
9
|
|
|
|
|
21
|
my $saw_error = 0; |
9628
|
9
|
|
|
|
|
22
|
my ( $next_nonblank_token, $i_next_nonblank, $next_token ); |
9629
|
9
|
|
|
|
|
29
|
$next_token = $rtokens->[ $i + 1 ]; |
9630
|
|
|
|
|
|
|
|
9631
|
|
|
|
|
|
|
# perl allows a backslash before the target string (heredoc.t) |
9632
|
9
|
|
|
|
|
20
|
my $backslash = 0; |
9633
|
9
|
50
|
|
|
|
42
|
if ( $next_token eq '\\' ) { |
9634
|
0
|
|
|
|
|
0
|
$backslash = 1; |
9635
|
0
|
|
|
|
|
0
|
$next_token = $rtokens->[ $i + 2 ]; |
9636
|
|
|
|
|
|
|
} |
9637
|
|
|
|
|
|
|
|
9638
|
9
|
|
|
|
|
54
|
( $next_nonblank_token, $i_next_nonblank ) = |
9639
|
|
|
|
|
|
|
find_next_nonblank_token_on_this_line( $i, $rtokens, $max_token_index ); |
9640
|
|
|
|
|
|
|
|
9641
|
9
|
100
|
33
|
|
|
78
|
if ( $next_nonblank_token =~ /[\'\"\`]/ ) { |
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
9642
|
|
|
|
|
|
|
|
9643
|
6
|
|
|
|
|
18
|
my $in_quote = 1; |
9644
|
6
|
|
|
|
|
12
|
my $quote_depth = 0; |
9645
|
6
|
|
|
|
|
16
|
my $quote_pos = 0; |
9646
|
6
|
|
|
|
|
10
|
my $quoted_string; |
9647
|
|
|
|
|
|
|
|
9648
|
|
|
|
|
|
|
( |
9649
|
6
|
|
|
|
|
36
|
$i, $in_quote, $here_quote_character, $quote_pos, $quote_depth, |
9650
|
|
|
|
|
|
|
$quoted_string |
9651
|
|
|
|
|
|
|
) |
9652
|
|
|
|
|
|
|
= $self->follow_quoted_string( $i_next_nonblank, $in_quote, $rtokens, |
9653
|
|
|
|
|
|
|
$here_quote_character, $quote_pos, $quote_depth, $max_token_index ); |
9654
|
|
|
|
|
|
|
|
9655
|
6
|
50
|
|
|
|
27
|
if ($in_quote) { # didn't find end of quote, so no target found |
9656
|
0
|
|
|
|
|
0
|
$i = $ibeg; |
9657
|
0
|
0
|
|
|
|
0
|
if ( $expecting == TERM ) { |
9658
|
0
|
|
|
|
|
0
|
$self->warning( |
9659
|
|
|
|
|
|
|
"Did not find here-doc string terminator ($here_quote_character) before end of line \n" |
9660
|
|
|
|
|
|
|
); |
9661
|
0
|
|
|
|
|
0
|
$saw_error = 1; |
9662
|
|
|
|
|
|
|
} |
9663
|
|
|
|
|
|
|
} |
9664
|
|
|
|
|
|
|
else { # found ending quote |
9665
|
6
|
|
|
|
|
14
|
$found_target = 1; |
9666
|
|
|
|
|
|
|
|
9667
|
6
|
|
|
|
|
14
|
my $tokj; |
9668
|
6
|
|
|
|
|
28
|
foreach my $j ( $i_next_nonblank + 1 .. $i - 1 ) { |
9669
|
6
|
|
|
|
|
20
|
$tokj = $rtokens->[$j]; |
9670
|
|
|
|
|
|
|
|
9671
|
|
|
|
|
|
|
# we have to remove any backslash before the quote character |
9672
|
|
|
|
|
|
|
# so that the here-doc-target exactly matches this string |
9673
|
|
|
|
|
|
|
next |
9674
|
6
|
0
|
33
|
|
|
35
|
if ( $tokj eq "\\" |
|
|
|
33
|
|
|
|
|
9675
|
|
|
|
|
|
|
&& $j < $i - 1 |
9676
|
|
|
|
|
|
|
&& $rtokens->[ $j + 1 ] eq $here_quote_character ); |
9677
|
6
|
|
|
|
|
27
|
$here_doc_target .= $tokj; |
9678
|
|
|
|
|
|
|
} |
9679
|
|
|
|
|
|
|
} |
9680
|
|
|
|
|
|
|
} |
9681
|
|
|
|
|
|
|
|
9682
|
|
|
|
|
|
|
elsif ( ( $next_token =~ /^\s*$/ ) and ( $expecting == TERM ) ) { |
9683
|
0
|
|
|
|
|
0
|
$found_target = 1; |
9684
|
0
|
|
|
|
|
0
|
$self->write_logfile_entry( |
9685
|
|
|
|
|
|
|
"found blank here-target after <<; suggest using \"\"\n"); |
9686
|
0
|
|
|
|
|
0
|
$i = $ibeg; |
9687
|
|
|
|
|
|
|
} |
9688
|
|
|
|
|
|
|
elsif ( $next_token =~ /^\w/ ) { # simple bareword or integer after << |
9689
|
|
|
|
|
|
|
|
9690
|
3
|
|
|
|
|
7
|
my $here_doc_expected; |
9691
|
3
|
50
|
|
|
|
16
|
if ( $expecting == UNKNOWN ) { |
9692
|
0
|
|
|
|
|
0
|
$here_doc_expected = $self->guess_if_here_doc($next_token); |
9693
|
|
|
|
|
|
|
} |
9694
|
|
|
|
|
|
|
else { |
9695
|
3
|
|
|
|
|
8
|
$here_doc_expected = 1; |
9696
|
|
|
|
|
|
|
} |
9697
|
|
|
|
|
|
|
|
9698
|
3
|
50
|
|
|
|
14
|
if ($here_doc_expected) { |
9699
|
3
|
|
|
|
|
8
|
$found_target = 1; |
9700
|
3
|
|
|
|
|
7
|
$here_doc_target = $next_token; |
9701
|
3
|
|
|
|
|
7
|
$i = $ibeg + 1; |
9702
|
|
|
|
|
|
|
} |
9703
|
|
|
|
|
|
|
|
9704
|
|
|
|
|
|
|
} |
9705
|
|
|
|
|
|
|
else { |
9706
|
|
|
|
|
|
|
|
9707
|
0
|
0
|
|
|
|
0
|
if ( $expecting == TERM ) { |
9708
|
0
|
|
|
|
|
0
|
$found_target = 1; |
9709
|
0
|
|
|
|
|
0
|
$self->write_logfile_entry("Note: bare here-doc operator <<\n"); |
9710
|
|
|
|
|
|
|
} |
9711
|
|
|
|
|
|
|
else { |
9712
|
0
|
|
|
|
|
0
|
$i = $ibeg; |
9713
|
|
|
|
|
|
|
} |
9714
|
|
|
|
|
|
|
} |
9715
|
|
|
|
|
|
|
|
9716
|
|
|
|
|
|
|
# patch to neglect any prepended backslash |
9717
|
9
|
50
|
33
|
|
|
70
|
if ( $found_target && $backslash ) { $i++ } |
|
0
|
|
|
|
|
0
|
|
9718
|
|
|
|
|
|
|
|
9719
|
9
|
|
|
|
|
49
|
return ( $found_target, $here_doc_target, $here_quote_character, $i, |
9720
|
|
|
|
|
|
|
$saw_error ); |
9721
|
|
|
|
|
|
|
} ## end sub find_here_doc |
9722
|
|
|
|
|
|
|
|
9723
|
|
|
|
|
|
|
sub do_quote { |
9724
|
|
|
|
|
|
|
|
9725
|
|
|
|
|
|
|
# follow (or continue following) quoted string(s) |
9726
|
|
|
|
|
|
|
# $in_quote return code: |
9727
|
|
|
|
|
|
|
# 0 - ok, found end |
9728
|
|
|
|
|
|
|
# 1 - still must find end of quote whose target is $quote_character |
9729
|
|
|
|
|
|
|
# 2 - still looking for end of first of two quotes |
9730
|
|
|
|
|
|
|
# |
9731
|
|
|
|
|
|
|
# Returns updated strings: |
9732
|
|
|
|
|
|
|
# $quoted_string_1 = quoted string seen while in_quote=1 |
9733
|
|
|
|
|
|
|
# $quoted_string_2 = quoted string seen while in_quote=2 |
9734
|
|
|
|
|
|
|
my ( |
9735
|
|
|
|
|
|
|
|
9736
|
2768
|
|
|
2768
|
0
|
8691
|
$self, |
9737
|
|
|
|
|
|
|
$i, |
9738
|
|
|
|
|
|
|
$in_quote, |
9739
|
|
|
|
|
|
|
$quote_character, |
9740
|
|
|
|
|
|
|
$quote_pos, |
9741
|
|
|
|
|
|
|
$quote_depth, |
9742
|
|
|
|
|
|
|
$quoted_string_1, |
9743
|
|
|
|
|
|
|
$quoted_string_2, |
9744
|
|
|
|
|
|
|
$rtokens, |
9745
|
|
|
|
|
|
|
$rtoken_map, |
9746
|
|
|
|
|
|
|
$max_token_index, |
9747
|
|
|
|
|
|
|
|
9748
|
|
|
|
|
|
|
) = @_; |
9749
|
|
|
|
|
|
|
|
9750
|
2768
|
|
|
|
|
4242
|
my $quoted_string; |
9751
|
2768
|
100
|
|
|
|
6470
|
if ( $in_quote == 2 ) { # two quotes/quoted_string_1s to follow |
9752
|
29
|
|
|
|
|
70
|
my $ibeg = $i; |
9753
|
|
|
|
|
|
|
( |
9754
|
29
|
|
|
|
|
138
|
$i, $in_quote, $quote_character, $quote_pos, $quote_depth, |
9755
|
|
|
|
|
|
|
$quoted_string |
9756
|
|
|
|
|
|
|
) |
9757
|
|
|
|
|
|
|
= $self->follow_quoted_string( $ibeg, $in_quote, $rtokens, |
9758
|
|
|
|
|
|
|
$quote_character, $quote_pos, $quote_depth, $max_token_index ); |
9759
|
29
|
|
|
|
|
72
|
$quoted_string_2 .= $quoted_string; |
9760
|
29
|
50
|
|
|
|
89
|
if ( $in_quote == 1 ) { |
9761
|
29
|
50
|
|
|
|
124
|
if ( $quote_character =~ /[\{\[\<\(]/ ) { $i++; } |
|
0
|
|
|
|
|
0
|
|
9762
|
29
|
|
|
|
|
61
|
$quote_character = EMPTY_STRING; |
9763
|
|
|
|
|
|
|
} |
9764
|
|
|
|
|
|
|
else { |
9765
|
0
|
|
|
|
|
0
|
$quoted_string_2 .= "\n"; |
9766
|
|
|
|
|
|
|
} |
9767
|
|
|
|
|
|
|
} |
9768
|
|
|
|
|
|
|
|
9769
|
2768
|
50
|
|
|
|
6124
|
if ( $in_quote == 1 ) { # one (more) quote to follow |
9770
|
2768
|
|
|
|
|
4273
|
my $ibeg = $i; |
9771
|
|
|
|
|
|
|
( |
9772
|
2768
|
|
|
|
|
7826
|
$i, $in_quote, $quote_character, $quote_pos, $quote_depth, |
9773
|
|
|
|
|
|
|
$quoted_string |
9774
|
|
|
|
|
|
|
) |
9775
|
|
|
|
|
|
|
= $self->follow_quoted_string( $ibeg, $in_quote, $rtokens, |
9776
|
|
|
|
|
|
|
$quote_character, $quote_pos, $quote_depth, $max_token_index ); |
9777
|
2768
|
|
|
|
|
5949
|
$quoted_string_1 .= $quoted_string; |
9778
|
2768
|
100
|
|
|
|
6305
|
if ( $in_quote == 1 ) { |
9779
|
183
|
|
|
|
|
377
|
$quoted_string_1 .= "\n"; |
9780
|
|
|
|
|
|
|
} |
9781
|
|
|
|
|
|
|
} |
9782
|
|
|
|
|
|
|
return ( |
9783
|
|
|
|
|
|
|
|
9784
|
2768
|
|
|
|
|
9533
|
$i, |
9785
|
|
|
|
|
|
|
$in_quote, |
9786
|
|
|
|
|
|
|
$quote_character, |
9787
|
|
|
|
|
|
|
$quote_pos, |
9788
|
|
|
|
|
|
|
$quote_depth, |
9789
|
|
|
|
|
|
|
$quoted_string_1, |
9790
|
|
|
|
|
|
|
$quoted_string_2, |
9791
|
|
|
|
|
|
|
|
9792
|
|
|
|
|
|
|
); |
9793
|
|
|
|
|
|
|
} ## end sub do_quote |
9794
|
|
|
|
|
|
|
|
9795
|
|
|
|
|
|
|
sub follow_quoted_string { |
9796
|
|
|
|
|
|
|
|
9797
|
|
|
|
|
|
|
# scan for a specific token, skipping escaped characters |
9798
|
|
|
|
|
|
|
# if the quote character is blank, use the first non-blank character |
9799
|
|
|
|
|
|
|
# input parameters: |
9800
|
|
|
|
|
|
|
# $rtokens = reference to the array of tokens |
9801
|
|
|
|
|
|
|
# $i = the token index of the first character to search |
9802
|
|
|
|
|
|
|
# $in_quote = number of quoted strings being followed |
9803
|
|
|
|
|
|
|
# $beginning_tok = the starting quote character |
9804
|
|
|
|
|
|
|
# $quote_pos = index to check next for alphanumeric delimiter |
9805
|
|
|
|
|
|
|
# output parameters: |
9806
|
|
|
|
|
|
|
# $i = the token index of the ending quote character |
9807
|
|
|
|
|
|
|
# $in_quote = decremented if found end, unchanged if not |
9808
|
|
|
|
|
|
|
# $beginning_tok = the starting quote character |
9809
|
|
|
|
|
|
|
# $quote_pos = index to check next for alphanumeric delimiter |
9810
|
|
|
|
|
|
|
# $quote_depth = nesting depth, since delimiters '{ ( [ <' can be nested. |
9811
|
|
|
|
|
|
|
# $quoted_string = the text of the quote (without quotation tokens) |
9812
|
|
|
|
|
|
|
my ( |
9813
|
|
|
|
|
|
|
|
9814
|
2814
|
|
|
2814
|
0
|
6525
|
$self, |
9815
|
|
|
|
|
|
|
$i_beg, |
9816
|
|
|
|
|
|
|
$in_quote, |
9817
|
|
|
|
|
|
|
$rtokens, |
9818
|
|
|
|
|
|
|
$beginning_tok, |
9819
|
|
|
|
|
|
|
$quote_pos, |
9820
|
|
|
|
|
|
|
$quote_depth, |
9821
|
|
|
|
|
|
|
$max_token_index, |
9822
|
|
|
|
|
|
|
|
9823
|
|
|
|
|
|
|
) = @_; |
9824
|
|
|
|
|
|
|
|
9825
|
2814
|
|
|
|
|
4945
|
my ( $tok, $end_tok ); |
9826
|
2814
|
|
|
|
|
4730
|
my $i = $i_beg - 1; |
9827
|
2814
|
|
|
|
|
4423
|
my $quoted_string = EMPTY_STRING; |
9828
|
|
|
|
|
|
|
|
9829
|
2814
|
|
|
|
|
4031
|
0 && do { |
9830
|
|
|
|
|
|
|
print {*STDOUT} |
9831
|
|
|
|
|
|
|
"QUOTE entering with quote_pos = $quote_pos i=$i beginning_tok =$beginning_tok\n"; |
9832
|
|
|
|
|
|
|
}; |
9833
|
|
|
|
|
|
|
|
9834
|
|
|
|
|
|
|
# get the corresponding end token |
9835
|
2814
|
100
|
|
|
|
13169
|
if ( $beginning_tok !~ /^\s*$/ ) { |
9836
|
183
|
|
|
|
|
541
|
$end_tok = matching_end_token($beginning_tok); |
9837
|
|
|
|
|
|
|
} |
9838
|
|
|
|
|
|
|
|
9839
|
|
|
|
|
|
|
# a blank token means we must find and use the first non-blank one |
9840
|
|
|
|
|
|
|
else { |
9841
|
2631
|
100
|
|
|
|
6187
|
my $allow_quote_comments = ( $i < 0 ) ? 1 : 0; # i<0 means we saw a <cr> |
9842
|
|
|
|
|
|
|
|
9843
|
2631
|
|
|
|
|
6049
|
while ( $i < $max_token_index ) { |
9844
|
2631
|
|
|
|
|
5315
|
$tok = $rtokens->[ ++$i ]; |
9845
|
|
|
|
|
|
|
|
9846
|
2631
|
50
|
|
|
|
7998
|
if ( $tok !~ /^\s*$/ ) { |
9847
|
|
|
|
|
|
|
|
9848
|
2631
|
50
|
66
|
|
|
7666
|
if ( ( $tok eq '#' ) && ($allow_quote_comments) ) { |
9849
|
0
|
|
|
|
|
0
|
$i = $max_token_index; |
9850
|
|
|
|
|
|
|
} |
9851
|
|
|
|
|
|
|
else { |
9852
|
|
|
|
|
|
|
|
9853
|
2631
|
100
|
|
|
|
5747
|
if ( length($tok) > 1 ) { |
9854
|
1
|
50
|
|
|
|
5
|
if ( $quote_pos <= 0 ) { $quote_pos = 1 } |
|
1
|
|
|
|
|
4
|
|
9855
|
1
|
|
|
|
|
4
|
$beginning_tok = substr( $tok, $quote_pos - 1, 1 ); |
9856
|
|
|
|
|
|
|
} |
9857
|
|
|
|
|
|
|
else { |
9858
|
2630
|
|
|
|
|
4654
|
$beginning_tok = $tok; |
9859
|
2630
|
|
|
|
|
4125
|
$quote_pos = 0; |
9860
|
|
|
|
|
|
|
} |
9861
|
2631
|
|
|
|
|
6377
|
$end_tok = matching_end_token($beginning_tok); |
9862
|
2631
|
|
|
|
|
4615
|
$quote_depth = 1; |
9863
|
2631
|
|
|
|
|
4750
|
last; |
9864
|
|
|
|
|
|
|
} |
9865
|
|
|
|
|
|
|
} |
9866
|
|
|
|
|
|
|
else { |
9867
|
0
|
|
|
|
|
0
|
$allow_quote_comments = 1; |
9868
|
|
|
|
|
|
|
} |
9869
|
|
|
|
|
|
|
} |
9870
|
|
|
|
|
|
|
} |
9871
|
|
|
|
|
|
|
|
9872
|
|
|
|
|
|
|
# There are two different loops which search for the ending quote |
9873
|
|
|
|
|
|
|
# character. In the rare case of an alphanumeric quote delimiter, we |
9874
|
|
|
|
|
|
|
# have to look through alphanumeric tokens character-by-character, since |
9875
|
|
|
|
|
|
|
# the pre-tokenization process combines multiple alphanumeric |
9876
|
|
|
|
|
|
|
# characters, whereas for a non-alphanumeric delimiter, only tokens of |
9877
|
|
|
|
|
|
|
# length 1 can match. |
9878
|
|
|
|
|
|
|
|
9879
|
|
|
|
|
|
|
#---------------------------------------------------------------- |
9880
|
|
|
|
|
|
|
# Case 1 (rare): loop for case of alphanumeric quote delimiter.. |
9881
|
|
|
|
|
|
|
# "quote_pos" is the position the current word to begin searching |
9882
|
|
|
|
|
|
|
#---------------------------------------------------------------- |
9883
|
2814
|
100
|
|
|
|
7797
|
if ( $beginning_tok =~ /\w/ ) { |
9884
|
|
|
|
|
|
|
|
9885
|
|
|
|
|
|
|
# Note this because it is not recommended practice except |
9886
|
|
|
|
|
|
|
# for obfuscated perl contests |
9887
|
1
|
50
|
|
|
|
4
|
if ( $in_quote == 1 ) { |
9888
|
1
|
|
|
|
|
9
|
$self->write_logfile_entry( |
9889
|
|
|
|
|
|
|
"Note: alphanumeric quote delimiter ($beginning_tok) \n"); |
9890
|
|
|
|
|
|
|
} |
9891
|
|
|
|
|
|
|
|
9892
|
|
|
|
|
|
|
# Note: changed < to <= here to fix c109. Relying on extra end blanks. |
9893
|
1
|
|
|
|
|
5
|
while ( $i <= $max_token_index ) { |
9894
|
|
|
|
|
|
|
|
9895
|
4
|
100
|
66
|
|
|
17
|
if ( $quote_pos == 0 || ( $i < 0 ) ) { |
9896
|
3
|
|
|
|
|
7
|
$tok = $rtokens->[ ++$i ]; |
9897
|
|
|
|
|
|
|
|
9898
|
3
|
100
|
|
|
|
8
|
if ( $tok eq '\\' ) { |
9899
|
|
|
|
|
|
|
|
9900
|
|
|
|
|
|
|
# retain backslash unless it hides the end token |
9901
|
1
|
50
|
|
|
|
54
|
$quoted_string .= $tok |
9902
|
|
|
|
|
|
|
unless $rtokens->[ $i + 1 ] eq $end_tok; |
9903
|
1
|
|
|
|
|
8
|
$quote_pos++; |
9904
|
1
|
50
|
|
|
|
6
|
last if ( $i >= $max_token_index ); |
9905
|
1
|
|
|
|
|
5
|
$tok = $rtokens->[ ++$i ]; |
9906
|
|
|
|
|
|
|
} |
9907
|
|
|
|
|
|
|
} |
9908
|
4
|
|
|
|
|
8
|
my $old_pos = $quote_pos; |
9909
|
|
|
|
|
|
|
|
9910
|
4
|
|
|
|
|
8
|
$quote_pos = 1 + index( $tok, $end_tok, $quote_pos ); |
9911
|
|
|
|
|
|
|
|
9912
|
4
|
100
|
|
|
|
11
|
if ( $quote_pos > 0 ) { |
9913
|
|
|
|
|
|
|
|
9914
|
1
|
|
|
|
|
16
|
$quoted_string .= |
9915
|
|
|
|
|
|
|
substr( $tok, $old_pos, $quote_pos - $old_pos - 1 ); |
9916
|
|
|
|
|
|
|
|
9917
|
|
|
|
|
|
|
# NOTE: any quote modifiers will be at the end of '$tok'. If we |
9918
|
|
|
|
|
|
|
# wanted to check them, this is the place to get them. But |
9919
|
|
|
|
|
|
|
# this quote form is rarely used in practice, so it isn't |
9920
|
|
|
|
|
|
|
# worthwhile. |
9921
|
|
|
|
|
|
|
|
9922
|
1
|
|
|
|
|
3
|
$quote_depth--; |
9923
|
|
|
|
|
|
|
|
9924
|
1
|
50
|
|
|
|
4
|
if ( $quote_depth == 0 ) { |
9925
|
1
|
|
|
|
|
3
|
$in_quote--; |
9926
|
1
|
|
|
|
|
3
|
last; |
9927
|
|
|
|
|
|
|
} |
9928
|
|
|
|
|
|
|
} |
9929
|
|
|
|
|
|
|
else { |
9930
|
3
|
50
|
|
|
|
7
|
if ( $old_pos <= length($tok) ) { |
9931
|
3
|
|
|
|
|
9
|
$quoted_string .= substr( $tok, $old_pos ); |
9932
|
|
|
|
|
|
|
} |
9933
|
|
|
|
|
|
|
} |
9934
|
|
|
|
|
|
|
} |
9935
|
|
|
|
|
|
|
} |
9936
|
|
|
|
|
|
|
|
9937
|
|
|
|
|
|
|
#----------------------------------------------------------------------- |
9938
|
|
|
|
|
|
|
# Case 2 (normal): loop for case of a non-alphanumeric quote delimiter.. |
9939
|
|
|
|
|
|
|
#----------------------------------------------------------------------- |
9940
|
|
|
|
|
|
|
else { |
9941
|
|
|
|
|
|
|
|
9942
|
2813
|
|
|
|
|
6490
|
while ( $i < $max_token_index ) { |
9943
|
10798
|
|
|
|
|
16323
|
$tok = $rtokens->[ ++$i ]; |
9944
|
|
|
|
|
|
|
|
9945
|
10798
|
100
|
|
|
|
24279
|
if ( $tok eq $end_tok ) { |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
9946
|
2620
|
|
|
|
|
4208
|
$quote_depth--; |
9947
|
|
|
|
|
|
|
|
9948
|
2620
|
100
|
|
|
|
5936
|
if ( $quote_depth == 0 ) { |
9949
|
2619
|
|
|
|
|
3860
|
$in_quote--; |
9950
|
2619
|
|
|
|
|
4131
|
last; |
9951
|
|
|
|
|
|
|
} |
9952
|
|
|
|
|
|
|
} |
9953
|
|
|
|
|
|
|
elsif ( $tok eq $beginning_tok ) { |
9954
|
1
|
|
|
|
|
3
|
$quote_depth++; |
9955
|
|
|
|
|
|
|
} |
9956
|
|
|
|
|
|
|
elsif ( $tok eq '\\' ) { |
9957
|
|
|
|
|
|
|
|
9958
|
|
|
|
|
|
|
# retain backslash unless it hides the beginning or end token |
9959
|
376
|
|
|
|
|
939
|
$tok = $rtokens->[ ++$i ]; |
9960
|
376
|
100
|
100
|
|
|
1960
|
$quoted_string .= '\\' |
9961
|
|
|
|
|
|
|
if ( $tok ne $end_tok && $tok ne $beginning_tok ); |
9962
|
|
|
|
|
|
|
} |
9963
|
|
|
|
|
|
|
else { |
9964
|
|
|
|
|
|
|
## nothing special |
9965
|
|
|
|
|
|
|
} |
9966
|
8179
|
|
|
|
|
15255
|
$quoted_string .= $tok; |
9967
|
|
|
|
|
|
|
} |
9968
|
|
|
|
|
|
|
} |
9969
|
2814
|
50
|
|
|
|
6171
|
if ( $i > $max_token_index ) { $i = $max_token_index } |
|
0
|
|
|
|
|
0
|
|
9970
|
|
|
|
|
|
|
return ( |
9971
|
|
|
|
|
|
|
|
9972
|
2814
|
|
|
|
|
10673
|
$i, |
9973
|
|
|
|
|
|
|
$in_quote, |
9974
|
|
|
|
|
|
|
$beginning_tok, |
9975
|
|
|
|
|
|
|
$quote_pos, |
9976
|
|
|
|
|
|
|
$quote_depth, |
9977
|
|
|
|
|
|
|
$quoted_string, |
9978
|
|
|
|
|
|
|
|
9979
|
|
|
|
|
|
|
); |
9980
|
|
|
|
|
|
|
} ## end sub follow_quoted_string |
9981
|
|
|
|
|
|
|
|
9982
|
|
|
|
|
|
|
sub indicate_error { |
9983
|
0
|
|
|
0
|
0
|
0
|
my ( $self, $msg, $line_number, $input_line, $pos, $carrat ) = @_; |
9984
|
0
|
|
|
|
|
0
|
$self->interrupt_logfile(); |
9985
|
0
|
|
|
|
|
0
|
$self->warning($msg); |
9986
|
0
|
|
|
|
|
0
|
$self->write_error_indicator_pair( $line_number, $input_line, $pos, |
9987
|
|
|
|
|
|
|
$carrat ); |
9988
|
0
|
|
|
|
|
0
|
$self->resume_logfile(); |
9989
|
0
|
|
|
|
|
0
|
return; |
9990
|
|
|
|
|
|
|
} ## end sub indicate_error |
9991
|
|
|
|
|
|
|
|
9992
|
|
|
|
|
|
|
sub write_error_indicator_pair { |
9993
|
0
|
|
|
0
|
0
|
0
|
my ( $self, $line_number, $input_line, $pos, $carrat ) = @_; |
9994
|
0
|
|
|
|
|
0
|
my ( $offset, $numbered_line, $underline ) = |
9995
|
|
|
|
|
|
|
make_numbered_line( $line_number, $input_line, $pos ); |
9996
|
0
|
|
|
|
|
0
|
$underline = write_on_underline( $underline, $pos - $offset, $carrat ); |
9997
|
0
|
|
|
|
|
0
|
$self->warning( $numbered_line . "\n" ); |
9998
|
0
|
|
|
|
|
0
|
$underline =~ s/\s*$//; |
9999
|
0
|
|
|
|
|
0
|
$self->warning( $underline . "\n" ); |
10000
|
0
|
|
|
|
|
0
|
return; |
10001
|
|
|
|
|
|
|
} ## end sub write_error_indicator_pair |
10002
|
|
|
|
|
|
|
|
10003
|
|
|
|
|
|
|
sub make_numbered_line { |
10004
|
|
|
|
|
|
|
|
10005
|
|
|
|
|
|
|
# Given an input line, its line number, and a character position of |
10006
|
|
|
|
|
|
|
# interest, create a string not longer than 80 characters of the form |
10007
|
|
|
|
|
|
|
# $lineno: sub_string |
10008
|
|
|
|
|
|
|
# such that the sub_string of $str contains the position of interest |
10009
|
|
|
|
|
|
|
# |
10010
|
|
|
|
|
|
|
# Here is an example of what we want, in this case we add trailing |
10011
|
|
|
|
|
|
|
# '...' because the line is long. |
10012
|
|
|
|
|
|
|
# |
10013
|
|
|
|
|
|
|
# 2: (One of QAML 2.0's authors is a member of the World Wide Web Con ... |
10014
|
|
|
|
|
|
|
# |
10015
|
|
|
|
|
|
|
# Here is another example, this time in which we used leading '...' |
10016
|
|
|
|
|
|
|
# because of excessive length: |
10017
|
|
|
|
|
|
|
# |
10018
|
|
|
|
|
|
|
# 2: ... er of the World Wide Web Consortium's |
10019
|
|
|
|
|
|
|
# |
10020
|
|
|
|
|
|
|
# input parameters are: |
10021
|
|
|
|
|
|
|
# $lineno = line number |
10022
|
|
|
|
|
|
|
# $str = the text of the line |
10023
|
|
|
|
|
|
|
# $pos = position of interest (the error) : 0 = first character |
10024
|
|
|
|
|
|
|
# |
10025
|
|
|
|
|
|
|
# We return : |
10026
|
|
|
|
|
|
|
# - $offset = an offset which corrects the position in case we only |
10027
|
|
|
|
|
|
|
# display part of a line, such that $pos-$offset is the effective |
10028
|
|
|
|
|
|
|
# position from the start of the displayed line. |
10029
|
|
|
|
|
|
|
# - $numbered_line = the numbered line as above, |
10030
|
|
|
|
|
|
|
# - $underline = a blank 'underline' which is all spaces with the same |
10031
|
|
|
|
|
|
|
# number of characters as the numbered line. |
10032
|
|
|
|
|
|
|
|
10033
|
0
|
|
|
0
|
0
|
0
|
my ( $lineno, $str, $pos ) = @_; |
10034
|
0
|
0
|
|
|
|
0
|
my $offset = ( $pos < 60 ) ? 0 : $pos - 40; |
10035
|
0
|
|
|
|
|
0
|
my $excess = length($str) - $offset - 68; |
10036
|
0
|
0
|
|
|
|
0
|
my $numc = ( $excess > 0 ) ? 68 : undef; |
10037
|
|
|
|
|
|
|
|
10038
|
0
|
0
|
|
|
|
0
|
if ( defined($numc) ) { |
10039
|
0
|
0
|
|
|
|
0
|
if ( $offset == 0 ) { |
10040
|
0
|
|
|
|
|
0
|
$str = substr( $str, $offset, $numc - 4 ) . " ..."; |
10041
|
|
|
|
|
|
|
} |
10042
|
|
|
|
|
|
|
else { |
10043
|
0
|
|
|
|
|
0
|
$str = "... " . substr( $str, $offset + 4, $numc - 4 ) . " ..."; |
10044
|
|
|
|
|
|
|
} |
10045
|
|
|
|
|
|
|
} |
10046
|
|
|
|
|
|
|
else { |
10047
|
|
|
|
|
|
|
|
10048
|
0
|
0
|
|
|
|
0
|
if ( $offset == 0 ) { |
10049
|
|
|
|
|
|
|
} |
10050
|
|
|
|
|
|
|
else { |
10051
|
0
|
|
|
|
|
0
|
$str = "... " . substr( $str, $offset + 4 ); |
10052
|
|
|
|
|
|
|
} |
10053
|
|
|
|
|
|
|
} |
10054
|
|
|
|
|
|
|
|
10055
|
0
|
|
|
|
|
0
|
my $numbered_line = sprintf( "%d: ", $lineno ); |
10056
|
0
|
|
|
|
|
0
|
$offset -= length($numbered_line); |
10057
|
0
|
|
|
|
|
0
|
$numbered_line .= $str; |
10058
|
0
|
|
|
|
|
0
|
my $underline = SPACE x length($numbered_line); |
10059
|
0
|
|
|
|
|
0
|
return ( $offset, $numbered_line, $underline ); |
10060
|
|
|
|
|
|
|
} ## end sub make_numbered_line |
10061
|
|
|
|
|
|
|
|
10062
|
|
|
|
|
|
|
sub write_on_underline { |
10063
|
|
|
|
|
|
|
|
10064
|
|
|
|
|
|
|
# The "underline" is a string that shows where an error is; it starts |
10065
|
|
|
|
|
|
|
# out as a string of blanks with the same length as the numbered line of |
10066
|
|
|
|
|
|
|
# code above it, and we have to add marking to show where an error is. |
10067
|
|
|
|
|
|
|
# In the example below, we want to write the string '--^' just below |
10068
|
|
|
|
|
|
|
# the line of bad code: |
10069
|
|
|
|
|
|
|
# |
10070
|
|
|
|
|
|
|
# 2: (One of QAML 2.0's authors is a member of the World Wide Web Con ... |
10071
|
|
|
|
|
|
|
# ---^ |
10072
|
|
|
|
|
|
|
# We are given the current underline string, plus a position and a |
10073
|
|
|
|
|
|
|
# string to write on it. |
10074
|
|
|
|
|
|
|
# |
10075
|
|
|
|
|
|
|
# In the above example, there will be 2 calls to do this: |
10076
|
|
|
|
|
|
|
# First call: $pos=19, pos_chr=^ |
10077
|
|
|
|
|
|
|
# Second call: $pos=16, pos_chr=--- |
10078
|
|
|
|
|
|
|
# |
10079
|
|
|
|
|
|
|
# This is a trivial thing to do with substr, but there is some |
10080
|
|
|
|
|
|
|
# checking to do. |
10081
|
|
|
|
|
|
|
|
10082
|
0
|
|
|
0
|
0
|
0
|
my ( $underline, $pos, $pos_chr ) = @_; |
10083
|
|
|
|
|
|
|
|
10084
|
|
|
|
|
|
|
# check for error..shouldn't happen |
10085
|
0
|
0
|
0
|
|
|
0
|
if ( $pos < 0 || $pos > length($underline) ) { |
10086
|
0
|
|
|
|
|
0
|
return $underline; |
10087
|
|
|
|
|
|
|
} |
10088
|
0
|
|
|
|
|
0
|
my $excess = length($pos_chr) + $pos - length($underline); |
10089
|
0
|
0
|
|
|
|
0
|
if ( $excess > 0 ) { |
10090
|
0
|
|
|
|
|
0
|
$pos_chr = substr( $pos_chr, 0, length($pos_chr) - $excess ); |
10091
|
|
|
|
|
|
|
} |
10092
|
0
|
|
|
|
|
0
|
substr( $underline, $pos, length($pos_chr) ) = $pos_chr; |
10093
|
0
|
|
|
|
|
0
|
return ($underline); |
10094
|
|
|
|
|
|
|
} ## end sub write_on_underline |
10095
|
|
|
|
|
|
|
|
10096
|
|
|
|
|
|
|
sub pre_tokenize { |
10097
|
|
|
|
|
|
|
|
10098
|
6187
|
|
|
6187
|
0
|
12313
|
my ( $str, $max_tokens_wanted ) = @_; |
10099
|
|
|
|
|
|
|
|
10100
|
|
|
|
|
|
|
# Input parameters: |
10101
|
|
|
|
|
|
|
# $str = string to be parsed |
10102
|
|
|
|
|
|
|
# $max_tokens_wanted > 0 to stop on reaching this many tokens. |
10103
|
|
|
|
|
|
|
# = undef or 0 means get all tokens |
10104
|
|
|
|
|
|
|
|
10105
|
|
|
|
|
|
|
# Break a string, $str, into a sequence of preliminary tokens (pre-tokens). |
10106
|
|
|
|
|
|
|
# We look for these types of tokens: |
10107
|
|
|
|
|
|
|
# words (type='w'), example: 'max_tokens_wanted' |
10108
|
|
|
|
|
|
|
# digits (type = 'd'), example: '0755' |
10109
|
|
|
|
|
|
|
# whitespace (type = 'b'), example: ' ' |
10110
|
|
|
|
|
|
|
# single character punct (type = char) example: '=' |
10111
|
|
|
|
|
|
|
|
10112
|
|
|
|
|
|
|
# Later operations will combine one or more of these pre-tokens into final |
10113
|
|
|
|
|
|
|
# tokens. We cannot do better than this yet because we might be in a |
10114
|
|
|
|
|
|
|
# quoted string or pattern. |
10115
|
|
|
|
|
|
|
|
10116
|
|
|
|
|
|
|
# An advantage of doing this pre-tokenization step is that it keeps almost |
10117
|
|
|
|
|
|
|
# all of the regex parsing very simple and localized right here. A |
10118
|
|
|
|
|
|
|
# disadvantage is that in some extremely rare instances we will have to go |
10119
|
|
|
|
|
|
|
# back and split a pre-token. |
10120
|
|
|
|
|
|
|
|
10121
|
|
|
|
|
|
|
# Return parameters: |
10122
|
6187
|
|
|
|
|
10336
|
my @tokens = (); # array of the tokens themselves |
10123
|
6187
|
|
|
|
|
12732
|
my @token_map = (0); # string position of start of each token |
10124
|
6187
|
|
|
|
|
9681
|
my @type = (); # 'b'=whitespace, 'd'=digits, 'w'=alpha, or punct |
10125
|
|
|
|
|
|
|
|
10126
|
6187
|
100
|
|
|
|
13303
|
if ( !$max_tokens_wanted ) { $max_tokens_wanted = -1 } |
|
5902
|
|
|
|
|
9192
|
|
10127
|
|
|
|
|
|
|
|
10128
|
6187
|
|
|
|
|
14169
|
while ( $max_tokens_wanted-- ) { |
10129
|
|
|
|
|
|
|
|
10130
|
82185
|
100
|
|
|
|
227575
|
if ( |
10131
|
|
|
|
|
|
|
$str =~ m{ |
10132
|
|
|
|
|
|
|
\G( |
10133
|
|
|
|
|
|
|
(\s+) # type 'b' = whitespace - this must come before \W |
10134
|
|
|
|
|
|
|
| (\W) # or type=char = single-character, non-whitespace punct |
10135
|
|
|
|
|
|
|
| (\d+) # or type 'd' = sequence of digits - must come before \w |
10136
|
|
|
|
|
|
|
| (\w+) # or type 'w' = words not starting with a digit |
10137
|
|
|
|
|
|
|
) |
10138
|
|
|
|
|
|
|
}gcx |
10139
|
|
|
|
|
|
|
) |
10140
|
|
|
|
|
|
|
{ |
10141
|
76142
|
|
|
|
|
159576
|
push @tokens, $1; |
10142
|
76142
|
100
|
|
|
|
185396
|
push @type, |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
10143
|
|
|
|
|
|
|
defined($2) ? 'b' : defined($3) ? $1 : defined($4) ? 'd' : 'w'; |
10144
|
76142
|
|
|
|
|
147633
|
push @token_map, pos($str); |
10145
|
|
|
|
|
|
|
} |
10146
|
|
|
|
|
|
|
|
10147
|
|
|
|
|
|
|
# that's all.. |
10148
|
|
|
|
|
|
|
else { |
10149
|
6043
|
|
|
|
|
36598
|
return ( \@tokens, \@token_map, \@type ); |
10150
|
|
|
|
|
|
|
} |
10151
|
|
|
|
|
|
|
} |
10152
|
|
|
|
|
|
|
|
10153
|
144
|
|
|
|
|
1041
|
return ( \@tokens, \@token_map, \@type ); |
10154
|
|
|
|
|
|
|
} ## end sub pre_tokenize |
10155
|
|
|
|
|
|
|
|
10156
|
|
|
|
|
|
|
sub show_tokens { |
10157
|
|
|
|
|
|
|
|
10158
|
|
|
|
|
|
|
# this is an old debug routine |
10159
|
|
|
|
|
|
|
# not called, but saved for reference |
10160
|
0
|
|
|
0
|
0
|
0
|
my ( $rtokens, $rtoken_map ) = @_; |
10161
|
0
|
|
|
|
|
0
|
my $num = scalar( @{$rtokens} ); |
|
0
|
|
|
|
|
0
|
|
10162
|
|
|
|
|
|
|
|
10163
|
0
|
|
|
|
|
0
|
foreach my $i ( 0 .. $num - 1 ) { |
10164
|
0
|
|
|
|
|
0
|
my $len = length( $rtokens->[$i] ); |
10165
|
0
|
|
|
|
|
0
|
print {*STDOUT} "$i:$len:$rtoken_map->[$i]:$rtokens->[$i]:\n"; |
|
0
|
|
|
|
|
0
|
|
10166
|
|
|
|
|
|
|
} |
10167
|
0
|
|
|
|
|
0
|
return; |
10168
|
|
|
|
|
|
|
} ## end sub show_tokens |
10169
|
|
|
|
|
|
|
|
10170
|
|
|
|
|
|
|
{ ## closure for sub matching end token |
10171
|
|
|
|
|
|
|
my %matching_end_token; |
10172
|
|
|
|
|
|
|
|
10173
|
|
|
|
|
|
|
BEGIN { |
10174
|
39
|
|
|
39
|
|
62257
|
%matching_end_token = ( |
10175
|
|
|
|
|
|
|
'{' => '}', |
10176
|
|
|
|
|
|
|
'(' => ')', |
10177
|
|
|
|
|
|
|
'[' => ']', |
10178
|
|
|
|
|
|
|
'<' => '>', |
10179
|
|
|
|
|
|
|
); |
10180
|
|
|
|
|
|
|
} ## end BEGIN |
10181
|
|
|
|
|
|
|
|
10182
|
|
|
|
|
|
|
sub matching_end_token { |
10183
|
|
|
|
|
|
|
|
10184
|
|
|
|
|
|
|
# return closing character for a pattern |
10185
|
2998
|
|
|
2998
|
0
|
5029
|
my $beginning_token = shift; |
10186
|
2998
|
100
|
|
|
|
7663
|
if ( $matching_end_token{$beginning_token} ) { |
10187
|
373
|
|
|
|
|
982
|
return $matching_end_token{$beginning_token}; |
10188
|
|
|
|
|
|
|
} |
10189
|
2625
|
|
|
|
|
6068
|
return ($beginning_token); |
10190
|
|
|
|
|
|
|
} ## end sub matching_end_token |
10191
|
|
|
|
|
|
|
} |
10192
|
|
|
|
|
|
|
|
10193
|
|
|
|
|
|
|
sub dump_token_types { |
10194
|
0
|
|
|
0
|
0
|
|
my ( $class, $fh ) = @_; |
10195
|
|
|
|
|
|
|
|
10196
|
|
|
|
|
|
|
# This should be the latest list of token types in use |
10197
|
|
|
|
|
|
|
# adding NEW_TOKENS: add a comment here |
10198
|
0
|
|
|
|
|
|
$fh->print(<<'END_OF_LIST'); |
10199
|
|
|
|
|
|
|
|
10200
|
|
|
|
|
|
|
Here is a list of the token types currently used for lines of type 'CODE'. |
10201
|
|
|
|
|
|
|
For the following tokens, the "type" of a token is just the token itself. |
10202
|
|
|
|
|
|
|
|
10203
|
|
|
|
|
|
|
.. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <> |
10204
|
|
|
|
|
|
|
( ) <= >= == =~ !~ != ++ -- /= x= |
10205
|
|
|
|
|
|
|
... **= <<= >>= &&= ||= //= <=> |
10206
|
|
|
|
|
|
|
, + - / * | % ! x ~ = \ ? : . < > ^ & |
10207
|
|
|
|
|
|
|
|
10208
|
|
|
|
|
|
|
The following additional token types are defined: |
10209
|
|
|
|
|
|
|
|
10210
|
|
|
|
|
|
|
type meaning |
10211
|
|
|
|
|
|
|
b blank (white space) |
10212
|
|
|
|
|
|
|
{ indent: opening structural curly brace or square bracket or paren |
10213
|
|
|
|
|
|
|
(code block, anonymous hash reference, or anonymous array reference) |
10214
|
|
|
|
|
|
|
} outdent: right structural curly brace or square bracket or paren |
10215
|
|
|
|
|
|
|
[ left non-structural square bracket (enclosing an array index) |
10216
|
|
|
|
|
|
|
] right non-structural square bracket |
10217
|
|
|
|
|
|
|
( left non-structural paren (all but a list right of an =) |
10218
|
|
|
|
|
|
|
) right non-structural paren |
10219
|
|
|
|
|
|
|
L left non-structural curly brace (enclosing a key) |
10220
|
|
|
|
|
|
|
R right non-structural curly brace |
10221
|
|
|
|
|
|
|
; terminal semicolon |
10222
|
|
|
|
|
|
|
f indicates a semicolon in a "for" statement |
10223
|
|
|
|
|
|
|
h here_doc operator << |
10224
|
|
|
|
|
|
|
# a comment |
10225
|
|
|
|
|
|
|
Q indicates a quote or pattern |
10226
|
|
|
|
|
|
|
q indicates a qw quote block |
10227
|
|
|
|
|
|
|
k a perl keyword |
10228
|
|
|
|
|
|
|
C user-defined constant or constant function (with void prototype = ()) |
10229
|
|
|
|
|
|
|
U user-defined function taking parameters |
10230
|
|
|
|
|
|
|
G user-defined function taking block parameter (like grep/map/eval) |
10231
|
|
|
|
|
|
|
S sub definition (reported as type 'i' in older versions) |
10232
|
|
|
|
|
|
|
P package definition (reported as type 'i' in older versions) |
10233
|
|
|
|
|
|
|
t type indicater such as %,$,@,*,&,sub |
10234
|
|
|
|
|
|
|
w bare word (perhaps a subroutine call) |
10235
|
|
|
|
|
|
|
i identifier of some type (with leading %, $, @, *, &, sub, -> ) |
10236
|
|
|
|
|
|
|
n a number |
10237
|
|
|
|
|
|
|
v a v-string |
10238
|
|
|
|
|
|
|
F a file test operator (like -e) |
10239
|
|
|
|
|
|
|
Y File handle |
10240
|
|
|
|
|
|
|
Z identifier in indirect object slot: may be file handle, object |
10241
|
|
|
|
|
|
|
J LABEL: code block label |
10242
|
|
|
|
|
|
|
j LABEL after next, last, redo, goto |
10243
|
|
|
|
|
|
|
p unary + |
10244
|
|
|
|
|
|
|
m unary - |
10245
|
|
|
|
|
|
|
pp pre-increment operator ++ |
10246
|
|
|
|
|
|
|
mm pre-decrement operator -- |
10247
|
|
|
|
|
|
|
A : used as attribute separator |
10248
|
|
|
|
|
|
|
|
10249
|
|
|
|
|
|
|
Here are the '_line_type' codes used internally: |
10250
|
|
|
|
|
|
|
SYSTEM - system-specific code before hash-bang line |
10251
|
|
|
|
|
|
|
CODE - line of perl code (including comments) |
10252
|
|
|
|
|
|
|
POD_START - line starting pod, such as '=head' |
10253
|
|
|
|
|
|
|
POD - pod documentation text |
10254
|
|
|
|
|
|
|
POD_END - last line of pod section, '=cut' |
10255
|
|
|
|
|
|
|
HERE - text of here-document |
10256
|
|
|
|
|
|
|
HERE_END - last line of here-doc (target word) |
10257
|
|
|
|
|
|
|
FORMAT - format section |
10258
|
|
|
|
|
|
|
FORMAT_END - last line of format section, '.' |
10259
|
|
|
|
|
|
|
SKIP - code skipping section |
10260
|
|
|
|
|
|
|
SKIP_END - last line of code skipping section, '#>>V' |
10261
|
|
|
|
|
|
|
DATA_START - __DATA__ line |
10262
|
|
|
|
|
|
|
DATA - unidentified text following __DATA__ |
10263
|
|
|
|
|
|
|
END_START - __END__ line |
10264
|
|
|
|
|
|
|
END - unidentified text following __END__ |
10265
|
|
|
|
|
|
|
ERROR - we are in big trouble, probably not a perl script |
10266
|
|
|
|
|
|
|
END_OF_LIST |
10267
|
|
|
|
|
|
|
|
10268
|
0
|
|
|
|
|
|
return; |
10269
|
|
|
|
|
|
|
} ## end sub dump_token_types |
10270
|
|
|
|
|
|
|
|
10271
|
|
|
|
|
|
|
BEGIN { |
10272
|
|
|
|
|
|
|
|
10273
|
|
|
|
|
|
|
# These names are used in error messages |
10274
|
39
|
|
|
39
|
|
304
|
@opening_brace_names = qw# '{' '[' '(' '?' #; |
10275
|
39
|
|
|
|
|
113
|
@closing_brace_names = qw# '}' ']' ')' ':' #; |
10276
|
|
|
|
|
|
|
|
10277
|
39
|
|
|
|
|
106
|
my @q; |
10278
|
|
|
|
|
|
|
|
10279
|
39
|
|
|
|
|
286
|
my @digraphs = qw( |
10280
|
|
|
|
|
|
|
.. :: << >> ** && || // -> => += -= .= %= &= |= ^= *= <> |
10281
|
|
|
|
|
|
|
<= >= == =~ !~ != ++ -- /= x= ~~ ~. |. &. ^. |
10282
|
|
|
|
|
|
|
); |
10283
|
39
|
|
|
|
|
899
|
@is_digraph{@digraphs} = (1) x scalar(@digraphs); |
10284
|
|
|
|
|
|
|
|
10285
|
39
|
|
|
|
|
428
|
@q = qw( |
10286
|
|
|
|
|
|
|
. : < > * & | / - = + - % ^ ! x ~ |
10287
|
|
|
|
|
|
|
); |
10288
|
39
|
|
|
|
|
366
|
@can_start_digraph{@q} = (1) x scalar(@q); |
10289
|
|
|
|
|
|
|
|
10290
|
39
|
|
|
|
|
151
|
my @trigraphs = qw( ... **= <<= >>= &&= ||= //= <=> !~~ &.= |.= ^.= <<~); |
10291
|
39
|
|
|
|
|
254
|
@is_trigraph{@trigraphs} = (1) x scalar(@trigraphs); |
10292
|
|
|
|
|
|
|
|
10293
|
39
|
|
|
|
|
135
|
my @tetragraphs = qw( <<>> ); |
10294
|
39
|
|
|
|
|
138
|
@is_tetragraph{@tetragraphs} = (1) x scalar(@tetragraphs); |
10295
|
|
|
|
|
|
|
|
10296
|
|
|
|
|
|
|
# make a hash of all valid token types for self-checking the tokenizer |
10297
|
|
|
|
|
|
|
# (adding NEW_TOKENS : select a new character and add to this list) |
10298
|
|
|
|
|
|
|
# fix for c250: added new token type 'P' and 'S' |
10299
|
39
|
|
|
|
|
625
|
my @valid_token_types = qw# |
10300
|
|
|
|
|
|
|
A b C G L R f h Q k t w i q n p m F pp mm U j J Y Z v P S |
10301
|
|
|
|
|
|
|
{ } ( ) [ ] ; + - / * | % ! x ~ = \ ? : . < > ^ & |
10302
|
|
|
|
|
|
|
#; |
10303
|
39
|
|
|
|
|
520
|
push( @valid_token_types, @digraphs ); |
10304
|
39
|
|
|
|
|
319
|
push( @valid_token_types, @trigraphs ); |
10305
|
39
|
|
|
|
|
105
|
push( @valid_token_types, @tetragraphs ); |
10306
|
39
|
|
|
|
|
116
|
push( @valid_token_types, ( '#', ',', 'CORE::' ) ); |
10307
|
39
|
|
|
|
|
1597
|
@is_valid_token_type{@valid_token_types} = (1) x scalar(@valid_token_types); |
10308
|
|
|
|
|
|
|
|
10309
|
|
|
|
|
|
|
# a list of file test letters, as in -e (Table 3-4 of 'camel 3') |
10310
|
39
|
|
|
|
|
285
|
my @file_test_operators = |
10311
|
|
|
|
|
|
|
qw( A B C M O R S T W X b c d e f g k l o p r s t u w x z); |
10312
|
39
|
|
|
|
|
807
|
@is_file_test_operator{@file_test_operators} = |
10313
|
|
|
|
|
|
|
(1) x scalar(@file_test_operators); |
10314
|
|
|
|
|
|
|
|
10315
|
|
|
|
|
|
|
# these functions have prototypes of the form (&), so when they are |
10316
|
|
|
|
|
|
|
# followed by a block, that block MAY BE followed by an operator. |
10317
|
|
|
|
|
|
|
# Smartmatch operator ~~ may be followed by anonymous hash or array ref |
10318
|
39
|
|
|
|
|
167
|
@q = qw( do eval ); |
10319
|
39
|
|
|
|
|
177
|
@is_block_operator{@q} = (1) x scalar(@q); |
10320
|
|
|
|
|
|
|
|
10321
|
|
|
|
|
|
|
# these functions allow an identifier in the indirect object slot |
10322
|
39
|
|
|
|
|
111
|
@q = qw( print printf sort exec system say); |
10323
|
39
|
|
|
|
|
242
|
@is_indirect_object_taker{@q} = (1) x scalar(@q); |
10324
|
|
|
|
|
|
|
|
10325
|
|
|
|
|
|
|
# Note: 'field' will be added by sub check_options if --use-feature=class |
10326
|
39
|
|
|
|
|
100
|
@q = qw(my our state); |
10327
|
39
|
|
|
|
|
190
|
@is_my_our_state{@q} = (1) x scalar(@q); |
10328
|
|
|
|
|
|
|
|
10329
|
|
|
|
|
|
|
# These tokens may precede a code block |
10330
|
|
|
|
|
|
|
# patched for SWITCH/CASE/CATCH. Actually these could be removed |
10331
|
|
|
|
|
|
|
# now and we could let the extended-syntax coding handle them. |
10332
|
|
|
|
|
|
|
# Added 'default' for Switch::Plain. |
10333
|
|
|
|
|
|
|
# Note: 'ADJUST' will be added by sub check_options if --use-feature=class |
10334
|
39
|
|
|
|
|
233
|
@q = |
10335
|
|
|
|
|
|
|
qw( BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else |
10336
|
|
|
|
|
|
|
unless do while until eval for foreach map grep sort |
10337
|
|
|
|
|
|
|
switch case given when default catch try finally); |
10338
|
39
|
|
|
|
|
681
|
@is_code_block_token{@q} = (1) x scalar(@q); |
10339
|
|
|
|
|
|
|
|
10340
|
|
|
|
|
|
|
# Note: this hash was formerly named '%is_not_zero_continuation_block_type' |
10341
|
|
|
|
|
|
|
# to contrast it with the block types in '%is_zero_continuation_block_type' |
10342
|
39
|
|
|
|
|
181
|
@q = qw( sort map grep eval do ); |
10343
|
39
|
|
|
|
|
131
|
@is_sort_map_grep_eval_do{@q} = (1) x scalar(@q); |
10344
|
|
|
|
|
|
|
|
10345
|
39
|
|
|
|
|
124
|
@q = qw( sort map grep ); |
10346
|
39
|
|
|
|
|
132
|
@is_sort_map_grep{@q} = (1) x scalar(@q); |
10347
|
|
|
|
|
|
|
|
10348
|
39
|
|
|
|
|
90
|
%is_grep_alias = (); |
10349
|
|
|
|
|
|
|
|
10350
|
|
|
|
|
|
|
# I'll build the list of keywords incrementally |
10351
|
39
|
|
|
|
|
92
|
my @Keywords = (); |
10352
|
|
|
|
|
|
|
|
10353
|
|
|
|
|
|
|
# keywords and tokens after which a value or pattern is expected, |
10354
|
|
|
|
|
|
|
# but not an operator. In other words, these should consume terms |
10355
|
|
|
|
|
|
|
# to their right, or at least they are not expected to be followed |
10356
|
|
|
|
|
|
|
# immediately by operators. |
10357
|
39
|
|
|
|
|
1441
|
my @value_requestor = qw( |
10358
|
|
|
|
|
|
|
AUTOLOAD |
10359
|
|
|
|
|
|
|
BEGIN |
10360
|
|
|
|
|
|
|
CHECK |
10361
|
|
|
|
|
|
|
DESTROY |
10362
|
|
|
|
|
|
|
END |
10363
|
|
|
|
|
|
|
EQ |
10364
|
|
|
|
|
|
|
GE |
10365
|
|
|
|
|
|
|
GT |
10366
|
|
|
|
|
|
|
INIT |
10367
|
|
|
|
|
|
|
LE |
10368
|
|
|
|
|
|
|
LT |
10369
|
|
|
|
|
|
|
NE |
10370
|
|
|
|
|
|
|
UNITCHECK |
10371
|
|
|
|
|
|
|
abs |
10372
|
|
|
|
|
|
|
accept |
10373
|
|
|
|
|
|
|
alarm |
10374
|
|
|
|
|
|
|
and |
10375
|
|
|
|
|
|
|
atan2 |
10376
|
|
|
|
|
|
|
bind |
10377
|
|
|
|
|
|
|
binmode |
10378
|
|
|
|
|
|
|
bless |
10379
|
|
|
|
|
|
|
break |
10380
|
|
|
|
|
|
|
caller |
10381
|
|
|
|
|
|
|
chdir |
10382
|
|
|
|
|
|
|
chmod |
10383
|
|
|
|
|
|
|
chomp |
10384
|
|
|
|
|
|
|
chop |
10385
|
|
|
|
|
|
|
chown |
10386
|
|
|
|
|
|
|
chr |
10387
|
|
|
|
|
|
|
chroot |
10388
|
|
|
|
|
|
|
close |
10389
|
|
|
|
|
|
|
closedir |
10390
|
|
|
|
|
|
|
cmp |
10391
|
|
|
|
|
|
|
connect |
10392
|
|
|
|
|
|
|
continue |
10393
|
|
|
|
|
|
|
cos |
10394
|
|
|
|
|
|
|
crypt |
10395
|
|
|
|
|
|
|
dbmclose |
10396
|
|
|
|
|
|
|
dbmopen |
10397
|
|
|
|
|
|
|
defined |
10398
|
|
|
|
|
|
|
delete |
10399
|
|
|
|
|
|
|
die |
10400
|
|
|
|
|
|
|
dump |
10401
|
|
|
|
|
|
|
each |
10402
|
|
|
|
|
|
|
else |
10403
|
|
|
|
|
|
|
elsif |
10404
|
|
|
|
|
|
|
eof |
10405
|
|
|
|
|
|
|
eq |
10406
|
|
|
|
|
|
|
evalbytes |
10407
|
|
|
|
|
|
|
exec |
10408
|
|
|
|
|
|
|
exists |
10409
|
|
|
|
|
|
|
exit |
10410
|
|
|
|
|
|
|
exp |
10411
|
|
|
|
|
|
|
fc |
10412
|
|
|
|
|
|
|
fcntl |
10413
|
|
|
|
|
|
|
fileno |
10414
|
|
|
|
|
|
|
flock |
10415
|
|
|
|
|
|
|
for |
10416
|
|
|
|
|
|
|
foreach |
10417
|
|
|
|
|
|
|
formline |
10418
|
|
|
|
|
|
|
ge |
10419
|
|
|
|
|
|
|
getc |
10420
|
|
|
|
|
|
|
getgrgid |
10421
|
|
|
|
|
|
|
getgrnam |
10422
|
|
|
|
|
|
|
gethostbyaddr |
10423
|
|
|
|
|
|
|
gethostbyname |
10424
|
|
|
|
|
|
|
getnetbyaddr |
10425
|
|
|
|
|
|
|
getnetbyname |
10426
|
|
|
|
|
|
|
getpeername |
10427
|
|
|
|
|
|
|
getpgrp |
10428
|
|
|
|
|
|
|
getpriority |
10429
|
|
|
|
|
|
|
getprotobyname |
10430
|
|
|
|
|
|
|
getprotobynumber |
10431
|
|
|
|
|
|
|
getpwnam |
10432
|
|
|
|
|
|
|
getpwuid |
10433
|
|
|
|
|
|
|
getservbyname |
10434
|
|
|
|
|
|
|
getservbyport |
10435
|
|
|
|
|
|
|
getsockname |
10436
|
|
|
|
|
|
|
getsockopt |
10437
|
|
|
|
|
|
|
glob |
10438
|
|
|
|
|
|
|
gmtime |
10439
|
|
|
|
|
|
|
goto |
10440
|
|
|
|
|
|
|
grep |
10441
|
|
|
|
|
|
|
gt |
10442
|
|
|
|
|
|
|
hex |
10443
|
|
|
|
|
|
|
if |
10444
|
|
|
|
|
|
|
index |
10445
|
|
|
|
|
|
|
int |
10446
|
|
|
|
|
|
|
ioctl |
10447
|
|
|
|
|
|
|
join |
10448
|
|
|
|
|
|
|
keys |
10449
|
|
|
|
|
|
|
kill |
10450
|
|
|
|
|
|
|
last |
10451
|
|
|
|
|
|
|
lc |
10452
|
|
|
|
|
|
|
lcfirst |
10453
|
|
|
|
|
|
|
le |
10454
|
|
|
|
|
|
|
length |
10455
|
|
|
|
|
|
|
link |
10456
|
|
|
|
|
|
|
listen |
10457
|
|
|
|
|
|
|
local |
10458
|
|
|
|
|
|
|
localtime |
10459
|
|
|
|
|
|
|
lock |
10460
|
|
|
|
|
|
|
log |
10461
|
|
|
|
|
|
|
lstat |
10462
|
|
|
|
|
|
|
lt |
10463
|
|
|
|
|
|
|
map |
10464
|
|
|
|
|
|
|
mkdir |
10465
|
|
|
|
|
|
|
msgctl |
10466
|
|
|
|
|
|
|
msgget |
10467
|
|
|
|
|
|
|
msgrcv |
10468
|
|
|
|
|
|
|
msgsnd |
10469
|
|
|
|
|
|
|
my |
10470
|
|
|
|
|
|
|
ne |
10471
|
|
|
|
|
|
|
next |
10472
|
|
|
|
|
|
|
no |
10473
|
|
|
|
|
|
|
not |
10474
|
|
|
|
|
|
|
oct |
10475
|
|
|
|
|
|
|
open |
10476
|
|
|
|
|
|
|
opendir |
10477
|
|
|
|
|
|
|
or |
10478
|
|
|
|
|
|
|
ord |
10479
|
|
|
|
|
|
|
our |
10480
|
|
|
|
|
|
|
pack |
10481
|
|
|
|
|
|
|
pipe |
10482
|
|
|
|
|
|
|
pop |
10483
|
|
|
|
|
|
|
pos |
10484
|
|
|
|
|
|
|
print |
10485
|
|
|
|
|
|
|
printf |
10486
|
|
|
|
|
|
|
prototype |
10487
|
|
|
|
|
|
|
push |
10488
|
|
|
|
|
|
|
quotemeta |
10489
|
|
|
|
|
|
|
rand |
10490
|
|
|
|
|
|
|
read |
10491
|
|
|
|
|
|
|
readdir |
10492
|
|
|
|
|
|
|
readlink |
10493
|
|
|
|
|
|
|
readline |
10494
|
|
|
|
|
|
|
readpipe |
10495
|
|
|
|
|
|
|
recv |
10496
|
|
|
|
|
|
|
redo |
10497
|
|
|
|
|
|
|
ref |
10498
|
|
|
|
|
|
|
rename |
10499
|
|
|
|
|
|
|
require |
10500
|
|
|
|
|
|
|
reset |
10501
|
|
|
|
|
|
|
return |
10502
|
|
|
|
|
|
|
reverse |
10503
|
|
|
|
|
|
|
rewinddir |
10504
|
|
|
|
|
|
|
rindex |
10505
|
|
|
|
|
|
|
rmdir |
10506
|
|
|
|
|
|
|
scalar |
10507
|
|
|
|
|
|
|
seek |
10508
|
|
|
|
|
|
|
seekdir |
10509
|
|
|
|
|
|
|
select |
10510
|
|
|
|
|
|
|
semctl |
10511
|
|
|
|
|
|
|
semget |
10512
|
|
|
|
|
|
|
semop |
10513
|
|
|
|
|
|
|
send |
10514
|
|
|
|
|
|
|
sethostent |
10515
|
|
|
|
|
|
|
setnetent |
10516
|
|
|
|
|
|
|
setpgrp |
10517
|
|
|
|
|
|
|
setpriority |
10518
|
|
|
|
|
|
|
setprotoent |
10519
|
|
|
|
|
|
|
setservent |
10520
|
|
|
|
|
|
|
setsockopt |
10521
|
|
|
|
|
|
|
shift |
10522
|
|
|
|
|
|
|
shmctl |
10523
|
|
|
|
|
|
|
shmget |
10524
|
|
|
|
|
|
|
shmread |
10525
|
|
|
|
|
|
|
shmwrite |
10526
|
|
|
|
|
|
|
shutdown |
10527
|
|
|
|
|
|
|
sin |
10528
|
|
|
|
|
|
|
sleep |
10529
|
|
|
|
|
|
|
socket |
10530
|
|
|
|
|
|
|
socketpair |
10531
|
|
|
|
|
|
|
sort |
10532
|
|
|
|
|
|
|
splice |
10533
|
|
|
|
|
|
|
split |
10534
|
|
|
|
|
|
|
sprintf |
10535
|
|
|
|
|
|
|
sqrt |
10536
|
|
|
|
|
|
|
srand |
10537
|
|
|
|
|
|
|
stat |
10538
|
|
|
|
|
|
|
state |
10539
|
|
|
|
|
|
|
study |
10540
|
|
|
|
|
|
|
substr |
10541
|
|
|
|
|
|
|
symlink |
10542
|
|
|
|
|
|
|
syscall |
10543
|
|
|
|
|
|
|
sysopen |
10544
|
|
|
|
|
|
|
sysread |
10545
|
|
|
|
|
|
|
sysseek |
10546
|
|
|
|
|
|
|
system |
10547
|
|
|
|
|
|
|
syswrite |
10548
|
|
|
|
|
|
|
tell |
10549
|
|
|
|
|
|
|
telldir |
10550
|
|
|
|
|
|
|
tie |
10551
|
|
|
|
|
|
|
tied |
10552
|
|
|
|
|
|
|
truncate |
10553
|
|
|
|
|
|
|
uc |
10554
|
|
|
|
|
|
|
ucfirst |
10555
|
|
|
|
|
|
|
umask |
10556
|
|
|
|
|
|
|
undef |
10557
|
|
|
|
|
|
|
unless |
10558
|
|
|
|
|
|
|
unlink |
10559
|
|
|
|
|
|
|
unpack |
10560
|
|
|
|
|
|
|
unshift |
10561
|
|
|
|
|
|
|
untie |
10562
|
|
|
|
|
|
|
until |
10563
|
|
|
|
|
|
|
use |
10564
|
|
|
|
|
|
|
utime |
10565
|
|
|
|
|
|
|
values |
10566
|
|
|
|
|
|
|
vec |
10567
|
|
|
|
|
|
|
waitpid |
10568
|
|
|
|
|
|
|
warn |
10569
|
|
|
|
|
|
|
while |
10570
|
|
|
|
|
|
|
write |
10571
|
|
|
|
|
|
|
xor |
10572
|
|
|
|
|
|
|
|
10573
|
|
|
|
|
|
|
switch |
10574
|
|
|
|
|
|
|
case |
10575
|
|
|
|
|
|
|
default |
10576
|
|
|
|
|
|
|
given |
10577
|
|
|
|
|
|
|
when |
10578
|
|
|
|
|
|
|
err |
10579
|
|
|
|
|
|
|
say |
10580
|
|
|
|
|
|
|
isa |
10581
|
|
|
|
|
|
|
|
10582
|
|
|
|
|
|
|
catch |
10583
|
|
|
|
|
|
|
|
10584
|
|
|
|
|
|
|
); |
10585
|
|
|
|
|
|
|
|
10586
|
|
|
|
|
|
|
# Note: 'ADJUST', 'field' are added by sub check_options |
10587
|
|
|
|
|
|
|
# if --use-feature=class |
10588
|
|
|
|
|
|
|
|
10589
|
|
|
|
|
|
|
# patched above for SWITCH/CASE given/when err say |
10590
|
|
|
|
|
|
|
# 'err' is a fairly safe addition. |
10591
|
|
|
|
|
|
|
# Added 'default' for Switch::Plain. Note that we could also have |
10592
|
|
|
|
|
|
|
# a separate set of keywords to include if we see 'use Switch::Plain' |
10593
|
39
|
|
|
|
|
1984
|
push( @Keywords, @value_requestor ); |
10594
|
|
|
|
|
|
|
|
10595
|
|
|
|
|
|
|
# These are treated the same but are not keywords: |
10596
|
39
|
|
|
|
|
166
|
my @extra_vr = qw( |
10597
|
|
|
|
|
|
|
constant |
10598
|
|
|
|
|
|
|
vars |
10599
|
|
|
|
|
|
|
); |
10600
|
39
|
|
|
|
|
327
|
push( @value_requestor, @extra_vr ); |
10601
|
|
|
|
|
|
|
|
10602
|
39
|
|
|
|
|
4733
|
@expecting_term_token{@value_requestor} = (1) x scalar(@value_requestor); |
10603
|
|
|
|
|
|
|
|
10604
|
|
|
|
|
|
|
# this list contains keywords which do not look for arguments, |
10605
|
|
|
|
|
|
|
# so that they might be followed by an operator, or at least |
10606
|
|
|
|
|
|
|
# not a term. |
10607
|
39
|
|
|
|
|
250
|
my @operator_requestor = qw( |
10608
|
|
|
|
|
|
|
endgrent |
10609
|
|
|
|
|
|
|
endhostent |
10610
|
|
|
|
|
|
|
endnetent |
10611
|
|
|
|
|
|
|
endprotoent |
10612
|
|
|
|
|
|
|
endpwent |
10613
|
|
|
|
|
|
|
endservent |
10614
|
|
|
|
|
|
|
fork |
10615
|
|
|
|
|
|
|
getgrent |
10616
|
|
|
|
|
|
|
gethostent |
10617
|
|
|
|
|
|
|
getlogin |
10618
|
|
|
|
|
|
|
getnetent |
10619
|
|
|
|
|
|
|
getppid |
10620
|
|
|
|
|
|
|
getprotoent |
10621
|
|
|
|
|
|
|
getpwent |
10622
|
|
|
|
|
|
|
getservent |
10623
|
|
|
|
|
|
|
setgrent |
10624
|
|
|
|
|
|
|
setpwent |
10625
|
|
|
|
|
|
|
time |
10626
|
|
|
|
|
|
|
times |
10627
|
|
|
|
|
|
|
wait |
10628
|
|
|
|
|
|
|
wantarray |
10629
|
|
|
|
|
|
|
); |
10630
|
|
|
|
|
|
|
|
10631
|
39
|
|
|
|
|
174
|
push( @Keywords, @operator_requestor ); |
10632
|
|
|
|
|
|
|
|
10633
|
|
|
|
|
|
|
# These are treated the same but are not considered keywords: |
10634
|
39
|
|
|
|
|
88
|
my @extra_or = qw( |
10635
|
|
|
|
|
|
|
STDERR |
10636
|
|
|
|
|
|
|
STDIN |
10637
|
|
|
|
|
|
|
STDOUT |
10638
|
|
|
|
|
|
|
); |
10639
|
|
|
|
|
|
|
|
10640
|
39
|
|
|
|
|
111
|
push( @operator_requestor, @extra_or ); |
10641
|
|
|
|
|
|
|
|
10642
|
39
|
|
|
|
|
874
|
@expecting_operator_token{@operator_requestor} = |
10643
|
|
|
|
|
|
|
(1) x scalar(@operator_requestor); |
10644
|
|
|
|
|
|
|
|
10645
|
|
|
|
|
|
|
# these token TYPES expect trailing operator but not a term |
10646
|
|
|
|
|
|
|
# note: ++ and -- are post-increment and decrement, 'C' = constant |
10647
|
39
|
|
|
|
|
202
|
my @operator_requestor_types = qw( ++ -- C <> q ); |
10648
|
|
|
|
|
|
|
|
10649
|
|
|
|
|
|
|
# NOTE: This hash is available but not currently used |
10650
|
39
|
|
|
|
|
184
|
@expecting_operator_types{@operator_requestor_types} = |
10651
|
|
|
|
|
|
|
(1) x scalar(@operator_requestor_types); |
10652
|
|
|
|
|
|
|
|
10653
|
|
|
|
|
|
|
# these token TYPES consume values (terms) |
10654
|
|
|
|
|
|
|
# note: pp and mm are pre-increment and decrement |
10655
|
|
|
|
|
|
|
# f=semicolon in for, F=file test operator |
10656
|
39
|
|
|
|
|
713
|
my @value_requestor_type = qw# |
10657
|
|
|
|
|
|
|
L { ( [ ~ !~ =~ ; . .. ... A : && ! || // = + - x |
10658
|
|
|
|
|
|
|
**= += -= .= /= *= %= x= &= |= ^= <<= >>= &&= ||= //= |
10659
|
|
|
|
|
|
|
<= >= == != => \ > < % * / ? & | ** <=> ~~ !~~ <<~ |
10660
|
|
|
|
|
|
|
f F pp mm Y p m U J G j >> << ^ t |
10661
|
|
|
|
|
|
|
~. ^. |. &. ^.= |.= &.= |
10662
|
|
|
|
|
|
|
#; |
10663
|
39
|
|
|
|
|
196
|
push( @value_requestor_type, ',' ) |
10664
|
|
|
|
|
|
|
; # (perl doesn't like a ',' in a qw block) |
10665
|
|
|
|
|
|
|
|
10666
|
|
|
|
|
|
|
# NOTE: This hash is available but not currently used |
10667
|
39
|
|
|
|
|
1555
|
@expecting_term_types{@value_requestor_type} = |
10668
|
|
|
|
|
|
|
(1) x scalar(@value_requestor_type); |
10669
|
|
|
|
|
|
|
|
10670
|
|
|
|
|
|
|
# Note: the following valid token types are not assigned here to |
10671
|
|
|
|
|
|
|
# hashes requesting to be followed by values or terms, but are |
10672
|
|
|
|
|
|
|
# instead currently hard-coded into sub operator_expected: |
10673
|
|
|
|
|
|
|
# ) -> :: Q R Z ] b h i k n v w } # |
10674
|
|
|
|
|
|
|
|
10675
|
|
|
|
|
|
|
# For simple syntax checking, it is nice to have a list of operators which |
10676
|
|
|
|
|
|
|
# will really be unhappy if not followed by a term. This includes most |
10677
|
|
|
|
|
|
|
# of the above... |
10678
|
39
|
|
|
|
|
952
|
@really_want_term{@value_requestor_type} = |
10679
|
|
|
|
|
|
|
(1) x scalar(@value_requestor_type); |
10680
|
|
|
|
|
|
|
|
10681
|
|
|
|
|
|
|
# with these exceptions... |
10682
|
39
|
|
|
|
|
154
|
delete $really_want_term{'U'}; # user sub, depends on prototype |
10683
|
39
|
|
|
|
|
95
|
delete $really_want_term{'F'}; # file test works on $_ if no following term |
10684
|
39
|
|
|
|
|
77
|
delete $really_want_term{'Y'}; # indirect object, too risky to check syntax; |
10685
|
|
|
|
|
|
|
# let perl do it |
10686
|
39
|
|
|
|
|
138
|
@q = qw(q qq qx qr s y tr m); |
10687
|
39
|
|
|
|
|
195
|
@is_q_qq_qx_qr_s_y_tr_m{@q} = (1) x scalar(@q); |
10688
|
|
|
|
|
|
|
|
10689
|
|
|
|
|
|
|
# Note added 'qw' here |
10690
|
39
|
|
|
|
|
150
|
@q = qw(q qq qw qx qr s y tr m); |
10691
|
39
|
|
|
|
|
188
|
@is_q_qq_qw_qx_qr_s_y_tr_m{@q} = (1) x scalar(@q); |
10692
|
|
|
|
|
|
|
|
10693
|
|
|
|
|
|
|
# Note: 'class' will be added by sub check_options if -use-feature=class |
10694
|
39
|
|
|
|
|
129
|
@q = qw(package); |
10695
|
39
|
|
|
|
|
129
|
@is_package{@q} = (1) x scalar(@q); |
10696
|
|
|
|
|
|
|
|
10697
|
39
|
|
|
|
|
115
|
@q = qw( if elsif unless ); |
10698
|
39
|
|
|
|
|
150
|
@is_if_elsif_unless{@q} = (1) x scalar(@q); |
10699
|
|
|
|
|
|
|
|
10700
|
39
|
|
|
|
|
99
|
@q = qw( ; t ); |
10701
|
39
|
|
|
|
|
117
|
@is_semicolon_or_t{@q} = (1) x scalar(@q); |
10702
|
|
|
|
|
|
|
|
10703
|
39
|
|
|
|
|
135
|
@q = qw( if elsif unless case when ); |
10704
|
39
|
|
|
|
|
173
|
@is_if_elsif_unless_case_when{@q} = (1) x scalar(@q); |
10705
|
|
|
|
|
|
|
|
10706
|
|
|
|
|
|
|
# Hash of other possible line endings which may occur. |
10707
|
|
|
|
|
|
|
# Keep these coordinated with the regex where this is used. |
10708
|
|
|
|
|
|
|
# Note: chr(13) = chr(015)="\r". |
10709
|
39
|
|
|
|
|
120
|
@q = ( chr(13), chr(29), chr(26) ); |
10710
|
39
|
|
|
|
|
210
|
@other_line_endings{@q} = (1) x scalar(@q); |
10711
|
|
|
|
|
|
|
|
10712
|
|
|
|
|
|
|
# These keywords are handled specially in the tokenizer code: |
10713
|
39
|
|
|
|
|
307
|
my @special_keywords = qw( |
10714
|
|
|
|
|
|
|
do |
10715
|
|
|
|
|
|
|
eval |
10716
|
|
|
|
|
|
|
format |
10717
|
|
|
|
|
|
|
m |
10718
|
|
|
|
|
|
|
package |
10719
|
|
|
|
|
|
|
q |
10720
|
|
|
|
|
|
|
qq |
10721
|
|
|
|
|
|
|
qr |
10722
|
|
|
|
|
|
|
qw |
10723
|
|
|
|
|
|
|
qx |
10724
|
|
|
|
|
|
|
s |
10725
|
|
|
|
|
|
|
sub |
10726
|
|
|
|
|
|
|
tr |
10727
|
|
|
|
|
|
|
y |
10728
|
|
|
|
|
|
|
); |
10729
|
39
|
|
|
|
|
454
|
push( @Keywords, @special_keywords ); |
10730
|
|
|
|
|
|
|
|
10731
|
|
|
|
|
|
|
# Keywords after which list formatting may be used |
10732
|
|
|
|
|
|
|
# WARNING: do not include |map|grep|eval or perl may die on |
10733
|
|
|
|
|
|
|
# syntax errors (map1.t). |
10734
|
39
|
|
|
|
|
415
|
my @keyword_taking_list = qw( |
10735
|
|
|
|
|
|
|
and |
10736
|
|
|
|
|
|
|
chmod |
10737
|
|
|
|
|
|
|
chomp |
10738
|
|
|
|
|
|
|
chop |
10739
|
|
|
|
|
|
|
chown |
10740
|
|
|
|
|
|
|
dbmopen |
10741
|
|
|
|
|
|
|
die |
10742
|
|
|
|
|
|
|
elsif |
10743
|
|
|
|
|
|
|
exec |
10744
|
|
|
|
|
|
|
fcntl |
10745
|
|
|
|
|
|
|
for |
10746
|
|
|
|
|
|
|
foreach |
10747
|
|
|
|
|
|
|
formline |
10748
|
|
|
|
|
|
|
getsockopt |
10749
|
|
|
|
|
|
|
if |
10750
|
|
|
|
|
|
|
index |
10751
|
|
|
|
|
|
|
ioctl |
10752
|
|
|
|
|
|
|
join |
10753
|
|
|
|
|
|
|
kill |
10754
|
|
|
|
|
|
|
local |
10755
|
|
|
|
|
|
|
msgctl |
10756
|
|
|
|
|
|
|
msgrcv |
10757
|
|
|
|
|
|
|
msgsnd |
10758
|
|
|
|
|
|
|
my |
10759
|
|
|
|
|
|
|
open |
10760
|
|
|
|
|
|
|
or |
10761
|
|
|
|
|
|
|
our |
10762
|
|
|
|
|
|
|
pack |
10763
|
|
|
|
|
|
|
print |
10764
|
|
|
|
|
|
|
printf |
10765
|
|
|
|
|
|
|
push |
10766
|
|
|
|
|
|
|
read |
10767
|
|
|
|
|
|
|
readpipe |
10768
|
|
|
|
|
|
|
recv |
10769
|
|
|
|
|
|
|
return |
10770
|
|
|
|
|
|
|
reverse |
10771
|
|
|
|
|
|
|
rindex |
10772
|
|
|
|
|
|
|
seek |
10773
|
|
|
|
|
|
|
select |
10774
|
|
|
|
|
|
|
semctl |
10775
|
|
|
|
|
|
|
semget |
10776
|
|
|
|
|
|
|
send |
10777
|
|
|
|
|
|
|
setpriority |
10778
|
|
|
|
|
|
|
setsockopt |
10779
|
|
|
|
|
|
|
shmctl |
10780
|
|
|
|
|
|
|
shmget |
10781
|
|
|
|
|
|
|
shmread |
10782
|
|
|
|
|
|
|
shmwrite |
10783
|
|
|
|
|
|
|
socket |
10784
|
|
|
|
|
|
|
socketpair |
10785
|
|
|
|
|
|
|
sort |
10786
|
|
|
|
|
|
|
splice |
10787
|
|
|
|
|
|
|
split |
10788
|
|
|
|
|
|
|
sprintf |
10789
|
|
|
|
|
|
|
state |
10790
|
|
|
|
|
|
|
substr |
10791
|
|
|
|
|
|
|
syscall |
10792
|
|
|
|
|
|
|
sysopen |
10793
|
|
|
|
|
|
|
sysread |
10794
|
|
|
|
|
|
|
sysseek |
10795
|
|
|
|
|
|
|
system |
10796
|
|
|
|
|
|
|
syswrite |
10797
|
|
|
|
|
|
|
tie |
10798
|
|
|
|
|
|
|
unless |
10799
|
|
|
|
|
|
|
unlink |
10800
|
|
|
|
|
|
|
unpack |
10801
|
|
|
|
|
|
|
unshift |
10802
|
|
|
|
|
|
|
until |
10803
|
|
|
|
|
|
|
vec |
10804
|
|
|
|
|
|
|
warn |
10805
|
|
|
|
|
|
|
while |
10806
|
|
|
|
|
|
|
given |
10807
|
|
|
|
|
|
|
when |
10808
|
|
|
|
|
|
|
); |
10809
|
|
|
|
|
|
|
|
10810
|
|
|
|
|
|
|
# NOTE: This hash is available but not currently used |
10811
|
39
|
|
|
|
|
1437
|
@is_keyword_taking_list{@keyword_taking_list} = |
10812
|
|
|
|
|
|
|
(1) x scalar(@keyword_taking_list); |
10813
|
|
|
|
|
|
|
|
10814
|
|
|
|
|
|
|
# perl functions which may be unary operators. |
10815
|
|
|
|
|
|
|
|
10816
|
|
|
|
|
|
|
# This list is used to decide if a pattern delimited by slashes, /pattern/, |
10817
|
|
|
|
|
|
|
# can follow one of these keywords. |
10818
|
39
|
|
|
|
|
281
|
@q = qw( |
10819
|
|
|
|
|
|
|
chomp eof eval fc lc pop shift uc undef |
10820
|
|
|
|
|
|
|
); |
10821
|
|
|
|
|
|
|
|
10822
|
39
|
|
|
|
|
300
|
@is_keyword_rejecting_slash_as_pattern_delimiter{@q} = |
10823
|
|
|
|
|
|
|
(1) x scalar(@q); |
10824
|
|
|
|
|
|
|
|
10825
|
|
|
|
|
|
|
# These are keywords for which an arg may optionally be omitted. They are |
10826
|
|
|
|
|
|
|
# currently only used to disambiguate a ? used as a ternary from one used |
10827
|
|
|
|
|
|
|
# as a (deprecated) pattern delimiter. In the future, they might be used |
10828
|
|
|
|
|
|
|
# to give a warning about ambiguous syntax before a /. |
10829
|
|
|
|
|
|
|
# Note: split has been omitted (see note below). |
10830
|
39
|
|
|
|
|
423
|
my @keywords_taking_optional_arg = qw( |
10831
|
|
|
|
|
|
|
abs |
10832
|
|
|
|
|
|
|
alarm |
10833
|
|
|
|
|
|
|
caller |
10834
|
|
|
|
|
|
|
chdir |
10835
|
|
|
|
|
|
|
chomp |
10836
|
|
|
|
|
|
|
chop |
10837
|
|
|
|
|
|
|
chr |
10838
|
|
|
|
|
|
|
chroot |
10839
|
|
|
|
|
|
|
close |
10840
|
|
|
|
|
|
|
cos |
10841
|
|
|
|
|
|
|
defined |
10842
|
|
|
|
|
|
|
die |
10843
|
|
|
|
|
|
|
eof |
10844
|
|
|
|
|
|
|
eval |
10845
|
|
|
|
|
|
|
evalbytes |
10846
|
|
|
|
|
|
|
exit |
10847
|
|
|
|
|
|
|
exp |
10848
|
|
|
|
|
|
|
fc |
10849
|
|
|
|
|
|
|
getc |
10850
|
|
|
|
|
|
|
glob |
10851
|
|
|
|
|
|
|
gmtime |
10852
|
|
|
|
|
|
|
hex |
10853
|
|
|
|
|
|
|
int |
10854
|
|
|
|
|
|
|
last |
10855
|
|
|
|
|
|
|
lc |
10856
|
|
|
|
|
|
|
lcfirst |
10857
|
|
|
|
|
|
|
length |
10858
|
|
|
|
|
|
|
localtime |
10859
|
|
|
|
|
|
|
log |
10860
|
|
|
|
|
|
|
lstat |
10861
|
|
|
|
|
|
|
mkdir |
10862
|
|
|
|
|
|
|
next |
10863
|
|
|
|
|
|
|
oct |
10864
|
|
|
|
|
|
|
ord |
10865
|
|
|
|
|
|
|
pop |
10866
|
|
|
|
|
|
|
pos |
10867
|
|
|
|
|
|
|
print |
10868
|
|
|
|
|
|
|
printf |
10869
|
|
|
|
|
|
|
prototype |
10870
|
|
|
|
|
|
|
quotemeta |
10871
|
|
|
|
|
|
|
rand |
10872
|
|
|
|
|
|
|
readline |
10873
|
|
|
|
|
|
|
readlink |
10874
|
|
|
|
|
|
|
readpipe |
10875
|
|
|
|
|
|
|
redo |
10876
|
|
|
|
|
|
|
ref |
10877
|
|
|
|
|
|
|
require |
10878
|
|
|
|
|
|
|
reset |
10879
|
|
|
|
|
|
|
reverse |
10880
|
|
|
|
|
|
|
rmdir |
10881
|
|
|
|
|
|
|
say |
10882
|
|
|
|
|
|
|
select |
10883
|
|
|
|
|
|
|
shift |
10884
|
|
|
|
|
|
|
sin |
10885
|
|
|
|
|
|
|
sleep |
10886
|
|
|
|
|
|
|
sqrt |
10887
|
|
|
|
|
|
|
srand |
10888
|
|
|
|
|
|
|
stat |
10889
|
|
|
|
|
|
|
study |
10890
|
|
|
|
|
|
|
tell |
10891
|
|
|
|
|
|
|
uc |
10892
|
|
|
|
|
|
|
ucfirst |
10893
|
|
|
|
|
|
|
umask |
10894
|
|
|
|
|
|
|
undef |
10895
|
|
|
|
|
|
|
unlink |
10896
|
|
|
|
|
|
|
warn |
10897
|
|
|
|
|
|
|
write |
10898
|
|
|
|
|
|
|
); |
10899
|
39
|
|
|
|
|
975
|
@is_keyword_taking_optional_arg{@keywords_taking_optional_arg} = |
10900
|
|
|
|
|
|
|
(1) x scalar(@keywords_taking_optional_arg); |
10901
|
|
|
|
|
|
|
|
10902
|
|
|
|
|
|
|
# This list is used to decide if a pattern delimited by question marks, |
10903
|
|
|
|
|
|
|
# ?pattern?, can follow one of these keywords. Note that from perl 5.22 |
10904
|
|
|
|
|
|
|
# on, a ?pattern? is not recognized, so we can be much more strict than |
10905
|
|
|
|
|
|
|
# with a /pattern/. Note that 'split' is not in this list. In current |
10906
|
|
|
|
|
|
|
# versions of perl a question following split must be a ternary, but |
10907
|
|
|
|
|
|
|
# in older versions it could be a pattern. The guessing algorithm will |
10908
|
|
|
|
|
|
|
# decide. We are combining two lists here to simplify the test. |
10909
|
39
|
|
|
|
|
949
|
@q = ( @keywords_taking_optional_arg, @operator_requestor ); |
10910
|
39
|
|
|
|
|
1423
|
@is_keyword_rejecting_question_as_pattern_delimiter{@q} = |
10911
|
|
|
|
|
|
|
(1) x scalar(@q); |
10912
|
|
|
|
|
|
|
|
10913
|
|
|
|
|
|
|
# These are not used in any way yet |
10914
|
|
|
|
|
|
|
# my @unused_keywords = qw( |
10915
|
|
|
|
|
|
|
# __FILE__ |
10916
|
|
|
|
|
|
|
# __LINE__ |
10917
|
|
|
|
|
|
|
# __PACKAGE__ |
10918
|
|
|
|
|
|
|
# ); |
10919
|
|
|
|
|
|
|
|
10920
|
|
|
|
|
|
|
# The list of keywords was originally extracted from function 'keyword' in |
10921
|
|
|
|
|
|
|
# perl file toke.c version 5.005.03, using this utility, plus a |
10922
|
|
|
|
|
|
|
# little editing: (file getkwd.pl): |
10923
|
|
|
|
|
|
|
# while (<>) { while (/\"(.*)\"/g) { print "$1\n"; } } |
10924
|
|
|
|
|
|
|
# Add 'get' prefix where necessary, then split into the above lists. |
10925
|
|
|
|
|
|
|
# This list should be updated as necessary. |
10926
|
|
|
|
|
|
|
# The list should not contain these special variables: |
10927
|
|
|
|
|
|
|
# ARGV DATA ENV SIG STDERR STDIN STDOUT |
10928
|
|
|
|
|
|
|
# __DATA__ __END__ |
10929
|
|
|
|
|
|
|
|
10930
|
39
|
|
|
|
|
10120
|
@is_keyword{@Keywords} = (1) x scalar(@Keywords); |
10931
|
|
|
|
|
|
|
} ## end BEGIN |
10932
|
|
|
|
|
|
|
1; |