File Coverage

blib/lib/Perl/Tidy/Tokenizer.pm

Criterion	Covered	Total	%
statement	2330	3195	72.9
branch	1118	1692	66.0
condition	516	874	59.0
subroutine	160	191	83.7
pod	0	146	0.0
total	4124	6098	67.6

line	stmt	bran	cond	sub	pod	time	code
1							#####################################################################
2							#
3							# The Perl::Tidy::Tokenizer package is essentially a filter which
4							# reads lines of perl source code from a source object and provides
5							# corresponding tokenized lines through its get_line() method. Lines
6							# flow from the source_object to the caller like this:
7							#
8							# source_object --> Tokenizer --> calling routine
9							# get_line() get_line() line_of_tokens
10							#
11							# The source object can be a STRING ref, an ARRAY ref, or an object with a
12							# get_line() method which supplies one line (a character string) perl call.
13							# The Tokenizer returns a reference to a data structure 'line_of_tokens'
14							# containing one tokenized line for each call to its get_line() method.
15							#
16							# NOTE: This is not a real class. Only one tokenizer my be used.
17							#
18							########################################################################
19
20							package Perl::Tidy::Tokenizer;
21	39			39		307	use strict;
	39					147
	39					1330
22	39			39		254	use warnings;
	39					101
	39					1153
23	39			39		206	use English qw( -no_match_vars );
	39					105
	39					218
24
25							our $VERSION = '20230912';
26
27	39			39		14536	use Carp;
	39					130
	39					2290
28
29	39			39		289	use constant DEVEL_MODE => 0;
	39					107
	39					2572
30	39			39		279	use constant EMPTY_STRING => q{};
	39					121
	39					2593
31	39			39		263	use constant SPACE => q{ };
	39					98
	39					2483
32
33							# Decimal values of some ascii characters for quick checks
34	39			39		315	use constant ORD_TAB => 9;
	39					83
	39					2105
35	39			39		284	use constant ORD_SPACE => 32;
	39					124
	39					2213
36	39			39		280	use constant ORD_PRINTABLE_MIN => 33;
	39					95
	39					2140
37	39			39		259	use constant ORD_PRINTABLE_MAX => 126;
	39					108
	39					8817
38
39							# GLOBAL VARIABLES which change during tokenization:
40							# These could also be stored in $self but it is more convenient and
41							# efficient to make them global lexical variables.
42							# INITIALIZER: sub prepare_for_a_new_file
43							my (
44
45							$brace_depth,
46							$context,
47							$current_package,
48							$last_nonblank_block_type,
49							$last_nonblank_token,
50							$last_nonblank_type,
51							$next_sequence_number,
52							$paren_depth,
53							$rbrace_context,
54							$rbrace_package,
55							$rbrace_structural_type,
56							$rbrace_type,
57							$rcurrent_depth,
58							$rcurrent_sequence_number,
59							$rdepth_array,
60							$ris_block_function,
61							$ris_block_list_function,
62							$ris_constant,
63							$ris_user_function,
64							$rnested_statement_type,
65							$rnested_ternary_flag,
66							$rparen_semicolon_count,
67							$rparen_vars,
68							$rparen_type,
69							$rsaw_function_definition,
70							$rsaw_use_module,
71							$rsquare_bracket_structural_type,
72							$rsquare_bracket_type,
73							$rstarting_line_of_current_depth,
74							$rtotal_depth,
75							$ruser_function_prototype,
76							$square_bracket_depth,
77							$statement_type,
78							$total_depth,
79							);
80
81							my (
82
83							# GLOBAL CONSTANTS for routines in this package,
84							# INITIALIZER: BEGIN block.
85							%can_start_digraph,
86							%expecting_operator_token,
87							%expecting_operator_types,
88							%expecting_term_token,
89							%expecting_term_types,
90							%is_block_operator,
91							%is_digraph,
92							%is_file_test_operator,
93							%is_if_elsif_unless,
94							%is_if_elsif_unless_case_when,
95							%is_indirect_object_taker,
96							%is_keyword_rejecting_question_as_pattern_delimiter,
97							%is_keyword_rejecting_slash_as_pattern_delimiter,
98							%is_keyword_taking_list,
99							%is_keyword_taking_optional_arg,
100							%is_q_qq_qw_qx_qr_s_y_tr_m,
101							%is_q_qq_qx_qr_s_y_tr_m,
102							%is_semicolon_or_t,
103							%is_sort_map_grep,
104							%is_sort_map_grep_eval_do,
105							%is_tetragraph,
106							%is_trigraph,
107							%is_valid_token_type,
108							%other_line_endings,
109							%really_want_term,
110							@closing_brace_names,
111							@opening_brace_names,
112
113							# GLOBAL CONSTANT hash lookup table of operator expected values
114							# INITIALIZER: BEGIN block
115							%op_expected_table,
116
117							# GLOBAL VARIABLES which are constant after being configured.
118							# INITIALIZER: BEGIN block and modified by sub check_options
119							%is_code_block_token,
120							%is_keyword,
121							%is_my_our_state,
122							%is_package,
123
124							# INITIALIZER: sub check_options
125							$code_skipping_pattern_begin,
126							$code_skipping_pattern_end,
127
128							$rOpts_code_skipping,
129							$rOpts_code_skipping_begin,
130							$rOpts_starting_indentation_level,
131							$rOpts_indent_columns,
132							$rOpts_look_for_hash_bang,
133							$rOpts_look_for_autoloader,
134							$rOpts_look_for_selfloader,
135							$rOpts_trim_qw,
136							$rOpts_extended_syntax,
137							$rOpts_continuation_indentation,
138							$rOpts_outdent_labels,
139							$rOpts_maximum_level_errors,
140							$rOpts_maximum_unexpected_errors,
141
142							$tabsize,
143							%is_END_DATA_format_sub,
144							%is_grep_alias,
145							%is_sub,
146							$guess_if_method,
147							);
148
149							# possible values of operator_expected()
150	39			39		1581	use constant TERM => -1;
	39					99
	39					2270
151	39			39		269	use constant UNKNOWN => 0;
	39					86
	39					2222
152	39			39		285	use constant OPERATOR => 1;
	39					149
	39					2227
153
154							# possible values of context
155	39			39		341	use constant SCALAR_CONTEXT => -1;
	39					151
	39					2168
156	39			39		278	use constant UNKNOWN_CONTEXT => 0;
	39					139
	39					2172
157	39			39		308	use constant LIST_CONTEXT => 1;
	39					155
	39					2427
158
159							# Maximum number of little messages; probably need not be changed.
160	39			39		307	use constant MAX_NAG_MESSAGES => 6;
	39					101
	39					7947
161
162	0					0	BEGIN {
163
164							# Array index names for $self.
165							# Do not combine with other BEGIN blocks (c101).
166	39			39		284507	my $i = 0;
167							use constant {
168	39					18312	_rhere_target_list_ => $i++,
169							_in_here_doc_ => $i++,
170							_here_doc_target_ => $i++,
171							_here_quote_character_ => $i++,
172							_in_data_ => $i++,
173							_in_end_ => $i++,
174							_in_format_ => $i++,
175							_in_error_ => $i++,
176							_in_trouble_ => $i++,
177							_warning_count_ => $i++,
178							_html_tag_count_ => $i++,
179							_in_pod_ => $i++,
180							_in_skipped_ => $i++,
181							_in_attribute_list_ => $i++,
182							_in_quote_ => $i++,
183							_quote_target_ => $i++,
184							_line_start_quote_ => $i++,
185							_starting_level_ => $i++,
186							_know_starting_level_ => $i++,
187							_last_line_number_ => $i++,
188							_saw_perl_dash_P_ => $i++,
189							_saw_perl_dash_w_ => $i++,
190							_saw_use_strict_ => $i++,
191							_saw_v_string_ => $i++,
192							_saw_brace_error_ => $i++,
193							_hit_bug_ => $i++,
194							_look_for_autoloader_ => $i++,
195							_look_for_selfloader_ => $i++,
196							_saw_autoloader_ => $i++,
197							_saw_selfloader_ => $i++,
198							_saw_hash_bang_ => $i++,
199							_saw_end_ => $i++,
200							_saw_data_ => $i++,
201							_saw_negative_indentation_ => $i++,
202							_started_tokenizing_ => $i++,
203							_debugger_object_ => $i++,
204							_diagnostics_object_ => $i++,
205							_logger_object_ => $i++,
206							_unexpected_error_count_ => $i++,
207							_started_looking_for_here_target_at_ => $i++,
208							_nearly_matched_here_target_at_ => $i++,
209							_line_of_text_ => $i++,
210							_rlower_case_labels_at_ => $i++,
211							_maximum_level_ => $i++,
212							_true_brace_error_count_ => $i++,
213							_rOpts_ => $i++,
214							_rinput_lines_ => $i++,
215							_input_line_index_next_ => $i++,
216	39			39		290	};
	39					116
217							} ## end BEGIN
218
219							{ ## closure for subs to count instances
220
221							# methods to count instances
222							my $_count = 0;
223	0			0	0	0	sub get_count { return $_count; }
224	562			562		2495	sub _increment_count { return ++$_count }
225	562			562		1343	sub _decrement_count { return --$_count }
226							}
227
228							sub DESTROY {
229	562			562		1415	my $self = shift;
230	562					3016	$self->_decrement_count();
231	562					8022	return;
232							}
233
234							sub AUTOLOAD {
235
236							# Catch any undefined sub calls so that we are sure to get
237							# some diagnostic information. This sub should never be called
238							# except for a programming error.
239	0			0		0	our $AUTOLOAD;
240	0	0				0	return if ( $AUTOLOAD =~ /\bDESTROY$/ );
241	0					0	my ( $pkg, $fname, $lno ) = caller();
242	0					0	my $my_package = __PACKAGE__;
243	0					0	print {*STDERR} <<EOM;
	0					0
244							======================================================================
245							Error detected in package '$my_package', version $VERSION
246							Received unexpected AUTOLOAD call for sub '$AUTOLOAD'
247							Called from package: '$pkg'
248							Called from File '$fname' at line '$lno'
249							This error is probably due to a recent programming change
250							======================================================================
251							EOM
252	0					0	exit 1;
253							} ## end sub AUTOLOAD
254
255							sub Die {
256	0			0	0	0	my ($msg) = @_;
257	0					0	Perl::Tidy::Die($msg);
258	0					0	croak "unexpected return from Perl::Tidy::Die";
259							}
260
261							sub Fault {
262	0			0	0	0	my ( $self, $msg ) = @_;
263
264							# This routine is called for errors that really should not occur
265							# except if there has been a bug introduced by a recent program change.
266							# Please add comments at calls to Fault to explain why the call
267							# should not occur, and where to look to fix it.
268	0					0	my ( $package0, $filename0, $line0, $subroutine0 ) = caller(0);
269	0					0	my ( $package1, $filename1, $line1, $subroutine1 ) = caller(1);
270	0					0	my ( $package2, $filename2, $line2, $subroutine2 ) = caller(2);
271	0					0	my $pkg = __PACKAGE__;
272
273							# Catch potential error of Fault not called as a method
274	0					0	my $input_stream_name;
275	0	0				0	if ( !ref($self) ) {
276	0					0	$msg = "Fault not called as a method - please fix\n";
277	0	0	0			0	if ( $self && length($self) < 200 ) { $msg .= $self }
	0					0
278	0					0	$self = undef;
279	0					0	$input_stream_name = "(UNKNOWN)";
280							}
281							else {
282	0					0	$input_stream_name = $self->get_input_stream_name();
283							}
284
285	0					0	Die(<<EOM);
286							==============================================================================
287							While operating on input stream with name: '$input_stream_name'
288							A fault was detected at line $line0 of sub '$subroutine1'
289							in file '$filename1'
290							which was called from line $line1 of sub '$subroutine2'
291							Message: '$msg'
292							This is probably an error introduced by a recent programming change.
293							$pkg reports VERSION='$VERSION'.
294							==============================================================================
295							EOM
296
297							# We shouldn't get here, but this return is to keep Perl-Critic from
298							# complaining.
299	0					0	return;
300							} ## end sub Fault
301
302							sub make_code_skipping_pattern {
303	1120			1120	0	3122	my ( $rOpts, $opt_name, $default ) = @_;
304	1120					2549	my $param = $rOpts->{$opt_name};
305	1120	100				2899	if ( !$param ) { $param = $default }
	1118					2042
306	1120					4634	$param =~ s/^\s*//; # allow leading spaces to be like format-skipping
307	1120	50				5015	if ( $param !~ /^#/ ) {
308	0					0	Die("ERROR: the $opt_name parameter '$param' must begin with '#'\n");
309							}
310	1120					3561	my $pattern = '^\s*' . $param . '\b';
311	1120	50				3362	if ( Perl::Tidy::Formatter::bad_pattern($pattern) ) {
312	0					0	Die(
313							"ERROR: the $opt_name parameter '$param' causes the invalid regex '$pattern'\n"
314							);
315							}
316	1120					3885	return $pattern;
317							} ## end sub make_code_skipping_pattern
318
319							sub check_options {
320
321							# Check Tokenizer parameters
322	560			560	0	1641	my $rOpts = shift;
323
324	560					2221	%is_sub = ();
325	560					1876	$is_sub{'sub'} = 1;
326
327	560					4258	%is_END_DATA_format_sub = (
328							'__END__' => 1,
329							'__DATA__' => 1,
330							'format' => 1,
331							'sub' => 1,
332							);
333
334							# Install any aliases to 'sub'
335	560	100				2292	if ( $rOpts->{'sub-alias-list'} ) {
336
337							# Note that any 'sub-alias-list' has been preprocessed to
338							# be a trimmed, space-separated list which includes 'sub'
339							# for example, it might be 'sub method fun'
340	3					31	my @sub_alias_list = split /\s+/, $rOpts->{'sub-alias-list'};
341	3					17	foreach my $word (@sub_alias_list) {
342	11					30	$is_sub{$word} = 1;
343	11					24	$is_END_DATA_format_sub{$word} = 1;
344							}
345							}
346
347							# Set global flag to say if we have to guess if bareword 'method' is
348							# a sub when 'method' is in %is_sub. This will be true unless:
349							# (1) the user entered 'method' as sub alias, or
350							# (2) the user set --use-feature=class
351							# In these two cases we can assume that 'method' is a sub alias.
352	560					1406	$guess_if_method = 1;
353	560	100				2168	if ( $is_sub{'method'} ) { $guess_if_method = 0 }
	2					7
354
355							#------------------------------------------------
356							# Update hash values for any -use-feature options
357							#------------------------------------------------
358
359	560					1315	my $use_feature_class = 1;
360	560	50				2133	if ( $rOpts->{'use-feature'} ) {
361	0	0				0	if ( $rOpts->{'use-feature'} =~ /\bnoclass\b/ ) {
		0
362	0					0	$use_feature_class = 0;
363							}
364							elsif ( $rOpts->{'use-feature'} =~ /\bclass\b/ ) {
365	0					0	$guess_if_method = 0;
366							}
367							else {
368							## neither 'class' nor 'noclass' seen so use default
369							}
370							}
371
372							# These are the main updates for this option. There are additional
373							# changes elsewhere, usually indicated with a comment 'rt145706'
374
375							# Update hash values for use_feature=class, added for rt145706
376							# see 'perlclass.pod'
377
378							# IMPORTANT: We are changing global hash values initially set in a BEGIN
379							# block. Values must be defined (true or false) for each of these new
380							# words whether true or false. Otherwise, programs using the module which
381							# change options between runs (such as test code) will have
382							# incorrect settings and fail.
383
384							# There are 4 new keywords:
385
386							# 'class' - treated specially as generalization of 'package'
387							# Note: we must not set 'class' to be a keyword to avoid problems
388							# with older uses.
389	560					2014	$is_package{'class'} = $use_feature_class;
390
391							# 'method' - treated like sub using the sub-alias-list option
392							# Note: we must not set 'method' to be a keyword to avoid problems
393							# with older uses.
394	560	50				1886	if ($use_feature_class) {
395	560					1565	$is_sub{'method'} = 1;
396	560					1433	$is_END_DATA_format_sub{'method'} = 1;
397							}
398
399							# 'field' - added as a keyword, and works like 'my'
400	560					1680	$is_keyword{'field'} = $use_feature_class;
401	560					1603	$is_my_our_state{'field'} = $use_feature_class;
402
403							# 'ADJUST' - added as a keyword and works like 'BEGIN'
404							# TODO: if ADJUST gets a paren list, this will need to be updated
405	560					1363	$is_keyword{'ADJUST'} = $use_feature_class;
406	560					1431	$is_code_block_token{'ADJUST'} = $use_feature_class;
407
408	560					1984	%is_grep_alias = ();
409	560	50				1939	if ( $rOpts->{'grep-alias-list'} ) {
410
411							# Note that 'grep-alias-list' has been preprocessed to be a trimmed,
412							# space-separated list
413	560					5512	my @q = split /\s+/, $rOpts->{'grep-alias-list'};
414	560					5176	@{is_grep_alias}{@q} = (1) x scalar(@q);
415							}
416
417	560					2022	$rOpts_starting_indentation_level = $rOpts->{'starting-indentation-level'};
418	560					1614	$rOpts_indent_columns = $rOpts->{'indent-columns'};
419	560					1348	$rOpts_look_for_hash_bang = $rOpts->{'look-for-hash-bang'};
420	560					1296	$rOpts_look_for_autoloader = $rOpts->{'look-for-autoloader'};
421	560					1259	$rOpts_look_for_selfloader = $rOpts->{'look-for-selfloader'};
422	560					1269	$rOpts_trim_qw = $rOpts->{'trim-qw'};
423	560					1254	$rOpts_extended_syntax = $rOpts->{'extended-syntax'};
424	560					1252	$rOpts_continuation_indentation = $rOpts->{'continuation-indentation'};
425	560					1207	$rOpts_outdent_labels = $rOpts->{'outdent-labels'};
426	560					1221	$rOpts_maximum_level_errors = $rOpts->{'maximum-level-errors'};
427	560					1264	$rOpts_maximum_unexpected_errors = $rOpts->{'maximum-unexpected-errors'};
428	560					1214	$rOpts_code_skipping = $rOpts->{'code-skipping'};
429	560					1241	$rOpts_code_skipping_begin = $rOpts->{'code-skipping-begin'};
430
431							# In the Tokenizer, --indent-columns is just used for guessing old
432							# indentation, and must be positive. If -i=0 is used for this run (which
433							# is possible) we'll just guess that the old run used 4 spaces per level.
434	560	100				2028	if ( !$rOpts_indent_columns ) { $rOpts_indent_columns = 4 }
	12					28
435
436							# Define $tabsize, the number of spaces per tab for use in
437							# guessing the indentation of source lines with leading tabs.
438							# Assume same as for this run if tabs are used, otherwise assume
439							# a default value, typically 8
440							$tabsize =
441							$rOpts->{'entab-leading-whitespace'}
442							? $rOpts->{'entab-leading-whitespace'}
443							: $rOpts->{'tabs'} ? $rOpts->{'indent-columns'}
444	560	50				2769	: $rOpts->{'default-tabsize'};
		100
445	560	50				2048	if ( !$tabsize ) { $tabsize = 8 }
	0					0
446
447							$code_skipping_pattern_begin =
448	560					2699	make_code_skipping_pattern( $rOpts, 'code-skipping-begin', '#<<V' );
449	560					2045	$code_skipping_pattern_end =
450							make_code_skipping_pattern( $rOpts, 'code-skipping-end', '#>>V' );
451
452	560					2001	return;
453							} ## end sub check_options
454
455							sub new {
456
457	562			562	0	3192	my ( $class, @args ) = @_;
458
459	562					5657	my %defaults = (
460							source_object => undef,
461							debugger_object => undef,
462							diagnostics_object => undef,
463							logger_object => undef,
464							starting_level => undef,
465							starting_line_number => 1,
466							rOpts => {},
467							);
468	562					4155	my %args = ( %defaults, @args );
469
470							# we are given an object with a get_line() method to supply source lines
471	562					2177	my $source_object = $args{source_object};
472	562					1491	my $rOpts = $args{rOpts};
473
474							# Check call args
475	562	50				2166	if ( !defined($source_object) ) {
476	0					0	Die(
477							"Perl::Tidy::Tokenizer::new called without a 'source_object' parameter\n"
478							);
479							}
480	562	50				2365	if ( !ref($source_object) ) {
481	0					0	Die(<<EOM);
482							sub Perl::Tidy::Tokenizer::new received a 'source_object' parameter which is not a reference;
483							'source_object' must be a reference to a STRING, ARRAY, or object with a 'getline' method
484							EOM
485							}
486
487							# Tokenizer state data is as follows:
488							# _rhere_target_list_ reference to list of here-doc targets
489							# _here_doc_target_ the target string for a here document
490							# _here_quote_character_ the type of here-doc quoting (" ' ` or none)
491							# to determine if interpolation is done
492							# _quote_target_ character we seek if chasing a quote
493							# _line_start_quote_ line where we started looking for a long quote
494							# _in_here_doc_ flag indicating if we are in a here-doc
495							# _in_pod_ flag set if we are in pod documentation
496							# _in_skipped_ flag set if we are in a skipped section
497							# _in_error_ flag set if we saw severe error (binary in script)
498							# _in_trouble_ set if we saw a troublesome lexical like 'my sub s'
499							# _warning_count_ number of calls to logger sub warning
500							# _html_tag_count_ number of apparent html tags seen (indicates html)
501							# _in_data_ flag set if we are in __DATA__ section
502							# _in_end_ flag set if we are in __END__ section
503							# _in_format_ flag set if we are in a format description
504							# _in_attribute_list_ flag telling if we are looking for attributes
505							# _in_quote_ flag telling if we are chasing a quote
506							# _starting_level_ indentation level of first line
507							# _diagnostics_object_ place to write debugging information
508							# _unexpected_error_count_ error count used to limit output
509							# _lower_case_labels_at_ line numbers where lower case labels seen
510							# _hit_bug_ program bug detected
511
512	562					1339	my $self = [];
513	562					1543	$self->[_rhere_target_list_] = [];
514	562					1524	$self->[_in_here_doc_] = 0;
515	562					1865	$self->[_here_doc_target_] = EMPTY_STRING;
516	562					1478	$self->[_here_quote_character_] = EMPTY_STRING;
517	562					1499	$self->[_in_data_] = 0;
518	562					1528	$self->[_in_end_] = 0;
519	562					1566	$self->[_in_format_] = 0;
520	562					1335	$self->[_in_error_] = 0;
521	562					1467	$self->[_in_trouble_] = 0;
522	562					1290	$self->[_warning_count_] = 0;
523	562					1387	$self->[_html_tag_count_] = 0;
524	562					1287	$self->[_in_pod_] = 0;
525	562					1392	$self->[_in_skipped_] = 0;
526	562					1364	$self->[_in_attribute_list_] = 0;
527	562					1319	$self->[_in_quote_] = 0;
528	562					1441	$self->[_quote_target_] = EMPTY_STRING;
529	562					1266	$self->[_line_start_quote_] = -1;
530	562					1330	$self->[_starting_level_] = $args{starting_level};
531	562					1888	$self->[_know_starting_level_] = defined( $args{starting_level} );
532	562					1877	$self->[_last_line_number_] = $args{starting_line_number} - 1;
533	562					1405	$self->[_saw_perl_dash_P_] = 0;
534	562					1219	$self->[_saw_perl_dash_w_] = 0;
535	562					1411	$self->[_saw_use_strict_] = 0;
536	562					1267	$self->[_saw_v_string_] = 0;
537	562					1268	$self->[_saw_brace_error_] = 0;
538	562					1212	$self->[_hit_bug_] = 0;
539	562					1315	$self->[_look_for_autoloader_] = $rOpts_look_for_autoloader;
540	562					1500	$self->[_look_for_selfloader_] = $rOpts_look_for_selfloader;
541	562					1318	$self->[_saw_autoloader_] = 0;
542	562					1626	$self->[_saw_selfloader_] = 0;
543	562					1227	$self->[_saw_hash_bang_] = 0;
544	562					1247	$self->[_saw_end_] = 0;
545	562					1186	$self->[_saw_data_] = 0;
546	562					1399	$self->[_saw_negative_indentation_] = 0;
547	562					1273	$self->[_started_tokenizing_] = 0;
548	562					1325	$self->[_debugger_object_] = $args{debugger_object};
549	562					1393	$self->[_diagnostics_object_] = $args{diagnostics_object};
550	562					1371	$self->[_logger_object_] = $args{logger_object};
551	562					1233	$self->[_unexpected_error_count_] = 0;
552	562					1212	$self->[_started_looking_for_here_target_at_] = 0;
553	562					1400	$self->[_nearly_matched_here_target_at_] = undef;
554	562					1396	$self->[_line_of_text_] = EMPTY_STRING;
555	562					1167	$self->[_rlower_case_labels_at_] = undef;
556	562					1292	$self->[_maximum_level_] = 0;
557	562					1304	$self->[_true_brace_error_count_] = 0;
558	562					1283	$self->[_rOpts_] = $rOpts;
559
560	562					1254	bless $self, $class;
561
562	562					3422	$self->prepare_for_a_new_file($source_object);
563	562					3268	$self->find_starting_indentation_level();
564
565							# This is not a full class yet, so die if an attempt is made to
566							# create more than one object.
567
568	562	50				2684	if ( _increment_count() > 1 ) {
569	0					0	confess
570							"Attempt to create more than 1 object in $class, which is not a true class yet\n";
571							}
572
573	562					4259	return $self;
574
575							} ## end sub new
576
577							# Called externally
578							sub get_unexpected_error_count {
579	4			4	0	14	my ($self) = @_;
580	4					19	return $self->[_unexpected_error_count_];
581							}
582
583							# Called externally
584							sub is_keyword {
585	2796			2796	0	5090	my ($str) = @_;
586	2796					10028	return $is_keyword{$str};
587							}
588
589							#----------------------------------------------------------------
590							# Line input routines, previously handled by the LineBuffer class
591							#----------------------------------------------------------------
592							sub make_source_array {
593
594	562			562	0	1745	my ( $self, $line_source_object ) = @_;
595
596							# Convert the source into an array of lines
597	562					1464	my $rinput_lines = [];
598
599	562					1589	my $rsource = ref($line_source_object);
600
601	562	50				3677	if ( !$rsource ) {
		50
		50
602
603							# shouldn't happen: this should have been checked in sub new
604	0					0	$self->Fault(<<EOM);
605							sub Perl::Tidy::Tokenizer::new received a 'source_object' parameter which is not a reference;
606							'source_object' must be a reference to a STRING, ARRAY, or object with a 'getline' method
607							EOM
608							}
609
610							# handle an ARRAY ref
611							elsif ( $rsource eq 'ARRAY' ) {
612	0					0	$rinput_lines = $line_source_object;
613							}
614
615							# handle a SCALAR ref
616							elsif ( $rsource eq 'SCALAR' ) {
617	562					1074	my @lines = split /^/, ${$line_source_object};
	562					9247
618	562					2316	$rinput_lines = \@lines;
619							}
620
621							# handle an object - must have a get_line method
622							else {
623	0					0	while ( my $line = $line_source_object->get_line() ) {
624	0					0	push( @{$rinput_lines}, $line );
	0					0
625							}
626							}
627
628	562					2139	$self->[_rinput_lines_] = $rinput_lines;
629	562					1566	$self->[_input_line_index_next_] = 0;
630	562					1390	return;
631							} ## end sub make_source_array
632
633							sub peek_ahead {
634	1235			1235	0	3077	my ( $self, $buffer_index ) = @_;
635
636							# look $buffer_index lines ahead of the current location without disturbing
637							# the input
638	1235					2259	my $line;
639	1235					2626	my $rinput_lines = $self->[_rinput_lines_];
640	1235					2687	my $line_index = $buffer_index + $self->[_input_line_index_next_];
641	1235	100				2156	if ( $line_index < @{$rinput_lines} ) {
	1235					3502
642	1223					2701	$line = $rinput_lines->[$line_index];
643							}
644	1235					4880	return $line;
645							} ## end sub peek_ahead
646
647							#-----------------------------------------
648							# interface to Perl::Tidy::Logger routines
649							#-----------------------------------------
650							sub warning {
651
652	0			0	0	0	my ( $self, $msg ) = @_;
653
654	0					0	my $logger_object = $self->[_logger_object_];
655	0					0	$self->[_warning_count_]++;
656	0	0				0	if ($logger_object) {
657	0					0	my $msg_line_number = $self->[_last_line_number_];
658	0					0	$logger_object->warning( $msg, $msg_line_number );
659							}
660	0					0	return;
661							} ## end sub warning
662
663							sub get_input_stream_name {
664
665	0			0	0	0	my $self = shift;
666
667	0					0	my $input_stream_name = EMPTY_STRING;
668	0					0	my $logger_object = $self->[_logger_object_];
669	0	0				0	if ($logger_object) {
670	0					0	$input_stream_name = $logger_object->get_input_stream_name();
671							}
672	0					0	return $input_stream_name;
673							} ## end sub get_input_stream_name
674
675							sub complain {
676
677	32			32	0	105	my ( $self, $msg ) = @_;
678
679	32					84	my $logger_object = $self->[_logger_object_];
680	32	50				112	if ($logger_object) {
681	32					66	my $input_line_number = $self->[_last_line_number_];
682	32					179	$logger_object->complain( $msg, $input_line_number );
683							}
684	32					95	return;
685							} ## end sub complain
686
687							sub write_logfile_entry {
688
689	1860			1860	0	5072	my ( $self, $msg ) = @_;
690
691	1860					3638	my $logger_object = $self->[_logger_object_];
692	1860	100				4637	if ($logger_object) {
693	1854					5576	$logger_object->write_logfile_entry($msg);
694							}
695	1860					5933	return;
696							} ## end sub write_logfile_entry
697
698							sub interrupt_logfile {
699
700	0			0	0	0	my $self = shift;
701
702	0					0	my $logger_object = $self->[_logger_object_];
703	0	0				0	if ($logger_object) {
704	0					0	$logger_object->interrupt_logfile();
705							}
706	0					0	return;
707							} ## end sub interrupt_logfile
708
709							sub resume_logfile {
710
711	0			0	0	0	my $self = shift;
712
713	0					0	my $logger_object = $self->[_logger_object_];
714	0	0				0	if ($logger_object) {
715	0					0	$logger_object->resume_logfile();
716							}
717	0					0	return;
718							} ## end sub resume_logfile
719
720							sub brace_warning {
721	0			0	0	0	my ( $self, $msg ) = @_;
722	0					0	$self->[_saw_brace_error_]++;
723
724	0					0	my $logger_object = $self->[_logger_object_];
725	0	0				0	if ($logger_object) {
726	0					0	my $msg_line_number = $self->[_last_line_number_];
727	0					0	$logger_object->brace_warning( $msg, $msg_line_number );
728							}
729	0					0	return;
730							} ## end sub brace_warning
731
732							sub increment_brace_error {
733
734							# This is same as sub brace_warning but without a message
735	0			0	0	0	my $self = shift;
736	0					0	$self->[_saw_brace_error_]++;
737
738	0					0	my $logger_object = $self->[_logger_object_];
739	0	0				0	if ($logger_object) {
740	0					0	$logger_object->increment_brace_error();
741							}
742	0					0	return;
743							} ## end sub increment_brace_error
744
745							sub get_saw_brace_error {
746	0			0	0	0	my $self = shift;
747	0					0	return $self->[_saw_brace_error_];
748							} ## end sub get_saw_brace_error
749
750							sub report_definite_bug {
751	0			0	0	0	my $self = shift;
752	0					0	$self->[_hit_bug_] = 1;
753	0					0	my $logger_object = $self->[_logger_object_];
754	0	0				0	if ($logger_object) {
755	0					0	$logger_object->report_definite_bug();
756							}
757	0					0	return;
758							} ## end sub report_definite_bug
759
760							#-------------------------------------
761							# Interface to Perl::Tidy::Diagnostics
762							#-------------------------------------
763							sub write_diagnostics {
764	0			0	0	0	my ( $self, $msg ) = @_;
765	0					0	my $input_line_number = $self->[_last_line_number_];
766	0					0	my $diagnostics_object = $self->[_diagnostics_object_];
767	0	0				0	if ($diagnostics_object) {
768	0					0	$diagnostics_object->write_diagnostics( $msg, $input_line_number );
769							}
770	0					0	return;
771							} ## end sub write_diagnostics
772
773							sub report_tokenization_errors {
774
775	562			562	0	1910	my ($self) = @_;
776
777							# Report any tokenization errors and return a flag '$severe_error'.
778							# Set $severe_error = 1 if the tokenization errors are so severe that
779							# the formatter should not attempt to format the file. Instead, it will
780							# just output the file verbatim.
781
782							# set severe error flag if tokenizer has encountered file reading problems
783							# (i.e. unexpected binary characters)
784							# or code which may not be formatted correctly (such as 'my sub q')
785							# The difference between _in_error_ and _in_trouble_ is that
786							# _in_error_ stops the tokenizer immediately whereas
787							# _in_trouble_ lets the tokenizer finish so that all errors are seen
788							# Both block formatting and cause the input stream to be output verbatim.
789	562		33			3616	my $severe_error = $self->[_in_error_] \|\| $self->[_in_trouble_];
790
791							# And do not format if it looks like an html file (c209)
792	562		33			4390	$severe_error \|\|= $self->[_html_tag_count_] && $self->[_warning_count_];
			33
793
794							# Inform the logger object on length of input stream
795	562					1884	my $logger_object = $self->[_logger_object_];
796	562	100				2076	if ($logger_object) {
797	560					1403	my $last_line_number = $self->[_last_line_number_];
798	560					2906	$logger_object->set_last_input_line_number($last_line_number);
799							}
800
801	562					1485	my $maxle = $rOpts_maximum_level_errors;
802	562					1244	my $maxue = $rOpts_maximum_unexpected_errors;
803	562	50				1843	$maxle = 1 unless defined($maxle);
804	562	50				1658	$maxue = 0 unless defined($maxue);
805
806	562					2432	my $level = get_indentation_level();
807	562	50				2393	if ( $level != $self->[_starting_level_] ) {
808	0					0	$self->warning("final indentation level: $level\n");
809	0					0	my $level_diff = $self->[_starting_level_] - $level;
810	0	0				0	if ( $level_diff < 0 ) { $level_diff = -$level_diff }
	0					0
811
812							# Set severe error flag if the level error is greater than 1.
813							# The formatter can function for any level error but it is probably
814							# best not to attempt formatting for a high level error.
815	0	0	0			0	if ( $maxle >= 0 && $level_diff > $maxle ) {
816	0					0	$severe_error = 1;
817	0					0	$self->warning(<<EOM);
818							Formatting will be skipped because level error '$level_diff' exceeds -maxle=$maxle; use -maxle=-1 to force formatting
819							EOM
820							}
821							}
822
823	562					3213	$self->check_final_nesting_depths();
824
825							# Likewise, large numbers of brace errors usually indicate non-perl
826							# scripts, so set the severe error flag at a low number. This is similar
827							# to the level check, but different because braces may balance but be
828							# incorrectly interlaced.
829	562	50				2855	if ( $self->[_true_brace_error_count_] > 2 ) {
830	0					0	$severe_error = 1;
831							}
832
833	562	50	66			2662	if ( $rOpts_look_for_hash_bang
834							&& !$self->[_saw_hash_bang_] )
835							{
836	0					0	$self->warning(
837							"hit EOF without seeing hash-bang line; maybe don't need -x?\n");
838							}
839
840	562	50				1888	if ( $self->[_in_format_] ) {
841	0					0	$self->warning("hit EOF while in format description\n");
842							}
843
844	562	50				2012	if ( $self->[_in_skipped_] ) {
845	0					0	$self->write_logfile_entry(
846							"hit EOF while in lines skipped with --code-skipping\n");
847							}
848
849	562	50				2051	if ( $self->[_in_pod_] ) {
850
851							# Just write log entry if this is after __END__ or __DATA__
852							# because this happens to often, and it is not likely to be
853							# a parsing error.
854	0	0	0			0	if ( $self->[_saw_data_] \|\| $self->[_saw_end_] ) {
855	0					0	$self->write_logfile_entry(
856							"hit eof while in pod documentation (no =cut seen)\n\tthis can cause trouble with some pod utilities\n"
857							);
858							}
859
860							else {
861	0					0	$self->complain(
862							"hit eof while in pod documentation (no =cut seen)\n\tthis can cause trouble with some pod utilities\n"
863							);
864							}
865
866							}
867
868	562	50				2043	if ( $self->[_in_here_doc_] ) {
869	0					0	$severe_error = 1;
870	0					0	my $here_doc_target = $self->[_here_doc_target_];
871	0					0	my $started_looking_for_here_target_at =
872							$self->[_started_looking_for_here_target_at_];
873	0	0				0	if ($here_doc_target) {
874	0					0	$self->warning(
875							"hit EOF in here document starting at line $started_looking_for_here_target_at with target: $here_doc_target\n"
876							);
877							}
878							else {
879	0					0	$self->warning(<<EOM);
880							Hit EOF in here document starting at line $started_looking_for_here_target_at with empty target string.
881							(Perl will match to the end of file but this may not be intended).
882							EOM
883							}
884	0					0	my $nearly_matched_here_target_at =
885							$self->[_nearly_matched_here_target_at_];
886	0	0				0	if ($nearly_matched_here_target_at) {
887	0					0	$self->warning(
888							"NOTE: almost matched at input line $nearly_matched_here_target_at except for whitespace\n"
889							);
890							}
891							}
892
893							# Something is seriously wrong if we ended inside a quote
894	562	50				1966	if ( $self->[_in_quote_] ) {
895	0					0	$severe_error = 1;
896	0					0	my $line_start_quote = $self->[_line_start_quote_];
897	0					0	my $quote_target = $self->[_quote_target_];
898	0	0				0	my $what =
899							( $self->[_in_attribute_list_] )
900							? "attribute list"
901							: "quote/pattern";
902	0					0	$self->warning(
903							"hit EOF seeking end of $what starting at line $line_start_quote ending in $quote_target\n"
904							);
905							}
906
907	562	50				1891	if ( $self->[_hit_bug_] ) {
908	0					0	$severe_error = 1;
909							}
910
911							# Multiple "unexpected" type tokenization errors usually indicate parsing
912							# non-perl scripts, or that something is seriously wrong, so we should
913							# avoid formatting them. This can happen for example if we run perltidy on
914							# a shell script or an html file. But unfortunately this check can
915							# interfere with some extended syntaxes, such as RPerl, so it has to be off
916							# by default.
917	562					1447	my $ue_count = $self->[_unexpected_error_count_];
918	562	50	33			2324	if ( $maxue > 0 && $ue_count > $maxue ) {
919	0					0	$self->warning(<<EOM);
920							Formatting will be skipped since unexpected token count = $ue_count > -maxue=$maxue; use -maxue=0 to force formatting
921							EOM
922	0					0	$severe_error = 1;
923							}
924
925	562	100				1952	if ( !$self->[_saw_perl_dash_w_] ) {
926	546	50				2310	if ( $] < 5.006 ) {
927	0					0	$self->write_logfile_entry("Suggest including '-w parameter'\n");
928							}
929							else {
930	546					2006	$self->write_logfile_entry("Suggest including 'use warnings;'\n");
931							}
932							}
933
934	562	50				3460	if ( $self->[_saw_perl_dash_P_] ) {
935	0					0	$self->write_logfile_entry(
936							"Use of -P parameter for defines is discouraged\n");
937							}
938
939	562	100				2579	if ( !$self->[_saw_use_strict_] ) {
940	548					1707	$self->write_logfile_entry("Suggest including 'use strict;'\n");
941							}
942
943							# it is suggested that labels have at least one upper case character
944							# for legibility and to avoid code breakage as new keywords are introduced
945	562	100				3575	if ( $self->[_rlower_case_labels_at_] ) {
946	12					38	my @lower_case_labels_at = @{ $self->[_rlower_case_labels_at_] };
	12					55
947	12					47	$self->write_logfile_entry(
948							"Suggest using upper case characters in label(s)\n");
949	12					72	local $LIST_SEPARATOR = ')(';
950	12					99	$self->write_logfile_entry(
951							" defined at line(s): (@lower_case_labels_at)\n");
952							}
953	562					2156	return $severe_error;
954							} ## end sub report_tokenization_errors
955
956							sub report_v_string {
957
958							# warn if this version can't handle v-strings
959	2			2	0	8	my ( $self, $tok ) = @_;
960	2	50				13	if ( !$self->[_saw_v_string_] ) {
961	2					5	$self->[_saw_v_string_] = $self->[_last_line_number_];
962							}
963	2	50				10	if ( $] < 5.006 ) {
964	0					0	$self->warning(
965							"Found v-string '$tok' but v-strings are not implemented in your version of perl; see Camel 3 book ch 2\n"
966							);
967							}
968	2					6	return;
969							} ## end sub report_v_string
970
971							sub is_valid_token_type {
972	3			3	0	6	my ($type) = @_;
973	3					15	return $is_valid_token_type{$type};
974							}
975
976							sub log_numbered_msg {
977	167			167	0	458	my ( $self, $msg ) = @_;
978
979							# write input line number + message to logfile
980	167					350	my $input_line_number = $self->[_last_line_number_];
981	167					793	$self->write_logfile_entry("Line $input_line_number: $msg");
982	167					383	return;
983							} ## end sub log_numbered_msg
984
985							sub get_line {
986
987	8230			8230	0	15243	my $self = shift;
988
989							# Read the next input line and tokenize it
990							# Returns:
991							# $line_of_tokens = ref to hash of info for the tokenized line
992
993							# USES GLOBAL VARIABLES:
994							# $brace_depth, $square_bracket_depth, $paren_depth
995
996							# get the next line from the input array
997	8230					11566	my $input_line;
998	8230					13964	my $rinput_lines = $self->[_rinput_lines_];
999	8230					12579	my $line_index = $self->[_input_line_index_next_];
1000	8230	100				12222	if ( $line_index < @{$rinput_lines} ) {
	8230					18590
1001	7668					16174	$input_line = $rinput_lines->[ $line_index++ ];
1002	7668					12299	$self->[_input_line_index_next_] = $line_index;
1003							}
1004
1005	8230					14061	$self->[_line_of_text_] = $input_line;
1006
1007	8230	100				19902	return if ( !defined($input_line) );
1008
1009	7668					12308	my $input_line_number = ++$self->[_last_line_number_];
1010
1011							# Find and remove what characters terminate this line, including any
1012							# control r
1013	7668					11981	my $input_line_separator = EMPTY_STRING;
1014	7668	100				19789	if ( chomp($input_line) ) {
1015	7667					16826	$input_line_separator = $INPUT_RECORD_SEPARATOR;
1016							}
1017
1018							# The first test here very significantly speeds things up, but be sure to
1019							# keep the regex and hash %other_line_endings the same.
1020	7668	100				23205	if ( $other_line_endings{ substr( $input_line, -1 ) } ) {
1021	24	50				238	if ( $input_line =~ s/([\r\035\032])+$// ) {
1022	24					81	$input_line_separator = $1 . $input_line_separator;
1023							}
1024							}
1025
1026							# for backwards compatibility we keep the line text terminated with
1027							# a newline character
1028	7668					15772	$input_line .= "\n";
1029	7668					13311	$self->[_line_of_text_] = $input_line;
1030
1031							# create a data structure describing this line which will be
1032							# returned to the caller.
1033
1034							# _line_type codes are:
1035							# SYSTEM - system-specific code before hash-bang line
1036							# CODE - line of perl code (including comments)
1037							# POD_START - line starting pod, such as '=head'
1038							# POD - pod documentation text
1039							# POD_END - last line of pod section, '=cut'
1040							# HERE - text of here-document
1041							# HERE_END - last line of here-doc (target word)
1042							# FORMAT - format section
1043							# FORMAT_END - last line of format section, '.'
1044							# SKIP - code skipping section
1045							# SKIP_END - last line of code skipping section, '#>>V'
1046							# DATA_START - __DATA__ line
1047							# DATA - unidentified text following __DATA__
1048							# END_START - __END__ line
1049							# END - unidentified text following __END__
1050							# ERROR - we are in big trouble, probably not a perl script
1051
1052							# Other variables:
1053							# _curly_brace_depth - depth of curly braces at start of line
1054							# _square_bracket_depth - depth of square brackets at start of line
1055							# _paren_depth - depth of parens at start of line
1056							# _starting_in_quote - this line continues a multi-line quote
1057							# (so don't trim leading blanks!)
1058							# _ending_in_quote - this line ends in a multi-line quote
1059							# (so don't trim trailing blanks!)
1060	7668					42755	my $line_of_tokens = {
1061							_line_type => 'EOF',
1062							_line_text => $input_line,
1063							_line_number => $input_line_number,
1064							_guessed_indentation_level => 0,
1065							_curly_brace_depth => $brace_depth,
1066							_square_bracket_depth => $square_bracket_depth,
1067							_paren_depth => $paren_depth,
1068							_quote_character => EMPTY_STRING,
1069							## Skip these needless initializations for efficiency:
1070							## _rtoken_type => undef,
1071							## _rtokens => undef,
1072							## _rlevels => undef,
1073							## _rblock_type => undef,
1074							## _rtype_sequence => undef,
1075							## _rci_levels => undef,
1076							## _starting_in_quote => 0,
1077							## _ending_in_quote => 0,
1078							};
1079
1080							# must print line unchanged if we are in a here document
1081	7668	100				40835	if ( $self->[_in_here_doc_] ) {
		100
		100
		100
		50
		100
		100
1082
1083	24					91	$line_of_tokens->{_line_type} = 'HERE';
1084	24					73	my $here_doc_target = $self->[_here_doc_target_];
1085	24					56	my $here_quote_character = $self->[_here_quote_character_];
1086	24					49	my $candidate_target = $input_line;
1087	24					49	chomp $candidate_target;
1088
1089							# Handle <<~ targets, which are indicated here by a leading space on
1090							# the here quote character
1091	24	100				97	if ( $here_quote_character =~ /^\s/ ) {
1092	4					15	$candidate_target =~ s/^\s*//;
1093							}
1094	24	100				80	if ( $candidate_target eq $here_doc_target ) {
1095	9					49	$self->[_nearly_matched_here_target_at_] = undef;
1096	9					26	$line_of_tokens->{_line_type} = 'HERE_END';
1097	9					56	$self->log_numbered_msg("Exiting HERE document $here_doc_target\n");
1098
1099	9					35	my $rhere_target_list = $self->[_rhere_target_list_];
1100	9	50				41	if ( @{$rhere_target_list} ) { # there can be multiple here targets
	9					40
1101							( $here_doc_target, $here_quote_character ) =
1102	0					0	@{ shift @{$rhere_target_list} };
	0					0
	0					0
1103	0					0	$self->[_here_doc_target_] = $here_doc_target;
1104	0					0	$self->[_here_quote_character_] = $here_quote_character;
1105	0					0	$self->log_numbered_msg(
1106							"Entering HERE document $here_doc_target\n");
1107	0					0	$self->[_nearly_matched_here_target_at_] = undef;
1108	0					0	$self->[_started_looking_for_here_target_at_] =
1109							$input_line_number;
1110							}
1111							else {
1112	9					33	$self->[_in_here_doc_] = 0;
1113	9					24	$self->[_here_doc_target_] = EMPTY_STRING;
1114	9					28	$self->[_here_quote_character_] = EMPTY_STRING;
1115							}
1116							}
1117
1118							# check for error of extra whitespace
1119							# note for PERL6: leading whitespace is allowed
1120							else {
1121	15					146	$candidate_target =~ s/\s*$//;
1122	15					78	$candidate_target =~ s/^\s*//;
1123	15	50				64	if ( $candidate_target eq $here_doc_target ) {
1124	0					0	$self->[_nearly_matched_here_target_at_] = $input_line_number;
1125							}
1126							}
1127	24					99	return $line_of_tokens;
1128							}
1129
1130							# Print line unchanged if we are in a format section
1131							elsif ( $self->[_in_format_] ) {
1132
1133	3	100				28	if ( $input_line =~ /^\.[\s#]*$/ ) {
1134
1135							# Decrement format depth count at a '.' after a 'format'
1136	1					14	$self->[_in_format_]--;
1137
1138							# This is the end when count reaches 0
1139	1	50				7	if ( !$self->[_in_format_] ) {
1140	1					7	$self->log_numbered_msg("Exiting format section\n");
1141	1					3	$line_of_tokens->{_line_type} = 'FORMAT_END';
1142
1143							# Make the tokenizer mark an opening brace which follows
1144							# as a code block. Fixes issue c202/t032.
1145	1					4	$last_nonblank_token = ';';
1146	1					7	$last_nonblank_type = ';';
1147							}
1148							}
1149							else {
1150	2					6	$line_of_tokens->{_line_type} = 'FORMAT';
1151	2	50				10	if ( $input_line =~ /^\s*format\s+\w+/ ) {
1152
1153							# Increment format depth count at a 'format' within a 'format'
1154							# This is a simple way to handle nested formats (issue c019).
1155	0					0	$self->[_in_format_]++;
1156							}
1157							}
1158	3					11	return $line_of_tokens;
1159							}
1160
1161							# must print line unchanged if we are in pod documentation
1162							elsif ( $self->[_in_pod_] ) {
1163
1164	47					114	$line_of_tokens->{_line_type} = 'POD';
1165	47	100				231	if ( $input_line =~ /^=cut/ ) {
1166	20					74	$line_of_tokens->{_line_type} = 'POD_END';
1167	20					80	$self->log_numbered_msg("Exiting POD section\n");
1168	20					54	$self->[_in_pod_] = 0;
1169							}
1170	47	50	33			149	if ( $input_line =~ /^\#\!.*perl\b/ && !$self->[_in_end_] ) {
1171	0					0	$self->warning(
1172							"Hash-bang in pod can cause older versions of perl to fail! \n"
1173							);
1174							}
1175
1176	47					181	return $line_of_tokens;
1177							}
1178
1179							# print line unchanged if in skipped section
1180							elsif ( $self->[_in_skipped_] ) {
1181
1182	8					22	$line_of_tokens->{_line_type} = 'SKIP';
1183	8	100				93	if ( $input_line =~ /$code_skipping_pattern_end/ ) {
		50
1184	2					11	$line_of_tokens->{_line_type} = 'SKIP_END';
1185	2					10	$self->log_numbered_msg("Exiting code-skipping section\n");
1186	2					8	$self->[_in_skipped_] = 0;
1187							}
1188							elsif ( $input_line =~ /$code_skipping_pattern_begin/ ) {
1189
1190							# warn of duplicate starting comment lines, git #118
1191	0					0	my $lno = $self->[_in_skipped_];
1192	0					0	$self->warning(
1193							"Already in code-skipping section which started at line $lno\n"
1194							);
1195							}
1196							else {
1197							## ok - not a code-skipping control line
1198							}
1199	8					32	return $line_of_tokens;
1200							}
1201
1202							# must print line unchanged if we have seen a severe error (i.e., we
1203							# are seeing illegal tokens and cannot continue. Syntax errors do
1204							# not pass this route). Calling routine can decide what to do, but
1205							# the default can be to just pass all lines as if they were after __END__
1206							elsif ( $self->[_in_error_] ) {
1207	0					0	$line_of_tokens->{_line_type} = 'ERROR';
1208	0					0	return $line_of_tokens;
1209							}
1210
1211							# print line unchanged if we are __DATA__ section
1212							elsif ( $self->[_in_data_] ) {
1213
1214							# ...but look for POD
1215							# Note that the _in_data and _in_end flags remain set
1216							# so that we return to that state after seeing the
1217							# end of a pod section
1218	1	50	33			9	if ( $input_line =~ /^=(\w+)\b/ && $1 ne 'cut' ) {
1219	0					0	$line_of_tokens->{_line_type} = 'POD_START';
1220	0					0	$self->log_numbered_msg("Entering POD section\n");
1221	0					0	$self->[_in_pod_] = 1;
1222	0					0	return $line_of_tokens;
1223							}
1224							else {
1225	1					3	$line_of_tokens->{_line_type} = 'DATA';
1226	1					5	return $line_of_tokens;
1227							}
1228							}
1229
1230							# print line unchanged if we are in __END__ section
1231							elsif ( $self->[_in_end_] ) {
1232
1233							# ...but look for POD
1234							# Note that the _in_data and _in_end flags remain set
1235							# so that we return to that state after seeing the
1236							# end of a pod section
1237	48	100	66			273	if ( $input_line =~ /^=(\w+)\b/ && $1 ne 'cut' ) {
1238	6					34	$line_of_tokens->{_line_type} = 'POD_START';
1239	6					39	$self->log_numbered_msg("Entering POD section\n");
1240	6					34	$self->[_in_pod_] = 1;
1241	6					27	return $line_of_tokens;
1242							}
1243							else {
1244	42					75	$line_of_tokens->{_line_type} = 'END';
1245	42					119	return $line_of_tokens;
1246							}
1247							}
1248							else {
1249							## ok
1250							}
1251
1252							# check for a hash-bang line if we haven't seen one
1253	7537	100	100			34371	if ( !$self->[_saw_hash_bang_]
			66
1254							&& substr( $input_line, 0, 2 ) eq '#!'
1255							&& $input_line =~ /^\#\!.*perl\b/ )
1256							{
1257	15					62	$self->[_saw_hash_bang_] = $input_line_number;
1258
1259							# check for -w and -P flags
1260	15	50				100	if ( $input_line =~ /^\#\!.perl\s.-.*P/ ) {
1261	0					0	$self->[_saw_perl_dash_P_] = 1;
1262							}
1263
1264	15	100				95	if ( $input_line =~ /^\#\!.perl\s.-.*w/ ) {
1265	8					24	$self->[_saw_perl_dash_w_] = 1;
1266							}
1267
1268	15	100	33			138	if (
			66
			100
			66
1269							$input_line_number > 1
1270
1271							# leave any hash bang in a BEGIN block alone
1272							# i.e. see 'debugger-duck_type.t'
1273							&& !(
1274							$last_nonblank_block_type
1275							&& $last_nonblank_block_type eq 'BEGIN'
1276							)
1277							&& !$rOpts_look_for_hash_bang
1278
1279							# Try to avoid giving a false alarm at a simple comment.
1280							# These look like valid hash-bang lines:
1281
1282							#!/usr/bin/perl -w
1283							#! /usr/bin/perl -w
1284							#!c:\perl\bin\perl.exe
1285
1286							# These are comments:
1287							#! I love perl
1288							#! sunos does not yet provide a /usr/bin/perl
1289
1290							# Comments typically have multiple spaces, which suggests
1291							# the filter
1292							&& $input_line =~ /^\#\!(\s+)?(\S+)?perl/
1293							)
1294							{
1295
1296							# this is helpful for VMS systems; we may have accidentally
1297							# tokenized some DCL commands
1298	1	50				5	if ( $self->[_started_tokenizing_] ) {
1299	0					0	$self->warning(
1300							"There seems to be a hash-bang after line 1; do you need to run with -x ?\n"
1301							);
1302							}
1303							else {
1304	1					5	$self->complain("Useless hash-bang after line 1\n");
1305							}
1306							}
1307
1308							# Report the leading hash-bang as a system line
1309							# This will prevent -dac from deleting it
1310							else {
1311	14					60	$line_of_tokens->{_line_type} = 'SYSTEM';
1312	14					97	return $line_of_tokens;
1313							}
1314							}
1315
1316							# wait for a hash-bang before parsing if the user invoked us with -x
1317	7523	100	100			18648	if ( $rOpts_look_for_hash_bang
1318							&& !$self->[_saw_hash_bang_] )
1319							{
1320	5					8	$line_of_tokens->{_line_type} = 'SYSTEM';
1321	5					18	return $line_of_tokens;
1322							}
1323
1324							# a first line of the form ': #' will be marked as SYSTEM
1325							# since lines of this form may be used by tcsh
1326	7518	50	66			18877	if ( $input_line_number == 1 && $input_line =~ /^\s\:\s\#/ ) {
1327	0					0	$line_of_tokens->{_line_type} = 'SYSTEM';
1328	0					0	return $line_of_tokens;
1329							}
1330
1331							# now we know that it is ok to tokenize the line...
1332							# the line tokenizer will modify any of these private variables:
1333							# _rhere_target_list_
1334							# _in_data_
1335							# _in_end_
1336							# _in_format_
1337							# _in_error_
1338							# _in_skipped_
1339							# _in_pod_
1340							# _in_quote_
1341	7518					23512	$self->tokenize_this_line($line_of_tokens);
1342
1343							# Now finish defining the return structure and return it
1344	7518					15959	$line_of_tokens->{_ending_in_quote} = $self->[_in_quote_];
1345
1346							# handle severe error (binary data in script)
1347	7518	50				17578	if ( $self->[_in_error_] ) {
1348	0					0	$self->[_in_quote_] = 0; # to avoid any more messages
1349	0					0	$self->warning("Giving up after error\n");
1350	0					0	$line_of_tokens->{_line_type} = 'ERROR';
1351	0					0	reset_indentation_level(0); # avoid error messages
1352	0					0	return $line_of_tokens;
1353							}
1354
1355							# handle start of pod documentation
1356	7518	100				15941	if ( $self->[_in_pod_] ) {
1357
1358							# This gets tricky..above a __DATA__ or __END__ section, perl
1359							# accepts '=cut' as the start of pod section. But afterwards,
1360							# only pod utilities see it and they may ignore an =cut without
1361							# leading =head. In any case, this isn't good.
1362	14	50				83	if ( $input_line =~ /^=cut\b/ ) {
1363	0	0	0			0	if ( $self->[_saw_data_] \|\| $self->[_saw_end_] ) {
1364	0					0	$self->complain("=cut while not in pod ignored\n");
1365	0					0	$self->[_in_pod_] = 0;
1366	0					0	$line_of_tokens->{_line_type} = 'POD_END';
1367							}
1368							else {
1369	0					0	$line_of_tokens->{_line_type} = 'POD_START';
1370	0					0	if ( !DEVEL_MODE ) {
1371	0					0	$self->warning(
1372							"=cut starts a pod section .. this can fool pod utilities.\n"
1373							);
1374							}
1375	0					0	$self->log_numbered_msg("Entering POD section\n");
1376							}
1377							}
1378
1379							else {
1380	14					45	$line_of_tokens->{_line_type} = 'POD_START';
1381	14					54	$self->log_numbered_msg("Entering POD section\n");
1382							}
1383
1384	14					71	return $line_of_tokens;
1385							}
1386
1387							# handle start of skipped section
1388	7504	100				15048	if ( $self->[_in_skipped_] ) {
1389
1390	2					12	$line_of_tokens->{_line_type} = 'SKIP';
1391	2					12	$self->log_numbered_msg("Entering code-skipping section\n");
1392	2					9	return $line_of_tokens;
1393							}
1394
1395							# see if this line contains here doc targets
1396	7502					11986	my $rhere_target_list = $self->[_rhere_target_list_];
1397	7502	100				10457	if ( @{$rhere_target_list} ) {
	7502					16825
1398
1399							my ( $here_doc_target, $here_quote_character ) =
1400	9					61	@{ shift @{$rhere_target_list} };
	9					33
	9					51
1401	9					37	$self->[_in_here_doc_] = 1;
1402	9					27	$self->[_here_doc_target_] = $here_doc_target;
1403	9					28	$self->[_here_quote_character_] = $here_quote_character;
1404	9					74	$self->log_numbered_msg("Entering HERE document $here_doc_target\n");
1405	9					38	$self->[_started_looking_for_here_target_at_] = $input_line_number;
1406							}
1407
1408							# NOTE: __END__ and __DATA__ statements are written unformatted
1409							# because they can theoretically contain additional characters
1410							# which are not tokenized (and cannot be read with <DATA> either!).
1411	7502	100				20544	if ( $self->[_in_data_] ) {
		100
1412	1					4	$line_of_tokens->{_line_type} = 'DATA_START';
1413	1					6	$self->log_numbered_msg("Starting __DATA__ section\n");
1414	1					3	$self->[_saw_data_] = 1;
1415
1416							# keep parsing after __DATA__ if use SelfLoader was seen
1417	1	50				3	if ( $self->[_saw_selfloader_] ) {
1418	0					0	$self->[_in_data_] = 0;
1419	0					0	$self->log_numbered_msg(
1420							"SelfLoader seen, continuing; -nlsl deactivates\n");
1421							}
1422
1423	1					6	return $line_of_tokens;
1424							}
1425
1426							elsif ( $self->[_in_end_] ) {
1427	6					31	$line_of_tokens->{_line_type} = 'END_START';
1428	6					33	$self->log_numbered_msg("Starting __END__ section\n");
1429	6					16	$self->[_saw_end_] = 1;
1430
1431							# keep parsing after __END__ if use AutoLoader was seen
1432	6	50				30	if ( $self->[_saw_autoloader_] ) {
1433	0					0	$self->[_in_end_] = 0;
1434	0					0	$self->log_numbered_msg(
1435							"AutoLoader seen, continuing; -nlal deactivates\n");
1436							}
1437	6					33	return $line_of_tokens;
1438							}
1439							else {
1440							## ok: not in __END__ or __DATA__
1441							}
1442
1443							# now, finally, we know that this line is type 'CODE'
1444	7495					14161	$line_of_tokens->{_line_type} = 'CODE';
1445
1446							# remember if we have seen any real code
1447	7495	100	100			24100	if ( !$self->[_started_tokenizing_]
			100
1448							&& $input_line !~ /^\s*$/
1449							&& $input_line !~ /^\s*#/ )
1450							{
1451	558					2411	$self->[_started_tokenizing_] = 1;
1452							}
1453
1454	7495	100				15908	if ( $self->[_debugger_object_] ) {
1455	7					38	$self->[_debugger_object_]->write_debug_entry($line_of_tokens);
1456							}
1457
1458							# Note: if keyword 'format' occurs in this line code, it is still CODE
1459							# (keyword 'format' need not start a line)
1460	7495	100				15580	if ( $self->[_in_format_] ) {
1461	1					22	$self->log_numbered_msg("Entering format section\n");
1462							}
1463
1464	7495	100	100			28246	if ( $self->[_in_quote_]
		100	100
1465							and ( $self->[_line_start_quote_] < 0 ) )
1466							{
1467
1468							#if ( ( my $quote_target = get_quote_target() ) !~ /^\s*$/ ) {
1469	49	100				391	if ( ( my $quote_target = $self->[_quote_target_] ) !~ /^\s*$/ ) {
1470	48					130	$self->[_line_start_quote_] = $input_line_number;
1471	48					341	$self->log_numbered_msg(
1472							"Start multi-line quote or pattern ending in $quote_target\n");
1473							}
1474							}
1475							elsif ( ( $self->[_line_start_quote_] >= 0 )
1476							&& !$self->[_in_quote_] )
1477							{
1478	48					166	$self->[_line_start_quote_] = -1;
1479	48					196	$self->log_numbered_msg("End of multi-line quote or pattern\n");
1480							}
1481							else {
1482							## ok
1483							}
1484
1485							# we are returning a line of CODE
1486	7495					29868	return $line_of_tokens;
1487							} ## end sub get_line
1488
1489							sub find_starting_indentation_level {
1490
1491							# We need to find the indentation level of the first line of the
1492							# script being formatted. Often it will be zero for an entire file,
1493							# but if we are formatting a local block of code (within an editor for
1494							# example) it may not be zero. The user may specify this with the
1495							# -sil=n parameter but normally doesn't so we have to guess.
1496							#
1497	562			562	0	1814	my ($self) = @_;
1498	562					1433	my $starting_level = 0;
1499
1500							# use value if given as parameter
1501	562	100				2813	if ( $self->[_know_starting_level_] ) {
		100
1502	1					4	$starting_level = $self->[_starting_level_];
1503							}
1504
1505							# if we know there is a hash_bang line, the level must be zero
1506							elsif ($rOpts_look_for_hash_bang) {
1507	1					3	$self->[_know_starting_level_] = 1;
1508							}
1509
1510							# otherwise figure it out from the input file
1511							else {
1512	560					1530	my $line;
1513	560					1119	my $i = 0;
1514
1515							# keep looking at lines until we find a hash bang or piece of code
1516							# ( or, for now, an =pod line)
1517	560					1311	my $msg = EMPTY_STRING;
1518	560					1178	my $in_code_skipping;
1519	560					2899	while ( $line = $self->peek_ahead( $i++ ) ) {
1520
1521							# if first line is #! then assume starting level is zero
1522	871	100	100			5640	if ( $i == 1 && $line =~ /^\#\!/ ) {
1523	13					40	$starting_level = 0;
1524	13					43	last;
1525							}
1526
1527							# ignore lines fenced off with code-skipping comments
1528	858	100				4602	if ( $line =~ /^\s*#/ ) {
1529	296	50				1129	if ( !$in_code_skipping ) {
1530	296	50	33			2988	if ( $rOpts_code_skipping
1531							&& $line =~ /$code_skipping_pattern_begin/ )
1532							{
1533	0					0	$in_code_skipping = 1;
1534							}
1535							}
1536							else {
1537	0	0				0	if ( $line =~ /$code_skipping_pattern_end/ ) {
1538	0					0	$in_code_skipping = 0;
1539							}
1540							}
1541	296					1030	next;
1542							}
1543	562	50				2356	next if ($in_code_skipping);
1544
1545	562	100				3712	next if ( $line =~ /^\s*$/ ); # skip past blank lines
1546
1547	544					2778	$starting_level = $self->guess_old_indentation_level($line);
1548	544					1374	last;
1549							}
1550	560					2662	$msg = "Line $i implies starting-indentation-level = $starting_level\n";
1551	560					3021	$self->write_logfile_entry("$msg");
1552							}
1553	562					2315	$self->[_starting_level_] = $starting_level;
1554	562					3659	reset_indentation_level($starting_level);
1555	562					1103	return;
1556							} ## end sub find_starting_indentation_level
1557
1558							sub guess_old_indentation_level {
1559	544			544	0	1803	my ( $self, $line ) = @_;
1560
1561							# Guess the indentation level of an input line.
1562							#
1563							# For the first line of code this result will define the starting
1564							# indentation level. It will mainly be non-zero when perltidy is applied
1565							# within an editor to a local block of code.
1566							#
1567							# This is an impossible task in general because we can't know what tabs
1568							# meant for the old script and how many spaces were used for one
1569							# indentation level in the given input script. For example it may have
1570							# been previously formatted with -i=7 -et=3. But we can at least try to
1571							# make sure that perltidy guesses correctly if it is applied repeatedly to
1572							# a block of code within an editor, so that the block stays at the same
1573							# level when perltidy is applied repeatedly.
1574							#
1575							# USES GLOBAL VARIABLES: (none)
1576	544					1190	my $level = 0;
1577
1578							# find leading tabs, spaces, and any statement label
1579	544					1150	my $spaces = 0;
1580	544	50				4607	if ( $line =~ /^(\t+)?(\s+)?(\w+:[^:])?/ ) {
1581
1582							# If there are leading tabs, we use the tab scheme for this run, if
1583							# any, so that the code will remain stable when editing.
1584	544	100				2655	if ($1) { $spaces += length($1) * $tabsize }
	2					10
1585
1586	544	100				2379	if ($2) { $spaces += length($2) }
	79					301
1587
1588							# correct for outdented labels
1589	544	100	66			2744	if ( $3 && $rOpts_outdent_labels ) {
1590	1					6	$spaces += $rOpts_continuation_indentation;
1591							}
1592							}
1593
1594	544					2118	$level = int( $spaces / $rOpts_indent_columns );
1595	544					1444	return ($level);
1596							} ## end sub guess_old_indentation_level
1597
1598							# This is a currently unused debug routine
1599							sub dump_functions {
1600
1601	0			0	0	0	my $fh = *STDOUT;
1602	0					0	foreach my $pkg ( keys %{$ris_user_function} ) {
	0					0
1603	0					0	$fh->print("\nnon-constant subs in package $pkg\n");
1604
1605	0					0	foreach my $sub ( keys %{ $ris_user_function->{$pkg} } ) {
	0					0
1606	0					0	my $msg = EMPTY_STRING;
1607	0	0				0	if ( $ris_block_list_function->{$pkg}{$sub} ) {
1608	0					0	$msg = 'block_list';
1609							}
1610
1611	0	0				0	if ( $ris_block_function->{$pkg}{$sub} ) {
1612	0					0	$msg = 'block';
1613							}
1614	0					0	$fh->print("$sub $msg\n");
1615							}
1616							}
1617
1618	0					0	foreach my $pkg ( keys %{$ris_constant} ) {
	0					0
1619	0					0	$fh->print("\nconstants and constant subs in package $pkg\n");
1620
1621	0					0	foreach my $sub ( keys %{ $ris_constant->{$pkg} } ) {
	0					0
1622	0					0	$fh->print("$sub\n");
1623							}
1624							}
1625	0					0	return;
1626							} ## end sub dump_functions
1627
1628							sub prepare_for_a_new_file {
1629
1630	562			562	0	1903	my ( $self, $source_object ) = @_;
1631
1632							# copy the source object lines to an array of lines
1633	562					3019	$self->make_source_array($source_object);
1634
1635							# previous tokens needed to determine what to expect next
1636	562					1767	$last_nonblank_token = ';'; # the only possible starting state which
1637	562					1573	$last_nonblank_type = ';'; # will make a leading brace a code block
1638	562					1398	$last_nonblank_block_type = EMPTY_STRING;
1639
1640							# scalars for remembering statement types across multiple lines
1641	562					1371	$statement_type = EMPTY_STRING; # '' or 'use' or 'sub..' or 'case..'
1642
1643							# scalars for remembering where we are in the file
1644	562					1401	$current_package = "main";
1645	562					1140	$context = UNKNOWN_CONTEXT;
1646
1647							# hashes used to remember function information
1648	562					2028	$ris_constant = {}; # user-defined constants
1649	562					1983	$ris_user_function = {}; # user-defined functions
1650	562					1632	$ruser_function_prototype = {}; # their prototypes
1651	562					1651	$ris_block_function = {};
1652	562					1645	$ris_block_list_function = {};
1653	562					1465	$rsaw_function_definition = {};
1654	562					1341	$rsaw_use_module = {};
1655
1656							# variables used to track depths of various containers
1657							# and report nesting errors
1658	562					1176	$paren_depth = 0;
1659	562					1064	$brace_depth = 0;
1660	562					1086	$square_bracket_depth = 0;
1661	562					2535	$rcurrent_depth = [ (0) x scalar @closing_brace_names ];
1662	562					1277	$total_depth = 0;
1663	562					2224	$rtotal_depth = [];
1664	562					2071	$rcurrent_sequence_number = [];
1665	562					1252	$next_sequence_number = 2; # The value 1 is reserved for SEQ_ROOT
1666
1667	562					1851	$rparen_type = [];
1668	562					1637	$rparen_semicolon_count = [];
1669	562					2421	$rparen_vars = [];
1670	562					1893	$rbrace_type = [];
1671	562					1726	$rbrace_structural_type = [];
1672	562					1489	$rbrace_context = [];
1673	562					1633	$rbrace_package = [];
1674	562					1598	$rsquare_bracket_type = [];
1675	562					1529	$rsquare_bracket_structural_type = [];
1676	562					3534	$rdepth_array = [];
1677	562					1306	$rnested_ternary_flag = [];
1678	562					3776	$rnested_statement_type = [];
1679	562					3253	$rstarting_line_of_current_depth = [];
1680
1681	562					1820	$rparen_type->[$paren_depth] = EMPTY_STRING;
1682	562					1651	$rparen_semicolon_count->[$paren_depth] = 0;
1683	562					1561	$rparen_vars->[$paren_depth] = [];
1684	562					1657	$rbrace_type->[$brace_depth] = ';'; # identify opening brace as code block
1685	562					1542	$rbrace_structural_type->[$brace_depth] = EMPTY_STRING;
1686	562					1433	$rbrace_context->[$brace_depth] = UNKNOWN_CONTEXT;
1687	562					1331	$rbrace_package->[$paren_depth] = $current_package;
1688	562					1270	$rsquare_bracket_type->[$square_bracket_depth] = EMPTY_STRING;
1689	562					1475	$rsquare_bracket_structural_type->[$square_bracket_depth] = EMPTY_STRING;
1690
1691	562					2804	initialize_tokenizer_state();
1692	562					1085	return;
1693							} ## end sub prepare_for_a_new_file
1694
1695							{ ## closure for sub tokenize_this_line
1696
1697	39			39		396	use constant BRACE => 0;
	39					130
	39					3022
1698	39			39		323	use constant SQUARE_BRACKET => 1;
	39					130
	39					2317
1699	39			39		326	use constant PAREN => 2;
	39					138
	39					3787
1700	39			39		308	use constant QUESTION_COLON => 3;
	39					118
	39					84547
1701
1702							# TV1: scalars for processing one LINE.
1703							# Re-initialized on each entry to sub tokenize_this_line.
1704							my (
1705							$block_type, $container_type, $expecting,
1706							$i, $i_tok, $input_line,
1707							$input_line_number, $last_nonblank_i, $max_token_index,
1708							$next_tok, $next_type, $peeked_ahead,
1709							$prototype, $rhere_target_list, $rtoken_map,
1710							$rtoken_type, $rtokens, $tok,
1711							$type, $type_sequence, $indent_flag,
1712							);
1713
1714							# TV2: refs to ARRAYS for processing one LINE
1715							# Re-initialized on each call.
1716							my $routput_token_list = []; # stack of output token indexes
1717							my $routput_token_type = []; # token types
1718							my $routput_block_type = []; # types of code block
1719							my $routput_container_type = []; # paren types, such as if, elsif, ..
1720							my $routput_type_sequence = []; # nesting sequential number
1721							my $routput_indent_flag = []; #
1722
1723							# TV3: SCALARS for quote variables. These are initialized with a
1724							# subroutine call and continually updated as lines are processed.
1725							my ( $in_quote, $quote_type, $quote_character, $quote_pos, $quote_depth,
1726							$quoted_string_1, $quoted_string_2, $allowed_quote_modifiers, );
1727
1728							# TV4: SCALARS for multi-line identifiers and
1729							# statements. These are initialized with a subroutine call
1730							# and continually updated as lines are processed.
1731							my ( $id_scan_state, $identifier, $want_paren );
1732
1733							# TV5: SCALARS for tracking indentation level.
1734							# Initialized once and continually updated as lines are
1735							# processed.
1736							my (
1737							$nesting_token_string, $nesting_block_string,
1738							$nesting_block_flag, $level_in_tokenizer,
1739							);
1740
1741							# TV6: SCALARS for remembering several previous
1742							# tokens. Initialized once and continually updated as
1743							# lines are processed.
1744							my (
1745							$last_nonblank_container_type, $last_nonblank_type_sequence,
1746							$last_last_nonblank_token, $last_last_nonblank_type,
1747							$last_nonblank_prototype,
1748							);
1749
1750							# ----------------------------------------------------------------
1751							# beginning of tokenizer variable access and manipulation routines
1752							# ----------------------------------------------------------------
1753
1754							sub initialize_tokenizer_state {
1755
1756							# GV1: initialized once
1757							# TV1: initialized on each call
1758							# TV2: initialized on each call
1759							# TV3:
1760	562			562	0	1230	$in_quote = 0;
1761	562					1388	$quote_type = 'Q';
1762	562					1277	$quote_character = EMPTY_STRING;
1763	562					1107	$quote_pos = 0;
1764	562					1135	$quote_depth = 0;
1765	562					1100	$quoted_string_1 = EMPTY_STRING;
1766	562					1251	$quoted_string_2 = EMPTY_STRING;
1767	562					1109	$allowed_quote_modifiers = EMPTY_STRING;
1768
1769							# TV4:
1770	562					1101	$id_scan_state = EMPTY_STRING;
1771	562					1205	$identifier = EMPTY_STRING;
1772	562					1201	$want_paren = EMPTY_STRING;
1773
1774							# TV5:
1775	562					1300	$nesting_token_string = EMPTY_STRING;
1776	562					1249	$nesting_block_string = '1'; # initially in a block
1777	562					1056	$nesting_block_flag = 1;
1778	562					1089	$level_in_tokenizer = 0;
1779
1780							# TV6:
1781	562					1170	$last_nonblank_container_type = EMPTY_STRING;
1782	562					1188	$last_nonblank_type_sequence = EMPTY_STRING;
1783	562					1178	$last_last_nonblank_token = ';';
1784	562					1192	$last_last_nonblank_type = ';';
1785	562					1178	$last_nonblank_prototype = EMPTY_STRING;
1786	562					1075	return;
1787							} ## end sub initialize_tokenizer_state
1788
1789							sub save_tokenizer_state {
1790
1791							# Global variables:
1792	0			0	0	0	my $rGV1 = [
1793							$brace_depth,
1794							$context,
1795							$current_package,
1796							$last_nonblank_block_type,
1797							$last_nonblank_token,
1798							$last_nonblank_type,
1799							$next_sequence_number,
1800							$paren_depth,
1801							$rbrace_context,
1802							$rbrace_package,
1803							$rbrace_structural_type,
1804							$rbrace_type,
1805							$rcurrent_depth,
1806							$rcurrent_sequence_number,
1807							$rdepth_array,
1808							$ris_block_function,
1809							$ris_block_list_function,
1810							$ris_constant,
1811							$ris_user_function,
1812							$rnested_statement_type,
1813							$rnested_ternary_flag,
1814							$rparen_semicolon_count,
1815							$rparen_vars,
1816							$rparen_type,
1817							$rsaw_function_definition,
1818							$rsaw_use_module,
1819							$rsquare_bracket_structural_type,
1820							$rsquare_bracket_type,
1821							$rstarting_line_of_current_depth,
1822							$rtotal_depth,
1823							$ruser_function_prototype,
1824							$square_bracket_depth,
1825							$statement_type,
1826							$total_depth,
1827
1828							];
1829
1830							# Tokenizer closure variables:
1831	0					0	my $rTV1 = [
1832							$block_type, $container_type, $expecting,
1833							$i, $i_tok, $input_line,
1834							$input_line_number, $last_nonblank_i, $max_token_index,
1835							$next_tok, $next_type, $peeked_ahead,
1836							$prototype, $rhere_target_list, $rtoken_map,
1837							$rtoken_type, $rtokens, $tok,
1838							$type, $type_sequence, $indent_flag,
1839							];
1840
1841	0					0	my $rTV2 = [
1842							$routput_token_list, $routput_token_type,
1843							$routput_block_type, $routput_container_type,
1844							$routput_type_sequence, $routput_indent_flag,
1845							];
1846
1847	0					0	my $rTV3 = [
1848							$in_quote, $quote_type,
1849							$quote_character, $quote_pos,
1850							$quote_depth, $quoted_string_1,
1851							$quoted_string_2, $allowed_quote_modifiers,
1852							];
1853
1854	0					0	my $rTV4 = [ $id_scan_state, $identifier, $want_paren ];
1855
1856	0					0	my $rTV5 = [
1857							$nesting_token_string, $nesting_block_string,
1858							$nesting_block_flag, $level_in_tokenizer,
1859							];
1860
1861	0					0	my $rTV6 = [
1862							$last_nonblank_container_type, $last_nonblank_type_sequence,
1863							$last_last_nonblank_token, $last_last_nonblank_type,
1864							$last_nonblank_prototype,
1865							];
1866	0					0	return [ $rGV1, $rTV1, $rTV2, $rTV3, $rTV4, $rTV5, $rTV6 ];
1867							} ## end sub save_tokenizer_state
1868
1869							sub restore_tokenizer_state {
1870	0			0	0	0	my ($rstate) = @_;
1871	0					0	my ( $rGV1, $rTV1, $rTV2, $rTV3, $rTV4, $rTV5, $rTV6 ) = @{$rstate};
	0					0
1872
1873							(
1874							$brace_depth,
1875							$context,
1876							$current_package,
1877							$last_nonblank_block_type,
1878							$last_nonblank_token,
1879							$last_nonblank_type,
1880							$next_sequence_number,
1881							$paren_depth,
1882							$rbrace_context,
1883							$rbrace_package,
1884							$rbrace_structural_type,
1885							$rbrace_type,
1886							$rcurrent_depth,
1887							$rcurrent_sequence_number,
1888							$rdepth_array,
1889							$ris_block_function,
1890							$ris_block_list_function,
1891							$ris_constant,
1892							$ris_user_function,
1893							$rnested_statement_type,
1894							$rnested_ternary_flag,
1895							$rparen_semicolon_count,
1896							$rparen_vars,
1897							$rparen_type,
1898							$rsaw_function_definition,
1899							$rsaw_use_module,
1900							$rsquare_bracket_structural_type,
1901							$rsquare_bracket_type,
1902							$rstarting_line_of_current_depth,
1903							$rtotal_depth,
1904							$ruser_function_prototype,
1905							$square_bracket_depth,
1906							$statement_type,
1907							$total_depth,
1908
1909	0					0	) = @{$rGV1};
	0					0
1910
1911							(
1912							$block_type, $container_type, $expecting,
1913							$i, $i_tok, $input_line,
1914							$input_line_number, $last_nonblank_i, $max_token_index,
1915							$next_tok, $next_type, $peeked_ahead,
1916							$prototype, $rhere_target_list, $rtoken_map,
1917							$rtoken_type, $rtokens, $tok,
1918							$type, $type_sequence, $indent_flag,
1919	0					0	) = @{$rTV1};
	0					0
1920
1921							(
1922							$routput_token_list, $routput_token_type,
1923							$routput_block_type, $routput_container_type,
1924							$routput_type_sequence, $routput_indent_flag,
1925	0					0	) = @{$rTV2};
	0					0
1926
1927							(
1928							$in_quote, $quote_type, $quote_character, $quote_pos, $quote_depth,
1929							$quoted_string_1, $quoted_string_2, $allowed_quote_modifiers,
1930	0					0	) = @{$rTV3};
	0					0
1931
1932	0					0	( $id_scan_state, $identifier, $want_paren ) = @{$rTV4};
	0					0
1933
1934							(
1935							$nesting_token_string, $nesting_block_string,
1936							$nesting_block_flag, $level_in_tokenizer,
1937	0					0	) = @{$rTV5};
	0					0
1938
1939							(
1940							$last_nonblank_container_type, $last_nonblank_type_sequence,
1941							$last_last_nonblank_token, $last_last_nonblank_type,
1942							$last_nonblank_prototype,
1943	0					0	) = @{$rTV6};
	0					0
1944	0					0	return;
1945							} ## end sub restore_tokenizer_state
1946
1947							sub split_pretoken {
1948
1949	8			8	0	18	my ( $self, $numc ) = @_;
1950
1951							# Split the leading $numc characters from the current token (at index=$i)
1952							# which is pre-type 'w' and insert the remainder back into the pretoken
1953							# stream with appropriate settings. Since we are splitting a pre-type 'w',
1954							# there are three cases, depending on if the remainder starts with a digit:
1955							# Case 1: remainder is type 'd', all digits
1956							# Case 2: remainder is type 'd' and type 'w': digits and other characters
1957							# Case 3: remainder is type 'w'
1958
1959							# Examples, for $numc=1:
1960							# $tok => $tok_0 $tok_1 $tok_2
1961							# 'x10' => 'x' '10' # case 1
1962							# 'x10if' => 'x' '10' 'if' # case 2
1963							# '0ne => 'O' 'ne' # case 3
1964
1965							# where:
1966							# $tok_1 is a possible string of digits (pre-type 'd')
1967							# $tok_2 is a possible word (pre-type 'w')
1968
1969							# return 1 if successful
1970							# return undef if error (shouldn't happen)
1971
1972							# Calling routine should update '$type' and '$tok' if successful.
1973
1974	8					21	my $pretoken = $rtokens->[$i];
1975	8	50	33			86	if ( $pretoken
			33
1976							&& length($pretoken) > $numc
1977							&& substr( $pretoken, $numc ) =~ /^(\d)(.)$/ )
1978							{
1979
1980							# Split $tok into up to 3 tokens:
1981	8					20	my $tok_0 = substr( $pretoken, 0, $numc );
1982	8	50				32	my $tok_1 = defined($1) ? $1 : EMPTY_STRING;
1983	8	50				27	my $tok_2 = defined($2) ? $2 : EMPTY_STRING;
1984
1985	8					17	my $len_0 = length($tok_0);
1986	8					19	my $len_1 = length($tok_1);
1987	8					14	my $len_2 = length($tok_2);
1988
1989	8					16	my $pre_type_0 = 'w';
1990	8					16	my $pre_type_1 = 'd';
1991	8					13	my $pre_type_2 = 'w';
1992
1993	8					15	my $pos_0 = $rtoken_map->[$i];
1994	8					15	my $pos_1 = $pos_0 + $len_0;
1995	8					17	my $pos_2 = $pos_1 + $len_1;
1996
1997	8					15	my $isplice = $i + 1;
1998
1999							# Splice in any digits
2000	8	100				28	if ($len_1) {
2001	5					8	splice @{$rtoken_map}, $isplice, 0, $pos_1;
	5					19
2002	5					11	splice @{$rtokens}, $isplice, 0, $tok_1;
	5					15
2003	5					10	splice @{$rtoken_type}, $isplice, 0, $pre_type_1;
	5					16
2004	5					7	$max_token_index++;
2005	5					9	$isplice++;
2006							}
2007
2008							# Splice in any trailing word
2009	8	100				20	if ($len_2) {
2010	4					6	splice @{$rtoken_map}, $isplice, 0, $pos_2;
	4					14
2011	4					6	splice @{$rtokens}, $isplice, 0, $tok_2;
	4					9
2012	4					8	splice @{$rtoken_type}, $isplice, 0, $pre_type_2;
	4					8
2013	4					7	$max_token_index++;
2014							}
2015
2016	8					19	$rtokens->[$i] = $tok_0;
2017	8					31	return 1;
2018							}
2019							else {
2020
2021							# Shouldn't get here
2022	0					0	if (DEVEL_MODE) {
2023							$self->Fault(<<EOM);
2024							While working near line number $input_line_number, bad arg '$tok' passed to sub split_pretoken()
2025							EOM
2026							}
2027							}
2028	0					0	return;
2029							} ## end sub split_pretoken
2030
2031							sub get_indentation_level {
2032	562			562	0	1603	return $level_in_tokenizer;
2033							}
2034
2035							sub reset_indentation_level {
2036	562			562	0	1573	$level_in_tokenizer = shift;
2037	562					1208	return;
2038							}
2039
2040							sub peeked_ahead {
2041	252			252	0	465	my $flag = shift;
2042	252	100				631	$peeked_ahead = defined($flag) ? $flag : $peeked_ahead;
2043	252					927	return $peeked_ahead;
2044							}
2045
2046							# ------------------------------------------------------------
2047							# end of tokenizer variable access and manipulation routines
2048							# ------------------------------------------------------------
2049
2050							#------------------------------
2051							# beginning of tokenizer hashes
2052							#------------------------------
2053
2054							my %matching_start_token = ( '}' => '{', ']' => '[', ')' => '(' );
2055
2056							# These block types terminate statements and do not need a trailing
2057							# semicolon
2058							# patched for SWITCH/CASE/
2059							# NOTE: not currently used but may be used in the future
2060							my %is_zero_continuation_block_type;
2061							my @q;
2062							@q = qw( } { BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue ;
2063							if elsif else unless while until for foreach switch case given when);
2064							@is_zero_continuation_block_type{@q} = (1) x scalar(@q);
2065
2066							my %is_logical_container;
2067							@q = qw(if elsif unless while and or err not && ! \|\| for foreach);
2068							@is_logical_container{@q} = (1) x scalar(@q);
2069
2070							my %is_binary_type;
2071							@q = qw(\|\| &&);
2072							@is_binary_type{@q} = (1) x scalar(@q);
2073
2074							my %is_binary_keyword;
2075							@q = qw(and or err eq ne cmp);
2076							@is_binary_keyword{@q} = (1) x scalar(@q);
2077
2078							# 'L' is token for opening { at hash key
2079							my %is_opening_type;
2080							@q = qw< L { ( [ >;
2081							@is_opening_type{@q} = (1) x scalar(@q);
2082
2083							my %is_opening_or_ternary_type;
2084							push @q, '?';
2085							@is_opening_or_ternary_type{@q} = (1) x scalar(@q);
2086
2087							# 'R' is token for closing } at hash key
2088							my %is_closing_type;
2089							@q = qw< R } ) ] >;
2090							@is_closing_type{@q} = (1) x scalar(@q);
2091
2092							my %is_closing_or_ternary_type;
2093							push @q, ':';
2094							@is_closing_or_ternary_type{@q} = (1) x scalar(@q);
2095
2096							my %is_redo_last_next_goto;
2097							@q = qw(redo last next goto);
2098							@is_redo_last_next_goto{@q} = (1) x scalar(@q);
2099
2100							my %is_use_require;
2101							@q = qw(use require);
2102							@is_use_require{@q} = (1) x scalar(@q);
2103
2104							# This hash holds the array index in $self for these keywords:
2105							# Fix for issue c035: removed 'format' from this hash
2106							my %is_END_DATA = (
2107							'__END__' => _in_end_,
2108							'__DATA__' => _in_data_,
2109							);
2110
2111							my %is_list_end_type;
2112							@q = qw( ; { } );
2113							push @q, ',';
2114							@is_list_end_type{@q} = (1) x scalar(@q);
2115
2116							# original ref: camel 3 p 147,
2117							# but perl may accept undocumented flags
2118							# perl 5.10 adds 'p' (preserve)
2119							# Perl version 5.22 added 'n'
2120							# From http://perldoc.perl.org/perlop.html we have
2121							# /PATTERN/msixpodualngc or m?PATTERN?msixpodualngc
2122							# s/PATTERN/REPLACEMENT/msixpodualngcer
2123							# y/SEARCHLIST/REPLACEMENTLIST/cdsr
2124							# tr/SEARCHLIST/REPLACEMENTLIST/cdsr
2125							# qr/STRING/msixpodualn
2126							my %quote_modifiers = (
2127							's' => '[msixpodualngcer]',
2128							'y' => '[cdsr]',
2129							'tr' => '[cdsr]',
2130							'm' => '[msixpodualngc]',
2131							'qr' => '[msixpodualn]',
2132							'q' => EMPTY_STRING,
2133							'qq' => EMPTY_STRING,
2134							'qw' => EMPTY_STRING,
2135							'qx' => EMPTY_STRING,
2136							);
2137
2138							# table showing how many quoted things to look for after quote operator..
2139							# s, y, tr have 2 (pattern and replacement)
2140							# others have 1 (pattern only)
2141							my %quote_items = (
2142							's' => 2,
2143							'y' => 2,
2144							'tr' => 2,
2145							'm' => 1,
2146							'qr' => 1,
2147							'q' => 1,
2148							'qq' => 1,
2149							'qw' => 1,
2150							'qx' => 1,
2151							);
2152
2153							my %is_for_foreach;
2154							@q = qw(for foreach);
2155							@is_for_foreach{@q} = (1) x scalar(@q);
2156
2157							# These keywords may introduce blocks after parenthesized expressions,
2158							# in the form:
2159							# keyword ( .... ) { BLOCK }
2160							# patch for SWITCH/CASE: added 'switch' 'case' 'given' 'when'
2161							# NOTE for --use-feature=class: if ADJUST blocks eventually take a
2162							# parameter list, then ADJUST might need to be added to this list (see
2163							# perlclass.pod)
2164							my %is_blocktype_with_paren;
2165							@q =
2166							qw(if elsif unless while until for foreach switch case given when catch);
2167							@is_blocktype_with_paren{@q} = (1) x scalar(@q);
2168
2169							my %is_case_default;
2170							@q = qw(case default);
2171							@is_case_default{@q} = (1) x scalar(@q);
2172
2173							#------------------------
2174							# end of tokenizer hashes
2175							#------------------------
2176
2177							# ------------------------------------------------------------
2178							# beginning of various scanner interface routines
2179							# ------------------------------------------------------------
2180							sub scan_replacement_text {
2181
2182							# check for here-docs in replacement text invoked by
2183							# a substitution operator with executable modifier 'e'.
2184							#
2185							# given:
2186							# $replacement_text
2187							# return:
2188							# $rht = reference to any here-doc targets
2189	0			0	0	0	my ( $self, $replacement_text ) = @_;
2190
2191							# quick check
2192	0	0				0	return if ( $replacement_text !~ /<</ );
2193
2194	0					0	$self->write_logfile_entry(
2195							"scanning replacement text for here-doc targets\n");
2196
2197							# save the logger object for error messages
2198	0					0	my $logger_object = $self->[_logger_object_];
2199
2200							# save all lexical variables
2201	0					0	my $rstate = save_tokenizer_state();
2202	0					0	_decrement_count(); # avoid error check for multiple tokenizers
2203
2204							# make a new tokenizer
2205	0					0	my $tokenizer = Perl::Tidy::Tokenizer->new(
2206							source_object => \$replacement_text,
2207							logger_object => $logger_object,
2208							starting_line_number => $input_line_number,
2209							);
2210
2211							# scan the replacement text
2212	0					0	while ( $tokenizer->get_line() ) { }
2213
2214							# remove any here doc targets
2215	0					0	my $rht = undef;
2216	0	0				0	if ( $tokenizer->[_in_here_doc_] ) {
2217	0					0	$rht = [];
2218	0					0	push @{$rht},
	0					0
2219							[
2220							$tokenizer->[_here_doc_target_],
2221							$tokenizer->[_here_quote_character_]
2222							];
2223	0	0				0	if ( $tokenizer->[_rhere_target_list_] ) {
2224	0					0	push @{$rht}, @{ $tokenizer->[_rhere_target_list_] };
	0					0
	0					0
2225	0					0	$tokenizer->[_rhere_target_list_] = undef;
2226							}
2227	0					0	$tokenizer->[_in_here_doc_] = undef;
2228							}
2229
2230							# now its safe to report errors
2231	0					0	my $severe_error = $tokenizer->report_tokenization_errors();
2232
2233							# TODO: Could propagate a severe error up
2234
2235							# restore all tokenizer lexical variables
2236	0					0	restore_tokenizer_state($rstate);
2237
2238							# return the here doc targets
2239	0					0	return $rht;
2240							} ## end sub scan_replacement_text
2241
2242							sub scan_bare_identifier {
2243	1674			1674	0	3031	my $self = shift;
2244	1674					5209	( $i, $tok, $type, $prototype ) =
2245							$self->scan_bare_identifier_do( $input_line, $i, $tok, $type,
2246							$prototype, $rtoken_map, $max_token_index );
2247	1674					3444	return;
2248							} ## end sub scan_bare_identifier
2249
2250							sub scan_identifier {
2251
2252	486			486	0	913	my $self = shift;
2253
2254							(
2255	486					1975	$i, $tok, $type, $id_scan_state, $identifier,
2256							my $split_pretoken_flag
2257							)
2258							= $self->scan_complex_identifier( $i, $id_scan_state, $identifier,
2259							$rtokens, $max_token_index, $expecting,
2260							$rparen_type->[$paren_depth] );
2261
2262							# Check for signal to fix a special variable adjacent to a keyword,
2263							# such as '$^One$0'.
2264	486	100				1492	if ($split_pretoken_flag) {
2265
2266							# Try to fix it by splitting the pretoken
2267	3	50	33			41	if ( $i > 0
			33
2268							&& $rtokens->[ $i - 1 ] eq '^'
2269							&& $self->split_pretoken(1) )
2270							{
2271	3					9	$identifier = substr( $identifier, 0, 3 );
2272	3					5	$tok = $identifier;
2273							}
2274							else {
2275
2276							# This shouldn't happen ...
2277	0					0	my $var = substr( $tok, 0, 3 );
2278	0					0	my $excess = substr( $tok, 3 );
2279	0					0	$self->interrupt_logfile();
2280	0					0	$self->warning(<<EOM);
2281							$input_line_number: Trouble parsing at characters '$excess' after special variable '$var'.
2282							A space may be needed after '$var'.
2283							EOM
2284	0					0	$self->resume_logfile();
2285							}
2286							}
2287	486					958	return;
2288							} ## end sub scan_identifier
2289
2290	39			39		403	use constant VERIFY_FASTSCAN => 0;
	39					141
	39					5185
2291							my %fast_scan_context;
2292
2293							BEGIN {
2294	39			39		49293	%fast_scan_context = (
2295							'$' => SCALAR_CONTEXT,
2296							'*' => SCALAR_CONTEXT,
2297							'@' => LIST_CONTEXT,
2298							'%' => LIST_CONTEXT,
2299							'&' => UNKNOWN_CONTEXT,
2300							);
2301							} ## end BEGIN
2302
2303							sub scan_simple_identifier {
2304
2305							# This is a wrapper for sub scan_identifier. It does a fast preliminary
2306							# scan for certain common identifiers:
2307							# '$var', '@var', %var, *var, &var, '@{...}', '%{...}'
2308							# If it does not find one of these, or this is a restart, it calls the
2309							# original scanner directly.
2310
2311							# This gives the same results as the full scanner in about 1/4 the
2312							# total runtime for a typical input stream.
2313
2314							# Notation:
2315							# $var * 2
2316							# ^^ ^
2317							# \|\| \|
2318							# \|\| ---- $i_next [= next nonblank pretoken ]
2319							# \|----$i_plus_1 [= a bareword ]
2320							# ---$i_begin [= a sigil]
2321
2322	4793			4793	0	7352	my $self = shift;
2323
2324	4793					7288	my $i_begin = $i;
2325	4793					7126	my $tok_begin = $tok;
2326	4793					7882	my $i_plus_1 = $i + 1;
2327	4793					7137	my $fast_scan_type;
2328
2329							#-------------------------------------------------------
2330							# Do full scan for anything following a pointer, such as
2331							# $cref->&*; # a postderef
2332							#-------------------------------------------------------
2333	4793	100	66			27980	if ( $last_nonblank_token eq '->' ) {
		100	66
		50	33
			0
			33
2334
2335							}
2336
2337							#------------------------------
2338							# quick scan with leading sigil
2339							#------------------------------
2340							elsif ( !$id_scan_state
2341							&& $i_plus_1 <= $max_token_index
2342							&& $fast_scan_context{$tok} )
2343							{
2344	4680					8227	$context = $fast_scan_context{$tok};
2345
2346							# look for $var, @var, ...
2347	4680	100	100			11395	if ( $rtoken_type->[$i_plus_1] eq 'w' ) {
		100	66
2348	4392					7101	my $pretype_next = EMPTY_STRING;
2349	4392	100				9430	if ( $i_plus_1 < $max_token_index ) {
2350	4276					6741	my $i_next = $i_plus_1 + 1;
2351	4276	100	100			13227	if ( $rtoken_type->[$i_next] eq 'b'
2352							&& $i_next < $max_token_index )
2353							{
2354	1708					2986	$i_next += 1;
2355							}
2356	4276					7393	$pretype_next = $rtoken_type->[$i_next];
2357							}
2358	4392	100	100			15824	if ( $pretype_next ne ':' && $pretype_next ne "'" ) {
2359
2360							# Found type 'i' like '$var', '@var', or '%var'
2361	4284					8325	$identifier = $tok . $rtokens->[$i_plus_1];
2362	4284					6609	$tok = $identifier;
2363	4284					6995	$type = 'i';
2364	4284					6198	$i = $i_plus_1;
2365	4284					7347	$fast_scan_type = $type;
2366							}
2367							}
2368
2369							# Look for @{ or %{ .
2370							# But we must let the full scanner handle things ${ because it may
2371							# keep going to get a complete identifier like '${#}' .
2372							elsif (
2373							$rtoken_type->[$i_plus_1] eq '{'
2374							&& ( $tok_begin eq '@'
2375							\|\| $tok_begin eq '%' )
2376							)
2377							{
2378
2379	30					76	$identifier = $tok;
2380	30					62	$type = 't';
2381	30					53	$fast_scan_type = $type;
2382							}
2383							else {
2384							## out of tricks
2385							}
2386							}
2387
2388							#---------------------------
2389							# Quick scan with leading ->
2390							# Look for ->[ and ->{
2391							#---------------------------
2392							elsif (
2393							$tok eq '->'
2394							&& $i < $max_token_index
2395							&& ( $rtokens->[$i_plus_1] eq '{'
2396							\|\| $rtokens->[$i_plus_1] eq '[' )
2397							)
2398							{
2399	0					0	$type = $tok;
2400	0					0	$fast_scan_type = $type;
2401	0					0	$identifier = $tok;
2402	0					0	$context = UNKNOWN_CONTEXT;
2403							}
2404							else {
2405							## out of tricks
2406							}
2407
2408							#--------------------------------------
2409							# Verify correctness during development
2410							#--------------------------------------
2411	4793					6692	if ( VERIFY_FASTSCAN && $fast_scan_type ) {
2412
2413							# We will call the full method
2414							my $identifier_simple = $identifier;
2415							my $tok_simple = $tok;
2416							my $i_simple = $i;
2417							my $context_simple = $context;
2418
2419							$tok = $tok_begin;
2420							$i = $i_begin;
2421							$self->scan_identifier();
2422
2423							if ( $tok ne $tok_simple
2424							\|\| $type ne $fast_scan_type
2425							\|\| $i != $i_simple
2426							\|\| $identifier ne $identifier_simple
2427							\|\| $id_scan_state
2428							\|\| $context ne $context_simple )
2429							{
2430							print {*STDERR} <<EOM;
2431							scan_simple_identifier differs from scan_identifier:
2432							simple: i=$i_simple, tok=$tok_simple, type=$fast_scan_type, ident=$identifier_simple, context='$context_simple
2433							full: i=$i, tok=$tok, type=$type, ident=$identifier, context='$context state=$id_scan_state
2434							EOM
2435							}
2436							}
2437
2438							#-------------------------------------------------
2439							# call full scanner if fast method did not succeed
2440							#-------------------------------------------------
2441	4793	100				10040	if ( !$fast_scan_type ) {
2442	479					1675	$self->scan_identifier();
2443							}
2444	4793					8359	return;
2445							} ## end sub scan_simple_identifier
2446
2447							sub method_ok_here {
2448
2449							# Return:
2450							# false if this is definitely an invalid method declaration
2451							# true otherwise (even if not sure)
2452
2453							# We are trying to avoid problems with old uses of 'method'
2454							# when --use-feature=class is set (rt145706).
2455							# For example, this should cause a return of 'false':
2456
2457							# method paint => sub {
2458							# return;
2459							# };
2460
2461	6			6	0	18	my $self = shift;
2462
2463							# from do_scan_sub:
2464	6					14	my $i_beg = $i + 1;
2465	6					13	my $pos_beg = $rtoken_map->[$i_beg];
2466	6					19	pos($input_line) = $pos_beg;
2467
2468							# TEST 1: look a valid sub NAME
2469	6	50				43	if (
2470							$input_line =~ m{\G\s*
2471							((?:\w(?:'\|::))) # package - something that ends in :: or '
2472							(\w+) # NAME - required
2473							}gcx
2474							)
2475							{
2476							# For possible future use..
2477	6					15	my $subname = $2;
2478	6	50				27	my $package = $1 ? $1 : EMPTY_STRING;
2479							}
2480							else {
2481	0					0	return;
2482							}
2483
2484							# TEST 2: look for invalid characters after name, such as here:
2485							# method paint => sub {
2486							# ...
2487							# }
2488	6					14	my $next_char = EMPTY_STRING;
2489	6	100				31	if ( $input_line =~ m/\s*(\S)/gcx ) { $next_char = $1 }
	5					14
2490	6	100	66			38	if ( !$next_char \|\| $next_char eq '#' ) {
2491	1					5	( $next_char, my $i_next ) =
2492							$self->find_next_nonblank_token( $max_token_index,
2493							$rtokens, $max_token_index );
2494							}
2495
2496	6	50				21	if ( !$next_char ) {
2497
2498							# out of characters - give up
2499	0					0	return;
2500							}
2501
2502							# Possibly valid next token types:
2503							# '(' could start prototype or signature
2504							# ':' could start ATTRIBUTE
2505							# '{' cold start BLOCK
2506							# ';' or '}' could end a statement
2507	6	100				30	if ( $next_char !~ /^[\(\:\{\;\}]/ ) {
2508
2509							# This does not match use feature 'class' syntax
2510	3					13	return;
2511							}
2512
2513							# We will stop here and assume that this is valid syntax for
2514							# use feature 'class'.
2515	3					18	return 1;
2516							} ## end sub method_ok_here
2517
2518							sub class_ok_here {
2519
2520							# Return:
2521							# false if this is definitely an invalid class declaration
2522							# true otherwise (even if not sure)
2523
2524							# We are trying to avoid problems with old uses of 'class'
2525							# when --use-feature=class is set (rt145706). We look ahead
2526							# see if this use of 'class' is obviously inconsistent with
2527							# the syntax of use feature 'class'. This allows the default
2528							# setting --use-feature=class to work for old syntax too.
2529
2530							# Valid class declarations look like
2531							# class NAME ?ATTRS ?VERSION ?BLOCK
2532							# where ATTRS VERSION and BLOCK are optional
2533
2534							# For example, this should produce a return of 'false':
2535							#
2536							# class ExtendsBasicAttributes is BasicAttributes{
2537
2538	6			6	0	13	my $self = shift;
2539
2540							# TEST 1: class stmt can only go where a new statment can start
2541	6	50				15	if ( !new_statement_ok() ) { return }
	0					0
2542
2543	6					14	my $i_beg = $i + 1;
2544	6					12	my $pos_beg = $rtoken_map->[$i_beg];
2545	6					19	pos($input_line) = $pos_beg;
2546
2547							# TEST 2: look for a valid NAME
2548	6	50				36	if (
2549							$input_line =~ m{\G\s*
2550							((?:\w(?:'\|::))) # package - something that ends in :: or '
2551							(\w+) # NAME - required
2552							}gcx
2553							)
2554							{
2555							# For possible future use..
2556	6					17	my $subname = $2;
2557	6	100				20	my $package = $1 ? $1 : EMPTY_STRING;
2558							}
2559							else {
2560	0					0	return;
2561							}
2562
2563							# TEST 3: look for valid characters after NAME
2564	6					13	my $next_char = EMPTY_STRING;
2565	6	100				23	if ( $input_line =~ m/\s*(\S)/gcx ) { $next_char = $1 }
	5					11
2566	6	100	66			29	if ( !$next_char \|\| $next_char eq '#' ) {
2567	1					12	( $next_char, my $i_next ) =
2568							$self->find_next_nonblank_token( $max_token_index,
2569							$rtokens, $max_token_index );
2570							}
2571	6	50				16	if ( !$next_char ) {
2572
2573							# out of characters - give up
2574	0					0	return;
2575							}
2576
2577							# Must see one of: ATTRIBUTE, VERSION, BLOCK, or end stmt
2578
2579							# Possibly valid next token types:
2580							# ':' could start ATTRIBUTE
2581							# '\d' could start VERSION
2582							# '{' cold start BLOCK
2583							# ';' could end a statement
2584							# '}' could end statement but would be strange
2585
2586	6	100				23	if ( $next_char !~ /^[\:\d\{\;\}]/ ) {
2587
2588							# This does not match use feature 'class' syntax
2589	2					10	return;
2590							}
2591
2592							# We will stop here and assume that this is valid syntax for
2593							# use feature 'class'.
2594	4					15	return 1;
2595							} ## end sub class_ok_here
2596
2597							sub scan_id {
2598	332			332	0	690	my $self = shift;
2599	332					1382	( $i, $tok, $type, $id_scan_state ) =
2600							$self->scan_id_do( $input_line, $i, $tok, $rtokens, $rtoken_map,
2601							$id_scan_state, $max_token_index );
2602	332					802	return;
2603							} ## end sub scan_id
2604
2605							sub scan_number {
2606	629			629	0	1069	my $self = shift;
2607	629					949	my $number;
2608	629					1826	( $i, $type, $number ) =
2609							$self->scan_number_do( $input_line, $i, $rtoken_map, $type,
2610							$max_token_index );
2611	629					1512	return $number;
2612							} ## end sub scan_number
2613
2614	39			39		373	use constant VERIFY_FASTNUM => 0;
	39					100
	39					30580
2615
2616							sub scan_number_fast {
2617
2618							# This is a wrapper for sub scan_number. It does a fast preliminary
2619							# scan for a simple integer. It calls the original scan_number if it
2620							# does not find one.
2621
2622	2277			2277	0	3615	my $self = shift;
2623	2277					3611	my $i_begin = $i;
2624	2277					3574	my $tok_begin = $tok;
2625	2277					3251	my $number;
2626
2627							#---------------------------------
2628							# Quick check for (signed) integer
2629							#---------------------------------
2630
2631							# This will be the string of digits:
2632	2277					3723	my $i_d = $i;
2633	2277					3626	my $tok_d = $tok;
2634	2277					4030	my $typ_d = $rtoken_type->[$i_d];
2635
2636							# check for signed integer
2637	2277					3739	my $sign = EMPTY_STRING;
2638	2277	50	66			7009	if ( $typ_d ne 'd'
			66
			33
2639							&& ( $typ_d eq '+' \|\| $typ_d eq '-' )
2640							&& $i_d < $max_token_index )
2641							{
2642	343					581	$sign = $tok_d;
2643	343					625	$i_d++;
2644	343					646	$tok_d = $rtokens->[$i_d];
2645	343					647	$typ_d = $rtoken_type->[$i_d];
2646							}
2647
2648							# Handle integers
2649	2277	100	100			17023	if (
			100
2650							$typ_d eq 'd'
2651							&& (
2652							$i_d == $max_token_index
2653							\|\| ( $i_d < $max_token_index
2654							&& $rtoken_type->[ $i_d + 1 ] ne '.'
2655							&& $rtoken_type->[ $i_d + 1 ] ne 'w' )
2656							)
2657							)
2658							{
2659							# Let let full scanner handle multi-digit integers beginning with
2660							# '0' because there could be error messages. For example, '009' is
2661							# not a valid number.
2662
2663	1715	100	100			7373	if ( $tok_d eq '0' \|\| substr( $tok_d, 0, 1 ) ne '0' ) {
2664	1658					3124	$number = $sign . $tok_d;
2665	1658					2696	$type = 'n';
2666	1658					4018	$i = $i_d;
2667							}
2668							}
2669
2670							#--------------------------------------
2671							# Verify correctness during development
2672							#--------------------------------------
2673	2277					3305	if ( VERIFY_FASTNUM && defined($number) ) {
2674
2675							# We will call the full method
2676							my $type_simple = $type;
2677							my $i_simple = $i;
2678							my $number_simple = $number;
2679
2680							$tok = $tok_begin;
2681							$i = $i_begin;
2682							$number = $self->scan_number();
2683
2684							if ( $type ne $type_simple
2685							\|\| ( $i != $i_simple && $i <= $max_token_index )
2686							\|\| $number ne $number_simple )
2687							{
2688							print {*STDERR} <<EOM;
2689							scan_number_fast differs from scan_number:
2690							simple: i=$i_simple, type=$type_simple, number=$number_simple
2691							full: i=$i, type=$type, number=$number
2692							EOM
2693							}
2694							}
2695
2696							#----------------------------------------
2697							# call full scanner if may not be integer
2698							#----------------------------------------
2699	2277	100				5476	if ( !defined($number) ) {
2700	619					1682	$number = $self->scan_number();
2701							}
2702	2277					5480	return $number;
2703							} ## end sub scan_number_fast
2704
2705							# a sub to warn if token found where term expected
2706							sub error_if_expecting_TERM {
2707	9			9	0	21	my $self = shift;
2708	9	50				43	if ( $expecting == TERM ) {
2709	9	50				37	if ( $really_want_term{$last_nonblank_type} ) {
2710	0					0	$self->report_unexpected( $tok, "term", $i_tok,
2711							$last_nonblank_i, $rtoken_map, $rtoken_type, $input_line );
2712	0					0	return 1;
2713							}
2714							}
2715	9					21	return;
2716							} ## end sub error_if_expecting_TERM
2717
2718							# a sub to warn if token found where operator expected
2719							sub error_if_expecting_OPERATOR {
2720	769			769	0	1631	my ( $self, $thing ) = @_;
2721	769	50				1829	if ( $expecting == OPERATOR ) {
2722	0	0				0	if ( !defined($thing) ) { $thing = $tok }
	0					0
2723	0					0	$self->report_unexpected( $thing, "operator", $i_tok,
2724							$last_nonblank_i, $rtoken_map, $rtoken_type, $input_line );
2725	0	0				0	if ( $i_tok == 0 ) {
2726	0					0	$self->interrupt_logfile();
2727	0					0	$self->warning("Missing ';' or ',' above?\n");
2728	0					0	$self->resume_logfile();
2729							}
2730	0					0	return 1;
2731							}
2732	769					1491	return;
2733							} ## end sub error_if_expecting_OPERATOR
2734
2735							# ------------------------------------------------------------
2736							# end scanner interfaces
2737							# ------------------------------------------------------------
2738
2739							#------------------
2740							# Tokenization subs
2741							#------------------
2742							sub do_GREATER_THAN_SIGN {
2743
2744	31			31	0	92	my $self = shift;
2745
2746							# '>'
2747	31	50				118	$self->error_if_expecting_TERM()
2748							if ( $expecting == TERM );
2749	31					76	return;
2750							} ## end sub do_GREATER_THAN_SIGN
2751
2752							sub do_VERTICAL_LINE {
2753
2754	4			4	0	9	my $self = shift;
2755
2756							# '\|'
2757	4	50				19	$self->error_if_expecting_TERM()
2758							if ( $expecting == TERM );
2759	4					11	return;
2760							} ## end sub do_VERTICAL_LINE
2761
2762							# An identifier in possible indirect object location followed by any of
2763							# these tokens: -> , ; } (plus others) is not an indirect object. Fix c257.
2764							my %Z_test_hash;
2765
2766							BEGIN {
2767	39			39		468	my @qZ = qw#
2768							-> ; } ) ]
2769							=> =~ = == !~ \|\| >= != *= .. && \|= .= -= += <= %=
2770							^= &&= \|\|= //= <=>
2771							#;
2772	39					169	push @qZ, ',';
2773	39					354211	@{Z_test_hash}{@qZ} = (1) x scalar(@qZ);
2774							}
2775
2776							sub do_DOLLAR_SIGN {
2777
2778	4038			4038	0	7027	my $self = shift;
2779
2780							# '$'
2781							# start looking for a scalar
2782	4038	50				9222	$self->error_if_expecting_OPERATOR("Scalar")
2783							if ( $expecting == OPERATOR );
2784	4038					12372	$self->scan_simple_identifier();
2785
2786	4038	100				9436	if ( $identifier eq '$^W' ) {
2787	1					4	$self->[_saw_perl_dash_w_] = 1;
2788							}
2789
2790							# Check for identifier in indirect object slot
2791							# (vorboard.pl, sort.t). Something like:
2792							# /^(print\|printf\|sort\|exec\|system)$/
2793	4038	100	66			32279	if (
			100
			100
			66
			66
2794							$is_indirect_object_taker{$last_nonblank_token}
2795							&& $last_nonblank_type eq 'k'
2796							\|\| ( ( $last_nonblank_token eq '(' )
2797							&& $is_indirect_object_taker{ $rparen_type->[$paren_depth] } )
2798							\|\| ( $last_nonblank_type eq 'w'
2799							\|\| $last_nonblank_type eq 'U' ) # possible object
2800							)
2801							{
2802
2803							# An identifier followed by '->' is not indirect object;
2804							# fixes b1175, b1176. Fix c257: Likewise for other tokens like
2805							# comma, semicolon, closing brace, and single space.
2806	98					683	my ( $next_nonblank_token, $i_next ) =
2807							$self->find_next_noncomment_token( $i, $rtokens,
2808							$max_token_index );
2809	98	100				435	$type = 'Z' if ( !$Z_test_hash{$next_nonblank_token} );
2810							}
2811	4038					6741	return;
2812							} ## end sub do_DOLLAR_SIGN
2813
2814							sub do_LEFT_PARENTHESIS {
2815
2816	2126			2126	0	4074	my $self = shift;
2817
2818							# '('
2819	2126					3498	++$paren_depth;
2820
2821							# variable to enable check for brace after closing paren (c230)
2822	2126					3846	my $want_brace = EMPTY_STRING;
2823
2824	2126	100				6613	if ($want_paren) {
		100
2825	240					591	$container_type = $want_paren;
2826	240					585	$want_brace = $want_paren;
2827	240					492	$want_paren = EMPTY_STRING;
2828							}
2829							elsif ( $statement_type =~ /^sub\b/ ) {
2830	14					35	$container_type = $statement_type;
2831							}
2832							else {
2833	1872					3370	$container_type = $last_nonblank_token;
2834
2835							# We can check for a syntax error here of unexpected '(',
2836							# but this is going to get messy...
2837	1872	100	100			7495	if (
2838							$expecting == OPERATOR
2839
2840							# Be sure this is not a method call of the form
2841							# &method(...), $method->(..), &{method}(...),
2842							# $ref[2](list) is ok & short for $ref[2]->(list)
2843							# NOTE: at present, braces in something like &{ xxx }
2844							# are not marked as a block, we might have a method call.
2845							# Added ')' to fix case c017, something like ()()()
2846							&& $last_nonblank_token !~ /^(?:[\]\}\)\&]\|\-\>)/
2847							)
2848							{
2849
2850							# ref: camel 3 p 703.
2851	3	50				12	if ( $last_last_nonblank_token eq 'do' ) {
2852	0					0	$self->complain(
2853							"do SUBROUTINE is deprecated; consider & or -> notation\n"
2854							);
2855							}
2856							else {
2857
2858							# if this is an empty list, (), then it is not an
2859							# error; for example, we might have a constant pi and
2860							# invoke it with pi() or just pi;
2861	3					9	my ( $next_nonblank_token, $i_next ) =
2862							$self->find_next_nonblank_token( $i, $rtokens,
2863							$max_token_index );
2864
2865							# Patch for c029: give up error check if
2866							# a side comment follows
2867	3	50	33			19	if ( $next_nonblank_token ne ')'
2868							&& $next_nonblank_token ne '#' )
2869							{
2870	0					0	my $hint;
2871
2872	0					0	$self->error_if_expecting_OPERATOR('(');
2873
2874	0	0				0	if ( $last_nonblank_type eq 'C' ) {
		0
2875	0					0	$hint =
2876							"$last_nonblank_token has a void prototype\n";
2877							}
2878							elsif ( $last_nonblank_type eq 'i' ) {
2879	0	0	0			0	if ( $i_tok > 0
2880							&& $last_nonblank_token =~ /^\$/ )
2881							{
2882	0					0	$hint =
2883							"Do you mean '$last_nonblank_token->(' ?\n";
2884							}
2885							}
2886							else {
2887							## no hint
2888							}
2889	0	0				0	if ($hint) {
2890	0					0	$self->interrupt_logfile();
2891	0					0	$self->warning($hint);
2892	0					0	$self->resume_logfile();
2893							}
2894							} ## end if ( $next_nonblank_token...
2895							} ## end else [ if ( $last_last_nonblank_token...
2896							} ## end if ( $expecting == OPERATOR...
2897							}
2898
2899	2126					7126	( $type_sequence, $indent_flag ) =
2900							$self->increase_nesting_depth( PAREN, $rtoken_map->[$i_tok] );
2901
2902							# propagate types down through nested parens
2903							# for example: the second paren in 'if ((' would be structural
2904							# since the first is.
2905
2906	2126	100				5623	if ( $last_nonblank_token eq '(' ) {
2907	61					240	$type = $last_nonblank_type;
2908							}
2909
2910							# We exclude parens as structural after a ',' because it
2911							# causes subtle problems with continuation indentation for
2912							# something like this, where the first 'or' will not get
2913							# indented.
2914							#
2915							# assert(
2916							# __LINE__,
2917							# ( not defined $check )
2918							# or ref $check
2919							# or $check eq "new"
2920							# or $check eq "old",
2921							# );
2922							#
2923							# Likewise, we exclude parens where a statement can start
2924							# because of problems with continuation indentation, like
2925							# these:
2926							#
2927							# ($firstline =~ /^#\!.*perl/)
2928							# and (print $File::Find::name, "\n")
2929							# and (return 1);
2930							#
2931							# (ref($usage_fref) =~ /CODE/)
2932							# ? &$usage_fref
2933							# : (&blast_usage, &blast_params, &blast_general_params);
2934
2935							else {
2936	2065					3606	$type = '{';
2937							}
2938
2939	2126	50				5182	if ( $last_nonblank_type eq ')' ) {
2940	0					0	$self->warning(
2941							"Syntax error? found token '$last_nonblank_type' then '('\n");
2942							}
2943
2944							# git #105: Copy container type and want-brace flag at ') (';
2945							# propagate the container type onward so that any subsequent brace gets
2946							# correctly marked. I have implemented this as a general rule, which
2947							# should be safe, but if necessary it could be restricted to certain
2948							# container statement types such as 'for'.
2949	2126	100				4882	if ( $last_nonblank_token eq ')' ) {
2950	1					3	my $rvars = $rparen_vars->[$paren_depth];
2951	1	50				5	if ( defined($rvars) ) {
2952	1					5	$container_type = $rparen_type->[$paren_depth];
2953	1					2	( my $type_lp, $want_brace ) = @{$rvars};
	1					8
2954							}
2955							}
2956
2957	2126					4378	$rparen_type->[$paren_depth] = $container_type;
2958	2126					5772	$rparen_vars->[$paren_depth] = [ $type, $want_brace ];
2959	2126					4062	$rparen_semicolon_count->[$paren_depth] = 0;
2960
2961	2126					3769	return;
2962
2963							} ## end sub do_LEFT_PARENTHESIS
2964
2965							sub do_RIGHT_PARENTHESIS {
2966
2967	2126			2126	0	4292	my $self = shift;
2968
2969							# ')'
2970	2126					7302	( $type_sequence, $indent_flag ) =
2971							$self->decrease_nesting_depth( PAREN, $rtoken_map->[$i_tok] );
2972
2973	2126					4775	my $rvars = $rparen_vars->[$paren_depth];
2974	2126	50				5588	if ( defined($rvars) ) {
2975	2126					3456	my ( $type_lp, $want_brace ) = @{$rvars};
	2126					4868
2976	2126	50	33			8624	if ( $type_lp && $type_lp eq '{' ) {
2977	2126					4039	$type = '}';
2978							}
2979							}
2980
2981	2126					3895	$container_type = $rparen_type->[$paren_depth];
2982
2983							# restore statement type as 'sub' at closing paren of a signature
2984							# so that a subsequent ':' is identified as an attribute
2985	2126	100				6463	if ( $container_type =~ /^sub\b/ ) {
2986	24					58	$statement_type = $container_type;
2987							}
2988
2989							# /^(for\|foreach)$/
2990	2126	100				5915	if ( $is_for_foreach{ $rparen_type->[$paren_depth] } ) {
2991	69					235	my $num_sc = $rparen_semicolon_count->[$paren_depth];
2992	69	50	66			439	if ( $num_sc > 0 && $num_sc != 2 ) {
2993	0					0	$self->warning("Expected 2 ';' in 'for(;;)' but saw $num_sc\n");
2994							}
2995							}
2996
2997	2126	50				5220	if ( $paren_depth > 0 ) { $paren_depth-- }
	2126					3343
2998	2126					4166	return;
2999							} ## end sub do_RIGHT_PARENTHESIS
3000
3001							sub do_COMMA {
3002
3003	3075			3075	0	5316	my $self = shift;
3004
3005							# ','
3006	3075	100	33			10751	if ( $last_nonblank_type eq ',' ) {
		50
3007	10					39	$self->complain("Repeated ','s \n");
3008							}
3009
3010							# Note that we have to check both token and type here because a
3011							# comma following a qw list can have last token='(' but type = 'q'
3012							elsif ( $last_nonblank_token eq '(' && $last_nonblank_type eq '{' ) {
3013	0					0	$self->warning("Unexpected leading ',' after a '('\n");
3014							}
3015							else {
3016							## ok: no complaints needed
3017							}
3018
3019							# patch for operator_expected: note if we are in the list (use.t)
3020	3075	100				6436	if ( $statement_type eq 'use' ) { $statement_type = '_use' }
	6					15
3021	3075					4920	return;
3022
3023							} ## end sub do_COMMA
3024
3025							sub do_SEMICOLON {
3026
3027	2450			2450	0	4939	my $self = shift;
3028
3029							# ';'
3030	2450					4403	$context = UNKNOWN_CONTEXT;
3031	2450					3952	$statement_type = EMPTY_STRING;
3032	2450					4230	$want_paren = EMPTY_STRING;
3033
3034							# /^(for\|foreach)$/
3035	2450	100				7397	if ( $is_for_foreach{ $rparen_type->[$paren_depth] } )
3036							{ # mark ; in for loop
3037
3038							# Be careful: we do not want a semicolon such as the
3039							# following to be included:
3040							#
3041							# for (sort {strcoll($a,$b);} keys %investments) {
3042
3043	35	100	66			274	if ( $brace_depth == $rdepth_array->[PAREN][BRACE][$paren_depth]
3044							&& $square_bracket_depth ==
3045							$rdepth_array->[PAREN][SQUARE_BRACKET][$paren_depth] )
3046							{
3047
3048	34					78	$type = 'f';
3049	34					66	$rparen_semicolon_count->[$paren_depth]++;
3050							}
3051							}
3052	2450					4092	return;
3053							} ## end sub do_SEMICOLON
3054
3055							sub do_QUOTATION_MARK {
3056
3057	1125			1125	0	2302	my $self = shift;
3058
3059							# '"'
3060	1125	50				4079	$self->error_if_expecting_OPERATOR("String")
3061							if ( $expecting == OPERATOR );
3062	1125					1960	$in_quote = 1;
3063	1125					1914	$type = 'Q';
3064	1125					1846	$allowed_quote_modifiers = EMPTY_STRING;
3065	1125					1862	return;
3066							} ## end sub do_QUOTATION_MARK
3067
3068							sub do_APOSTROPHE {
3069
3070	1164			1164	0	2318	my $self = shift;
3071
3072							# "'"
3073	1164	50				2905	$self->error_if_expecting_OPERATOR("String")
3074							if ( $expecting == OPERATOR );
3075	1164					1931	$in_quote = 1;
3076	1164					1915	$type = 'Q';
3077	1164					1829	$allowed_quote_modifiers = EMPTY_STRING;
3078	1164					1832	return;
3079							} ## end sub do_APOSTROPHE
3080
3081							sub do_BACKTICK {
3082
3083	0			0	0	0	my $self = shift;
3084
3085							# '`'
3086	0	0				0	$self->error_if_expecting_OPERATOR("String")
3087							if ( $expecting == OPERATOR );
3088	0					0	$in_quote = 1;
3089	0					0	$type = 'Q';
3090	0					0	$allowed_quote_modifiers = EMPTY_STRING;
3091	0					0	return;
3092							} ## end sub do_BACKTICK
3093
3094							sub do_SLASH {
3095
3096	207			207	0	505	my $self = shift;
3097
3098							# '/'
3099	207					364	my $is_pattern;
3100
3101							# a pattern cannot follow certain keywords which take optional
3102							# arguments, like 'shift' and 'pop'. See also '?'.
3103	207	50	66			995	if (
		50
3104							$last_nonblank_type eq 'k'
3105							&& $is_keyword_rejecting_slash_as_pattern_delimiter{
3106							$last_nonblank_token}
3107							)
3108							{
3109	0					0	$is_pattern = 0;
3110							}
3111							elsif ( $expecting == UNKNOWN ) { # indeterminate, must guess..
3112	0					0	my $msg;
3113	0					0	( $is_pattern, $msg ) =
3114							$self->guess_if_pattern_or_division( $i, $rtokens, $rtoken_map,
3115							$max_token_index );
3116
3117	0	0				0	if ($msg) {
3118	0					0	$self->write_diagnostics("DIVIDE:$msg\n");
3119	0					0	$self->write_logfile_entry($msg);
3120							}
3121							}
3122	207					435	else { $is_pattern = ( $expecting == TERM ) }
3123
3124	207	100				512	if ($is_pattern) {
3125	78					148	$in_quote = 1;
3126	78					170	$type = 'Q';
3127	78					149	$allowed_quote_modifiers = '[msixpodualngc]';
3128							}
3129							else { # not a pattern; check for a /= token
3130
3131	129	50				371	if ( $rtokens->[ $i + 1 ] eq '=' ) { # form token /=
3132	0					0	$i++;
3133	0					0	$tok = '/=';
3134	0					0	$type = $tok;
3135							}
3136
3137							#DEBUG - collecting info on what tokens follow a divide
3138							# for development of guessing algorithm
3139							## if (
3140							## $self->is_possible_numerator( $i, $rtokens,
3141							## $max_token_index ) < 0
3142							## )
3143							## {
3144							## $self->write_diagnostics("DIVIDE? $input_line\n");
3145							## }
3146							}
3147	207					412	return;
3148							} ## end sub do_SLASH
3149
3150							sub do_LEFT_CURLY_BRACKET {
3151
3152	1670			1670	0	3516	my $self = shift;
3153
3154							# '{'
3155							# if we just saw a ')', we will label this block with
3156							# its type. We need to do this to allow sub
3157							# code_block_type to determine if this brace starts a
3158							# code block or anonymous hash. (The type of a paren
3159							# pair is the preceding token, such as 'if', 'else',
3160							# etc).
3161	1670					3068	$container_type = EMPTY_STRING;
3162
3163							# ATTRS: for a '{' following an attribute list, reset
3164							# things to look like we just saw a sub name
3165							# Added 'package' (can be 'class') for --use-feature=class (rt145706)
3166	1670	100	100			16337	if ( substr( $statement_type, 0, 3 ) eq 'sub' ) {
		100	66
		50	33
		100
		50
3167	34					73	$last_nonblank_token = $statement_type;
3168	34					74	$last_nonblank_type = 'S'; # c250 change
3169	34					73	$statement_type = EMPTY_STRING;
3170							}
3171							elsif ( substr( $statement_type, 0, 7 ) eq 'package' ) {
3172	4					6	$last_nonblank_token = $statement_type;
3173	4					8	$last_nonblank_type = 'P'; # c250 change
3174	4					6	$statement_type = EMPTY_STRING;
3175							}
3176
3177							# patch for SWITCH/CASE: hide these keywords from an immediately
3178							# following opening brace
3179							elsif ( ( $statement_type eq 'case' \|\| $statement_type eq 'when' )
3180							&& $statement_type eq $last_nonblank_token )
3181							{
3182	0					0	$last_nonblank_token = ";";
3183							}
3184
3185							elsif ( $last_nonblank_token eq ')' ) {
3186	242					745	$last_nonblank_token = $rparen_type->[ $paren_depth + 1 ];
3187
3188							# defensive move in case of a nesting error (pbug.t)
3189							# in which this ')' had no previous '('
3190							# this nesting error will have been caught
3191	242	50				817	if ( !defined($last_nonblank_token) ) {
3192	0					0	$last_nonblank_token = 'if';
3193							}
3194
3195							# Syntax check at '){'
3196	242	100				818	if ( $is_blocktype_with_paren{$last_nonblank_token} ) {
3197
3198	228					596	my $rvars = $rparen_vars->[ $paren_depth + 1 ];
3199	228	50				756	if ( defined($rvars) ) {
3200	228					469	my ( $type_lp, $want_brace ) = @{$rvars};
	228					879
3201
3202							# OLD: Now verify that this is not a trailing form
3203							# FIX for git #124: we have to skip this check because
3204							# the 'gather' keyword of List::Gather can operate on
3205							# a full statement, so it isn't possible to be sure
3206							# this is a trailing form.
3207	228					466	if ( 0 && !$want_brace ) {
3208							$self->warning(
3209							"syntax error at ') {', unexpected '{' after closing ')' of a trailing '$last_nonblank_token'\n"
3210							);
3211							}
3212							}
3213							}
3214							else {
3215	14	50				63	if ($rOpts_extended_syntax) {
3216
3217							# we append a trailing () to mark this as an unknown
3218							# block type. This allows perltidy to format some
3219							# common extensions of perl syntax.
3220							# This is used by sub code_block_type
3221	14					60	$last_nonblank_token .= '()';
3222							}
3223							else {
3224	0					0	my $list =
3225							join( SPACE, sort keys %is_blocktype_with_paren );
3226	0					0	$self->warning(
3227							"syntax error at ') {', didn't see one of: <<$list>>; If this code is okay try using the -xs flag\n"
3228							);
3229							}
3230							}
3231							}
3232
3233							# patch for paren-less for/foreach glitch, part 2.
3234							# see note below under 'qw'
3235							elsif ($last_nonblank_token eq 'qw'
3236							&& $is_for_foreach{$want_paren} )
3237							{
3238	0					0	$last_nonblank_token = $want_paren;
3239	0	0				0	if ( $last_last_nonblank_token eq $want_paren ) {
3240	0					0	$self->warning(
3241							"syntax error at '$want_paren .. {' -- missing \$ loop variable\n"
3242							);
3243
3244							}
3245	0					0	$want_paren = EMPTY_STRING;
3246							}
3247							else {
3248							## ok: not special
3249							}
3250
3251							# now identify which of the three possible types of
3252							# curly braces we have: hash index container, anonymous
3253							# hash reference, or code block.
3254
3255							# non-structural (hash index) curly brace pair
3256							# get marked 'L' and 'R'
3257	1670	100				4904	if ( is_non_structural_brace() ) {
3258	367					1403	$type = 'L';
3259
3260							# patch for SWITCH/CASE:
3261							# allow paren-less identifier after 'when'
3262							# if the brace is preceded by a space
3263	367	0	33			1459	if ( $statement_type eq 'when'
			33
			0
			0
3264							&& $last_nonblank_type eq 'i'
3265							&& $last_last_nonblank_type eq 'k'
3266							&& ( $i_tok == 0 \|\| $rtoken_type->[ $i_tok - 1 ] eq 'b' ) )
3267							{
3268	0					0	$type = '{';
3269	0					0	$block_type = $statement_type;
3270							}
3271							}
3272
3273							# code and anonymous hash have the same type, '{', but are
3274							# distinguished by 'block_type',
3275							# which will be blank for an anonymous hash
3276							else {
3277
3278	1303					4884	$block_type =
3279							$self->code_block_type( $i_tok, $rtokens, $rtoken_type,
3280							$max_token_index );
3281
3282							# patch to promote bareword type to function taking block
3283	1303	100	100			6136	if ( $block_type
			66
3284							&& $last_nonblank_type eq 'w'
3285							&& $last_nonblank_i >= 0 )
3286							{
3287	34	50				164	if ( $routput_token_type->[$last_nonblank_i] eq 'w' ) {
3288							$routput_token_type->[$last_nonblank_i] =
3289	34	100				148	$is_grep_alias{$block_type} ? 'k' : 'G';
3290							}
3291							}
3292
3293							# patch for SWITCH/CASE: if we find a stray opening block brace
3294							# where we might accept a 'case' or 'when' block, then take it
3295	1303	100	100			5462	if ( $statement_type eq 'case'
3296							\|\| $statement_type eq 'when' )
3297							{
3298	38	100	66			221	if ( !$block_type \|\| $block_type eq '}' ) {
3299	4					7	$block_type = $statement_type;
3300							}
3301							}
3302							}
3303
3304	1670					3983	$rbrace_type->[ ++$brace_depth ] = $block_type;
3305
3306							# Patch for CLASS BLOCK definitions: do not update the package for the
3307							# current depth if this is a BLOCK type definition.
3308							# TODO: should make 'class' separate from 'package' and only do
3309							# this for 'class'
3310	1670	100				5596	$rbrace_package->[$brace_depth] = $current_package
3311							if ( substr( $block_type, 0, 8 ) ne 'package ' );
3312
3313	1670					3507	$rbrace_structural_type->[$brace_depth] = $type;
3314	1670					3221	$rbrace_context->[$brace_depth] = $context;
3315	1670					4958	( $type_sequence, $indent_flag ) =
3316							$self->increase_nesting_depth( BRACE, $rtoken_map->[$i_tok] );
3317	1670					3277	return;
3318							} ## end sub do_LEFT_CURLY_BRACKET
3319
3320							sub do_RIGHT_CURLY_BRACKET {
3321
3322	1670			1670	0	3481	my $self = shift;
3323
3324							# '}'
3325	1670					3769	$block_type = $rbrace_type->[$brace_depth];
3326	1670	100				4162	if ($block_type) { $statement_type = EMPTY_STRING }
	974					1918
3327	1670	100				4031	if ( defined( $rbrace_package->[$brace_depth] ) ) {
3328	1666					3232	$current_package = $rbrace_package->[$brace_depth];
3329							}
3330
3331							# can happen on brace error (caught elsewhere)
3332							else {
3333							}
3334	1670					5174	( $type_sequence, $indent_flag ) =
3335							$self->decrease_nesting_depth( BRACE, $rtoken_map->[$i_tok] );
3336
3337	1670	100				5262	if ( $rbrace_structural_type->[$brace_depth] eq 'L' ) {
3338	367					848	$type = 'R';
3339							}
3340
3341							# propagate type information for 'do' and 'eval' blocks, and also
3342							# for smartmatch operator. This is necessary to enable us to know
3343							# if an operator or term is expected next.
3344	1670	100				4851	if ( $is_block_operator{$block_type} ) {
3345	83					203	$tok = $block_type;
3346							}
3347
3348	1670					2919	$context = $rbrace_context->[$brace_depth];
3349	1670	50				4220	if ( $brace_depth > 0 ) { $brace_depth--; }
	1670					2588
3350	1670					2846	return;
3351							} ## end sub do_RIGHT_CURLY_BRACKET
3352
3353							sub do_AMPERSAND {
3354
3355	126			126	0	334	my $self = shift;
3356
3357							# '&' = maybe sub call? start looking
3358							# We have to check for sub call unless we are sure we
3359							# are expecting an operator. This example from s2p
3360							# got mistaken as a q operator in an early version:
3361							# print BODY &q(<<'EOT');
3362	126	100				433	if ( $expecting != OPERATOR ) {
3363
3364							# But only look for a sub call if we are expecting a term or
3365							# if there is no existing space after the &.
3366							# For example we probably don't want & as sub call here:
3367							# Fcntl::S_IRUSR & $mode;
3368	107	100	66			444	if ( $expecting == TERM \|\| $next_type ne 'b' ) {
3369	104					320	$self->scan_simple_identifier();
3370							}
3371							}
3372							else {
3373							}
3374	126					291	return;
3375							} ## end sub do_AMPERSAND
3376
3377							sub do_LESS_THAN_SIGN {
3378
3379	29			29	0	86	my $self = shift;
3380
3381							# '<' - angle operator or less than?
3382	29	100				147	if ( $expecting != OPERATOR ) {
3383	8					51	( $i, $type ) =
3384							$self->find_angle_operator_termination( $input_line, $i,
3385							$rtoken_map, $expecting, $max_token_index );
3386
3387							## This message is not very helpful and quite confusing if the above
3388							## routine decided not to write a message with the line number.
3389							## if ( $type eq '<' && $expecting == TERM ) {
3390							## $self->error_if_expecting_TERM();
3391							## $self->interrupt_logfile();
3392							## $self->warning("Unterminated <> operator?\n");
3393							## $self->resume_logfile();
3394							## }
3395
3396							}
3397							else {
3398							}
3399	29					65	return;
3400							} ## end sub do_LESS_THAN_SIGN
3401
3402							sub do_QUESTION_MARK {
3403
3404	187			187	0	594	my $self = shift;
3405
3406							# '?' = conditional or starting pattern?
3407	187					455	my $is_pattern;
3408
3409							# Patch for rt #126965
3410							# a pattern cannot follow certain keywords which take optional
3411							# arguments, like 'shift' and 'pop'. See also '/'.
3412	187	100	66			1737	if (
		100
		100
3413							$last_nonblank_type eq 'k'
3414							&& $is_keyword_rejecting_question_as_pattern_delimiter{
3415							$last_nonblank_token}
3416							)
3417							{
3418	1					2	$is_pattern = 0;
3419							}
3420
3421							# patch for RT#131288, user constant function without prototype
3422							# last type is 'U' followed by ?.
3423							elsif ( $last_nonblank_type =~ /^[FUY]$/ ) {
3424	1					5	$is_pattern = 0;
3425							}
3426							elsif ( $expecting == UNKNOWN ) {
3427
3428							# In older versions of Perl, a bare ? can be a pattern
3429							# delimiter. In perl version 5.22 this was
3430							# dropped, but we have to support it in order to format
3431							# older programs. See:
3432							## https://perl.developpez.com/documentations/en/5.22.0/perl5211delta.html
3433							# For example, the following line worked
3434							# at one time:
3435							# ?(.*)? && (print $1,"\n");
3436							# In current versions it would have to be written with slashes:
3437							# /(.*)/ && (print $1,"\n");
3438	11					32	my $msg;
3439	11					72	( $is_pattern, $msg ) =
3440							$self->guess_if_pattern_or_conditional( $i, $rtokens, $rtoken_map,
3441							$max_token_index );
3442
3443	11	50				49	if ($msg) { $self->write_logfile_entry($msg) }
	11					48
3444							}
3445	174					514	else { $is_pattern = ( $expecting == TERM ) }
3446
3447	187	50				573	if ($is_pattern) {
3448	0					0	$in_quote = 1;
3449	0					0	$type = 'Q';
3450	0					0	$allowed_quote_modifiers = '[msixpodualngc]';
3451							}
3452							else {
3453	187					735	( $type_sequence, $indent_flag ) =
3454							$self->increase_nesting_depth( QUESTION_COLON,
3455							$rtoken_map->[$i_tok] );
3456							}
3457	187					447	return;
3458							} ## end sub do_QUESTION_MARK
3459
3460							sub do_STAR {
3461
3462	238			238	0	538	my $self = shift;
3463
3464							# '*' = typeglob, or multiply?
3465	238	50	66			893	if ( $expecting == UNKNOWN && $last_nonblank_type eq 'Z' ) {
3466	0	0	0			0	if ( $next_type ne 'b'
			0
3467							&& $next_type ne '('
3468							&& $next_type ne '#' ) # Fix c036
3469							{
3470	0					0	$expecting = TERM;
3471							}
3472							}
3473	238	100				636	if ( $expecting == TERM ) {
3474	21					88	$self->scan_simple_identifier();
3475							}
3476							else {
3477
3478	217	50				880	if ( $rtokens->[ $i + 1 ] eq '=' ) {
		100
3479	0					0	$tok = '*=';
3480	0					0	$type = $tok;
3481	0					0	$i++;
3482							}
3483							elsif ( $rtokens->[ $i + 1 ] eq '*' ) {
3484	36					95	$tok = '**';
3485	36					86	$type = $tok;
3486	36					62	$i++;
3487	36	50				125	if ( $rtokens->[ $i + 1 ] eq '=' ) {
3488	0					0	$tok = '**=';
3489	0					0	$type = $tok;
3490	0					0	$i++;
3491							}
3492							}
3493							else {
3494							## not multiple characters
3495							}
3496							}
3497	238					439	return;
3498							} ## end sub do_STAR
3499
3500							sub do_DOT {
3501
3502	150			150	0	339	my $self = shift;
3503
3504							# '.' = what kind of . ?
3505	150	100				482	if ( $expecting != OPERATOR ) {
3506	10					40	$self->scan_number();
3507	10	100				45	if ( $type eq '.' ) {
3508	2	50				7	$self->error_if_expecting_TERM()
3509							if ( $expecting == TERM );
3510							}
3511							}
3512							else {
3513							}
3514	150					302	return;
3515							} ## end sub do_DOT
3516
3517							sub do_COLON {
3518
3519	271			271	0	714	my $self = shift;
3520
3521							# ':' = label, ternary, attribute, ?
3522
3523							# if this is the first nonblank character, call it a label
3524							# since perl seems to just swallow it
3525	271	50	66			3725	if ( $input_line_number == 1 && $last_nonblank_i == -1 ) {
		100	66
		100	66
		100	66
		100
3526	0					0	$type = 'J';
3527							}
3528
3529							# ATTRS: check for a ':' which introduces an attribute list
3530							# either after a 'sub' keyword or within a paren list
3531							# Added 'package' (can be 'class') for --use-feature=class (rt145706)
3532							elsif ( $statement_type =~ /^(sub\|package)\b/ ) {
3533	22					57	$type = 'A';
3534	22					50	$self->[_in_attribute_list_] = 1;
3535							}
3536
3537							# Within a signature, unless we are in a ternary. For example,
3538							# from 't/filter_example.t':
3539							# method foo4 ( $class: $bar ) { $class->bar($bar) }
3540							elsif ( $rparen_type->[$paren_depth] =~ /^sub\b/
3541							&& !is_balanced_closing_container(QUESTION_COLON) )
3542							{
3543	1					6	$type = 'A';
3544	1					3	$self->[_in_attribute_list_] = 1;
3545							}
3546
3547							# check for scalar attribute, such as
3548							# my $foo : shared = 1;
3549							elsif ($is_my_our_state{$statement_type}
3550							&& $rcurrent_depth->[QUESTION_COLON] == 0 )
3551							{
3552	15					38	$type = 'A';
3553	15					38	$self->[_in_attribute_list_] = 1;
3554							}
3555
3556							# Look for Switch::Plain syntax if an error would otherwise occur
3557							# here. Note that we do not need to check if the extended syntax
3558							# flag is set because otherwise an error would occur, and we would
3559							# then have to output a message telling the user to set the
3560							# extended syntax flag to avoid the error.
3561							# case 1: {
3562							# default: {
3563							# default:
3564							# Note that the line 'default:' will be parsed as a label elsewhere.
3565							elsif ( $is_case_default{$statement_type}
3566							&& !is_balanced_closing_container(QUESTION_COLON) )
3567							{
3568							# mark it as a perltidy label type
3569	46					100	$type = 'J';
3570							}
3571
3572							# otherwise, it should be part of a ?/: operator
3573							else {
3574	187					792	( $type_sequence, $indent_flag ) =
3575							$self->decrease_nesting_depth( QUESTION_COLON,
3576							$rtoken_map->[$i_tok] );
3577	187	50				1099	if ( $last_nonblank_token eq '?' ) {
3578	0					0	$self->warning("Syntax error near ? :\n");
3579							}
3580							}
3581	271					526	return;
3582							} ## end sub do_COLON
3583
3584							sub do_PLUS_SIGN {
3585
3586	227			227	0	533	my $self = shift;
3587
3588							# '+' = what kind of plus?
3589	227	100				896	if ( $expecting == TERM ) {
		100
3590	13					61	my $number = $self->scan_number_fast();
3591
3592							# unary plus is safest assumption if not a number
3593	13	50				52	if ( !defined($number) ) { $type = 'p'; }
	13					31
3594							}
3595							elsif ( $expecting == OPERATOR ) {
3596							}
3597							else {
3598	3	100				11	if ( $next_type eq 'w' ) { $type = 'p' }
	2					4
3599							}
3600	227					399	return;
3601							} ## end sub do_PLUS_SIGN
3602
3603							sub do_AT_SIGN {
3604
3605	438			438	0	1083	my $self = shift;
3606
3607							# '@' = sigil for array?
3608	438	50				1398	$self->error_if_expecting_OPERATOR("Array")
3609							if ( $expecting == OPERATOR );
3610	438					1729	$self->scan_simple_identifier();
3611	438					846	return;
3612							} ## end sub do_AT_SIGN
3613
3614							sub do_PERCENT_SIGN {
3615
3616	202			202	0	545	my $self = shift;
3617
3618							# '%' = hash or modulo?
3619							# first guess is hash if no following blank or paren
3620	202	50				763	if ( $expecting == UNKNOWN ) {
3621	0	0	0			0	if ( $next_type ne 'b' && $next_type ne '(' ) {
3622	0					0	$expecting = TERM;
3623							}
3624							}
3625	202	100				716	if ( $expecting == TERM ) {
3626	192					700	$self->scan_simple_identifier();
3627							}
3628	202					1509	return;
3629							} ## end sub do_PERCENT_SIGN
3630
3631							sub do_LEFT_SQUARE_BRACKET {
3632
3633	595			595	0	1309	my $self = shift;
3634
3635							# '['
3636	595					1421	$rsquare_bracket_type->[ ++$square_bracket_depth ] =
3637							$last_nonblank_token;
3638	595					2152	( $type_sequence, $indent_flag ) =
3639							$self->increase_nesting_depth( SQUARE_BRACKET,
3640							$rtoken_map->[$i_tok] );
3641
3642							# It may seem odd, but structural square brackets have
3643							# type '{' and '}'. This simplifies the indentation logic.
3644	595	100				2126	if ( !is_non_structural_brace() ) {
3645	288					762	$type = '{';
3646							}
3647	595					1449	$rsquare_bracket_structural_type->[$square_bracket_depth] = $type;
3648	595					1150	return;
3649							} ## end sub do_LEFT_SQUARE_BRACKET
3650
3651							sub do_RIGHT_SQUARE_BRACKET {
3652
3653	595			595	0	2223	my $self = shift;
3654
3655							# ']'
3656	595					2244	( $type_sequence, $indent_flag ) =
3657							$self->decrease_nesting_depth( SQUARE_BRACKET,
3658							$rtoken_map->[$i_tok] );
3659
3660	595	100				2297	if ( $rsquare_bracket_structural_type->[$square_bracket_depth] eq '{' )
3661							{
3662	288					578	$type = '}';
3663							}
3664
3665							# propagate type information for smartmatch operator. This is
3666							# necessary to enable us to know if an operator or term is expected
3667							# next.
3668	595	100				1777	if ( $rsquare_bracket_type->[$square_bracket_depth] eq '~~' ) {
3669	20					43	$tok = $rsquare_bracket_type->[$square_bracket_depth];
3670							}
3671
3672	595	50				1594	if ( $square_bracket_depth > 0 ) { $square_bracket_depth--; }
	595					2032
3673	595					1040	return;
3674							} ## end sub do_RIGHT_SQUARE_BRACKET
3675
3676							sub do_MINUS_SIGN {
3677
3678	441			441	0	995	my $self = shift;
3679
3680							# '-' = what kind of minus?
3681	441	100	100			3020	if ( ( $expecting != OPERATOR )
		100
		100
3682							&& $is_file_test_operator{$next_tok} )
3683							{
3684	10					77	my ( $next_nonblank_token, $i_next ) =
3685							$self->find_next_nonblank_token( $i + 1, $rtokens,
3686							$max_token_index );
3687
3688							# check for a quoted word like "-w=>xx";
3689							# it is sufficient to just check for a following '='
3690	10	50				80	if ( $next_nonblank_token eq '=' ) {
3691	0					0	$type = 'm';
3692							}
3693							else {
3694	10					24	$i++;
3695	10					32	$tok .= $next_tok;
3696	10					57	$type = 'F';
3697							}
3698							}
3699							elsif ( $expecting == TERM ) {
3700	330					1070	my $number = $self->scan_number_fast();
3701
3702							# maybe part of bareword token? unary is safest
3703	330	100				928	if ( !defined($number) ) { $type = 'm'; }
	288					557
3704
3705							}
3706							elsif ( $expecting == OPERATOR ) {
3707							}
3708							else {
3709
3710	4	50				18	if ( $next_type eq 'w' ) {
3711	4					14	$type = 'm';
3712							}
3713							}
3714	441					760	return;
3715							} ## end sub do_MINUS_SIGN
3716
3717							sub do_CARAT_SIGN {
3718
3719	12			12	0	23	my $self = shift;
3720
3721							# '^'
3722							# check for special variables like ${^WARNING_BITS}
3723	12	100				40	if ( $expecting == TERM ) {
3724
3725	5	50	33			74	if ( $last_nonblank_token eq '{'
			33
3726							&& ( $next_tok !~ /^\d/ )
3727							&& ( $next_tok =~ /^\w/ ) )
3728							{
3729
3730	5	100				21	if ( $next_tok eq 'W' ) {
3731	1					2	$self->[_saw_perl_dash_w_] = 1;
3732							}
3733	5					15	$tok = $tok . $next_tok;
3734	5					12	$i = $i + 1;
3735	5					9	$type = 'w';
3736
3737							# Optional coding to try to catch syntax errors. This can
3738							# be removed if it ever causes incorrect warning messages.
3739							# The '{^' should be preceded by either by a type or '$#'
3740							# Examples:
3741							# $#{^CAPTURE} ok
3742							# *${^LAST_FH}{NAME} ok
3743							# @{^HOWDY} ok
3744							# $hash{^HOWDY} error
3745
3746							# Note that a type sigil '$' may be tokenized as 'Z'
3747							# after something like 'print', so allow type 'Z'
3748	5	0	33			19	if ( $last_last_nonblank_type ne 't'
			33
3749							&& $last_last_nonblank_type ne 'Z'
3750							&& $last_last_nonblank_token ne '$#' )
3751							{
3752	0					0	$self->warning("Possible syntax error near '{^'\n");
3753							}
3754							}
3755
3756							else {
3757	0	0				0	if ( !$self->error_if_expecting_TERM() ) {
3758
3759							# Something like this is valid but strange:
3760							# undef ^I;
3761	0					0	$self->complain("The '^' seems unusual here\n");
3762							}
3763							}
3764							}
3765	12					23	return;
3766							} ## end sub do_CARAT_SIGN
3767
3768							sub do_DOUBLE_COLON {
3769
3770	9			9	0	21	my $self = shift;
3771
3772							# '::' = probably a sub call
3773	9					25	$self->scan_bare_identifier();
3774	9					18	return;
3775							} ## end sub do_DOUBLE_COLON
3776
3777							sub do_LEFT_SHIFT {
3778
3779	7			7	0	28	my $self = shift;
3780
3781							# '<<' = maybe a here-doc?
3782	7	50				36	if ( $expecting != OPERATOR ) {
3783	7					28	my ( $found_target, $here_doc_target, $here_quote_character,
3784							$saw_error );
3785							(
3786	7					43	$found_target, $here_doc_target, $here_quote_character, $i,
3787							$saw_error
3788							)
3789							= $self->find_here_doc( $expecting, $i, $rtokens, $rtoken_map,
3790							$max_token_index );
3791
3792	7	50				35	if ($found_target) {
		0
3793	7					19	push @{$rhere_target_list},
	7					27
3794							[ $here_doc_target, $here_quote_character ];
3795	7					26	$type = 'h';
3796	7	50				75	if ( length($here_doc_target) > 80 ) {
		50
		100
3797	0					0	my $truncated = substr( $here_doc_target, 0, 80 );
3798	0					0	$self->complain("Long here-target: '$truncated' ...\n");
3799							}
3800							elsif ( !$here_doc_target ) {
3801	0	0				0	$self->warning(
3802							'Use of bare << to mean <<"" is deprecated' . "\n" )
3803							if ( !$here_quote_character );
3804							}
3805							elsif ( $here_doc_target !~ /^[A-Z_]\w+$/ ) {
3806	2					17	$self->complain(
3807							"Unconventional here-target: '$here_doc_target'\n");
3808							}
3809							else {
3810							## ok: nothing to complain about
3811							}
3812							}
3813							elsif ( $expecting == TERM ) {
3814	0	0				0	if ( !$saw_error ) {
3815
3816							# shouldn't happen..arriving here implies an error in
3817							# the logic in sub 'find_here_doc'
3818	0					0	if (DEVEL_MODE) {
3819							$self->Fault(<<EOM);
3820							Program bug; didn't find here doc target
3821							EOM
3822							}
3823							$self->warning(
3824	0					0	"Possible program error: didn't find here doc target\n"
3825							);
3826	0					0	$self->report_definite_bug();
3827							}
3828							}
3829
3830							# target not found, expecting == UNKNOWN
3831							else {
3832							# assume it is a shift
3833							}
3834							}
3835							else {
3836							}
3837	7					21	return;
3838							} ## end sub do_LEFT_SHIFT
3839
3840							sub do_NEW_HERE_DOC {
3841
3842							# '<<~' = a here-doc, new type added in v26
3843
3844	2			2	0	5	my $self = shift;
3845
3846							return
3847	2	50				9	if ( $i >= $max_token_index ); # here-doc not possible if end of line
3848	2	50				10	if ( $expecting != OPERATOR ) {
3849	2					5	my ( $found_target, $here_doc_target, $here_quote_character,
3850							$saw_error );
3851							(
3852	2					14	$found_target, $here_doc_target, $here_quote_character, $i,
3853							$saw_error
3854							)
3855							= $self->find_here_doc( $expecting, $i, $rtokens, $rtoken_map,
3856							$max_token_index );
3857
3858	2	50				9	if ($found_target) {
		0
3859
3860	2	50				21	if ( length($here_doc_target) > 80 ) {
		50
3861	0					0	my $truncated = substr( $here_doc_target, 0, 80 );
3862	0					0	$self->complain("Long here-target: '$truncated' ...\n");
3863							}
3864							elsif ( $here_doc_target !~ /^[A-Z_]\w+$/ ) {
3865	0					0	$self->complain(
3866							"Unconventional here-target: '$here_doc_target'\n");
3867							}
3868							else {
3869							## ok: nothing to complain about
3870							}
3871
3872							# Note that we put a leading space on the here quote
3873							# character indicate that it may be preceded by spaces
3874	2					7	$here_quote_character = SPACE . $here_quote_character;
3875	2					6	push @{$rhere_target_list},
	2					9
3876							[ $here_doc_target, $here_quote_character ];
3877	2					6	$type = 'h';
3878							}
3879
3880							# target not found ..
3881							elsif ( $expecting == TERM ) {
3882	0	0				0	if ( !$saw_error ) {
3883
3884							# shouldn't happen..arriving here implies an error in
3885							# the logic in sub 'find_here_doc'
3886	0					0	if (DEVEL_MODE) {
3887							$self->Fault(<<EOM);
3888							Program bug; didn't find here doc target
3889							EOM
3890							}
3891							$self->warning(
3892	0					0	"Possible program error: didn't find here doc target\n"
3893							);
3894	0					0	$self->report_definite_bug();
3895							}
3896							}
3897
3898							# Target not found, expecting==UNKNOWN
3899							else {
3900	0					0	$self->warning("didn't find here doc target after '<<~'\n");
3901							}
3902							}
3903							else {
3904	0					0	$self->error_if_expecting_OPERATOR();
3905							}
3906	2					9	return;
3907							} ## end sub do_NEW_HERE_DOC
3908
3909							sub do_POINTER {
3910
3911							# '->'
3912	887			887	0	1680	return;
3913							}
3914
3915							sub do_PLUS_PLUS {
3916
3917	46			46	0	178	my $self = shift;
3918
3919							# '++'
3920							# type = 'pp' for pre-increment, '++' for post-increment
3921	46	100				242	if ( $expecting == OPERATOR ) { $type = '++' }
	37	100				122
3922	7					17	elsif ( $expecting == TERM ) { $type = 'pp' }
3923
3924							# handle ( $expecting == UNKNOWN )
3925							else {
3926
3927							# look ahead ..
3928	2					18	my ( $next_nonblank_token, $i_next ) =
3929							$self->find_next_nonblank_token( $i, $rtokens, $max_token_index );
3930
3931							# Fix for c042: look past a side comment
3932	2	50				23	if ( $next_nonblank_token eq '#' ) {
3933	0					0	( $next_nonblank_token, $i_next ) =
3934							$self->find_next_nonblank_token( $max_token_index,
3935							$rtokens, $max_token_index );
3936							}
3937
3938	2	50				16	if ( $next_nonblank_token eq '$' ) { $type = 'pp' }
	0					0
3939							}
3940	46					98	return;
3941							} ## end sub do_PLUS_PLUS
3942
3943							sub do_FAT_COMMA {
3944
3945	1025			1025	0	1918	my $self = shift;
3946
3947							# '=>'
3948	1025	50				2439	if ( $last_nonblank_type eq $tok ) {
3949	0					0	$self->complain("Repeated '=>'s \n");
3950							}
3951
3952							# patch for operator_expected: note if we are in the list (use.t)
3953							# TODO: make version numbers a new token type
3954	1025	100				2393	if ( $statement_type eq 'use' ) { $statement_type = '_use' }
	18					44
3955	1025					1733	return;
3956							} ## end sub do_FAT_COMMA
3957
3958							sub do_MINUS_MINUS {
3959
3960	2			2	0	5	my $self = shift;
3961
3962							# '--'
3963							# type = 'mm' for pre-decrement, '--' for post-decrement
3964
3965	2	50				13	if ( $expecting == OPERATOR ) { $type = '--' }
	0	50				0
3966	2					6	elsif ( $expecting == TERM ) { $type = 'mm' }
3967
3968							# handle ( $expecting == UNKNOWN )
3969							else {
3970
3971							# look ahead ..
3972	0					0	my ( $next_nonblank_token, $i_next ) =
3973							$self->find_next_nonblank_token( $i, $rtokens, $max_token_index );
3974
3975							# Fix for c042: look past a side comment
3976	0	0				0	if ( $next_nonblank_token eq '#' ) {
3977	0					0	( $next_nonblank_token, $i_next ) =
3978							$self->find_next_nonblank_token( $max_token_index,
3979							$rtokens, $max_token_index );
3980							}
3981
3982	0	0				0	if ( $next_nonblank_token eq '$' ) { $type = 'mm' }
	0					0
3983							}
3984
3985	2					5	return;
3986							} ## end sub do_MINUS_MINUS
3987
3988							sub do_LOGICAL_AND {
3989
3990	58			58	0	123	my $self = shift;
3991
3992							# '&&'
3993	58	50	33			274	$self->error_if_expecting_TERM()
3994							if ( $expecting == TERM && $last_nonblank_token ne ',' ); #c015
3995	58					113	return;
3996							} ## end sub do_LOGICAL_AND
3997
3998							sub do_LOGICAL_OR {
3999
4000	74			74	0	181	my $self = shift;
4001
4002							# '\|\|'
4003	74	100	66			398	$self->error_if_expecting_TERM()
4004							if ( $expecting == TERM && $last_nonblank_token ne ',' ); #c015
4005	74					165	return;
4006							} ## end sub do_LOGICAL_OR
4007
4008							sub do_SLASH_SLASH {
4009
4010	10			10	0	20	my $self = shift;
4011
4012							# '//'
4013	10	100				35	$self->error_if_expecting_TERM()
4014							if ( $expecting == TERM );
4015	10					20	return;
4016							} ## end sub do_SLASH_SLASH
4017
4018							sub do_DIGITS {
4019
4020	1934			1934	0	3337	my $self = shift;
4021
4022							# 'd' = string of digits
4023	1934	50				4278	$self->error_if_expecting_OPERATOR("Number")
4024							if ( $expecting == OPERATOR );
4025
4026	1934					4857	my $number = $self->scan_number_fast();
4027	1934	50				4583	if ( !defined($number) ) {
4028
4029							# shouldn't happen - we should always get a number
4030	0					0	if (DEVEL_MODE) {
4031							$self->Fault(<<EOM);
4032							non-number beginning with digit--program bug
4033							EOM
4034							}
4035							$self->warning(
4036	0					0	"Unexpected error condition: non-number beginning with digit\n"
4037							);
4038	0					0	$self->report_definite_bug();
4039							}
4040	1934					3206	return;
4041							} ## end sub do_DIGITS
4042
4043							sub do_ATTRIBUTE_LIST {
4044
4045	39			39	0	98	my ( $self, $next_nonblank_token ) = @_;
4046
4047							# Called at a bareword encountered while in an attribute list
4048							# returns 'is_attribute':
4049							# true if attribute found
4050							# false if an attribute (continue parsing bareword)
4051
4052							# treat bare word followed by open paren like qw(
4053	39	100				113	if ( $next_nonblank_token eq '(' ) {
4054
4055							# For something like:
4056							# : prototype($$)
4057							# we should let do_scan_sub see it so that it can see
4058							# the prototype. All other attributes get parsed as a
4059							# quoted string.
4060	18	100				88	if ( $tok eq 'prototype' ) {
4061	2					7	$id_scan_state = 'prototype';
4062
4063							# start just after the word 'prototype'
4064	2					9	my $i_beg = $i + 1;
4065	2					22	( $i, $tok, $type, $id_scan_state ) = $self->do_scan_sub(
4066							{
4067							input_line => $input_line,
4068							i => $i,
4069							i_beg => $i_beg,
4070							tok => $tok,
4071							type => $type,
4072							rtokens => $rtokens,
4073							rtoken_map => $rtoken_map,
4074							id_scan_state => $id_scan_state,
4075							max_token_index => $max_token_index,
4076							}
4077							);
4078
4079							# If successful, mark as type 'q' to be consistent
4080							# with other attributes. Type 'w' would also work.
4081	2	50				14	if ( $i > $i_beg ) {
4082	2					5	$type = 'q';
4083	2					6	return 1;
4084							}
4085
4086							# If not successful, continue and parse as a quote.
4087							}
4088
4089							# All other attribute lists must be parsed as quotes
4090							# (see 'signatures.t' for good examples)
4091	16					76	$in_quote = $quote_items{'q'};
4092	16					48	$allowed_quote_modifiers = $quote_modifiers{'q'};
4093	16					34	$type = 'q';
4094	16					31	$quote_type = 'q';
4095	16					39	return 1;
4096							}
4097
4098							# handle bareword not followed by open paren
4099							else {
4100	21					57	$type = 'w';
4101	21					53	return 1;
4102							}
4103
4104							# attribute not found
4105	0					0	return;
4106							} ## end sub do_ATTRIBUTE_LIST
4107
4108							sub do_QUOTED_BAREWORD {
4109
4110	786			786	0	1507	my $self = shift;
4111
4112							# find type of a bareword followed by a '=>'
4113	786	100				4420	if ( $ris_constant->{$current_package}{$tok} ) {
		50
		50
4114	14					36	$type = 'C';
4115							}
4116							elsif ( $ris_user_function->{$current_package}{$tok} ) {
4117	0					0	$type = 'U';
4118	0					0	$prototype = $ruser_function_prototype->{$current_package}{$tok};
4119							}
4120							elsif ( $tok =~ /^v\d+$/ ) {
4121	0					0	$type = 'v';
4122	0					0	$self->report_v_string($tok);
4123							}
4124							else {
4125
4126							# Bareword followed by a fat comma - see 'git18.in'
4127							# If tok is something like 'x17' then it could
4128							# actually be operator x followed by number 17.
4129							# For example, here:
4130							# 123x17 => [ 792, 1224 ],
4131							# (a key of 123 repeated 17 times, perhaps not
4132							# what was intended). We will mark x17 as type
4133							# 'n' and it will be split. If the previous token
4134							# was also a bareword then it is not very clear is
4135							# going on. In this case we will not be sure that
4136							# an operator is expected, so we just mark it as a
4137							# bareword. Perl is a little murky in what it does
4138							# with stuff like this, and its behavior can change
4139							# over time. Something like
4140							# a x18 => [792, 1224], will compile as
4141							# a key with 18 a's. But something like
4142							# push @array, a x18;
4143							# is a syntax error.
4144	772	100	66			2730	if (
			33
			66
4145							$expecting == OPERATOR
4146							&& substr( $tok, 0, 1 ) eq 'x'
4147							&& ( length($tok) == 1
4148							\|\| substr( $tok, 1, 1 ) =~ /^\d/ )
4149							)
4150							{
4151	3					8	$type = 'n';
4152	3	50				12	if ( $self->split_pretoken(1) ) {
4153	3					7	$type = 'x';
4154	3					6	$tok = 'x';
4155							}
4156							}
4157							else {
4158
4159							# git #18
4160	769					1354	$type = 'w';
4161	769					1995	$self->error_if_expecting_OPERATOR();
4162							}
4163							}
4164	786					1284	return;
4165							} ## end sub do_QUOTED_BAREWORD
4166
4167							sub do_X_OPERATOR {
4168
4169	17			17	0	51	my $self = shift;
4170
4171	17	100				73	if ( $tok eq 'x' ) {
4172	15	50				74	if ( $rtokens->[ $i + 1 ] eq '=' ) { # x=
4173	0					0	$tok = 'x=';
4174	0					0	$type = $tok;
4175	0					0	$i++;
4176							}
4177							else {
4178	15					44	$type = 'x';
4179							}
4180							}
4181							else {
4182
4183							# Split a pretoken like 'x10' into 'x' and '10'.
4184							# Note: In previous versions of perltidy it was marked
4185							# as a number, $type = 'n', and fixed downstream by the
4186							# Formatter.
4187	2					5	$type = 'n';
4188	2	50				9	if ( $self->split_pretoken(1) ) {
4189	2					10	$type = 'x';
4190	2					16	$tok = 'x';
4191							}
4192							}
4193	17					33	return;
4194							} ## end sub do_X_OPERATOR
4195
4196							sub do_USE_CONSTANT {
4197
4198	16			16	0	42	my $self = shift;
4199
4200	16					62	$self->scan_bare_identifier();
4201	16					111	my ( $next_nonblank_tok2, $i_next2 ) =
4202							$self->find_next_nonblank_token( $i, $rtokens, $max_token_index );
4203
4204	16	50				111	if ($next_nonblank_tok2) {
4205
4206	16	100				91	if ( $is_keyword{$next_nonblank_tok2} ) {
4207
4208							# Assume qw is used as a quote and okay, as in:
4209							# use constant qw{ DEBUG 0 };
4210							# Not worth trying to parse for just a warning
4211
4212							# NOTE: This warning is deactivated because recent
4213							# versions of perl do not complain here, but
4214							# the coding is retained for reference.
4215	1					2	if ( 0 && $next_nonblank_tok2 ne 'qw' ) {
4216							$self->warning(
4217							"Attempting to define constant '$next_nonblank_tok2' which is a perl keyword\n"
4218							);
4219							}
4220							}
4221
4222							else {
4223	15					61	$ris_constant->{$current_package}{$next_nonblank_tok2} = 1;
4224							}
4225							}
4226	16					41	return;
4227							} ## end sub do_USE_CONSTANT
4228
4229							sub do_KEYWORD {
4230
4231	2644			2644	0	4465	my $self = shift;
4232
4233							# found a keyword - set any associated flags
4234	2644					4563	$type = 'k';
4235
4236							# Since for and foreach may not be followed immediately
4237							# by an opening paren, we have to remember which keyword
4238							# is associated with the next '('
4239							# Previously, before update c230 : if ( $is_for_foreach{$tok} ) {
4240							##(if elsif unless while until for foreach switch case given when catch)
4241	2644	100				7584	if ( $is_blocktype_with_paren{$tok} ) {
4242	396	100				1488	if ( new_statement_ok() ) {
4243	309					721	$want_paren = $tok;
4244							}
4245							}
4246
4247							# recognize 'use' statements, which are special
4248	2644	100	100			19373	if ( $is_use_require{$tok} ) {
		100	66
		100
		100
		100
		100
4249	175					359	$statement_type = $tok;
4250	175	50				511	$self->error_if_expecting_OPERATOR()
4251							if ( $expecting == OPERATOR );
4252							}
4253
4254							# remember my and our to check for trailing ": shared"
4255							elsif ( $is_my_our_state{$tok} ) {
4256	629					1358	$statement_type = $tok;
4257							}
4258
4259							# Check for misplaced 'elsif' and 'else', but allow isolated
4260							# else or elsif blocks to be formatted. This is indicated
4261							# by a last noblank token of ';'
4262							elsif ( $tok eq 'elsif' ) {
4263	29	50	66			221	if (
4264							$last_nonblank_token ne ';'
4265
4266							## !~ /^(if\|elsif\|unless)$/
4267							&& !$is_if_elsif_unless{$last_nonblank_block_type}
4268							)
4269							{
4270	0					0	$self->warning(
4271							"expecting '$tok' to follow one of 'if\|elsif\|unless'\n");
4272							}
4273							}
4274							elsif ( $tok eq 'else' ) {
4275
4276							# patched for SWITCH/CASE
4277	44	50	66			598	if (
			66
4278							$last_nonblank_token ne ';'
4279
4280							## !~ /^(if\|elsif\|unless\|case\|when)$/
4281							&& !$is_if_elsif_unless_case_when{$last_nonblank_block_type}
4282
4283							# patch to avoid an unwanted error message for
4284							# the case of a parenless 'case' (RT 105484):
4285							# switch ( 1 ) { case x { 2 } else { } }
4286							## !~ /^(if\|elsif\|unless\|case\|when)$/
4287							&& !$is_if_elsif_unless_case_when{$statement_type}
4288							)
4289							{
4290	0					0	$self->warning(
4291							"expecting '$tok' to follow one of 'if\|elsif\|unless\|case\|when'\n"
4292							);
4293							}
4294							}
4295
4296							# patch for SWITCH/CASE if 'case' and 'when are
4297							# treated as keywords. Also 'default' for Switch::Plain
4298							elsif ($tok eq 'when'
4299							\|\| $tok eq 'case'
4300							\|\| $tok eq 'default' )
4301							{
4302	56					111	$statement_type = $tok; # next '{' is block
4303							}
4304
4305							# feature 'err' was removed in Perl 5.10. So mark this as
4306							# a bareword unless an operator is expected (see c158).
4307							elsif ( $tok eq 'err' ) {
4308	1	50				5	if ( $expecting != OPERATOR ) { $type = 'w' }
	1					3
4309							}
4310							else {
4311							## no special treatment needed
4312							}
4313
4314	2644					4709	return;
4315							} ## end sub do_KEYWORD
4316
4317							sub do_QUOTE_OPERATOR {
4318
4319	202			202	0	434	my $self = shift;
4320
4321	202	50				674	if ( $expecting == OPERATOR ) {
4322
4323							# Be careful not to call an error for a qw quote
4324							# where a parenthesized list is allowed. For example,
4325							# it could also be a for/foreach construct such as
4326							#
4327							# foreach my $key qw\Uno Due Tres Quadro\ {
4328							# print "Set $key\n";
4329							# }
4330							#
4331
4332							# Or it could be a function call.
4333							# NOTE: Braces in something like &{ xxx } are not
4334							# marked as a block, we might have a method call.
4335							# &method(...), $method->(..), &{method}(...),
4336							# $ref[2](list) is ok & short for $ref[2]->(list)
4337							#
4338							# See notes in 'sub code_block_type' and
4339							# 'sub is_non_structural_brace'
4340
4341							my $paren_list_possible = $tok eq 'qw'
4342							&& ( $last_nonblank_token =~ /^([\]\}\&]\|\-\>)/
4343	0		0			0	\|\| $is_for_foreach{$want_paren} );
4344
4345	0	0				0	if ( !$paren_list_possible ) {
4346	0					0	$self->error_if_expecting_OPERATOR();
4347							}
4348							}
4349	202					528	$in_quote = $quote_items{$tok};
4350	202					537	$allowed_quote_modifiers = $quote_modifiers{$tok};
4351
4352							# All quote types are 'Q' except possibly qw quotes.
4353							# qw quotes are special in that they may generally be trimmed
4354							# of leading and trailing whitespace. So they are given a
4355							# separate type, 'q', unless requested otherwise.
4356	202	100	66			982	$type =
4357							( $tok eq 'qw' && $rOpts_trim_qw )
4358							? 'q'
4359							: 'Q';
4360	202					404	$quote_type = $type;
4361	202					394	return;
4362							} ## end sub do_QUOTE_OPERATOR
4363
4364							sub do_UNKNOWN_BAREWORD {
4365
4366	958			958	0	2322	my ( $self, $next_nonblank_token ) = @_;
4367
4368	958					3298	$self->scan_bare_identifier();
4369
4370	958	100	100			3608	if ( $statement_type eq 'use'
4371							&& $last_nonblank_token eq 'use' )
4372							{
4373	108					418	$rsaw_use_module->{$current_package}->{$tok} = 1;
4374							}
4375
4376	958	100				2590	if ( $type eq 'w' ) {
4377
4378	933	50				2499	if ( $expecting == OPERATOR ) {
4379
4380							# Patch to avoid error message for RPerl overloaded
4381							# operator functions: use overload
4382							# '+' => \&sse_add,
4383							# '-' => \&sse_sub,
4384							# '*' => \&sse_mul,
4385							# '/' => \&sse_div;
4386							# TODO: this could eventually be generalized
4387	0	0	0			0	if ( $rsaw_use_module->{$current_package}->{'RPerl'}
		0
		0
4388							&& $tok =~ /^sse_(mul\|div\|add\|sub)$/ )
4389							{
4390
4391							}
4392
4393							# Fix part 1 for git #63 in which a comment falls
4394							# between an -> and the following word. An
4395							# alternate fix would be to change operator_expected
4396							# to return an UNKNOWN for this type.
4397							elsif ( $last_nonblank_type eq '->' ) {
4398
4399							}
4400
4401							# don't complain about possible indirect object
4402							# notation.
4403							# For example:
4404							# package main;
4405							# sub new($) { ... }
4406							# $b = new A::; # calls A::new
4407							# $c = new A; # same thing but suspicious
4408							# This will call A::new but we have a 'new' in
4409							# main:: which looks like a constant.
4410							#
4411							elsif ( $last_nonblank_type eq 'C' ) {
4412	0	0				0	if ( $tok !~ /::$/ ) {
4413	0					0	$self->complain(<<EOM);
4414							Expecting operator after '$last_nonblank_token' but found bare word '$tok'
4415							Maybe indirectet object notation?
4416							EOM
4417							}
4418							}
4419							else {
4420	0					0	$self->error_if_expecting_OPERATOR("bareword");
4421							}
4422							}
4423
4424							# mark bare words immediately followed by a paren as
4425							# functions
4426	933					2502	$next_tok = $rtokens->[ $i + 1 ];
4427	933	100				2648	if ( $next_tok eq '(' ) {
4428
4429							# Patch for issue c151, where we are processing a snippet and
4430							# have not seen that SPACE is a constant. In this case 'x' is
4431							# probably an operator. The only disadvantage with an incorrect
4432							# guess is that the space after it may be incorrect. For example
4433							# $str .= SPACE x ( 16 - length($str) ); See also b1410.
4434	276	50	33			1510	if ( $tok eq 'x' && $last_nonblank_type eq 'w' ) { $type = 'x' }
	0	50				0
4435
4436							# Fix part 2 for git #63. Leave type as 'w' to keep
4437							# the type the same as if the -> were not separated
4438	276					578	elsif ( $last_nonblank_type ne '->' ) { $type = 'U' }
4439
4440							# not a special case
4441							else { }
4442
4443							}
4444
4445							# underscore after file test operator is file handle
4446	933	50	66			3254	if ( $tok eq '_' && $last_nonblank_type eq 'F' ) {
4447	0					0	$type = 'Z';
4448							}
4449
4450							# patch for SWITCH/CASE if 'case' and 'when are
4451							# not treated as keywords:
4452	933	50	33			4873	if (
			33
			33
4453							( $tok eq 'case' && $rbrace_type->[$brace_depth] eq 'switch' )
4454							\|\| ( $tok eq 'when'
4455							&& $rbrace_type->[$brace_depth] eq 'given' )
4456							)
4457							{
4458	0					0	$statement_type = $tok; # next '{' is block
4459	0					0	$type = 'k'; # for keyword syntax coloring
4460							}
4461	933	100				2772	if ( $next_nonblank_token eq '(' ) {
4462
4463							# patch for SWITCH/CASE if switch and given not keywords
4464							# Switch is not a perl 5 keyword, but we will gamble
4465							# and mark switch followed by paren as a keyword. This
4466							# is only necessary to get html syntax coloring nice,
4467							# and does not commit this as being a switch/case.
4468	241	50	33			1981	if ( $tok eq 'switch' \|\| $tok eq 'given' ) {
		50	33
4469	0					0	$type = 'k'; # for keyword syntax coloring
4470							}
4471
4472							# mark 'x' as operator for something like this (see b1410)
4473							# my $line = join( LD_X, map { LD_H x ( $_ + 2 ) } @$widths );
4474							elsif ( $tok eq 'x' && $last_nonblank_type eq 'w' ) {
4475	0					0	$type = 'x';
4476							}
4477							else {
4478							## not a special case
4479							}
4480							}
4481							}
4482	958					1823	return;
4483							} ## end sub do_UNKNOWN_BAREWORD
4484
4485							sub sub_attribute_ok_here {
4486
4487	35			35	0	156	my ( $self, $tok_kw, $next_nonblank_token, $i_next ) = @_;
4488
4489							# Decide if 'sub :' can be the start of a sub attribute list.
4490							# We will decide based on if the colon is followed by a
4491							# bareword which is not a keyword.
4492							# Changed inext+1 to inext to fixed case b1190.
4493	35					69	my $sub_attribute_ok_here;
4494	35	50	66			171	if ( $is_sub{$tok_kw}
			66
4495							&& $expecting != OPERATOR
4496							&& $next_nonblank_token eq ':' )
4497							{
4498	3					11	my ( $nn_nonblank_token, $i_nn ) =
4499							$self->find_next_nonblank_token( $i_next, $rtokens,
4500							$max_token_index );
4501							$sub_attribute_ok_here =
4502							$nn_nonblank_token =~ /^\w/
4503							&& $nn_nonblank_token !~ /^\d/
4504	3		66			77	&& !$is_keyword{$nn_nonblank_token};
4505							}
4506	35					218	return $sub_attribute_ok_here;
4507							} ## end sub sub_attribute_ok_here
4508
4509							sub do_BAREWORD {
4510
4511	5838			5838	0	10876	my ($self) = @_;
4512
4513							# handle a bareword token:
4514							# returns
4515							# true if this token ends the current line
4516							# false otherwise
4517
4518	5838					16564	my ( $next_nonblank_token, $i_next ) =
4519							$self->find_next_nonblank_token( $i, $rtokens, $max_token_index );
4520
4521							# a bare word immediately followed by :: is not a keyword;
4522							# use $tok_kw when testing for keywords to avoid a mistake
4523	5838					10977	my $tok_kw = $tok;
4524	5838	100	100			17244	if ( $rtokens->[ $i + 1 ] eq ':'
4525							&& $rtokens->[ $i + 2 ] eq ':' )
4526							{
4527	266					591	$tok_kw .= '::';
4528							}
4529
4530	5838	100				12963	if ( $self->[_in_attribute_list_] ) {
4531	39					221	my $is_attribute = $self->do_ATTRIBUTE_LIST($next_nonblank_token);
4532	39	50				131	return if ($is_attribute);
4533							}
4534
4535							#----------------------------------------
4536							# Starting final if-elsif- chain of tests
4537							#----------------------------------------
4538
4539							# This is the return flag:
4540							# true => this is the last token on the line
4541							# false => keep tokenizing the line
4542	5799					8544	my $is_last;
4543
4544							# The following blocks of code must update these vars:
4545							# $type - the final token type, must always be set
4546
4547							# In addition, if additional pretokens are added:
4548							# $tok - the final token
4549							# $i - the index of the last pretoken
4550
4551							# They may also need to check and set various flags
4552
4553							# Scan a bare word following a -> as an identifier; it could
4554							# have a long package name. Fixes c037, c041.
4555	5799	100	100			92091	if ( $last_nonblank_token eq '->' ) {
		100	66
		100	100
		100	66
		100	66
		100	100
		100	66
		50	66
		50	100
		100	100
		100	33
		100	0
		100	0
		100	33
		100	0
		100	0
		100	66
		100	100
			100
			100
			66
			66
			66
4556	670					2432	$self->scan_bare_identifier();
4557
4558							# a bareward after '->' gets type 'i'
4559	670					1245	$type = 'i';
4560							}
4561
4562							# Quote a word followed by => operator
4563							elsif (
4564							( $next_nonblank_token eq '=' && $rtokens->[ $i_next + 1 ] eq '>' )
4565
4566							# unless the word is __END__ or __DATA__ and is the only word on
4567							# the line.
4568							&& ( !defined( $is_END_DATA{$tok_kw} )
4569							\|\| $input_line !~ /^\s__(?:END\|DATA)__\s$/ )
4570							)
4571							{
4572	786					2431	$self->do_QUOTED_BAREWORD();
4573							}
4574
4575							# quote a bare word within braces..like xxx->{s}; note that we
4576							# must be sure this is not a structural brace, to avoid
4577							# mistaking {s} in the following for a quoted bare word:
4578							# for(@[){s}bla}BLA}
4579							# Also treat q in something like var{-q} as a bare word, not
4580							# a quote operator
4581							elsif (
4582							$next_nonblank_token eq '}'
4583							&& (
4584							$last_nonblank_type eq 'L'
4585							\|\| ( $last_nonblank_type eq 'm'
4586							&& $last_last_nonblank_type eq 'L' )
4587							)
4588							)
4589							{
4590	100					259	$type = 'w';
4591							}
4592
4593							# handle operator x (now we know it isn't $x=)
4594							elsif (
4595							$expecting == OPERATOR
4596							&& substr( $tok, 0, 1 ) eq 'x'
4597							&& ( length($tok) == 1
4598							\|\| substr( $tok, 1, 1 ) =~ /^\d/ )
4599							)
4600							{
4601	17					95	$self->do_X_OPERATOR();
4602							}
4603							elsif ( $tok_kw eq 'CORE::' ) {
4604	3					6	$type = $tok = $tok_kw;
4605	3					6	$i += 2;
4606							}
4607							elsif ( ( $tok eq 'strict' )
4608							and ( $last_nonblank_token eq 'use' ) )
4609							{
4610	14					51	$self->[_saw_use_strict_] = 1;
4611	14					96	$self->scan_bare_identifier();
4612							}
4613
4614							elsif ( ( $tok eq 'warnings' )
4615							and ( $last_nonblank_token eq 'use' ) )
4616							{
4617	7					28	$self->[_saw_perl_dash_w_] = 1;
4618
4619							# scan as identifier, so that we pick up something like:
4620							# use warnings::register
4621	7					48	$self->scan_bare_identifier();
4622							}
4623
4624							elsif (
4625							$tok eq 'AutoLoader'
4626							&& $self->[_look_for_autoloader_]
4627							&& (
4628							$last_nonblank_token eq 'use'
4629
4630							# these regexes are from AutoSplit.pm, which we want
4631							# to mimic
4632							\|\| $input_line =~ /^\s*(use\|require)\s+AutoLoader\b/
4633							\|\| $input_line =~ /\bISA\s=.\bAutoLoader\b/
4634							)
4635							)
4636							{
4637	0					0	$self->write_logfile_entry("AutoLoader seen, -nlal deactivates\n");
4638	0					0	$self->[_saw_autoloader_] = 1;
4639	0					0	$self->[_look_for_autoloader_] = 0;
4640	0					0	$self->scan_bare_identifier();
4641							}
4642
4643							elsif (
4644							$tok eq 'SelfLoader'
4645							&& $self->[_look_for_selfloader_]
4646							&& ( $last_nonblank_token eq 'use'
4647							\|\| $input_line =~ /^\s*(use\|require)\s+SelfLoader\b/
4648							\|\| $input_line =~ /\bISA\s=.\bSelfLoader\b/ )
4649							)
4650							{
4651	0					0	$self->write_logfile_entry("SelfLoader seen, -nlsl deactivates\n");
4652	0					0	$self->[_saw_selfloader_] = 1;
4653	0					0	$self->[_look_for_selfloader_] = 0;
4654	0					0	$self->scan_bare_identifier();
4655							}
4656
4657							elsif ( ( $tok eq 'constant' )
4658							and ( $last_nonblank_token eq 'use' ) )
4659							{
4660	16					82	$self->do_USE_CONSTANT();
4661							}
4662
4663							# various quote operators
4664							elsif ( $is_q_qq_qw_qx_qr_s_y_tr_m{$tok} ) {
4665	202					840	$self->do_QUOTE_OPERATOR();
4666							}
4667
4668							# check for a statement label
4669							elsif (
4670							( $next_nonblank_token eq ':' )
4671							&& ( $rtokens->[ $i_next + 1 ] ne ':' )
4672							&& ( $i_next <= $max_token_index ) # colon on same line
4673
4674							# like 'sub : lvalue' ?
4675							&& !$self->sub_attribute_ok_here( $tok_kw, $next_nonblank_token,
4676							$i_next )
4677							&& new_statement_ok()
4678							)
4679							{
4680	33	100				177	if ( $tok !~ /[A-Z]/ ) {
4681	15					36	push @{ $self->[_rlower_case_labels_at_] }, $input_line_number;
	15					56
4682							}
4683	33					96	$type = 'J';
4684	33					92	$tok .= ':';
4685	33					87	$i = $i_next;
4686							}
4687
4688							# 'sub' or other sub alias
4689							elsif ( $is_sub{$tok_kw} ) {
4690
4691							# Update for --use-feature=class (rt145706):
4692							# We have to be extra careful to avoid misparsing other uses of
4693							# 'method' in older scripts.
4694	303	100	100			1544	if ( $tok_kw eq 'method' && $guess_if_method ) {
4695	10	100	66			113	if ( $expecting == OPERATOR
			100
4696							\|\| $next_nonblank_token !~ /^[\w\:]/
4697							\|\| !$self->method_ok_here() )
4698							{
4699	7					67	$self->do_UNKNOWN_BAREWORD($next_nonblank_token);
4700							}
4701							else {
4702	3					168	initialize_subname();
4703	3					18	$self->scan_id();
4704							}
4705							}
4706							else {
4707	293	50				875	$self->error_if_expecting_OPERATOR()
4708							if ( $expecting == OPERATOR );
4709	293					1116	initialize_subname();
4710	293					1110	$self->scan_id();
4711							}
4712							}
4713
4714							# 'package'
4715							elsif ( $is_package{$tok_kw} ) {
4716
4717							# Update for --use-feature=class (rt145706):
4718							# We have to be extra careful because 'class' may be used for other
4719							# purposes on older code; i.e.
4720							# class($x) - valid sub call
4721							# package($x) - error
4722	30	100				98	if ( $tok_kw eq 'class' ) {
4723	8	100	66			72	if ( $expecting == OPERATOR
			100
4724							\|\| $next_nonblank_token !~ /^[\w\:]/
4725							\|\| !$self->class_ok_here() )
4726							{
4727	4					15	$self->do_UNKNOWN_BAREWORD($next_nonblank_token);
4728							}
4729	4					10	else { $self->scan_id() }
4730							}
4731							else {
4732	22	50				373	$self->error_if_expecting_OPERATOR()
4733							if ( $expecting == OPERATOR );
4734	22					94	$self->scan_id();
4735							}
4736							}
4737
4738							# Fix for c035: split 'format' from 'is_format_END_DATA' to be
4739							# more restrictive. Require a new statement to be ok here.
4740							elsif ( $tok_kw eq 'format' && new_statement_ok() ) {
4741	1					3	$type = ';'; # make tokenizer look for TERM next
4742	1					3	$self->[_in_format_] = 1;
4743	1					3	$is_last = 1; ## is last token on this line
4744							}
4745
4746							# Note on token types for format, __DATA__, __END__:
4747							# It simplifies things to give these type ';', so that when we
4748							# start rescanning we will be expecting a token of type TERM.
4749							# We will switch to type 'k' before outputting the tokens.
4750							elsif ( defined( $is_END_DATA{$tok_kw} ) ) {
4751	7					26	$type = ';'; # make tokenizer look for TERM next
4752
4753							# Remember that we are in one of these three sections
4754	7					25	$self->[ $is_END_DATA{$tok_kw} ] = 1;
4755	7					31	$is_last = 1; ## is last token on this line
4756							}
4757
4758							elsif ( $is_keyword{$tok_kw} ) {
4759	2644					8114	$self->do_KEYWORD();
4760							}
4761
4762							# check for inline label following
4763							# /^(redo\|last\|next\|goto)$/
4764							elsif (( $last_nonblank_type eq 'k' )
4765							&& ( $is_redo_last_next_goto{$last_nonblank_token} ) )
4766							{
4767	19					47	$type = 'j';
4768							}
4769
4770							# something else --
4771							else {
4772	947					3462	$self->do_UNKNOWN_BAREWORD($next_nonblank_token);
4773							}
4774
4775	5799					13062	return $is_last;
4776
4777							} ## end sub do_BAREWORD
4778
4779							sub do_FOLLOW_QUOTE {
4780
4781	2768			2768	0	4495	my $self = shift;
4782
4783							# Continue following a quote on a new line
4784	2768					4583	$type = $quote_type;
4785
4786	2768	100				3909	if ( !@{$routput_token_list} ) { # initialize if continuation line
	2768					6721
4787	184					376	push( @{$routput_token_list}, $i );
	184					406
4788	184					435	$routput_token_type->[$i] = $type;
4789
4790							}
4791
4792							# scan for the end of the quote or pattern
4793							(
4794	2768					8584	$i,
4795							$in_quote,
4796							$quote_character,
4797							$quote_pos,
4798							$quote_depth,
4799							$quoted_string_1,
4800							$quoted_string_2,
4801
4802							) = $self->do_quote(
4803
4804							$i,
4805							$in_quote,
4806							$quote_character,
4807							$quote_pos,
4808							$quote_depth,
4809							$quoted_string_1,
4810							$quoted_string_2,
4811							$rtokens,
4812							$rtoken_map,
4813							$max_token_index,
4814
4815							);
4816
4817							# all done if we didn't find it
4818	2768	100				6871	if ($in_quote) { return }
	183					358
4819
4820							# save pattern and replacement text for rescanning
4821	2585					4196	my $qs1 = $quoted_string_1;
4822
4823							# re-initialize for next search
4824	2585					3999	$quote_character = EMPTY_STRING;
4825	2585					3743	$quote_pos = 0;
4826	2585					3947	$quote_type = 'Q';
4827	2585					3781	$quoted_string_1 = EMPTY_STRING;
4828	2585					3895	$quoted_string_2 = EMPTY_STRING;
4829	2585	100				5710	if ( ++$i > $max_token_index ) { return }
	116					345
4830
4831							# look for any modifiers
4832	2469	100				5397	if ($allowed_quote_modifiers) {
4833
4834							# check for exact quote modifiers
4835	144	100				754	if ( $rtokens->[$i] =~ /^[A-Za-z_]/ ) {
4836	30					75	my $str = $rtokens->[$i];
4837	30					71	my $saw_modifier_e;
4838	30					502	while ( $str =~ /\G$allowed_quote_modifiers/gc ) {
4839	47					116	my $pos = pos($str);
4840	47					124	my $char = substr( $str, $pos - 1, 1 );
4841	47		66			309	$saw_modifier_e \|\|= ( $char eq 'e' );
4842							}
4843
4844							# For an 'e' quote modifier we must scan the replacement
4845							# text for here-doc targets...
4846							# but if the modifier starts a new line we can skip
4847							# this because either the here doc will be fully
4848							# contained in the replacement text (so we can
4849							# ignore it) or Perl will not find it.
4850							# See test 'here2.in'.
4851	30	50	66			155	if ( $saw_modifier_e && $i_tok >= 0 ) {
4852
4853	0					0	my $rht = $self->scan_replacement_text($qs1);
4854
4855							# Change type from 'Q' to 'h' for quotes with
4856							# here-doc targets so that the formatter (see sub
4857							# process_line_of_CODE) will not make any line
4858							# breaks after this point.
4859	0	0				0	if ($rht) {
4860	0					0	push @{$rhere_target_list}, @{$rht};
	0					0
	0					0
4861	0					0	$type = 'h';
4862	0	0				0	if ( $i_tok < 0 ) {
4863	0					0	my $ilast = $routput_token_list->[-1];
4864	0					0	$routput_token_type->[$ilast] = $type;
4865							}
4866							}
4867							}
4868
4869	30	50				143	if ( defined( pos($str) ) ) {
4870
4871							# matched
4872	30	50				123	if ( pos($str) == length($str) ) {
4873	30	50				143	if ( ++$i > $max_token_index ) { return }
	0					0
4874							}
4875
4876							# Looks like a joined quote modifier
4877							# and keyword, maybe something like
4878							# s/xxx/yyy/gefor @k=...
4879							# Example is "galgen.pl". Would have to split
4880							# the word and insert a new token in the
4881							# pre-token list. This is so rare that I haven't
4882							# done it. Will just issue a warning citation.
4883
4884							# This error might also be triggered if my quote
4885							# modifier characters are incomplete
4886							else {
4887	0					0	$self->warning(<<EOM);
4888
4889							Partial match to quote modifier $allowed_quote_modifiers at word: '$str'
4890							Please put a space between quote modifiers and trailing keywords.
4891							EOM
4892
4893							# print "token $rtokens->[$i]\n";
4894							# my $num = length($str) - pos($str);
4895							# $rtokens->[$i]=substr($rtokens->[$i],pos($str),$num);
4896							# print "continuing with new token $rtokens->[$i]\n";
4897
4898							# skipping past this token does least damage
4899	0	0				0	if ( ++$i > $max_token_index ) { return }
	0					0
4900							}
4901							}
4902							else {
4903
4904							# example file: rokicki4.pl
4905							# This error might also be triggered if my quote
4906							# modifier characters are incomplete
4907	0					0	$self->write_logfile_entry(
4908							"Note: found word $str at quote modifier location\n");
4909							}
4910							}
4911
4912							# re-initialize
4913	144					292	$allowed_quote_modifiers = EMPTY_STRING;
4914							}
4915	2469					4340	return;
4916							} ## end sub do_FOLLOW_QUOTE
4917
4918							# ------------------------------------------------------------
4919							# begin hash of code for handling most token types
4920							# ------------------------------------------------------------
4921							my $tokenization_code = {
4922
4923							'>' => \&do_GREATER_THAN_SIGN,
4924							'\|' => \&do_VERTICAL_LINE,
4925							'$' => \&do_DOLLAR_SIGN,
4926							'(' => \&do_LEFT_PARENTHESIS,
4927							')' => \&do_RIGHT_PARENTHESIS,
4928							',' => \&do_COMMA,
4929							';' => \&do_SEMICOLON,
4930							'"' => \&do_QUOTATION_MARK,
4931							"'" => \&do_APOSTROPHE,
4932							'`' => \&do_BACKTICK,
4933							'/' => \&do_SLASH,
4934							'{' => \&do_LEFT_CURLY_BRACKET,
4935							'}' => \&do_RIGHT_CURLY_BRACKET,
4936							'&' => \&do_AMPERSAND,
4937							'<' => \&do_LESS_THAN_SIGN,
4938							'?' => \&do_QUESTION_MARK,
4939							'*' => \&do_STAR,
4940							'.' => \&do_DOT,
4941							':' => \&do_COLON,
4942							'+' => \&do_PLUS_SIGN,
4943							'@' => \&do_AT_SIGN,
4944							'%' => \&do_PERCENT_SIGN,
4945							'[' => \&do_LEFT_SQUARE_BRACKET,
4946							']' => \&do_RIGHT_SQUARE_BRACKET,
4947							'-' => \&do_MINUS_SIGN,
4948							'^' => \&do_CARAT_SIGN,
4949							'::' => \&do_DOUBLE_COLON,
4950							'<<' => \&do_LEFT_SHIFT,
4951							'<<~' => \&do_NEW_HERE_DOC,
4952							'->' => \&do_POINTER,
4953							'++' => \&do_PLUS_PLUS,
4954							'=>' => \&do_FAT_COMMA,
4955							'--' => \&do_MINUS_MINUS,
4956							'&&' => \&do_LOGICAL_AND,
4957							'\|\|' => \&do_LOGICAL_OR,
4958							'//' => \&do_SLASH_SLASH,
4959
4960							# No special code for these types yet, but syntax checks
4961							# could be added.
4962							## '!' => undef,
4963							## '!=' => undef,
4964							## '!~' => undef,
4965							## '%=' => undef,
4966							## '&&=' => undef,
4967							## '&=' => undef,
4968							## '+=' => undef,
4969							## '-=' => undef,
4970							## '..' => undef,
4971							## '..' => undef,
4972							## '...' => undef,
4973							## '.=' => undef,
4974							## '<<=' => undef,
4975							## '<=' => undef,
4976							## '<=>' => undef,
4977							## '<>' => undef,
4978							## '=' => undef,
4979							## '==' => undef,
4980							## '=~' => undef,
4981							## '>=' => undef,
4982							## '>>' => undef,
4983							## '>>=' => undef,
4984							## '\\' => undef,
4985							## '^=' => undef,
4986							## '\|=' => undef,
4987							## '\|\|=' => undef,
4988							## '//=' => undef,
4989							## '~' => undef,
4990							## '~~' => undef,
4991							## '!~~' => undef,
4992
4993							};
4994
4995							# ------------------------------------------------------------
4996							# end hash of code for handling individual token types
4997							# ------------------------------------------------------------
4998
4999	39			39		473	use constant DEBUG_TOKENIZE => 0;
	39					167
	39					128162
5000
5001							sub tokenize_this_line {
5002
5003							# This routine breaks a line of perl code into tokens which are of use in
5004							# indentation and reformatting. One of my goals has been to define tokens
5005							# such that a newline may be inserted between any pair of tokens without
5006							# changing or invalidating the program. This version comes close to this,
5007							# although there are necessarily a few exceptions which must be caught by
5008							# the formatter. Many of these involve the treatment of bare words.
5009							#
5010							# The tokens and their types are returned in arrays. See previous
5011							# routine for their names.
5012							#
5013							# See also the array "valid_token_types" in the BEGIN section for an
5014							# up-to-date list.
5015							#
5016							# To simplify things, token types are either a single character, or they
5017							# are identical to the tokens themselves.
5018							#
5019							# As a debugging aid, the -D flag creates a file containing a side-by-side
5020							# comparison of the input string and its tokenization for each line of a file.
5021							# This is an invaluable debugging aid.
5022							#
5023							# In addition to tokens, and some associated quantities, the tokenizer
5024							# also returns flags indication any special line types. These include
5025							# quotes, here_docs, formats.
5026							#
5027							# -----------------------------------------------------------------------
5028							#
5029							# How to add NEW_TOKENS:
5030							#
5031							# New token types will undoubtedly be needed in the future both to keep up
5032							# with changes in perl and to help adapt the tokenizer to other applications.
5033							#
5034							# Here are some notes on the minimal steps. I wrote these notes while
5035							# adding the 'v' token type for v-strings, which are things like version
5036							# numbers 5.6.0, and ip addresses, and will use that as an example. ( You
5037							# can use your editor to search for the string "NEW_TOKENS" to find the
5038							# appropriate sections to change):
5039							#
5040							# *. Try to talk somebody else into doing it! If not, ..
5041							#
5042							# *. Make a backup of your current version in case things don't work out!
5043							#
5044							# *. Think of a new, unused character for the token type, and add to
5045							# the array @valid_token_types in the BEGIN section of this package.
5046							# For example, I used 'v' for v-strings.
5047							#
5048							# *. Implement coding to recognize the $type of the token in this routine.
5049							# This is the hardest part, and is best done by imitating or modifying
5050							# some of the existing coding. For example, to recognize v-strings, I
5051							# patched 'sub scan_bare_identifier' to recognize v-strings beginning with
5052							# 'v' and 'sub scan_number' to recognize v-strings without the leading 'v'.
5053							#
5054							# *. Update sub operator_expected. This update is critically important but
5055							# the coding is trivial. Look at the comments in that routine for help.
5056							# For v-strings, which should behave like numbers, I just added 'v' to the
5057							# regex used to handle numbers and strings (types 'n' and 'Q').
5058							#
5059							# *. Implement a 'bond strength' rule in sub set_bond_strengths in
5060							# Perl::Tidy::Formatter for breaking lines around this token type. You can
5061							# skip this step and take the default at first, then adjust later to get
5062							# desired results. For adding type 'v', I looked at sub bond_strength and
5063							# saw that number type 'n' was using default strengths, so I didn't do
5064							# anything. I may tune it up someday if I don't like the way line
5065							# breaks with v-strings look.
5066							#
5067							# *. Implement a 'whitespace' rule in sub set_whitespace_flags in
5068							# Perl::Tidy::Formatter. For adding type 'v', I looked at this routine
5069							# and saw that type 'n' used spaces on both sides, so I just added 'v'
5070							# to the array @spaces_both_sides.
5071							#
5072							# *. Update HtmlWriter package so that users can colorize the token as
5073							# desired. This is quite easy; see comments identified by 'NEW_TOKENS' in
5074							# that package. For v-strings, I initially chose to use a default color
5075							# equal to the default for numbers, but it might be nice to change that
5076							# eventually.
5077							#
5078							# *. Update comments in Perl::Tidy::Tokenizer::dump_token_types.
5079							#
5080							# *. Run lots and lots of debug tests. Start with special files designed
5081							# to test the new token type. Run with the -D flag to create a .DEBUG
5082							# file which shows the tokenization. When these work ok, test as many old
5083							# scripts as possible. Start with all of the '.t' files in the 'test'
5084							# directory of the distribution file. Compare .tdy output with previous
5085							# version and updated version to see the differences. Then include as
5086							# many more files as possible. My own technique has been to collect a huge
5087							# number of perl scripts (thousands!) into one directory and run perltidy
5088							# *, then run diff between the output of the previous version and the
5089							# current version.
5090							#
5091							# *. For another example, search for the smartmatch operator '~~'
5092							# with your editor to see where updates were made for it.
5093							#
5094							# -----------------------------------------------------------------------
5095
5096	7518			7518	0	15216	my ( $self, $line_of_tokens ) = @_;
5097	7518					14853	my ($untrimmed_input_line) = $line_of_tokens->{_line_text};
5098
5099							# Extract line number for use in error messages
5100	7518					12242	$input_line_number = $line_of_tokens->{_line_number};
5101
5102							# Check for pod documentation
5103	7518	100	66			20227	if ( substr( $untrimmed_input_line, 0, 1 ) eq '='
5104							&& $untrimmed_input_line =~ /^=[A-Za-z_]/ )
5105							{
5106
5107							# Must not be in multi-line quote
5108							# and must not be in an equation
5109	14					37	my $blank_after_Z = 1;
5110	14	50	33			121	if (
5111							!$in_quote
5112							&& ( $self->operator_expected( '=', 'b', $blank_after_Z ) ==
5113							TERM )
5114							)
5115							{
5116	14					38	$self->[_in_pod_] = 1;
5117	14					31	return;
5118							}
5119							}
5120
5121	7504					14287	$input_line = $untrimmed_input_line;
5122
5123	7504					14525	chomp $input_line;
5124
5125							# Reinitialize the multi-line quote flag
5126	7504	100	100			18811	if ( $in_quote && $quote_type eq 'Q' ) {
5127	47					140	$line_of_tokens->{_starting_in_quote} = 1;
5128							}
5129							else {
5130	7457					14089	$line_of_tokens->{_starting_in_quote} = 0;
5131
5132							# Trim start of this line unless we are continuing a quoted line.
5133							# Do not trim end because we might end in a quote (test: deken4.pl)
5134							# Perl::Tidy::Formatter will delete needless trailing blanks
5135	7457	100				34428	if ( !length($input_line) ) {
		100
5136
5137							# line is empty
5138							}
5139							elsif ( $input_line =~ m/\S/g ) {
5140
5141							# There are $spaces blank characters before a nonblank character
5142	6651					13684	my $spaces = pos($input_line) - 1;
5143	6651	100				15567	if ( $spaces > 0 ) {
5144
5145							# Trim the leading spaces
5146	3547					9101	$input_line = substr( $input_line, $spaces );
5147
5148							# Find actual space count if there are leading tabs
5149	3547	100	66			12836	if (
5150							ord( substr( $untrimmed_input_line, 0, 1 ) ) == ORD_TAB
5151							&& $untrimmed_input_line =~ /^(\t+)/ )
5152							{
5153	213					704	$spaces += length($1) * ( $tabsize - 1 );
5154							}
5155
5156							# Calculate a guessed level for nonblank lines to avoid
5157							$line_of_tokens->{_guessed_indentation_level} =
5158	3547					10529	int( $spaces / $rOpts_indent_columns );
5159							}
5160							}
5161							else {
5162
5163							# line has all blank characters
5164	9					42	$input_line = EMPTY_STRING;
5165							}
5166							}
5167
5168	7504	100				16164	if ( !$in_quote ) {
5169
5170							# Optimize handling of a blank line
5171	7320	100				15891	if ( !length($input_line) ) {
5172	806					2276	$line_of_tokens->{_line_type} = 'CODE';
5173	806					1887	$line_of_tokens->{_rtokens} = [];
5174	806					1823	$line_of_tokens->{_rtoken_type} = [];
5175	806					1825	$line_of_tokens->{_rlevels} = [];
5176	806					2036	$line_of_tokens->{_rci_levels} = [];
5177	806					1673	$line_of_tokens->{_rblock_type} = [];
5178	806					1956	$line_of_tokens->{_nesting_tokens_0} = $nesting_token_string;
5179	806					2351	$line_of_tokens->{_nesting_blocks_0} = $nesting_block_string;
5180	806					1699	return;
5181							}
5182
5183							# Check comments
5184	6514	100				15987	if ( substr( $input_line, 0, 1 ) eq '#' ) {
5185
5186							# and check for skipped section
5187	788	50	66			4686	if (
			66
			66
5188							(
5189							substr( $input_line, 0, 4 ) eq '#<<V'
5190							\|\| $rOpts_code_skipping_begin
5191							)
5192							&& $rOpts_code_skipping
5193							&& $input_line =~ /$code_skipping_pattern_begin/
5194							)
5195							{
5196	2					10	$self->[_in_skipped_] = $self->[_last_line_number_];
5197	2					11	return;
5198							}
5199
5200							# Optional fast processing of a block comment
5201	786					1858	$line_of_tokens->{_line_type} = 'CODE';
5202	786					2278	$line_of_tokens->{_rtokens} = [$input_line];
5203	786					2643	$line_of_tokens->{_rtoken_type} = ['#'];
5204	786					2073	$line_of_tokens->{_rlevels} = [$level_in_tokenizer];
5205	786					1898	$line_of_tokens->{_rci_levels} = [0];
5206	786					2416	$line_of_tokens->{_rblock_type} = [EMPTY_STRING];
5207	786					2858	$line_of_tokens->{_nesting_tokens_0} = $nesting_token_string;
5208	786					2769	$line_of_tokens->{_nesting_blocks_0} = $nesting_block_string;
5209	786					1711	return;
5210							}
5211							}
5212
5213							# update the copy of the line for use in error messages
5214							# This must be exactly what we give the pre_tokenizer
5215	5910					11908	$self->[_line_of_text_] = $input_line;
5216
5217							# re-initialize for the main loop
5218	5910					14679	$routput_token_list = []; # stack of output token indexes
5219	5910					18208	$routput_token_type = []; # token types
5220	5910					16039	$routput_block_type = []; # types of code block
5221	5910					15527	$routput_container_type = []; # paren types, such as if, elsif, ..
5222	5910					14322	$routput_type_sequence = []; # nesting sequential number
5223
5224	5910					9621	$rhere_target_list = [];
5225
5226	5910					9778	$tok = $last_nonblank_token;
5227	5910					9287	$type = $last_nonblank_type;
5228	5910					9264	$prototype = $last_nonblank_prototype;
5229	5910					8867	$last_nonblank_i = -1;
5230	5910					9371	$block_type = $last_nonblank_block_type;
5231	5910					9190	$container_type = $last_nonblank_container_type;
5232	5910					9125	$type_sequence = $last_nonblank_type_sequence;
5233	5910					9120	$indent_flag = 0;
5234	5910					8220	$peeked_ahead = 0;
5235
5236	5910					17826	$self->tokenizer_main_loop();
5237
5238							#-----------------------------------------------
5239							# all done tokenizing this line ...
5240							# now prepare the final list of tokens and types
5241							#-----------------------------------------------
5242	5910					18117	$self->tokenizer_wrapup_line($line_of_tokens);
5243
5244	5910					10345	return;
5245							} ## end sub tokenize_this_line
5246
5247							sub tokenizer_main_loop {
5248
5249	5910			5910	0	11302	my ($self) = @_;
5250
5251							#---------------------------------
5252							# Break one input line into tokens
5253							#---------------------------------
5254
5255							# start by breaking the line into pre-tokens
5256	5910					14415	( $rtokens, $rtoken_map, $rtoken_type ) = pre_tokenize($input_line);
5257
5258	5910					23584	$max_token_index = scalar( @{$rtokens} ) - 1;
	5910					10835
5259	5910					8889	push( @{$rtokens}, SPACE, SPACE, SPACE )
	5910					14956
5260							; # extra whitespace simplifies logic
5261	5910					9020	push( @{$rtoken_map}, 0, 0, 0 ); # shouldn't be referenced
	5910					12567
5262	5910					8617	push( @{$rtoken_type}, 'b', 'b', 'b' );
	5910					12279
5263
5264							# initialize for main loop
5265	5910					8527	if (0) { #<<< this is not necessary
5266							foreach my $ii ( 0 .. $max_token_index + 3 ) {
5267							$routput_token_type->[$ii] = EMPTY_STRING;
5268							$routput_block_type->[$ii] = EMPTY_STRING;
5269							$routput_container_type->[$ii] = EMPTY_STRING;
5270							$routput_type_sequence->[$ii] = EMPTY_STRING;
5271							$routput_indent_flag->[$ii] = 0;
5272							}
5273							}
5274
5275	5910					8753	$i = -1;
5276	5910					8882	$i_tok = -1;
5277
5278							#-----------------------
5279							# main tokenization loop
5280							#-----------------------
5281
5282							# we are looking at each pre-token of one line and combining them
5283							# into tokens
5284	5910					13744	while ( ++$i <= $max_token_index ) {
5285
5286							# continue looking for the end of a quote
5287	50826	100				87559	if ($in_quote) {
5288	2768					9010	$self->do_FOLLOW_QUOTE();
5289	2768	100	100			10477	last if ( $in_quote \|\| $i > $max_token_index );
5290							}
5291
5292	50527	100	100			138837	if ( $type ne 'b' && $type ne 'CORE::' ) {
5293
5294							# try to catch some common errors
5295	35304	100	100			75603	if ( ( $type eq 'n' ) && ( $tok ne '0' ) ) {
5296
5297	1592	100				5085	if ( $last_nonblank_token eq 'eq' ) {
		50
5298	9					97	$self->complain("Should 'eq' be '==' here ?\n");
5299							}
5300							elsif ( $last_nonblank_token eq 'ne' ) {
5301	0					0	$self->complain("Should 'ne' be '!=' here ?\n");
5302							}
5303							else {
5304							# that's all
5305							}
5306							}
5307
5308							# fix c090, only rotate vars if a new token will be stored
5309	35304	100				64839	if ( $i_tok >= 0 ) {
5310
5311	29532					44087	$last_last_nonblank_token = $last_nonblank_token;
5312	29532					39662	$last_last_nonblank_type = $last_nonblank_type;
5313
5314	29532					41643	$last_nonblank_prototype = $prototype;
5315	29532					40541	$last_nonblank_block_type = $block_type;
5316	29532					39995	$last_nonblank_container_type = $container_type;
5317	29532					40983	$last_nonblank_type_sequence = $type_sequence;
5318	29532					38478	$last_nonblank_i = $i_tok;
5319
5320							# Fix part #3 for git82: propagate type 'Z' though L-R pair
5321	29532	100	100			61493	if ( !( $type eq 'R' && $last_nonblank_type eq 'Z' ) ) {
5322	29531					41843	$last_nonblank_token = $tok;
5323	29531					41875	$last_nonblank_type = $type;
5324							}
5325							}
5326
5327							# Patch for c030: Fix things in case a '->' got separated from
5328							# the subsequent identifier by a side comment. We need the
5329							# last_nonblank_token to have a leading -> to avoid triggering
5330							# an operator expected error message at the next '('. See also
5331							# fix for git #63.
5332	35304	100				65372	if ( $last_last_nonblank_token eq '->' ) {
5333	886	100	66			5498	if ( $last_nonblank_type eq 'w'
5334							\|\| $last_nonblank_type eq 'i' )
5335							{
5336	675					1748	$last_nonblank_token = '->' . $last_nonblank_token;
5337	675					1397	$last_nonblank_type = 'i';
5338							}
5339							}
5340							}
5341
5342							# store previous token type
5343	50527	100				87064	if ( $i_tok >= 0 ) {
5344	44755					86851	$routput_token_type->[$i_tok] = $type;
5345	44755					73564	$routput_block_type->[$i_tok] = $block_type;
5346	44755					74748	$routput_container_type->[$i_tok] = $container_type;
5347	44755					70372	$routput_type_sequence->[$i_tok] = $type_sequence;
5348	44755					68197	$routput_indent_flag->[$i_tok] = $indent_flag;
5349							}
5350
5351							# get the next pre-token and type
5352							# $tok and $type will be modified to make the output token
5353	50527					80462	my $pre_tok = $tok = $rtokens->[$i]; # get the next pre-token
5354	50527					76595	my $pre_type = $type = $rtoken_type->[$i]; # and type
5355
5356							# remember the starting index of this token; we will be updating $i
5357	50527					66731	$i_tok = $i;
5358
5359							# re-initialize various flags for the next output token
5360							(
5361
5362	50527					89619	$block_type,
5363							$container_type,
5364							$type_sequence,
5365							$indent_flag,
5366							$prototype,
5367							)
5368							= (
5369
5370							EMPTY_STRING,
5371							EMPTY_STRING,
5372							EMPTY_STRING,
5373							0,
5374							EMPTY_STRING,
5375							);
5376
5377							# this pre-token will start an output token
5378	50527					64408	push( @{$routput_token_list}, $i_tok );
	50527					84339
5379
5380							# The search for the full token ends in one of 5 main END NODES:
5381
5382							#-----------------------
5383							# END NODE 1: whitespace
5384							#-----------------------
5385	50527	100				107292	next if ( $pre_type eq 'b' );
5386
5387							#----------------------
5388							# END NODE 2: a comment
5389							#----------------------
5390	35162	100				62299	last if ( $pre_type eq '#' );
5391
5392							# continue gathering identifier if necessary
5393	34834	100				60567	if ($id_scan_state) {
5394
5395	17	100	66			134	if ( $is_sub{$id_scan_state} \|\| $is_package{$id_scan_state} ) {
5396	10					39	$self->scan_id();
5397							}
5398							else {
5399	7					33	$self->scan_identifier();
5400							}
5401
5402	17	100				60	if ($id_scan_state) {
5403
5404							# Still scanning ...
5405							# Check for side comment between sub and prototype (c061)
5406
5407							# done if nothing left to scan on this line
5408	1	50				1521	last if ( $i > $max_token_index );
5409
5410	1					13	my ( $next_nonblank_token, $i_next ) =
5411							find_next_nonblank_token_on_this_line( $i, $rtokens,
5412							$max_token_index );
5413
5414							# done if it was just some trailing space
5415	1	50				11	last if ( $i_next > $max_token_index );
5416
5417							# something remains on the line ... must be a side comment
5418	1					4	next;
5419							}
5420
5421	16	100	100			113	next if ( ( $i > 0 ) \|\| $type );
5422
5423							# didn't find any token; start over
5424	7					15	$type = $pre_type;
5425	7					16	$tok = $pre_tok;
5426							}
5427
5428							#-----------------------------------------------------------
5429							# Combine pre-tokens into digraphs and trigraphs if possible
5430							#-----------------------------------------------------------
5431
5432							# See if we can make a digraph...
5433							# The following tokens are excluded and handled specially:
5434							# '/=' is excluded because the / might start a pattern.
5435							# 'x=' is excluded since it might be $x=, with $ on previous line
5436							# '*' and = might be typeglobs of punctuation variables
5437							# I have allowed tokens starting with <, such as <=,
5438							# because I don't think these could be valid angle operators.
5439							# test file: storrs4.pl
5440	34824	100	100			107308	if ( $can_start_digraph{$tok}
			100
5441							&& $i < $max_token_index
5442							&& $is_digraph{ $tok . $rtokens->[ $i + 1 ] } )
5443							{
5444
5445	2561					4906	my $combine_ok = 1;
5446	2561					5543	my $test_tok = $tok . $rtokens->[ $i + 1 ];
5447
5448							# check for special cases which cannot be combined
5449
5450							# '//' must be defined_or operator if an operator is expected.
5451							# TODO: Code for other ambiguous digraphs (/=, x=, *, =)
5452							# could be migrated here for clarity
5453
5454							# Patch for RT#102371, misparsing a // in the following snippet:
5455							# state $b //= ccc();
5456							# The solution is to always accept the digraph (or trigraph)
5457							# after type 'Z' (possible file handle). The reason is that
5458							# sub operator_expected gives TERM expected here, which is
5459							# wrong in this case.
5460	2561	100	66			7172	if ( $test_tok eq '//' && $last_nonblank_type ne 'Z' ) {
5461
5462							# note that here $tok = '/' and the next tok and type is '/'
5463	16					42	my $blank_after_Z;
5464	16					69	$expecting =
5465							$self->operator_expected( $tok, '/', $blank_after_Z );
5466
5467							# Patched for RT#101547, was 'unless ($expecting==OPERATOR)'
5468	16	100				51	$combine_ok = 0 if ( $expecting == TERM );
5469							}
5470
5471							# Patch for RT #114359: mis-parsing of "print $x ** 0.5;
5472							# Accept the digraphs '**' only after type 'Z'
5473							# Otherwise postpone the decision.
5474	2561	100				5910	if ( $test_tok eq '**' ) {
5475	39	100				180	if ( $last_nonblank_type ne 'Z' ) { $combine_ok = 0 }
	37					79
5476							}
5477
5478	2561	50	66			16559	if (
			66
			33
5479
5480							# still ok to combine?
5481							$combine_ok
5482
5483							&& ( $test_tok ne '/=' ) # might be pattern
5484							&& ( $test_tok ne 'x=' ) # might be $x
5485							&& ( $test_tok ne '*=' ) # typeglob?
5486
5487							# Moved above as part of fix for
5488							# RT #114359: Missparsing of "print $x ** 0.5;
5489							# && ( $test_tok ne '**' ) # typeglob?
5490							)
5491							{
5492	2520					4084	$tok = $test_tok;
5493	2520					3764	$i++;
5494
5495							# Now try to assemble trigraphs. Note that all possible
5496							# perl trigraphs can be constructed by appending a character
5497							# to a digraph.
5498	2520					4690	$test_tok = $tok . $rtokens->[ $i + 1 ];
5499
5500	2520	100				6400	if ( $is_trigraph{$test_tok} ) {
5501	77					182	$tok = $test_tok;
5502	77					169	$i++;
5503							}
5504
5505							# The only current tetragraph is the double diamond operator
5506							# and its first three characters are NOT a trigraph, so
5507							# we do can do a special test for it
5508							else {
5509	2443	100				5991	if ( $test_tok eq '<<>' ) {
5510	1					4	$test_tok .= $rtokens->[ $i + 2 ];
5511	1	50				5	if ( $is_tetragraph{$test_tok} ) {
5512	1					2	$tok = $test_tok;
5513	1					3	$i += 2;
5514							}
5515							}
5516							}
5517							}
5518							}
5519
5520	34824					50236	$type = $tok;
5521	34824					57618	$next_tok = $rtokens->[ $i + 1 ];
5522	34824					52907	$next_type = $rtoken_type->[ $i + 1 ];
5523
5524							# expecting an operator here? first try table lookup, then function
5525	34824					65018	$expecting = $op_expected_table{$last_nonblank_type};
5526	34824	100				66481	if ( !defined($expecting) ) {
5527	8397		100			20226	my $blank_after_Z = $last_nonblank_type eq 'Z'
5528							&& ( $i == 0 \|\| $rtoken_type->[ $i - 1 ] eq 'b' );
5529	8397					21868	$expecting =
5530							$self->operator_expected( $tok, $next_type, $blank_after_Z );
5531							}
5532
5533	34824					44690	DEBUG_TOKENIZE && do {
5534							local $LIST_SEPARATOR = ')(';
5535							my @debug_list = (
5536							$last_nonblank_token, $tok,
5537							$next_tok, $brace_depth,
5538							$rbrace_type->[$brace_depth], $paren_depth,
5539							$rparen_type->[$paren_depth],
5540							);
5541							print {*STDOUT} "TOKENIZE:(@debug_list)\n";
5542							};
5543
5544							# We have the next token, $tok.
5545							# Now we have to examine this token and decide what it is
5546							# and define its $type
5547
5548							#------------------------
5549							# END NODE 3: a bare word
5550							#------------------------
5551	34824	100				64751	if ( $pre_type eq 'w' ) {
5552	5838					16367	my $is_last = $self->do_BAREWORD();
5553	5838	100				12259	last if ($is_last);
5554	5830					15005	next;
5555							}
5556
5557							# Turn off attribute list on first non-blank, non-bareword.
5558							# Added '#' to fix c038 (later moved above).
5559	28986		100			55449	$self->[_in_attribute_list_] &&= 0;
5560
5561							#-------------------------------
5562							# END NODE 4: a string of digits
5563							#-------------------------------
5564	28986	100				52760	if ( $pre_type eq 'd' ) {
5565	1934					7226	$self->do_DIGITS();
5566	1934					4448	next;
5567							}
5568
5569							#------------------------------------------
5570							# END NODE 5: everything else (punctuation)
5571							#------------------------------------------
5572	27052					54249	my $code = $tokenization_code->{$tok};
5573	27052	100				51029	if ($code) {
5574	25317					78446	$code->($self);
5575	25317	100				65930	redo if $in_quote;
5576							}
5577							} ## End main tokenizer loop
5578
5579							# Store the final token
5580	5910	100				13386	if ( $i_tok >= 0 ) {
5581	5772					12489	$routput_token_type->[$i_tok] = $type;
5582	5772					10635	$routput_block_type->[$i_tok] = $block_type;
5583	5772					10237	$routput_container_type->[$i_tok] = $container_type;
5584	5772					10134	$routput_type_sequence->[$i_tok] = $type_sequence;
5585	5772					10129	$routput_indent_flag->[$i_tok] = $indent_flag;
5586							}
5587
5588							# Remember last nonblank values
5589	5910	100	100			21752	if ( $type ne 'b' && $type ne '#' ) {
5590
5591	5437					8758	$last_last_nonblank_token = $last_nonblank_token;
5592	5437					7944	$last_last_nonblank_type = $last_nonblank_type;
5593
5594	5437					8489	$last_nonblank_prototype = $prototype;
5595	5437					7821	$last_nonblank_block_type = $block_type;
5596	5437					7624	$last_nonblank_container_type = $container_type;
5597	5437					7611	$last_nonblank_type_sequence = $type_sequence;
5598
5599	5437					7874	$last_nonblank_token = $tok;
5600	5437					7947	$last_nonblank_type = $type;
5601							}
5602
5603							# reset indentation level if necessary at a sub or package
5604							# in an attempt to recover from a nesting error
5605	5910	50				12251	if ( $level_in_tokenizer < 0 ) {
5606	0	0				0	if ( $input_line =~ /^\s*(sub\|package)\s+(\w+)/ ) {
5607	0					0	reset_indentation_level(0);
5608	0					0	$self->brace_warning("resetting level to 0 at $1 $2\n");
5609							}
5610							}
5611
5612	5910					9705	$self->[_in_quote_] = $in_quote;
5613	5910	100				12980	$self->[_quote_target_] =
5614							$in_quote ? matching_end_token($quote_character) : EMPTY_STRING;
5615	5910					10817	$self->[_rhere_target_list_] = $rhere_target_list;
5616
5617	5910					9940	return;
5618							} ## end sub tokenizer_main_loop
5619
5620							sub tokenizer_wrapup_line {
5621	5910			5910	0	12603	my ( $self, $line_of_tokens ) = @_;
5622
5623							#---------------------------------------------------------
5624							# Package a line of tokens for shipping back to the caller
5625							#---------------------------------------------------------
5626
5627							# Arrays to hold token values for this line:
5628	5910					10181	my ( @levels, @block_type, @type_sequence, @token_type, @tokens );
5629
5630	5910					14648	$line_of_tokens->{_nesting_tokens_0} = $nesting_token_string;
5631
5632							# Remember starting nesting block string
5633	5910					9803	my $nesting_block_string_0 = $nesting_block_string;
5634
5635							#-----------------
5636							# Loop over tokens
5637							#-----------------
5638							# $i is the index of the pretoken which starts this full token
5639	5910					11508	foreach my $i ( @{$routput_token_list} ) {
	5910					12642
5640
5641	50711					73613	my $type_i = $routput_token_type->[$i];
5642
5643							#----------------------------------------
5644							# Section 1. Handle a non-sequenced token
5645							#----------------------------------------
5646	50711	100				79551	if ( !$routput_type_sequence->[$i] ) {
5647
5648							#-------------------------------
5649							# Section 1.1. types ';' and 't'
5650							#-------------------------------
5651							# - output anonymous 'sub' as keyword (type 'k')
5652							# - output __END__, __DATA__, and format as type 'k' instead
5653							# of ';' to make html colors correct, etc.
5654	41555	100				91878	if ( $is_semicolon_or_t{$type_i} ) {
		50
5655	2676					6227	my $tok_i = $rtokens->[$i];
5656	2676	100				7839	if ( $is_END_DATA_format_sub{$tok_i} ) {
5657	172					509	$type_i = 'k';
5658							}
5659							}
5660
5661							#----------------------------------------------
5662							# Section 1.2. Check for an invalid token type.
5663							#----------------------------------------------
5664							# This can happen by running perltidy on non-scripts although
5665							# it could also be bug introduced by programming change. Perl
5666							# silently accepts a 032 (^Z) and takes it as the end
5667							elsif ( !$is_valid_token_type{$type_i} ) {
5668	0					0	my $val = ord($type_i);
5669	0					0	$self->warning(
5670							"unexpected character decimal $val ($type_i) in script\n"
5671							);
5672	0					0	$self->[_in_error_] = 1;
5673							}
5674							else {
5675							## ok - valid token type other than ; and t
5676							}
5677
5678							#----------------------------------------------------
5679							# Section 1.3. Store values for a non-sequenced token
5680							#----------------------------------------------------
5681	41555					67407	push( @levels, $level_in_tokenizer );
5682	41555					62504	push( @block_type, EMPTY_STRING );
5683	41555					60042	push( @type_sequence, EMPTY_STRING );
5684	41555					77747	push( @token_type, $type_i );
5685
5686							}
5687
5688							#------------------------------------
5689							# Section 2. Handle a sequenced token
5690							# One of { [ ( ? : ) ] }
5691							#------------------------------------
5692							else {
5693
5694							# $level_i is the level we will store. Levels of braces are
5695							# set so that the leading braces have a HIGHER level than their
5696							# CONTENTS, which is convenient for indentation.
5697	9156					13762	my $level_i = $level_in_tokenizer;
5698
5699							# $tok_i is the PRE-token. It only equals the token for symbols
5700	9156					14859	my $tok_i = $rtokens->[$i];
5701
5702							# $routput_indent_flag->[$i] indicates that we need a change
5703							# in level at a nested ternary, as follows
5704							# 1 => at a nested ternary ?
5705							# -1 => at a nested ternary :
5706							# 0 => otherwise
5707
5708							#--------------------------------------------
5709							# Section 2.1 Handle a level-increasing token
5710							#--------------------------------------------
5711	9156	100				25078	if ( $is_opening_or_ternary_type{$type_i} ) {
		50
5712
5713	4578	100				9045	if ( $type_i eq '?' ) {
5714
5715	187	100				997	if ( $routput_indent_flag->[$i] > 0 ) {
5716	8					39	$level_in_tokenizer++;
5717
5718							# break BEFORE '?' in a nested ternary
5719	8					19	$level_i = $level_in_tokenizer;
5720	8					22	$nesting_block_string .= "$nesting_block_flag";
5721
5722							}
5723							}
5724							else {
5725
5726	4391					7246	$nesting_token_string .= $tok_i;
5727
5728	4391	100	100			12454	if ( $type_i eq '{' \|\| $type_i eq 'L' ) {
5729
5730	4084					6356	$level_in_tokenizer++;
5731
5732	4084	100				7932	if ( $routput_block_type->[$i] ) {
5733	974					1993	$nesting_block_flag = 1;
5734	974					1985	$nesting_block_string .= '1';
5735							}
5736							else {
5737	3110					5036	$nesting_block_flag = 0;
5738	3110					5440	$nesting_block_string .= '0';
5739							}
5740							}
5741							}
5742							}
5743
5744							#---------------------------------------------
5745							# Section 2.2. Handle a level-decreasing token
5746							#---------------------------------------------
5747							elsif ( $is_closing_or_ternary_type{$type_i} ) {
5748
5749	4578	100				11554	if ( $type_i ne ':' ) {
5750	4391					8104	my $char = chop $nesting_token_string;
5751	4391	50				11553	if ( $char ne $matching_start_token{$tok_i} ) {
5752	0					0	$nesting_token_string .= $char . $tok_i;
5753							}
5754							}
5755
5756	4578	100	100			15163	if (
			100
			100
5757							$type_i eq '}'
5758							\|\| $type_i eq 'R'
5759
5760							# only the second and higher ? : have levels
5761							\|\| $type_i eq ':' && $routput_indent_flag->[$i] < 0
5762							)
5763							{
5764
5765	4092					6355	$level_i = --$level_in_tokenizer;
5766
5767	4092	50				8211	if ( $level_in_tokenizer < 0 ) {
5768	0	0				0	if ( !$self->[_saw_negative_indentation_] ) {
5769	0					0	$self->[_saw_negative_indentation_] = 1;
5770	0					0	$self->warning(
5771							"Starting negative indentation\n");
5772							}
5773							}
5774
5775							# restore previous level values
5776	4092	50				8478	if ( length($nesting_block_string) > 1 )
5777							{ # true for valid script
5778	4092					6387	chop $nesting_block_string;
5779	4092					7985	$nesting_block_flag =
5780							substr( $nesting_block_string, -1 ) eq '1';
5781							}
5782
5783							}
5784							}
5785
5786							#-----------------------------------------------------
5787							# Section 2.3. Unexpected sequenced token type - error
5788							#-----------------------------------------------------
5789							else {
5790
5791							# The tokenizer should only be assigning sequence numbers
5792							# to types { [ ( ? ) ] } :
5793	0					0	DEVEL_MODE && $self->Fault(<<EOM);
5794							unexpected sequence number on token type $type_i with pre-tok=$tok_i
5795							EOM
5796							}
5797
5798							#------------------------------------------------
5799							# Section 2.4. Store values for a sequenced token
5800							#------------------------------------------------
5801
5802							# The starting nesting block string, which is used in any .LOG
5803							# output, should include the first token of the line
5804	9156	100				18992	if ( !@levels ) {
5805	1578					3060	$nesting_block_string_0 = $nesting_block_string;
5806							}
5807
5808							# Store values for a sequenced token
5809	9156					17555	push( @levels, $level_i );
5810	9156					17491	push( @block_type, $routput_block_type->[$i] );
5811	9156					15516	push( @type_sequence, $routput_type_sequence->[$i] );
5812	9156					19588	push( @token_type, $type_i );
5813
5814							}
5815							} ## End loop to over tokens
5816
5817							#---------------------
5818							# Post-loop operations
5819							#---------------------
5820
5821	5910					14403	$line_of_tokens->{_nesting_blocks_0} = $nesting_block_string_0;
5822
5823							# Form and store the tokens
5824	5910	50				13685	if (@levels) {
5825
5826	5910					8662	my $im = shift @{$routput_token_list};
	5910					10681
5827	5910					10636	my $offset = $rtoken_map->[$im];
5828	5910					8412	foreach my $i ( @{$routput_token_list} ) {
	5910					10585
5829	44801					60206	my $numc = $rtoken_map->[$i] - $offset;
5830	44801					80510	push( @tokens, substr( $input_line, $offset, $numc ) );
5831	44801					55891	$offset += $numc;
5832
5833	44801					61762	if ( DEVEL_MODE && $numc <= 0 ) {
5834
5835							# Should not happen unless @{$rtoken_map} is corrupted
5836							$self->Fault(
5837							"number of characters is '$numc' but should be >0\n");
5838							}
5839							}
5840
5841							# Form and store the final token of this line
5842	5910					11868	my $numc = length($input_line) - $offset;
5843	5910					12757	push( @tokens, substr( $input_line, $offset, $numc ) );
5844
5845	5910					9109	if ( DEVEL_MODE && $numc <= 0 ) {
5846							$self->Fault(
5847							"Number of Characters is '$numc' but should be >0\n");
5848							}
5849							}
5850
5851							# NOTE: This routine returns ci=0. Eventually '_rci_levels' can be
5852							# removed. The ci values are computed later by sub Formatter::set_ci.
5853	5910					19910	my @ci_levels = (0) x scalar(@levels);
5854
5855							# Wrap up this line of tokens for shipping to the Formatter
5856	5910					14143	$line_of_tokens->{_rtoken_type} = \@token_type;
5857	5910					11059	$line_of_tokens->{_rtokens} = \@tokens;
5858	5910					11689	$line_of_tokens->{_rblock_type} = \@block_type;
5859	5910					11826	$line_of_tokens->{_rtype_sequence} = \@type_sequence;
5860	5910					19369	$line_of_tokens->{_rlevels} = \@levels;
5861	5910					12266	$line_of_tokens->{_rci_levels} = \@ci_levels;
5862
5863	5910					15905	return;
5864							} ## end sub tokenizer_wrapup_line
5865
5866							} ## end tokenize_this_line
5867
5868							#######################################################################
5869							# Tokenizer routines which assist in identifying token types
5870							#######################################################################
5871
5872							# Define Global '%op_expected_table'
5873							# = hash table of operator expected values based on last nonblank token
5874
5875							# exceptions to perl's weird parsing rules after type 'Z'
5876							my %is_weird_parsing_rule_exception;
5877
5878							my %is_paren_dollar;
5879
5880							my %is_n_v;
5881
5882							BEGIN {
5883
5884							# Always expecting TERM following these types:
5885							# note: this is identical to '@value_requestor_type' defined later.
5886							# Fix for c250: add new type 'P' for package (expecting VERSION or {}
5887							# after package NAMESPACE, so expecting TERM)
5888							# Fix for c250: add new type 'S' for sub (not expecting operator)
5889	39			39		655	my @q = qw(
5890							; ! + x & ? F J - p / Y : % f U ~ A G j L P S * . \| ^ < = [ m { \ > t
5891							\|\| >= != mm = => .. !~ == && \|= .= pp -= =~ += <= %= ^= x= ~~ * << /=
5892							&= // >> ~. &. \|. ^.
5893							... **= <<= >>= &&= \|\|= //= <=> !~~ &.= \|.= ^.= <<~
5894							);
5895	39					4141	push @q, ',';
5896	39					134	push @q, '('; # for completeness, not currently a token type
5897	39					69	push @q, '->'; # was previously in UNKNOWN
5898	39					1294	@{op_expected_table}{@q} = (TERM) x scalar(@q);
5899
5900							# Always UNKNOWN following these types;
5901							# previously had '->' in this list for c030
5902	39					207	@q = qw( w );
5903	39					108	@{op_expected_table}{@q} = (UNKNOWN) x scalar(@q);
5904
5905							# Always expecting OPERATOR ...
5906							# 'n' and 'v' are currently excluded because they might be VERSION numbers
5907							# 'i' is currently excluded because it might be a package
5908							# 'q' is currently excluded because it might be a prototype
5909							# Fix for c030: removed '->' from this list:
5910							# Fix for c250: added 'i' because new type 'P' was added
5911	39					116	@q = qw( -- C h R ++ ] Q <> i ); ## n v q );
5912	39					77	push @q, ')';
5913	39					263	@{op_expected_table}{@q} = (OPERATOR) x scalar(@q);
5914
5915							# Fix for git #62: added '*' and '%'
5916	39					110	@q = qw( < ? * % );
5917	39					123	@{is_weird_parsing_rule_exception}{@q} = (1) x scalar(@q);
5918
5919	39					88	@q = qw<) $>;
5920	39					99	@{is_paren_dollar}{@q} = (1) x scalar(@q);
5921
5922	39					85	@q = qw( n v );
5923	39					1423	@{is_n_v}{@q} = (1) x scalar(@q);
5924
5925							} ## end BEGIN
5926
5927	39			39		318	use constant DEBUG_OPERATOR_EXPECTED => 0;
	39					85
	39					86888
5928
5929							sub operator_expected {
5930
5931							# Returns a parameter indicating what types of tokens can occur next
5932
5933							# Call format:
5934							# $op_expected =
5935							# $self->operator_expected( $tok, $next_type, $blank_after_Z );
5936							# where
5937							# $tok is the current token
5938							# $next_type is the type of the next token (blank or not)
5939							# $blank_after_Z = flag for guessing after a type 'Z':
5940							# true if $tok follows type 'Z' with intermediate blank
5941							# false if $tok follows type 'Z' with no intermediate blank
5942							# ignored if $tok does not follow type 'Z'
5943
5944							# Many perl symbols have two or more meanings. For example, '<<'
5945							# can be a shift operator or a here-doc operator. The
5946							# interpretation of these symbols depends on the current state of
5947							# the tokenizer, which may either be expecting a term or an
5948							# operator. For this example, a << would be a shift if an OPERATOR
5949							# is expected, and a here-doc if a TERM is expected. This routine
5950							# is called to make this decision for any current token. It returns
5951							# one of three possible values:
5952							#
5953							# OPERATOR - operator expected (or at least, not a term)
5954							# UNKNOWN - can't tell
5955							# TERM - a term is expected (or at least, not an operator)
5956							#
5957							# The decision is based on what has been seen so far. This
5958							# information is stored in the "$last_nonblank_type" and
5959							# "$last_nonblank_token" variables. For example, if the
5960							# $last_nonblank_type is '=~', then we are expecting a TERM, whereas
5961							# if $last_nonblank_type is 'n' (numeric), we are expecting an
5962							# OPERATOR.
5963							#
5964							# If a UNKNOWN is returned, the calling routine must guess. A major
5965							# goal of this tokenizer is to minimize the possibility of returning
5966							# UNKNOWN, because a wrong guess can spoil the formatting of a
5967							# script.
5968							#
5969							# Adding NEW_TOKENS: it is critically important that this routine be
5970							# updated to allow it to determine if an operator or term is to be
5971							# expected after the new token. Doing this simply involves adding
5972							# the new token character to one of the regexes in this routine or
5973							# to one of the hash lists
5974							# that it uses, which are initialized in the BEGIN section.
5975							# USES GLOBAL VARIABLES: $last_nonblank_type, $last_nonblank_token,
5976							# $statement_type
5977
5978							# When possible, token types should be selected such that we can determine
5979							# the 'operator_expected' value by a simple hash lookup. If there are
5980							# exceptions, that is an indication that a new type is needed.
5981
5982	8427			8427	0	19669	my ( $self, $tok, $next_type, $blank_after_Z ) = @_;
5983
5984							#--------------------------------------------
5985							# Section 1: Table lookup will get most cases
5986							#--------------------------------------------
5987
5988							# Many types are can be obtained by a table lookup. This typically handles
5989							# more than half of the calls. For speed, the caller may try table lookup
5990							# first before calling this sub.
5991	8427					13362	my $op_expected = $op_expected_table{$last_nonblank_type};
5992	8427	100				17089	if ( defined($op_expected) ) {
5993							DEBUG_OPERATOR_EXPECTED
5994	24					58	&& print {*STDOUT}
5995							"OPERATOR_EXPECTED: Table Lookup; returns $op_expected for last type $last_nonblank_type token $last_nonblank_token\n";
5996	24					85	return $op_expected;
5997							}
5998
5999							#---------------------------------------------
6000							# Section 2: Handle special cases if necessary
6001							#---------------------------------------------
6002
6003							# Types 'k', '}' and 'Z' depend on context
6004							# Types 'n', 'v', 'q' also depend on context.
6005
6006							# identifier...
6007							# Fix for c250: removed coding for type 'i' because 'i' and new type 'P'
6008							# are now done by hash table lookup
6009
6010							# keyword...
6011	8403	100				24838	if ( $last_nonblank_type eq 'k' ) {
		100
		100
		100
		100
6012
6013							# keywords expecting OPERATOR:
6014	2647	100				12295	if ( $expecting_operator_token{$last_nonblank_token} ) {
		100
6015	7					19	$op_expected = OPERATOR;
6016							}
6017
6018							# keywords expecting TERM:
6019							elsif ( $expecting_term_token{$last_nonblank_token} ) {
6020
6021							# Exceptions from TERM:
6022
6023							# // may follow perl functions which may be unary operators
6024							# see test file dor.t (defined or);
6025	2541	100	100			10557	if (
		50	100
			66
6026							$tok eq '/'
6027							&& $next_type eq '/'
6028							&& $is_keyword_rejecting_slash_as_pattern_delimiter{
6029							$last_nonblank_token}
6030							)
6031							{
6032	1					4	$op_expected = OPERATOR;
6033							}
6034
6035							# Patch to allow a ? following 'split' to be a deprecated pattern
6036							# delimiter. This patch is coordinated with the omission of split
6037							# from the list
6038							# %is_keyword_rejecting_question_as_pattern_delimiter. This patch
6039							# will force perltidy to guess.
6040							elsif ($tok eq '?'
6041							&& $last_nonblank_token eq 'split' )
6042							{
6043	0					0	$op_expected = UNKNOWN;
6044							}
6045							else {
6046	2540					4426	$op_expected = TERM;
6047							}
6048							}
6049							else {
6050	99					292	$op_expected = TERM;
6051							}
6052							} ## end type 'k'
6053
6054							# closing container token...
6055
6056							# Note that the actual token for type '}' may also be a ')'.
6057
6058							# Also note that $last_nonblank_token is not the token corresponding to
6059							# $last_nonblank_type when the type is a closing container. In that
6060							# case it is the token before the corresponding opening container token.
6061							# So for example, for this snippet
6062							# $a = do { BLOCK } / 2;
6063							# the $last_nonblank_token is 'do' when $last_nonblank_type eq '}'.
6064
6065							elsif ( $last_nonblank_type eq '}' ) {
6066	3593					6443	$op_expected = UNKNOWN;
6067
6068							# handle something after 'do' and 'eval'
6069	3593	100	66			19118	if ( $is_block_operator{$last_nonblank_token} ) {
		100
		100
6070
6071							# something like $a = do { BLOCK } / 2;
6072	82					222	$op_expected = OPERATOR; # block mode following }
6073							}
6074
6075							# $last_nonblank_token =~ /^(\)\|\$\|\-\>)/
6076							elsif ( $is_paren_dollar{ substr( $last_nonblank_token, 0, 1 ) }
6077							\|\| substr( $last_nonblank_token, 0, 2 ) eq '->' )
6078							{
6079	2123					3604	$op_expected = OPERATOR;
6080	2123	50				5479	if ( $last_nonblank_token eq '$' ) { $op_expected = UNKNOWN }
	0					0
6081							}
6082
6083							# Check for smartmatch operator before preceding brace or square
6084							# bracket. For example, at the ? after the ] in the following
6085							# expressions we are expecting an operator:
6086							#
6087							# qr/3/ ~~ ['1234'] ? 1 : 0;
6088							# map { $_ ~~ [ '0', '1' ] ? 'x' : 'o' } @a;
6089							elsif ( $last_nonblank_token eq '~~' ) {
6090	20					39	$op_expected = OPERATOR;
6091							}
6092
6093							# A right brace here indicates the end of a simple block. All
6094							# non-structural right braces have type 'R' all braces associated with
6095							# block operator keywords have been given those keywords as
6096							# "last_nonblank_token" and caught above. (This statement is order
6097							# dependent, and must come after checking $last_nonblank_token).
6098							else {
6099
6100							# patch for dor.t (defined or).
6101	1368	50	33			6174	if ( $tok eq '/'
		100	33
6102							&& $next_type eq '/'
6103							&& $last_nonblank_token eq ']' )
6104							{
6105	0					0	$op_expected = OPERATOR;
6106							}
6107
6108							# Patch for RT #116344: misparse a ternary operator after an
6109							# anonymous hash, like this:
6110							# return ref {} ? 1 : 0;
6111							# The right brace should really be marked type 'R' in this case,
6112							# and it is safest to return an UNKNOWN here. Expecting a TERM will
6113							# cause the '?' to always be interpreted as a pattern delimiter
6114							# rather than introducing a ternary operator.
6115							elsif ( $tok eq '?' ) {
6116	1					3	$op_expected = UNKNOWN;
6117							}
6118							else {
6119	1367					2408	$op_expected = TERM;
6120							}
6121							}
6122							} ## end type '}'
6123
6124							# number or v-string...
6125							# An exception is for VERSION numbers a 'use' statement. It has the format
6126							# use Module VERSION LIST
6127							# We could avoid this exception by writing a special sub to parse 'use'
6128							# statements and perhaps mark these numbers with a new type V (for VERSION)
6129							##elsif ( $last_nonblank_type =~ /^[nv]$/ ) {
6130							elsif ( $is_n_v{$last_nonblank_type} ) {
6131	1985					3123	$op_expected = OPERATOR;
6132	1985	100				4503	if ( $statement_type eq 'use' ) {
6133	11					40	$op_expected = UNKNOWN;
6134							}
6135							}
6136
6137							# quote...
6138							# TODO: labeled prototype words would better be given type 'A' or maybe
6139							# 'J'; not 'q'; or maybe mark as type 'Y'?
6140							elsif ( $last_nonblank_type eq 'q' ) {
6141	137	50				609	if ( $last_nonblank_token eq 'prototype' ) {
		100
6142	0					0	$op_expected = TERM;
6143							}
6144
6145							# update for --use-feature=class (rt145706):
6146							# Look for class VERSION after possible attribute, as in
6147							# class Example::Subclass : isa(Example::Base) 1.345 { ... }
6148							elsif ( $statement_type =~ /^package\b/ ) {
6149	3					7	$op_expected = TERM;
6150							}
6151
6152							# everything else
6153							else {
6154	134					291	$op_expected = OPERATOR;
6155							}
6156							}
6157
6158							# file handle or similar
6159							elsif ( $last_nonblank_type eq 'Z' ) {
6160
6161							# angle.t
6162	40	100	33			567	if ( $last_nonblank_token =~ /^\w/ ) {
		50	66
		100
		50
		100
6163	2					5	$op_expected = UNKNOWN;
6164							}
6165
6166							# Exception to weird parsing rules for 'x(' ... see case b1205:
6167							# In something like 'print $vv x(...' the x is an operator;
6168							# Likewise in 'print $vv x$ww' the x is an operator (case b1207)
6169							# otherwise x follows the weird parsing rules.
6170							elsif ( $tok eq 'x' && $next_type =~ /^[\(\$\@\%]$/ ) {
6171	0					0	$op_expected = OPERATOR;
6172							}
6173
6174							# The 'weird parsing rules' of next section do not work for '<' and '?'
6175							# It is best to mark them as unknown. Test case:
6176							# print $fh <DATA>;
6177							elsif ( $is_weird_parsing_rule_exception{$tok} ) {
6178	4					15	$op_expected = UNKNOWN;
6179							}
6180
6181							# For possible file handle like "$a", Perl uses weird parsing rules.
6182							# For example:
6183							# print $a/2,"/hi"; - division
6184							# print $a / 2,"/hi"; - division
6185							# print $a/ 2,"/hi"; - division
6186							# print $a /2,"/hi"; - pattern (and error)!
6187							# Some examples where this logic works okay, for '&','*','+':
6188							# print $fh &xsi_protos(@mods);
6189							# my $x = new $CompressClass *FH;
6190							# print $OUT +( $count % 15 ? ", " : "\n\t" );
6191							elsif ($blank_after_Z
6192							&& $next_type ne 'b' )
6193							{
6194	0					0	$op_expected = TERM;
6195							}
6196
6197							# Note that '?' and '<' have been moved above
6198							# ( $tok =~ /^([x\/\+\-\*\%\&\.\?\<]\|\>\>)$/ ) {
6199							elsif ( $tok =~ /^([x\/\+\-\*\%\&\.]\|\>\>)$/ ) {
6200
6201							# Do not complain in 'use' statements, which have special syntax.
6202							# For example, from RT#130344:
6203							# use lib $FindBin::Bin . '/lib';
6204	9	50				38	if ( $statement_type ne 'use' ) {
6205	9					37	$self->complain(
6206							"operator in possible indirect object location not recommended\n"
6207							);
6208							}
6209	9					24	$op_expected = OPERATOR;
6210							}
6211
6212							# all other cases
6213							else {
6214	25					72	$op_expected = UNKNOWN;
6215							}
6216							}
6217
6218							# anything else...
6219							else {
6220	1					5	$op_expected = UNKNOWN;
6221							}
6222
6223							DEBUG_OPERATOR_EXPECTED
6224	8403					11569	&& print {*STDOUT}
6225							"OPERATOR_EXPECTED: returns $op_expected for last type $last_nonblank_type token $last_nonblank_token\n";
6226
6227	8403					15774	return $op_expected;
6228
6229							} ## end sub operator_expected
6230
6231							sub new_statement_ok {
6232
6233							# Returns:
6234							# true if a new statement can begin here
6235							# false otherwise
6236
6237							# USES GLOBAL VARIABLES: $last_nonblank_token, $last_nonblank_type,
6238							# $brace_depth, $rbrace_type
6239
6240							# Uses:
6241							# - See if a 'class' statement can occur here
6242							# - See if a keyword begins at a new statement; i.e. is an 'if' a
6243							# block if or a trailing if? Also see if 'format' starts a statement.
6244							# - Decide if a ':' is part of a statement label (not a ternary)
6245
6246							# Curly braces are tricky because some small blocks do not get marked as
6247							# blocks..
6248
6249							# if it follows an opening curly brace..
6250	436	100	66	436	0	2366	if ( $last_nonblank_token eq '{' ) {
		100
6251
6252							# The safe thing is to return true in all cases because:
6253							# - a ternary ':' cannot occur here
6254							# - an 'if' here, for example, cannot be a trailing if
6255							# See test case c231 for an example.
6256							# This works but could be improved, if necessary, by returning
6257							# 'false' at obvious non-blocks.
6258	59					245	return 1;
6259							}
6260
6261							# if it follows a closing code block curly brace..
6262							elsif ($last_nonblank_token eq '}'
6263							&& $last_nonblank_type eq $last_nonblank_token )
6264							{
6265
6266							# a new statement can follow certain closing block braces ...
6267							# FIXME: The following has worked well but returns true in some cases
6268							# where it really should not. We could fix this by either excluding
6269							# certain blocks, like sort/map/grep/eval/asub or by just including
6270							# certain blocks.
6271	99					429	return $rbrace_type->[$brace_depth];
6272							}
6273
6274							# otherwise, it is a label if and only if it follows a ';' (real or fake)
6275							# or another label
6276							else {
6277	278		100			1892	return ( $last_nonblank_type eq ';' \|\| $last_nonblank_type eq 'J' );
6278							}
6279							} ## end sub new_statement_ok
6280
6281							sub code_block_type {
6282
6283							# Decide if this is a block of code, and its type.
6284							# Must be called only when $type = $token = '{'
6285							# The problem is to distinguish between the start of a block of code
6286							# and the start of an anonymous hash reference
6287							# Returns "" if not code block, otherwise returns 'last_nonblank_token'
6288							# to indicate the type of code block. (For example, 'last_nonblank_token'
6289							# might be 'if' for an if block, 'else' for an else block, etc).
6290							# USES GLOBAL VARIABLES: $last_nonblank_token, $last_nonblank_type,
6291							# $last_nonblank_block_type, $brace_depth, $rbrace_type
6292
6293							# handle case of multiple '{'s
6294
6295							# print "BLOCK_TYPE EXAMINING: type=$last_nonblank_type tok=$last_nonblank_token\n";
6296
6297	1303			1303	0	3321	my ( $self, $i, $rtokens, $rtoken_type, $max_token_index ) = @_;
6298	1303	100	66			16515	if ( $last_nonblank_token eq '{'
		100	66
		100	66
		100	100
		100	66
		100	66
		50
		50
		100
		100
		100
6299							&& $last_nonblank_type eq $last_nonblank_token )
6300							{
6301
6302							# opening brace where a statement may appear is probably
6303							# a code block but might be and anonymous hash reference
6304	90	50				318	if ( $rbrace_type->[$brace_depth] ) {
6305	90					355	return $self->decide_if_code_block( $i, $rtokens, $rtoken_type,
6306							$max_token_index );
6307							}
6308
6309							# cannot start a code block within an anonymous hash
6310							else {
6311	0					0	return EMPTY_STRING;
6312							}
6313							}
6314
6315							elsif ( $last_nonblank_token eq ';' ) {
6316
6317							# an opening brace where a statement may appear is probably
6318							# a code block but might be and anonymous hash reference
6319	48					274	return $self->decide_if_code_block( $i, $rtokens, $rtoken_type,
6320							$max_token_index );
6321							}
6322
6323							# handle case of '}{'
6324							elsif ($last_nonblank_token eq '}'
6325							&& $last_nonblank_type eq $last_nonblank_token )
6326							{
6327
6328							# a } { situation ...
6329							# could be hash reference after code block..(blktype1.t)
6330	9	50				29	if ($last_nonblank_block_type) {
6331	9					37	return $self->decide_if_code_block( $i, $rtokens, $rtoken_type,
6332							$max_token_index );
6333							}
6334
6335							# must be a block if it follows a closing hash reference
6336							else {
6337	0					0	return $last_nonblank_token;
6338							}
6339							}
6340
6341							#--------------------------------------------------------------
6342							# NOTE: braces after type characters start code blocks, but for
6343							# simplicity these are not identified as such. See also
6344							# sub is_non_structural_brace.
6345							#--------------------------------------------------------------
6346
6347							## elsif ( $last_nonblank_type eq 't' ) {
6348							## return $last_nonblank_token;
6349							## }
6350
6351							# brace after label:
6352							elsif ( $last_nonblank_type eq 'J' ) {
6353	34					149	return $last_nonblank_token;
6354							}
6355
6356							# otherwise, look at previous token. This must be a code block if
6357							# it follows any of these:
6358							# /^(BEGIN\|END\|CHECK\|INIT\|AUTOLOAD\|DESTROY\|UNITCHECK\|continue\|if\|elsif\|else\|unless\|do\|while\|until\|eval\|for\|foreach\|map\|grep\|sort)$/
6359							elsif ($is_code_block_token{$last_nonblank_token}
6360							\|\| $is_grep_alias{$last_nonblank_token} )
6361							{
6362
6363							# Bug Patch: Note that the opening brace after the 'if' in the following
6364							# snippet is an anonymous hash ref and not a code block!
6365							# print 'hi' if { x => 1, }->{x};
6366							# We can identify this situation because the last nonblank type
6367							# will be a keyword (instead of a closing paren)
6368	481	50	33			2566	if (
			66
6369							$last_nonblank_type eq 'k'
6370							&& ( $last_nonblank_token eq 'if'
6371							\|\| $last_nonblank_token eq 'unless' )
6372							)
6373							{
6374	0					0	return EMPTY_STRING;
6375							}
6376							else {
6377	481					1513	return $last_nonblank_token;
6378							}
6379							}
6380
6381							# or a sub or package BLOCK
6382							# Fixed for c250 to include new package type 'P', and change 'i' to 'S'
6383							elsif (
6384							$last_nonblank_type eq 'P'
6385							\|\| $last_nonblank_type eq 'S'
6386							\|\| ( $last_nonblank_type eq 't'
6387							&& substr( $last_nonblank_token, 0, 3 ) eq 'sub' )
6388							)
6389							{
6390	295					984	return $last_nonblank_token;
6391							}
6392
6393							elsif ( $statement_type =~ /^(sub\|package)\b/ ) {
6394	0					0	return $statement_type;
6395							}
6396
6397							# user-defined subs with block parameters (like grep/map/eval)
6398							elsif ( $last_nonblank_type eq 'G' ) {
6399	0					0	return $last_nonblank_token;
6400							}
6401
6402							# check bareword
6403							elsif ( $last_nonblank_type eq 'w' ) {
6404
6405							# check for syntax 'use MODULE LIST'
6406							# This fixes b1022 b1025 b1027 b1028 b1029 b1030 b1031
6407	22	100				100	return EMPTY_STRING if ( $statement_type eq 'use' );
6408
6409	21					113	return $self->decide_if_code_block( $i, $rtokens, $rtoken_type,
6410							$max_token_index );
6411							}
6412
6413							# Patch for bug # RT #94338 reported by Daniel Trizen
6414							# for-loop in a parenthesized block-map triggering an error message:
6415							# map( { foreach my $item ( '0', '1' ) { print $item} } qw(a b c) );
6416							# Check for a code block within a parenthesized function call
6417							elsif ( $last_nonblank_token eq '(' ) {
6418	81					203	my $paren_type = $rparen_type->[$paren_depth];
6419
6420							# /^(map\|grep\|sort)$/
6421	81	100	66			425	if ( $paren_type && $is_sort_map_grep{$paren_type} ) {
6422
6423							# We will mark this as a code block but use type 't' instead
6424							# of the name of the containing function. This will allow for
6425							# correct parsing but will usually produce better formatting.
6426							# Braces with block type 't' are not broken open automatically
6427							# in the formatter as are other code block types, and this usually
6428							# works best.
6429	1					4	return 't'; # (Not $paren_type)
6430							}
6431							else {
6432	80					244	return EMPTY_STRING;
6433							}
6434							}
6435
6436							# handle unknown syntax ') {'
6437							# we previously appended a '()' to mark this case
6438							elsif ( $last_nonblank_token =~ /$/ ) {
6439	14					50	return $last_nonblank_token;
6440							}
6441
6442							# anything else must be anonymous hash reference
6443							else {
6444	229					676	return EMPTY_STRING;
6445							}
6446							} ## end sub code_block_type
6447
6448							sub decide_if_code_block {
6449
6450							# USES GLOBAL VARIABLES: $last_nonblank_token
6451	168			168	0	462	my ( $self, $i, $rtokens, $rtoken_type, $max_token_index ) = @_;
6452
6453	168					591	my ( $next_nonblank_token, $i_next ) =
6454							$self->find_next_nonblank_token( $i, $rtokens, $max_token_index );
6455
6456							# we are at a '{' where a statement may appear.
6457							# We must decide if this brace starts an anonymous hash or a code
6458							# block.
6459							# return "" if anonymous hash, and $last_nonblank_token otherwise
6460
6461							# initialize to be code BLOCK
6462	168					492	my $code_block_type = $last_nonblank_token;
6463
6464							# Check for the common case of an empty anonymous hash reference:
6465							# Maybe something like sub { { } }
6466	168	100				519	if ( $next_nonblank_token eq '}' ) {
6467	5					14	$code_block_type = EMPTY_STRING;
6468							}
6469
6470							else {
6471
6472							# To guess if this '{' is an anonymous hash reference, look ahead
6473							# and test as follows:
6474							#
6475							# it is a hash reference if next come:
6476							# - a string or digit followed by a comma or =>
6477							# - bareword followed by =>
6478							# otherwise it is a code block
6479							#
6480							# Examples of anonymous hash ref:
6481							# {'aa',};
6482							# {1,2}
6483							#
6484							# Examples of code blocks:
6485							# {1; print "hello\n", 1;}
6486							# {$a,1};
6487
6488							# We are only going to look ahead one more (nonblank/comment) line.
6489							# Strange formatting could cause a bad guess, but that's unlikely.
6490	163					410	my @pre_types;
6491							my @pre_tokens;
6492
6493							# Ignore the rest of this line if it is a side comment
6494	163	100				541	if ( $next_nonblank_token ne '#' ) {
6495	139					525	@pre_types = @{$rtoken_type}[ $i + 1 .. $max_token_index ];
	139					815
6496	139					421	@pre_tokens = @{$rtokens}[ $i + 1 .. $max_token_index ];
	139					710
6497							}
6498
6499							# Here 20 is arbitrary but generous, and prevents wasting lots of time
6500							# in mangled files
6501	163					664	my ( $rpre_tokens, $rpre_types ) =
6502							$self->peek_ahead_for_n_nonblank_pre_tokens(20);
6503	163	100	66			621	if ( defined($rpre_types) && @{$rpre_types} ) {
	155					592
6504	155					315	push @pre_types, @{$rpre_types};
	155					613
6505	155					296	push @pre_tokens, @{$rpre_tokens};
	155					732
6506							}
6507
6508							# put a sentinel token to simplify stopping the search
6509	163					1326	push @pre_types, '}';
6510	163					322	push @pre_types, '}';
6511
6512	163					312	my $jbeg = 0;
6513	163	100				477	$jbeg = 1 if $pre_types[0] eq 'b';
6514
6515							# first look for one of these
6516							# - bareword
6517							# - bareword with leading -
6518							# - digit
6519							# - quoted string
6520	163					292	my $j = $jbeg;
6521	163	100	33			1288	if ( $pre_types[$j] =~ /^[\'\"]/ ) {
		100
		100
		50
6522
6523							# find the closing quote; don't worry about escapes
6524	1					3	my $quote_mark = $pre_types[$j];
6525	1					17	foreach my $k ( $j + 1 .. @pre_types - 2 ) {
6526	1	50				6	if ( $pre_types[$k] eq $quote_mark ) {
6527	1					2	$j = $k + 1;
6528							##my $next = $pre_types[$j];
6529	1					3	last;
6530							}
6531							}
6532							}
6533							elsif ( $pre_types[$j] eq 'd' ) {
6534	8					14	$j++;
6535							}
6536							elsif ( $pre_types[$j] eq 'w' ) {
6537	71					177	$j++;
6538							}
6539							elsif ( $pre_types[$j] eq '-' && $pre_types[ ++$j ] eq 'w' ) {
6540	0					0	$j++;
6541							}
6542							else {
6543							# none of the above
6544							}
6545	163	100				526	if ( $j > $jbeg ) {
6546
6547	80	100				332	$j++ if $pre_types[$j] eq 'b';
6548
6549							# Patched for RT #95708
6550	80	100	33			692	if (
			66
			66
6551
6552							# it is a comma which is not a pattern delimiter except for qw
6553							(
6554							$pre_types[$j] eq ','
6555							## !~ /^(s\|m\|y\|tr\|qr\|q\|qq\|qx)$/
6556							&& !$is_q_qq_qx_qr_s_y_tr_m{ $pre_tokens[$jbeg] }
6557							)
6558
6559							# or a =>
6560							\|\| ( $pre_types[$j] eq '=' && $pre_types[ ++$j ] eq '>' )
6561							)
6562							{
6563	18					43	$code_block_type = EMPTY_STRING;
6564							}
6565							}
6566
6567	163	100				556	if ($code_block_type) {
6568
6569							# Patch for cases b1085 b1128: It is uncertain if this is a block.
6570							# If this brace follows a bareword, then append a space as a signal
6571							# to the formatter that this may not be a block brace. To find the
6572							# corresponding code in Formatter.pm search for 'b1085'.
6573	145	100				1222	$code_block_type .= SPACE if ( $code_block_type =~ /^\w/ );
6574							}
6575							}
6576
6577	168					583	return $code_block_type;
6578							} ## end sub decide_if_code_block
6579
6580							sub report_unexpected {
6581
6582							# report unexpected token type and show where it is
6583							# USES GLOBAL VARIABLES: (none)
6584	0			0	0	0	my ( $self, $found, $expecting, $i_tok, $last_nonblank_i, $rpretoken_map,
6585							$rpretoken_type, $input_line )
6586							= @_;
6587
6588	0	0				0	if ( ++$self->[_unexpected_error_count_] <= MAX_NAG_MESSAGES ) {
6589	0					0	my $msg = "found $found where $expecting expected";
6590	0					0	my $pos = $rpretoken_map->[$i_tok];
6591	0					0	$self->interrupt_logfile();
6592	0					0	my $input_line_number = $self->[_last_line_number_];
6593	0					0	my ( $offset, $numbered_line, $underline ) =
6594							make_numbered_line( $input_line_number, $input_line, $pos );
6595	0					0	$underline = write_on_underline( $underline, $pos - $offset, '^' );
6596
6597	0					0	my $trailer = EMPTY_STRING;
6598	0	0	0			0	if ( ( $i_tok > 0 ) && ( $last_nonblank_i >= 0 ) ) {
6599	0					0	my $pos_prev = $rpretoken_map->[$last_nonblank_i];
6600	0					0	my $num;
6601	0	0				0	if ( $rpretoken_type->[ $i_tok - 1 ] eq 'b' ) {
6602	0					0	$num = $rpretoken_map->[ $i_tok - 1 ] - $pos_prev;
6603							}
6604							else {
6605	0					0	$num = $pos - $pos_prev;
6606							}
6607	0	0				0	if ( $num > 40 ) { $num = 40; $pos_prev = $pos - 40; }
	0					0
	0					0
6608
6609							$underline =
6610	0					0	write_on_underline( $underline, $pos_prev - $offset, '-' x $num );
6611	0					0	$trailer = " (previous token underlined)";
6612							}
6613	0					0	$underline =~ s/\s+$//;
6614	0					0	$self->warning( $numbered_line . "\n" );
6615	0					0	$self->warning( $underline . "\n" );
6616	0					0	$self->warning( $msg . $trailer . "\n" );
6617	0					0	$self->resume_logfile();
6618							}
6619	0					0	return;
6620							} ## end sub report_unexpected
6621
6622							my %is_sigil_or_paren;
6623							my %is_R_closing_sb;
6624
6625							BEGIN {
6626
6627	39			39		336	my @q = qw< $ & % * @ ) >;
6628	39					410	@{is_sigil_or_paren}{@q} = (1) x scalar(@q);
6629
6630	39					236	@q = qw(R ]);
6631	39					86532	@{is_R_closing_sb}{@q} = (1) x scalar(@q);
6632							} ## end BEGIN
6633
6634							sub is_non_structural_brace {
6635
6636							# Decide if a brace or bracket is structural or non-structural
6637							# by looking at the previous token and type
6638							# USES GLOBAL VARIABLES: $last_nonblank_type, $last_nonblank_token
6639
6640							# EXPERIMENTAL: Mark slices as structural; idea was to improve formatting.
6641							# Tentatively deactivated because it caused the wrong operator expectation
6642							# for this code:
6643							# $user = @vars[1] / 100;
6644							# Must update sub operator_expected before re-implementing.
6645							# if ( $last_nonblank_type eq 'i' && $last_nonblank_token =~ /^@/ ) {
6646							# return 0;
6647							# }
6648
6649							#--------------------------------------------------------------
6650							# NOTE: braces after type characters start code blocks, but for
6651							# simplicity these are not identified as such. See also
6652							# sub code_block_type
6653							#--------------------------------------------------------------
6654
6655							##if ($last_nonblank_type eq 't') {return 0}
6656
6657							# otherwise, it is non-structural if it is decorated
6658							# by type information.
6659							# For example, the '{' here is non-structural: ${xxx}
6660							# Removed '::' to fix c074
6661							## $last_nonblank_token =~ /^([\$\@\*\&\%\)]\|->\|::)/
6662							return (
6663							## $last_nonblank_token =~ /^([\$\@\*\&\%\)]\|->)/
6664							$is_sigil_or_paren{ substr( $last_nonblank_token, 0, 1 ) }
6665							\|\| substr( $last_nonblank_token, 0, 2 ) eq '->'
6666
6667							# or if we follow a hash or array closing curly brace or bracket
6668							# For example, the second '{' in this is non-structural: $a{'x'}{'y'}
6669							# because the first '}' would have been given type 'R'
6670							##\|\| $last_nonblank_type =~ /^([R\]])$/
6671	2265		66	2265	0	14950	\|\| $is_R_closing_sb{$last_nonblank_type}
6672							);
6673							} ## end sub is_non_structural_brace
6674
6675							#######################################################################
6676							# Tokenizer routines for tracking container nesting depths
6677							#######################################################################
6678
6679							# The following routines keep track of nesting depths of the nesting
6680							# types, ( [ { and ?. This is necessary for determining the indentation
6681							# level, and also for debugging programs. Not only do they keep track of
6682							# nesting depths of the individual brace types, but they check that each
6683							# of the other brace types is balanced within matching pairs. For
6684							# example, if the program sees this sequence:
6685							#
6686							# { ( ( ) }
6687							#
6688							# then it can determine that there is an extra left paren somewhere
6689							# between the { and the }. And so on with every other possible
6690							# combination of outer and inner brace types. For another
6691							# example:
6692							#
6693							# ( [ ..... ] ] )
6694							#
6695							# which has an extra ] within the parens.
6696							#
6697							# The brace types have indexes 0 .. 3 which are indexes into
6698							# the matrices.
6699							#
6700							# The pair ? : are treated as just another nesting type, with ? acting
6701							# as the opening brace and : acting as the closing brace.
6702							#
6703							# The matrix
6704							#
6705							# $rdepth_array->[$a][$b][ $rcurrent_depth->[$a] ] = $rcurrent_depth->[$b];
6706							#
6707							# saves the nesting depth of brace type $b (where $b is either of the other
6708							# nesting types) when brace type $a enters a new depth. When this depth
6709							# decreases, a check is made that the current depth of brace types $b is
6710							# unchanged, or otherwise there must have been an error. This can
6711							# be very useful for localizing errors, particularly when perl runs to
6712							# the end of a large file (such as this one) and announces that there
6713							# is a problem somewhere.
6714							#
6715							# A numerical sequence number is maintained for every nesting type,
6716							# so that each matching pair can be uniquely identified in a simple
6717							# way.
6718
6719							sub increase_nesting_depth {
6720	4578			4578	0	9285	my ( $self, $aa, $pos ) = @_;
6721
6722							# USES GLOBAL VARIABLES: $rcurrent_depth,
6723							# $rcurrent_sequence_number, $rdepth_array,
6724							# $rstarting_line_of_current_depth, $statement_type
6725	4578					8287	my $cd_aa = ++$rcurrent_depth->[$aa];
6726	4578					6786	$total_depth++;
6727	4578					9092	$rtotal_depth->[$aa][$cd_aa] = $total_depth;
6728	4578					7531	my $input_line_number = $self->[_last_line_number_];
6729	4578					7817	my $input_line = $self->[_line_of_text_];
6730
6731							# Sequence numbers increment by number of items. This keeps
6732							# a unique set of numbers but still allows the relative location
6733							# of any type to be determined.
6734
6735							# make a new unique sequence number
6736	4578					7800	my $seqno = $next_sequence_number++;
6737
6738	4578					8231	$rcurrent_sequence_number->[$aa][$cd_aa] = $seqno;
6739
6740	4578					14795	$rstarting_line_of_current_depth->[$aa][$cd_aa] =
6741							[ $input_line_number, $input_line, $pos ];
6742
6743	4578					14152	for my $bb ( 0 .. @closing_brace_names - 1 ) {
6744	18312	100				33496	next if ( $bb == $aa );
6745	13734					26884	$rdepth_array->[$aa][$bb][$cd_aa] = $rcurrent_depth->[$bb];
6746							}
6747
6748							# set a flag for indenting a nested ternary statement
6749	4578					8568	my $indent = 0;
6750	4578	100				10504	if ( $aa == QUESTION_COLON ) {
6751	187					665	$rnested_ternary_flag->[$cd_aa] = 0;
6752	187	100				713	if ( $cd_aa > 1 ) {
6753	17	100				114	if ( $rnested_ternary_flag->[ $cd_aa - 1 ] == 0 ) {
6754	16					47	my $pdepth = $rtotal_depth->[$aa][ $cd_aa - 1 ];
6755	16	100				57	if ( $pdepth == $total_depth - 1 ) {
6756	8					14	$indent = 1;
6757	8					23	$rnested_ternary_flag->[ $cd_aa - 1 ] = -1;
6758							}
6759							}
6760							}
6761							}
6762
6763							# Fix part #1 for git82: save last token type for propagation of type 'Z'
6764	4578					15682	$rnested_statement_type->[$aa][$cd_aa] =
6765							[ $statement_type, $last_nonblank_type, $last_nonblank_token ];
6766	4578					7775	$statement_type = EMPTY_STRING;
6767	4578					12762	return ( $seqno, $indent );
6768							} ## end sub increase_nesting_depth
6769
6770							sub is_balanced_closing_container {
6771
6772							# Return true if a closing container can go here without error
6773							# Return false if not
6774	47			47	0	125	my ($aa) = @_;
6775
6776							# cannot close if there was no opening
6777	47					97	my $cd_aa = $rcurrent_depth->[$aa];
6778	47	100				189	return if ( $cd_aa <= 0 );
6779
6780							# check that any other brace types $bb contained within would be balanced
6781	8					36	for my $bb ( 0 .. @closing_brace_names - 1 ) {
6782	8	50				20	next if ( $bb == $aa );
6783							return
6784	8	50				43	if ( $rdepth_array->[$aa][$bb][$cd_aa] != $rcurrent_depth->[$bb] );
6785							}
6786
6787							# OK, everything will be balanced
6788	0					0	return 1;
6789							} ## end sub is_balanced_closing_container
6790
6791							sub decrease_nesting_depth {
6792
6793	4578			4578	0	9194	my ( $self, $aa, $pos ) = @_;
6794
6795							# USES GLOBAL VARIABLES: $rcurrent_depth,
6796							# $rcurrent_sequence_number, $rdepth_array, $rstarting_line_of_current_depth
6797							# $statement_type
6798	4578					7121	my $seqno = 0;
6799	4578					7454	my $input_line_number = $self->[_last_line_number_];
6800	4578					7779	my $input_line = $self->[_line_of_text_];
6801
6802	4578					6830	my $outdent = 0;
6803	4578					6669	$total_depth--;
6804	4578					7889	my $cd_aa = $rcurrent_depth->[$aa];
6805	4578	50				9576	if ( $cd_aa > 0 ) {
6806
6807							# set a flag for un-indenting after seeing a nested ternary statement
6808	4578					8321	$seqno = $rcurrent_sequence_number->[$aa][$cd_aa];
6809	4578	100				9983	if ( $aa == QUESTION_COLON ) {
6810	187					567	$outdent = $rnested_ternary_flag->[$cd_aa];
6811							}
6812
6813							# Fix part #2 for git82: use saved type for propagation of type 'Z'
6814							# through type L-R braces. Perl seems to allow ${bareword}
6815							# as an indirect object, but nothing much more complex than that.
6816							( $statement_type, my $saved_type, my $saved_token ) =
6817	4578					6871	@{ $rnested_statement_type->[$aa][ $rcurrent_depth->[$aa] ] };
	4578					13015
6818	4578	50	100			16056	if ( $aa == BRACE
			66
			66
6819							&& $saved_type eq 'Z'
6820							&& $last_nonblank_type eq 'w'
6821							&& $rbrace_structural_type->[$brace_depth] eq 'L' )
6822							{
6823	1					3	$last_nonblank_type = $saved_type;
6824							}
6825
6826							# check that any brace types $bb contained within are balanced
6827	4578					13217	for my $bb ( 0 .. @closing_brace_names - 1 ) {
6828	18312	100				32593	next if ( $bb == $aa );
6829
6830	13734	50				32116	if ( $rdepth_array->[$aa][$bb][$cd_aa] != $rcurrent_depth->[$bb] ) {
6831	0					0	my $diff =
6832							$rcurrent_depth->[$bb] - $rdepth_array->[$aa][$bb][$cd_aa];
6833
6834							# don't whine too many times
6835	0					0	my $saw_brace_error = $self->get_saw_brace_error();
6836	0	0	0			0	if (
			0
6837							$saw_brace_error <= MAX_NAG_MESSAGES
6838
6839							# if too many closing types have occurred, we probably
6840							# already caught this error
6841							&& ( ( $diff > 0 ) \|\| ( $saw_brace_error <= 0 ) )
6842							)
6843							{
6844	0					0	$self->interrupt_logfile();
6845	0					0	my $rsl = $rstarting_line_of_current_depth->[$aa][$cd_aa];
6846	0					0	my $sl = $rsl->[0];
6847	0					0	my $rel = [ $input_line_number, $input_line, $pos ];
6848	0					0	my $el = $rel->[0];
6849	0					0	my ($ess);
6850
6851	0	0	0			0	if ( $diff == 1 \|\| $diff == -1 ) {
6852	0					0	$ess = EMPTY_STRING;
6853							}
6854							else {
6855	0					0	$ess = 's';
6856							}
6857	0	0				0	my $bname =
6858							( $diff > 0 )
6859							? $opening_brace_names[$bb]
6860							: $closing_brace_names[$bb];
6861	0					0	$self->write_error_indicator_pair( @{$rsl}, '^' );
	0					0
6862	0					0	my $msg = <<"EOM";
6863							Found $diff extra $bname$ess between $opening_brace_names[$aa] on line $sl and $closing_brace_names[$aa] on line $el
6864							EOM
6865
6866	0	0				0	if ( $diff > 0 ) {
6867	0					0	my $rml =
6868							$rstarting_line_of_current_depth->[$bb]
6869							[ $rcurrent_depth->[$bb] ];
6870	0					0	my $ml = $rml->[0];
6871	0					0	$msg .=
6872							" The most recent un-matched $bname is on line $ml\n";
6873	0					0	$self->write_error_indicator_pair( @{$rml}, '^' );
	0					0
6874							}
6875	0					0	$self->write_error_indicator_pair( @{$rel}, '^' );
	0					0
6876	0					0	$self->warning($msg);
6877	0					0	$self->resume_logfile();
6878							}
6879	0					0	$self->increment_brace_error();
6880							}
6881							}
6882	4578					8825	$rcurrent_depth->[$aa]--;
6883							}
6884							else {
6885
6886	0					0	my $saw_brace_error = $self->get_saw_brace_error();
6887	0	0				0	if ( $saw_brace_error <= MAX_NAG_MESSAGES ) {
6888	0					0	my $msg = <<"EOM";
6889							There is no previous $opening_brace_names[$aa] to match a $closing_brace_names[$aa] on line $input_line_number
6890							EOM
6891	0					0	$self->indicate_error( $msg, $input_line_number, $input_line, $pos,
6892							'^' );
6893							}
6894	0					0	$self->increment_brace_error();
6895
6896							# keep track of errors in braces alone (ignoring ternary nesting errors)
6897	0	0				0	$self->[_true_brace_error_count_]++
6898							if ( $closing_brace_names[$aa] ne "':'" );
6899							}
6900	4578					12813	return ( $seqno, $outdent );
6901							} ## end sub decrease_nesting_depth
6902
6903							sub check_final_nesting_depths {
6904
6905							# USES GLOBAL VARIABLES: $rcurrent_depth, $rstarting_line_of_current_depth
6906	562			562	0	1401	my $self = shift;
6907
6908	562					2321	for my $aa ( 0 .. @closing_brace_names - 1 ) {
6909
6910	2248					3728	my $cd_aa = $rcurrent_depth->[$aa];
6911	2248	50				5218	if ($cd_aa) {
6912	0					0	my $rsl = $rstarting_line_of_current_depth->[$aa][$cd_aa];
6913	0					0	my $sl = $rsl->[0];
6914	0					0	my $msg = <<"EOM";
6915							Final nesting depth of $opening_brace_names[$aa]s is $cd_aa
6916							The most recent un-matched $opening_brace_names[$aa] is on line $sl
6917							EOM
6918	0					0	$self->indicate_error( $msg, @{$rsl}, '^' );
	0					0
6919	0					0	$self->increment_brace_error();
6920							}
6921							}
6922	562					1613	return;
6923							} ## end sub check_final_nesting_depths
6924
6925							#######################################################################
6926							# Tokenizer routines for looking ahead in input stream
6927							#######################################################################
6928
6929							sub peek_ahead_for_n_nonblank_pre_tokens {
6930
6931							# returns next n pretokens if they exist
6932							# returns undef's if hits eof without seeing any pretokens
6933							# USES GLOBAL VARIABLES: (none)
6934	170			170	0	424	my ( $self, $max_pretokens ) = @_;
6935	170					287	my $line;
6936	170					291	my $i = 0;
6937	170					361	my ( $rpre_tokens, $rmap, $rpre_types );
6938
6939	170					553	while ( $line = $self->peek_ahead( $i++ ) ) {
6940	182					940	$line =~ s/^\s*//; # trim leading blanks
6941	182	100				606	next if ( length($line) <= 0 ); # skip blank
6942	176	100				601	next if ( $line =~ /^#/ ); # skip comment
6943	162					472	( $rpre_tokens, $rmap, $rpre_types ) =
6944							pre_tokenize( $line, $max_pretokens );
6945	162					463	last;
6946							}
6947	170					629	return ( $rpre_tokens, $rpre_types );
6948							} ## end sub peek_ahead_for_n_nonblank_pre_tokens
6949
6950							# look ahead for next non-blank, non-comment line of code
6951							sub peek_ahead_for_nonblank_token {
6952
6953							# USES GLOBAL VARIABLES: (none)
6954	125			125	0	377	my ( $self, $rtokens, $max_token_index ) = @_;
6955	125					234	my $line;
6956	125					259	my $i = 0;
6957
6958	125					534	while ( $line = $self->peek_ahead( $i++ ) ) {
6959	169					979	$line =~ s/^\s*//; # trim leading blanks
6960	169	100				682	next if ( length($line) <= 0 ); # skip blank
6961	144	100				619	next if ( $line =~ /^#/ ); # skip comment
6962
6963							# Updated from 2 to 3 to get trigraphs, added for case b1175
6964	123					474	my ( $rtok, $rmap, $rtype ) = pre_tokenize( $line, 3 );
6965	123					407	my $j = $max_token_index + 1;
6966
6967	123					278	foreach my $tok ( @{$rtok} ) {
	123					342
6968	355	100				950	last if ( $tok =~ "\n" );
6969	320					801	$rtokens->[ ++$j ] = $tok;
6970							}
6971	123					482	last;
6972							}
6973	125					378	return;
6974							} ## end sub peek_ahead_for_nonblank_token
6975
6976							#######################################################################
6977							# Tokenizer guessing routines for ambiguous situations
6978							#######################################################################
6979
6980							sub guess_if_pattern_or_conditional {
6981
6982							# this routine is called when we have encountered a ? following an
6983							# unknown bareword, and we must decide if it starts a pattern or not
6984							# input parameters:
6985							# $i - token index of the ? starting possible pattern
6986							# output parameters:
6987							# $is_pattern = 0 if probably not pattern, =1 if probably a pattern
6988							# msg = a warning or diagnostic message
6989							# USES GLOBAL VARIABLES: $last_nonblank_token
6990
6991	11			11	0	45	my ( $self, $i, $rtokens, $rtoken_map, $max_token_index ) = @_;
6992	11					22	my $is_pattern = 0;
6993	11					55	my $msg = "guessing that ? after $last_nonblank_token starts a ";
6994
6995	11	50				51	if ( $i >= $max_token_index ) {
6996	0					0	$msg .= "conditional (no end to pattern found on the line)\n";
6997							}
6998							else {
6999	11					33	my $ibeg = $i;
7000	11					29	$i = $ibeg + 1;
7001	11					35	my $next_token = $rtokens->[$i]; # first token after ?
7002
7003							# look for a possible ending ? on this line..
7004	11					28	my $in_quote = 1;
7005	11					25	my $quote_depth = 0;
7006	11					22	my $quote_character = EMPTY_STRING;
7007	11					26	my $quote_pos = 0;
7008	11					21	my $quoted_string;
7009							(
7010
7011	11					60	$i,
7012							$in_quote,
7013							$quote_character,
7014							$quote_pos,
7015							$quote_depth,
7016							$quoted_string,
7017
7018							) = $self->follow_quoted_string(
7019
7020							$ibeg,
7021							$in_quote,
7022							$rtokens,
7023							$quote_character,
7024							$quote_pos,
7025							$quote_depth,
7026							$max_token_index,
7027
7028							);
7029
7030	11	50				97	if ($in_quote) {
7031
7032							# we didn't find an ending ? on this line,
7033							# so we bias towards conditional
7034	11					34	$is_pattern = 0;
7035	11					48	$msg .= "conditional (no ending ? on this line)\n";
7036
7037							# we found an ending ?, so we bias towards a pattern
7038							}
7039							else {
7040
7041							# Watch out for an ending ? in quotes, like this
7042							# my $case_flag = File::Spec->case_tolerant ? '(?i)' : '';
7043	0					0	my $s_quote = 0;
7044	0					0	my $d_quote = 0;
7045	0					0	my $colons = 0;
7046	0					0	foreach my $ii ( $ibeg + 1 .. $i - 1 ) {
7047	0					0	my $tok = $rtokens->[$ii];
7048	0	0				0	if ( $tok eq ":" ) { $colons++ }
	0					0
7049	0	0				0	if ( $tok eq "'" ) { $s_quote++ }
	0					0
7050	0	0				0	if ( $tok eq '"' ) { $d_quote++ }
	0					0
7051							}
7052	0	0	0			0	if ( $s_quote % 2 \|\| $d_quote % 2 \|\| $colons ) {
		0	0
7053	0					0	$is_pattern = 0;
7054	0					0	$msg .= "found ending ? but unbalanced quote chars\n";
7055							}
7056							elsif (
7057							$self->pattern_expected( $i, $rtokens, $max_token_index ) >= 0 )
7058							{
7059	0					0	$is_pattern = 1;
7060	0					0	$msg .= "pattern (found ending ? and pattern expected)\n";
7061							}
7062							else {
7063	0					0	$msg .= "pattern (uncertain, but found ending ?)\n";
7064							}
7065							}
7066							}
7067	11					45	return ( $is_pattern, $msg );
7068							} ## end sub guess_if_pattern_or_conditional
7069
7070							my %is_known_constant;
7071							my %is_known_function;
7072
7073							BEGIN {
7074
7075							# Constants like 'pi' in Trig.pm are common
7076	39			39		280	my @q = qw(pi pi2 pi4 pip2 pip4);
7077	39					321	@{is_known_constant}{@q} = (1) x scalar(@q);
7078
7079							# parenless calls of 'ok' are common
7080	39					127	@q = qw( ok );
7081	39					70613	@{is_known_function}{@q} = (1) x scalar(@q);
7082							} ## end BEGIN
7083
7084							sub guess_if_pattern_or_division {
7085
7086							# this routine is called when we have encountered a / following an
7087							# unknown bareword, and we must decide if it starts a pattern or is a
7088							# division
7089							# input parameters:
7090							# $i - token index of the / starting possible pattern
7091							# output parameters:
7092							# $is_pattern = 0 if probably division, =1 if probably a pattern
7093							# msg = a warning or diagnostic message
7094							# USES GLOBAL VARIABLES: $last_nonblank_token
7095	0			0	0	0	my ( $self, $i, $rtokens, $rtoken_map, $max_token_index ) = @_;
7096	0					0	my $is_pattern = 0;
7097	0					0	my $msg = "guessing that / after $last_nonblank_token starts a ";
7098
7099	0	0				0	if ( $i >= $max_token_index ) {
7100	0					0	$msg .= "division (no end to pattern found on the line)\n";
7101							}
7102							else {
7103	0					0	my $ibeg = $i;
7104	0					0	my $divide_possible =
7105							$self->is_possible_numerator( $i, $rtokens, $max_token_index );
7106
7107	0	0				0	if ( $divide_possible < 0 ) {
7108	0					0	$msg = "pattern (division not possible here)\n";
7109	0					0	$is_pattern = 1;
7110	0					0	return ( $is_pattern, $msg );
7111							}
7112
7113	0					0	$i = $ibeg + 1;
7114	0					0	my $next_token = $rtokens->[$i]; # first token after slash
7115
7116							# One of the things we can look at is the spacing around the slash.
7117							# There # are four possible spacings around the first slash:
7118							#
7119							# return pi/two;#/; -/-
7120							# return pi/ two;#/; -/+
7121							# return pi / two;#/; +/+
7122							# return pi /two;#/; +/- <-- possible pattern
7123							#
7124							# Spacing rule: a space before the slash but not after the slash
7125							# usually indicates a pattern. We can use this to break ties.
7126
7127	0		0			0	my $is_pattern_by_spacing =
7128							( $i > 1 && $next_token !~ m/^\s/ && $rtokens->[ $i - 2 ] =~ m/^\s/ );
7129
7130							# look for a possible ending / on this line..
7131	0					0	my $in_quote = 1;
7132	0					0	my $quote_depth = 0;
7133	0					0	my $quote_character = EMPTY_STRING;
7134	0					0	my $quote_pos = 0;
7135	0					0	my $quoted_string;
7136							(
7137	0					0	$i, $in_quote, $quote_character, $quote_pos, $quote_depth,
7138							$quoted_string
7139							)
7140							= $self->follow_quoted_string( $ibeg, $in_quote, $rtokens,
7141							$quote_character, $quote_pos, $quote_depth, $max_token_index );
7142
7143	0	0				0	if ($in_quote) {
7144
7145							# we didn't find an ending / on this line, so we bias towards
7146							# division
7147	0	0				0	if ( $divide_possible >= 0 ) {
7148	0					0	$is_pattern = 0;
7149	0					0	$msg .= "division (no ending / on this line)\n";
7150							}
7151							else {
7152
7153							# assuming a multi-line pattern ... this is risky, but division
7154							# does not seem possible. If this fails, it would either be due
7155							# to a syntax error in the code, or the division_expected logic
7156							# needs to be fixed.
7157	0					0	$msg = "multi-line pattern (division not possible)\n";
7158	0					0	$is_pattern = 1;
7159							}
7160							}
7161
7162							# we found an ending /, so we bias slightly towards a pattern
7163							else {
7164
7165	0					0	my $pattern_expected =
7166							$self->pattern_expected( $i, $rtokens, $max_token_index );
7167
7168	0	0				0	if ( $pattern_expected >= 0 ) {
7169
7170							# pattern looks possible...
7171	0	0				0	if ( $divide_possible >= 0 ) {
7172
7173							# Both pattern and divide can work here...
7174
7175							# Increase weight of divide if a pure number follows
7176	0					0	$divide_possible += $next_token =~ /^\d+$/;
7177
7178							# Check for known constants in the numerator, like 'pi'
7179	0	0				0	if ( $is_known_constant{$last_nonblank_token} ) {
		0
		0
		0
7180	0					0	$msg .=
7181							"division (pattern works too but saw known constant '$last_nonblank_token')\n";
7182	0					0	$is_pattern = 0;
7183							}
7184
7185							# A very common bare word in pattern expressions is 'ok'
7186							elsif ( $is_known_function{$last_nonblank_token} ) {
7187	0					0	$msg .=
7188							"pattern (division works too but saw '$last_nonblank_token')\n";
7189	0					0	$is_pattern = 1;
7190							}
7191
7192							# If one rule is more definite, use it
7193							elsif ( $divide_possible > $pattern_expected ) {
7194	0					0	$msg .=
7195							"division (more likely based on following tokens)\n";
7196	0					0	$is_pattern = 0;
7197							}
7198
7199							# otherwise, use the spacing rule
7200							elsif ($is_pattern_by_spacing) {
7201	0					0	$msg .=
7202							"pattern (guess on spacing, but division possible too)\n";
7203	0					0	$is_pattern = 1;
7204							}
7205							else {
7206	0					0	$msg .=
7207							"division (guess on spacing, but pattern is possible too)\n";
7208	0					0	$is_pattern = 0;
7209							}
7210							}
7211
7212							# divide_possible < 0 means divide can not work here
7213							else {
7214	0					0	$is_pattern = 1;
7215	0					0	$msg .= "pattern (division not possible)\n";
7216							}
7217							}
7218
7219							# pattern does not look possible...
7220							else {
7221
7222	0	0				0	if ( $divide_possible >= 0 ) {
7223	0					0	$is_pattern = 0;
7224	0					0	$msg .= "division (pattern not possible)\n";
7225							}
7226
7227							# Neither pattern nor divide look possible...go by spacing
7228							else {
7229	0	0				0	if ($is_pattern_by_spacing) {
7230	0					0	$msg .= "pattern (guess on spacing)\n";
7231	0					0	$is_pattern = 1;
7232							}
7233							else {
7234	0					0	$msg .= "division (guess on spacing)\n";
7235	0					0	$is_pattern = 0;
7236							}
7237							}
7238							}
7239							}
7240							}
7241	0					0	return ( $is_pattern, $msg );
7242							} ## end sub guess_if_pattern_or_division
7243
7244							# try to resolve here-doc vs. shift by looking ahead for
7245							# non-code or the end token (currently only looks for end token)
7246							# returns 1 if it is probably a here doc, 0 if not
7247							sub guess_if_here_doc {
7248
7249	0			0	0	0	my ( $self, $next_token ) = @_;
7250
7251							# This is how many lines we will search for a target as part of the
7252							# guessing strategy. It is a constant because there is probably
7253							# little reason to change it.
7254							# USES GLOBAL VARIABLES: $current_package $ris_constant,
7255	0					0	my $HERE_DOC_WINDOW = 40;
7256
7257	0					0	my $here_doc_expected = 0;
7258	0					0	my $line;
7259	0					0	my $k = 0;
7260	0					0	my $msg = "checking <<";
7261
7262	0					0	while ( $line = $self->peek_ahead( $k++ ) ) {
7263	0					0	chomp $line;
7264
7265	0	0				0	if ( $line =~ /^$next_token$/ ) {
7266	0					0	$msg .= " -- found target $next_token ahead $k lines\n";
7267	0					0	$here_doc_expected = 1; # got it
7268	0					0	last;
7269							}
7270	0	0				0	last if ( $k >= $HERE_DOC_WINDOW );
7271							}
7272
7273	0	0				0	if ( !$here_doc_expected ) {
7274
7275	0	0				0	if ( !defined($line) ) {
7276	0					0	$here_doc_expected = -1; # hit eof without seeing target
7277	0					0	$msg .= " -- must be shift; target $next_token not in file\n";
7278
7279							}
7280							else { # still unsure..taking a wild guess
7281
7282	0	0				0	if ( !$ris_constant->{$current_package}{$next_token} ) {
7283	0					0	$here_doc_expected = 1;
7284	0					0	$msg .=
7285							" -- guessing it's a here-doc ($next_token not a constant)\n";
7286							}
7287							else {
7288	0					0	$msg .=
7289							" -- guessing it's a shift ($next_token is a constant)\n";
7290							}
7291							}
7292							}
7293	0					0	$self->write_logfile_entry($msg);
7294	0					0	return $here_doc_expected;
7295							} ## end sub guess_if_here_doc
7296
7297							#######################################################################
7298							# Tokenizer Routines for scanning identifiers and related items
7299							#######################################################################
7300
7301							sub scan_bare_identifier_do {
7302
7303							# this routine is called to scan a token starting with an alphanumeric
7304							# variable or package separator, :: or '.
7305							# USES GLOBAL VARIABLES: $current_package, $last_nonblank_token,
7306							# $last_nonblank_type, $rparen_type, $paren_depth
7307
7308	1674			1674	0	5086	my ( $self, $input_line, $i, $tok, $type, $prototype, $rtoken_map,
7309							$max_token_index )
7310							= @_;
7311	1674					2893	my $i_begin = $i;
7312	1674					2896	my $package = undef;
7313
7314	1674					2557	my $i_beg = $i;
7315
7316							# we have to back up one pretoken at a :: since each : is one pretoken
7317	1674	100				4192	if ( $tok eq '::' ) { $i_beg-- }
	9					25
7318	1674	50				3903	if ( $tok eq '->' ) { $i_beg-- }
	0					0
7319	1674					3099	my $pos_beg = $rtoken_map->[$i_beg];
7320	1674					5097	pos($input_line) = $pos_beg;
7321
7322							# Examples:
7323							# A::B::C
7324							# A::
7325							# ::A
7326							# A'B
7327	1674	50				12350	if ( $input_line =~ m/\G\s((?:\w(?:'\|::)))*(?:(?:->)?(\w+))?/gc ) {
7328
7329	1674					3373	my $pos = pos($input_line);
7330	1674					2798	my $numc = $pos - $pos_beg;
7331	1674					3805	$tok = substr( $input_line, $pos_beg, $numc );
7332
7333							# type 'w' includes anything without leading type info
7334							# ($,%,@,*) including something like abc::def::ghi
7335	1674					2787	$type = 'w';
7336
7337	1674					2912	my $sub_name = EMPTY_STRING;
7338	1674	100				4762	if ( defined($2) ) { $sub_name = $2; }
	1669					3167
7339	1674	100				4242	if ( defined($1) ) {
7340	274					669	$package = $1;
7341
7342							# patch: don't allow isolated package name which just ends
7343							# in the old style package separator (single quote). Example:
7344							# use CGI':all';
7345	274	50	66			1162	if ( !($sub_name) && substr( $package, -1, 1 ) eq '\'' ) {
7346	0					0	$pos--;
7347							}
7348
7349	274					763	$package =~ s/\'/::/g;
7350	274	100				889	if ( $package =~ /^\:/ ) { $package = 'main' . $package }
	9					39
7351	274					1202	$package =~ s/::$//;
7352							}
7353							else {
7354	1400					2698	$package = $current_package;
7355
7356							# patched for c043, part 1: keyword does not follow '->'
7357	1400	50	66			5737	if ( $is_keyword{$tok} && $last_nonblank_type ne '->' ) {
7358	0					0	$type = 'k';
7359							}
7360							}
7361
7362							# if it is a bareword.. patched for c043, part 2: not following '->'
7363	1674	100	66			7737	if ( $type eq 'w' && $last_nonblank_type ne '->' ) {
7364
7365							# check for v-string with leading 'v' type character
7366							# (This seems to have precedence over filehandle, type 'Y')
7367	1004	100	66			14658	if ( $tok =~ /^v\d[_\d]*$/ ) {
		100	66
		100	66
		50
		50
		100
		100
7368
7369							# we only have the first part - something like 'v101' -
7370							# look for more
7371	2	50				14	if ( $input_line =~ m/\G(\.\d[_\d]*)+/gc ) {
7372	2					7	$pos = pos($input_line);
7373	2					4	$numc = $pos - $pos_beg;
7374	2					9	$tok = substr( $input_line, $pos_beg, $numc );
7375							}
7376	2					4	$type = 'v';
7377
7378							# warn if this version can't handle v-strings
7379	2					11	$self->report_v_string($tok);
7380							}
7381
7382							elsif ( $ris_constant->{$package}{$sub_name} ) {
7383	12					26	$type = 'C';
7384							}
7385
7386							# bareword after sort has implied empty prototype; for example:
7387							# @sorted = sort numerically ( 53, 29, 11, 32, 7 );
7388							# This has priority over whatever the user has specified.
7389							elsif ($last_nonblank_token eq 'sort'
7390							&& $last_nonblank_type eq 'k' )
7391							{
7392	1					2	$type = 'Z';
7393							}
7394
7395							# Note: strangely, perl does not seem to really let you create
7396							# functions which act like eval and do, in the sense that eval
7397							# and do may have operators following the final }, but any operators
7398							# that you create with prototype (&) apparently do not allow
7399							# trailing operators, only terms. This seems strange.
7400							# If this ever changes, here is the update
7401							# to make perltidy behave accordingly:
7402
7403							# elsif ( $ris_block_function->{$package}{$tok} ) {
7404							# $tok='eval'; # patch to do braces like eval - doesn't work
7405							# $type = 'k';
7406							#}
7407							# TODO: This could become a separate type to allow for different
7408							# future behavior:
7409							elsif ( $ris_block_function->{$package}{$sub_name} ) {
7410	0					0	$type = 'G';
7411							}
7412							elsif ( $ris_block_list_function->{$package}{$sub_name} ) {
7413	0					0	$type = 'G';
7414							}
7415							elsif ( $ris_user_function->{$package}{$sub_name} ) {
7416	6					19	$type = 'U';
7417	6					22	$prototype = $ruser_function_prototype->{$package}{$sub_name};
7418							}
7419
7420							# check for indirect object
7421							elsif (
7422
7423							# added 2001-03-27: must not be followed immediately by '('
7424							# see fhandle.t
7425							( $input_line !~ m/\G\(/gc )
7426
7427							# and
7428							&& (
7429
7430							# preceded by keyword like 'print', 'printf' and friends
7431							$is_indirect_object_taker{$last_nonblank_token}
7432
7433							# or preceded by something like 'print(' or 'printf('
7434							\|\| (
7435							( $last_nonblank_token eq '(' )
7436							&& $is_indirect_object_taker{
7437							$rparen_type->[$paren_depth]
7438							}
7439
7440							)
7441							)
7442							)
7443							{
7444
7445							# may not be indirect object unless followed by a space;
7446							# updated 2021-01-16 to consider newline to be a space.
7447							# updated for case b990 to look for either ';' or space
7448	4	50	33			99	if ( pos($input_line) == length($input_line)
7449							\|\| $input_line =~ m/\G[;\s]/gc )
7450							{
7451	4					13	$type = 'Y';
7452
7453							# Abandon Hope ...
7454							# Perl's indirect object notation is a very bad
7455							# thing and can cause subtle bugs, especially for
7456							# beginning programmers. And I haven't even been
7457							# able to figure out a sane warning scheme which
7458							# doesn't get in the way of good scripts.
7459
7460							# Complain if a filehandle has any lower case
7461							# letters. This is suggested good practice.
7462							# Use 'sub_name' because something like
7463							# main::MYHANDLE is ok for filehandle
7464	4	100				23	if ( $sub_name =~ /[a-z]/ ) {
7465
7466							# could be bug caused by older perltidy if
7467							# followed by '('
7468	1	50				6	if ( $input_line =~ m/\G\s*\(/gc ) {
7469	1					8	$self->complain(
7470							"Caution: unknown word '$tok' in indirect object slot\n"
7471							);
7472							}
7473							}
7474							}
7475
7476							# bareword not followed by a space -- may not be filehandle
7477							# (may be function call defined in a 'use' statement)
7478							else {
7479	0					0	$type = 'Z';
7480							}
7481							}
7482
7483							# none of the above special types
7484							else {
7485							}
7486							}
7487
7488							# Now we must convert back from character position
7489							# to pre_token index.
7490							# I don't think an error flag can occur here ..but who knows
7491	1674					2854	my $error;
7492	1674					4658	( $i, $error ) =
7493							inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );
7494	1674	50				4601	if ($error) {
7495	0					0	$self->warning(
7496							"scan_bare_identifier: Possibly invalid tokenization\n");
7497							}
7498							}
7499
7500							# no match but line not blank - could be syntax error
7501							# perl will take '::' alone without complaint
7502							else {
7503	0					0	$type = 'w';
7504
7505							# change this warning to log message if it becomes annoying
7506	0					0	$self->warning("didn't find identifier after leading ::\n");
7507							}
7508	1674					6953	return ( $i, $tok, $type, $prototype );
7509							} ## end sub scan_bare_identifier_do
7510
7511							sub scan_id_do {
7512
7513							# This is the new scanner and will eventually replace scan_identifier.
7514							# Only type 'sub' and 'package' are implemented.
7515							# Token types $ * % @ & -> are not yet implemented.
7516							#
7517							# Scan identifier following a type token.
7518							# The type of call depends on $id_scan_state: $id_scan_state = ''
7519							# for starting call, in which case $tok must be the token defining
7520							# the type.
7521							#
7522							# If the type token is the last nonblank token on the line, a value
7523							# of $id_scan_state = $tok is returned, indicating that further
7524							# calls must be made to get the identifier. If the type token is
7525							# not the last nonblank token on the line, the identifier is
7526							# scanned and handled and a value of '' is returned.
7527
7528	332			332	0	1256	my ( $self, $input_line, $i, $tok, $rtokens, $rtoken_map, $id_scan_state,
7529							$max_token_index )
7530							= @_;
7531	39			39		474	use constant DEBUG_NSCAN => 0;
	39					104
	39					49441
7532	332					705	my $type = EMPTY_STRING;
7533	332					680	my ( $i_beg, $pos_beg );
7534
7535							#print "NSCAN:entering i=$i, tok=$tok, type=$type, state=$id_scan_state\n";
7536							#my ($a,$b,$c) = caller;
7537							#print "NSCAN: scan_id called with tok=$tok $a $b $c\n";
7538
7539							# on re-entry, start scanning at first token on the line
7540	332	100				951	if ($id_scan_state) {
7541	10					20	$i_beg = $i;
7542	10					27	$type = EMPTY_STRING;
7543							}
7544
7545							# on initial entry, start scanning just after type token
7546							else {
7547	322					621	$i_beg = $i + 1;
7548	322					572	$id_scan_state = $tok;
7549	322					735	$type = 't';
7550							}
7551
7552							# find $i_beg = index of next nonblank token,
7553							# and handle empty lines
7554	332					602	my $blank_line = 0;
7555	332					767	my $next_nonblank_token = $rtokens->[$i_beg];
7556	332	100				937	if ( $i_beg > $max_token_index ) {
7557	2					5	$blank_line = 1;
7558							}
7559							else {
7560
7561							# only a '#' immediately after a '$' is not a comment
7562	330	50				1084	if ( $next_nonblank_token eq '#' ) {
7563	0	0				0	if ( $tok ne '$' ) {
7564	0					0	$blank_line = 1;
7565							}
7566							}
7567
7568	330	100				1597	if ( $next_nonblank_token =~ /^\s/ ) {
7569	310					1288	( $next_nonblank_token, $i_beg ) =
7570							find_next_nonblank_token_on_this_line( $i_beg, $rtokens,
7571							$max_token_index );
7572	310	100				1740	if ( $next_nonblank_token =~ /(^#\|^\s*$)/ ) {
7573	4					12	$blank_line = 1;
7574							}
7575							}
7576							}
7577
7578							# handle non-blank line; identifier, if any, must follow
7579	332	100				971	if ( !$blank_line ) {
7580
7581	326	100				1004	if ( $is_sub{$id_scan_state} ) {
		50
7582	300					3371	( $i, $tok, $type, $id_scan_state ) = $self->do_scan_sub(
7583							{
7584							input_line => $input_line,
7585							i => $i,
7586							i_beg => $i_beg,
7587							tok => $tok,
7588							type => $type,
7589							rtokens => $rtokens,
7590							rtoken_map => $rtoken_map,
7591							id_scan_state => $id_scan_state,
7592							max_token_index => $max_token_index,
7593							}
7594							);
7595							}
7596
7597							elsif ( $is_package{$id_scan_state} ) {
7598	26					97	( $i, $tok, $type ) =
7599							$self->do_scan_package( $input_line, $i, $i_beg, $tok, $type,
7600							$rtokens, $rtoken_map, $max_token_index );
7601	26					68	$id_scan_state = EMPTY_STRING;
7602							}
7603
7604							else {
7605	0					0	$self->warning("invalid token in scan_id: $tok\n");
7606	0					0	$id_scan_state = EMPTY_STRING;
7607							}
7608							}
7609
7610	332	50	33			1899	if ( $id_scan_state && ( !defined($type) \|\| !$type ) ) {
			66
7611
7612							# shouldn't happen:
7613	0					0	if (DEVEL_MODE) {
7614							$self->Fault(<<EOM);
7615							Program bug in scan_id: undefined type but scan_state=$id_scan_state
7616							EOM
7617							}
7618							$self->warning(
7619	0					0	"Possible program bug in sub scan_id: undefined type but scan_state=$id_scan_state\n"
7620							);
7621	0					0	$self->report_definite_bug();
7622							}
7623
7624	332					557	DEBUG_NSCAN && do {
7625							print {*STDOUT}
7626							"NSCAN: returns i=$i, tok=$tok, type=$type, state=$id_scan_state\n";
7627							};
7628	332					1444	return ( $i, $tok, $type, $id_scan_state );
7629							} ## end sub scan_id_do
7630
7631							sub check_prototype {
7632	138			138	0	482	my ( $proto, $package, $subname ) = @_;
7633	138	50				446	return if ( !defined($package) );
7634	138	50				450	return if ( !defined($subname) );
7635	138	100				396	if ( defined($proto) ) {
7636	34					174	$proto =~ s/^\s\(\s//;
7637	34					130	$proto =~ s/\s*\)$//;
7638	34	100				103	if ($proto) {
7639	5					26	$ris_user_function->{$package}{$subname} = 1;
7640	5					26	$ruser_function_prototype->{$package}{$subname} = "($proto)";
7641
7642							# prototypes containing '&' must be treated specially..
7643	5	100				29	if ( $proto =~ /\&/ ) {
7644
7645							# right curly braces of prototypes ending in
7646							# '&' may be followed by an operator
7647	1	50				9	if ( $proto =~ /\&$/ ) {
7648	0					0	$ris_block_function->{$package}{$subname} = 1;
7649							}
7650
7651							# right curly braces of prototypes NOT ending in
7652							# '&' may NOT be followed by an operator
7653							else {
7654	1					5	$ris_block_list_function->{$package}{$subname} = 1;
7655							}
7656							}
7657							}
7658							else {
7659	29					89	$ris_constant->{$package}{$subname} = 1;
7660							}
7661							}
7662							else {
7663	104					400	$ris_user_function->{$package}{$subname} = 1;
7664							}
7665	138					358	return;
7666							} ## end sub check_prototype
7667
7668							sub do_scan_package {
7669
7670							# do_scan_package parses a package name
7671							# it is called with $i_beg equal to the index of the first nonblank
7672							# token following a 'package' token.
7673							# USES GLOBAL VARIABLES: $current_package,
7674
7675							# package NAMESPACE
7676							# package NAMESPACE VERSION
7677							# package NAMESPACE BLOCK
7678							# package NAMESPACE VERSION BLOCK
7679							#
7680							# If VERSION is provided, package sets the $VERSION variable in the given
7681							# namespace to a version object with the VERSION provided. VERSION must be
7682							# a "strict" style version number as defined by the version module: a
7683							# positive decimal number (integer or decimal-fraction) without
7684							# exponentiation or else a dotted-decimal v-string with a leading 'v'
7685							# character and at least three components.
7686							# reference http://perldoc.perl.org/functions/package.html
7687
7688							my (
7689	26			26	0	87	$self, $input_line, $i,
7690							$i_beg, $tok, $type,
7691							$rtokens, $rtoken_map, $max_token_index
7692							) = @_;
7693	26					48	my $package = undef;
7694	26					52	my $pos_beg = $rtoken_map->[$i_beg];
7695	26					79	pos($input_line) = $pos_beg;
7696
7697							# handle non-blank line; package name, if any, must follow
7698	26	50				154	if ( $input_line =~ m/\G\s((?:\w(?:'\|::))\w)/gc ) {
7699	26					103	$package = $1;
7700	26	50	33			165	$package = ( defined($1) && $1 ) ? $1 : 'main';
7701	26					77	$package =~ s/\'/::/g;
7702	26	50				83	if ( $package =~ /^\:/ ) { $package = 'main' . $package }
	0					0
7703	26					56	$package =~ s/::$//;
7704	26					47	my $pos = pos($input_line);
7705	26					47	my $numc = $pos - $pos_beg;
7706	26					77	$tok = 'package ' . substr( $input_line, $pos_beg, $numc );
7707	26					49	$type = 'P'; # Fix for c250, previously 'i'
7708
7709							# Now we must convert back from character position
7710							# to pre_token index.
7711							# I don't think an error flag can occur here ..but ?
7712	26					43	my $error;
7713	26					111	( $i, $error ) =
7714							inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );
7715	26	50				79	if ($error) { $self->warning("Possibly invalid package\n") }
	0					0
7716	26					55	$current_package = $package;
7717
7718							# we should now have package NAMESPACE
7719							# now expecting VERSION, BLOCK, or ; to follow ...
7720							# package NAMESPACE VERSION
7721							# package NAMESPACE BLOCK
7722							# package NAMESPACE VERSION BLOCK
7723	26					80	my ( $next_nonblank_token, $i_next ) =
7724							$self->find_next_nonblank_token( $i, $rtokens, $max_token_index );
7725
7726							# check that something recognizable follows, but do not parse.
7727							# A VERSION number will be parsed later as a number or v-string in the
7728							# normal way. What is important is to set the statement type if
7729							# everything looks okay so that the operator_expected() routine
7730							# knows that the number is in a package statement.
7731							# Examples of valid primitive tokens that might follow are:
7732							# 1235 . ; { } v3 v
7733							# FIX: added a '#' since a side comment may also follow
7734							# Added ':' for class attributes (for --use-feature=class, rt145706)
7735	26	50				143	if ( $next_nonblank_token =~ /^([v\.\d;\{\}\#\:])\|v\d\|\d+$/ ) {
7736	26					75	$statement_type = $tok;
7737							}
7738							else {
7739	0					0	$self->warning(
7740							"Unexpected '$next_nonblank_token' after package name '$tok'\n"
7741							);
7742							}
7743							}
7744
7745							# no match but line not blank --
7746							# could be a label with name package, like package: , for example.
7747							else {
7748	0					0	$type = 'k';
7749							}
7750
7751	26					98	return ( $i, $tok, $type );
7752							} ## end sub do_scan_package
7753
7754							{ ## begin closure for sub scan_complex_identifier
7755
7756	39			39		405	use constant DEBUG_SCAN_ID => 0;
	39					130
	39					5324
7757
7758							# Constant hash:
7759							my %is_special_variable_char;
7760
7761							BEGIN {
7762
7763							# These are the only characters which can (currently) form special
7764							# variables, like $^W: (issue c066).
7765	39			39		297	my @q =
7766							qw{ ? A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ };
7767	39					146061	@{is_special_variable_char}{@q} = (1) x scalar(@q);
7768							} ## end BEGIN
7769
7770							# These are the possible states for this scanner:
7771							my $scan_state_SIGIL = '$';
7772							my $scan_state_ALPHA = 'A';
7773							my $scan_state_COLON = ':';
7774							my $scan_state_LPAREN = '(';
7775							my $scan_state_RPAREN = ')';
7776							my $scan_state_AMPERSAND = '&';
7777							my $scan_state_SPLIT = '^';
7778
7779							# Only these non-blank states may be returned to caller:
7780							my %is_returnable_scan_state = (
7781							$scan_state_SIGIL => 1,
7782							$scan_state_AMPERSAND => 1,
7783							);
7784
7785							# USES GLOBAL VARIABLES:
7786							# $context, $last_nonblank_token, $last_nonblank_type
7787
7788							#-----------
7789							# call args:
7790							#-----------
7791							my ( $i, $id_scan_state, $identifier, $rtokens, $max_token_index,
7792							$expecting, $container_type );
7793
7794							#-------------------------------------------
7795							# my variables, re-initialized on each call:
7796							#-------------------------------------------
7797							my $i_begin; # starting index $i
7798							my $type; # returned identifier type
7799							my $tok_begin; # starting token
7800							my $tok; # returned token
7801							my $id_scan_state_begin; # starting scan state
7802							my $identifier_begin; # starting identifier
7803							my $i_save; # a last good index, in case of error
7804							my $message; # hold error message for log file
7805							my $tok_is_blank;
7806							my $last_tok_is_blank;
7807							my $in_prototype_or_signature;
7808							my $saw_alpha;
7809							my $saw_type;
7810							my $allow_tick;
7811
7812							sub initialize_my_scan_id_vars {
7813
7814							# Initialize all 'my' vars on entry
7815	486			486	0	844	$i_begin = $i;
7816	486					882	$type = EMPTY_STRING;
7817	486					970	$tok_begin = $rtokens->[$i_begin];
7818	486					917	$tok = $tok_begin;
7819	486	50				1374	if ( $tok_begin eq ':' ) { $tok_begin = '::' }
	0					0
7820	486					833	$id_scan_state_begin = $id_scan_state;
7821	486					1839	$identifier_begin = $identifier;
7822	486					842	$i_save = undef;
7823
7824	486					1146	$message = EMPTY_STRING;
7825	486					839	$tok_is_blank = undef; # a flag to speed things up
7826	486					773	$last_tok_is_blank = undef;
7827
7828	486		100			1916	$in_prototype_or_signature =
7829							$container_type && $container_type =~ /^sub\b/;
7830
7831							# these flags will be used to help figure out the type:
7832	486					819	$saw_alpha = undef;
7833	486					783	$saw_type = undef;
7834
7835							# allow old package separator (') except in 'use' statement
7836	486					932	$allow_tick = ( $last_nonblank_token ne 'use' );
7837	486					904	return;
7838							} ## end sub initialize_my_scan_id_vars
7839
7840							#----------------------------------
7841							# Routines for handling scan states
7842							#----------------------------------
7843							sub do_id_scan_state_dollar {
7844
7845	514			514	0	956	my $self = shift;
7846
7847							# We saw a sigil, now looking to start a variable name
7848	514	100	66			4617	if ( $tok eq '$' ) {
		100	33
		100
		50
		50
		100
		100
		100
		100
7849
7850	31					159	$identifier .= $tok;
7851
7852							# we've got a punctuation variable if end of line (punct.t)
7853	31	50				194	if ( $i == $max_token_index ) {
7854	0					0	$type = 'i';
7855	0					0	$id_scan_state = EMPTY_STRING;
7856							}
7857							}
7858							elsif ( $tok =~ /^\w/ ) { # alphanumeric ..
7859	253					541	$saw_alpha = 1;
7860	253					530	$identifier .= $tok;
7861
7862							# now need :: except for special digit vars like '$1' (c208)
7863	253	100				900	$id_scan_state = $tok =~ /^\d/ ? EMPTY_STRING : $scan_state_COLON;
7864							}
7865							elsif ( $tok eq '::' ) {
7866	16					55	$id_scan_state = $scan_state_ALPHA;
7867	16					46	$identifier .= $tok;
7868							}
7869
7870							# POSTDEFREF ->@ ->% ->& ->*
7871							elsif ( ( $tok =~ /^[\@\%\&\*]$/ ) && $identifier =~ /\-\>$/ ) {
7872	0					0	$identifier .= $tok;
7873							}
7874							elsif ( $tok eq "'" && $allow_tick ) { # alphanumeric ..
7875	0					0	$saw_alpha = 1;
7876	0					0	$id_scan_state = $scan_state_COLON; # now need ::
7877	0					0	$identifier .= $tok;
7878
7879							# Perl will accept leading digits in identifiers,
7880							# although they may not always produce useful results.
7881							# Something like $main::0 is ok. But this also works:
7882							#
7883							# sub howdy::123::bubba{ print "bubba $54321!\n" }
7884							# howdy::123::bubba();
7885							#
7886							}
7887							elsif ( $tok eq '#' ) {
7888
7889	99					221	my $is_punct_var = $identifier eq '$$';
7890
7891							# side comment or identifier?
7892	99	100	66			1099	if (
			66
			66
			33
7893
7894							# A '#' starts a comment if it follows a space. For example,
7895							# the following is equivalent to $ans=40.
7896							# my $ #
7897							# ans = 40;
7898							!$last_tok_is_blank
7899
7900							# a # inside a prototype or signature can only start a
7901							# comment
7902							&& !$in_prototype_or_signature
7903
7904							# these are valid punctuation vars: *# %# @# $#
7905							# May also be '$#array' or POSTDEFREF ->$#
7906							&& ( $identifier =~ /^[\%\@\$\*]$/
7907							\|\| $identifier =~ /\$$/ )
7908
7909							# but a '#' after '$$' is a side comment; see c147
7910							&& !$is_punct_var
7911
7912							)
7913							{
7914	95					260	$identifier .= $tok; # keep same state, a $ could follow
7915							}
7916							else {
7917
7918							# otherwise it is a side comment
7919	4	50				18	if ( $identifier eq '->' ) { }
		50
		50
7920	0					0	elsif ($is_punct_var) { $type = 'i' }
7921	4					8	elsif ( $id_scan_state eq $scan_state_SIGIL ) { $type = 't' }
7922	0					0	else { $type = 'i' }
7923	4					9	$i = $i_save;
7924	4					9	$id_scan_state = EMPTY_STRING;
7925							}
7926							}
7927
7928							elsif ( $tok eq '{' ) {
7929
7930							# check for something like ${#} or ${?}, where ? is a special char
7931	38	100	100			529	if (
			66
			100
			100
7932							(
7933							$identifier eq '$'
7934							\|\| $identifier eq '@'
7935							\|\| $identifier eq '$#'
7936							)
7937							&& $i + 2 <= $max_token_index
7938							&& $rtokens->[ $i + 2 ] eq '}'
7939							&& $rtokens->[ $i + 1 ] !~ /[\s\w]/
7940							)
7941							{
7942	1					7	my $next2 = $rtokens->[ $i + 2 ];
7943	1					6	my $next1 = $rtokens->[ $i + 1 ];
7944	1					3	$identifier .= $tok . $next1 . $next2;
7945	1					4	$i += 2;
7946	1					3	$id_scan_state = EMPTY_STRING;
7947							}
7948							else {
7949
7950							# skip something like ${xxx} or ->{
7951	37					93	$id_scan_state = EMPTY_STRING;
7952
7953							# if this is the first token of a line, any tokens for this
7954							# identifier have already been accumulated
7955	37	100	66			170	if ( $identifier eq '$' \|\| $i == 0 ) {
7956	26					56	$identifier = EMPTY_STRING;
7957							}
7958	37					73	$i = $i_save;
7959							}
7960							}
7961
7962							# space ok after leading $ % * & @
7963							elsif ( $tok =~ /^\s*$/ ) {
7964
7965	20					70	$tok_is_blank = 1;
7966
7967							# note: an id with a leading '&' does not actually come this way
7968	20	50				113	if ( $identifier =~ /^[\$\%\*\&\@]/ ) {
		0
7969
7970	20	100				97	if ( length($identifier) > 1 ) {
7971	8					20	$id_scan_state = EMPTY_STRING;
7972	8					23	$i = $i_save;
7973	8					18	$type = 'i'; # probably punctuation variable
7974							}
7975							else {
7976
7977							# fix c139: trim line-ending type 't'
7978	12	100				64	if ( $i == $max_token_index ) {
		100
7979	1					2	$i = $i_save;
7980	1					3	$type = 't';
7981							}
7982
7983							# spaces after $'s are common, and space after @
7984							# is harmless, so only complain about space
7985							# after other type characters. Space after $ and
7986							# @ will be removed in formatting. Report space
7987							# after % and * because they might indicate a
7988							# parsing error. In other words '% ' might be a
7989							# modulo operator. Delete this warning if it
7990							# gets annoying.
7991							elsif ( $identifier !~ /^[\@\$]$/ ) {
7992	1					5	$message =
7993							"Space in identifier, following $identifier\n";
7994							}
7995							else {
7996							## ok: silently accept space after '$' and '@' sigils
7997							}
7998							}
7999							}
8000
8001							elsif ( $identifier eq '->' ) {
8002
8003							# space after '->' is ok except at line end ..
8004							# so trim line-ending in type '->' (fixes c139)
8005	0	0				0	if ( $i == $max_token_index ) {
8006	0					0	$i = $i_save;
8007	0					0	$type = '->';
8008							}
8009							}
8010
8011							# stop at space after something other than -> or sigil
8012							# Example of what can arrive here:
8013							# eval { $MyClass->$$ };
8014							else {
8015	0					0	$id_scan_state = EMPTY_STRING;
8016	0					0	$i = $i_save;
8017	0					0	$type = 'i';
8018							}
8019							}
8020							elsif ( $tok eq '^' ) {
8021
8022							# check for some special variables like $^ $^W
8023	11	50				46	if ( $identifier =~ /^[\$\*\@\%]$/ ) {
8024	11					31	$identifier .= $tok;
8025	11					28	$type = 'i';
8026
8027							# There may be one more character, not a space, after the ^
8028	11					26	my $next1 = $rtokens->[ $i + 1 ];
8029	11					32	my $chr = substr( $next1, 0, 1 );
8030	11	100				81	if ( $is_special_variable_char{$chr} ) {
8031
8032							# It is something like $^W
8033							# Test case (c066) : $^Oeq'linux'
8034	9					19	$i++;
8035	9					16	$identifier .= $next1;
8036
8037							# If pretoken $next1 is more than one character long,
8038							# set a flag indicating that it needs to be split.
8039	9	100				37	$id_scan_state =
8040							( length($next1) > 1 ) ? $scan_state_SPLIT : EMPTY_STRING;
8041							}
8042							else {
8043
8044							# it is just $^
8045							# Simple test case (c065): '$aa=$^if($bb)';
8046	2					9	$id_scan_state = EMPTY_STRING;
8047							}
8048							}
8049							else {
8050	0					0	$id_scan_state = EMPTY_STRING;
8051	0					0	$i = $i_save;
8052							}
8053							}
8054							else { # something else
8055
8056	46	100	66			438	if ( $in_prototype_or_signature && $tok =~ /^[\),=#]/ ) {
		100	66
		100
		50
		0
		0
8057
8058							# We might be in an extrusion of
8059							# sub foo2 ( $first, $, $third ) {
8060							# looking at a line starting with a comma, like
8061							# $
8062							# ,
8063							# in this case the comma ends the signature variable
8064							# '$' which will have been previously marked type 't'
8065							# rather than 'i'.
8066	3	100				10	if ( $i == $i_begin ) {
8067	1					3	$identifier = EMPTY_STRING;
8068	1					4	$type = EMPTY_STRING;
8069							}
8070
8071							# at a # we have to mark as type 't' because more may
8072							# follow, otherwise, in a signature we can let '$' be an
8073							# identifier here for better formatting.
8074							# See 'mangle4.in' for a test case.
8075							else {
8076	2					5	$type = 'i';
8077	2	50	33			14	if ( $id_scan_state eq $scan_state_SIGIL && $tok eq '#' ) {
8078	0					0	$type = 't';
8079							}
8080	2					3	$i = $i_save;
8081							}
8082	3					6	$id_scan_state = EMPTY_STRING;
8083							}
8084
8085							# check for various punctuation variables
8086							elsif ( $identifier =~ /^[\$\*\@\%]$/ ) {
8087	35					114	$identifier .= $tok;
8088							}
8089
8090							# POSTDEFREF: Postfix reference ->$* ->%* ->@* ->** ->&* ->$#*
8091							elsif ($tok eq '*'
8092							&& $identifier =~ /\-\>([\@\%\$\*\&]\|\$\#)$/ )
8093							{
8094	6					14	$identifier .= $tok;
8095							}
8096
8097							elsif ( $identifier eq '$#' ) {
8098
8099	2	50				12	if ( $tok eq '{' ) { $type = 'i'; $i = $i_save }
	0	50				0
	0					0
8100
8101							# perl seems to allow just these: $#: $#- $#+
8102							elsif ( $tok =~ /^[\:\-\+]$/ ) {
8103	0					0	$type = 'i';
8104	0					0	$identifier .= $tok;
8105							}
8106							else {
8107	2					5	$i = $i_save;
8108	2					55	$self->write_logfile_entry(
8109							'Use of $# is deprecated' . "\n" );
8110							}
8111							}
8112							elsif ( $identifier eq '$$' ) {
8113
8114							# perl does not allow references to punctuation
8115							# variables without braces. For example, this
8116							# won't work:
8117							# $:=\4;
8118							# $a = $$:;
8119							# You would have to use
8120							# $a = ${$:};
8121
8122							# '$$' alone is punctuation variable for PID
8123	0					0	$i = $i_save;
8124	0	0				0	if ( $tok eq '{' ) { $type = 't' }
	0					0
8125	0					0	else { $type = 'i' }
8126							}
8127							elsif ( $identifier eq '->' ) {
8128	0					0	$i = $i_save;
8129							}
8130							else {
8131	0					0	$i = $i_save;
8132	0	0				0	if ( length($identifier) == 1 ) {
8133	0					0	$identifier = EMPTY_STRING;
8134							}
8135							}
8136	46					118	$id_scan_state = EMPTY_STRING;
8137							}
8138	514					948	return;
8139							} ## end sub do_id_scan_state_dollar
8140
8141							sub do_id_scan_state_alpha {
8142
8143	113			113	0	241	my $self = shift;
8144
8145							# looking for alphanumeric after ::
8146	113					438	$tok_is_blank = $tok =~ /^\s*$/;
8147
8148	113	100	33			478	if ( $tok =~ /^\w/ ) { # found it
		50	66
		50	33
		50
8149	100					198	$identifier .= $tok;
8150	100					189	$id_scan_state = $scan_state_COLON; # now need ::
8151	100					162	$saw_alpha = 1;
8152							}
8153							elsif ( $tok eq "'" && $allow_tick ) {
8154	0					0	$identifier .= $tok;
8155	0					0	$id_scan_state = $scan_state_COLON; # now need ::
8156	0					0	$saw_alpha = 1;
8157							}
8158							elsif ( $tok_is_blank && $identifier =~ /^sub / ) {
8159	0					0	$id_scan_state = $scan_state_LPAREN;
8160	0					0	$identifier .= $tok;
8161							}
8162							elsif ( $tok eq '(' && $identifier =~ /^sub / ) {
8163	0					0	$id_scan_state = $scan_state_RPAREN;
8164	0					0	$identifier .= $tok;
8165							}
8166							else {
8167	13					26	$id_scan_state = EMPTY_STRING;
8168	13					28	$i = $i_save;
8169							}
8170	113					197	return;
8171							} ## end sub do_id_scan_state_alpha
8172
8173							sub do_id_scan_state_colon {
8174
8175	434			434	0	874	my $self = shift;
8176
8177							# looking for possible :: after alphanumeric
8178
8179	434					1592	$tok_is_blank = $tok =~ /^\s*$/;
8180
8181	434	100	66			3552	if ( $tok eq '::' ) { # got it
		100	66
		100	66
		50
		50
8182	97					185	$identifier .= $tok;
8183	97					182	$id_scan_state = $scan_state_ALPHA; # now require alpha
8184							}
8185							elsif ( $tok =~ /^\w/ ) { # more alphanumeric is ok here
8186	20					47	$identifier .= $tok;
8187	20					46	$id_scan_state = $scan_state_COLON; # now need ::
8188	20					41	$saw_alpha = 1;
8189							}
8190							elsif ( $tok eq "'" && $allow_tick ) { # tick
8191
8192	12	50				29	if ( $is_keyword{$identifier} ) {
8193	0					0	$id_scan_state = EMPTY_STRING; # that's all
8194	0					0	$i = $i_save;
8195							}
8196							else {
8197	12					19	$identifier .= $tok;
8198							}
8199							}
8200							elsif ( $tok_is_blank && $identifier =~ /^sub / ) {
8201	0					0	$id_scan_state = $scan_state_LPAREN;
8202	0					0	$identifier .= $tok;
8203							}
8204							elsif ( $tok eq '(' && $identifier =~ /^sub / ) {
8205	0					0	$id_scan_state = $scan_state_RPAREN;
8206	0					0	$identifier .= $tok;
8207							}
8208							else {
8209	305					557	$id_scan_state = EMPTY_STRING; # that's all
8210	305					533	$i = $i_save;
8211							}
8212	434					709	return;
8213							} ## end sub do_id_scan_state_colon
8214
8215							sub do_id_scan_state_left_paren {
8216
8217	0			0	0	0	my $self = shift;
8218
8219							# looking for possible '(' of a prototype
8220
8221	0	0				0	if ( $tok eq '(' ) { # got it
		0
8222	0					0	$identifier .= $tok;
8223	0					0	$id_scan_state = $scan_state_RPAREN; # now find the end of it
8224							}
8225							elsif ( $tok =~ /^\s*$/ ) { # blank - keep going
8226	0					0	$identifier .= $tok;
8227	0					0	$tok_is_blank = 1;
8228							}
8229							else {
8230	0					0	$id_scan_state = EMPTY_STRING; # that's all - no prototype
8231	0					0	$i = $i_save;
8232							}
8233	0					0	return;
8234							} ## end sub do_id_scan_state_left_paren
8235
8236							sub do_id_scan_state_right_paren {
8237
8238	0			0	0	0	my $self = shift;
8239
8240							# looking for a ')' of prototype to close a '('
8241
8242	0					0	$tok_is_blank = $tok =~ /^\s*$/;
8243
8244	0	0				0	if ( $tok eq ')' ) { # got it
		0
8245	0					0	$identifier .= $tok;
8246	0					0	$id_scan_state = EMPTY_STRING; # all done
8247							}
8248							elsif ( $tok =~ /^[\s\$\%\\\*\@\&\;]/ ) {
8249	0					0	$identifier .= $tok;
8250							}
8251							else { # probable error in script, but keep going
8252	0					0	warning("Unexpected '$tok' while seeking end of prototype\n");
8253	0					0	$identifier .= $tok;
8254							}
8255	0					0	return;
8256							} ## end sub do_id_scan_state_right_paren
8257
8258							sub do_id_scan_state_ampersand {
8259
8260	105			105	0	297	my $self = shift;
8261
8262							# Starting sub call after seeing an '&'
8263
8264	105	100	33			686	if ( $tok =~ /^[\$\w]/ ) { # alphanumeric ..
		50
		100
		50
		50
		0
8265	88					229	$id_scan_state = $scan_state_COLON; # now need ::
8266	88					172	$saw_alpha = 1;
8267	88					245	$identifier .= $tok;
8268							}
8269							elsif ( $tok eq "'" && $allow_tick ) { # alphanumeric ..
8270	0					0	$id_scan_state = $scan_state_COLON; # now need ::
8271	0					0	$saw_alpha = 1;
8272	0					0	$identifier .= $tok;
8273							}
8274							elsif ( $tok =~ /^\s*$/ ) { # allow space
8275	2					4	$tok_is_blank = 1;
8276
8277							# fix c139: trim line-ending type 't'
8278	2	50	33			11	if ( length($identifier) == 1 && $i == $max_token_index ) {
8279	2					4	$i = $i_save;
8280	2					4	$type = 't';
8281							}
8282							}
8283							elsif ( $tok eq '::' ) { # leading ::
8284	0					0	$id_scan_state = $scan_state_ALPHA; # accept alpha next
8285	0					0	$identifier .= $tok;
8286							}
8287							elsif ( $tok eq '{' ) {
8288	15	50	33			75	if ( $identifier eq '&' \|\| $i == 0 ) {
8289	15					35	$identifier = EMPTY_STRING;
8290							}
8291	15					33	$i = $i_save;
8292	15					34	$id_scan_state = EMPTY_STRING;
8293							}
8294							elsif ( $tok eq '^' ) {
8295	0	0				0	if ( $identifier eq '&' ) {
8296
8297							# Special variable (c066)
8298	0					0	$identifier .= $tok;
8299	0					0	$type = '&';
8300
8301							# There may be one more character, not a space, after the ^
8302	0					0	my $next1 = $rtokens->[ $i + 1 ];
8303	0					0	my $chr = substr( $next1, 0, 1 );
8304	0	0				0	if ( $is_special_variable_char{$chr} ) {
8305
8306							# It is something like &^O
8307	0					0	$i++;
8308	0					0	$identifier .= $next1;
8309
8310							# If pretoken $next1 is more than one character long,
8311							# set a flag indicating that it needs to be split.
8312	0	0				0	$id_scan_state =
8313							( length($next1) > 1 ) ? $scan_state_SPLIT : EMPTY_STRING;
8314							}
8315							else {
8316
8317							# it is &^
8318	0					0	$id_scan_state = EMPTY_STRING;
8319							}
8320							}
8321							else {
8322	0					0	$identifier = EMPTY_STRING;
8323	0					0	$i = $i_save;
8324							}
8325							}
8326							else {
8327
8328							# punctuation variable?
8329							# testfile: cunningham4.pl
8330							#
8331							# We have to be careful here. If we are in an unknown state,
8332							# we will reject the punctuation variable. In the following
8333							# example the '&' is a binary operator but we are in an unknown
8334							# state because there is no sigil on 'Prima', so we don't
8335							# know what it is. But it is a bad guess that
8336							# '&~' is a function variable.
8337							# $self->{text}->{colorMap}->[
8338							# Prima::PodView::COLOR_CODE_FOREGROUND
8339							# & ~tb::COLOR_INDEX ] =
8340							# $sec->{ColorCode}
8341
8342							# Fix for case c033: a '#' here starts a side comment
8343	0	0	0			0	if ( $identifier eq '&' && $expecting && $tok ne '#' ) {
			0
8344	0					0	$identifier .= $tok;
8345							}
8346							else {
8347	0					0	$identifier = EMPTY_STRING;
8348	0					0	$i = $i_save;
8349	0					0	$type = '&';
8350							}
8351	0					0	$id_scan_state = EMPTY_STRING;
8352							}
8353	105					221	return;
8354							} ## end sub do_id_scan_state_ampersand
8355
8356							#-------------------
8357							# hash of scanner subs
8358							#-------------------
8359							my $scan_identifier_code = {
8360							$scan_state_SIGIL => \&do_id_scan_state_dollar,
8361							$scan_state_ALPHA => \&do_id_scan_state_alpha,
8362							$scan_state_COLON => \&do_id_scan_state_colon,
8363							$scan_state_LPAREN => \&do_id_scan_state_left_paren,
8364							$scan_state_RPAREN => \&do_id_scan_state_right_paren,
8365							$scan_state_AMPERSAND => \&do_id_scan_state_ampersand,
8366							};
8367
8368							sub scan_complex_identifier {
8369
8370							# This routine assembles tokens into identifiers. It maintains a
8371							# scan state, id_scan_state. It updates id_scan_state based upon
8372							# current id_scan_state and token, and returns an updated
8373							# id_scan_state and the next index after the identifier.
8374
8375							# This routine now serves a a backup for sub scan_simple_identifier
8376							# which handles most identifiers.
8377
8378							# Note that $self must be a 'my' variable and not be a closure
8379							# variables like the other args. Otherwise it will not get
8380							# deleted by a DESTROY call at the end of a file. Then an
8381							# attempt to create multiple tokenizers can occur when multiple
8382							# files are processed, causing an error.
8383
8384							(
8385	486			486	0	1755	my $self, $i, $id_scan_state, $identifier, $rtokens,
8386							$max_token_index, $expecting, $container_type
8387							) = @_;
8388
8389							# return flag telling caller to split the pretoken
8390	486					2543	my $split_pretoken_flag;
8391
8392							#-------------------
8393							# Initialize my vars
8394							#-------------------
8395
8396	486					1676	initialize_my_scan_id_vars();
8397
8398							#--------------------------------------------------------
8399							# get started by defining a type and a state if necessary
8400							#--------------------------------------------------------
8401
8402	486	100				1560	if ( !$id_scan_state ) {
8403	479					861	$context = UNKNOWN_CONTEXT;
8404
8405							# fixup for digraph
8406	479	50				1369	if ( $tok eq '>' ) {
8407	0					0	$tok = '->';
8408	0					0	$tok_begin = $tok;
8409							}
8410	479					861	$identifier = $tok;
8411
8412	479	100	100			3211	if ( $last_nonblank_token eq '->' ) {
		100	100
		100	0
		50
		0
		0
		0
		0
8413	6					14	$identifier = '->' . $identifier;
8414	6					13	$id_scan_state = $scan_state_SIGIL;
8415							}
8416							elsif ( $tok eq '$' \|\| $tok eq '*' ) {
8417	293					560	$id_scan_state = $scan_state_SIGIL;
8418	293					507	$context = SCALAR_CONTEXT;
8419							}
8420							elsif ( $tok eq '%' \|\| $tok eq '@' ) {
8421	77					185	$id_scan_state = $scan_state_SIGIL;
8422	77					149	$context = LIST_CONTEXT;
8423							}
8424							elsif ( $tok eq '&' ) {
8425	103					231	$id_scan_state = $scan_state_AMPERSAND;
8426							}
8427							elsif ( $tok eq 'sub' or $tok eq 'package' ) {
8428	0					0	$saw_alpha = 0; # 'sub' is considered type info here
8429	0					0	$id_scan_state = $scan_state_SIGIL;
8430	0					0	$identifier .=
8431							SPACE; # need a space to separate sub from sub name
8432							}
8433							elsif ( $tok eq '::' ) {
8434	0					0	$id_scan_state = $scan_state_ALPHA;
8435							}
8436							elsif ( $tok =~ /^\w/ ) {
8437	0					0	$id_scan_state = $scan_state_COLON;
8438	0					0	$saw_alpha = 1;
8439							}
8440							elsif ( $tok eq '->' ) {
8441	0					0	$id_scan_state = $scan_state_SIGIL;
8442							}
8443							else {
8444
8445							# shouldn't happen: bad call parameter
8446	0					0	my $msg =
8447							"Program bug detected: scan_complex_identifier received bad starting token = '$tok'\n";
8448	0					0	if (DEVEL_MODE) { $self->Fault($msg) }
8449	0	0				0	if ( !$self->[_in_error_] ) {
8450	0					0	warning($msg);
8451	0					0	$self->[_in_error_] = 1;
8452							}
8453	0					0	$id_scan_state = EMPTY_STRING;
8454
8455							# emergency return
8456	0					0	goto RETURN;
8457							}
8458	479					960	$saw_type = !$saw_alpha;
8459							}
8460							else {
8461	7					15	$i--;
8462	7					33	$saw_alpha = ( $tok =~ /^\w/ );
8463	7					21	$saw_type = ( $tok =~ /([\$\%\@\*\&])/ );
8464
8465							# check for a valid starting state
8466	7					12	if ( DEVEL_MODE && !$is_returnable_scan_state{$id_scan_state} ) {
8467							$self->Fault(<<EOM);
8468							Unexpected starting scan state in sub scan_complex_identifier: '$id_scan_state'
8469							EOM
8470							}
8471							}
8472
8473							#------------------------------
8474							# loop to gather the identifier
8475							#------------------------------
8476
8477	486					868	$i_save = $i;
8478
8479	486		100			2159	while ( $i < $max_token_index && $id_scan_state ) {
8480
8481							# Be sure we have code to handle this state before we proceed
8482	1169					2427	my $code = $scan_identifier_code->{$id_scan_state};
8483	1169	100				2474	if ( !$code ) {
8484
8485	3	50				16	if ( $id_scan_state eq $scan_state_SPLIT ) {
8486							## OK: this is the signal to exit and split the pretoken
8487							}
8488
8489							# unknown state - should not happen
8490							else {
8491	0					0	if (DEVEL_MODE) {
8492							$self->Fault(<<EOM);
8493							Unknown scan state in sub scan_complex_identifier: '$id_scan_state'
8494							Scan state at sub entry was '$id_scan_state_begin'
8495							EOM
8496							}
8497	0					0	$id_scan_state = EMPTY_STRING;
8498	0					0	$i = $i_save;
8499							}
8500	3					7	last;
8501							}
8502
8503							# Remember the starting index for progress check below
8504	1166					2684	my $i_start_loop = $i;
8505
8506	1166					1749	$last_tok_is_blank = $tok_is_blank;
8507	1166	100				2049	if ($tok_is_blank) { $tok_is_blank = undef }
	11					25
8508	1155					1594	else { $i_save = $i }
8509
8510	1166					2131	$tok = $rtokens->[ ++$i ];
8511
8512							# patch to make digraph :: if necessary
8513	1166	100	100			3165	if ( ( $tok eq ':' ) && ( $rtokens->[ $i + 1 ] eq ':' ) ) {
8514	113					237	$tok = '::';
8515	113					193	$i++;
8516							}
8517
8518	1166					3484	$code->($self);
8519
8520							# check for forward progress: a decrease in the index $i
8521							# implies that scanning has finished
8522	1166	100				4026	last if ( $i <= $i_start_loop );
8523
8524							} ## end of main loop
8525
8526							#-------------
8527							# Check result
8528							#-------------
8529
8530							# Be sure a valid state is returned
8531	486	100				1351	if ($id_scan_state) {
8532
8533	20	100				73	if ( !$is_returnable_scan_state{$id_scan_state} ) {
8534
8535	13	100				60	if ( $id_scan_state eq $scan_state_SPLIT ) {
8536	3					7	$split_pretoken_flag = 1;
8537							}
8538
8539	13	50				50	if ( $id_scan_state eq $scan_state_RPAREN ) {
8540	0					0	warning(
8541							"Hit end of line while seeking ) to end prototype\n");
8542							}
8543
8544	13					29	$id_scan_state = EMPTY_STRING;
8545							}
8546
8547							# Patch: the deprecated variable $# does not combine with anything
8548							# on the next line.
8549	20	50				64	if ( $identifier eq '$#' ) { $id_scan_state = EMPTY_STRING }
	0					0
8550							}
8551
8552							# Be sure the token index is valid
8553	486	50				1338	if ( $i < 0 ) { $i = 0 }
	0					0
8554
8555							# Be sure a token type is defined
8556	486	100				1244	if ( !$type ) {
8557
8558	458	100				1154	if ($saw_type) {
		100
8559
8560	452	100	33			2015	if ($saw_alpha) {
		50	66
		100	66
			33
8561
8562							# The type without the -> should be the same as with the -> so
8563							# that if they get separated we get the same bond strengths,
8564							# etc. See b1234
8565	348	50	33			1399	if ( $identifier =~ /^->/
			33
8566							&& $last_nonblank_type eq 'w'
8567							&& substr( $identifier, 2, 1 ) =~ /^\w/ )
8568							{
8569	0					0	$type = 'w';
8570							}
8571	348					701	else { $type = 'i' }
8572							}
8573							elsif ( $identifier eq '->' ) {
8574	0					0	$type = '->';
8575							}
8576							elsif (
8577							( length($identifier) > 1 )
8578
8579							# In something like '@$=' we have an identifier '@$'
8580							# In something like '$${' we have type '$$' (and only
8581							# part of an identifier)
8582							&& !( $identifier =~ /\$$/ && $tok eq '{' )
8583
8584							## && ( $identifier !~ /^(sub \|package )$/ )
8585							&& $identifier ne 'sub '
8586							&& $identifier ne 'package '
8587							)
8588							{
8589	53					153	$type = 'i';
8590							}
8591	51					113	else { $type = 't' }
8592							}
8593							elsif ($saw_alpha) {
8594
8595							# type 'w' includes anything without leading type info
8596							# ($,%,@,*) including something like abc::def::ghi
8597	5					13	$type = 'w';
8598
8599							# Fix for b1337, if restarting scan after line break between
8600							# '->' or sigil and identifier name, use type 'i'
8601	5	50	33			31	if ( $id_scan_state_begin
8602							&& $identifier =~ /^([\$\%\@\*\&]\|->)/ )
8603							{
8604	5					12	$type = 'i';
8605							}
8606							}
8607							else {
8608	1					3	$type = EMPTY_STRING;
8609							} # this can happen on a restart
8610							}
8611
8612							# See if we formed an identifier...
8613	486	100				1192	if ($identifier) {
8614	444					832	$tok = $identifier;
8615	444	100				1117	if ($message) { $self->write_logfile_entry($message) }
	1					5
8616							}
8617
8618							# did not find an identifier, back up
8619							else {
8620	42					83	$tok = $tok_begin;
8621	42					69	$i = $i_begin;
8622							}
8623
8624							RETURN:
8625
8626	486					736	DEBUG_SCAN_ID && do {
8627							my ( $a, $b, $c ) = caller;
8628							print {*STDOUT}
8629							"SCANID: called from $a $b $c with tok, i, state, identifier =$tok_begin, $i_begin, $id_scan_state_begin, $identifier_begin\n";
8630							print {*STDOUT}
8631							"SCANID: returned with tok, i, state, identifier =$tok, $i, $id_scan_state, $identifier\n";
8632							};
8633	486					2313	return ( $i, $tok, $type, $id_scan_state, $identifier,
8634							$split_pretoken_flag );
8635							} ## end sub scan_complex_identifier
8636							} ## end closure for sub scan_complex_identifier
8637
8638							{ ## closure for sub do_scan_sub
8639
8640							my %warn_if_lexical;
8641
8642							BEGIN {
8643
8644							# lexical subs with these names can cause parsing errors in this version
8645	39			39		297	my @q = qw( m q qq qr qw qx s tr y );
8646	39					3456	@{warn_if_lexical}{@q} = (1) x scalar(@q);
8647							} ## end BEGIN
8648
8649							# saved package and subnames in case prototype is on separate line
8650							my ( $package_saved, $subname_saved );
8651
8652							# initialize subname each time a new 'sub' keyword is encountered
8653							sub initialize_subname {
8654	296			296	0	650	$package_saved = EMPTY_STRING;
8655	296					614	$subname_saved = EMPTY_STRING;
8656	296					551	return;
8657							}
8658
8659							use constant {
8660	39					91828	SUB_CALL => 1,
8661							PAREN_CALL => 2,
8662							PROTOTYPE_CALL => 3,
8663	39			39		352	};
	39					150
8664
8665							sub do_scan_sub {
8666
8667							# do_scan_sub parses a sub name and prototype.
8668
8669							# At present there are three basic CALL TYPES which are
8670							# distinguished by the starting value of '$tok':
8671							# 1. $tok='sub', id_scan_state='sub'
8672							# it is called with $i_beg equal to the index of the first nonblank
8673							# token following a 'sub' token.
8674							# 2. $tok='(', id_scan_state='sub',
8675							# it is called with $i_beg equal to the index of a '(' which may
8676							# start a prototype.
8677							# 3. $tok='prototype', id_scan_state='prototype'
8678							# it is called with $i_beg equal to the index of a '(' which is
8679							# preceded by ': prototype' and has $id_scan_state eq 'prototype'
8680
8681							# Examples:
8682
8683							# A single type 1 call will get both the sub and prototype
8684							# sub foo1 ( $$ ) { }
8685							# ^
8686
8687							# The subname will be obtained with a 'sub' call
8688							# The prototype on line 2 will be obtained with a '(' call
8689							# sub foo1
8690							# ^ <---call type 1
8691							# ( $$ ) { }
8692							# ^ <---call type 2
8693
8694							# The subname will be obtained with a 'sub' call
8695							# The prototype will be obtained with a 'prototype' call
8696							# sub foo1 ( $x, $y ) : prototype ( $$ ) { }
8697							# ^ <---type 1 ^ <---type 3
8698
8699							# TODO: add future error checks to be sure we have a valid
8700							# sub name. For example, 'sub &doit' is wrong. Also, be sure
8701							# a name is given if and only if a non-anonymous sub is
8702							# appropriate.
8703							# USES GLOBAL VARS: $current_package, $last_nonblank_token,
8704							# $rsaw_function_definition,
8705							# $statement_type
8706
8707	302			302	0	813	my ( $self, $rinput_hash ) = @_;
8708
8709	302					754	my $input_line = $rinput_hash->{input_line};
8710	302					698	my $i = $rinput_hash->{i};
8711	302					617	my $i_beg = $rinput_hash->{i_beg};
8712	302					647	my $tok = $rinput_hash->{tok};
8713	302					670	my $type = $rinput_hash->{type};
8714	302					591	my $rtokens = $rinput_hash->{rtokens};
8715	302					569	my $rtoken_map = $rinput_hash->{rtoken_map};
8716	302					581	my $id_scan_state = $rinput_hash->{id_scan_state};
8717	302					581	my $max_token_index = $rinput_hash->{max_token_index};
8718
8719	302					526	my $i_entry = $i;
8720
8721							# Determine the CALL TYPE
8722							# 1=sub
8723							# 2=(
8724							# 3=prototype
8725	302	100				1095	my $call_type =
		100
8726							$tok eq 'prototype' ? PROTOTYPE_CALL
8727							: $tok eq '(' ? PAREN_CALL
8728							: SUB_CALL;
8729
8730	302					1713	$id_scan_state = EMPTY_STRING; # normally we get everything in one call
8731	302					600	my $subname = $subname_saved;
8732	302					551	my $package = $package_saved;
8733	302					642	my $proto = undef;
8734	302					585	my $attrs = undef;
8735	302					529	my $match;
8736
8737	302					611	my $pos_beg = $rtoken_map->[$i_beg];
8738	302					1020	pos($input_line) = $pos_beg;
8739
8740							# Look for the sub NAME if this is a SUB call
8741	302	100	100			2812	if (
8742							$call_type == SUB_CALL
8743							&& $input_line =~ m{\G\s*
8744							((?:\w(?:'\|::))) # package - something that ends in :: or '
8745							(\w+) # NAME - required
8746							}gcx
8747							)
8748							{
8749	123					305	$match = 1;
8750	123					339	$subname = $2;
8751
8752	123		33			546	my $is_lexical_sub =
8753							$last_nonblank_type eq 'k' && $last_nonblank_token eq 'my';
8754	123	0	33			436	if ( $is_lexical_sub && $1 ) {
8755	0					0	$self->warning("'my' sub $subname cannot be in package '$1'\n");
8756	0					0	$is_lexical_sub = 0;
8757							}
8758
8759	123	50				398	if ($is_lexical_sub) {
8760
8761							# lexical subs use the block sequence number as a package name
8762	0					0	my $seqno =
8763							$rcurrent_sequence_number->[BRACE]
8764							[ $rcurrent_depth->[BRACE] ];
8765	0	0				0	$seqno = 1 if ( !defined($seqno) );
8766	0					0	$package = $seqno;
8767	0	0				0	if ( $warn_if_lexical{$subname} ) {
8768	0					0	$self->warning(
8769							"'my' sub '$subname' matches a builtin name and may not be handled correctly in this perltidy version.\n"
8770							);
8771
8772							# This may end badly, it is safest to block formatting
8773							# For an example, see perl527/lexsub.t (issue c203)
8774	0					0	$self->[_in_trouble_] = 1;
8775							}
8776							}
8777							else {
8778	123	100	66			841	$package = ( defined($1) && $1 ) ? $1 : $current_package;
8779	123					437	$package =~ s/\'/::/g;
8780	123	50				466	if ( $package =~ /^\:/ ) { $package = 'main' . $package }
	0					0
8781	123					356	$package =~ s/::$//;
8782							}
8783
8784	123					308	my $pos = pos($input_line);
8785	123					284	my $numc = $pos - $pos_beg;
8786	123					426	$tok = 'sub ' . substr( $input_line, $pos_beg, $numc );
8787	123					270	$type = 'S'; ## Fix for c250, was 'i';
8788
8789							# remember the sub name in case another call is needed to
8790							# get the prototype
8791	123					272	$package_saved = $package;
8792	123					303	$subname_saved = $subname;
8793							}
8794
8795							# Now look for PROTO ATTRS for all call types
8796							# Look for prototype/attributes which are usually on the same
8797							# line as the sub name but which might be on a separate line.
8798							# For example, we might have an anonymous sub with attributes,
8799							# or a prototype on a separate line from its sub name
8800
8801							# NOTE: We only want to parse PROTOTYPES here. If we see anything that
8802							# does not look like a prototype, we assume it is a SIGNATURE and we
8803							# will stop and let the the standard tokenizer handle it. In
8804							# particular, we stop if we see any nested parens, braces, or commas.
8805							# Also note, a valid prototype cannot contain any alphabetic character
8806							# -- see https://perldoc.perl.org/perlsub
8807							# But it appears that an underscore is valid in a prototype, so the
8808							# regex below uses [A-Za-z] rather than \w
8809							# This is the old regex which has been replaced:
8810							# $input_line =~ m/\G(\s$[^$$\}\{\,#]$)? # PROTO
8811	302					1132	my $saw_opening_paren = $input_line =~ /\G\s*\(/;
8812	302	100	100			3075	if (
			66
8813							$input_line =~ m{\G(\s$[^$$\}\{\,#A-Za-z]$)? # PROTO
8814							(\s*:)? # ATTRS leading ':'
8815							}gcx
8816							&& ( $1 \|\| $2 )
8817							)
8818							{
8819	45					148	$proto = $1;
8820	45					111	$attrs = $2;
8821
8822							# Append the prototype to the starting token if it is 'sub' or
8823							# 'prototype'. This is not necessary but for compatibility with
8824							# previous versions when the -csc flag is used:
8825	45	100	100			299	if ( $proto && ( $match \|\| $call_type == PROTOTYPE_CALL ) ) {
		100	100
8826	24					63	$tok .= $proto;
8827							}
8828
8829							# If we just entered the sub at an opening paren on this call, not
8830							# a following :prototype, label it with the previous token. This is
8831							# necessary to propagate the sub name to its opening block.
8832							elsif ( $call_type == PAREN_CALL ) {
8833	2					6	$tok = $last_nonblank_token;
8834							}
8835							else {
8836							}
8837
8838	45		100			164	$match \|\|= 1;
8839
8840							# Patch part #1 to fixes cases b994 and b1053:
8841							# Mark an anonymous sub keyword without prototype as type 'k', i.e.
8842							# 'sub : lvalue { ...'
8843	45					89	$type = 'S'; ## C250, was 'i';
8844	45	100	100			205	if ( $tok eq 'sub' && !$proto ) { $type = 'k' }
	2					4
8845							}
8846
8847	302	100				937	if ($match) {
8848
8849							# ATTRS: if there are attributes, back up and let the ':' be
8850							# found later by the scanner.
8851	138					319	my $pos = pos($input_line);
8852	138	100				447	if ($attrs) {
8853	15					31	$pos -= length($attrs);
8854							}
8855
8856	138					326	my $next_nonblank_token = $tok;
8857
8858							# catch case of line with leading ATTR ':' after anonymous sub
8859	138	100	100			605	if ( $pos == $pos_beg && $tok eq ':' ) {
8860	1					2	$type = 'A';
8861	1					4	$self->[_in_attribute_list_] = 1;
8862							}
8863
8864							# Otherwise, if we found a match we must convert back from
8865							# string position to the pre_token index for continued parsing.
8866							else {
8867
8868							# I don't think an error flag can occur here ..but ?
8869	137					275	my $error;
8870	137					636	( $i, $error ) = inverse_pretoken_map( $i, $pos, $rtoken_map,
8871							$max_token_index );
8872	137	50				502	if ($error) { $self->warning("Possibly invalid sub\n") }
	0					0
8873
8874							# Patch part #2 to fixes cases b994 and b1053:
8875							# Do not let spaces be part of the token of an anonymous sub
8876							# keyword which we marked as type 'k' above...i.e. for
8877							# something like:
8878							# 'sub : lvalue { ...'
8879							# Back up and let it be parsed as a blank
8880	137	50	66			635	if ( $type eq 'k'
			66
			33
8881							&& $attrs
8882							&& $i > $i_entry
8883							&& substr( $rtokens->[$i], 0, 1 ) =~ m/\s/ )
8884							{
8885	2					4	$i--;
8886							}
8887
8888							# check for multiple definitions of a sub
8889	137					404	( $next_nonblank_token, my $i_next ) =
8890							find_next_nonblank_token_on_this_line( $i, $rtokens,
8891							$max_token_index );
8892							}
8893
8894	138	100				812	if ( $next_nonblank_token =~ /^(\s*\|#)$/ )
8895							{ # skip blank or side comment
8896	7					81	my ( $rpre_tokens, $rpre_types ) =
8897							$self->peek_ahead_for_n_nonblank_pre_tokens(1);
8898	7	50	33			42	if ( defined($rpre_tokens) && @{$rpre_tokens} ) {
	7					30
8899	7					29	$next_nonblank_token = $rpre_tokens->[0];
8900							}
8901							else {
8902	0					0	$next_nonblank_token = '}';
8903							}
8904							}
8905
8906							# See what's next...
8907	138	100				661	if ( $next_nonblank_token eq '{' ) {
		100
		50
		100
		50
		0
8908	106	100				426	if ($subname) {
8909
8910							# Check for multiple definitions of a sub, but
8911							# it is ok to have multiple sub BEGIN, etc,
8912							# so we do not complain if name is all caps
8913	96	50	33			603	if ( $rsaw_function_definition->{$subname}{$package}
8914							&& $subname !~ /^[A-Z]+$/ )
8915							{
8916							my $lno =
8917	0					0	$rsaw_function_definition->{$subname}{$package};
8918	0	0				0	if ( $package =~ /^\d/ ) {
8919	0					0	$self->warning(
8920							"already saw definition of lexical 'sub $subname' at line $lno\n"
8921							);
8922
8923							}
8924							else {
8925	0					0	if ( !DEVEL_MODE ) {
8926	0					0	$self->warning(
8927							"already saw definition of 'sub $subname' in package '$package' at line $lno\n"
8928							);
8929							}
8930							}
8931							}
8932	96					381	$rsaw_function_definition->{$subname}{$package} =
8933							$self->[_last_line_number_];
8934							}
8935							}
8936							elsif ( $next_nonblank_token eq ';' ) {
8937							}
8938							elsif ( $next_nonblank_token eq '}' ) {
8939							}
8940
8941							# ATTRS - if an attribute list follows, remember the name
8942							# of the sub so the next opening brace can be labeled.
8943							# Setting 'statement_type' causes any ':'s to introduce
8944							# attributes.
8945							elsif ( $next_nonblank_token eq ':' ) {
8946	16	100				52	if ( $call_type == SUB_CALL ) {
8947	14	100				74	$statement_type =
8948							substr( $tok, 0, 3 ) eq 'sub' ? $tok : 'sub';
8949							}
8950							}
8951
8952							# if we stopped before an open paren ...
8953							elsif ( $next_nonblank_token eq '(' ) {
8954
8955							# If we DID NOT see this paren above then it must be on the
8956							# next line so we will set a flag to come back here and see if
8957							# it is a PROTOTYPE
8958
8959							# Otherwise, we assume it is a SIGNATURE rather than a
8960							# PROTOTYPE and let the normal tokenizer handle it as a list
8961	15	100				47	if ( !$saw_opening_paren ) {
8962	4					15	$id_scan_state = 'sub'; # we must come back to get proto
8963							}
8964	15	50				64	if ( $call_type == SUB_CALL ) {
8965	15	50				61	$statement_type =
8966							substr( $tok, 0, 3 ) eq 'sub' ? $tok : 'sub';
8967							}
8968							}
8969
8970							# something else..
8971							elsif ($next_nonblank_token) {
8972
8973	0	0	0			0	if ( $rinput_hash->{tok} eq 'method' && $call_type == SUB_CALL )
8974							{
8975							# For a method call, silently ignore this error (rt145706)
8976							# to avoid needless warnings. Example which can produce it:
8977							# test(method Pack (), "method");
8978
8979							# TODO: scan for use feature 'class' and:
8980							# - if we saw 'use feature 'class' then issue the warning.
8981							# - if we did not see use feature 'class' then issue the
8982							# warning and suggest turning off --use-feature=class
8983							}
8984							else {
8985	0	0				0	$subname = EMPTY_STRING unless defined($subname);
8986	0					0	$self->warning(
8987							"expecting ':' or ';' or '{' after definition or declaration of sub '$subname' but saw '$next_nonblank_token'\n"
8988							);
8989							}
8990							}
8991
8992							# EOF technically ok
8993							else {
8994							}
8995
8996	138					536	check_prototype( $proto, $package, $subname );
8997							}
8998
8999							# no match to either sub name or prototype, but line not blank
9000							else {
9001
9002							}
9003	302					1658	return ( $i, $tok, $type, $id_scan_state );
9004							} ## end sub do_scan_sub
9005							}
9006
9007							#########################################################################
9008							# Tokenizer utility routines which may use CONSTANTS but no other GLOBALS
9009							#########################################################################
9010
9011							sub find_next_nonblank_token {
9012	6166			6166	0	12204	my ( $self, $i, $rtokens, $max_token_index ) = @_;
9013
9014							# Returns the next nonblank token after the token at index $i
9015							# To skip past a side comment, and any subsequent block comments
9016							# and blank lines, call with i=$max_token_index
9017
9018							# Skip any ending blank (fix c258). It would be cleaner if caller passed
9019							# $rtoken_map, so we could check for type 'b', and avoid a regex test, but
9020							# benchmarking shows that this test does not take significant time. So
9021							# that would be a nice update but not essential. Also note that ending
9022							# blanks will not occur for text previously processed by perltidy.
9023	6166	100	100			18973	if ( $i == $max_token_index - 1
9024							&& $rtokens->[$max_token_index] =~ /^\s+$/ )
9025							{
9026	9					31	$i++;
9027							}
9028
9029	6166	100				12845	if ( $i >= $max_token_index ) {
9030	127	100				762	if ( !peeked_ahead() ) {
9031	125					458	peeked_ahead(1);
9032	125					727	$self->peek_ahead_for_nonblank_token( $rtokens, $max_token_index );
9033							}
9034							}
9035
9036	6166					11341	my $next_nonblank_token = $rtokens->[ ++$i ];
9037	6166	50	33			22160	return ( SPACE, $i )
9038							if ( !defined($next_nonblank_token) \|\| !length($next_nonblank_token) );
9039
9040							# Quick test for nonblank ascii char. Note that we just have to
9041							# examine the first character here.
9042	6166					12547	my $ord = ord( substr( $next_nonblank_token, 0, 1 ) );
9043	6166	100	66			24940	if ( $ord >= ORD_PRINTABLE_MIN
		50	33
		0
9044							&& $ord <= ORD_PRINTABLE_MAX )
9045							{
9046	2353					7780	return ( $next_nonblank_token, $i );
9047							}
9048
9049							# Quick test to skip over an ascii space or tab
9050							elsif ( $ord == ORD_SPACE \|\| $ord == ORD_TAB ) {
9051	3813					6795	$next_nonblank_token = $rtokens->[ ++$i ];
9052	3813	50				8084	return ( SPACE, $i ) unless defined($next_nonblank_token);
9053							}
9054
9055							# Slow test to skip over something else identified as whitespace
9056							elsif ( $next_nonblank_token =~ /^\s*$/ ) {
9057	0					0	$next_nonblank_token = $rtokens->[ ++$i ];
9058	0	0				0	return ( SPACE, $i ) unless defined($next_nonblank_token);
9059							}
9060							else {
9061							## at nonblank
9062							}
9063
9064							# We should be at a nonblank now
9065	3813					11494	return ( $next_nonblank_token, $i );
9066							} ## end sub find_next_nonblank_token
9067
9068							sub find_next_noncomment_token {
9069	98			98	0	400	my ( $self, $i, $rtokens, $max_token_index ) = @_;
9070
9071							# Given the current character position, look ahead past any comments
9072							# and blank lines and return the next token, including digraphs and
9073							# trigraphs.
9074
9075	98					350	my ( $next_nonblank_token, $i_next ) =
9076							$self->find_next_nonblank_token( $i, $rtokens, $max_token_index );
9077
9078							# skip past any side comment
9079	98	50				564	if ( $next_nonblank_token eq '#' ) {
9080	0					0	( $next_nonblank_token, $i_next ) =
9081							$self->find_next_nonblank_token( $i_next, $rtokens,
9082							$max_token_index );
9083							}
9084
9085							# check for a digraph
9086	98	50	33			915	if ( $next_nonblank_token
			33
9087							&& $next_nonblank_token ne SPACE
9088							&& defined( $rtokens->[ $i_next + 1 ] ) )
9089							{
9090	98					303	my $test2 = $next_nonblank_token . $rtokens->[ $i_next + 1 ];
9091	98	100				419	if ( $is_digraph{$test2} ) {
9092	15					41	$next_nonblank_token = $test2;
9093	15					37	$i_next = $i_next + 1;
9094
9095							# check for a trigraph
9096	15	50				67	if ( defined( $rtokens->[ $i_next + 1 ] ) ) {
9097	15					52	my $test3 = $next_nonblank_token . $rtokens->[ $i_next + 1 ];
9098	15	50				85	if ( $is_trigraph{$test3} ) {
9099	0					0	$next_nonblank_token = $test3;
9100	0					0	$i_next = $i_next + 1;
9101							}
9102							}
9103							}
9104							}
9105
9106	98					334	return ( $next_nonblank_token, $i_next );
9107							} ## end sub find_next_noncomment_token
9108
9109							sub is_possible_numerator {
9110
9111							# Look at the next non-comment character and decide if it could be a
9112							# numerator. Return
9113							# 1 - yes
9114							# 0 - can't tell
9115							# -1 - no
9116
9117	0			0	0	0	my ( $self, $i, $rtokens, $max_token_index ) = @_;
9118	0					0	my $is_possible_numerator = 0;
9119
9120	0					0	my $next_token = $rtokens->[ $i + 1 ];
9121	0	0				0	if ( $next_token eq '=' ) { $i++; } # handle /=
	0					0
9122	0					0	my ( $next_nonblank_token, $i_next ) =
9123							$self->find_next_nonblank_token( $i, $rtokens, $max_token_index );
9124
9125	0	0				0	if ( $next_nonblank_token eq '#' ) {
9126	0					0	( $next_nonblank_token, $i_next ) =
9127							$self->find_next_nonblank_token( $max_token_index, $rtokens,
9128							$max_token_index );
9129							}
9130
9131	0	0				0	if ( $next_nonblank_token =~ / [ \( \$ \w \. \@ ] /x ) {
		0
9132	0					0	$is_possible_numerator = 1;
9133							}
9134							elsif ( $next_nonblank_token =~ /^\s*$/ ) {
9135	0					0	$is_possible_numerator = 0;
9136							}
9137							else {
9138	0					0	$is_possible_numerator = -1;
9139							}
9140
9141	0					0	return $is_possible_numerator;
9142							} ## end sub is_possible_numerator
9143
9144							{ ## closure for sub pattern_expected
9145							my %pattern_test;
9146
9147							BEGIN {
9148
9149							# List of tokens which may follow a pattern. Note that we will not
9150							# have formed digraphs at this point, so we will see '&' instead of
9151							# '&&' and '\|' instead of '\|\|'
9152
9153							# /(\)\|\}\|\;\|\&\&\|\\|\\|\|and\|or\|while\|if\|unless)/
9154	39			39		295	my @q = qw( & && \| \|\| ? : + - * and or while if unless);
9155	39					159	push @q, ')', '}', ']', '>', ',', ';';
9156	39					184768	@{pattern_test}{@q} = (1) x scalar(@q);
9157							} ## end BEGIN
9158
9159							sub pattern_expected {
9160
9161							# This a filter for a possible pattern.
9162							# It looks at the token after a possible pattern and tries to
9163							# determine if that token could end a pattern.
9164							# returns -
9165							# 1 - yes
9166							# 0 - can't tell
9167							# -1 - no
9168	0			0	0	0	my ( $self, $i, $rtokens, $max_token_index ) = @_;
9169	0					0	my $is_pattern = 0;
9170
9171	0					0	my $next_token = $rtokens->[ $i + 1 ];
9172	0	0				0	if ( $next_token =~ /^[msixpodualgc]/ ) {
9173	0					0	$i++;
9174							} # skip possible modifier
9175	0					0	my ( $next_nonblank_token, $i_next ) =
9176							$self->find_next_nonblank_token( $i, $rtokens, $max_token_index );
9177
9178	0	0				0	if ( $pattern_test{$next_nonblank_token} ) {
9179	0					0	$is_pattern = 1;
9180							}
9181							else {
9182
9183							# Added '#' to fix issue c044
9184	0	0	0			0	if ( $next_nonblank_token =~ /^\s*$/
9185							\|\| $next_nonblank_token eq '#' )
9186							{
9187	0					0	$is_pattern = 0;
9188							}
9189							else {
9190	0					0	$is_pattern = -1;
9191							}
9192							}
9193	0					0	return $is_pattern;
9194							} ## end sub pattern_expected
9195							}
9196
9197							sub find_next_nonblank_token_on_this_line {
9198	457			457	0	1097	my ( $i, $rtokens, $max_token_index ) = @_;
9199	457					758	my $next_nonblank_token;
9200
9201	457	100				1202	if ( $i < $max_token_index ) {
9202	449					1087	$next_nonblank_token = $rtokens->[ ++$i ];
9203
9204	449	100				2035	if ( $next_nonblank_token =~ /^\s*$/ ) {
9205
9206	121	100				569	if ( $i < $max_token_index ) {
9207	119					352	$next_nonblank_token = $rtokens->[ ++$i ];
9208							}
9209							}
9210							}
9211							else {
9212	8					26	$next_nonblank_token = EMPTY_STRING;
9213							}
9214	457					1524	return ( $next_nonblank_token, $i );
9215							} ## end sub find_next_nonblank_token_on_this_line
9216
9217							sub find_angle_operator_termination {
9218
9219							# We are looking at a '<' and want to know if it is an angle operator.
9220							# We are to return:
9221							# $i = pretoken index of ending '>' if found, current $i otherwise
9222							# $type = 'Q' if found, '>' otherwise
9223	8			8	0	31	my ( $self, $input_line, $i_beg, $rtoken_map, $expecting, $max_token_index )
9224							= @_;
9225	8					30	my $i = $i_beg;
9226	8					22	my $type = '<';
9227	8					35	pos($input_line) = 1 + $rtoken_map->[$i];
9228
9229	8					22	my $filter;
9230
9231							# we just have to find the next '>' if a term is expected
9232	8	100				47	if ( $expecting == TERM ) { $filter = '[\>]' }
	6	50				17
9233
9234							# we have to guess if we don't know what is expected
9235	2					7	elsif ( $expecting == UNKNOWN ) { $filter = '[\>\;\=\#\\|\<]' }
9236
9237							# shouldn't happen - we shouldn't be here if operator is expected
9238							else {
9239	0					0	if (DEVEL_MODE) {
9240							$self->Fault(<<EOM);
9241							Bad call to find_angle_operator_termination
9242							EOM
9243							}
9244	0					0	return ( $i, $type );
9245							}
9246
9247							# To illustrate what we might be looking at, in case we are
9248							# guessing, here are some examples of valid angle operators
9249							# (or file globs):
9250							# <tmp_imp/*>
9251							# <FH>
9252							# <$fh>
9253							# <.c .h>
9254							# <_>
9255							# <jskdfjskdfj* op/* jskdjfjkosvk*> ( glob.t)
9256							# <${PREFIX}img.$IMAGE_TYPE>
9257							# <img*.$IMAGE_TYPE>
9258							# <Timg*.$IMAGE_TYPE>
9259							# <$LATEX2HTMLVERSIONS${dd}html[1-9].[0-9].pl>
9260							#
9261							# Here are some examples of lines which do not have angle operators:
9262							# return unless $self->[2]++ < $#{$self->[1]};
9263							# < 2 \|\| @$t >
9264							#
9265							# the following line from dlister.pl caused trouble:
9266							# print'~'x79,"\n",$D<1024?"0.$D":$D>>10,"K, $C files\n\n\n";
9267							#
9268							# If the '<' starts an angle operator, it must end on this line and
9269							# it must not have certain characters like ';' and '=' in it. I use
9270							# this to limit the testing. This filter should be improved if
9271							# possible.
9272
9273	8	50				204	if ( $input_line =~ /($filter)/g ) {
9274
9275	8	50				40	if ( $1 eq '>' ) {
9276
9277							# We MAY have found an angle operator termination if we get
9278							# here, but we need to do more to be sure we haven't been
9279							# fooled.
9280	8					18	my $pos = pos($input_line);
9281
9282	8					23	my $pos_beg = $rtoken_map->[$i];
9283	8					27	my $str = substr( $input_line, $pos_beg, ( $pos - $pos_beg ) );
9284
9285							# Test for '<' after possible filehandle, issue c103
9286							# print $fh <>; # syntax error
9287							# print $fh <DATA>; # ok
9288							# print $fh < DATA>; # syntax error at '>'
9289							# print STDERR < DATA>; # ok, prints word 'DATA'
9290							# print BLABLA <DATA>; # ok; does nothing unless BLABLA is defined
9291	8	100				44	if ( $last_nonblank_type eq 'Z' ) {
9292
9293							# $str includes brackets; something like '<DATA>'
9294	1	0	33			10	if ( substr( $last_nonblank_token, 0, 1 ) !~ /[A-Za-z_]/
9295							&& substr( $str, 1, 1 ) !~ /[A-Za-z_]/ )
9296							{
9297	0					0	return ( $i, $type );
9298							}
9299							}
9300
9301							# Reject if the closing '>' follows a '-' as in:
9302							# if ( VERSION < 5.009 && $op-> name eq 'assign' ) { }
9303	8	100				35	if ( $expecting eq UNKNOWN ) {
9304	2					5	my $check = substr( $input_line, $pos - 2, 1 );
9305	2	100				7	if ( $check eq '-' ) {
9306	1					7	return ( $i, $type );
9307							}
9308							}
9309
9310							######################################debug#####
9311							#$self->write_diagnostics( "ANGLE? :$str\n");
9312							#print "ANGLE: found $1 at pos=$pos str=$str check=$check\n";
9313							######################################debug#####
9314	7					18	$type = 'Q';
9315	7					21	my $error;
9316	7					37	( $i, $error ) =
9317							inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );
9318
9319							# It may be possible that a quote ends midway in a pretoken.
9320							# If this happens, it may be necessary to split the pretoken.
9321	7	50				36	if ($error) {
9322	0					0	if (DEVEL_MODE) {
9323							$self->Fault(<<EOM);
9324							unexpected error condition returned by inverse_pretoken_map
9325							EOM
9326							}
9327							$self->warning(
9328	0					0	"Possible tokinization error..please check this line\n");
9329							}
9330
9331							# Check for accidental formatting of a markup language doc...
9332							# Formatting will be skipped if we set _html_tag_count_ and
9333							# also set a warning of any kind.
9334	7					19	my $is_html_tag;
9335	7		33			34	my $is_first_string =
9336							$i_beg == 0 && $self->[_last_line_number_] == 1;
9337
9338							# html comment '<!...' of any type
9339	7	50	33			102	if ( $str =~ /^<\s*!/ ) {
		50
		50
9340	0					0	$is_html_tag = 1;
9341	0	0				0	if ($is_first_string) {
9342	0					0	$self->warning(
9343							"looks like a markup language, continuing error checks\n"
9344							);
9345							}
9346							}
9347
9348							# html end tag, something like </h1>
9349							elsif ( $str =~ /^<\s\/\w+\s>$/ ) {
9350	0					0	$is_html_tag = 1;
9351							}
9352
9353							# xml prolog?
9354							elsif ( $str =~ /^<\?xml\s.*\?>$/i && $is_first_string ) {
9355	0					0	$is_html_tag = 1;
9356	0					0	$self->warning(
9357							"looks like a markup language, continuing error checks\n");
9358							}
9359							else {
9360							## doesn't look like a markup tag
9361							}
9362
9363	7	50				29	if ($is_html_tag) {
9364	0					0	$self->[_html_tag_count_]++;
9365							}
9366
9367							# count blanks on inside of brackets
9368	7					17	my $blank_count = 0;
9369	7	100				38	$blank_count++ if ( $str =~ /<\s+/ );
9370	7	100				36	$blank_count++ if ( $str =~ /\s+>/ );
9371
9372							# Now let's see where we stand....
9373							# OK if math op not possible
9374	7	100				30	if ( $expecting == TERM ) {
		50
		50
		0
9375							}
9376
9377							elsif ($is_html_tag) {
9378							}
9379
9380							# OK if there are no more than 2 non-blank pre-tokens inside
9381							# (not possible to write 2 token math between < and >)
9382							# This catches most common cases
9383							elsif ( $i <= $i_beg + 3 + $blank_count ) {
9384
9385							# No longer any need to document this common case
9386							## $self->write_diagnostics("ANGLE(1 or 2 tokens): $str\n");
9387							}
9388
9389							# OK if there is some kind of identifier inside
9390							# print $fh <tvg::INPUT>;
9391							elsif ( $str =~ /^<\s\$?(\w\|::\|\s)+\s>$/ ) {
9392	0					0	$self->write_diagnostics("ANGLE (contains identifier): $str\n");
9393							}
9394
9395							# Not sure..
9396							else {
9397
9398							# Let's try a Brace Test: any braces inside must balance
9399	0					0	my $br = 0;
9400	0					0	while ( $str =~ /\{/g ) { $br++ }
	0					0
9401	0					0	while ( $str =~ /\}/g ) { $br-- }
	0					0
9402	0					0	my $sb = 0;
9403	0					0	while ( $str =~ /\[/g ) { $sb++ }
	0					0
9404	0					0	while ( $str =~ /\]/g ) { $sb-- }
	0					0
9405	0					0	my $pr = 0;
9406	0					0	while ( $str =~ /\(/g ) { $pr++ }
	0					0
9407	0					0	while ( $str =~ /\)/g ) { $pr-- }
	0					0
9408
9409							# if braces do not balance - not angle operator
9410	0	0	0			0	if ( $br \|\| $sb \|\| $pr ) {
			0
9411	0					0	$i = $i_beg;
9412	0					0	$type = '<';
9413	0					0	$self->write_diagnostics(
9414							"NOT ANGLE (BRACE={$br ($pr [$sb ):$str\n");
9415							}
9416
9417							# we should keep doing more checks here...to be continued
9418							# Tentatively accepting this as a valid angle operator.
9419							# There are lots more things that can be checked.
9420							else {
9421	0					0	$self->write_diagnostics(
9422							"ANGLE-Guessing yes: $str expecting=$expecting\n");
9423	0					0	$self->write_logfile_entry(
9424							"Guessing angle operator here: $str\n");
9425							}
9426							}
9427							}
9428
9429							# didn't find ending >
9430							else {
9431	0	0				0	if ( $expecting == TERM ) {
9432	0					0	$self->warning("No ending > for angle operator\n");
9433							}
9434							}
9435							}
9436	7					36	return ( $i, $type );
9437							} ## end sub find_angle_operator_termination
9438
9439							sub scan_number_do {
9440
9441							# scan a number in any of the formats that Perl accepts
9442							# Underbars (_) are allowed in decimal numbers.
9443							# input parameters -
9444							# $input_line - the string to scan
9445							# $i - pre_token index to start scanning
9446							# $rtoken_map - reference to the pre_token map giving starting
9447							# character position in $input_line of token $i
9448							# output parameters -
9449							# $i - last pre_token index of the number just scanned
9450							# number - the number (characters); or undef if not a number
9451
9452	629			629	0	1661	my ( $self, $input_line, $i, $rtoken_map, $input_type, $max_token_index ) =
9453							@_;
9454	629					1156	my $pos_beg = $rtoken_map->[$i];
9455	629					959	my $pos;
9456	629					982	my $i_begin = $i;
9457	629					1062	my $number = undef;
9458	629					1092	my $type = $input_type;
9459
9460	629					1439	my $first_char = substr( $input_line, $pos_beg, 1 );
9461
9462							# Look for bad starting characters; Shouldn't happen..
9463	629	50				2807	if ( $first_char !~ /[\d\.\+\-Ee]/ ) {
9464	0					0	if (DEVEL_MODE) {
9465							$self->Fault(<<EOM);
9466							Program bug - scan_number given bad first character = '$first_char'
9467							EOM
9468							}
9469	0					0	return ( $i, $type, $number );
9470							}
9471
9472							# handle v-string without leading 'v' character ('Two Dot' rule)
9473							# (vstring.t)
9474							# Here is the format prior to including underscores:
9475							## if ( $input_line =~ /\G((\d+)?\.\d+(\.\d+)+)/g ) {
9476	629					1994	pos($input_line) = $pos_beg;
9477	629	50				3072	if ( $input_line =~ /\G((\d[_\d]*)?\.[\d_]+(\.[\d_]+)+)/g ) {
9478	0					0	$pos = pos($input_line);
9479	0					0	my $numc = $pos - $pos_beg;
9480	0					0	$number = substr( $input_line, $pos_beg, $numc );
9481	0					0	$type = 'v';
9482	0					0	$self->report_v_string($number);
9483							}
9484
9485							# handle octal, hex, binary
9486	629	50				1642	if ( !defined($number) ) {
9487	629					2375	pos($input_line) = $pos_beg;
9488
9489							# Perl 5.22 added floating point literals, like '0x0.b17217f7d1cf78p0'
9490							# For reference, the format prior to hex floating point is:
9491							# /\G[+-]?0(([xX][0-9a-fA-F_]+)\|([0-7_]+)\|([bB][01_]+))/g )
9492							# (hex) (octal) (binary)
9493	629	100				2489	if (
9494							$input_line =~ m{
9495
9496							\G[+-]?0( # leading [signed] 0
9497
9498							# a hex float, i.e. '0x0.b17217f7d1cf78p0'
9499							([xX][0-9a-fA-F_]* # X and optional leading digits
9500							(\.([0-9a-fA-F][0-9a-fA-F_]*)?)? # optional decimal and fraction
9501							[Pp][+-]?[0-9a-fA-F] # REQUIRED exponent with digit
9502							[0-9a-fA-F_]*) # optional Additional exponent digits
9503
9504							# or hex integer
9505							\|([xX][0-9a-fA-F_]+)
9506
9507							# or octal fraction
9508							\|([oO]?[0-7_]+ # string of octal digits
9509							(\.([0-7][0-7_]*)?)? # optional decimal and fraction
9510							[Pp][+-]?[0-7] # REQUIRED exponent, no underscore
9511							[0-7_]*) # Additional exponent digits with underscores
9512
9513							# or octal integer
9514							\|([oO]?[0-7_]+) # string of octal digits
9515
9516							# or a binary float
9517							\|([bB][01_]* # 'b' with string of binary digits
9518							(\.([01][01_]*)?)? # optional decimal and fraction
9519							[Pp][+-]?[01] # Required exponent indicator, no underscore
9520							[01_]*) # additional exponent bits
9521
9522							# or binary integer
9523							\|([bB][01_]+) # 'b' with string of binary digits
9524
9525							)}gx
9526							)
9527							{
9528	72					132	$pos = pos($input_line);
9529	72					125	my $numc = $pos - $pos_beg;
9530	72					150	$number = substr( $input_line, $pos_beg, $numc );
9531	72					132	$type = 'n';
9532							}
9533							}
9534
9535							# handle decimal
9536	629	100				1504	if ( !defined($number) ) {
9537	557					1136	pos($input_line) = $pos_beg;
9538
9539	557	50				2830	if ( $input_line =~ /\G([+-]?[\d_](\.[\d_])?([Ee][+-]?(\d+))?)/g ) {
9540	557					1066	$pos = pos($input_line);
9541
9542							# watch out for things like 0..40 which would give 0. by this;
9543	557	100	100			2054	if ( ( substr( $input_line, $pos - 1, 1 ) eq '.' )
9544							&& ( substr( $input_line, $pos, 1 ) eq '.' ) )
9545							{
9546	37					76	$pos--;
9547							}
9548	557					1006	my $numc = $pos - $pos_beg;
9549	557					1078	$number = substr( $input_line, $pos_beg, $numc );
9550	557					1063	$type = 'n';
9551							}
9552							}
9553
9554							# filter out non-numbers like e + - . e2 .e3 +e6
9555							# the rule: at least one digit, and any 'e' must be preceded by a digit
9556	629	100	66			3397	if (
			66
9557							$number !~ /\d/ # no digits
9558							\|\| ( $number =~ /^(.*)[eE]/
9559							&& $1 !~ /\d/ ) # or no digits before the 'e'
9560							)
9561							{
9562	303					498	$number = undef;
9563	303					540	$type = $input_type;
9564	303					1269	return ( $i, $type, $number );
9565							}
9566
9567							# Found a number; now we must convert back from character position
9568							# to pre_token index. An error here implies user syntax error.
9569							# An example would be an invalid octal number like '009'.
9570	326					585	my $error;
9571	326					861	( $i, $error ) =
9572							inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );
9573	326	50				989	if ($error) { $self->warning("Possibly invalid number\n") }
	0					0
9574
9575	326					1285	return ( $i, $type, $number );
9576							} ## end sub scan_number_do
9577
9578							sub inverse_pretoken_map {
9579
9580							# Starting with the current pre_token index $i, scan forward until
9581							# finding the index of the next pre_token whose position is $pos.
9582	2170			2170	0	5101	my ( $i, $pos, $rtoken_map, $max_token_index ) = @_;
9583	2170					3561	my $error = 0;
9584
9585	2170					5449	while ( ++$i <= $max_token_index ) {
9586
9587	4040	100				9699	if ( $pos <= $rtoken_map->[$i] ) {
9588
9589							# Let the calling routine handle errors in which we do not
9590							# land on a pre-token boundary. It can happen by running
9591							# perltidy on some non-perl scripts, for example.
9592	2135	50				5495	if ( $pos < $rtoken_map->[$i] ) { $error = 1 }
	0					0
9593	2135					3655	$i--;
9594	2135					4032	last;
9595							}
9596							}
9597	2170					5339	return ( $i, $error );
9598							} ## end sub inverse_pretoken_map
9599
9600							sub find_here_doc {
9601
9602							# find the target of a here document, if any
9603							# input parameters:
9604							# $i - token index of the second < of <<
9605							# ($i must be less than the last token index if this is called)
9606							# output parameters:
9607							# $found_target = 0 didn't find target; =1 found target
9608							# HERE_TARGET - the target string (may be empty string)
9609							# $i - unchanged if not here doc,
9610							# or index of the last token of the here target
9611							# $saw_error - flag noting unbalanced quote on here target
9612	9			9	0	44	my ( $self, $expecting, $i, $rtokens, $rtoken_map, $max_token_index ) = @_;
9613
9614	9					22	my $ibeg = $i;
9615	9					23	my $found_target = 0;
9616	9					27	my $here_doc_target = EMPTY_STRING;
9617	9					22	my $here_quote_character = EMPTY_STRING;
9618	9					26	my $saw_error = 0;
9619	9					29	my ( $next_nonblank_token, $i_next_nonblank, $next_token );
9620	9					26	$next_token = $rtokens->[ $i + 1 ];
9621
9622							# perl allows a backslash before the target string (heredoc.t)
9623	9					24	my $backslash = 0;
9624	9	50				40	if ( $next_token eq '\\' ) {
9625	0					0	$backslash = 1;
9626	0					0	$next_token = $rtokens->[ $i + 2 ];
9627							}
9628
9629	9					50	( $next_nonblank_token, $i_next_nonblank ) =
9630							find_next_nonblank_token_on_this_line( $i, $rtokens, $max_token_index );
9631
9632	9	100	33			88	if ( $next_nonblank_token =~ /[\'\"\`]/ ) {
		50
		50
9633
9634	6					17	my $in_quote = 1;
9635	6					13	my $quote_depth = 0;
9636	6					15	my $quote_pos = 0;
9637	6					14	my $quoted_string;
9638
9639							(
9640	6					38	$i, $in_quote, $here_quote_character, $quote_pos, $quote_depth,
9641							$quoted_string
9642							)
9643							= $self->follow_quoted_string( $i_next_nonblank, $in_quote, $rtokens,
9644							$here_quote_character, $quote_pos, $quote_depth, $max_token_index );
9645
9646	6	50				37	if ($in_quote) { # didn't find end of quote, so no target found
9647	0					0	$i = $ibeg;
9648	0	0				0	if ( $expecting == TERM ) {
9649	0					0	$self->warning(
9650							"Did not find here-doc string terminator ($here_quote_character) before end of line \n"
9651							);
9652	0					0	$saw_error = 1;
9653							}
9654							}
9655							else { # found ending quote
9656	6					19	$found_target = 1;
9657
9658	6					11	my $tokj;
9659	6					28	foreach my $j ( $i_next_nonblank + 1 .. $i - 1 ) {
9660	6					20	$tokj = $rtokens->[$j];
9661
9662							# we have to remove any backslash before the quote character
9663							# so that the here-doc-target exactly matches this string
9664							next
9665	6	0	33			37	if ( $tokj eq "\\"
			33
9666							&& $j < $i - 1
9667							&& $rtokens->[ $j + 1 ] eq $here_quote_character );
9668	6					25	$here_doc_target .= $tokj;
9669							}
9670							}
9671							}
9672
9673							elsif ( ( $next_token =~ /^\s*$/ ) and ( $expecting == TERM ) ) {
9674	0					0	$found_target = 1;
9675	0					0	$self->write_logfile_entry(
9676							"found blank here-target after <<; suggest using \"\"\n");
9677	0					0	$i = $ibeg;
9678							}
9679							elsif ( $next_token =~ /^\w/ ) { # simple bareword or integer after <<
9680
9681	3					9	my $here_doc_expected;
9682	3	50				20	if ( $expecting == UNKNOWN ) {
9683	0					0	$here_doc_expected = $self->guess_if_here_doc($next_token);
9684							}
9685							else {
9686	3					11	$here_doc_expected = 1;
9687							}
9688
9689	3	50				13	if ($here_doc_expected) {
9690	3					11	$found_target = 1;
9691	3					7	$here_doc_target = $next_token;
9692	3					8	$i = $ibeg + 1;
9693							}
9694
9695							}
9696							else {
9697
9698	0	0				0	if ( $expecting == TERM ) {
9699	0					0	$found_target = 1;
9700	0					0	$self->write_logfile_entry("Note: bare here-doc operator <<\n");
9701							}
9702							else {
9703	0					0	$i = $ibeg;
9704							}
9705							}
9706
9707							# patch to neglect any prepended backslash
9708	9	50	33			68	if ( $found_target && $backslash ) { $i++ }
	0					0
9709
9710	9					53	return ( $found_target, $here_doc_target, $here_quote_character, $i,
9711							$saw_error );
9712							} ## end sub find_here_doc
9713
9714							sub do_quote {
9715
9716							# follow (or continue following) quoted string(s)
9717							# $in_quote return code:
9718							# 0 - ok, found end
9719							# 1 - still must find end of quote whose target is $quote_character
9720							# 2 - still looking for end of first of two quotes
9721							#
9722							# Returns updated strings:
9723							# $quoted_string_1 = quoted string seen while in_quote=1
9724							# $quoted_string_2 = quoted string seen while in_quote=2
9725							my (
9726
9727	2768			2768	0	7959	$self,
9728							$i,
9729							$in_quote,
9730							$quote_character,
9731							$quote_pos,
9732							$quote_depth,
9733							$quoted_string_1,
9734							$quoted_string_2,
9735							$rtokens,
9736							$rtoken_map,
9737							$max_token_index,
9738
9739							) = @_;
9740
9741	2768					4093	my $quoted_string;
9742	2768	100				6353	if ( $in_quote == 2 ) { # two quotes/quoted_string_1s to follow
9743	29					58	my $ibeg = $i;
9744							(
9745	29					154	$i, $in_quote, $quote_character, $quote_pos, $quote_depth,
9746							$quoted_string
9747							)
9748							= $self->follow_quoted_string( $ibeg, $in_quote, $rtokens,
9749							$quote_character, $quote_pos, $quote_depth, $max_token_index );
9750	29					86	$quoted_string_2 .= $quoted_string;
9751	29	50				101	if ( $in_quote == 1 ) {
9752	29	50				110	if ( $quote_character =~ /[\{\[\<\(]/ ) { $i++; }
	0					0
9753	29					71	$quote_character = EMPTY_STRING;
9754							}
9755							else {
9756	0					0	$quoted_string_2 .= "\n";
9757							}
9758							}
9759
9760	2768	50				5979	if ( $in_quote == 1 ) { # one (more) quote to follow
9761	2768					4425	my $ibeg = $i;
9762							(
9763	2768					7652	$i, $in_quote, $quote_character, $quote_pos, $quote_depth,
9764							$quoted_string
9765							)
9766							= $self->follow_quoted_string( $ibeg, $in_quote, $rtokens,
9767							$quote_character, $quote_pos, $quote_depth, $max_token_index );
9768	2768					5952	$quoted_string_1 .= $quoted_string;
9769	2768	100				6210	if ( $in_quote == 1 ) {
9770	183					375	$quoted_string_1 .= "\n";
9771							}
9772							}
9773							return (
9774
9775	2768					9569	$i,
9776							$in_quote,
9777							$quote_character,
9778							$quote_pos,
9779							$quote_depth,
9780							$quoted_string_1,
9781							$quoted_string_2,
9782
9783							);
9784							} ## end sub do_quote
9785
9786							sub follow_quoted_string {
9787
9788							# scan for a specific token, skipping escaped characters
9789							# if the quote character is blank, use the first non-blank character
9790							# input parameters:
9791							# $rtokens = reference to the array of tokens
9792							# $i = the token index of the first character to search
9793							# $in_quote = number of quoted strings being followed
9794							# $beginning_tok = the starting quote character
9795							# $quote_pos = index to check next for alphanumeric delimiter
9796							# output parameters:
9797							# $i = the token index of the ending quote character
9798							# $in_quote = decremented if found end, unchanged if not
9799							# $beginning_tok = the starting quote character
9800							# $quote_pos = index to check next for alphanumeric delimiter
9801							# $quote_depth = nesting depth, since delimiters '{ ( [ <' can be nested.
9802							# $quoted_string = the text of the quote (without quotation tokens)
9803							my (
9804
9805	2814			2814	0	6539	$self,
9806							$i_beg,
9807							$in_quote,
9808							$rtokens,
9809							$beginning_tok,
9810							$quote_pos,
9811							$quote_depth,
9812							$max_token_index,
9813
9814							) = @_;
9815
9816	2814					4447	my ( $tok, $end_tok );
9817	2814					4690	my $i = $i_beg - 1;
9818	2814					4248	my $quoted_string = EMPTY_STRING;
9819
9820	2814					4024	0 && do {
9821							print {*STDOUT}
9822							"QUOTE entering with quote_pos = $quote_pos i=$i beginning_tok =$beginning_tok\n";
9823							};
9824
9825							# get the corresponding end token
9826	2814	100				13276	if ( $beginning_tok !~ /^\s*$/ ) {
9827	183					560	$end_tok = matching_end_token($beginning_tok);
9828							}
9829
9830							# a blank token means we must find and use the first non-blank one
9831							else {
9832	2631	100				6567	my $allow_quote_comments = ( $i < 0 ) ? 1 : 0; # i<0 means we saw a <cr>
9833
9834	2631					6092	while ( $i < $max_token_index ) {
9835	2631					4996	$tok = $rtokens->[ ++$i ];
9836
9837	2631	50				8026	if ( $tok !~ /^\s*$/ ) {
9838
9839	2631	50	66			7838	if ( ( $tok eq '#' ) && ($allow_quote_comments) ) {
9840	0					0	$i = $max_token_index;
9841							}
9842							else {
9843
9844	2631	100				5746	if ( length($tok) > 1 ) {
9845	1	50				5	if ( $quote_pos <= 0 ) { $quote_pos = 1 }
	1					2
9846	1					5	$beginning_tok = substr( $tok, $quote_pos - 1, 1 );
9847							}
9848							else {
9849	2630					5328	$beginning_tok = $tok;
9850	2630					4067	$quote_pos = 0;
9851							}
9852	2631					6289	$end_tok = matching_end_token($beginning_tok);
9853	2631					4642	$quote_depth = 1;
9854	2631					4694	last;
9855							}
9856							}
9857							else {
9858	0					0	$allow_quote_comments = 1;
9859							}
9860							}
9861							}
9862
9863							# There are two different loops which search for the ending quote
9864							# character. In the rare case of an alphanumeric quote delimiter, we
9865							# have to look through alphanumeric tokens character-by-character, since
9866							# the pre-tokenization process combines multiple alphanumeric
9867							# characters, whereas for a non-alphanumeric delimiter, only tokens of
9868							# length 1 can match.
9869
9870							#----------------------------------------------------------------
9871							# Case 1 (rare): loop for case of alphanumeric quote delimiter..
9872							# "quote_pos" is the position the current word to begin searching
9873							#----------------------------------------------------------------
9874	2814	100				7448	if ( $beginning_tok =~ /\w/ ) {
9875
9876							# Note this because it is not recommended practice except
9877							# for obfuscated perl contests
9878	1	50				5	if ( $in_quote == 1 ) {
9879	1					7	$self->write_logfile_entry(
9880							"Note: alphanumeric quote delimiter ($beginning_tok) \n");
9881							}
9882
9883							# Note: changed < to <= here to fix c109. Relying on extra end blanks.
9884	1					12	while ( $i <= $max_token_index ) {
9885
9886	4	100	66			15	if ( $quote_pos == 0 \|\| ( $i < 0 ) ) {
9887	3					17	$tok = $rtokens->[ ++$i ];
9888
9889	3	100				10	if ( $tok eq '\\' ) {
9890
9891							# retain backslash unless it hides the end token
9892	1	50				26	$quoted_string .= $tok
9893							unless $rtokens->[ $i + 1 ] eq $end_tok;
9894	1					2	$quote_pos++;
9895	1	50				6	last if ( $i >= $max_token_index );
9896	1					7	$tok = $rtokens->[ ++$i ];
9897							}
9898							}
9899	4					6	my $old_pos = $quote_pos;
9900
9901	4					8	$quote_pos = 1 + index( $tok, $end_tok, $quote_pos );
9902
9903	4	100				11	if ( $quote_pos > 0 ) {
9904
9905	1					5	$quoted_string .=
9906							substr( $tok, $old_pos, $quote_pos - $old_pos - 1 );
9907
9908							# NOTE: any quote modifiers will be at the end of '$tok'. If we
9909							# wanted to check them, this is the place to get them. But
9910							# this quote form is rarely used in practice, so it isn't
9911							# worthwhile.
9912
9913	1					1	$quote_depth--;
9914
9915	1	50				5	if ( $quote_depth == 0 ) {
9916	1					2	$in_quote--;
9917	1					2	last;
9918							}
9919							}
9920							else {
9921	3	50				8	if ( $old_pos <= length($tok) ) {
9922	3					10	$quoted_string .= substr( $tok, $old_pos );
9923							}
9924							}
9925							}
9926							}
9927
9928							#-----------------------------------------------------------------------
9929							# Case 2 (normal): loop for case of a non-alphanumeric quote delimiter..
9930							#-----------------------------------------------------------------------
9931							else {
9932
9933	2813					6776	while ( $i < $max_token_index ) {
9934	10798					16451	$tok = $rtokens->[ ++$i ];
9935
9936	10798	100				24351	if ( $tok eq $end_tok ) {
		100
		100
9937	2620					4169	$quote_depth--;
9938
9939	2620	100				6051	if ( $quote_depth == 0 ) {
9940	2619					3745	$in_quote--;
9941	2619					4151	last;
9942							}
9943							}
9944							elsif ( $tok eq $beginning_tok ) {
9945	1					12	$quote_depth++;
9946							}
9947							elsif ( $tok eq '\\' ) {
9948
9949							# retain backslash unless it hides the beginning or end token
9950	376					1003	$tok = $rtokens->[ ++$i ];
9951	376	100	100			2060	$quoted_string .= '\\'
9952							if ( $tok ne $end_tok && $tok ne $beginning_tok );
9953							}
9954							else {
9955							## nothing special
9956							}
9957	8179					15531	$quoted_string .= $tok;
9958							}
9959							}
9960	2814	50				6485	if ( $i > $max_token_index ) { $i = $max_token_index }
	0					0
9961							return (
9962
9963	2814					10866	$i,
9964							$in_quote,
9965							$beginning_tok,
9966							$quote_pos,
9967							$quote_depth,
9968							$quoted_string,
9969
9970							);
9971							} ## end sub follow_quoted_string
9972
9973							sub indicate_error {
9974	0			0	0	0	my ( $self, $msg, $line_number, $input_line, $pos, $carrat ) = @_;
9975	0					0	$self->interrupt_logfile();
9976	0					0	$self->warning($msg);
9977	0					0	$self->write_error_indicator_pair( $line_number, $input_line, $pos,
9978							$carrat );
9979	0					0	$self->resume_logfile();
9980	0					0	return;
9981							} ## end sub indicate_error
9982
9983							sub write_error_indicator_pair {
9984	0			0	0	0	my ( $self, $line_number, $input_line, $pos, $carrat ) = @_;
9985	0					0	my ( $offset, $numbered_line, $underline ) =
9986							make_numbered_line( $line_number, $input_line, $pos );
9987	0					0	$underline = write_on_underline( $underline, $pos - $offset, $carrat );
9988	0					0	$self->warning( $numbered_line . "\n" );
9989	0					0	$underline =~ s/\s*$//;
9990	0					0	$self->warning( $underline . "\n" );
9991	0					0	return;
9992							} ## end sub write_error_indicator_pair
9993
9994							sub make_numbered_line {
9995
9996							# Given an input line, its line number, and a character position of
9997							# interest, create a string not longer than 80 characters of the form
9998							# $lineno: sub_string
9999							# such that the sub_string of $str contains the position of interest
10000							#
10001							# Here is an example of what we want, in this case we add trailing
10002							# '...' because the line is long.
10003							#
10004							# 2: (One of QAML 2.0's authors is a member of the World Wide Web Con ...
10005							#
10006							# Here is another example, this time in which we used leading '...'
10007							# because of excessive length:
10008							#
10009							# 2: ... er of the World Wide Web Consortium's
10010							#
10011							# input parameters are:
10012							# $lineno = line number
10013							# $str = the text of the line
10014							# $pos = position of interest (the error) : 0 = first character
10015							#
10016							# We return :
10017							# - $offset = an offset which corrects the position in case we only
10018							# display part of a line, such that $pos-$offset is the effective
10019							# position from the start of the displayed line.
10020							# - $numbered_line = the numbered line as above,
10021							# - $underline = a blank 'underline' which is all spaces with the same
10022							# number of characters as the numbered line.
10023
10024	0			0	0	0	my ( $lineno, $str, $pos ) = @_;
10025	0	0				0	my $offset = ( $pos < 60 ) ? 0 : $pos - 40;
10026	0					0	my $excess = length($str) - $offset - 68;
10027	0	0				0	my $numc = ( $excess > 0 ) ? 68 : undef;
10028
10029	0	0				0	if ( defined($numc) ) {
10030	0	0				0	if ( $offset == 0 ) {
10031	0					0	$str = substr( $str, $offset, $numc - 4 ) . " ...";
10032							}
10033							else {
10034	0					0	$str = "... " . substr( $str, $offset + 4, $numc - 4 ) . " ...";
10035							}
10036							}
10037							else {
10038
10039	0	0				0	if ( $offset == 0 ) {
10040							}
10041							else {
10042	0					0	$str = "... " . substr( $str, $offset + 4 );
10043							}
10044							}
10045
10046	0					0	my $numbered_line = sprintf( "%d: ", $lineno );
10047	0					0	$offset -= length($numbered_line);
10048	0					0	$numbered_line .= $str;
10049	0					0	my $underline = SPACE x length($numbered_line);
10050	0					0	return ( $offset, $numbered_line, $underline );
10051							} ## end sub make_numbered_line
10052
10053							sub write_on_underline {
10054
10055							# The "underline" is a string that shows where an error is; it starts
10056							# out as a string of blanks with the same length as the numbered line of
10057							# code above it, and we have to add marking to show where an error is.
10058							# In the example below, we want to write the string '--^' just below
10059							# the line of bad code:
10060							#
10061							# 2: (One of QAML 2.0's authors is a member of the World Wide Web Con ...
10062							# ---^
10063							# We are given the current underline string, plus a position and a
10064							# string to write on it.
10065							#
10066							# In the above example, there will be 2 calls to do this:
10067							# First call: $pos=19, pos_chr=^
10068							# Second call: $pos=16, pos_chr=---
10069							#
10070							# This is a trivial thing to do with substr, but there is some
10071							# checking to do.
10072
10073	0			0	0	0	my ( $underline, $pos, $pos_chr ) = @_;
10074
10075							# check for error..shouldn't happen
10076	0	0	0			0	if ( $pos < 0 \|\| $pos > length($underline) ) {
10077	0					0	return $underline;
10078							}
10079	0					0	my $excess = length($pos_chr) + $pos - length($underline);
10080	0	0				0	if ( $excess > 0 ) {
10081	0					0	$pos_chr = substr( $pos_chr, 0, length($pos_chr) - $excess );
10082							}
10083	0					0	substr( $underline, $pos, length($pos_chr), $pos_chr );
10084	0					0	return ($underline);
10085							} ## end sub write_on_underline
10086
10087							sub pre_tokenize {
10088
10089	6195			6195	0	12861	my ( $str, $max_tokens_wanted ) = @_;
10090
10091							# Input parameters:
10092							# $str = string to be parsed
10093							# $max_tokens_wanted > 0 to stop on reaching this many tokens.
10094							# = undef or 0 means get all tokens
10095
10096							# Break a string, $str, into a sequence of preliminary tokens (pre-tokens).
10097							# We look for these types of tokens:
10098							# words (type='w'), example: 'max_tokens_wanted'
10099							# digits (type = 'd'), example: '0755'
10100							# whitespace (type = 'b'), example: ' '
10101							# single character punct (type = char) example: '='
10102
10103							# Later operations will combine one or more of these pre-tokens into final
10104							# tokens. We cannot do better than this yet because we might be in a
10105							# quoted string or pattern.
10106
10107							# An advantage of doing this pre-tokenization step is that it keeps almost
10108							# all of the regex parsing very simple and localized right here. A
10109							# disadvantage is that in some extremely rare instances we will have to go
10110							# back and split a pre-token.
10111
10112							# Return parameters:
10113	6195					10273	my @tokens = (); # array of the tokens themselves
10114	6195					12682	my @token_map = (0); # string position of start of each token
10115	6195					10091	my @type = (); # 'b'=whitespace, 'd'=digits, 'w'=alpha, or punct
10116
10117	6195	100				13121	if ( !$max_tokens_wanted ) { $max_tokens_wanted = -1 }
	5910					9137
10118
10119	6195					14214	while ( $max_tokens_wanted-- ) {
10120
10121	82230	100				229074	if (
10122							$str =~ m{
10123							\G(
10124							(\s+) # type 'b' = whitespace - this must come before \W
10125							\| (\W) # or type=char = single-character, non-whitespace punct
10126							\| (\d+) # or type 'd' = sequence of digits - must come before \w
10127							\| (\w+) # or type 'w' = words not starting with a digit
10128							)
10129							}gcx
10130							)
10131							{
10132	76179					161064	push @tokens, $1;
10133	76179	100				186872	push @type,
		100
		100
10134							defined($2) ? 'b' : defined($3) ? $1 : defined($4) ? 'd' : 'w';
10135	76179					150863	push @token_map, pos($str);
10136							}
10137
10138							# that's all..
10139							else {
10140	6051					37545	return ( \@tokens, \@token_map, \@type );
10141							}
10142							}
10143
10144	144					917	return ( \@tokens, \@token_map, \@type );
10145							} ## end sub pre_tokenize
10146
10147							sub show_tokens {
10148
10149							# this is an old debug routine
10150							# not called, but saved for reference
10151	0			0	0	0	my ( $rtokens, $rtoken_map ) = @_;
10152	0					0	my $num = scalar( @{$rtokens} );
	0					0
10153
10154	0					0	foreach my $i ( 0 .. $num - 1 ) {
10155	0					0	my $len = length( $rtokens->[$i] );
10156	0					0	print {*STDOUT} "$i:$len:$rtoken_map->[$i]:$rtokens->[$i]:\n";
	0					0
10157							}
10158	0					0	return;
10159							} ## end sub show_tokens
10160
10161							{ ## closure for sub matching end token
10162							my %matching_end_token;
10163
10164							BEGIN {
10165	39			39		62235	%matching_end_token = (
10166							'{' => '}',
10167							'(' => ')',
10168							'[' => ']',
10169							'<' => '>',
10170							);
10171							} ## end BEGIN
10172
10173							sub matching_end_token {
10174
10175							# return closing character for a pattern
10176	2998			2998	0	4991	my $beginning_token = shift;
10177	2998	100				7656	if ( $matching_end_token{$beginning_token} ) {
10178	373					882	return $matching_end_token{$beginning_token};
10179							}
10180	2625					5924	return ($beginning_token);
10181							} ## end sub matching_end_token
10182							}
10183
10184							sub dump_token_types {
10185	0			0	0		my ( $class, $fh ) = @_;
10186
10187							# This should be the latest list of token types in use
10188							# adding NEW_TOKENS: add a comment here
10189	0						$fh->print(<<'END_OF_LIST');
10190
10191							Here is a list of the token types currently used for lines of type 'CODE'.
10192							For the following tokens, the "type" of a token is just the token itself.
10193
10194							.. :: << >> ** && .. \|\| // -> => += -= .= %= &= \|= ^= *= <>
10195							( ) <= >= == =~ !~ != ++ -- /= x=
10196							... **= <<= >>= &&= \|\|= //= <=>
10197							, + - / * \| % ! x ~ = \ ? : . < > ^ &
10198
10199							The following additional token types are defined:
10200
10201							type meaning
10202							b blank (white space)
10203							{ indent: opening structural curly brace or square bracket or paren
10204							(code block, anonymous hash reference, or anonymous array reference)
10205							} outdent: right structural curly brace or square bracket or paren
10206							[ left non-structural square bracket (enclosing an array index)
10207							] right non-structural square bracket
10208							( left non-structural paren (all but a list right of an =)
10209							) right non-structural paren
10210							L left non-structural curly brace (enclosing a key)
10211							R right non-structural curly brace
10212							; terminal semicolon
10213							f indicates a semicolon in a "for" statement
10214							h here_doc operator <<
10215							# a comment
10216							Q indicates a quote or pattern
10217							q indicates a qw quote block
10218							k a perl keyword
10219							C user-defined constant or constant function (with void prototype = ())
10220							U user-defined function taking parameters
10221							G user-defined function taking block parameter (like grep/map/eval)
10222							S sub definition (reported as type 'i' in older versions)
10223							P package definition (reported as type 'i' in older versions)
10224							t type indicater such as %,$,@,*,&,sub
10225							w bare word (perhaps a subroutine call)
10226							i identifier of some type (with leading %, $, @, *, &, sub, -> )
10227							n a number
10228							v a v-string
10229							F a file test operator (like -e)
10230							Y File handle
10231							Z identifier in indirect object slot: may be file handle, object
10232							J LABEL: code block label
10233							j LABEL after next, last, redo, goto
10234							p unary +
10235							m unary -
10236							pp pre-increment operator ++
10237							mm pre-decrement operator --
10238							A : used as attribute separator
10239
10240							Here are the '_line_type' codes used internally:
10241							SYSTEM - system-specific code before hash-bang line
10242							CODE - line of perl code (including comments)
10243							POD_START - line starting pod, such as '=head'
10244							POD - pod documentation text
10245							POD_END - last line of pod section, '=cut'
10246							HERE - text of here-document
10247							HERE_END - last line of here-doc (target word)
10248							FORMAT - format section
10249							FORMAT_END - last line of format section, '.'
10250							SKIP - code skipping section
10251							SKIP_END - last line of code skipping section, '#>>V'
10252							DATA_START - __DATA__ line
10253							DATA - unidentified text following __DATA__
10254							END_START - __END__ line
10255							END - unidentified text following __END__
10256							ERROR - we are in big trouble, probably not a perl script
10257							END_OF_LIST
10258
10259	0						return;
10260							} ## end sub dump_token_types
10261
10262							BEGIN {
10263
10264							# These names are used in error messages
10265	39			39		373	@opening_brace_names = qw# '{' '[' '(' '?' #;
10266	39					294	@closing_brace_names = qw# '}' ']' ')' ':' #;
10267
10268	39					161	my @q;
10269
10270	39					262	my @digraphs = qw(
10271							.. :: << >> ** && \|\| // -> => += -= .= %= &= \|= ^= *= <>
10272							<= >= == =~ !~ != ++ -- /= x= ~~ ~. \|. &. ^.
10273							);
10274	39					692	@is_digraph{@digraphs} = (1) x scalar(@digraphs);
10275
10276	39					237	@q = qw(
10277							. : < > * & \| / - = + - % ^ ! x ~
10278							);
10279	39					380	@can_start_digraph{@q} = (1) x scalar(@q);
10280
10281	39					183	my @trigraphs = qw( ... **= <<= >>= &&= \|\|= //= <=> !~~ &.= \|.= ^.= <<~);
10282	39					294	@is_trigraph{@trigraphs} = (1) x scalar(@trigraphs);
10283
10284	39					114	my @tetragraphs = qw( <<>> );
10285	39					194	@is_tetragraph{@tetragraphs} = (1) x scalar(@tetragraphs);
10286
10287							# make a hash of all valid token types for self-checking the tokenizer
10288							# (adding NEW_TOKENS : select a new character and add to this list)
10289							# fix for c250: added new token type 'P' and 'S'
10290	39					764	my @valid_token_types = qw#
10291							A b C G L R f h Q k t w i q n p m F pp mm U j J Y Z v P S
10292							{ } ( ) [ ] ; + - / * \| % ! x ~ = \ ? : . < > ^ &
10293							#;
10294	39					313	push( @valid_token_types, @digraphs );
10295	39					228	push( @valid_token_types, @trigraphs );
10296	39					101	push( @valid_token_types, @tetragraphs );
10297	39					95	push( @valid_token_types, ( '#', ',', 'CORE::' ) );
10298	39					1734	@is_valid_token_type{@valid_token_types} = (1) x scalar(@valid_token_types);
10299
10300							# a list of file test letters, as in -e (Table 3-4 of 'camel 3')
10301	39					260	my @file_test_operators =
10302							qw( A B C M O R S T W X b c d e f g k l o p r s t u w x z);
10303	39					542	@is_file_test_operator{@file_test_operators} =
10304							(1) x scalar(@file_test_operators);
10305
10306							# these functions have prototypes of the form (&), so when they are
10307							# followed by a block, that block MAY BE followed by an operator.
10308							# Smartmatch operator ~~ may be followed by anonymous hash or array ref
10309	39					178	@q = qw( do eval );
10310	39					186	@is_block_operator{@q} = (1) x scalar(@q);
10311
10312							# these functions allow an identifier in the indirect object slot
10313	39					112	@q = qw( print printf sort exec system say);
10314	39					222	@is_indirect_object_taker{@q} = (1) x scalar(@q);
10315
10316							# Note: 'field' will be added by sub check_options if --use-feature=class
10317	39					128	@q = qw(my our state);
10318	39					149	@is_my_our_state{@q} = (1) x scalar(@q);
10319
10320							# These tokens may precede a code block
10321							# patched for SWITCH/CASE/CATCH. Actually these could be removed
10322							# now and we could let the extended-syntax coding handle them.
10323							# Added 'default' for Switch::Plain.
10324							# Note: 'ADJUST' will be added by sub check_options if --use-feature=class
10325	39					255	@q =
10326							qw( BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else
10327							unless do while until eval for foreach map grep sort
10328							switch case given when default catch try finally);
10329	39					812	@is_code_block_token{@q} = (1) x scalar(@q);
10330
10331							# Note: this hash was formerly named '%is_not_zero_continuation_block_type'
10332							# to contrast it with the block types in '%is_zero_continuation_block_type'
10333	39					208	@q = qw( sort map grep eval do );
10334	39					160	@is_sort_map_grep_eval_do{@q} = (1) x scalar(@q);
10335
10336	39					99	@q = qw( sort map grep );
10337	39					154	@is_sort_map_grep{@q} = (1) x scalar(@q);
10338
10339	39					85	%is_grep_alias = ();
10340
10341							# I'll build the list of keywords incrementally
10342	39					93	my @Keywords = ();
10343
10344							# keywords and tokens after which a value or pattern is expected,
10345							# but not an operator. In other words, these should consume terms
10346							# to their right, or at least they are not expected to be followed
10347							# immediately by operators.
10348	39					1547	my @value_requestor = qw(
10349							AUTOLOAD
10350							BEGIN
10351							CHECK
10352							DESTROY
10353							END
10354							EQ
10355							GE
10356							GT
10357							INIT
10358							LE
10359							LT
10360							NE
10361							UNITCHECK
10362							abs
10363							accept
10364							alarm
10365							and
10366							atan2
10367							bind
10368							binmode
10369							bless
10370							break
10371							caller
10372							chdir
10373							chmod
10374							chomp
10375							chop
10376							chown
10377							chr
10378							chroot
10379							close
10380							closedir
10381							cmp
10382							connect
10383							continue
10384							cos
10385							crypt
10386							dbmclose
10387							dbmopen
10388							defined
10389							delete
10390							die
10391							dump
10392							each
10393							else
10394							elsif
10395							eof
10396							eq
10397							evalbytes
10398							exec
10399							exists
10400							exit
10401							exp
10402							fc
10403							fcntl
10404							fileno
10405							flock
10406							for
10407							foreach
10408							formline
10409							ge
10410							getc
10411							getgrgid
10412							getgrnam
10413							gethostbyaddr
10414							gethostbyname
10415							getnetbyaddr
10416							getnetbyname
10417							getpeername
10418							getpgrp
10419							getpriority
10420							getprotobyname
10421							getprotobynumber
10422							getpwnam
10423							getpwuid
10424							getservbyname
10425							getservbyport
10426							getsockname
10427							getsockopt
10428							glob
10429							gmtime
10430							goto
10431							grep
10432							gt
10433							hex
10434							if
10435							index
10436							int
10437							ioctl
10438							join
10439							keys
10440							kill
10441							last
10442							lc
10443							lcfirst
10444							le
10445							length
10446							link
10447							listen
10448							local
10449							localtime
10450							lock
10451							log
10452							lstat
10453							lt
10454							map
10455							mkdir
10456							msgctl
10457							msgget
10458							msgrcv
10459							msgsnd
10460							my
10461							ne
10462							next
10463							no
10464							not
10465							oct
10466							open
10467							opendir
10468							or
10469							ord
10470							our
10471							pack
10472							pipe
10473							pop
10474							pos
10475							print
10476							printf
10477							prototype
10478							push
10479							quotemeta
10480							rand
10481							read
10482							readdir
10483							readlink
10484							readline
10485							readpipe
10486							recv
10487							redo
10488							ref
10489							rename
10490							require
10491							reset
10492							return
10493							reverse
10494							rewinddir
10495							rindex
10496							rmdir
10497							scalar
10498							seek
10499							seekdir
10500							select
10501							semctl
10502							semget
10503							semop
10504							send
10505							sethostent
10506							setnetent
10507							setpgrp
10508							setpriority
10509							setprotoent
10510							setservent
10511							setsockopt
10512							shift
10513							shmctl
10514							shmget
10515							shmread
10516							shmwrite
10517							shutdown
10518							sin
10519							sleep
10520							socket
10521							socketpair
10522							sort
10523							splice
10524							split
10525							sprintf
10526							sqrt
10527							srand
10528							stat
10529							state
10530							study
10531							substr
10532							symlink
10533							syscall
10534							sysopen
10535							sysread
10536							sysseek
10537							system
10538							syswrite
10539							tell
10540							telldir
10541							tie
10542							tied
10543							truncate
10544							uc
10545							ucfirst
10546							umask
10547							undef
10548							unless
10549							unlink
10550							unpack
10551							unshift
10552							untie
10553							until
10554							use
10555							utime
10556							values
10557							vec
10558							waitpid
10559							warn
10560							while
10561							write
10562							xor
10563
10564							switch
10565							case
10566							default
10567							given
10568							when
10569							err
10570							say
10571							isa
10572
10573							catch
10574
10575							);
10576
10577							# Note: 'ADJUST', 'field' are added by sub check_options
10578							# if --use-feature=class
10579
10580							# patched above for SWITCH/CASE given/when err say
10581							# 'err' is a fairly safe addition.
10582							# Added 'default' for Switch::Plain. Note that we could also have
10583							# a separate set of keywords to include if we see 'use Switch::Plain'
10584	39					1627	push( @Keywords, @value_requestor );
10585
10586							# These are treated the same but are not keywords:
10587	39					148	my @extra_vr = qw(
10588							constant
10589							vars
10590							);
10591	39					281	push( @value_requestor, @extra_vr );
10592
10593	39					4772	@expecting_term_token{@value_requestor} = (1) x scalar(@value_requestor);
10594
10595							# this list contains keywords which do not look for arguments,
10596							# so that they might be followed by an operator, or at least
10597							# not a term.
10598	39					372	my @operator_requestor = qw(
10599							endgrent
10600							endhostent
10601							endnetent
10602							endprotoent
10603							endpwent
10604							endservent
10605							fork
10606							getgrent
10607							gethostent
10608							getlogin
10609							getnetent
10610							getppid
10611							getprotoent
10612							getpwent
10613							getservent
10614							setgrent
10615							setpwent
10616							time
10617							times
10618							wait
10619							wantarray
10620							);
10621
10622	39					151	push( @Keywords, @operator_requestor );
10623
10624							# These are treated the same but are not considered keywords:
10625	39					93	my @extra_or = qw(
10626							STDERR
10627							STDIN
10628							STDOUT
10629							);
10630
10631	39					121	push( @operator_requestor, @extra_or );
10632
10633	39					777	@expecting_operator_token{@operator_requestor} =
10634							(1) x scalar(@operator_requestor);
10635
10636							# these token TYPES expect trailing operator but not a term
10637							# note: ++ and -- are post-increment and decrement, 'C' = constant
10638	39					170	my @operator_requestor_types = qw( ++ -- C <> q );
10639
10640							# NOTE: This hash is available but not currently used
10641	39					178	@expecting_operator_types{@operator_requestor_types} =
10642							(1) x scalar(@operator_requestor_types);
10643
10644							# these token TYPES consume values (terms)
10645							# note: pp and mm are pre-increment and decrement
10646							# f=semicolon in for, F=file test operator
10647	39					768	my @value_requestor_type = qw#
10648							L { ( [ ~ !~ =~ ; . .. ... A : && ! \|\| // = + - x
10649							*= += -= .= /= = %= x= &= \|= ^= <<= >>= &&= \|\|= //=
10650							<= >= == != => \ > < % * / ? & \| ** <=> ~~ !~~ <<~
10651							f F pp mm Y p m U J G j >> << ^ t
10652							~. ^. \|. &. ^.= \|.= &.=
10653							#;
10654	39					281	push( @value_requestor_type, ',' )
10655							; # (perl doesn't like a ',' in a qw block)
10656
10657							# NOTE: This hash is available but not currently used
10658	39					1025	@expecting_term_types{@value_requestor_type} =
10659							(1) x scalar(@value_requestor_type);
10660
10661							# Note: the following valid token types are not assigned here to
10662							# hashes requesting to be followed by values or terms, but are
10663							# instead currently hard-coded into sub operator_expected:
10664							# ) -> :: Q R Z ] b h i k n v w } #
10665
10666							# For simple syntax checking, it is nice to have a list of operators which
10667							# will really be unhappy if not followed by a term. This includes most
10668							# of the above...
10669	39					1292	@really_want_term{@value_requestor_type} =
10670							(1) x scalar(@value_requestor_type);
10671
10672							# with these exceptions...
10673	39					182	delete $really_want_term{'U'}; # user sub, depends on prototype
10674	39					105	delete $really_want_term{'F'}; # file test works on $_ if no following term
10675	39					88	delete $really_want_term{'Y'}; # indirect object, too risky to check syntax;
10676							# let perl do it
10677	39					262	@q = qw(q qq qx qr s y tr m);
10678	39					239	@is_q_qq_qx_qr_s_y_tr_m{@q} = (1) x scalar(@q);
10679
10680							# Note added 'qw' here
10681	39					153	@q = qw(q qq qw qx qr s y tr m);
10682	39					194	@is_q_qq_qw_qx_qr_s_y_tr_m{@q} = (1) x scalar(@q);
10683
10684							# Note: 'class' will be added by sub check_options if -use-feature=class
10685	39					125	@q = qw(package);
10686	39					164	@is_package{@q} = (1) x scalar(@q);
10687
10688	39					101	@q = qw( if elsif unless );
10689	39					114	@is_if_elsif_unless{@q} = (1) x scalar(@q);
10690
10691	39					108	@q = qw( ; t );
10692	39					158	@is_semicolon_or_t{@q} = (1) x scalar(@q);
10693
10694	39					129	@q = qw( if elsif unless case when );
10695	39					168	@is_if_elsif_unless_case_when{@q} = (1) x scalar(@q);
10696
10697							# Hash of other possible line endings which may occur.
10698							# Keep these coordinated with the regex where this is used.
10699							# Note: chr(13) = chr(015)="\r".
10700	39					152	@q = ( chr(13), chr(29), chr(26) );
10701	39					189	@other_line_endings{@q} = (1) x scalar(@q);
10702
10703							# These keywords are handled specially in the tokenizer code:
10704	39					134	my @special_keywords = qw(
10705							do
10706							eval
10707							format
10708							m
10709							package
10710							q
10711							qq
10712							qr
10713							qw
10714							qx
10715							s
10716							sub
10717							tr
10718							y
10719							);
10720	39					387	push( @Keywords, @special_keywords );
10721
10722							# Keywords after which list formatting may be used
10723							# WARNING: do not include \|map\|grep\|eval or perl may die on
10724							# syntax errors (map1.t).
10725	39					623	my @keyword_taking_list = qw(
10726							and
10727							chmod
10728							chomp
10729							chop
10730							chown
10731							dbmopen
10732							die
10733							elsif
10734							exec
10735							fcntl
10736							for
10737							foreach
10738							formline
10739							getsockopt
10740							if
10741							index
10742							ioctl
10743							join
10744							kill
10745							local
10746							msgctl
10747							msgrcv
10748							msgsnd
10749							my
10750							open
10751							or
10752							our
10753							pack
10754							print
10755							printf
10756							push
10757							read
10758							readpipe
10759							recv
10760							return
10761							reverse
10762							rindex
10763							seek
10764							select
10765							semctl
10766							semget
10767							send
10768							setpriority
10769							setsockopt
10770							shmctl
10771							shmget
10772							shmread
10773							shmwrite
10774							socket
10775							socketpair
10776							sort
10777							splice
10778							split
10779							sprintf
10780							state
10781							substr
10782							syscall
10783							sysopen
10784							sysread
10785							sysseek
10786							system
10787							syswrite
10788							tie
10789							unless
10790							unlink
10791							unpack
10792							unshift
10793							until
10794							vec
10795							warn
10796							while
10797							given
10798							when
10799							);
10800
10801							# NOTE: This hash is available but not currently used
10802	39					1307	@is_keyword_taking_list{@keyword_taking_list} =
10803							(1) x scalar(@keyword_taking_list);
10804
10805							# perl functions which may be unary operators.
10806
10807							# This list is used to decide if a pattern delimited by slashes, /pattern/,
10808							# can follow one of these keywords.
10809	39					218	@q = qw(
10810							chomp eof eval fc lc pop shift uc undef
10811							);
10812
10813	39					269	@is_keyword_rejecting_slash_as_pattern_delimiter{@q} =
10814							(1) x scalar(@q);
10815
10816							# These are keywords for which an arg may optionally be omitted. They are
10817							# currently only used to disambiguate a ? used as a ternary from one used
10818							# as a (deprecated) pattern delimiter. In the future, they might be used
10819							# to give a warning about ambiguous syntax before a /.
10820							# Note: split has been omitted (see note below).
10821	39					499	my @keywords_taking_optional_arg = qw(
10822							abs
10823							alarm
10824							caller
10825							chdir
10826							chomp
10827							chop
10828							chr
10829							chroot
10830							close
10831							cos
10832							defined
10833							die
10834							eof
10835							eval
10836							evalbytes
10837							exit
10838							exp
10839							fc
10840							getc
10841							glob
10842							gmtime
10843							hex
10844							int
10845							last
10846							lc
10847							lcfirst
10848							length
10849							localtime
10850							log
10851							lstat
10852							mkdir
10853							next
10854							oct
10855							ord
10856							pop
10857							pos
10858							print
10859							printf
10860							prototype
10861							quotemeta
10862							rand
10863							readline
10864							readlink
10865							readpipe
10866							redo
10867							ref
10868							require
10869							reset
10870							reverse
10871							rmdir
10872							say
10873							select
10874							shift
10875							sin
10876							sleep
10877							sqrt
10878							srand
10879							stat
10880							study
10881							tell
10882							uc
10883							ucfirst
10884							umask
10885							undef
10886							unlink
10887							warn
10888							write
10889							);
10890	39					1074	@is_keyword_taking_optional_arg{@keywords_taking_optional_arg} =
10891							(1) x scalar(@keywords_taking_optional_arg);
10892
10893							# This list is used to decide if a pattern delimited by question marks,
10894							# ?pattern?, can follow one of these keywords. Note that from perl 5.22
10895							# on, a ?pattern? is not recognized, so we can be much more strict than
10896							# with a /pattern/. Note that 'split' is not in this list. In current
10897							# versions of perl a question following split must be a ternary, but
10898							# in older versions it could be a pattern. The guessing algorithm will
10899							# decide. We are combining two lists here to simplify the test.
10900	39					772	@q = ( @keywords_taking_optional_arg, @operator_requestor );
10901	39					1574	@is_keyword_rejecting_question_as_pattern_delimiter{@q} =
10902							(1) x scalar(@q);
10903
10904							# These are not used in any way yet
10905							# my @unused_keywords = qw(
10906							# __FILE__
10907							# __LINE__
10908							# __PACKAGE__
10909							# );
10910
10911							# The list of keywords was originally extracted from function 'keyword' in
10912							# perl file toke.c version 5.005.03, using this utility, plus a
10913							# little editing: (file getkwd.pl):
10914							# while (<>) { while (/\"(.*)\"/g) { print "$1\n"; } }
10915							# Add 'get' prefix where necessary, then split into the above lists.
10916							# This list should be updated as necessary.
10917							# The list should not contain these special variables:
10918							# ARGV DATA ENV SIG STDERR STDIN STDOUT
10919							# __DATA__ __END__
10920
10921	39					9059	@is_keyword{@Keywords} = (1) x scalar(@Keywords);
10922							} ## end BEGIN
10923							1;