File Coverage

blib/lib/Perl/Tidy/Tokenizer.pm

Criterion	Covered	Total	%
statement	2334	3199	72.9
branch	1118	1694	66.0
condition	517	874	59.1
subroutine	160	191	83.7
pod	0	146	0.0
total	4129	6104	67.6

line	stmt	bran	cond	sub	pod	time	code
1							#####################################################################
2							#
3							# The Perl::Tidy::Tokenizer package is essentially a filter which
4							# reads lines of perl source code from a source object and provides
5							# corresponding tokenized lines through its get_line() method. Lines
6							# flow from the source_object to the caller like this:
7							#
8							# source_object --> Tokenizer --> calling routine
9							# get_line() get_line() line_of_tokens
10							#
11							# The source object can be a STRING ref, an ARRAY ref, or an object with a
12							# get_line() method which supplies one line (a character string) perl call.
13							# The Tokenizer returns a reference to a data structure 'line_of_tokens'
14							# containing one tokenized line for each call to its get_line() method.
15							#
16							# NOTE: This is not a real class. Only one tokenizer my be used.
17							#
18							########################################################################
19
20							package Perl::Tidy::Tokenizer;
21	39			39		306	use strict;
	39					109
	39					1322
22	39			39		250	use warnings;
	39					105
	39					1140
23	39			39		220	use English qw( -no_match_vars );
	39					127
	39					283
24
25							our $VERSION = '20230909';
26
27	39			39		14644	use Carp;
	39					116
	39					2461
28
29	39			39		311	use constant DEVEL_MODE => 0;
	39					132
	39					2491
30	39			39		275	use constant EMPTY_STRING => q{};
	39					157
	39					2267
31	39			39		289	use constant SPACE => q{ };
	39					117
	39					2522
32
33							# Decimal values of some ascii characters for quick checks
34	39			39		286	use constant ORD_TAB => 9;
	39					98
	39					2044
35	39			39		265	use constant ORD_SPACE => 32;
	39					127
	39					2163
36	39			39		297	use constant ORD_PRINTABLE_MIN => 33;
	39					123
	39					2033
37	39			39		279	use constant ORD_PRINTABLE_MAX => 126;
	39					100
	39					7971
38
39							# GLOBAL VARIABLES which change during tokenization:
40							# These could also be stored in $self but it is more convenient and
41							# efficient to make them global lexical variables.
42							# INITIALIZER: sub prepare_for_a_new_file
43							my (
44
45							$brace_depth,
46							$context,
47							$current_package,
48							$last_nonblank_block_type,
49							$last_nonblank_token,
50							$last_nonblank_type,
51							$next_sequence_number,
52							$paren_depth,
53							$rbrace_context,
54							$rbrace_package,
55							$rbrace_structural_type,
56							$rbrace_type,
57							$rcurrent_depth,
58							$rcurrent_sequence_number,
59							$rdepth_array,
60							$ris_block_function,
61							$ris_block_list_function,
62							$ris_constant,
63							$ris_user_function,
64							$rnested_statement_type,
65							$rnested_ternary_flag,
66							$rparen_semicolon_count,
67							$rparen_vars,
68							$rparen_type,
69							$rsaw_function_definition,
70							$rsaw_use_module,
71							$rsquare_bracket_structural_type,
72							$rsquare_bracket_type,
73							$rstarting_line_of_current_depth,
74							$rtotal_depth,
75							$ruser_function_prototype,
76							$square_bracket_depth,
77							$statement_type,
78							$total_depth,
79							);
80
81							my (
82
83							# GLOBAL CONSTANTS for routines in this package,
84							# INITIALIZER: BEGIN block.
85							%can_start_digraph,
86							%expecting_operator_token,
87							%expecting_operator_types,
88							%expecting_term_token,
89							%expecting_term_types,
90							%is_block_operator,
91							%is_digraph,
92							%is_file_test_operator,
93							%is_if_elsif_unless,
94							%is_if_elsif_unless_case_when,
95							%is_indirect_object_taker,
96							%is_keyword_rejecting_question_as_pattern_delimiter,
97							%is_keyword_rejecting_slash_as_pattern_delimiter,
98							%is_keyword_taking_list,
99							%is_keyword_taking_optional_arg,
100							%is_q_qq_qw_qx_qr_s_y_tr_m,
101							%is_q_qq_qx_qr_s_y_tr_m,
102							%is_semicolon_or_t,
103							%is_sort_map_grep,
104							%is_sort_map_grep_eval_do,
105							%is_tetragraph,
106							%is_trigraph,
107							%is_valid_token_type,
108							%other_line_endings,
109							%really_want_term,
110							@closing_brace_names,
111							@opening_brace_names,
112
113							# GLOBAL CONSTANT hash lookup table of operator expected values
114							# INITIALIZER: BEGIN block
115							%op_expected_table,
116
117							# GLOBAL VARIABLES which are constant after being configured.
118							# INITIALIZER: BEGIN block and modified by sub check_options
119							%is_code_block_token,
120							%is_keyword,
121							%is_my_our_state,
122							%is_package,
123
124							# INITIALIZER: sub check_options
125							$code_skipping_pattern_begin,
126							$code_skipping_pattern_end,
127							$rOpts_code_skipping,
128							$rOpts_code_skipping_begin,
129							%is_END_DATA_format_sub,
130							%is_grep_alias,
131							%is_sub,
132							$guess_if_method,
133							);
134
135							# possible values of operator_expected()
136	39			39		301	use constant TERM => -1;
	39					1477
	39					2274
137	39			39		265	use constant UNKNOWN => 0;
	39					138
	39					2074
138	39			39		257	use constant OPERATOR => 1;
	39					138
	39					2357
139
140							# possible values of context
141	39			39		320	use constant SCALAR_CONTEXT => -1;
	39					156
	39					2332
142	39			39		313	use constant UNKNOWN_CONTEXT => 0;
	39					100
	39					2013
143	39			39		265	use constant LIST_CONTEXT => 1;
	39					111
	39					2251
144
145							# Maximum number of little messages; probably need not be changed.
146	39			39		339	use constant MAX_NAG_MESSAGES => 6;
	39					140
	39					8660
147
148	0					0	BEGIN {
149
150							# Array index names for $self.
151							# Do not combine with other BEGIN blocks (c101).
152	39			39		284081	my $i = 0;
153							use constant {
154	39					26373	_rhere_target_list_ => $i++,
155							_in_here_doc_ => $i++,
156							_here_doc_target_ => $i++,
157							_here_quote_character_ => $i++,
158							_in_data_ => $i++,
159							_in_end_ => $i++,
160							_in_format_ => $i++,
161							_in_error_ => $i++,
162							_in_trouble_ => $i++,
163							_warning_count_ => $i++,
164							_html_tag_count_ => $i++,
165							_in_pod_ => $i++,
166							_in_skipped_ => $i++,
167							_in_attribute_list_ => $i++,
168							_in_quote_ => $i++,
169							_quote_target_ => $i++,
170							_line_start_quote_ => $i++,
171							_starting_level_ => $i++,
172							_know_starting_level_ => $i++,
173							_tabsize_ => $i++,
174							_indent_columns_ => $i++,
175							_look_for_hash_bang_ => $i++,
176							_trim_qw_ => $i++,
177							_continuation_indentation_ => $i++,
178							_outdent_labels_ => $i++,
179							_last_line_number_ => $i++,
180							_saw_perl_dash_P_ => $i++,
181							_saw_perl_dash_w_ => $i++,
182							_saw_use_strict_ => $i++,
183							_saw_v_string_ => $i++,
184							_saw_brace_error_ => $i++,
185							_hit_bug_ => $i++,
186							_look_for_autoloader_ => $i++,
187							_look_for_selfloader_ => $i++,
188							_saw_autoloader_ => $i++,
189							_saw_selfloader_ => $i++,
190							_saw_hash_bang_ => $i++,
191							_saw_end_ => $i++,
192							_saw_data_ => $i++,
193							_saw_negative_indentation_ => $i++,
194							_started_tokenizing_ => $i++,
195							_debugger_object_ => $i++,
196							_diagnostics_object_ => $i++,
197							_logger_object_ => $i++,
198							_unexpected_error_count_ => $i++,
199							_started_looking_for_here_target_at_ => $i++,
200							_nearly_matched_here_target_at_ => $i++,
201							_line_of_text_ => $i++,
202							_rlower_case_labels_at_ => $i++,
203							_extended_syntax_ => $i++,
204							_maximum_level_ => $i++,
205							_true_brace_error_count_ => $i++,
206							_rOpts_maximum_level_errors_ => $i++,
207							_rOpts_maximum_unexpected_errors_ => $i++,
208							_rOpts_logfile_ => $i++,
209							_rOpts_ => $i++,
210							_rinput_lines_ => $i++,
211							_input_line_index_next_ => $i++,
212	39			39		294	};
	39					111
213							} ## end BEGIN
214
215							{ ## closure for subs to count instances
216
217							# methods to count instances
218							my $_count = 0;
219	0			0	0	0	sub get_count { return $_count; }
220	561			561		2473	sub _increment_count { return ++$_count }
221	561			561		1217	sub _decrement_count { return --$_count }
222							}
223
224							sub DESTROY {
225	561			561		1441	my $self = shift;
226	561					3087	$self->_decrement_count();
227	561					8561	return;
228							}
229
230							sub AUTOLOAD {
231
232							# Catch any undefined sub calls so that we are sure to get
233							# some diagnostic information. This sub should never be called
234							# except for a programming error.
235	0			0		0	our $AUTOLOAD;
236	0	0				0	return if ( $AUTOLOAD =~ /\bDESTROY$/ );
237	0					0	my ( $pkg, $fname, $lno ) = caller();
238	0					0	my $my_package = __PACKAGE__;
239	0					0	print {*STDERR} <<EOM;
	0					0
240							======================================================================
241							Error detected in package '$my_package', version $VERSION
242							Received unexpected AUTOLOAD call for sub '$AUTOLOAD'
243							Called from package: '$pkg'
244							Called from File '$fname' at line '$lno'
245							This error is probably due to a recent programming change
246							======================================================================
247							EOM
248	0					0	exit 1;
249							} ## end sub AUTOLOAD
250
251							sub Die {
252	0			0	0	0	my ($msg) = @_;
253	0					0	Perl::Tidy::Die($msg);
254	0					0	croak "unexpected return from Perl::Tidy::Die";
255							}
256
257							sub Fault {
258	0			0	0	0	my ( $self, $msg ) = @_;
259
260							# This routine is called for errors that really should not occur
261							# except if there has been a bug introduced by a recent program change.
262							# Please add comments at calls to Fault to explain why the call
263							# should not occur, and where to look to fix it.
264	0					0	my ( $package0, $filename0, $line0, $subroutine0 ) = caller(0);
265	0					0	my ( $package1, $filename1, $line1, $subroutine1 ) = caller(1);
266	0					0	my ( $package2, $filename2, $line2, $subroutine2 ) = caller(2);
267	0					0	my $pkg = __PACKAGE__;
268
269							# Catch potential error of Fault not called as a method
270	0					0	my $input_stream_name;
271	0	0				0	if ( !ref($self) ) {
272	0					0	$msg = "Fault not called as a method - please fix\n";
273	0	0	0			0	if ( $self && length($self) < 200 ) { $msg .= $self }
	0					0
274	0					0	$self = undef;
275	0					0	$input_stream_name = "(UNKNOWN)";
276							}
277							else {
278	0					0	$input_stream_name = $self->get_input_stream_name();
279							}
280
281	0					0	Die(<<EOM);
282							==============================================================================
283							While operating on input stream with name: '$input_stream_name'
284							A fault was detected at line $line0 of sub '$subroutine1'
285							in file '$filename1'
286							which was called from line $line1 of sub '$subroutine2'
287							Message: '$msg'
288							This is probably an error introduced by a recent programming change.
289							$pkg reports VERSION='$VERSION'.
290							==============================================================================
291							EOM
292
293							# We shouldn't get here, but this return is to keep Perl-Critic from
294							# complaining.
295	0					0	return;
296							} ## end sub Fault
297
298							sub make_code_skipping_pattern {
299	1118			1118	0	3400	my ( $rOpts, $opt_name, $default ) = @_;
300	1118					2594	my $param = $rOpts->{$opt_name};
301	1118	100				3016	if ( !$param ) { $param = $default }
	1116					2054
302	1118					4614	$param =~ s/^\s*//; # allow leading spaces to be like format-skipping
303	1118	50				5200	if ( $param !~ /^#/ ) {
304	0					0	Die("ERROR: the $opt_name parameter '$param' must begin with '#'\n");
305							}
306	1118					3484	my $pattern = '^\s*' . $param . '\b';
307	1118	50				3342	if ( Perl::Tidy::Formatter::bad_pattern($pattern) ) {
308	0					0	Die(
309							"ERROR: the $opt_name parameter '$param' causes the invalid regex '$pattern'\n"
310							);
311							}
312	1118					3777	return $pattern;
313							} ## end sub make_code_skipping_pattern
314
315							sub check_options {
316
317							# Check Tokenizer parameters
318	559			559	0	1751	my $rOpts = shift;
319
320	559					2161	%is_sub = ();
321	559					1809	$is_sub{'sub'} = 1;
322
323	559					4658	%is_END_DATA_format_sub = (
324							'__END__' => 1,
325							'__DATA__' => 1,
326							'format' => 1,
327							'sub' => 1,
328							);
329
330							# Install any aliases to 'sub'
331	559	100				2355	if ( $rOpts->{'sub-alias-list'} ) {
332
333							# Note that any 'sub-alias-list' has been preprocessed to
334							# be a trimmed, space-separated list which includes 'sub'
335							# for example, it might be 'sub method fun'
336	3					32	my @sub_alias_list = split /\s+/, $rOpts->{'sub-alias-list'};
337	3					17	foreach my $word (@sub_alias_list) {
338	11					24	$is_sub{$word} = 1;
339	11					22	$is_END_DATA_format_sub{$word} = 1;
340							}
341							}
342
343							# Set global flag to say if we have to guess if bareword 'method' is
344							# a sub when 'method' is in %is_sub. This will be true unless:
345							# (1) the user entered 'method' as sub alias, or
346							# (2) the user set --use-feature=class
347							# In these two cases we can assume that 'method' is a sub alias.
348	559					1323	$guess_if_method = 1;
349	559	100				2104	if ( $is_sub{'method'} ) { $guess_if_method = 0 }
	2					6
350
351							#------------------------------------------------
352							# Update hash values for any -use-feature options
353							#------------------------------------------------
354
355	559					1381	my $use_feature_class = 1;
356	559	50				2180	if ( $rOpts->{'use-feature'} ) {
357	0	0				0	if ( $rOpts->{'use-feature'} =~ /\bnoclass\b/ ) {
		0
358	0					0	$use_feature_class = 0;
359							}
360							elsif ( $rOpts->{'use-feature'} =~ /\bclass\b/ ) {
361	0					0	$guess_if_method = 0;
362							}
363							else {
364							## neither 'class' nor 'noclass' seen so use default
365							}
366							}
367
368							# These are the main updates for this option. There are additional
369							# changes elsewhere, usually indicated with a comment 'rt145706'
370
371							# Update hash values for use_feature=class, added for rt145706
372							# see 'perlclass.pod'
373
374							# IMPORTANT: We are changing global hash values initially set in a BEGIN
375							# block. Values must be defined (true or false) for each of these new
376							# words whether true or false. Otherwise, programs using the module which
377							# change options between runs (such as test code) will have
378							# incorrect settings and fail.
379
380							# There are 4 new keywords:
381
382							# 'class' - treated specially as generalization of 'package'
383							# Note: we must not set 'class' to be a keyword to avoid problems
384							# with older uses.
385	559					1828	$is_package{'class'} = $use_feature_class;
386
387							# 'method' - treated like sub using the sub-alias-list option
388							# Note: we must not set 'method' to be a keyword to avoid problems
389							# with older uses.
390	559	50				1851	if ($use_feature_class) {
391	559					1649	$is_sub{'method'} = 1;
392	559					1615	$is_END_DATA_format_sub{'method'} = 1;
393							}
394
395							# 'field' - added as a keyword, and works like 'my'
396	559					1606	$is_keyword{'field'} = $use_feature_class;
397	559					1614	$is_my_our_state{'field'} = $use_feature_class;
398
399							# 'ADJUST' - added as a keyword and works like 'BEGIN'
400							# TODO: if ADJUST gets a paren list, this will need to be updated
401	559					1429	$is_keyword{'ADJUST'} = $use_feature_class;
402	559					1478	$is_code_block_token{'ADJUST'} = $use_feature_class;
403
404	559					2079	%is_grep_alias = ();
405	559	50				2077	if ( $rOpts->{'grep-alias-list'} ) {
406
407							# Note that 'grep-alias-list' has been preprocessed to be a trimmed,
408							# space-separated list
409	559					5709	my @q = split /\s+/, $rOpts->{'grep-alias-list'};
410	559					5057	@{is_grep_alias}{@q} = (1) x scalar(@q);
411							}
412
413	559					2000	$rOpts_code_skipping = $rOpts->{'code-skipping'};
414	559					1449	$rOpts_code_skipping_begin = $rOpts->{'code-skipping-begin'};
415	559					2862	$code_skipping_pattern_begin =
416							make_code_skipping_pattern( $rOpts, 'code-skipping-begin', '#<<V' );
417	559					2116	$code_skipping_pattern_end =
418							make_code_skipping_pattern( $rOpts, 'code-skipping-end', '#>>V' );
419
420	559					2161	return;
421							} ## end sub check_options
422
423							sub new {
424
425	561			561	0	5686	my ( $class, @args ) = @_;
426
427							# Note: 'tabs' and 'indent_columns' are temporary and should be
428							# removed asap
429	561					9472	my %defaults = (
430							source_object => undef,
431							debugger_object => undef,
432							diagnostics_object => undef,
433							logger_object => undef,
434							starting_level => undef,
435							indent_columns => 4,
436							tabsize => 8,
437							look_for_hash_bang => 0,
438							trim_qw => 1,
439							look_for_autoloader => 1,
440							look_for_selfloader => 1,
441							starting_line_number => 1,
442							extended_syntax => 0,
443							rOpts => {},
444							);
445	561					7570	my %args = ( %defaults, @args );
446
447							# we are given an object with a get_line() method to supply source lines
448	561					2327	my $source_object = $args{source_object};
449	561					1952	my $rOpts = $args{rOpts};
450
451							# Check call args
452	561	50				2165	if ( !defined($source_object) ) {
453	0					0	Die(
454							"Perl::Tidy::Tokenizer::new called without a 'source_object' parameter\n"
455							);
456							}
457	561	50				2187	if ( !ref($source_object) ) {
458	0					0	Die(<<EOM);
459							sub Perl::Tidy::Tokenizer::new received a 'source_object' parameter which is not a reference;
460							'source_object' must be a reference to a STRING, ARRAY, or object with a 'getline' method
461							EOM
462							}
463
464							# Tokenizer state data is as follows:
465							# _rhere_target_list_ reference to list of here-doc targets
466							# _here_doc_target_ the target string for a here document
467							# _here_quote_character_ the type of here-doc quoting (" ' ` or none)
468							# to determine if interpolation is done
469							# _quote_target_ character we seek if chasing a quote
470							# _line_start_quote_ line where we started looking for a long quote
471							# _in_here_doc_ flag indicating if we are in a here-doc
472							# _in_pod_ flag set if we are in pod documentation
473							# _in_skipped_ flag set if we are in a skipped section
474							# _in_error_ flag set if we saw severe error (binary in script)
475							# _in_trouble_ set if we saw a troublesome lexical like 'my sub s'
476							# _warning_count_ number of calls to logger sub warning
477							# _html_tag_count_ number of apparent html tags seen (indicates html)
478							# _in_data_ flag set if we are in __DATA__ section
479							# _in_end_ flag set if we are in __END__ section
480							# _in_format_ flag set if we are in a format description
481							# _in_attribute_list_ flag telling if we are looking for attributes
482							# _in_quote_ flag telling if we are chasing a quote
483							# _starting_level_ indentation level of first line
484							# _diagnostics_object_ place to write debugging information
485							# _unexpected_error_count_ error count used to limit output
486							# _lower_case_labels_at_ line numbers where lower case labels seen
487							# _hit_bug_ program bug detected
488
489	561					1432	my $self = [];
490	561					1775	$self->[_rhere_target_list_] = [];
491	561					1427	$self->[_in_here_doc_] = 0;
492	561					1651	$self->[_here_doc_target_] = EMPTY_STRING;
493	561					1423	$self->[_here_quote_character_] = EMPTY_STRING;
494	561					1480	$self->[_in_data_] = 0;
495	561					1531	$self->[_in_end_] = 0;
496	561					1549	$self->[_in_format_] = 0;
497	561					1350	$self->[_in_error_] = 0;
498	561					1376	$self->[_in_trouble_] = 0;
499	561					1224	$self->[_warning_count_] = 0;
500	561					1318	$self->[_html_tag_count_] = 0;
501	561					1309	$self->[_in_pod_] = 0;
502	561					1383	$self->[_in_skipped_] = 0;
503	561					1221	$self->[_in_attribute_list_] = 0;
504	561					1260	$self->[_in_quote_] = 0;
505	561					1468	$self->[_quote_target_] = EMPTY_STRING;
506	561					1348	$self->[_line_start_quote_] = -1;
507	561					1436	$self->[_starting_level_] = $args{starting_level};
508	561					1839	$self->[_know_starting_level_] = defined( $args{starting_level} );
509	561					1548	$self->[_tabsize_] = $args{tabsize};
510	561					1871	$self->[_indent_columns_] = $args{indent_columns};
511	561					1347	$self->[_look_for_hash_bang_] = $args{look_for_hash_bang};
512	561					1461	$self->[_trim_qw_] = $args{trim_qw};
513	561					1461	$self->[_continuation_indentation_] = $args{continuation_indentation};
514	561					1390	$self->[_outdent_labels_] = $args{outdent_labels};
515	561					1515	$self->[_last_line_number_] = $args{starting_line_number} - 1;
516	561					1329	$self->[_saw_perl_dash_P_] = 0;
517	561					1475	$self->[_saw_perl_dash_w_] = 0;
518	561					1195	$self->[_saw_use_strict_] = 0;
519	561					1187	$self->[_saw_v_string_] = 0;
520	561					1214	$self->[_saw_brace_error_] = 0;
521	561					1221	$self->[_hit_bug_] = 0;
522	561					1240	$self->[_look_for_autoloader_] = $args{look_for_autoloader};
523	561					1543	$self->[_look_for_selfloader_] = $args{look_for_selfloader};
524	561					1428	$self->[_saw_autoloader_] = 0;
525	561					1190	$self->[_saw_selfloader_] = 0;
526	561					1282	$self->[_saw_hash_bang_] = 0;
527	561					1163	$self->[_saw_end_] = 0;
528	561					1188	$self->[_saw_data_] = 0;
529	561					1190	$self->[_saw_negative_indentation_] = 0;
530	561					1438	$self->[_started_tokenizing_] = 0;
531	561					1343	$self->[_debugger_object_] = $args{debugger_object};
532	561					1335	$self->[_diagnostics_object_] = $args{diagnostics_object};
533	561					1312	$self->[_logger_object_] = $args{logger_object};
534	561					1407	$self->[_unexpected_error_count_] = 0;
535	561					1242	$self->[_started_looking_for_here_target_at_] = 0;
536	561					1228	$self->[_nearly_matched_here_target_at_] = undef;
537	561					1219	$self->[_line_of_text_] = EMPTY_STRING;
538	561					1469	$self->[_rlower_case_labels_at_] = undef;
539	561					1497	$self->[_extended_syntax_] = $args{extended_syntax};
540	561					1515	$self->[_maximum_level_] = 0;
541	561					1234	$self->[_true_brace_error_count_] = 0;
542	561					1425	$self->[_rOpts_maximum_level_errors_] = $rOpts->{'maximum-level-errors'};
543							$self->[_rOpts_maximum_unexpected_errors_] =
544	561					1486	$rOpts->{'maximum-unexpected-errors'};
545	561					1572	$self->[_rOpts_logfile_] = $rOpts->{'logfile'};
546	561					1573	$self->[_rOpts_] = $rOpts;
547
548							# These vars are used for guessing indentation and must be positive
549	561	50				2116	$self->[_tabsize_] = 8 if ( !$self->[_tabsize_] );
550	561	100				2111	$self->[_indent_columns_] = 4 if ( !$self->[_indent_columns_] );
551
552	561					1480	bless $self, $class;
553
554	561					3744	$self->prepare_for_a_new_file($source_object);
555	561					3477	$self->find_starting_indentation_level();
556
557							# This is not a full class yet, so die if an attempt is made to
558							# create more than one object.
559
560	561	50				2747	if ( _increment_count() > 1 ) {
561	0					0	confess
562							"Attempt to create more than 1 object in $class, which is not a true class yet\n";
563							}
564
565	561					6418	return $self;
566
567							} ## end sub new
568
569							# Called externally
570							sub get_unexpected_error_count {
571	4			4	0	17	my ($self) = @_;
572	4					18	return $self->[_unexpected_error_count_];
573							}
574
575							# Called externally
576							sub is_keyword {
577	2791			2791	0	5126	my ($str) = @_;
578	2791					10244	return $is_keyword{$str};
579							}
580
581							#----------------------------------------------------------------
582							# Line input routines, previously handled by the LineBuffer class
583							#----------------------------------------------------------------
584							sub make_source_array {
585
586	561			561	0	1788	my ( $self, $line_source_object ) = @_;
587
588							# Convert the source into an array of lines
589	561					1332	my $rinput_lines = [];
590
591	561					1529	my $rsource = ref($line_source_object);
592
593	561	50				3719	if ( !$rsource ) {
		50
		50
594
595							# shouldn't happen: this should have been checked in sub new
596	0					0	$self->Fault(<<EOM);
597							sub Perl::Tidy::Tokenizer::new received a 'source_object' parameter which is not a reference;
598							'source_object' must be a reference to a STRING, ARRAY, or object with a 'getline' method
599							EOM
600							}
601
602							# handle an ARRAY ref
603							elsif ( $rsource eq 'ARRAY' ) {
604	0					0	$rinput_lines = $line_source_object;
605							}
606
607							# handle a SCALAR ref
608							elsif ( $rsource eq 'SCALAR' ) {
609	561					974	my @lines = split /^/, ${$line_source_object};
	561					9070
610	561					2442	$rinput_lines = \@lines;
611							}
612
613							# handle an object - must have a get_line method
614							else {
615	0					0	while ( my $line = $line_source_object->get_line() ) {
616	0					0	push( @{$rinput_lines}, $line );
	0					0
617							}
618							}
619
620	561					2245	$self->[_rinput_lines_] = $rinput_lines;
621	561					1574	$self->[_input_line_index_next_] = 0;
622	561					1310	return;
623							} ## end sub make_source_array
624
625							sub peek_ahead {
626	1234			1234	0	2926	my ( $self, $buffer_index ) = @_;
627
628							# look $buffer_index lines ahead of the current location without disturbing
629							# the input
630	1234					2014	my $line;
631	1234					2429	my $rinput_lines = $self->[_rinput_lines_];
632	1234					2534	my $line_index = $buffer_index + $self->[_input_line_index_next_];
633	1234	100				2091	if ( $line_index < @{$rinput_lines} ) {
	1234					3595
634	1222					2607	$line = $rinput_lines->[$line_index];
635							}
636	1234					4657	return $line;
637							} ## end sub peek_ahead
638
639							#-----------------------------------------
640							# interface to Perl::Tidy::Logger routines
641							#-----------------------------------------
642							sub warning {
643
644	0			0	0	0	my ( $self, $msg ) = @_;
645
646	0					0	my $logger_object = $self->[_logger_object_];
647	0					0	$self->[_warning_count_]++;
648	0	0				0	if ($logger_object) {
649	0					0	my $msg_line_number = $self->[_last_line_number_];
650	0					0	$logger_object->warning( $msg, $msg_line_number );
651							}
652	0					0	return;
653							} ## end sub warning
654
655							sub get_input_stream_name {
656
657	0			0	0	0	my $self = shift;
658
659	0					0	my $input_stream_name = EMPTY_STRING;
660	0					0	my $logger_object = $self->[_logger_object_];
661	0	0				0	if ($logger_object) {
662	0					0	$input_stream_name = $logger_object->get_input_stream_name();
663							}
664	0					0	return $input_stream_name;
665							} ## end sub get_input_stream_name
666
667							sub complain {
668
669	32			32	0	112	my ( $self, $msg ) = @_;
670
671	32					80	my $logger_object = $self->[_logger_object_];
672	32	50				101	if ($logger_object) {
673	32					72	my $input_line_number = $self->[_last_line_number_];
674	32					185	$logger_object->complain( $msg, $input_line_number );
675							}
676	32					82	return;
677							} ## end sub complain
678
679							sub write_logfile_entry {
680
681	1857			1857	0	4409	my ( $self, $msg ) = @_;
682
683	1857					3529	my $logger_object = $self->[_logger_object_];
684	1857	100				4484	if ($logger_object) {
685	1851					5579	$logger_object->write_logfile_entry($msg);
686							}
687	1857					5683	return;
688							} ## end sub write_logfile_entry
689
690							sub interrupt_logfile {
691
692	0			0	0	0	my $self = shift;
693
694	0					0	my $logger_object = $self->[_logger_object_];
695	0	0				0	if ($logger_object) {
696	0					0	$logger_object->interrupt_logfile();
697							}
698	0					0	return;
699							} ## end sub interrupt_logfile
700
701							sub resume_logfile {
702
703	0			0	0	0	my $self = shift;
704
705	0					0	my $logger_object = $self->[_logger_object_];
706	0	0				0	if ($logger_object) {
707	0					0	$logger_object->resume_logfile();
708							}
709	0					0	return;
710							} ## end sub resume_logfile
711
712							sub brace_warning {
713	0			0	0	0	my ( $self, $msg ) = @_;
714	0					0	$self->[_saw_brace_error_]++;
715
716	0					0	my $logger_object = $self->[_logger_object_];
717	0	0				0	if ($logger_object) {
718	0					0	my $msg_line_number = $self->[_last_line_number_];
719	0					0	$logger_object->brace_warning( $msg, $msg_line_number );
720							}
721	0					0	return;
722							} ## end sub brace_warning
723
724							sub increment_brace_error {
725
726							# This is same as sub brace_warning but without a message
727	0			0	0	0	my $self = shift;
728	0					0	$self->[_saw_brace_error_]++;
729
730	0					0	my $logger_object = $self->[_logger_object_];
731	0	0				0	if ($logger_object) {
732	0					0	$logger_object->increment_brace_error();
733							}
734	0					0	return;
735							} ## end sub increment_brace_error
736
737							sub get_saw_brace_error {
738	0			0	0	0	my $self = shift;
739	0					0	return $self->[_saw_brace_error_];
740							} ## end sub get_saw_brace_error
741
742							sub report_definite_bug {
743	0			0	0	0	my $self = shift;
744	0					0	$self->[_hit_bug_] = 1;
745	0					0	my $logger_object = $self->[_logger_object_];
746	0	0				0	if ($logger_object) {
747	0					0	$logger_object->report_definite_bug();
748							}
749	0					0	return;
750							} ## end sub report_definite_bug
751
752							#-------------------------------------
753							# Interface to Perl::Tidy::Diagnostics
754							#-------------------------------------
755							sub write_diagnostics {
756	0			0	0	0	my ( $self, $msg ) = @_;
757	0					0	my $input_line_number = $self->[_last_line_number_];
758	0					0	my $diagnostics_object = $self->[_diagnostics_object_];
759	0	0				0	if ($diagnostics_object) {
760	0					0	$diagnostics_object->write_diagnostics( $msg, $input_line_number );
761							}
762	0					0	return;
763							} ## end sub write_diagnostics
764
765							sub report_tokenization_errors {
766
767	561			561	0	1990	my ($self) = @_;
768
769							# Report any tokenization errors and return a flag '$severe_error'.
770							# Set $severe_error = 1 if the tokenization errors are so severe that
771							# the formatter should not attempt to format the file. Instead, it will
772							# just output the file verbatim.
773
774							# set severe error flag if tokenizer has encountered file reading problems
775							# (i.e. unexpected binary characters)
776							# or code which may not be formatted correctly (such as 'my sub q')
777							# The difference between _in_error_ and _in_trouble_ is that
778							# _in_error_ stops the tokenizer immediately whereas
779							# _in_trouble_ lets the tokenizer finish so that all errors are seen
780							# Both block formatting and cause the input stream to be output verbatim.
781	561		33			3403	my $severe_error = $self->[_in_error_] \|\| $self->[_in_trouble_];
782
783							# And do not format if it looks like an html file (c209)
784	561		33			4248	$severe_error \|\|= $self->[_html_tag_count_] && $self->[_warning_count_];
			33
785
786							# Inform the logger object on length of input stream
787	561					1544	my $logger_object = $self->[_logger_object_];
788	561	100				1985	if ($logger_object) {
789	559					1486	my $last_line_number = $self->[_last_line_number_];
790	559					2955	$logger_object->set_last_input_line_number($last_line_number);
791							}
792
793	561					1645	my $maxle = $self->[_rOpts_maximum_level_errors_];
794	561					1410	my $maxue = $self->[_rOpts_maximum_unexpected_errors_];
795	561	50				1835	$maxle = 1 unless defined($maxle);
796	561	50				1898	$maxue = 0 unless defined($maxue);
797
798	561					2396	my $level = get_indentation_level();
799	561	50				2263	if ( $level != $self->[_starting_level_] ) {
800	0					0	$self->warning("final indentation level: $level\n");
801	0					0	my $level_diff = $self->[_starting_level_] - $level;
802	0	0				0	if ( $level_diff < 0 ) { $level_diff = -$level_diff }
	0					0
803
804							# Set severe error flag if the level error is greater than 1.
805							# The formatter can function for any level error but it is probably
806							# best not to attempt formatting for a high level error.
807	0	0	0			0	if ( $maxle >= 0 && $level_diff > $maxle ) {
808	0					0	$severe_error = 1;
809	0					0	$self->warning(<<EOM);
810							Formatting will be skipped because level error '$level_diff' exceeds -maxle=$maxle; use -maxle=-1 to force formatting
811							EOM
812							}
813							}
814
815	561					3127	$self->check_final_nesting_depths();
816
817							# Likewise, large numbers of brace errors usually indicate non-perl
818							# scripts, so set the severe error flag at a low number. This is similar
819							# to the level check, but different because braces may balance but be
820							# incorrectly interlaced.
821	561	50				2903	if ( $self->[_true_brace_error_count_] > 2 ) {
822	0					0	$severe_error = 1;
823							}
824
825	561	50	66			2606	if ( $self->[_look_for_hash_bang_]
826							&& !$self->[_saw_hash_bang_] )
827							{
828	0					0	$self->warning(
829							"hit EOF without seeing hash-bang line; maybe don't need -x?\n");
830							}
831
832	561	50				1969	if ( $self->[_in_format_] ) {
833	0					0	$self->warning("hit EOF while in format description\n");
834							}
835
836	561	50				2105	if ( $self->[_in_skipped_] ) {
837	0					0	$self->write_logfile_entry(
838							"hit EOF while in lines skipped with --code-skipping\n");
839							}
840
841	561	50				2028	if ( $self->[_in_pod_] ) {
842
843							# Just write log entry if this is after __END__ or __DATA__
844							# because this happens to often, and it is not likely to be
845							# a parsing error.
846	0	0	0			0	if ( $self->[_saw_data_] \|\| $self->[_saw_end_] ) {
847	0					0	$self->write_logfile_entry(
848							"hit eof while in pod documentation (no =cut seen)\n\tthis can cause trouble with some pod utilities\n"
849							);
850							}
851
852							else {
853	0					0	$self->complain(
854							"hit eof while in pod documentation (no =cut seen)\n\tthis can cause trouble with some pod utilities\n"
855							);
856							}
857
858							}
859
860	561	50				1933	if ( $self->[_in_here_doc_] ) {
861	0					0	$severe_error = 1;
862	0					0	my $here_doc_target = $self->[_here_doc_target_];
863	0					0	my $started_looking_for_here_target_at =
864							$self->[_started_looking_for_here_target_at_];
865	0	0				0	if ($here_doc_target) {
866	0					0	$self->warning(
867							"hit EOF in here document starting at line $started_looking_for_here_target_at with target: $here_doc_target\n"
868							);
869							}
870							else {
871	0					0	$self->warning(<<EOM);
872							Hit EOF in here document starting at line $started_looking_for_here_target_at with empty target string.
873							(Perl will match to the end of file but this may not be intended).
874							EOM
875							}
876	0					0	my $nearly_matched_here_target_at =
877							$self->[_nearly_matched_here_target_at_];
878	0	0				0	if ($nearly_matched_here_target_at) {
879	0					0	$self->warning(
880							"NOTE: almost matched at input line $nearly_matched_here_target_at except for whitespace\n"
881							);
882							}
883							}
884
885							# Something is seriously wrong if we ended inside a quote
886	561	50				2337	if ( $self->[_in_quote_] ) {
887	0					0	$severe_error = 1;
888	0					0	my $line_start_quote = $self->[_line_start_quote_];
889	0					0	my $quote_target = $self->[_quote_target_];
890	0	0				0	my $what =
891							( $self->[_in_attribute_list_] )
892							? "attribute list"
893							: "quote/pattern";
894	0					0	$self->warning(
895							"hit EOF seeking end of $what starting at line $line_start_quote ending in $quote_target\n"
896							);
897							}
898
899	561	50				1995	if ( $self->[_hit_bug_] ) {
900	0					0	$severe_error = 1;
901							}
902
903							# Multiple "unexpected" type tokenization errors usually indicate parsing
904							# non-perl scripts, or that something is seriously wrong, so we should
905							# avoid formatting them. This can happen for example if we run perltidy on
906							# a shell script or an html file. But unfortunately this check can
907							# interfere with some extended syntaxes, such as RPerl, so it has to be off
908							# by default.
909	561					1463	my $ue_count = $self->[_unexpected_error_count_];
910	561	50	33			2529	if ( $maxue > 0 && $ue_count > $maxue ) {
911	0					0	$self->warning(<<EOM);
912							Formatting will be skipped since unexpected token count = $ue_count > -maxue=$maxue; use -maxue=0 to force formatting
913							EOM
914	0					0	$severe_error = 1;
915							}
916
917	561	100				1927	if ( !$self->[_saw_perl_dash_w_] ) {
918	545	50				3621	if ( $] < 5.006 ) {
919	0					0	$self->write_logfile_entry("Suggest including '-w parameter'\n");
920							}
921							else {
922	545					1894	$self->write_logfile_entry("Suggest including 'use warnings;'\n");
923							}
924							}
925
926	561	50				5161	if ( $self->[_saw_perl_dash_P_] ) {
927	0					0	$self->write_logfile_entry(
928							"Use of -P parameter for defines is discouraged\n");
929							}
930
931	561	100				2472	if ( !$self->[_saw_use_strict_] ) {
932	547					1675	$self->write_logfile_entry("Suggest including 'use strict;'\n");
933							}
934
935							# it is suggested that labels have at least one upper case character
936							# for legibility and to avoid code breakage as new keywords are introduced
937	561	100				3932	if ( $self->[_rlower_case_labels_at_] ) {
938	12					44	my @lower_case_labels_at = @{ $self->[_rlower_case_labels_at_] };
	12					46
939	12					48	$self->write_logfile_entry(
940							"Suggest using upper case characters in label(s)\n");
941	12					54	local $LIST_SEPARATOR = ')(';
942	12					112	$self->write_logfile_entry(
943							" defined at line(s): (@lower_case_labels_at)\n");
944							}
945	561					2365	return $severe_error;
946							} ## end sub report_tokenization_errors
947
948							sub report_v_string {
949
950							# warn if this version can't handle v-strings
951	2			2	0	9	my ( $self, $tok ) = @_;
952	2	50				8	if ( !$self->[_saw_v_string_] ) {
953	2					6	$self->[_saw_v_string_] = $self->[_last_line_number_];
954							}
955	2	50				10	if ( $] < 5.006 ) {
956	0					0	$self->warning(
957							"Found v-string '$tok' but v-strings are not implemented in your version of perl; see Camel 3 book ch 2\n"
958							);
959							}
960	2					7	return;
961							} ## end sub report_v_string
962
963							sub is_valid_token_type {
964	3			3	0	7	my ($type) = @_;
965	3					15	return $is_valid_token_type{$type};
966							}
967
968							sub log_numbered_msg {
969	167			167	0	480	my ( $self, $msg ) = @_;
970
971							# write input line number + message to logfile
972	167					350	my $input_line_number = $self->[_last_line_number_];
973	167					896	$self->write_logfile_entry("Line $input_line_number: $msg");
974	167					359	return;
975							} ## end sub log_numbered_msg
976
977							sub get_line {
978
979	8221			8221	0	15270	my $self = shift;
980
981							# Read the next input line and tokenize it
982							# Returns:
983							# $line_of_tokens = ref to hash of info for the tokenized line
984
985							# USES GLOBAL VARIABLES:
986							# $brace_depth, $square_bracket_depth, $paren_depth
987
988							# get the next line from the input array
989	8221					12202	my $input_line;
990	8221					14231	my $rinput_lines = $self->[_rinput_lines_];
991	8221					13022	my $line_index = $self->[_input_line_index_next_];
992	8221	100				11827	if ( $line_index < @{$rinput_lines} ) {
	8221					19167
993	7660					16116	$input_line = $rinput_lines->[ $line_index++ ];
994	7660					12420	$self->[_input_line_index_next_] = $line_index;
995							}
996
997	8221					14154	$self->[_line_of_text_] = $input_line;
998
999	8221	100				19934	return if ( !defined($input_line) );
1000
1001	7660					12922	my $input_line_number = ++$self->[_last_line_number_];
1002
1003							# Find and remove what characters terminate this line, including any
1004							# control r
1005	7660					12011	my $input_line_separator = EMPTY_STRING;
1006	7660	100				20193	if ( chomp($input_line) ) {
1007	7659					17040	$input_line_separator = $INPUT_RECORD_SEPARATOR;
1008							}
1009
1010							# The first test here very significantly speeds things up, but be sure to
1011							# keep the regex and hash %other_line_endings the same.
1012	7660	100				23205	if ( $other_line_endings{ substr( $input_line, -1 ) } ) {
1013	24	50				218	if ( $input_line =~ s/([\r\035\032])+$// ) {
1014	24					71	$input_line_separator = $1 . $input_line_separator;
1015							}
1016							}
1017
1018							# for backwards compatibility we keep the line text terminated with
1019							# a newline character
1020	7660					15945	$input_line .= "\n";
1021	7660					13464	$self->[_line_of_text_] = $input_line;
1022
1023							# create a data structure describing this line which will be
1024							# returned to the caller.
1025
1026							# _line_type codes are:
1027							# SYSTEM - system-specific code before hash-bang line
1028							# CODE - line of perl code (including comments)
1029							# POD_START - line starting pod, such as '=head'
1030							# POD - pod documentation text
1031							# POD_END - last line of pod section, '=cut'
1032							# HERE - text of here-document
1033							# HERE_END - last line of here-doc (target word)
1034							# FORMAT - format section
1035							# FORMAT_END - last line of format section, '.'
1036							# SKIP - code skipping section
1037							# SKIP_END - last line of code skipping section, '#>>V'
1038							# DATA_START - __DATA__ line
1039							# DATA - unidentified text following __DATA__
1040							# END_START - __END__ line
1041							# END - unidentified text following __END__
1042							# ERROR - we are in big trouble, probably not a perl script
1043
1044							# Other variables:
1045							# _curly_brace_depth - depth of curly braces at start of line
1046							# _square_bracket_depth - depth of square brackets at start of line
1047							# _paren_depth - depth of parens at start of line
1048							# _starting_in_quote - this line continues a multi-line quote
1049							# (so don't trim leading blanks!)
1050							# _ending_in_quote - this line ends in a multi-line quote
1051							# (so don't trim trailing blanks!)
1052	7660					42656	my $line_of_tokens = {
1053							_line_type => 'EOF',
1054							_line_text => $input_line,
1055							_line_number => $input_line_number,
1056							_guessed_indentation_level => 0,
1057							_curly_brace_depth => $brace_depth,
1058							_square_bracket_depth => $square_bracket_depth,
1059							_paren_depth => $paren_depth,
1060							_quote_character => EMPTY_STRING,
1061							## Skip these needless initializations for efficiency:
1062							## _rtoken_type => undef,
1063							## _rtokens => undef,
1064							## _rlevels => undef,
1065							## _rblock_type => undef,
1066							## _rtype_sequence => undef,
1067							## _rci_levels => undef,
1068							## _starting_in_quote => 0,
1069							## _ending_in_quote => 0,
1070							};
1071
1072							# must print line unchanged if we are in a here document
1073	7660	100				38934	if ( $self->[_in_here_doc_] ) {
		100
		100
		100
		50
		100
		100
1074
1075	24					86	$line_of_tokens->{_line_type} = 'HERE';
1076	24					63	my $here_doc_target = $self->[_here_doc_target_];
1077	24					58	my $here_quote_character = $self->[_here_quote_character_];
1078	24					40	my $candidate_target = $input_line;
1079	24					53	chomp $candidate_target;
1080
1081							# Handle <<~ targets, which are indicated here by a leading space on
1082							# the here quote character
1083	24	100				94	if ( $here_quote_character =~ /^\s/ ) {
1084	4					18	$candidate_target =~ s/^\s*//;
1085							}
1086	24	100				67	if ( $candidate_target eq $here_doc_target ) {
1087	9					47	$self->[_nearly_matched_here_target_at_] = undef;
1088	9					30	$line_of_tokens->{_line_type} = 'HERE_END';
1089	9					53	$self->log_numbered_msg("Exiting HERE document $here_doc_target\n");
1090
1091	9					54	my $rhere_target_list = $self->[_rhere_target_list_];
1092	9	50				24	if ( @{$rhere_target_list} ) { # there can be multiple here targets
	9					48
1093							( $here_doc_target, $here_quote_character ) =
1094	0					0	@{ shift @{$rhere_target_list} };
	0					0
	0					0
1095	0					0	$self->[_here_doc_target_] = $here_doc_target;
1096	0					0	$self->[_here_quote_character_] = $here_quote_character;
1097	0					0	$self->log_numbered_msg(
1098							"Entering HERE document $here_doc_target\n");
1099	0					0	$self->[_nearly_matched_here_target_at_] = undef;
1100	0					0	$self->[_started_looking_for_here_target_at_] =
1101							$input_line_number;
1102							}
1103							else {
1104	9					33	$self->[_in_here_doc_] = 0;
1105	9					26	$self->[_here_doc_target_] = EMPTY_STRING;
1106	9					30	$self->[_here_quote_character_] = EMPTY_STRING;
1107							}
1108							}
1109
1110							# check for error of extra whitespace
1111							# note for PERL6: leading whitespace is allowed
1112							else {
1113	15					142	$candidate_target =~ s/\s*$//;
1114	15					66	$candidate_target =~ s/^\s*//;
1115	15	50				60	if ( $candidate_target eq $here_doc_target ) {
1116	0					0	$self->[_nearly_matched_here_target_at_] = $input_line_number;
1117							}
1118							}
1119	24					99	return $line_of_tokens;
1120							}
1121
1122							# Print line unchanged if we are in a format section
1123							elsif ( $self->[_in_format_] ) {
1124
1125	3	100				23	if ( $input_line =~ /^\.[\s#]*$/ ) {
1126
1127							# Decrement format depth count at a '.' after a 'format'
1128	1					3	$self->[_in_format_]--;
1129
1130							# This is the end when count reaches 0
1131	1	50				7	if ( !$self->[_in_format_] ) {
1132	1					5	$self->log_numbered_msg("Exiting format section\n");
1133	1					7	$line_of_tokens->{_line_type} = 'FORMAT_END';
1134
1135							# Make the tokenizer mark an opening brace which follows
1136							# as a code block. Fixes issue c202/t032.
1137	1					5	$last_nonblank_token = ';';
1138	1					3	$last_nonblank_type = ';';
1139							}
1140							}
1141							else {
1142	2					5	$line_of_tokens->{_line_type} = 'FORMAT';
1143	2	50				11	if ( $input_line =~ /^\s*format\s+\w+/ ) {
1144
1145							# Increment format depth count at a 'format' within a 'format'
1146							# This is a simple way to handle nested formats (issue c019).
1147	0					0	$self->[_in_format_]++;
1148							}
1149							}
1150	3					11	return $line_of_tokens;
1151							}
1152
1153							# must print line unchanged if we are in pod documentation
1154							elsif ( $self->[_in_pod_] ) {
1155
1156	47					164	$line_of_tokens->{_line_type} = 'POD';
1157	47	100				225	if ( $input_line =~ /^=cut/ ) {
1158	20					95	$line_of_tokens->{_line_type} = 'POD_END';
1159	20					104	$self->log_numbered_msg("Exiting POD section\n");
1160	20					92	$self->[_in_pod_] = 0;
1161							}
1162	47	50	33			150	if ( $input_line =~ /^\#\!.*perl\b/ && !$self->[_in_end_] ) {
1163	0					0	$self->warning(
1164							"Hash-bang in pod can cause older versions of perl to fail! \n"
1165							);
1166							}
1167
1168	47					165	return $line_of_tokens;
1169							}
1170
1171							# print line unchanged if in skipped section
1172							elsif ( $self->[_in_skipped_] ) {
1173
1174	8					18	$line_of_tokens->{_line_type} = 'SKIP';
1175	8	100				90	if ( $input_line =~ /$code_skipping_pattern_end/ ) {
		50
1176	2					7	$line_of_tokens->{_line_type} = 'SKIP_END';
1177	2					10	$self->log_numbered_msg("Exiting code-skipping section\n");
1178	2					5	$self->[_in_skipped_] = 0;
1179							}
1180							elsif ( $input_line =~ /$code_skipping_pattern_begin/ ) {
1181
1182							# warn of duplicate starting comment lines, git #118
1183	0					0	my $lno = $self->[_in_skipped_];
1184	0					0	$self->warning(
1185							"Already in code-skipping section which started at line $lno\n"
1186							);
1187							}
1188							else {
1189							## ok - not a code-skipping control line
1190							}
1191	8					31	return $line_of_tokens;
1192							}
1193
1194							# must print line unchanged if we have seen a severe error (i.e., we
1195							# are seeing illegal tokens and cannot continue. Syntax errors do
1196							# not pass this route). Calling routine can decide what to do, but
1197							# the default can be to just pass all lines as if they were after __END__
1198							elsif ( $self->[_in_error_] ) {
1199	0					0	$line_of_tokens->{_line_type} = 'ERROR';
1200	0					0	return $line_of_tokens;
1201							}
1202
1203							# print line unchanged if we are __DATA__ section
1204							elsif ( $self->[_in_data_] ) {
1205
1206							# ...but look for POD
1207							# Note that the _in_data and _in_end flags remain set
1208							# so that we return to that state after seeing the
1209							# end of a pod section
1210	1	50	33			8	if ( $input_line =~ /^=(\w+)\b/ && $1 ne 'cut' ) {
1211	0					0	$line_of_tokens->{_line_type} = 'POD_START';
1212	0					0	$self->log_numbered_msg("Entering POD section\n");
1213	0					0	$self->[_in_pod_] = 1;
1214	0					0	return $line_of_tokens;
1215							}
1216							else {
1217	1					4	$line_of_tokens->{_line_type} = 'DATA';
1218	1					4	return $line_of_tokens;
1219							}
1220							}
1221
1222							# print line unchanged if we are in __END__ section
1223							elsif ( $self->[_in_end_] ) {
1224
1225							# ...but look for POD
1226							# Note that the _in_data and _in_end flags remain set
1227							# so that we return to that state after seeing the
1228							# end of a pod section
1229	48	100	66			347	if ( $input_line =~ /^=(\w+)\b/ && $1 ne 'cut' ) {
1230	6					29	$line_of_tokens->{_line_type} = 'POD_START';
1231	6					31	$self->log_numbered_msg("Entering POD section\n");
1232	6					34	$self->[_in_pod_] = 1;
1233	6					69	return $line_of_tokens;
1234							}
1235							else {
1236	42					73	$line_of_tokens->{_line_type} = 'END';
1237	42					139	return $line_of_tokens;
1238							}
1239							}
1240							else {
1241							## ok
1242							}
1243
1244							# check for a hash-bang line if we haven't seen one
1245	7529	100	100			33761	if ( !$self->[_saw_hash_bang_]
			66
1246							&& substr( $input_line, 0, 2 ) eq '#!'
1247							&& $input_line =~ /^\#\!.*perl\b/ )
1248							{
1249	15					53	$self->[_saw_hash_bang_] = $input_line_number;
1250
1251							# check for -w and -P flags
1252	15	50				102	if ( $input_line =~ /^\#\!.perl\s.-.*P/ ) {
1253	0					0	$self->[_saw_perl_dash_P_] = 1;
1254							}
1255
1256	15	100				97	if ( $input_line =~ /^\#\!.perl\s.-.*w/ ) {
1257	8					26	$self->[_saw_perl_dash_w_] = 1;
1258							}
1259
1260	15	100	33			137	if (
			66
			100
			66
1261							$input_line_number > 1
1262
1263							# leave any hash bang in a BEGIN block alone
1264							# i.e. see 'debugger-duck_type.t'
1265							&& !(
1266							$last_nonblank_block_type
1267							&& $last_nonblank_block_type eq 'BEGIN'
1268							)
1269							&& !$self->[_look_for_hash_bang_]
1270
1271							# Try to avoid giving a false alarm at a simple comment.
1272							# These look like valid hash-bang lines:
1273
1274							#!/usr/bin/perl -w
1275							#! /usr/bin/perl -w
1276							#!c:\perl\bin\perl.exe
1277
1278							# These are comments:
1279							#! I love perl
1280							#! sunos does not yet provide a /usr/bin/perl
1281
1282							# Comments typically have multiple spaces, which suggests
1283							# the filter
1284							&& $input_line =~ /^\#\!(\s+)?(\S+)?perl/
1285							)
1286							{
1287
1288							# this is helpful for VMS systems; we may have accidentally
1289							# tokenized some DCL commands
1290	1	50				5	if ( $self->[_started_tokenizing_] ) {
1291	0					0	$self->warning(
1292							"There seems to be a hash-bang after line 1; do you need to run with -x ?\n"
1293							);
1294							}
1295							else {
1296	1					6	$self->complain("Useless hash-bang after line 1\n");
1297							}
1298							}
1299
1300							# Report the leading hash-bang as a system line
1301							# This will prevent -dac from deleting it
1302							else {
1303	14					51	$line_of_tokens->{_line_type} = 'SYSTEM';
1304	14					79	return $line_of_tokens;
1305							}
1306							}
1307
1308							# wait for a hash-bang before parsing if the user invoked us with -x
1309	7515	100	100			18601	if ( $self->[_look_for_hash_bang_]
1310							&& !$self->[_saw_hash_bang_] )
1311							{
1312	5					13	$line_of_tokens->{_line_type} = 'SYSTEM';
1313	5					19	return $line_of_tokens;
1314							}
1315
1316							# a first line of the form ': #' will be marked as SYSTEM
1317							# since lines of this form may be used by tcsh
1318	7510	50	66			19881	if ( $input_line_number == 1 && $input_line =~ /^\s\:\s\#/ ) {
1319	0					0	$line_of_tokens->{_line_type} = 'SYSTEM';
1320	0					0	return $line_of_tokens;
1321							}
1322
1323							# now we know that it is ok to tokenize the line...
1324							# the line tokenizer will modify any of these private variables:
1325							# _rhere_target_list_
1326							# _in_data_
1327							# _in_end_
1328							# _in_format_
1329							# _in_error_
1330							# _in_skipped_
1331							# _in_pod_
1332							# _in_quote_
1333	7510					23419	$self->tokenize_this_line($line_of_tokens);
1334
1335							# Now finish defining the return structure and return it
1336	7510					15892	$line_of_tokens->{_ending_in_quote} = $self->[_in_quote_];
1337
1338							# handle severe error (binary data in script)
1339	7510	50				16987	if ( $self->[_in_error_] ) {
1340	0					0	$self->[_in_quote_] = 0; # to avoid any more messages
1341	0					0	$self->warning("Giving up after error\n");
1342	0					0	$line_of_tokens->{_line_type} = 'ERROR';
1343	0					0	reset_indentation_level(0); # avoid error messages
1344	0					0	return $line_of_tokens;
1345							}
1346
1347							# handle start of pod documentation
1348	7510	100				15976	if ( $self->[_in_pod_] ) {
1349
1350							# This gets tricky..above a __DATA__ or __END__ section, perl
1351							# accepts '=cut' as the start of pod section. But afterwards,
1352							# only pod utilities see it and they may ignore an =cut without
1353							# leading =head. In any case, this isn't good.
1354	14	50				84	if ( $input_line =~ /^=cut\b/ ) {
1355	0	0	0			0	if ( $self->[_saw_data_] \|\| $self->[_saw_end_] ) {
1356	0					0	$self->complain("=cut while not in pod ignored\n");
1357	0					0	$self->[_in_pod_] = 0;
1358	0					0	$line_of_tokens->{_line_type} = 'POD_END';
1359							}
1360							else {
1361	0					0	$line_of_tokens->{_line_type} = 'POD_START';
1362	0					0	if ( !DEVEL_MODE ) {
1363	0					0	$self->warning(
1364							"=cut starts a pod section .. this can fool pod utilities.\n"
1365							);
1366							}
1367	0					0	$self->log_numbered_msg("Entering POD section\n");
1368							}
1369							}
1370
1371							else {
1372	14					70	$line_of_tokens->{_line_type} = 'POD_START';
1373	14					76	$self->log_numbered_msg("Entering POD section\n");
1374							}
1375
1376	14					94	return $line_of_tokens;
1377							}
1378
1379							# handle start of skipped section
1380	7496	100				14876	if ( $self->[_in_skipped_] ) {
1381
1382	2					8	$line_of_tokens->{_line_type} = 'SKIP';
1383	2					14	$self->log_numbered_msg("Entering code-skipping section\n");
1384	2					10	return $line_of_tokens;
1385							}
1386
1387							# see if this line contains here doc targets
1388	7494					12459	my $rhere_target_list = $self->[_rhere_target_list_];
1389	7494	100				10782	if ( @{$rhere_target_list} ) {
	7494					17331
1390
1391							my ( $here_doc_target, $here_quote_character ) =
1392	9					52	@{ shift @{$rhere_target_list} };
	9					25
	9					43
1393	9					32	$self->[_in_here_doc_] = 1;
1394	9					31	$self->[_here_doc_target_] = $here_doc_target;
1395	9					24	$self->[_here_quote_character_] = $here_quote_character;
1396	9					69	$self->log_numbered_msg("Entering HERE document $here_doc_target\n");
1397	9					34	$self->[_started_looking_for_here_target_at_] = $input_line_number;
1398							}
1399
1400							# NOTE: __END__ and __DATA__ statements are written unformatted
1401							# because they can theoretically contain additional characters
1402							# which are not tokenized (and cannot be read with <DATA> either!).
1403	7494	100				20350	if ( $self->[_in_data_] ) {
		100
1404	1					4	$line_of_tokens->{_line_type} = 'DATA_START';
1405	1					6	$self->log_numbered_msg("Starting __DATA__ section\n");
1406	1					4	$self->[_saw_data_] = 1;
1407
1408							# keep parsing after __DATA__ if use SelfLoader was seen
1409	1	50				5	if ( $self->[_saw_selfloader_] ) {
1410	0					0	$self->[_in_data_] = 0;
1411	0					0	$self->log_numbered_msg(
1412							"SelfLoader seen, continuing; -nlsl deactivates\n");
1413							}
1414
1415	1					5	return $line_of_tokens;
1416							}
1417
1418							elsif ( $self->[_in_end_] ) {
1419	6					50	$line_of_tokens->{_line_type} = 'END_START';
1420	6					31	$self->log_numbered_msg("Starting __END__ section\n");
1421	6					22	$self->[_saw_end_] = 1;
1422
1423							# keep parsing after __END__ if use AutoLoader was seen
1424	6	50				71	if ( $self->[_saw_autoloader_] ) {
1425	0					0	$self->[_in_end_] = 0;
1426	0					0	$self->log_numbered_msg(
1427							"AutoLoader seen, continuing; -nlal deactivates\n");
1428							}
1429	6					28	return $line_of_tokens;
1430							}
1431							else {
1432							## ok: not in __END__ or __DATA__
1433							}
1434
1435							# now, finally, we know that this line is type 'CODE'
1436	7487					14137	$line_of_tokens->{_line_type} = 'CODE';
1437
1438							# remember if we have seen any real code
1439	7487	100	100			23773	if ( !$self->[_started_tokenizing_]
			100
1440							&& $input_line !~ /^\s*$/
1441							&& $input_line !~ /^\s*#/ )
1442							{
1443	557					2855	$self->[_started_tokenizing_] = 1;
1444							}
1445
1446	7487	100				16073	if ( $self->[_debugger_object_] ) {
1447	7					36	$self->[_debugger_object_]->write_debug_entry($line_of_tokens);
1448							}
1449
1450							# Note: if keyword 'format' occurs in this line code, it is still CODE
1451							# (keyword 'format' need not start a line)
1452	7487	100				15537	if ( $self->[_in_format_] ) {
1453	1					7	$self->log_numbered_msg("Entering format section\n");
1454							}
1455
1456	7487	100	100			29299	if ( $self->[_in_quote_]
		100	100
1457							and ( $self->[_line_start_quote_] < 0 ) )
1458							{
1459
1460							#if ( ( my $quote_target = get_quote_target() ) !~ /^\s*$/ ) {
1461	49	100				431	if ( ( my $quote_target = $self->[_quote_target_] ) !~ /^\s*$/ ) {
1462	48					140	$self->[_line_start_quote_] = $input_line_number;
1463	48					361	$self->log_numbered_msg(
1464							"Start multi-line quote or pattern ending in $quote_target\n");
1465							}
1466							}
1467							elsif ( ( $self->[_line_start_quote_] >= 0 )
1468							&& !$self->[_in_quote_] )
1469							{
1470	48					175	$self->[_line_start_quote_] = -1;
1471	48					200	$self->log_numbered_msg("End of multi-line quote or pattern\n");
1472							}
1473							else {
1474							## ok
1475							}
1476
1477							# we are returning a line of CODE
1478	7487					29946	return $line_of_tokens;
1479							} ## end sub get_line
1480
1481							sub find_starting_indentation_level {
1482
1483							# We need to find the indentation level of the first line of the
1484							# script being formatted. Often it will be zero for an entire file,
1485							# but if we are formatting a local block of code (within an editor for
1486							# example) it may not be zero. The user may specify this with the
1487							# -sil=n parameter but normally doesn't so we have to guess.
1488							#
1489	561			561	0	1796	my ($self) = @_;
1490	561					1370	my $starting_level = 0;
1491
1492							# use value if given as parameter
1493	561	100				2942	if ( $self->[_know_starting_level_] ) {
		100
1494	1					4	$starting_level = $self->[_starting_level_];
1495							}
1496
1497							# if we know there is a hash_bang line, the level must be zero
1498							elsif ( $self->[_look_for_hash_bang_] ) {
1499	1					4	$self->[_know_starting_level_] = 1;
1500							}
1501
1502							# otherwise figure it out from the input file
1503							else {
1504	559					1222	my $line;
1505	559					1209	my $i = 0;
1506
1507							# keep looking at lines until we find a hash bang or piece of code
1508							# ( or, for now, an =pod line)
1509	559					1298	my $msg = EMPTY_STRING;
1510	559					1143	my $in_code_skipping;
1511	559					3067	while ( $line = $self->peek_ahead( $i++ ) ) {
1512
1513							# if first line is #! then assume starting level is zero
1514	870	100	100			5504	if ( $i == 1 && $line =~ /^\#\!/ ) {
1515	13					42	$starting_level = 0;
1516	13					51	last;
1517							}
1518
1519							# ignore lines fenced off with code-skipping comments
1520	857	100				5043	if ( $line =~ /^\s*#/ ) {
1521	296	50				1103	if ( !$in_code_skipping ) {
1522	296	50	33			3102	if ( $rOpts_code_skipping
1523							&& $line =~ /$code_skipping_pattern_begin/ )
1524							{
1525	0					0	$in_code_skipping = 1;
1526							}
1527							}
1528							else {
1529	0	0				0	if ( $line =~ /$code_skipping_pattern_end/ ) {
1530	0					0	$in_code_skipping = 0;
1531							}
1532							}
1533	296					1054	next;
1534							}
1535	561	50				2583	next if ($in_code_skipping);
1536
1537	561	100				3613	next if ( $line =~ /^\s*$/ ); # skip past blank lines
1538
1539	543					2793	$starting_level = $self->guess_old_indentation_level($line);
1540	543					1485	last;
1541							}
1542	559					2559	$msg = "Line $i implies starting-indentation-level = $starting_level\n";
1543	559					3140	$self->write_logfile_entry("$msg");
1544							}
1545	561					2207	$self->[_starting_level_] = $starting_level;
1546	561					3775	reset_indentation_level($starting_level);
1547	561					1082	return;
1548							} ## end sub find_starting_indentation_level
1549
1550							sub guess_old_indentation_level {
1551	543			543	0	1812	my ( $self, $line ) = @_;
1552
1553							# Guess the indentation level of an input line.
1554							#
1555							# For the first line of code this result will define the starting
1556							# indentation level. It will mainly be non-zero when perltidy is applied
1557							# within an editor to a local block of code.
1558							#
1559							# This is an impossible task in general because we can't know what tabs
1560							# meant for the old script and how many spaces were used for one
1561							# indentation level in the given input script. For example it may have
1562							# been previously formatted with -i=7 -et=3. But we can at least try to
1563							# make sure that perltidy guesses correctly if it is applied repeatedly to
1564							# a block of code within an editor, so that the block stays at the same
1565							# level when perltidy is applied repeatedly.
1566							#
1567							# USES GLOBAL VARIABLES: (none)
1568	543					1315	my $level = 0;
1569
1570							# find leading tabs, spaces, and any statement label
1571	543					1094	my $spaces = 0;
1572	543	50				4279	if ( $line =~ /^(\t+)?(\s+)?(\w+:[^:])?/ ) {
1573
1574							# If there are leading tabs, we use the tab scheme for this run, if
1575							# any, so that the code will remain stable when editing.
1576	543	100				2518	if ($1) { $spaces += length($1) * $self->[_tabsize_] }
	2					10
1577
1578	543	100				2143	if ($2) { $spaces += length($2) }
	79					294
1579
1580							# correct for outdented labels
1581	543	50	66			2345	if ( $3 && $self->[_outdent_labels_] ) {
1582	1					3	$spaces += $self->[_continuation_indentation_];
1583							}
1584							}
1585
1586							# compute indentation using the value of -i for this run.
1587							# If -i=0 is used for this run (which is possible) it doesn't matter
1588							# what we do here but we'll guess that the old run used 4 spaces per level.
1589	543					1612	my $indent_columns = $self->[_indent_columns_];
1590	543	50				1830	$indent_columns = 4 if ( !$indent_columns );
1591	543					1961	$level = int( $spaces / $indent_columns );
1592	543					1430	return ($level);
1593							} ## end sub guess_old_indentation_level
1594
1595							# This is a currently unused debug routine
1596							sub dump_functions {
1597
1598	0			0	0	0	my $fh = *STDOUT;
1599	0					0	foreach my $pkg ( keys %{$ris_user_function} ) {
	0					0
1600	0					0	$fh->print("\nnon-constant subs in package $pkg\n");
1601
1602	0					0	foreach my $sub ( keys %{ $ris_user_function->{$pkg} } ) {
	0					0
1603	0					0	my $msg = EMPTY_STRING;
1604	0	0				0	if ( $ris_block_list_function->{$pkg}{$sub} ) {
1605	0					0	$msg = 'block_list';
1606							}
1607
1608	0	0				0	if ( $ris_block_function->{$pkg}{$sub} ) {
1609	0					0	$msg = 'block';
1610							}
1611	0					0	$fh->print("$sub $msg\n");
1612							}
1613							}
1614
1615	0					0	foreach my $pkg ( keys %{$ris_constant} ) {
	0					0
1616	0					0	$fh->print("\nconstants and constant subs in package $pkg\n");
1617
1618	0					0	foreach my $sub ( keys %{ $ris_constant->{$pkg} } ) {
	0					0
1619	0					0	$fh->print("$sub\n");
1620							}
1621							}
1622	0					0	return;
1623							} ## end sub dump_functions
1624
1625							sub prepare_for_a_new_file {
1626
1627	561			561	0	1975	my ( $self, $source_object ) = @_;
1628
1629							# copy the source object lines to an array of lines
1630	561					3055	$self->make_source_array($source_object);
1631
1632							# previous tokens needed to determine what to expect next
1633	561					1786	$last_nonblank_token = ';'; # the only possible starting state which
1634	561					1384	$last_nonblank_type = ';'; # will make a leading brace a code block
1635	561					1328	$last_nonblank_block_type = EMPTY_STRING;
1636
1637							# scalars for remembering statement types across multiple lines
1638	561					1348	$statement_type = EMPTY_STRING; # '' or 'use' or 'sub..' or 'case..'
1639
1640							# scalars for remembering where we are in the file
1641	561					1355	$current_package = "main";
1642	561					1265	$context = UNKNOWN_CONTEXT;
1643
1644							# hashes used to remember function information
1645	561					1932	$ris_constant = {}; # user-defined constants
1646	561					1856	$ris_user_function = {}; # user-defined functions
1647	561					1611	$ruser_function_prototype = {}; # their prototypes
1648	561					1591	$ris_block_function = {};
1649	561					1531	$ris_block_list_function = {};
1650	561					1419	$rsaw_function_definition = {};
1651	561					1323	$rsaw_use_module = {};
1652
1653							# variables used to track depths of various containers
1654							# and report nesting errors
1655	561					1137	$paren_depth = 0;
1656	561					1179	$brace_depth = 0;
1657	561					1090	$square_bracket_depth = 0;
1658	561					2693	$rcurrent_depth = [ (0) x scalar @closing_brace_names ];
1659	561					1323	$total_depth = 0;
1660	561					2316	$rtotal_depth = [];
1661	561					2114	$rcurrent_sequence_number = [];
1662	561					1240	$next_sequence_number = 2; # The value 1 is reserved for SEQ_ROOT
1663
1664	561					1882	$rparen_type = [];
1665	561					1629	$rparen_semicolon_count = [];
1666	561					2379	$rparen_vars = [];
1667	561					1862	$rbrace_type = [];
1668	561					1711	$rbrace_structural_type = [];
1669	561					1517	$rbrace_context = [];
1670	561					1707	$rbrace_package = [];
1671	561					1596	$rsquare_bracket_type = [];
1672	561					1460	$rsquare_bracket_structural_type = [];
1673	561					3510	$rdepth_array = [];
1674	561					1235	$rnested_ternary_flag = [];
1675	561					3692	$rnested_statement_type = [];
1676	561					3116	$rstarting_line_of_current_depth = [];
1677
1678	561					1773	$rparen_type->[$paren_depth] = EMPTY_STRING;
1679	561					1583	$rparen_semicolon_count->[$paren_depth] = 0;
1680	561					1488	$rparen_vars->[$paren_depth] = [];
1681	561					1579	$rbrace_type->[$brace_depth] = ';'; # identify opening brace as code block
1682	561					1535	$rbrace_structural_type->[$brace_depth] = EMPTY_STRING;
1683	561					1413	$rbrace_context->[$brace_depth] = UNKNOWN_CONTEXT;
1684	561					1376	$rbrace_package->[$paren_depth] = $current_package;
1685	561					1366	$rsquare_bracket_type->[$square_bracket_depth] = EMPTY_STRING;
1686	561					1309	$rsquare_bracket_structural_type->[$square_bracket_depth] = EMPTY_STRING;
1687
1688	561					2697	initialize_tokenizer_state();
1689	561					1134	return;
1690							} ## end sub prepare_for_a_new_file
1691
1692							{ ## closure for sub tokenize_this_line
1693
1694	39			39		397	use constant BRACE => 0;
	39					98
	39					2686
1695	39			39		307	use constant SQUARE_BRACKET => 1;
	39					109
	39					2316
1696	39			39		288	use constant PAREN => 2;
	39					97
	39					2227
1697	39			39		296	use constant QUESTION_COLON => 3;
	39					87
	39					86024
1698
1699							# TV1: scalars for processing one LINE.
1700							# Re-initialized on each entry to sub tokenize_this_line.
1701							my (
1702							$block_type, $container_type, $expecting,
1703							$i, $i_tok, $input_line,
1704							$input_line_number, $last_nonblank_i, $max_token_index,
1705							$next_tok, $next_type, $peeked_ahead,
1706							$prototype, $rhere_target_list, $rtoken_map,
1707							$rtoken_type, $rtokens, $tok,
1708							$type, $type_sequence, $indent_flag,
1709							);
1710
1711							# TV2: refs to ARRAYS for processing one LINE
1712							# Re-initialized on each call.
1713							my $routput_token_list = []; # stack of output token indexes
1714							my $routput_token_type = []; # token types
1715							my $routput_block_type = []; # types of code block
1716							my $routput_container_type = []; # paren types, such as if, elsif, ..
1717							my $routput_type_sequence = []; # nesting sequential number
1718							my $routput_indent_flag = []; #
1719
1720							# TV3: SCALARS for quote variables. These are initialized with a
1721							# subroutine call and continually updated as lines are processed.
1722							my ( $in_quote, $quote_type, $quote_character, $quote_pos, $quote_depth,
1723							$quoted_string_1, $quoted_string_2, $allowed_quote_modifiers, );
1724
1725							# TV4: SCALARS for multi-line identifiers and
1726							# statements. These are initialized with a subroutine call
1727							# and continually updated as lines are processed.
1728							my ( $id_scan_state, $identifier, $want_paren );
1729
1730							# TV5: SCALARS for tracking indentation level.
1731							# Initialized once and continually updated as lines are
1732							# processed.
1733							my (
1734							$nesting_token_string, $nesting_block_string,
1735							$nesting_block_flag, $level_in_tokenizer,
1736							);
1737
1738							# TV6: SCALARS for remembering several previous
1739							# tokens. Initialized once and continually updated as
1740							# lines are processed.
1741							my (
1742							$last_nonblank_container_type, $last_nonblank_type_sequence,
1743							$last_last_nonblank_token, $last_last_nonblank_type,
1744							$last_nonblank_prototype,
1745							);
1746
1747							# ----------------------------------------------------------------
1748							# beginning of tokenizer variable access and manipulation routines
1749							# ----------------------------------------------------------------
1750
1751							sub initialize_tokenizer_state {
1752
1753							# GV1: initialized once
1754							# TV1: initialized on each call
1755							# TV2: initialized on each call
1756							# TV3:
1757	561			561	0	1251	$in_quote = 0;
1758	561					1371	$quote_type = 'Q';
1759	561					1218	$quote_character = EMPTY_STRING;
1760	561					1224	$quote_pos = 0;
1761	561					1177	$quote_depth = 0;
1762	561					1247	$quoted_string_1 = EMPTY_STRING;
1763	561					1247	$quoted_string_2 = EMPTY_STRING;
1764	561					1183	$allowed_quote_modifiers = EMPTY_STRING;
1765
1766							# TV4:
1767	561					1144	$id_scan_state = EMPTY_STRING;
1768	561					1186	$identifier = EMPTY_STRING;
1769	561					1194	$want_paren = EMPTY_STRING;
1770
1771							# TV5:
1772	561					1361	$nesting_token_string = EMPTY_STRING;
1773	561					1229	$nesting_block_string = '1'; # initially in a block
1774	561					1069	$nesting_block_flag = 1;
1775	561					1107	$level_in_tokenizer = 0;
1776
1777							# TV6:
1778	561					1232	$last_nonblank_container_type = EMPTY_STRING;
1779	561					1155	$last_nonblank_type_sequence = EMPTY_STRING;
1780	561					1275	$last_last_nonblank_token = ';';
1781	561					1163	$last_last_nonblank_type = ';';
1782	561					1181	$last_nonblank_prototype = EMPTY_STRING;
1783	561					1154	return;
1784							} ## end sub initialize_tokenizer_state
1785
1786							sub save_tokenizer_state {
1787
1788							# Global variables:
1789	0			0	0	0	my $rGV1 = [
1790							$brace_depth,
1791							$context,
1792							$current_package,
1793							$last_nonblank_block_type,
1794							$last_nonblank_token,
1795							$last_nonblank_type,
1796							$next_sequence_number,
1797							$paren_depth,
1798							$rbrace_context,
1799							$rbrace_package,
1800							$rbrace_structural_type,
1801							$rbrace_type,
1802							$rcurrent_depth,
1803							$rcurrent_sequence_number,
1804							$rdepth_array,
1805							$ris_block_function,
1806							$ris_block_list_function,
1807							$ris_constant,
1808							$ris_user_function,
1809							$rnested_statement_type,
1810							$rnested_ternary_flag,
1811							$rparen_semicolon_count,
1812							$rparen_vars,
1813							$rparen_type,
1814							$rsaw_function_definition,
1815							$rsaw_use_module,
1816							$rsquare_bracket_structural_type,
1817							$rsquare_bracket_type,
1818							$rstarting_line_of_current_depth,
1819							$rtotal_depth,
1820							$ruser_function_prototype,
1821							$square_bracket_depth,
1822							$statement_type,
1823							$total_depth,
1824
1825							];
1826
1827							# Tokenizer closure variables:
1828	0					0	my $rTV1 = [
1829							$block_type, $container_type, $expecting,
1830							$i, $i_tok, $input_line,
1831							$input_line_number, $last_nonblank_i, $max_token_index,
1832							$next_tok, $next_type, $peeked_ahead,
1833							$prototype, $rhere_target_list, $rtoken_map,
1834							$rtoken_type, $rtokens, $tok,
1835							$type, $type_sequence, $indent_flag,
1836							];
1837
1838	0					0	my $rTV2 = [
1839							$routput_token_list, $routput_token_type,
1840							$routput_block_type, $routput_container_type,
1841							$routput_type_sequence, $routput_indent_flag,
1842							];
1843
1844	0					0	my $rTV3 = [
1845							$in_quote, $quote_type,
1846							$quote_character, $quote_pos,
1847							$quote_depth, $quoted_string_1,
1848							$quoted_string_2, $allowed_quote_modifiers,
1849							];
1850
1851	0					0	my $rTV4 = [ $id_scan_state, $identifier, $want_paren ];
1852
1853	0					0	my $rTV5 = [
1854							$nesting_token_string, $nesting_block_string,
1855							$nesting_block_flag, $level_in_tokenizer,
1856							];
1857
1858	0					0	my $rTV6 = [
1859							$last_nonblank_container_type, $last_nonblank_type_sequence,
1860							$last_last_nonblank_token, $last_last_nonblank_type,
1861							$last_nonblank_prototype,
1862							];
1863	0					0	return [ $rGV1, $rTV1, $rTV2, $rTV3, $rTV4, $rTV5, $rTV6 ];
1864							} ## end sub save_tokenizer_state
1865
1866							sub restore_tokenizer_state {
1867	0			0	0	0	my ($rstate) = @_;
1868	0					0	my ( $rGV1, $rTV1, $rTV2, $rTV3, $rTV4, $rTV5, $rTV6 ) = @{$rstate};
	0					0
1869
1870							(
1871							$brace_depth,
1872							$context,
1873							$current_package,
1874							$last_nonblank_block_type,
1875							$last_nonblank_token,
1876							$last_nonblank_type,
1877							$next_sequence_number,
1878							$paren_depth,
1879							$rbrace_context,
1880							$rbrace_package,
1881							$rbrace_structural_type,
1882							$rbrace_type,
1883							$rcurrent_depth,
1884							$rcurrent_sequence_number,
1885							$rdepth_array,
1886							$ris_block_function,
1887							$ris_block_list_function,
1888							$ris_constant,
1889							$ris_user_function,
1890							$rnested_statement_type,
1891							$rnested_ternary_flag,
1892							$rparen_semicolon_count,
1893							$rparen_vars,
1894							$rparen_type,
1895							$rsaw_function_definition,
1896							$rsaw_use_module,
1897							$rsquare_bracket_structural_type,
1898							$rsquare_bracket_type,
1899							$rstarting_line_of_current_depth,
1900							$rtotal_depth,
1901							$ruser_function_prototype,
1902							$square_bracket_depth,
1903							$statement_type,
1904							$total_depth,
1905
1906	0					0	) = @{$rGV1};
	0					0
1907
1908							(
1909							$block_type, $container_type, $expecting,
1910							$i, $i_tok, $input_line,
1911							$input_line_number, $last_nonblank_i, $max_token_index,
1912							$next_tok, $next_type, $peeked_ahead,
1913							$prototype, $rhere_target_list, $rtoken_map,
1914							$rtoken_type, $rtokens, $tok,
1915							$type, $type_sequence, $indent_flag,
1916	0					0	) = @{$rTV1};
	0					0
1917
1918							(
1919							$routput_token_list, $routput_token_type,
1920							$routput_block_type, $routput_container_type,
1921							$routput_type_sequence, $routput_indent_flag,
1922	0					0	) = @{$rTV2};
	0					0
1923
1924							(
1925							$in_quote, $quote_type, $quote_character, $quote_pos, $quote_depth,
1926							$quoted_string_1, $quoted_string_2, $allowed_quote_modifiers,
1927	0					0	) = @{$rTV3};
	0					0
1928
1929	0					0	( $id_scan_state, $identifier, $want_paren ) = @{$rTV4};
	0					0
1930
1931							(
1932							$nesting_token_string, $nesting_block_string,
1933							$nesting_block_flag, $level_in_tokenizer,
1934	0					0	) = @{$rTV5};
	0					0
1935
1936							(
1937							$last_nonblank_container_type, $last_nonblank_type_sequence,
1938							$last_last_nonblank_token, $last_last_nonblank_type,
1939							$last_nonblank_prototype,
1940	0					0	) = @{$rTV6};
	0					0
1941	0					0	return;
1942							} ## end sub restore_tokenizer_state
1943
1944							sub split_pretoken {
1945
1946	8			8	0	19	my ( $self, $numc ) = @_;
1947
1948							# Split the leading $numc characters from the current token (at index=$i)
1949							# which is pre-type 'w' and insert the remainder back into the pretoken
1950							# stream with appropriate settings. Since we are splitting a pre-type 'w',
1951							# there are three cases, depending on if the remainder starts with a digit:
1952							# Case 1: remainder is type 'd', all digits
1953							# Case 2: remainder is type 'd' and type 'w': digits and other characters
1954							# Case 3: remainder is type 'w'
1955
1956							# Examples, for $numc=1:
1957							# $tok => $tok_0 $tok_1 $tok_2
1958							# 'x10' => 'x' '10' # case 1
1959							# 'x10if' => 'x' '10' 'if' # case 2
1960							# '0ne => 'O' 'ne' # case 3
1961
1962							# where:
1963							# $tok_1 is a possible string of digits (pre-type 'd')
1964							# $tok_2 is a possible word (pre-type 'w')
1965
1966							# return 1 if successful
1967							# return undef if error (shouldn't happen)
1968
1969							# Calling routine should update '$type' and '$tok' if successful.
1970
1971	8					18	my $pretoken = $rtokens->[$i];
1972	8	50	33			80	if ( $pretoken
			33
1973							&& length($pretoken) > $numc
1974							&& substr( $pretoken, $numc ) =~ /^(\d)(.)$/ )
1975							{
1976
1977							# Split $tok into up to 3 tokens:
1978	8					21	my $tok_0 = substr( $pretoken, 0, $numc );
1979	8	50				28	my $tok_1 = defined($1) ? $1 : EMPTY_STRING;
1980	8	50				27	my $tok_2 = defined($2) ? $2 : EMPTY_STRING;
1981
1982	8					12	my $len_0 = length($tok_0);
1983	8					13	my $len_1 = length($tok_1);
1984	8					14	my $len_2 = length($tok_2);
1985
1986	8					16	my $pre_type_0 = 'w';
1987	8					13	my $pre_type_1 = 'd';
1988	8					14	my $pre_type_2 = 'w';
1989
1990	8					16	my $pos_0 = $rtoken_map->[$i];
1991	8					14	my $pos_1 = $pos_0 + $len_0;
1992	8					13	my $pos_2 = $pos_1 + $len_1;
1993
1994	8					13	my $isplice = $i + 1;
1995
1996							# Splice in any digits
1997	8	100				24	if ($len_1) {
1998	5					12	splice @{$rtoken_map}, $isplice, 0, $pos_1;
	5					22
1999	5					18	splice @{$rtokens}, $isplice, 0, $tok_1;
	5					12
2000	5					11	splice @{$rtoken_type}, $isplice, 0, $pre_type_1;
	5					11
2001	5					11	$max_token_index++;
2002	5					10	$isplice++;
2003							}
2004
2005							# Splice in any trailing word
2006	8	100				20	if ($len_2) {
2007	4					5	splice @{$rtoken_map}, $isplice, 0, $pos_2;
	4					11
2008	4					11	splice @{$rtokens}, $isplice, 0, $tok_2;
	4					10
2009	4					6	splice @{$rtoken_type}, $isplice, 0, $pre_type_2;
	4					12
2010	4					6	$max_token_index++;
2011							}
2012
2013	8					18	$rtokens->[$i] = $tok_0;
2014	8					28	return 1;
2015							}
2016							else {
2017
2018							# Shouldn't get here
2019	0					0	if (DEVEL_MODE) {
2020							$self->Fault(<<EOM);
2021							While working near line number $input_line_number, bad arg '$tok' passed to sub split_pretoken()
2022							EOM
2023							}
2024							}
2025	0					0	return;
2026							} ## end sub split_pretoken
2027
2028							sub get_indentation_level {
2029	561			561	0	1551	return $level_in_tokenizer;
2030							}
2031
2032							sub reset_indentation_level {
2033	561			561	0	1458	$level_in_tokenizer = shift;
2034	561					1135	return;
2035							}
2036
2037							sub peeked_ahead {
2038	252			252	0	490	my $flag = shift;
2039	252	100				680	$peeked_ahead = defined($flag) ? $flag : $peeked_ahead;
2040	252					606	return $peeked_ahead;
2041							}
2042
2043							# ------------------------------------------------------------
2044							# end of tokenizer variable access and manipulation routines
2045							# ------------------------------------------------------------
2046
2047							#------------------------------
2048							# beginning of tokenizer hashes
2049							#------------------------------
2050
2051							my %matching_start_token = ( '}' => '{', ']' => '[', ')' => '(' );
2052
2053							# These block types terminate statements and do not need a trailing
2054							# semicolon
2055							# patched for SWITCH/CASE/
2056							# NOTE: not currently used but may be used in the future
2057							my %is_zero_continuation_block_type;
2058							my @q;
2059							@q = qw( } { BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue ;
2060							if elsif else unless while until for foreach switch case given when);
2061							@is_zero_continuation_block_type{@q} = (1) x scalar(@q);
2062
2063							my %is_logical_container;
2064							@q = qw(if elsif unless while and or err not && ! \|\| for foreach);
2065							@is_logical_container{@q} = (1) x scalar(@q);
2066
2067							my %is_binary_type;
2068							@q = qw(\|\| &&);
2069							@is_binary_type{@q} = (1) x scalar(@q);
2070
2071							my %is_binary_keyword;
2072							@q = qw(and or err eq ne cmp);
2073							@is_binary_keyword{@q} = (1) x scalar(@q);
2074
2075							# 'L' is token for opening { at hash key
2076							my %is_opening_type;
2077							@q = qw< L { ( [ >;
2078							@is_opening_type{@q} = (1) x scalar(@q);
2079
2080							my %is_opening_or_ternary_type;
2081							push @q, '?';
2082							@is_opening_or_ternary_type{@q} = (1) x scalar(@q);
2083
2084							# 'R' is token for closing } at hash key
2085							my %is_closing_type;
2086							@q = qw< R } ) ] >;
2087							@is_closing_type{@q} = (1) x scalar(@q);
2088
2089							my %is_closing_or_ternary_type;
2090							push @q, ':';
2091							@is_closing_or_ternary_type{@q} = (1) x scalar(@q);
2092
2093							my %is_redo_last_next_goto;
2094							@q = qw(redo last next goto);
2095							@is_redo_last_next_goto{@q} = (1) x scalar(@q);
2096
2097							my %is_use_require;
2098							@q = qw(use require);
2099							@is_use_require{@q} = (1) x scalar(@q);
2100
2101							# This hash holds the array index in $self for these keywords:
2102							# Fix for issue c035: removed 'format' from this hash
2103							my %is_END_DATA = (
2104							'__END__' => _in_end_,
2105							'__DATA__' => _in_data_,
2106							);
2107
2108							my %is_list_end_type;
2109							@q = qw( ; { } );
2110							push @q, ',';
2111							@is_list_end_type{@q} = (1) x scalar(@q);
2112
2113							# original ref: camel 3 p 147,
2114							# but perl may accept undocumented flags
2115							# perl 5.10 adds 'p' (preserve)
2116							# Perl version 5.22 added 'n'
2117							# From http://perldoc.perl.org/perlop.html we have
2118							# /PATTERN/msixpodualngc or m?PATTERN?msixpodualngc
2119							# s/PATTERN/REPLACEMENT/msixpodualngcer
2120							# y/SEARCHLIST/REPLACEMENTLIST/cdsr
2121							# tr/SEARCHLIST/REPLACEMENTLIST/cdsr
2122							# qr/STRING/msixpodualn
2123							my %quote_modifiers = (
2124							's' => '[msixpodualngcer]',
2125							'y' => '[cdsr]',
2126							'tr' => '[cdsr]',
2127							'm' => '[msixpodualngc]',
2128							'qr' => '[msixpodualn]',
2129							'q' => EMPTY_STRING,
2130							'qq' => EMPTY_STRING,
2131							'qw' => EMPTY_STRING,
2132							'qx' => EMPTY_STRING,
2133							);
2134
2135							# table showing how many quoted things to look for after quote operator..
2136							# s, y, tr have 2 (pattern and replacement)
2137							# others have 1 (pattern only)
2138							my %quote_items = (
2139							's' => 2,
2140							'y' => 2,
2141							'tr' => 2,
2142							'm' => 1,
2143							'qr' => 1,
2144							'q' => 1,
2145							'qq' => 1,
2146							'qw' => 1,
2147							'qx' => 1,
2148							);
2149
2150							my %is_for_foreach;
2151							@q = qw(for foreach);
2152							@is_for_foreach{@q} = (1) x scalar(@q);
2153
2154							# These keywords may introduce blocks after parenthesized expressions,
2155							# in the form:
2156							# keyword ( .... ) { BLOCK }
2157							# patch for SWITCH/CASE: added 'switch' 'case' 'given' 'when'
2158							# NOTE for --use-feature=class: if ADJUST blocks eventually take a
2159							# parameter list, then ADJUST might need to be added to this list (see
2160							# perlclass.pod)
2161							my %is_blocktype_with_paren;
2162							@q =
2163							qw(if elsif unless while until for foreach switch case given when catch);
2164							@is_blocktype_with_paren{@q} = (1) x scalar(@q);
2165
2166							my %is_case_default;
2167							@q = qw(case default);
2168							@is_case_default{@q} = (1) x scalar(@q);
2169
2170							#------------------------
2171							# end of tokenizer hashes
2172							#------------------------
2173
2174							# ------------------------------------------------------------
2175							# beginning of various scanner interface routines
2176							# ------------------------------------------------------------
2177							sub scan_replacement_text {
2178
2179							# check for here-docs in replacement text invoked by
2180							# a substitution operator with executable modifier 'e'.
2181							#
2182							# given:
2183							# $replacement_text
2184							# return:
2185							# $rht = reference to any here-doc targets
2186	0			0	0	0	my ( $self, $replacement_text ) = @_;
2187
2188							# quick check
2189	0	0				0	return if ( $replacement_text !~ /<</ );
2190
2191	0					0	$self->write_logfile_entry(
2192							"scanning replacement text for here-doc targets\n");
2193
2194							# save the logger object for error messages
2195	0					0	my $logger_object = $self->[_logger_object_];
2196
2197							# save all lexical variables
2198	0					0	my $rstate = save_tokenizer_state();
2199	0					0	_decrement_count(); # avoid error check for multiple tokenizers
2200
2201							# make a new tokenizer
2202	0					0	my $tokenizer = Perl::Tidy::Tokenizer->new(
2203							source_object => \$replacement_text,
2204							logger_object => $logger_object,
2205							starting_line_number => $input_line_number,
2206							);
2207
2208							# scan the replacement text
2209	0					0	while ( $tokenizer->get_line() ) { }
2210
2211							# remove any here doc targets
2212	0					0	my $rht = undef;
2213	0	0				0	if ( $tokenizer->[_in_here_doc_] ) {
2214	0					0	$rht = [];
2215	0					0	push @{$rht},
	0					0
2216							[
2217							$tokenizer->[_here_doc_target_],
2218							$tokenizer->[_here_quote_character_]
2219							];
2220	0	0				0	if ( $tokenizer->[_rhere_target_list_] ) {
2221	0					0	push @{$rht}, @{ $tokenizer->[_rhere_target_list_] };
	0					0
	0					0
2222	0					0	$tokenizer->[_rhere_target_list_] = undef;
2223							}
2224	0					0	$tokenizer->[_in_here_doc_] = undef;
2225							}
2226
2227							# now its safe to report errors
2228	0					0	my $severe_error = $tokenizer->report_tokenization_errors();
2229
2230							# TODO: Could propagate a severe error up
2231
2232							# restore all tokenizer lexical variables
2233	0					0	restore_tokenizer_state($rstate);
2234
2235							# return the here doc targets
2236	0					0	return $rht;
2237							} ## end sub scan_replacement_text
2238
2239							sub scan_bare_identifier {
2240	1672			1672	0	2959	my $self = shift;
2241	1672					5151	( $i, $tok, $type, $prototype ) =
2242							$self->scan_bare_identifier_do( $input_line, $i, $tok, $type,
2243							$prototype, $rtoken_map, $max_token_index );
2244	1672					3466	return;
2245							} ## end sub scan_bare_identifier
2246
2247							sub scan_identifier {
2248
2249	486			486	0	861	my $self = shift;
2250
2251							(
2252	486					1952	$i, $tok, $type, $id_scan_state, $identifier,
2253							my $split_pretoken_flag
2254							)
2255							= $self->scan_complex_identifier( $i, $id_scan_state, $identifier,
2256							$rtokens, $max_token_index, $expecting,
2257							$rparen_type->[$paren_depth] );
2258
2259							# Check for signal to fix a special variable adjacent to a keyword,
2260							# such as '$^One$0'.
2261	486	100				1467	if ($split_pretoken_flag) {
2262
2263							# Try to fix it by splitting the pretoken
2264	3	50	33			23	if ( $i > 0
			33
2265							&& $rtokens->[ $i - 1 ] eq '^'
2266							&& $self->split_pretoken(1) )
2267							{
2268	3					10	$identifier = substr( $identifier, 0, 3 );
2269	3					5	$tok = $identifier;
2270							}
2271							else {
2272
2273							# This shouldn't happen ...
2274	0					0	my $var = substr( $tok, 0, 3 );
2275	0					0	my $excess = substr( $tok, 3 );
2276	0					0	$self->interrupt_logfile();
2277	0					0	$self->warning(<<EOM);
2278							$input_line_number: Trouble parsing at characters '$excess' after special variable '$var'.
2279							A space may be needed after '$var'.
2280							EOM
2281	0					0	$self->resume_logfile();
2282							}
2283							}
2284	486					883	return;
2285							} ## end sub scan_identifier
2286
2287	39			39		359	use constant VERIFY_FASTSCAN => 0;
	39					104
	39					3977
2288							my %fast_scan_context;
2289
2290							BEGIN {
2291	39			39		50174	%fast_scan_context = (
2292							'$' => SCALAR_CONTEXT,
2293							'*' => SCALAR_CONTEXT,
2294							'@' => LIST_CONTEXT,
2295							'%' => LIST_CONTEXT,
2296							'&' => UNKNOWN_CONTEXT,
2297							);
2298							} ## end BEGIN
2299
2300							sub scan_simple_identifier {
2301
2302							# This is a wrapper for sub scan_identifier. It does a fast preliminary
2303							# scan for certain common identifiers:
2304							# '$var', '@var', %var, *var, &var, '@{...}', '%{...}'
2305							# If it does not find one of these, or this is a restart, it calls the
2306							# original scanner directly.
2307
2308							# This gives the same results as the full scanner in about 1/4 the
2309							# total runtime for a typical input stream.
2310
2311							# Notation:
2312							# $var * 2
2313							# ^^ ^
2314							# \|\| \|
2315							# \|\| ---- $i_next [= next nonblank pretoken ]
2316							# \|----$i_plus_1 [= a bareword ]
2317							# ---$i_begin [= a sigil]
2318
2319	4791			4791	0	7125	my $self = shift;
2320
2321	4791					6836	my $i_begin = $i;
2322	4791					7241	my $tok_begin = $tok;
2323	4791					7307	my $i_plus_1 = $i + 1;
2324	4791					6793	my $fast_scan_type;
2325
2326							#-------------------------------------------------------
2327							# Do full scan for anything following a pointer, such as
2328							# $cref->&*; # a postderef
2329							#-------------------------------------------------------
2330	4791	100	66			27369	if ( $last_nonblank_token eq '->' ) {
		100	66
		50	33
			0
			33
2331
2332							}
2333
2334							#------------------------------
2335							# quick scan with leading sigil
2336							#------------------------------
2337							elsif ( !$id_scan_state
2338							&& $i_plus_1 <= $max_token_index
2339							&& $fast_scan_context{$tok} )
2340							{
2341	4678					8438	$context = $fast_scan_context{$tok};
2342
2343							# look for $var, @var, ...
2344	4678	100	100			11163	if ( $rtoken_type->[$i_plus_1] eq 'w' ) {
		100	66
2345	4390					6957	my $pretype_next = EMPTY_STRING;
2346	4390	100				9434	if ( $i_plus_1 < $max_token_index ) {
2347	4274					6657	my $i_next = $i_plus_1 + 1;
2348	4274	100	100			13230	if ( $rtoken_type->[$i_next] eq 'b'
2349							&& $i_next < $max_token_index )
2350							{
2351	1707					3085	$i_next += 1;
2352							}
2353	4274					7308	$pretype_next = $rtoken_type->[$i_next];
2354							}
2355	4390	100	100			16207	if ( $pretype_next ne ':' && $pretype_next ne "'" ) {
2356
2357							# Found type 'i' like '$var', '@var', or '%var'
2358	4282					8389	$identifier = $tok . $rtokens->[$i_plus_1];
2359	4282					6509	$tok = $identifier;
2360	4282					6973	$type = 'i';
2361	4282					6136	$i = $i_plus_1;
2362	4282					7289	$fast_scan_type = $type;
2363							}
2364							}
2365
2366							# Look for @{ or %{ .
2367							# But we must let the full scanner handle things ${ because it may
2368							# keep going to get a complete identifier like '${#}' .
2369							elsif (
2370							$rtoken_type->[$i_plus_1] eq '{'
2371							&& ( $tok_begin eq '@'
2372							\|\| $tok_begin eq '%' )
2373							)
2374							{
2375
2376	30					70	$identifier = $tok;
2377	30					64	$type = 't';
2378	30					58	$fast_scan_type = $type;
2379							}
2380							else {
2381							## out of tricks
2382							}
2383							}
2384
2385							#---------------------------
2386							# Quick scan with leading ->
2387							# Look for ->[ and ->{
2388							#---------------------------
2389							elsif (
2390							$tok eq '->'
2391							&& $i < $max_token_index
2392							&& ( $rtokens->[$i_plus_1] eq '{'
2393							\|\| $rtokens->[$i_plus_1] eq '[' )
2394							)
2395							{
2396	0					0	$type = $tok;
2397	0					0	$fast_scan_type = $type;
2398	0					0	$identifier = $tok;
2399	0					0	$context = UNKNOWN_CONTEXT;
2400							}
2401							else {
2402							## out of tricks
2403							}
2404
2405							#--------------------------------------
2406							# Verify correctness during development
2407							#--------------------------------------
2408	4791					6827	if ( VERIFY_FASTSCAN && $fast_scan_type ) {
2409
2410							# We will call the full method
2411							my $identifier_simple = $identifier;
2412							my $tok_simple = $tok;
2413							my $i_simple = $i;
2414							my $context_simple = $context;
2415
2416							$tok = $tok_begin;
2417							$i = $i_begin;
2418							$self->scan_identifier();
2419
2420							if ( $tok ne $tok_simple
2421							\|\| $type ne $fast_scan_type
2422							\|\| $i != $i_simple
2423							\|\| $identifier ne $identifier_simple
2424							\|\| $id_scan_state
2425							\|\| $context ne $context_simple )
2426							{
2427							print {*STDERR} <<EOM;
2428							scan_simple_identifier differs from scan_identifier:
2429							simple: i=$i_simple, tok=$tok_simple, type=$fast_scan_type, ident=$identifier_simple, context='$context_simple
2430							full: i=$i, tok=$tok, type=$type, ident=$identifier, context='$context state=$id_scan_state
2431							EOM
2432							}
2433							}
2434
2435							#-------------------------------------------------
2436							# call full scanner if fast method did not succeed
2437							#-------------------------------------------------
2438	4791	100				10064	if ( !$fast_scan_type ) {
2439	479					1641	$self->scan_identifier();
2440							}
2441	4791					8394	return;
2442							} ## end sub scan_simple_identifier
2443
2444							sub method_ok_here {
2445
2446							# Return:
2447							# false if this is definitely an invalid method declaration
2448							# true otherwise (even if not sure)
2449
2450							# We are trying to avoid problems with old uses of 'method'
2451							# when --use-feature=class is set (rt145706).
2452							# For example, this should cause a return of 'false':
2453
2454							# method paint => sub {
2455							# return;
2456							# };
2457
2458	6			6	0	15	my $self = shift;
2459
2460							# from do_scan_sub:
2461	6					16	my $i_beg = $i + 1;
2462	6					13	my $pos_beg = $rtoken_map->[$i_beg];
2463	6					21	pos($input_line) = $pos_beg;
2464
2465							# TEST 1: look a valid sub NAME
2466	6	50				43	if (
2467							$input_line =~ m{\G\s*
2468							((?:\w(?:'\|::))) # package - something that ends in :: or '
2469							(\w+) # NAME - required
2470							}gcx
2471							)
2472							{
2473							# For possible future use..
2474	6					21	my $subname = $2;
2475	6	50				22	my $package = $1 ? $1 : EMPTY_STRING;
2476							}
2477							else {
2478	0					0	return;
2479							}
2480
2481							# TEST 2: look for invalid characters after name, such as here:
2482							# method paint => sub {
2483							# ...
2484							# }
2485	6					15	my $next_char = EMPTY_STRING;
2486	6	100				30	if ( $input_line =~ m/\s*(\S)/gcx ) { $next_char = $1 }
	5					13
2487	6	100	66			37	if ( !$next_char \|\| $next_char eq '#' ) {
2488	1					7	( $next_char, my $i_next ) =
2489							$self->find_next_nonblank_token( $max_token_index,
2490							$rtokens, $max_token_index );
2491							}
2492
2493	6	50				22	if ( !$next_char ) {
2494
2495							# out of characters - give up
2496	0					0	return;
2497							}
2498
2499							# Possibly valid next token types:
2500							# '(' could start prototype or signature
2501							# ':' could start ATTRIBUTE
2502							# '{' cold start BLOCK
2503							# ';' or '}' could end a statement
2504	6	100				28	if ( $next_char !~ /^[\(\:\{\;\}]/ ) {
2505
2506							# This does not match use feature 'class' syntax
2507	3					21	return;
2508							}
2509
2510							# We will stop here and assume that this is valid syntax for
2511							# use feature 'class'.
2512	3					15	return 1;
2513							} ## end sub method_ok_here
2514
2515							sub class_ok_here {
2516
2517							# Return:
2518							# false if this is definitely an invalid class declaration
2519							# true otherwise (even if not sure)
2520
2521							# We are trying to avoid problems with old uses of 'class'
2522							# when --use-feature=class is set (rt145706). We look ahead
2523							# see if this use of 'class' is obviously inconsistent with
2524							# the syntax of use feature 'class'. This allows the default
2525							# setting --use-feature=class to work for old syntax too.
2526
2527							# Valid class declarations look like
2528							# class NAME ?ATTRS ?VERSION ?BLOCK
2529							# where ATTRS VERSION and BLOCK are optional
2530
2531							# For example, this should produce a return of 'false':
2532							#
2533							# class ExtendsBasicAttributes is BasicAttributes{
2534
2535	6			6	0	12	my $self = shift;
2536
2537							# TEST 1: class stmt can only go where a new statment can start
2538	6	50				16	if ( !new_statement_ok() ) { return }
	0					0
2539
2540	6					17	my $i_beg = $i + 1;
2541	6					29	my $pos_beg = $rtoken_map->[$i_beg];
2542	6					18	pos($input_line) = $pos_beg;
2543
2544							# TEST 2: look for a valid NAME
2545	6	50				39	if (
2546							$input_line =~ m{\G\s*
2547							((?:\w(?:'\|::))) # package - something that ends in :: or '
2548							(\w+) # NAME - required
2549							}gcx
2550							)
2551							{
2552							# For possible future use..
2553	6					17	my $subname = $2;
2554	6	100				21	my $package = $1 ? $1 : EMPTY_STRING;
2555							}
2556							else {
2557	0					0	return;
2558							}
2559
2560							# TEST 3: look for valid characters after NAME
2561	6					12	my $next_char = EMPTY_STRING;
2562	6	100				22	if ( $input_line =~ m/\s*(\S)/gcx ) { $next_char = $1 }
	5					11
2563	6	100	66			32	if ( !$next_char \|\| $next_char eq '#' ) {
2564	1					4	( $next_char, my $i_next ) =
2565							$self->find_next_nonblank_token( $max_token_index,
2566							$rtokens, $max_token_index );
2567							}
2568	6	50				15	if ( !$next_char ) {
2569
2570							# out of characters - give up
2571	0					0	return;
2572							}
2573
2574							# Must see one of: ATTRIBUTE, VERSION, BLOCK, or end stmt
2575
2576							# Possibly valid next token types:
2577							# ':' could start ATTRIBUTE
2578							# '\d' could start VERSION
2579							# '{' cold start BLOCK
2580							# ';' could end a statement
2581							# '}' could end statement but would be strange
2582
2583	6	100				19	if ( $next_char !~ /^[\:\d\{\;\}]/ ) {
2584
2585							# This does not match use feature 'class' syntax
2586	2					9	return;
2587							}
2588
2589							# We will stop here and assume that this is valid syntax for
2590							# use feature 'class'.
2591	4					20	return 1;
2592							} ## end sub class_ok_here
2593
2594							sub scan_id {
2595	331			331	0	704	my $self = shift;
2596	331					1302	( $i, $tok, $type, $id_scan_state ) =
2597							$self->scan_id_do( $input_line, $i, $tok, $rtokens, $rtoken_map,
2598							$id_scan_state, $max_token_index );
2599	331					832	return;
2600							} ## end sub scan_id
2601
2602							sub scan_number {
2603	629			629	0	1053	my $self = shift;
2604	629					964	my $number;
2605	629					1770	( $i, $type, $number ) =
2606							$self->scan_number_do( $input_line, $i, $rtoken_map, $type,
2607							$max_token_index );
2608	629					1424	return $number;
2609							} ## end sub scan_number
2610
2611	39			39		359	use constant VERIFY_FASTNUM => 0;
	39					131
	39					32266
2612
2613							sub scan_number_fast {
2614
2615							# This is a wrapper for sub scan_number. It does a fast preliminary
2616							# scan for a simple integer. It calls the original scan_number if it
2617							# does not find one.
2618
2619	2277			2277	0	3469	my $self = shift;
2620	2277					3559	my $i_begin = $i;
2621	2277					3709	my $tok_begin = $tok;
2622	2277					3147	my $number;
2623
2624							#---------------------------------
2625							# Quick check for (signed) integer
2626							#---------------------------------
2627
2628							# This will be the string of digits:
2629	2277					3369	my $i_d = $i;
2630	2277					3822	my $tok_d = $tok;
2631	2277					3912	my $typ_d = $rtoken_type->[$i_d];
2632
2633							# check for signed integer
2634	2277					3636	my $sign = EMPTY_STRING;
2635	2277	50	66			8699	if ( $typ_d ne 'd'
			66
			33
2636							&& ( $typ_d eq '+' \|\| $typ_d eq '-' )
2637							&& $i_d < $max_token_index )
2638							{
2639	343					585	$sign = $tok_d;
2640	343					583	$i_d++;
2641	343					606	$tok_d = $rtokens->[$i_d];
2642	343					599	$typ_d = $rtoken_type->[$i_d];
2643							}
2644
2645							# Handle integers
2646	2277	100	100			16884	if (
			100
2647							$typ_d eq 'd'
2648							&& (
2649							$i_d == $max_token_index
2650							\|\| ( $i_d < $max_token_index
2651							&& $rtoken_type->[ $i_d + 1 ] ne '.'
2652							&& $rtoken_type->[ $i_d + 1 ] ne 'w' )
2653							)
2654							)
2655							{
2656							# Let let full scanner handle multi-digit integers beginning with
2657							# '0' because there could be error messages. For example, '009' is
2658							# not a valid number.
2659
2660	1715	100	100			7269	if ( $tok_d eq '0' \|\| substr( $tok_d, 0, 1 ) ne '0' ) {
2661	1658					3208	$number = $sign . $tok_d;
2662	1658					2561	$type = 'n';
2663	1658					2802	$i = $i_d;
2664							}
2665							}
2666
2667							#--------------------------------------
2668							# Verify correctness during development
2669							#--------------------------------------
2670	2277					3241	if ( VERIFY_FASTNUM && defined($number) ) {
2671
2672							# We will call the full method
2673							my $type_simple = $type;
2674							my $i_simple = $i;
2675							my $number_simple = $number;
2676
2677							$tok = $tok_begin;
2678							$i = $i_begin;
2679							$number = $self->scan_number();
2680
2681							if ( $type ne $type_simple
2682							\|\| ( $i != $i_simple && $i <= $max_token_index )
2683							\|\| $number ne $number_simple )
2684							{
2685							print {*STDERR} <<EOM;
2686							scan_number_fast differs from scan_number:
2687							simple: i=$i_simple, type=$type_simple, number=$number_simple
2688							full: i=$i, type=$type, number=$number
2689							EOM
2690							}
2691							}
2692
2693							#----------------------------------------
2694							# call full scanner if may not be integer
2695							#----------------------------------------
2696	2277	100				5066	if ( !defined($number) ) {
2697	619					1692	$number = $self->scan_number();
2698							}
2699	2277					5488	return $number;
2700							} ## end sub scan_number_fast
2701
2702							# a sub to warn if token found where term expected
2703							sub error_if_expecting_TERM {
2704	9			9	0	23	my $self = shift;
2705	9	50				31	if ( $expecting == TERM ) {
2706	9	50				32	if ( $really_want_term{$last_nonblank_type} ) {
2707	0					0	$self->report_unexpected( $tok, "term", $i_tok,
2708							$last_nonblank_i, $rtoken_map, $rtoken_type, $input_line );
2709	0					0	return 1;
2710							}
2711							}
2712	9					20	return;
2713							} ## end sub error_if_expecting_TERM
2714
2715							# a sub to warn if token found where operator expected
2716							sub error_if_expecting_OPERATOR {
2717	769			769	0	1577	my ( $self, $thing ) = @_;
2718	769	50				1859	if ( $expecting == OPERATOR ) {
2719	0	0				0	if ( !defined($thing) ) { $thing = $tok }
	0					0
2720	0					0	$self->report_unexpected( $thing, "operator", $i_tok,
2721							$last_nonblank_i, $rtoken_map, $rtoken_type, $input_line );
2722	0	0				0	if ( $i_tok == 0 ) {
2723	0					0	$self->interrupt_logfile();
2724	0					0	$self->warning("Missing ';' or ',' above?\n");
2725	0					0	$self->resume_logfile();
2726							}
2727	0					0	return 1;
2728							}
2729	769					1404	return;
2730							} ## end sub error_if_expecting_OPERATOR
2731
2732							# ------------------------------------------------------------
2733							# end scanner interfaces
2734							# ------------------------------------------------------------
2735
2736							#------------------
2737							# Tokenization subs
2738							#------------------
2739							sub do_GREATER_THAN_SIGN {
2740
2741	31			31	0	100	my $self = shift;
2742
2743							# '>'
2744	31	50				120	$self->error_if_expecting_TERM()
2745							if ( $expecting == TERM );
2746	31					73	return;
2747							} ## end sub do_GREATER_THAN_SIGN
2748
2749							sub do_VERTICAL_LINE {
2750
2751	4			4	0	9	my $self = shift;
2752
2753							# '\|'
2754	4	50				13	$self->error_if_expecting_TERM()
2755							if ( $expecting == TERM );
2756	4					9	return;
2757							} ## end sub do_VERTICAL_LINE
2758
2759							# An identifier in possible indirect object location followed by any of
2760							# these tokens: -> , ; } (plus others) is not an indirect object. Fix c257.
2761							my %Z_test_hash;
2762
2763							BEGIN {
2764	39			39		384	my @qZ = qw#
2765							-> ; } ) ]
2766							=> =~ = == !~ \|\| >= != *= .. && \|= .= -= += <= %=
2767							^= &&= \|\|= //= <=>
2768							#;
2769	39					168	push @qZ, ',';
2770	39					352195	@{Z_test_hash}{@qZ} = (1) x scalar(@qZ);
2771							}
2772
2773							sub do_DOLLAR_SIGN {
2774
2775	4036			4036	0	6699	my $self = shift;
2776
2777							# '$'
2778							# start looking for a scalar
2779	4036	50				8890	$self->error_if_expecting_OPERATOR("Scalar")
2780							if ( $expecting == OPERATOR );
2781	4036					12581	$self->scan_simple_identifier();
2782
2783	4036	100				9110	if ( $identifier eq '$^W' ) {
2784	1					3	$self->[_saw_perl_dash_w_] = 1;
2785							}
2786
2787							# Check for identifier in indirect object slot
2788							# (vorboard.pl, sort.t). Something like:
2789							# /^(print\|printf\|sort\|exec\|system)$/
2790	4036	100	66			32229	if (
			100
			100
			66
			66
2791							$is_indirect_object_taker{$last_nonblank_token}
2792							&& $last_nonblank_type eq 'k'
2793							\|\| ( ( $last_nonblank_token eq '(' )
2794							&& $is_indirect_object_taker{ $rparen_type->[$paren_depth] } )
2795							\|\| ( $last_nonblank_type eq 'w'
2796							\|\| $last_nonblank_type eq 'U' ) # possible object
2797							)
2798							{
2799
2800							# An identifier followed by '->' is not indirect object;
2801							# fixes b1175, b1176. Fix c257: Likewise for other tokens like
2802							# comma, semicolon, closing brace, and single space.
2803	98					640	my ( $next_nonblank_token, $i_next ) =
2804							$self->find_next_noncomment_token( $i, $rtokens,
2805							$max_token_index );
2806	98	100				396	$type = 'Z' if ( !$Z_test_hash{$next_nonblank_token} );
2807							}
2808	4036					6659	return;
2809							} ## end sub do_DOLLAR_SIGN
2810
2811							sub do_LEFT_PARENTHESIS {
2812
2813	2125			2125	0	3962	my $self = shift;
2814
2815							# '('
2816	2125					3452	++$paren_depth;
2817
2818							# variable to enable check for brace after closing paren (c230)
2819	2125					3802	my $want_brace = EMPTY_STRING;
2820
2821	2125	100				6374	if ($want_paren) {
		100
2822	240					564	$container_type = $want_paren;
2823	240					502	$want_brace = $want_paren;
2824	240					501	$want_paren = EMPTY_STRING;
2825							}
2826							elsif ( $statement_type =~ /^sub\b/ ) {
2827	14					39	$container_type = $statement_type;
2828							}
2829							else {
2830	1871					3217	$container_type = $last_nonblank_token;
2831
2832							# We can check for a syntax error here of unexpected '(',
2833							# but this is going to get messy...
2834	1871	100	100			7703	if (
2835							$expecting == OPERATOR
2836
2837							# Be sure this is not a method call of the form
2838							# &method(...), $method->(..), &{method}(...),
2839							# $ref[2](list) is ok & short for $ref[2]->(list)
2840							# NOTE: at present, braces in something like &{ xxx }
2841							# are not marked as a block, we might have a method call.
2842							# Added ')' to fix case c017, something like ()()()
2843							&& $last_nonblank_token !~ /^(?:[\]\}\)\&]\|\-\>)/
2844							)
2845							{
2846
2847							# ref: camel 3 p 703.
2848	3	50				10	if ( $last_last_nonblank_token eq 'do' ) {
2849	0					0	$self->complain(
2850							"do SUBROUTINE is deprecated; consider & or -> notation\n"
2851							);
2852							}
2853							else {
2854
2855							# if this is an empty list, (), then it is not an
2856							# error; for example, we might have a constant pi and
2857							# invoke it with pi() or just pi;
2858	3					8	my ( $next_nonblank_token, $i_next ) =
2859							$self->find_next_nonblank_token( $i, $rtokens,
2860							$max_token_index );
2861
2862							# Patch for c029: give up error check if
2863							# a side comment follows
2864	3	50	33			13	if ( $next_nonblank_token ne ')'
2865							&& $next_nonblank_token ne '#' )
2866							{
2867	0					0	my $hint;
2868
2869	0					0	$self->error_if_expecting_OPERATOR('(');
2870
2871	0	0				0	if ( $last_nonblank_type eq 'C' ) {
		0
2872	0					0	$hint =
2873							"$last_nonblank_token has a void prototype\n";
2874							}
2875							elsif ( $last_nonblank_type eq 'i' ) {
2876	0	0	0			0	if ( $i_tok > 0
2877							&& $last_nonblank_token =~ /^\$/ )
2878							{
2879	0					0	$hint =
2880							"Do you mean '$last_nonblank_token->(' ?\n";
2881							}
2882							}
2883							else {
2884							## no hint
2885							}
2886	0	0				0	if ($hint) {
2887	0					0	$self->interrupt_logfile();
2888	0					0	$self->warning($hint);
2889	0					0	$self->resume_logfile();
2890							}
2891							} ## end if ( $next_nonblank_token...
2892							} ## end else [ if ( $last_last_nonblank_token...
2893							} ## end if ( $expecting == OPERATOR...
2894							}
2895
2896	2125					7273	( $type_sequence, $indent_flag ) =
2897							$self->increase_nesting_depth( PAREN, $rtoken_map->[$i_tok] );
2898
2899							# propagate types down through nested parens
2900							# for example: the second paren in 'if ((' would be structural
2901							# since the first is.
2902
2903	2125	100				5491	if ( $last_nonblank_token eq '(' ) {
2904	61					178	$type = $last_nonblank_type;
2905							}
2906
2907							# We exclude parens as structural after a ',' because it
2908							# causes subtle problems with continuation indentation for
2909							# something like this, where the first 'or' will not get
2910							# indented.
2911							#
2912							# assert(
2913							# __LINE__,
2914							# ( not defined $check )
2915							# or ref $check
2916							# or $check eq "new"
2917							# or $check eq "old",
2918							# );
2919							#
2920							# Likewise, we exclude parens where a statement can start
2921							# because of problems with continuation indentation, like
2922							# these:
2923							#
2924							# ($firstline =~ /^#\!.*perl/)
2925							# and (print $File::Find::name, "\n")
2926							# and (return 1);
2927							#
2928							# (ref($usage_fref) =~ /CODE/)
2929							# ? &$usage_fref
2930							# : (&blast_usage, &blast_params, &blast_general_params);
2931
2932							else {
2933	2064					3480	$type = '{';
2934							}
2935
2936	2125	50				5224	if ( $last_nonblank_type eq ')' ) {
2937	0					0	$self->warning(
2938							"Syntax error? found token '$last_nonblank_type' then '('\n");
2939							}
2940
2941							# git #105: Copy container type and want-brace flag at ') (';
2942							# propagate the container type onward so that any subsequent brace gets
2943							# correctly marked. I have implemented this as a general rule, which
2944							# should be safe, but if necessary it could be restricted to certain
2945							# container statement types such as 'for'.
2946	2125	100				5036	if ( $last_nonblank_token eq ')' ) {
2947	1					2	my $rvars = $rparen_vars->[$paren_depth];
2948	1	50				7	if ( defined($rvars) ) {
2949	1					4	$container_type = $rparen_type->[$paren_depth];
2950	1					4	( my $type_lp, $want_brace ) = @{$rvars};
	1					5
2951							}
2952							}
2953
2954	2125					4171	$rparen_type->[$paren_depth] = $container_type;
2955	2125					6030	$rparen_vars->[$paren_depth] = [ $type, $want_brace ];
2956	2125					3998	$rparen_semicolon_count->[$paren_depth] = 0;
2957
2958	2125					3791	return;
2959
2960							} ## end sub do_LEFT_PARENTHESIS
2961
2962							sub do_RIGHT_PARENTHESIS {
2963
2964	2125			2125	0	4387	my $self = shift;
2965
2966							# ')'
2967	2125					7368	( $type_sequence, $indent_flag ) =
2968							$self->decrease_nesting_depth( PAREN, $rtoken_map->[$i_tok] );
2969
2970	2125					4669	my $rvars = $rparen_vars->[$paren_depth];
2971	2125	50				5248	if ( defined($rvars) ) {
2972	2125					3442	my ( $type_lp, $want_brace ) = @{$rvars};
	2125					4750
2973	2125	50	33			8903	if ( $type_lp && $type_lp eq '{' ) {
2974	2125					4131	$type = '}';
2975							}
2976							}
2977
2978	2125					4221	$container_type = $rparen_type->[$paren_depth];
2979
2980							# restore statement type as 'sub' at closing paren of a signature
2981							# so that a subsequent ':' is identified as an attribute
2982	2125	100				6106	if ( $container_type =~ /^sub\b/ ) {
2983	24					61	$statement_type = $container_type;
2984							}
2985
2986							# /^(for\|foreach)$/
2987	2125	100				6147	if ( $is_for_foreach{ $rparen_type->[$paren_depth] } ) {
2988	69					187	my $num_sc = $rparen_semicolon_count->[$paren_depth];
2989	69	50	66			448	if ( $num_sc > 0 && $num_sc != 2 ) {
2990	0					0	$self->warning("Expected 2 ';' in 'for(;;)' but saw $num_sc\n");
2991							}
2992							}
2993
2994	2125	50				4939	if ( $paren_depth > 0 ) { $paren_depth-- }
	2125					3254
2995	2125					3617	return;
2996							} ## end sub do_RIGHT_PARENTHESIS
2997
2998							sub do_COMMA {
2999
3000	3075			3075	0	5176	my $self = shift;
3001
3002							# ','
3003	3075	100	33			10478	if ( $last_nonblank_type eq ',' ) {
		50
3004	10					49	$self->complain("Repeated ','s \n");
3005							}
3006
3007							# Note that we have to check both token and type here because a
3008							# comma following a qw list can have last token='(' but type = 'q'
3009							elsif ( $last_nonblank_token eq '(' && $last_nonblank_type eq '{' ) {
3010	0					0	$self->warning("Unexpected leading ',' after a '('\n");
3011							}
3012							else {
3013							## ok: no complaints needed
3014							}
3015
3016							# patch for operator_expected: note if we are in the list (use.t)
3017	3075	100				6466	if ( $statement_type eq 'use' ) { $statement_type = '_use' }
	6					12
3018	3075					4636	return;
3019
3020							} ## end sub do_COMMA
3021
3022							sub do_SEMICOLON {
3023
3024	2448			2448	0	4603	my $self = shift;
3025
3026							# ';'
3027	2448					4116	$context = UNKNOWN_CONTEXT;
3028	2448					3929	$statement_type = EMPTY_STRING;
3029	2448					4511	$want_paren = EMPTY_STRING;
3030
3031							# /^(for\|foreach)$/
3032	2448	100				6758	if ( $is_for_foreach{ $rparen_type->[$paren_depth] } )
3033							{ # mark ; in for loop
3034
3035							# Be careful: we do not want a semicolon such as the
3036							# following to be included:
3037							#
3038							# for (sort {strcoll($a,$b);} keys %investments) {
3039
3040	35	100	66			250	if ( $brace_depth == $rdepth_array->[PAREN][BRACE][$paren_depth]
3041							&& $square_bracket_depth ==
3042							$rdepth_array->[PAREN][SQUARE_BRACKET][$paren_depth] )
3043							{
3044
3045	34					68	$type = 'f';
3046	34					72	$rparen_semicolon_count->[$paren_depth]++;
3047							}
3048							}
3049	2448					4045	return;
3050							} ## end sub do_SEMICOLON
3051
3052							sub do_QUOTATION_MARK {
3053
3054	1125			1125	0	2189	my $self = shift;
3055
3056							# '"'
3057	1125	50				2920	$self->error_if_expecting_OPERATOR("String")
3058							if ( $expecting == OPERATOR );
3059	1125					1874	$in_quote = 1;
3060	1125					1958	$type = 'Q';
3061	1125					1832	$allowed_quote_modifiers = EMPTY_STRING;
3062	1125					1911	return;
3063							} ## end sub do_QUOTATION_MARK
3064
3065							sub do_APOSTROPHE {
3066
3067	1164			1164	0	2158	my $self = shift;
3068
3069							# "'"
3070	1164	50				2813	$self->error_if_expecting_OPERATOR("String")
3071							if ( $expecting == OPERATOR );
3072	1164					1847	$in_quote = 1;
3073	1164					1922	$type = 'Q';
3074	1164					1918	$allowed_quote_modifiers = EMPTY_STRING;
3075	1164					1898	return;
3076							} ## end sub do_APOSTROPHE
3077
3078							sub do_BACKTICK {
3079
3080	0			0	0	0	my $self = shift;
3081
3082							# '`'
3083	0	0				0	$self->error_if_expecting_OPERATOR("String")
3084							if ( $expecting == OPERATOR );
3085	0					0	$in_quote = 1;
3086	0					0	$type = 'Q';
3087	0					0	$allowed_quote_modifiers = EMPTY_STRING;
3088	0					0	return;
3089							} ## end sub do_BACKTICK
3090
3091							sub do_SLASH {
3092
3093	207			207	0	450	my $self = shift;
3094
3095							# '/'
3096	207					365	my $is_pattern;
3097
3098							# a pattern cannot follow certain keywords which take optional
3099							# arguments, like 'shift' and 'pop'. See also '?'.
3100	207	50	66			1036	if (
		50
3101							$last_nonblank_type eq 'k'
3102							&& $is_keyword_rejecting_slash_as_pattern_delimiter{
3103							$last_nonblank_token}
3104							)
3105							{
3106	0					0	$is_pattern = 0;
3107							}
3108							elsif ( $expecting == UNKNOWN ) { # indeterminate, must guess..
3109	0					0	my $msg;
3110	0					0	( $is_pattern, $msg ) =
3111							$self->guess_if_pattern_or_division( $i, $rtokens, $rtoken_map,
3112							$max_token_index );
3113
3114	0	0				0	if ($msg) {
3115	0					0	$self->write_diagnostics("DIVIDE:$msg\n");
3116	0					0	$self->write_logfile_entry($msg);
3117							}
3118							}
3119	207					435	else { $is_pattern = ( $expecting == TERM ) }
3120
3121	207	100				504	if ($is_pattern) {
3122	78					171	$in_quote = 1;
3123	78					159	$type = 'Q';
3124	78					148	$allowed_quote_modifiers = '[msixpodualngc]';
3125							}
3126							else { # not a pattern; check for a /= token
3127
3128	129	50				382	if ( $rtokens->[ $i + 1 ] eq '=' ) { # form token /=
3129	0					0	$i++;
3130	0					0	$tok = '/=';
3131	0					0	$type = $tok;
3132							}
3133
3134							#DEBUG - collecting info on what tokens follow a divide
3135							# for development of guessing algorithm
3136							## if (
3137							## $self->is_possible_numerator( $i, $rtokens,
3138							## $max_token_index ) < 0
3139							## )
3140							## {
3141							## $self->write_diagnostics("DIVIDE? $input_line\n");
3142							## }
3143							}
3144	207					393	return;
3145							} ## end sub do_SLASH
3146
3147							sub do_LEFT_CURLY_BRACKET {
3148
3149	1668			1668	0	3237	my $self = shift;
3150
3151							# '{'
3152							# if we just saw a ')', we will label this block with
3153							# its type. We need to do this to allow sub
3154							# code_block_type to determine if this brace starts a
3155							# code block or anonymous hash. (The type of a paren
3156							# pair is the preceding token, such as 'if', 'else',
3157							# etc).
3158	1668					3248	$container_type = EMPTY_STRING;
3159
3160							# ATTRS: for a '{' following an attribute list, reset
3161							# things to look like we just saw a sub name
3162							# Added 'package' (can be 'class') for --use-feature=class (rt145706)
3163	1668	100	100			15477	if ( substr( $statement_type, 0, 3 ) eq 'sub' ) {
		100	66
		50	33
		100
		50
3164	34					93	$last_nonblank_token = $statement_type;
3165	34					75	$last_nonblank_type = 'S'; # c250 change
3166	34					71	$statement_type = EMPTY_STRING;
3167							}
3168							elsif ( substr( $statement_type, 0, 7 ) eq 'package' ) {
3169	4					9	$last_nonblank_token = $statement_type;
3170	4					7	$last_nonblank_type = 'P'; # c250 change
3171	4					8	$statement_type = EMPTY_STRING;
3172							}
3173
3174							# patch for SWITCH/CASE: hide these keywords from an immediately
3175							# following opening brace
3176							elsif ( ( $statement_type eq 'case' \|\| $statement_type eq 'when' )
3177							&& $statement_type eq $last_nonblank_token )
3178							{
3179	0					0	$last_nonblank_token = ";";
3180							}
3181
3182							elsif ( $last_nonblank_token eq ')' ) {
3183	241					694	$last_nonblank_token = $rparen_type->[ $paren_depth + 1 ];
3184
3185							# defensive move in case of a nesting error (pbug.t)
3186							# in which this ')' had no previous '('
3187							# this nesting error will have been caught
3188	241	50				757	if ( !defined($last_nonblank_token) ) {
3189	0					0	$last_nonblank_token = 'if';
3190							}
3191
3192							# Syntax check at '){'
3193	241	100				1095	if ( $is_blocktype_with_paren{$last_nonblank_token} ) {
3194
3195	227					564	my $rvars = $rparen_vars->[ $paren_depth + 1 ];
3196	227	50				766	if ( defined($rvars) ) {
3197	227					430	my ( $type_lp, $want_brace ) = @{$rvars};
	227					593
3198
3199							# Now verify that this is not a trailing form
3200	227	50				765	if ( !$want_brace ) {
3201	0					0	$self->warning(
3202							"syntax error at ') {', unexpected '{' after closing ')' of a trailing '$last_nonblank_token'\n"
3203							);
3204							}
3205							}
3206							}
3207							else {
3208	14	50				75	if ( $self->[_extended_syntax_] ) {
3209
3210							# we append a trailing () to mark this as an unknown
3211							# block type. This allows perltidy to format some
3212							# common extensions of perl syntax.
3213							# This is used by sub code_block_type
3214	14					49	$last_nonblank_token .= '()';
3215							}
3216							else {
3217	0					0	my $list =
3218							join( SPACE, sort keys %is_blocktype_with_paren );
3219	0					0	$self->warning(
3220							"syntax error at ') {', didn't see one of: <<$list>>; If this code is okay try using the -xs flag\n"
3221							);
3222							}
3223							}
3224							}
3225
3226							# patch for paren-less for/foreach glitch, part 2.
3227							# see note below under 'qw'
3228							elsif ($last_nonblank_token eq 'qw'
3229							&& $is_for_foreach{$want_paren} )
3230							{
3231	0					0	$last_nonblank_token = $want_paren;
3232	0	0				0	if ( $last_last_nonblank_token eq $want_paren ) {
3233	0					0	$self->warning(
3234							"syntax error at '$want_paren .. {' -- missing \$ loop variable\n"
3235							);
3236
3237							}
3238	0					0	$want_paren = EMPTY_STRING;
3239							}
3240							else {
3241							## ok: not special
3242							}
3243
3244							# now identify which of the three possible types of
3245							# curly braces we have: hash index container, anonymous
3246							# hash reference, or code block.
3247
3248							# non-structural (hash index) curly brace pair
3249							# get marked 'L' and 'R'
3250	1668	100				4981	if ( is_non_structural_brace() ) {
3251	367					767	$type = 'L';
3252
3253							# patch for SWITCH/CASE:
3254							# allow paren-less identifier after 'when'
3255							# if the brace is preceded by a space
3256	367	0	33			1346	if ( $statement_type eq 'when'
			33
			0
			0
3257							&& $last_nonblank_type eq 'i'
3258							&& $last_last_nonblank_type eq 'k'
3259							&& ( $i_tok == 0 \|\| $rtoken_type->[ $i_tok - 1 ] eq 'b' ) )
3260							{
3261	0					0	$type = '{';
3262	0					0	$block_type = $statement_type;
3263							}
3264							}
3265
3266							# code and anonymous hash have the same type, '{', but are
3267							# distinguished by 'block_type',
3268							# which will be blank for an anonymous hash
3269							else {
3270
3271	1301					4506	$block_type =
3272							$self->code_block_type( $i_tok, $rtokens, $rtoken_type,
3273							$max_token_index );
3274
3275							# patch to promote bareword type to function taking block
3276	1301	100	100			6023	if ( $block_type
			66
3277							&& $last_nonblank_type eq 'w'
3278							&& $last_nonblank_i >= 0 )
3279							{
3280	34	50				144	if ( $routput_token_type->[$last_nonblank_i] eq 'w' ) {
3281							$routput_token_type->[$last_nonblank_i] =
3282	34	100				166	$is_grep_alias{$block_type} ? 'k' : 'G';
3283							}
3284							}
3285
3286							# patch for SWITCH/CASE: if we find a stray opening block brace
3287							# where we might accept a 'case' or 'when' block, then take it
3288	1301	100	100			5285	if ( $statement_type eq 'case'
3289							\|\| $statement_type eq 'when' )
3290							{
3291	38	100	66			237	if ( !$block_type \|\| $block_type eq '}' ) {
3292	4					9	$block_type = $statement_type;
3293							}
3294							}
3295							}
3296
3297	1668					3905	$rbrace_type->[ ++$brace_depth ] = $block_type;
3298
3299							# Patch for CLASS BLOCK definitions: do not update the package for the
3300							# current depth if this is a BLOCK type definition.
3301							# TODO: should make 'class' separate from 'package' and only do
3302							# this for 'class'
3303	1668	100				5418	$rbrace_package->[$brace_depth] = $current_package
3304							if ( substr( $block_type, 0, 8 ) ne 'package ' );
3305
3306	1668					3744	$rbrace_structural_type->[$brace_depth] = $type;
3307	1668					2981	$rbrace_context->[$brace_depth] = $context;
3308	1668					4984	( $type_sequence, $indent_flag ) =
3309							$self->increase_nesting_depth( BRACE, $rtoken_map->[$i_tok] );
3310	1668					3542	return;
3311							} ## end sub do_LEFT_CURLY_BRACKET
3312
3313							sub do_RIGHT_CURLY_BRACKET {
3314
3315	1668			1668	0	3525	my $self = shift;
3316
3317							# '}'
3318	1668					3575	$block_type = $rbrace_type->[$brace_depth];
3319	1668	100				4247	if ($block_type) { $statement_type = EMPTY_STRING }
	972					1811
3320	1668	100				4020	if ( defined( $rbrace_package->[$brace_depth] ) ) {
3321	1664					3245	$current_package = $rbrace_package->[$brace_depth];
3322							}
3323
3324							# can happen on brace error (caught elsewhere)
3325							else {
3326							}
3327	1668					5599	( $type_sequence, $indent_flag ) =
3328							$self->decrease_nesting_depth( BRACE, $rtoken_map->[$i_tok] );
3329
3330	1668	100				5515	if ( $rbrace_structural_type->[$brace_depth] eq 'L' ) {
3331	367					792	$type = 'R';
3332							}
3333
3334							# propagate type information for 'do' and 'eval' blocks, and also
3335							# for smartmatch operator. This is necessary to enable us to know
3336							# if an operator or term is expected next.
3337	1668	100				4690	if ( $is_block_operator{$block_type} ) {
3338	83					252	$tok = $block_type;
3339							}
3340
3341	1668					2960	$context = $rbrace_context->[$brace_depth];
3342	1668	50				4002	if ( $brace_depth > 0 ) { $brace_depth--; }
	1668					2573
3343	1668					2840	return;
3344							} ## end sub do_RIGHT_CURLY_BRACKET
3345
3346							sub do_AMPERSAND {
3347
3348	126			126	0	361	my $self = shift;
3349
3350							# '&' = maybe sub call? start looking
3351							# We have to check for sub call unless we are sure we
3352							# are expecting an operator. This example from s2p
3353							# got mistaken as a q operator in an early version:
3354							# print BODY &q(<<'EOT');
3355	126	100				447	if ( $expecting != OPERATOR ) {
3356
3357							# But only look for a sub call if we are expecting a term or
3358							# if there is no existing space after the &.
3359							# For example we probably don't want & as sub call here:
3360							# Fcntl::S_IRUSR & $mode;
3361	107	100	66			484	if ( $expecting == TERM \|\| $next_type ne 'b' ) {
3362	104					337	$self->scan_simple_identifier();
3363							}
3364							}
3365							else {
3366							}
3367	126					279	return;
3368							} ## end sub do_AMPERSAND
3369
3370							sub do_LESS_THAN_SIGN {
3371
3372	29			29	0	104	my $self = shift;
3373
3374							# '<' - angle operator or less than?
3375	29	100				138	if ( $expecting != OPERATOR ) {
3376	8					57	( $i, $type ) =
3377							$self->find_angle_operator_termination( $input_line, $i,
3378							$rtoken_map, $expecting, $max_token_index );
3379
3380							## This message is not very helpful and quite confusing if the above
3381							## routine decided not to write a message with the line number.
3382							## if ( $type eq '<' && $expecting == TERM ) {
3383							## $self->error_if_expecting_TERM();
3384							## $self->interrupt_logfile();
3385							## $self->warning("Unterminated <> operator?\n");
3386							## $self->resume_logfile();
3387							## }
3388
3389							}
3390							else {
3391							}
3392	29					69	return;
3393							} ## end sub do_LESS_THAN_SIGN
3394
3395							sub do_QUESTION_MARK {
3396
3397	187			187	0	538	my $self = shift;
3398
3399							# '?' = conditional or starting pattern?
3400	187					418	my $is_pattern;
3401
3402							# Patch for rt #126965
3403							# a pattern cannot follow certain keywords which take optional
3404							# arguments, like 'shift' and 'pop'. See also '/'.
3405	187	100	66			1759	if (
		100
		100
3406							$last_nonblank_type eq 'k'
3407							&& $is_keyword_rejecting_question_as_pattern_delimiter{
3408							$last_nonblank_token}
3409							)
3410							{
3411	1					4	$is_pattern = 0;
3412							}
3413
3414							# patch for RT#131288, user constant function without prototype
3415							# last type is 'U' followed by ?.
3416							elsif ( $last_nonblank_type =~ /^[FUY]$/ ) {
3417	1					4	$is_pattern = 0;
3418							}
3419							elsif ( $expecting == UNKNOWN ) {
3420
3421							# In older versions of Perl, a bare ? can be a pattern
3422							# delimiter. In perl version 5.22 this was
3423							# dropped, but we have to support it in order to format
3424							# older programs. See:
3425							## https://perl.developpez.com/documentations/en/5.22.0/perl5211delta.html
3426							# For example, the following line worked
3427							# at one time:
3428							# ?(.*)? && (print $1,"\n");
3429							# In current versions it would have to be written with slashes:
3430							# /(.*)/ && (print $1,"\n");
3431	11					25	my $msg;
3432	11					76	( $is_pattern, $msg ) =
3433							$self->guess_if_pattern_or_conditional( $i, $rtokens, $rtoken_map,
3434							$max_token_index );
3435
3436	11	50				46	if ($msg) { $self->write_logfile_entry($msg) }
	11					43
3437							}
3438	174					522	else { $is_pattern = ( $expecting == TERM ) }
3439
3440	187	50				547	if ($is_pattern) {
3441	0					0	$in_quote = 1;
3442	0					0	$type = 'Q';
3443	0					0	$allowed_quote_modifiers = '[msixpodualngc]';
3444							}
3445							else {
3446	187					723	( $type_sequence, $indent_flag ) =
3447							$self->increase_nesting_depth( QUESTION_COLON,
3448							$rtoken_map->[$i_tok] );
3449							}
3450	187					488	return;
3451							} ## end sub do_QUESTION_MARK
3452
3453							sub do_STAR {
3454
3455	238			238	0	512	my $self = shift;
3456
3457							# '*' = typeglob, or multiply?
3458	238	50	66			898	if ( $expecting == UNKNOWN && $last_nonblank_type eq 'Z' ) {
3459	0	0	0			0	if ( $next_type ne 'b'
			0
3460							&& $next_type ne '('
3461							&& $next_type ne '#' ) # Fix c036
3462							{
3463	0					0	$expecting = TERM;
3464							}
3465							}
3466	238	100				629	if ( $expecting == TERM ) {
3467	21					91	$self->scan_simple_identifier();
3468							}
3469							else {
3470
3471	217	50				824	if ( $rtokens->[ $i + 1 ] eq '=' ) {
		100
3472	0					0	$tok = '*=';
3473	0					0	$type = $tok;
3474	0					0	$i++;
3475							}
3476							elsif ( $rtokens->[ $i + 1 ] eq '*' ) {
3477	36					103	$tok = '**';
3478	36					84	$type = $tok;
3479	36					68	$i++;
3480	36	50				127	if ( $rtokens->[ $i + 1 ] eq '=' ) {
3481	0					0	$tok = '**=';
3482	0					0	$type = $tok;
3483	0					0	$i++;
3484							}
3485							}
3486							else {
3487							## not multiple characters
3488							}
3489							}
3490	238					438	return;
3491							} ## end sub do_STAR
3492
3493							sub do_DOT {
3494
3495	150			150	0	333	my $self = shift;
3496
3497							# '.' = what kind of . ?
3498	150	100				463	if ( $expecting != OPERATOR ) {
3499	10					51	$self->scan_number();
3500	10	100				43	if ( $type eq '.' ) {
3501	2	50				8	$self->error_if_expecting_TERM()
3502							if ( $expecting == TERM );
3503							}
3504							}
3505							else {
3506							}
3507	150					289	return;
3508							} ## end sub do_DOT
3509
3510							sub do_COLON {
3511
3512	271			271	0	810	my $self = shift;
3513
3514							# ':' = label, ternary, attribute, ?
3515
3516							# if this is the first nonblank character, call it a label
3517							# since perl seems to just swallow it
3518	271	50	66			3691	if ( $input_line_number == 1 && $last_nonblank_i == -1 ) {
		100	66
		100	66
		100	66
		100
3519	0					0	$type = 'J';
3520							}
3521
3522							# ATTRS: check for a ':' which introduces an attribute list
3523							# either after a 'sub' keyword or within a paren list
3524							# Added 'package' (can be 'class') for --use-feature=class (rt145706)
3525							elsif ( $statement_type =~ /^(sub\|package)\b/ ) {
3526	22					51	$type = 'A';
3527	22					53	$self->[_in_attribute_list_] = 1;
3528							}
3529
3530							# Within a signature, unless we are in a ternary. For example,
3531							# from 't/filter_example.t':
3532							# method foo4 ( $class: $bar ) { $class->bar($bar) }
3533							elsif ( $rparen_type->[$paren_depth] =~ /^sub\b/
3534							&& !is_balanced_closing_container(QUESTION_COLON) )
3535							{
3536	1					5	$type = 'A';
3537	1					3	$self->[_in_attribute_list_] = 1;
3538							}
3539
3540							# check for scalar attribute, such as
3541							# my $foo : shared = 1;
3542							elsif ($is_my_our_state{$statement_type}
3543							&& $rcurrent_depth->[QUESTION_COLON] == 0 )
3544							{
3545	15					38	$type = 'A';
3546	15					42	$self->[_in_attribute_list_] = 1;
3547							}
3548
3549							# Look for Switch::Plain syntax if an error would otherwise occur
3550							# here. Note that we do not need to check if the extended syntax
3551							# flag is set because otherwise an error would occur, and we would
3552							# then have to output a message telling the user to set the
3553							# extended syntax flag to avoid the error.
3554							# case 1: {
3555							# default: {
3556							# default:
3557							# Note that the line 'default:' will be parsed as a label elsewhere.
3558							elsif ( $is_case_default{$statement_type}
3559							&& !is_balanced_closing_container(QUESTION_COLON) )
3560							{
3561							# mark it as a perltidy label type
3562	46					103	$type = 'J';
3563							}
3564
3565							# otherwise, it should be part of a ?/: operator
3566							else {
3567	187					834	( $type_sequence, $indent_flag ) =
3568							$self->decrease_nesting_depth( QUESTION_COLON,
3569							$rtoken_map->[$i_tok] );
3570	187	50				1093	if ( $last_nonblank_token eq '?' ) {
3571	0					0	$self->warning("Syntax error near ? :\n");
3572							}
3573							}
3574	271					541	return;
3575							} ## end sub do_COLON
3576
3577							sub do_PLUS_SIGN {
3578
3579	227			227	0	428	my $self = shift;
3580
3581							# '+' = what kind of plus?
3582	227	100				851	if ( $expecting == TERM ) {
		100
3583	13					49	my $number = $self->scan_number_fast();
3584
3585							# unary plus is safest assumption if not a number
3586	13	50				42	if ( !defined($number) ) { $type = 'p'; }
	13					41
3587							}
3588							elsif ( $expecting == OPERATOR ) {
3589							}
3590							else {
3591	3	100				14	if ( $next_type eq 'w' ) { $type = 'p' }
	2					3
3592							}
3593	227					389	return;
3594							} ## end sub do_PLUS_SIGN
3595
3596							sub do_AT_SIGN {
3597
3598	438			438	0	1153	my $self = shift;
3599
3600							# '@' = sigil for array?
3601	438	50				1478	$self->error_if_expecting_OPERATOR("Array")
3602							if ( $expecting == OPERATOR );
3603	438					1791	$self->scan_simple_identifier();
3604	438					918	return;
3605							} ## end sub do_AT_SIGN
3606
3607							sub do_PERCENT_SIGN {
3608
3609	202			202	0	576	my $self = shift;
3610
3611							# '%' = hash or modulo?
3612							# first guess is hash if no following blank or paren
3613	202	50				711	if ( $expecting == UNKNOWN ) {
3614	0	0	0			0	if ( $next_type ne 'b' && $next_type ne '(' ) {
3615	0					0	$expecting = TERM;
3616							}
3617							}
3618	202	100				655	if ( $expecting == TERM ) {
3619	192					794	$self->scan_simple_identifier();
3620							}
3621	202					503	return;
3622							} ## end sub do_PERCENT_SIGN
3623
3624							sub do_LEFT_SQUARE_BRACKET {
3625
3626	594			594	0	1188	my $self = shift;
3627
3628							# '['
3629	594					2453	$rsquare_bracket_type->[ ++$square_bracket_depth ] =
3630							$last_nonblank_token;
3631	594					2168	( $type_sequence, $indent_flag ) =
3632							$self->increase_nesting_depth( SQUARE_BRACKET,
3633							$rtoken_map->[$i_tok] );
3634
3635							# It may seem odd, but structural square brackets have
3636							# type '{' and '}'. This simplifies the indentation logic.
3637	594	100				1841	if ( !is_non_structural_brace() ) {
3638	287					733	$type = '{';
3639							}
3640	594					1440	$rsquare_bracket_structural_type->[$square_bracket_depth] = $type;
3641	594					1105	return;
3642							} ## end sub do_LEFT_SQUARE_BRACKET
3643
3644							sub do_RIGHT_SQUARE_BRACKET {
3645
3646	594			594	0	2351	my $self = shift;
3647
3648							# ']'
3649	594					2166	( $type_sequence, $indent_flag ) =
3650							$self->decrease_nesting_depth( SQUARE_BRACKET,
3651							$rtoken_map->[$i_tok] );
3652
3653	594	100				2061	if ( $rsquare_bracket_structural_type->[$square_bracket_depth] eq '{' )
3654							{
3655	287					1810	$type = '}';
3656							}
3657
3658							# propagate type information for smartmatch operator. This is
3659							# necessary to enable us to know if an operator or term is expected
3660							# next.
3661	594	100				1710	if ( $rsquare_bracket_type->[$square_bracket_depth] eq '~~' ) {
3662	20					42	$tok = $rsquare_bracket_type->[$square_bracket_depth];
3663							}
3664
3665	594	50				1567	if ( $square_bracket_depth > 0 ) { $square_bracket_depth--; }
	594					956
3666	594					989	return;
3667							} ## end sub do_RIGHT_SQUARE_BRACKET
3668
3669							sub do_MINUS_SIGN {
3670
3671	441			441	0	902	my $self = shift;
3672
3673							# '-' = what kind of minus?
3674	441	100	100			2843	if ( ( $expecting != OPERATOR )
		100
		100
3675							&& $is_file_test_operator{$next_tok} )
3676							{
3677	10					44	my ( $next_nonblank_token, $i_next ) =
3678							$self->find_next_nonblank_token( $i + 1, $rtokens,
3679							$max_token_index );
3680
3681							# check for a quoted word like "-w=>xx";
3682							# it is sufficient to just check for a following '='
3683	10	50				82	if ( $next_nonblank_token eq '=' ) {
3684	0					0	$type = 'm';
3685							}
3686							else {
3687	10					23	$i++;
3688	10					29	$tok .= $next_tok;
3689	10					24	$type = 'F';
3690							}
3691							}
3692							elsif ( $expecting == TERM ) {
3693	330					1051	my $number = $self->scan_number_fast();
3694
3695							# maybe part of bareword token? unary is safest
3696	330	100				877	if ( !defined($number) ) { $type = 'm'; }
	288					519
3697
3698							}
3699							elsif ( $expecting == OPERATOR ) {
3700							}
3701							else {
3702
3703	4	50				14	if ( $next_type eq 'w' ) {
3704	4					20	$type = 'm';
3705							}
3706							}
3707	441					763	return;
3708							} ## end sub do_MINUS_SIGN
3709
3710							sub do_CARAT_SIGN {
3711
3712	12			12	0	25	my $self = shift;
3713
3714							# '^'
3715							# check for special variables like ${^WARNING_BITS}
3716	12	100				44	if ( $expecting == TERM ) {
3717
3718	5	50	33			58	if ( $last_nonblank_token eq '{'
			33
3719							&& ( $next_tok !~ /^\d/ )
3720							&& ( $next_tok =~ /^\w/ ) )
3721							{
3722
3723	5	100				19	if ( $next_tok eq 'W' ) {
3724	1					4	$self->[_saw_perl_dash_w_] = 1;
3725							}
3726	5					12	$tok = $tok . $next_tok;
3727	5					10	$i = $i + 1;
3728	5					11	$type = 'w';
3729
3730							# Optional coding to try to catch syntax errors. This can
3731							# be removed if it ever causes incorrect warning messages.
3732							# The '{^' should be preceded by either by a type or '$#'
3733							# Examples:
3734							# $#{^CAPTURE} ok
3735							# *${^LAST_FH}{NAME} ok
3736							# @{^HOWDY} ok
3737							# $hash{^HOWDY} error
3738
3739							# Note that a type sigil '$' may be tokenized as 'Z'
3740							# after something like 'print', so allow type 'Z'
3741	5	0	33			23	if ( $last_last_nonblank_type ne 't'
			33
3742							&& $last_last_nonblank_type ne 'Z'
3743							&& $last_last_nonblank_token ne '$#' )
3744							{
3745	0					0	$self->warning("Possible syntax error near '{^'\n");
3746							}
3747							}
3748
3749							else {
3750	0	0				0	if ( !$self->error_if_expecting_TERM() ) {
3751
3752							# Something like this is valid but strange:
3753							# undef ^I;
3754	0					0	$self->complain("The '^' seems unusual here\n");
3755							}
3756							}
3757							}
3758	12					26	return;
3759							} ## end sub do_CARAT_SIGN
3760
3761							sub do_DOUBLE_COLON {
3762
3763	9			9	0	16	my $self = shift;
3764
3765							# '::' = probably a sub call
3766	9					35	$self->scan_bare_identifier();
3767	9					19	return;
3768							} ## end sub do_DOUBLE_COLON
3769
3770							sub do_LEFT_SHIFT {
3771
3772	7			7	0	30	my $self = shift;
3773
3774							# '<<' = maybe a here-doc?
3775	7	50				33	if ( $expecting != OPERATOR ) {
3776	7					24	my ( $found_target, $here_doc_target, $here_quote_character,
3777							$saw_error );
3778							(
3779	7					54	$found_target, $here_doc_target, $here_quote_character, $i,
3780							$saw_error
3781							)
3782							= $self->find_here_doc( $expecting, $i, $rtokens, $rtoken_map,
3783							$max_token_index );
3784
3785	7	50				30	if ($found_target) {
		0
3786	7					16	push @{$rhere_target_list},
	7					34
3787							[ $here_doc_target, $here_quote_character ];
3788	7					26	$type = 'h';
3789	7	50				69	if ( length($here_doc_target) > 80 ) {
		50
		100
3790	0					0	my $truncated = substr( $here_doc_target, 0, 80 );
3791	0					0	$self->complain("Long here-target: '$truncated' ...\n");
3792							}
3793							elsif ( !$here_doc_target ) {
3794	0	0				0	$self->warning(
3795							'Use of bare << to mean <<"" is deprecated' . "\n" )
3796							if ( !$here_quote_character );
3797							}
3798							elsif ( $here_doc_target !~ /^[A-Z_]\w+$/ ) {
3799	2					13	$self->complain(
3800							"Unconventional here-target: '$here_doc_target'\n");
3801							}
3802							else {
3803							## ok: nothing to complain about
3804							}
3805							}
3806							elsif ( $expecting == TERM ) {
3807	0	0				0	if ( !$saw_error ) {
3808
3809							# shouldn't happen..arriving here implies an error in
3810							# the logic in sub 'find_here_doc'
3811	0					0	if (DEVEL_MODE) {
3812							$self->Fault(<<EOM);
3813							Program bug; didn't find here doc target
3814							EOM
3815							}
3816							$self->warning(
3817	0					0	"Possible program error: didn't find here doc target\n"
3818							);
3819	0					0	$self->report_definite_bug();
3820							}
3821							}
3822
3823							# target not found, expecting == UNKNOWN
3824							else {
3825							# assume it is a shift
3826							}
3827							}
3828							else {
3829							}
3830	7					20	return;
3831							} ## end sub do_LEFT_SHIFT
3832
3833							sub do_NEW_HERE_DOC {
3834
3835							# '<<~' = a here-doc, new type added in v26
3836
3837	2			2	0	9	my $self = shift;
3838
3839							return
3840	2	50				10	if ( $i >= $max_token_index ); # here-doc not possible if end of line
3841	2	50				11	if ( $expecting != OPERATOR ) {
3842	2					8	my ( $found_target, $here_doc_target, $here_quote_character,
3843							$saw_error );
3844							(
3845	2					14	$found_target, $here_doc_target, $here_quote_character, $i,
3846							$saw_error
3847							)
3848							= $self->find_here_doc( $expecting, $i, $rtokens, $rtoken_map,
3849							$max_token_index );
3850
3851	2	50				9	if ($found_target) {
		0
3852
3853	2	50				18	if ( length($here_doc_target) > 80 ) {
		50
3854	0					0	my $truncated = substr( $here_doc_target, 0, 80 );
3855	0					0	$self->complain("Long here-target: '$truncated' ...\n");
3856							}
3857							elsif ( $here_doc_target !~ /^[A-Z_]\w+$/ ) {
3858	0					0	$self->complain(
3859							"Unconventional here-target: '$here_doc_target'\n");
3860							}
3861							else {
3862							## ok: nothing to complain about
3863							}
3864
3865							# Note that we put a leading space on the here quote
3866							# character indicate that it may be preceded by spaces
3867	2					8	$here_quote_character = SPACE . $here_quote_character;
3868	2					5	push @{$rhere_target_list},
	2					9
3869							[ $here_doc_target, $here_quote_character ];
3870	2					5	$type = 'h';
3871							}
3872
3873							# target not found ..
3874							elsif ( $expecting == TERM ) {
3875	0	0				0	if ( !$saw_error ) {
3876
3877							# shouldn't happen..arriving here implies an error in
3878							# the logic in sub 'find_here_doc'
3879	0					0	if (DEVEL_MODE) {
3880							$self->Fault(<<EOM);
3881							Program bug; didn't find here doc target
3882							EOM
3883							}
3884							$self->warning(
3885	0					0	"Possible program error: didn't find here doc target\n"
3886							);
3887	0					0	$self->report_definite_bug();
3888							}
3889							}
3890
3891							# Target not found, expecting==UNKNOWN
3892							else {
3893	0					0	$self->warning("didn't find here doc target after '<<~'\n");
3894							}
3895							}
3896							else {
3897	0					0	$self->error_if_expecting_OPERATOR();
3898							}
3899	2					6	return;
3900							} ## end sub do_NEW_HERE_DOC
3901
3902							sub do_POINTER {
3903
3904							# '->'
3905	886			886	0	1568	return;
3906							}
3907
3908							sub do_PLUS_PLUS {
3909
3910	46			46	0	182	my $self = shift;
3911
3912							# '++'
3913							# type = 'pp' for pre-increment, '++' for post-increment
3914	46	100				264	if ( $expecting == OPERATOR ) { $type = '++' }
	37	100				144
3915	7					22	elsif ( $expecting == TERM ) { $type = 'pp' }
3916
3917							# handle ( $expecting == UNKNOWN )
3918							else {
3919
3920							# look ahead ..
3921	2					8	my ( $next_nonblank_token, $i_next ) =
3922							$self->find_next_nonblank_token( $i, $rtokens, $max_token_index );
3923
3924							# Fix for c042: look past a side comment
3925	2	50				22	if ( $next_nonblank_token eq '#' ) {
3926	0					0	( $next_nonblank_token, $i_next ) =
3927							$self->find_next_nonblank_token( $max_token_index,
3928							$rtokens, $max_token_index );
3929							}
3930
3931	2	50				11	if ( $next_nonblank_token eq '$' ) { $type = 'pp' }
	0					0
3932							}
3933	46					113	return;
3934							} ## end sub do_PLUS_PLUS
3935
3936							sub do_FAT_COMMA {
3937
3938	1025			1025	0	1809	my $self = shift;
3939
3940							# '=>'
3941	1025	50				2465	if ( $last_nonblank_type eq $tok ) {
3942	0					0	$self->complain("Repeated '=>'s \n");
3943							}
3944
3945							# patch for operator_expected: note if we are in the list (use.t)
3946							# TODO: make version numbers a new token type
3947	1025	100				2364	if ( $statement_type eq 'use' ) { $statement_type = '_use' }
	18					42
3948	1025					1717	return;
3949							} ## end sub do_FAT_COMMA
3950
3951							sub do_MINUS_MINUS {
3952
3953	2			2	0	5	my $self = shift;
3954
3955							# '--'
3956							# type = 'mm' for pre-decrement, '--' for post-decrement
3957
3958	2	50				15	if ( $expecting == OPERATOR ) { $type = '--' }
	0	50				0
3959	2					7	elsif ( $expecting == TERM ) { $type = 'mm' }
3960
3961							# handle ( $expecting == UNKNOWN )
3962							else {
3963
3964							# look ahead ..
3965	0					0	my ( $next_nonblank_token, $i_next ) =
3966							$self->find_next_nonblank_token( $i, $rtokens, $max_token_index );
3967
3968							# Fix for c042: look past a side comment
3969	0	0				0	if ( $next_nonblank_token eq '#' ) {
3970	0					0	( $next_nonblank_token, $i_next ) =
3971							$self->find_next_nonblank_token( $max_token_index,
3972							$rtokens, $max_token_index );
3973							}
3974
3975	0	0				0	if ( $next_nonblank_token eq '$' ) { $type = 'mm' }
	0					0
3976							}
3977
3978	2					5	return;
3979							} ## end sub do_MINUS_MINUS
3980
3981							sub do_LOGICAL_AND {
3982
3983	58			58	0	136	my $self = shift;
3984
3985							# '&&'
3986	58	50	33			247	$self->error_if_expecting_TERM()
3987							if ( $expecting == TERM && $last_nonblank_token ne ',' ); #c015
3988	58					121	return;
3989							} ## end sub do_LOGICAL_AND
3990
3991							sub do_LOGICAL_OR {
3992
3993	74			74	0	170	my $self = shift;
3994
3995							# '\|\|'
3996	74	100	66			378	$self->error_if_expecting_TERM()
3997							if ( $expecting == TERM && $last_nonblank_token ne ',' ); #c015
3998	74					133	return;
3999							} ## end sub do_LOGICAL_OR
4000
4001							sub do_SLASH_SLASH {
4002
4003	10			10	0	22	my $self = shift;
4004
4005							# '//'
4006	10	100				31	$self->error_if_expecting_TERM()
4007							if ( $expecting == TERM );
4008	10					20	return;
4009							} ## end sub do_SLASH_SLASH
4010
4011							sub do_DIGITS {
4012
4013	1934			1934	0	3195	my $self = shift;
4014
4015							# 'd' = string of digits
4016	1934	50				4255	$self->error_if_expecting_OPERATOR("Number")
4017							if ( $expecting == OPERATOR );
4018
4019	1934					5147	my $number = $self->scan_number_fast();
4020	1934	50				4490	if ( !defined($number) ) {
4021
4022							# shouldn't happen - we should always get a number
4023	0					0	if (DEVEL_MODE) {
4024							$self->Fault(<<EOM);
4025							non-number beginning with digit--program bug
4026							EOM
4027							}
4028							$self->warning(
4029	0					0	"Unexpected error condition: non-number beginning with digit\n"
4030							);
4031	0					0	$self->report_definite_bug();
4032							}
4033	1934					3108	return;
4034							} ## end sub do_DIGITS
4035
4036							sub do_ATTRIBUTE_LIST {
4037
4038	39			39	0	121	my ( $self, $next_nonblank_token ) = @_;
4039
4040							# Called at a bareword encountered while in an attribute list
4041							# returns 'is_attribute':
4042							# true if attribute found
4043							# false if an attribute (continue parsing bareword)
4044
4045							# treat bare word followed by open paren like qw(
4046	39	100				120	if ( $next_nonblank_token eq '(' ) {
4047
4048							# For something like:
4049							# : prototype($$)
4050							# we should let do_scan_sub see it so that it can see
4051							# the prototype. All other attributes get parsed as a
4052							# quoted string.
4053	18	100				85	if ( $tok eq 'prototype' ) {
4054	2					6	$id_scan_state = 'prototype';
4055
4056							# start just after the word 'prototype'
4057	2					8	my $i_beg = $i + 1;
4058	2					23	( $i, $tok, $type, $id_scan_state ) = $self->do_scan_sub(
4059							{
4060							input_line => $input_line,
4061							i => $i,
4062							i_beg => $i_beg,
4063							tok => $tok,
4064							type => $type,
4065							rtokens => $rtokens,
4066							rtoken_map => $rtoken_map,
4067							id_scan_state => $id_scan_state,
4068							max_token_index => $max_token_index,
4069							}
4070							);
4071
4072							# If successful, mark as type 'q' to be consistent
4073							# with other attributes. Type 'w' would also work.
4074	2	50				12	if ( $i > $i_beg ) {
4075	2					5	$type = 'q';
4076	2					10	return 1;
4077							}
4078
4079							# If not successful, continue and parse as a quote.
4080							}
4081
4082							# All other attribute lists must be parsed as quotes
4083							# (see 'signatures.t' for good examples)
4084	16					58	$in_quote = $quote_items{'q'};
4085	16					47	$allowed_quote_modifiers = $quote_modifiers{'q'};
4086	16					35	$type = 'q';
4087	16					30	$quote_type = 'q';
4088	16					40	return 1;
4089							}
4090
4091							# handle bareword not followed by open paren
4092							else {
4093	21					66	$type = 'w';
4094	21					55	return 1;
4095							}
4096
4097							# attribute not found
4098	0					0	return;
4099							} ## end sub do_ATTRIBUTE_LIST
4100
4101							sub do_QUOTED_BAREWORD {
4102
4103	786			786	0	1475	my $self = shift;
4104
4105							# find type of a bareword followed by a '=>'
4106	786	100				4092	if ( $ris_constant->{$current_package}{$tok} ) {
		50
		50
4107	14					37	$type = 'C';
4108							}
4109							elsif ( $ris_user_function->{$current_package}{$tok} ) {
4110	0					0	$type = 'U';
4111	0					0	$prototype = $ruser_function_prototype->{$current_package}{$tok};
4112							}
4113							elsif ( $tok =~ /^v\d+$/ ) {
4114	0					0	$type = 'v';
4115	0					0	$self->report_v_string($tok);
4116							}
4117							else {
4118
4119							# Bareword followed by a fat comma - see 'git18.in'
4120							# If tok is something like 'x17' then it could
4121							# actually be operator x followed by number 17.
4122							# For example, here:
4123							# 123x17 => [ 792, 1224 ],
4124							# (a key of 123 repeated 17 times, perhaps not
4125							# what was intended). We will mark x17 as type
4126							# 'n' and it will be split. If the previous token
4127							# was also a bareword then it is not very clear is
4128							# going on. In this case we will not be sure that
4129							# an operator is expected, so we just mark it as a
4130							# bareword. Perl is a little murky in what it does
4131							# with stuff like this, and its behavior can change
4132							# over time. Something like
4133							# a x18 => [792, 1224], will compile as
4134							# a key with 18 a's. But something like
4135							# push @array, a x18;
4136							# is a syntax error.
4137	772	100	66			2813	if (
			33
			66
4138							$expecting == OPERATOR
4139							&& substr( $tok, 0, 1 ) eq 'x'
4140							&& ( length($tok) == 1
4141							\|\| substr( $tok, 1, 1 ) =~ /^\d/ )
4142							)
4143							{
4144	3					6	$type = 'n';
4145	3	50				11	if ( $self->split_pretoken(1) ) {
4146	3					6	$type = 'x';
4147	3					5	$tok = 'x';
4148							}
4149							}
4150							else {
4151
4152							# git #18
4153	769					1334	$type = 'w';
4154	769					2034	$self->error_if_expecting_OPERATOR();
4155							}
4156							}
4157	786					1276	return;
4158							} ## end sub do_QUOTED_BAREWORD
4159
4160							sub do_X_OPERATOR {
4161
4162	17			17	0	41	my $self = shift;
4163
4164	17	100				73	if ( $tok eq 'x' ) {
4165	15	50				86	if ( $rtokens->[ $i + 1 ] eq '=' ) { # x=
4166	0					0	$tok = 'x=';
4167	0					0	$type = $tok;
4168	0					0	$i++;
4169							}
4170							else {
4171	15					40	$type = 'x';
4172							}
4173							}
4174							else {
4175
4176							# Split a pretoken like 'x10' into 'x' and '10'.
4177							# Note: In previous versions of perltidy it was marked
4178							# as a number, $type = 'n', and fixed downstream by the
4179							# Formatter.
4180	2					4	$type = 'n';
4181	2	50				8	if ( $self->split_pretoken(1) ) {
4182	2					5	$type = 'x';
4183	2					5	$tok = 'x';
4184							}
4185							}
4186	17					39	return;
4187							} ## end sub do_X_OPERATOR
4188
4189							sub do_USE_CONSTANT {
4190
4191	16			16	0	47	my $self = shift;
4192
4193	16					64	$self->scan_bare_identifier();
4194	16					85	my ( $next_nonblank_tok2, $i_next2 ) =
4195							$self->find_next_nonblank_token( $i, $rtokens, $max_token_index );
4196
4197	16	50				89	if ($next_nonblank_tok2) {
4198
4199	16	100				59	if ( $is_keyword{$next_nonblank_tok2} ) {
4200
4201							# Assume qw is used as a quote and okay, as in:
4202							# use constant qw{ DEBUG 0 };
4203							# Not worth trying to parse for just a warning
4204
4205							# NOTE: This warning is deactivated because recent
4206							# versions of perl do not complain here, but
4207							# the coding is retained for reference.
4208	1					2	if ( 0 && $next_nonblank_tok2 ne 'qw' ) {
4209							$self->warning(
4210							"Attempting to define constant '$next_nonblank_tok2' which is a perl keyword\n"
4211							);
4212							}
4213							}
4214
4215							else {
4216	15					336	$ris_constant->{$current_package}{$next_nonblank_tok2} = 1;
4217							}
4218							}
4219	16					50	return;
4220							} ## end sub do_USE_CONSTANT
4221
4222							sub do_KEYWORD {
4223
4224	2641			2641	0	4493	my $self = shift;
4225
4226							# found a keyword - set any associated flags
4227	2641					4460	$type = 'k';
4228
4229							# Since for and foreach may not be followed immediately
4230							# by an opening paren, we have to remember which keyword
4231							# is associated with the next '('
4232							# Previously, before update c230 : if ( $is_for_foreach{$tok} ) {
4233							##(if elsif unless while until for foreach switch case given when catch)
4234	2641	100				7473	if ( $is_blocktype_with_paren{$tok} ) {
4235	395	100				1381	if ( new_statement_ok() ) {
4236	309					714	$want_paren = $tok;
4237							}
4238							}
4239
4240							# recognize 'use' statements, which are special
4241	2641	100	100			18746	if ( $is_use_require{$tok} ) {
		100	66
		100
		100
		100
		100
4242	175					349	$statement_type = $tok;
4243	175	50				577	$self->error_if_expecting_OPERATOR()
4244							if ( $expecting == OPERATOR );
4245							}
4246
4247							# remember my and our to check for trailing ": shared"
4248							elsif ( $is_my_our_state{$tok} ) {
4249	628					1360	$statement_type = $tok;
4250							}
4251
4252							# Check for misplaced 'elsif' and 'else', but allow isolated
4253							# else or elsif blocks to be formatted. This is indicated
4254							# by a last noblank token of ';'
4255							elsif ( $tok eq 'elsif' ) {
4256	29	50	66			208	if (
4257							$last_nonblank_token ne ';'
4258
4259							## !~ /^(if\|elsif\|unless)$/
4260							&& !$is_if_elsif_unless{$last_nonblank_block_type}
4261							)
4262							{
4263	0					0	$self->warning(
4264							"expecting '$tok' to follow one of 'if\|elsif\|unless'\n");
4265							}
4266							}
4267							elsif ( $tok eq 'else' ) {
4268
4269							# patched for SWITCH/CASE
4270	44	50	66			646	if (
			66
4271							$last_nonblank_token ne ';'
4272
4273							## !~ /^(if\|elsif\|unless\|case\|when)$/
4274							&& !$is_if_elsif_unless_case_when{$last_nonblank_block_type}
4275
4276							# patch to avoid an unwanted error message for
4277							# the case of a parenless 'case' (RT 105484):
4278							# switch ( 1 ) { case x { 2 } else { } }
4279							## !~ /^(if\|elsif\|unless\|case\|when)$/
4280							&& !$is_if_elsif_unless_case_when{$statement_type}
4281							)
4282							{
4283	0					0	$self->warning(
4284							"expecting '$tok' to follow one of 'if\|elsif\|unless\|case\|when'\n"
4285							);
4286							}
4287							}
4288
4289							# patch for SWITCH/CASE if 'case' and 'when are
4290							# treated as keywords. Also 'default' for Switch::Plain
4291							elsif ($tok eq 'when'
4292							\|\| $tok eq 'case'
4293							\|\| $tok eq 'default' )
4294							{
4295	56					115	$statement_type = $tok; # next '{' is block
4296							}
4297
4298							# feature 'err' was removed in Perl 5.10. So mark this as
4299							# a bareword unless an operator is expected (see c158).
4300							elsif ( $tok eq 'err' ) {
4301	1	50				6	if ( $expecting != OPERATOR ) { $type = 'w' }
	1					2
4302							}
4303							else {
4304							## no special treatment needed
4305							}
4306
4307	2641					4748	return;
4308							} ## end sub do_KEYWORD
4309
4310							sub do_QUOTE_OPERATOR {
4311
4312	202			202	0	471	my $self = shift;
4313
4314	202	50				724	if ( $expecting == OPERATOR ) {
4315
4316							# Be careful not to call an error for a qw quote
4317							# where a parenthesized list is allowed. For example,
4318							# it could also be a for/foreach construct such as
4319							#
4320							# foreach my $key qw\Uno Due Tres Quadro\ {
4321							# print "Set $key\n";
4322							# }
4323							#
4324
4325							# Or it could be a function call.
4326							# NOTE: Braces in something like &{ xxx } are not
4327							# marked as a block, we might have a method call.
4328							# &method(...), $method->(..), &{method}(...),
4329							# $ref[2](list) is ok & short for $ref[2]->(list)
4330							#
4331							# See notes in 'sub code_block_type' and
4332							# 'sub is_non_structural_brace'
4333
4334							my $paren_list_possible = $tok eq 'qw'
4335							&& ( $last_nonblank_token =~ /^([\]\}\&]\|\-\>)/
4336	0		0			0	\|\| $is_for_foreach{$want_paren} );
4337
4338	0	0				0	if ( !$paren_list_possible ) {
4339	0					0	$self->error_if_expecting_OPERATOR();
4340							}
4341							}
4342	202					553	$in_quote = $quote_items{$tok};
4343	202					533	$allowed_quote_modifiers = $quote_modifiers{$tok};
4344
4345							# All quote types are 'Q' except possibly qw quotes.
4346							# qw quotes are special in that they may generally be trimmed
4347							# of leading and trailing whitespace. So they are given a
4348							# separate type, 'q', unless requested otherwise.
4349	202	100	66			1078	$type =
4350							( $tok eq 'qw' && $self->[_trim_qw_] )
4351							? 'q'
4352							: 'Q';
4353	202					424	$quote_type = $type;
4354	202					390	return;
4355							} ## end sub do_QUOTE_OPERATOR
4356
4357							sub do_UNKNOWN_BAREWORD {
4358
4359	957			957	0	2249	my ( $self, $next_nonblank_token ) = @_;
4360
4361	957					3352	$self->scan_bare_identifier();
4362
4363	957	100	100			3732	if ( $statement_type eq 'use'
4364							&& $last_nonblank_token eq 'use' )
4365							{
4366	108					517	$rsaw_use_module->{$current_package}->{$tok} = 1;
4367							}
4368
4369	957	100				2534	if ( $type eq 'w' ) {
4370
4371	932	50				2556	if ( $expecting == OPERATOR ) {
4372
4373							# Patch to avoid error message for RPerl overloaded
4374							# operator functions: use overload
4375							# '+' => \&sse_add,
4376							# '-' => \&sse_sub,
4377							# '*' => \&sse_mul,
4378							# '/' => \&sse_div;
4379							# TODO: this could eventually be generalized
4380	0	0	0			0	if ( $rsaw_use_module->{$current_package}->{'RPerl'}
		0
		0
4381							&& $tok =~ /^sse_(mul\|div\|add\|sub)$/ )
4382							{
4383
4384							}
4385
4386							# Fix part 1 for git #63 in which a comment falls
4387							# between an -> and the following word. An
4388							# alternate fix would be to change operator_expected
4389							# to return an UNKNOWN for this type.
4390							elsif ( $last_nonblank_type eq '->' ) {
4391
4392							}
4393
4394							# don't complain about possible indirect object
4395							# notation.
4396							# For example:
4397							# package main;
4398							# sub new($) { ... }
4399							# $b = new A::; # calls A::new
4400							# $c = new A; # same thing but suspicious
4401							# This will call A::new but we have a 'new' in
4402							# main:: which looks like a constant.
4403							#
4404							elsif ( $last_nonblank_type eq 'C' ) {
4405	0	0				0	if ( $tok !~ /::$/ ) {
4406	0					0	$self->complain(<<EOM);
4407							Expecting operator after '$last_nonblank_token' but found bare word '$tok'
4408							Maybe indirectet object notation?
4409							EOM
4410							}
4411							}
4412							else {
4413	0					0	$self->error_if_expecting_OPERATOR("bareword");
4414							}
4415							}
4416
4417							# mark bare words immediately followed by a paren as
4418							# functions
4419	932					2563	$next_tok = $rtokens->[ $i + 1 ];
4420	932	100				2623	if ( $next_tok eq '(' ) {
4421
4422							# Patch for issue c151, where we are processing a snippet and
4423							# have not seen that SPACE is a constant. In this case 'x' is
4424							# probably an operator. The only disadvantage with an incorrect
4425							# guess is that the space after it may be incorrect. For example
4426							# $str .= SPACE x ( 16 - length($str) ); See also b1410.
4427	276	50	33			1554	if ( $tok eq 'x' && $last_nonblank_type eq 'w' ) { $type = 'x' }
	0	50				0
4428
4429							# Fix part 2 for git #63. Leave type as 'w' to keep
4430							# the type the same as if the -> were not separated
4431	276					580	elsif ( $last_nonblank_type ne '->' ) { $type = 'U' }
4432
4433							# not a special case
4434							else { }
4435
4436							}
4437
4438							# underscore after file test operator is file handle
4439	932	50	66			2723	if ( $tok eq '_' && $last_nonblank_type eq 'F' ) {
4440	0					0	$type = 'Z';
4441							}
4442
4443							# patch for SWITCH/CASE if 'case' and 'when are
4444							# not treated as keywords:
4445	932	50	33			4849	if (
			33
			33
4446							( $tok eq 'case' && $rbrace_type->[$brace_depth] eq 'switch' )
4447							\|\| ( $tok eq 'when'
4448							&& $rbrace_type->[$brace_depth] eq 'given' )
4449							)
4450							{
4451	0					0	$statement_type = $tok; # next '{' is block
4452	0					0	$type = 'k'; # for keyword syntax coloring
4453							}
4454	932	100				2708	if ( $next_nonblank_token eq '(' ) {
4455
4456							# patch for SWITCH/CASE if switch and given not keywords
4457							# Switch is not a perl 5 keyword, but we will gamble
4458							# and mark switch followed by paren as a keyword. This
4459							# is only necessary to get html syntax coloring nice,
4460							# and does not commit this as being a switch/case.
4461	241	50	33			1877	if ( $tok eq 'switch' \|\| $tok eq 'given' ) {
		50	33
4462	0					0	$type = 'k'; # for keyword syntax coloring
4463							}
4464
4465							# mark 'x' as operator for something like this (see b1410)
4466							# my $line = join( LD_X, map { LD_H x ( $_ + 2 ) } @$widths );
4467							elsif ( $tok eq 'x' && $last_nonblank_type eq 'w' ) {
4468	0					0	$type = 'x';
4469							}
4470							else {
4471							## not a special case
4472							}
4473							}
4474							}
4475	957					1795	return;
4476							} ## end sub do_UNKNOWN_BAREWORD
4477
4478							sub sub_attribute_ok_here {
4479
4480	35			35	0	142	my ( $self, $tok_kw, $next_nonblank_token, $i_next ) = @_;
4481
4482							# Decide if 'sub :' can be the start of a sub attribute list.
4483							# We will decide based on if the colon is followed by a
4484							# bareword which is not a keyword.
4485							# Changed inext+1 to inext to fixed case b1190.
4486	35					78	my $sub_attribute_ok_here;
4487	35	50	66			197	if ( $is_sub{$tok_kw}
			66
4488							&& $expecting != OPERATOR
4489							&& $next_nonblank_token eq ':' )
4490							{
4491	3					17	my ( $nn_nonblank_token, $i_nn ) =
4492							$self->find_next_nonblank_token( $i_next, $rtokens,
4493							$max_token_index );
4494							$sub_attribute_ok_here =
4495							$nn_nonblank_token =~ /^\w/
4496							&& $nn_nonblank_token !~ /^\d/
4497	3		66			50	&& !$is_keyword{$nn_nonblank_token};
4498							}
4499	35					222	return $sub_attribute_ok_here;
4500							} ## end sub sub_attribute_ok_here
4501
4502							sub do_BAREWORD {
4503
4504	5832			5832	0	10830	my ( $self, $is_END_or_DATA ) = @_;
4505
4506							# handle a bareword token:
4507							# returns
4508							# true if this token ends the current line
4509							# false otherwise
4510
4511	5832					16261	my ( $next_nonblank_token, $i_next ) =
4512							$self->find_next_nonblank_token( $i, $rtokens, $max_token_index );
4513
4514							# a bare word immediately followed by :: is not a keyword;
4515							# use $tok_kw when testing for keywords to avoid a mistake
4516	5832					11025	my $tok_kw = $tok;
4517	5832	100	100			17319	if ( $rtokens->[ $i + 1 ] eq ':'
4518							&& $rtokens->[ $i + 2 ] eq ':' )
4519							{
4520	266					617	$tok_kw .= '::';
4521							}
4522
4523	5832	100				13183	if ( $self->[_in_attribute_list_] ) {
4524	39					206	my $is_attribute = $self->do_ATTRIBUTE_LIST($next_nonblank_token);
4525	39	50				148	return if ($is_attribute);
4526							}
4527
4528							#----------------------------------------
4529							# Starting final if-elsif- chain of tests
4530							#----------------------------------------
4531
4532							# This is the return flag:
4533							# true => this is the last token on the line
4534							# false => keep tokenizing the line
4535	5793					8750	my $is_last;
4536
4537							# The following blocks of code must update these vars:
4538							# $type - the final token type, must always be set
4539
4540							# In addition, if additional pretokens are added:
4541							# $tok - the final token
4542							# $i - the index of the last pretoken
4543
4544							# They may also need to check and set various flags
4545
4546							# Scan a bare word following a -> as an identifier; it could
4547							# have a long package name. Fixes c037, c041.
4548	5793	100	100			94991	if ( $last_nonblank_token eq '->' ) {
		100	100
		100	66
		100	66
		100	100
		100	66
		100	66
		50	100
		50	100
		100	33
		100	0
		100	0
		100	33
		100	0
		100	0
		100	66
		100	100
		100	100
			100
			66
			66
			66
4549	669					2500	$self->scan_bare_identifier();
4550
4551							# a bareward after '->' gets type 'i'
4552	669					1243	$type = 'i';
4553							}
4554
4555							# Quote a word followed by => operator
4556							# unless the word __END__ or __DATA__ and the only word on
4557							# the line.
4558							elsif ( !$is_END_or_DATA
4559							&& $next_nonblank_token eq '='
4560							&& $rtokens->[ $i_next + 1 ] eq '>' )
4561							{
4562	786					2500	$self->do_QUOTED_BAREWORD();
4563							}
4564
4565							# quote a bare word within braces..like xxx->{s}; note that we
4566							# must be sure this is not a structural brace, to avoid
4567							# mistaking {s} in the following for a quoted bare word:
4568							# for(@[){s}bla}BLA}
4569							# Also treat q in something like var{-q} as a bare word, not
4570							# a quote operator
4571							elsif (
4572							$next_nonblank_token eq '}'
4573							&& (
4574							$last_nonblank_type eq 'L'
4575							\|\| ( $last_nonblank_type eq 'm'
4576							&& $last_last_nonblank_type eq 'L' )
4577							)
4578							)
4579							{
4580	100					263	$type = 'w';
4581							}
4582
4583							# handle operator x (now we know it isn't $x=)
4584							elsif (
4585							$expecting == OPERATOR
4586							&& substr( $tok, 0, 1 ) eq 'x'
4587							&& ( length($tok) == 1
4588							\|\| substr( $tok, 1, 1 ) =~ /^\d/ )
4589							)
4590							{
4591	17					137	$self->do_X_OPERATOR();
4592							}
4593							elsif ( $tok_kw eq 'CORE::' ) {
4594	3					9	$type = $tok = $tok_kw;
4595	3					6	$i += 2;
4596							}
4597							elsif ( ( $tok eq 'strict' )
4598							and ( $last_nonblank_token eq 'use' ) )
4599							{
4600	14					40	$self->[_saw_use_strict_] = 1;
4601	14					83	$self->scan_bare_identifier();
4602							}
4603
4604							elsif ( ( $tok eq 'warnings' )
4605							and ( $last_nonblank_token eq 'use' ) )
4606							{
4607	7					22	$self->[_saw_perl_dash_w_] = 1;
4608
4609							# scan as identifier, so that we pick up something like:
4610							# use warnings::register
4611	7					28	$self->scan_bare_identifier();
4612							}
4613
4614							elsif (
4615							$tok eq 'AutoLoader'
4616							&& $self->[_look_for_autoloader_]
4617							&& (
4618							$last_nonblank_token eq 'use'
4619
4620							# these regexes are from AutoSplit.pm, which we want
4621							# to mimic
4622							\|\| $input_line =~ /^\s*(use\|require)\s+AutoLoader\b/
4623							\|\| $input_line =~ /\bISA\s=.\bAutoLoader\b/
4624							)
4625							)
4626							{
4627	0					0	$self->write_logfile_entry("AutoLoader seen, -nlal deactivates\n");
4628	0					0	$self->[_saw_autoloader_] = 1;
4629	0					0	$self->[_look_for_autoloader_] = 0;
4630	0					0	$self->scan_bare_identifier();
4631							}
4632
4633							elsif (
4634							$tok eq 'SelfLoader'
4635							&& $self->[_look_for_selfloader_]
4636							&& ( $last_nonblank_token eq 'use'
4637							\|\| $input_line =~ /^\s*(use\|require)\s+SelfLoader\b/
4638							\|\| $input_line =~ /\bISA\s=.\bSelfLoader\b/ )
4639							)
4640							{
4641	0					0	$self->write_logfile_entry("SelfLoader seen, -nlsl deactivates\n");
4642	0					0	$self->[_saw_selfloader_] = 1;
4643	0					0	$self->[_look_for_selfloader_] = 0;
4644	0					0	$self->scan_bare_identifier();
4645							}
4646
4647							elsif ( ( $tok eq 'constant' )
4648							and ( $last_nonblank_token eq 'use' ) )
4649							{
4650	16					88	$self->do_USE_CONSTANT();
4651							}
4652
4653							# various quote operators
4654							elsif ( $is_q_qq_qw_qx_qr_s_y_tr_m{$tok} ) {
4655	202					993	$self->do_QUOTE_OPERATOR();
4656							}
4657
4658							# check for a statement label
4659							elsif (
4660							( $next_nonblank_token eq ':' )
4661							&& ( $rtokens->[ $i_next + 1 ] ne ':' )
4662							&& ( $i_next <= $max_token_index ) # colon on same line
4663
4664							# like 'sub : lvalue' ?
4665							&& !$self->sub_attribute_ok_here( $tok_kw, $next_nonblank_token,
4666							$i_next )
4667							&& new_statement_ok()
4668							)
4669							{
4670	33	100				181	if ( $tok !~ /[A-Z]/ ) {
4671	15					32	push @{ $self->[_rlower_case_labels_at_] }, $input_line_number;
	15					58
4672							}
4673	33					117	$type = 'J';
4674	33					104	$tok .= ':';
4675	33					68	$i = $i_next;
4676							}
4677
4678							# 'sub' or other sub alias
4679							elsif ( $is_sub{$tok_kw} ) {
4680
4681							# Update for --use-feature=class (rt145706):
4682							# We have to be extra careful to avoid misparsing other uses of
4683							# 'method' in older scripts.
4684	302	100	100			1459	if ( $tok_kw eq 'method' && $guess_if_method ) {
4685	10	100	66			102	if ( $expecting == OPERATOR
			100
4686							\|\| $next_nonblank_token !~ /^[\w\:]/
4687							\|\| !$self->method_ok_here() )
4688							{
4689	7					28	$self->do_UNKNOWN_BAREWORD($next_nonblank_token);
4690							}
4691							else {
4692	3					182	initialize_subname();
4693	3					18	$self->scan_id();
4694							}
4695							}
4696							else {
4697	292	50				895	$self->error_if_expecting_OPERATOR()
4698							if ( $expecting == OPERATOR );
4699	292					1143	initialize_subname();
4700	292					1063	$self->scan_id();
4701							}
4702							}
4703
4704							# 'package'
4705							elsif ( $is_package{$tok_kw} ) {
4706
4707							# Update for --use-feature=class (rt145706):
4708							# We have to be extra careful because 'class' may be used for other
4709							# purposes on older code; i.e.
4710							# class($x) - valid sub call
4711							# package($x) - error
4712	30	100				95	if ( $tok_kw eq 'class' ) {
4713	8	100	66			68	if ( $expecting == OPERATOR
			100
4714							\|\| $next_nonblank_token !~ /^[\w\:]/
4715							\|\| !$self->class_ok_here() )
4716							{
4717	4					16	$self->do_UNKNOWN_BAREWORD($next_nonblank_token);
4718							}
4719	4					12	else { $self->scan_id() }
4720							}
4721							else {
4722	22	50				65	$self->error_if_expecting_OPERATOR()
4723							if ( $expecting == OPERATOR );
4724	22					86	$self->scan_id();
4725							}
4726							}
4727
4728							# Fix for c035: split 'format' from 'is_format_END_DATA' to be
4729							# more restrictive. Require a new statement to be ok here.
4730							elsif ( $tok_kw eq 'format' && new_statement_ok() ) {
4731	1					3	$type = ';'; # make tokenizer look for TERM next
4732	1					4	$self->[_in_format_] = 1;
4733	1					2	$is_last = 1; ## is last token on this line
4734							}
4735
4736							# Note on token types for format, __DATA__, __END__:
4737							# It simplifies things to give these type ';', so that when we
4738							# start rescanning we will be expecting a token of type TERM.
4739							# We will switch to type 'k' before outputting the tokens.
4740							elsif ( $is_END_DATA{$tok_kw} ) {
4741	7					28	$type = ';'; # make tokenizer look for TERM next
4742
4743							# Remember that we are in one of these three sections
4744	7					26	$self->[ $is_END_DATA{$tok_kw} ] = 1;
4745	7					12	$is_last = 1; ## is last token on this line
4746							}
4747
4748							elsif ( $is_keyword{$tok_kw} ) {
4749	2641					8749	$self->do_KEYWORD();
4750							}
4751
4752							# check for inline label following
4753							# /^(redo\|last\|next\|goto)$/
4754							elsif (( $last_nonblank_type eq 'k' )
4755							&& ( $is_redo_last_next_goto{$last_nonblank_token} ) )
4756							{
4757	19					54	$type = 'j';
4758							}
4759
4760							# something else --
4761							else {
4762	946					3254	$self->do_UNKNOWN_BAREWORD($next_nonblank_token);
4763							}
4764
4765	5793					11682	return $is_last;
4766
4767							} ## end sub do_BAREWORD
4768
4769							sub do_FOLLOW_QUOTE {
4770
4771	2768			2768	0	4389	my $self = shift;
4772
4773							# Continue following a quote on a new line
4774	2768					4571	$type = $quote_type;
4775
4776	2768	100				4066	if ( !@{$routput_token_list} ) { # initialize if continuation line
	2768					7093
4777	184					384	push( @{$routput_token_list}, $i );
	184					398
4778	184					475	$routput_token_type->[$i] = $type;
4779
4780							}
4781
4782							# scan for the end of the quote or pattern
4783							(
4784	2768					8854	$i,
4785							$in_quote,
4786							$quote_character,
4787							$quote_pos,
4788							$quote_depth,
4789							$quoted_string_1,
4790							$quoted_string_2,
4791
4792							) = $self->do_quote(
4793
4794							$i,
4795							$in_quote,
4796							$quote_character,
4797							$quote_pos,
4798							$quote_depth,
4799							$quoted_string_1,
4800							$quoted_string_2,
4801							$rtokens,
4802							$rtoken_map,
4803							$max_token_index,
4804
4805							);
4806
4807							# all done if we didn't find it
4808	2768	100				6930	if ($in_quote) { return }
	183					398
4809
4810							# save pattern and replacement text for rescanning
4811	2585					4350	my $qs1 = $quoted_string_1;
4812
4813							# re-initialize for next search
4814	2585					4052	$quote_character = EMPTY_STRING;
4815	2585					3704	$quote_pos = 0;
4816	2585					3974	$quote_type = 'Q';
4817	2585					3831	$quoted_string_1 = EMPTY_STRING;
4818	2585					3911	$quoted_string_2 = EMPTY_STRING;
4819	2585	100				6397	if ( ++$i > $max_token_index ) { return }
	116					376
4820
4821							# look for any modifiers
4822	2469	100				5335	if ($allowed_quote_modifiers) {
4823
4824							# check for exact quote modifiers
4825	144	100				750	if ( $rtokens->[$i] =~ /^[A-Za-z_]/ ) {
4826	30					63	my $str = $rtokens->[$i];
4827	30					55	my $saw_modifier_e;
4828	30					521	while ( $str =~ /\G$allowed_quote_modifiers/gc ) {
4829	47					110	my $pos = pos($str);
4830	47					111	my $char = substr( $str, $pos - 1, 1 );
4831	47		66			301	$saw_modifier_e \|\|= ( $char eq 'e' );
4832							}
4833
4834							# For an 'e' quote modifier we must scan the replacement
4835							# text for here-doc targets...
4836							# but if the modifier starts a new line we can skip
4837							# this because either the here doc will be fully
4838							# contained in the replacement text (so we can
4839							# ignore it) or Perl will not find it.
4840							# See test 'here2.in'.
4841	30	50	66			160	if ( $saw_modifier_e && $i_tok >= 0 ) {
4842
4843	0					0	my $rht = $self->scan_replacement_text($qs1);
4844
4845							# Change type from 'Q' to 'h' for quotes with
4846							# here-doc targets so that the formatter (see sub
4847							# process_line_of_CODE) will not make any line
4848							# breaks after this point.
4849	0	0				0	if ($rht) {
4850	0					0	push @{$rhere_target_list}, @{$rht};
	0					0
	0					0
4851	0					0	$type = 'h';
4852	0	0				0	if ( $i_tok < 0 ) {
4853	0					0	my $ilast = $routput_token_list->[-1];
4854	0					0	$routput_token_type->[$ilast] = $type;
4855							}
4856							}
4857							}
4858
4859	30	50				110	if ( defined( pos($str) ) ) {
4860
4861							# matched
4862	30	50				97	if ( pos($str) == length($str) ) {
4863	30	50				128	if ( ++$i > $max_token_index ) { return }
	0					0
4864							}
4865
4866							# Looks like a joined quote modifier
4867							# and keyword, maybe something like
4868							# s/xxx/yyy/gefor @k=...
4869							# Example is "galgen.pl". Would have to split
4870							# the word and insert a new token in the
4871							# pre-token list. This is so rare that I haven't
4872							# done it. Will just issue a warning citation.
4873
4874							# This error might also be triggered if my quote
4875							# modifier characters are incomplete
4876							else {
4877	0					0	$self->warning(<<EOM);
4878
4879							Partial match to quote modifier $allowed_quote_modifiers at word: '$str'
4880							Please put a space between quote modifiers and trailing keywords.
4881							EOM
4882
4883							# print "token $rtokens->[$i]\n";
4884							# my $num = length($str) - pos($str);
4885							# $rtokens->[$i]=substr($rtokens->[$i],pos($str),$num);
4886							# print "continuing with new token $rtokens->[$i]\n";
4887
4888							# skipping past this token does least damage
4889	0	0				0	if ( ++$i > $max_token_index ) { return }
	0					0
4890							}
4891							}
4892							else {
4893
4894							# example file: rokicki4.pl
4895							# This error might also be triggered if my quote
4896							# modifier characters are incomplete
4897	0					0	$self->write_logfile_entry(
4898							"Note: found word $str at quote modifier location\n");
4899							}
4900							}
4901
4902							# re-initialize
4903	144					298	$allowed_quote_modifiers = EMPTY_STRING;
4904							}
4905	2469					4483	return;
4906							} ## end sub do_FOLLOW_QUOTE
4907
4908							# ------------------------------------------------------------
4909							# begin hash of code for handling most token types
4910							# ------------------------------------------------------------
4911							my $tokenization_code = {
4912
4913							'>' => \&do_GREATER_THAN_SIGN,
4914							'\|' => \&do_VERTICAL_LINE,
4915							'$' => \&do_DOLLAR_SIGN,
4916							'(' => \&do_LEFT_PARENTHESIS,
4917							')' => \&do_RIGHT_PARENTHESIS,
4918							',' => \&do_COMMA,
4919							';' => \&do_SEMICOLON,
4920							'"' => \&do_QUOTATION_MARK,
4921							"'" => \&do_APOSTROPHE,
4922							'`' => \&do_BACKTICK,
4923							'/' => \&do_SLASH,
4924							'{' => \&do_LEFT_CURLY_BRACKET,
4925							'}' => \&do_RIGHT_CURLY_BRACKET,
4926							'&' => \&do_AMPERSAND,
4927							'<' => \&do_LESS_THAN_SIGN,
4928							'?' => \&do_QUESTION_MARK,
4929							'*' => \&do_STAR,
4930							'.' => \&do_DOT,
4931							':' => \&do_COLON,
4932							'+' => \&do_PLUS_SIGN,
4933							'@' => \&do_AT_SIGN,
4934							'%' => \&do_PERCENT_SIGN,
4935							'[' => \&do_LEFT_SQUARE_BRACKET,
4936							']' => \&do_RIGHT_SQUARE_BRACKET,
4937							'-' => \&do_MINUS_SIGN,
4938							'^' => \&do_CARAT_SIGN,
4939							'::' => \&do_DOUBLE_COLON,
4940							'<<' => \&do_LEFT_SHIFT,
4941							'<<~' => \&do_NEW_HERE_DOC,
4942							'->' => \&do_POINTER,
4943							'++' => \&do_PLUS_PLUS,
4944							'=>' => \&do_FAT_COMMA,
4945							'--' => \&do_MINUS_MINUS,
4946							'&&' => \&do_LOGICAL_AND,
4947							'\|\|' => \&do_LOGICAL_OR,
4948							'//' => \&do_SLASH_SLASH,
4949
4950							# No special code for these types yet, but syntax checks
4951							# could be added.
4952							## '!' => undef,
4953							## '!=' => undef,
4954							## '!~' => undef,
4955							## '%=' => undef,
4956							## '&&=' => undef,
4957							## '&=' => undef,
4958							## '+=' => undef,
4959							## '-=' => undef,
4960							## '..' => undef,
4961							## '..' => undef,
4962							## '...' => undef,
4963							## '.=' => undef,
4964							## '<<=' => undef,
4965							## '<=' => undef,
4966							## '<=>' => undef,
4967							## '<>' => undef,
4968							## '=' => undef,
4969							## '==' => undef,
4970							## '=~' => undef,
4971							## '>=' => undef,
4972							## '>>' => undef,
4973							## '>>=' => undef,
4974							## '\\' => undef,
4975							## '^=' => undef,
4976							## '\|=' => undef,
4977							## '\|\|=' => undef,
4978							## '//=' => undef,
4979							## '~' => undef,
4980							## '~~' => undef,
4981							## '!~~' => undef,
4982
4983							};
4984
4985							# ------------------------------------------------------------
4986							# end hash of code for handling individual token types
4987							# ------------------------------------------------------------
4988
4989	39			39		412	use constant DEBUG_TOKENIZE => 0;
	39					157
	39					134030
4990
4991							sub tokenize_this_line {
4992
4993							# This routine breaks a line of perl code into tokens which are of use in
4994							# indentation and reformatting. One of my goals has been to define tokens
4995							# such that a newline may be inserted between any pair of tokens without
4996							# changing or invalidating the program. This version comes close to this,
4997							# although there are necessarily a few exceptions which must be caught by
4998							# the formatter. Many of these involve the treatment of bare words.
4999							#
5000							# The tokens and their types are returned in arrays. See previous
5001							# routine for their names.
5002							#
5003							# See also the array "valid_token_types" in the BEGIN section for an
5004							# up-to-date list.
5005							#
5006							# To simplify things, token types are either a single character, or they
5007							# are identical to the tokens themselves.
5008							#
5009							# As a debugging aid, the -D flag creates a file containing a side-by-side
5010							# comparison of the input string and its tokenization for each line of a file.
5011							# This is an invaluable debugging aid.
5012							#
5013							# In addition to tokens, and some associated quantities, the tokenizer
5014							# also returns flags indication any special line types. These include
5015							# quotes, here_docs, formats.
5016							#
5017							# -----------------------------------------------------------------------
5018							#
5019							# How to add NEW_TOKENS:
5020							#
5021							# New token types will undoubtedly be needed in the future both to keep up
5022							# with changes in perl and to help adapt the tokenizer to other applications.
5023							#
5024							# Here are some notes on the minimal steps. I wrote these notes while
5025							# adding the 'v' token type for v-strings, which are things like version
5026							# numbers 5.6.0, and ip addresses, and will use that as an example. ( You
5027							# can use your editor to search for the string "NEW_TOKENS" to find the
5028							# appropriate sections to change):
5029							#
5030							# *. Try to talk somebody else into doing it! If not, ..
5031							#
5032							# *. Make a backup of your current version in case things don't work out!
5033							#
5034							# *. Think of a new, unused character for the token type, and add to
5035							# the array @valid_token_types in the BEGIN section of this package.
5036							# For example, I used 'v' for v-strings.
5037							#
5038							# *. Implement coding to recognize the $type of the token in this routine.
5039							# This is the hardest part, and is best done by imitating or modifying
5040							# some of the existing coding. For example, to recognize v-strings, I
5041							# patched 'sub scan_bare_identifier' to recognize v-strings beginning with
5042							# 'v' and 'sub scan_number' to recognize v-strings without the leading 'v'.
5043							#
5044							# *. Update sub operator_expected. This update is critically important but
5045							# the coding is trivial. Look at the comments in that routine for help.
5046							# For v-strings, which should behave like numbers, I just added 'v' to the
5047							# regex used to handle numbers and strings (types 'n' and 'Q').
5048							#
5049							# *. Implement a 'bond strength' rule in sub set_bond_strengths in
5050							# Perl::Tidy::Formatter for breaking lines around this token type. You can
5051							# skip this step and take the default at first, then adjust later to get
5052							# desired results. For adding type 'v', I looked at sub bond_strength and
5053							# saw that number type 'n' was using default strengths, so I didn't do
5054							# anything. I may tune it up someday if I don't like the way line
5055							# breaks with v-strings look.
5056							#
5057							# *. Implement a 'whitespace' rule in sub set_whitespace_flags in
5058							# Perl::Tidy::Formatter. For adding type 'v', I looked at this routine
5059							# and saw that type 'n' used spaces on both sides, so I just added 'v'
5060							# to the array @spaces_both_sides.
5061							#
5062							# *. Update HtmlWriter package so that users can colorize the token as
5063							# desired. This is quite easy; see comments identified by 'NEW_TOKENS' in
5064							# that package. For v-strings, I initially chose to use a default color
5065							# equal to the default for numbers, but it might be nice to change that
5066							# eventually.
5067							#
5068							# *. Update comments in Perl::Tidy::Tokenizer::dump_token_types.
5069							#
5070							# *. Run lots and lots of debug tests. Start with special files designed
5071							# to test the new token type. Run with the -D flag to create a .DEBUG
5072							# file which shows the tokenization. When these work ok, test as many old
5073							# scripts as possible. Start with all of the '.t' files in the 'test'
5074							# directory of the distribution file. Compare .tdy output with previous
5075							# version and updated version to see the differences. Then include as
5076							# many more files as possible. My own technique has been to collect a huge
5077							# number of perl scripts (thousands!) into one directory and run perltidy
5078							# *, then run diff between the output of the previous version and the
5079							# current version.
5080							#
5081							# *. For another example, search for the smartmatch operator '~~'
5082							# with your editor to see where updates were made for it.
5083							#
5084							# -----------------------------------------------------------------------
5085
5086	7510			7510	0	15198	my ( $self, $line_of_tokens ) = @_;
5087	7510					14684	my ($untrimmed_input_line) = $line_of_tokens->{_line_text};
5088
5089							# Extract line number for use in error messages
5090	7510					12413	$input_line_number = $line_of_tokens->{_line_number};
5091
5092							# Check for pod documentation
5093	7510	100	66			20124	if ( substr( $untrimmed_input_line, 0, 1 ) eq '='
5094							&& $untrimmed_input_line =~ /^=[A-Za-z_]/ )
5095							{
5096
5097							# Must not be in multi-line quote
5098							# and must not be in an equation
5099	14					38	my $blank_after_Z = 1;
5100	14	50	33			109	if (
5101							!$in_quote
5102							&& ( $self->operator_expected( '=', 'b', $blank_after_Z ) ==
5103							TERM )
5104							)
5105							{
5106	14					38	$self->[_in_pod_] = 1;
5107	14					34	return;
5108							}
5109							}
5110
5111	7496					13911	$input_line = $untrimmed_input_line;
5112
5113	7496					13780	chomp $input_line;
5114
5115							# Set a flag to indicate if we might be at an __END__ or __DATA__ line
5116							# This will be used below to avoid quoting a bare word followed by
5117							# a fat comma.
5118	7496					10905	my $is_END_or_DATA;
5119
5120							# Reinitialize the multi-line quote flag
5121	7496	100	100			19124	if ( $in_quote && $quote_type eq 'Q' ) {
5122	47					148	$line_of_tokens->{_starting_in_quote} = 1;
5123							}
5124							else {
5125	7449					12676	$line_of_tokens->{_starting_in_quote} = 0;
5126
5127							# Trim start of this line unless we are continuing a quoted line.
5128							# Do not trim end because we might end in a quote (test: deken4.pl)
5129							# Perl::Tidy::Formatter will delete needless trailing blanks
5130	7449	100				34073	if ( !length($input_line) ) {
		100
5131
5132							# line is empty
5133							}
5134							elsif ( $input_line =~ m/\S/g ) {
5135
5136							# There are $spaces blank characters before a nonblank character
5137	6643					13170	my $spaces = pos($input_line) - 1;
5138	6643	100				15364	if ( $spaces > 0 ) {
5139
5140							# Trim the leading spaces
5141	3541					9010	$input_line = substr( $input_line, $spaces );
5142
5143							# Find actual space count if there are leading tabs
5144	3541	100	66			12825	if (
5145							ord( substr( $untrimmed_input_line, 0, 1 ) ) == ORD_TAB
5146							&& $untrimmed_input_line =~ /^(\t+)/ )
5147							{
5148	213					455	my $tabsize = $self->[_tabsize_];
5149	213					605	$spaces += length($1) * ( $tabsize - 1 );
5150							}
5151
5152							# Calculate a guessed level for nonblank lines to avoid
5153							# calls to sub guess_old_indentation_level()
5154	3541					7194	my $indent_columns = $self->[_indent_columns_];
5155							$line_of_tokens->{_guessed_indentation_level} =
5156	3541					10218	int( $spaces / $indent_columns );
5157							}
5158							}
5159							else {
5160
5161							# line has all blank characters
5162	9					37	$input_line = EMPTY_STRING;
5163							}
5164
5165							}
5166
5167	7496	100				16380	if ( !$in_quote ) {
5168
5169							# Optimize handling of a blank line
5170	7312	100				16299	if ( !length($input_line) ) {
5171	806					2275	$line_of_tokens->{_line_type} = 'CODE';
5172	806					1835	$line_of_tokens->{_rtokens} = [];
5173	806					2061	$line_of_tokens->{_rtoken_type} = [];
5174	806					1774	$line_of_tokens->{_rlevels} = [];
5175	806					1679	$line_of_tokens->{_rci_levels} = [];
5176	806					1670	$line_of_tokens->{_rblock_type} = [];
5177	806					1703	$line_of_tokens->{_nesting_tokens_0} = $nesting_token_string;
5178	806					2236	$line_of_tokens->{_nesting_blocks_0} = $nesting_block_string;
5179	806					1694	return;
5180							}
5181
5182							# Check comments
5183	6506	100				15435	if ( substr( $input_line, 0, 1 ) eq '#' ) {
5184
5185							# and check for skipped section
5186	788	50	66			4623	if (
			66
			66
5187							(
5188							substr( $input_line, 0, 4 ) eq '#<<V'
5189							\|\| $rOpts_code_skipping_begin
5190							)
5191							&& $rOpts_code_skipping
5192							&& $input_line =~ /$code_skipping_pattern_begin/
5193							)
5194							{
5195	2					9	$self->[_in_skipped_] = $self->[_last_line_number_];
5196	2					6	return;
5197							}
5198
5199							# Optional fast processing of a block comment
5200	786					1766	$line_of_tokens->{_line_type} = 'CODE';
5201	786					3294	$line_of_tokens->{_rtokens} = [$input_line];
5202	786					2248	$line_of_tokens->{_rtoken_type} = ['#'];
5203	786					2108	$line_of_tokens->{_rlevels} = [$level_in_tokenizer];
5204	786					1847	$line_of_tokens->{_rci_levels} = [0];
5205	786					2066	$line_of_tokens->{_rblock_type} = [EMPTY_STRING];
5206	786					1798	$line_of_tokens->{_nesting_tokens_0} = $nesting_token_string;
5207	786					2612	$line_of_tokens->{_nesting_blocks_0} = $nesting_block_string;
5208	786					1771	return;
5209							}
5210
5211							# Look for __END__ or __DATA__ lines
5212	5718	100	100			16018	if ( substr( $input_line, 0, 1 ) eq '_'
5213							&& $input_line =~ /^__(END\|DATA)__\s*$/ )
5214							{
5215	7					18	$is_END_or_DATA = 1;
5216							}
5217							}
5218
5219							# update the copy of the line for use in error messages
5220							# This must be exactly what we give the pre_tokenizer
5221	5902					10954	$self->[_line_of_text_] = $input_line;
5222
5223							# re-initialize for the main loop
5224	5902					14186	$routput_token_list = []; # stack of output token indexes
5225	5902					17994	$routput_token_type = []; # token types
5226	5902					16534	$routput_block_type = []; # types of code block
5227	5902					15616	$routput_container_type = []; # paren types, such as if, elsif, ..
5228	5902					14729	$routput_type_sequence = []; # nesting sequential number
5229
5230	5902					9635	$rhere_target_list = [];
5231
5232	5902					9766	$tok = $last_nonblank_token;
5233	5902					8996	$type = $last_nonblank_type;
5234	5902					9079	$prototype = $last_nonblank_prototype;
5235	5902					8845	$last_nonblank_i = -1;
5236	5902					8940	$block_type = $last_nonblank_block_type;
5237	5902					8807	$container_type = $last_nonblank_container_type;
5238	5902					8618	$type_sequence = $last_nonblank_type_sequence;
5239	5902					8371	$indent_flag = 0;
5240	5902					8496	$peeked_ahead = 0;
5241
5242	5902					18210	$self->tokenizer_main_loop($is_END_or_DATA);
5243
5244							#-----------------------------------------------
5245							# all done tokenizing this line ...
5246							# now prepare the final list of tokens and types
5247							#-----------------------------------------------
5248	5902					18260	$self->tokenizer_wrapup_line($line_of_tokens);
5249
5250	5902					10832	return;
5251							} ## end sub tokenize_this_line
5252
5253							sub tokenizer_main_loop {
5254
5255	5902			5902	0	11845	my ( $self, $is_END_or_DATA ) = @_;
5256
5257							#---------------------------------
5258							# Break one input line into tokens
5259							#---------------------------------
5260
5261							# Input parameter:
5262							# $is_END_or_DATA is true for a __END__ or __DATA__ line
5263
5264							# start by breaking the line into pre-tokens
5265	5902					14524	( $rtokens, $rtoken_map, $rtoken_type ) = pre_tokenize($input_line);
5266
5267	5902					24026	$max_token_index = scalar( @{$rtokens} ) - 1;
	5902					10827
5268	5902					8960	push( @{$rtokens}, SPACE, SPACE, SPACE )
	5902					14800
5269							; # extra whitespace simplifies logic
5270	5902					8787	push( @{$rtoken_map}, 0, 0, 0 ); # shouldn't be referenced
	5902					12660
5271	5902					8480	push( @{$rtoken_type}, 'b', 'b', 'b' );
	5902					12400
5272
5273							# initialize for main loop
5274	5902					8449	if (0) { #<<< this is not necessary
5275							foreach my $ii ( 0 .. $max_token_index + 3 ) {
5276							$routput_token_type->[$ii] = EMPTY_STRING;
5277							$routput_block_type->[$ii] = EMPTY_STRING;
5278							$routput_container_type->[$ii] = EMPTY_STRING;
5279							$routput_type_sequence->[$ii] = EMPTY_STRING;
5280							$routput_indent_flag->[$ii] = 0;
5281							}
5282							}
5283
5284	5902					8715	$i = -1;
5285	5902					8894	$i_tok = -1;
5286
5287							#-----------------------
5288							# main tokenization loop
5289							#-----------------------
5290
5291							# we are looking at each pre-token of one line and combining them
5292							# into tokens
5293	5902					13541	while ( ++$i <= $max_token_index ) {
5294
5295							# continue looking for the end of a quote
5296	50796	100				87393	if ($in_quote) {
5297	2768					9005	$self->do_FOLLOW_QUOTE();
5298	2768	100	100			11289	last if ( $in_quote \|\| $i > $max_token_index );
5299							}
5300
5301	50497	100	100			137316	if ( $type ne 'b' && $type ne 'CORE::' ) {
5302
5303							# try to catch some common errors
5304	35283	100	100			75902	if ( ( $type eq 'n' ) && ( $tok ne '0' ) ) {
5305
5306	1592	100				4839	if ( $last_nonblank_token eq 'eq' ) {
		50
5307	9					59	$self->complain("Should 'eq' be '==' here ?\n");
5308							}
5309							elsif ( $last_nonblank_token eq 'ne' ) {
5310	0					0	$self->complain("Should 'ne' be '!=' here ?\n");
5311							}
5312							else {
5313							# that's all
5314							}
5315							}
5316
5317							# fix c090, only rotate vars if a new token will be stored
5318	35283	100				62955	if ( $i_tok >= 0 ) {
5319
5320	29519					44184	$last_last_nonblank_token = $last_nonblank_token;
5321	29519					40593	$last_last_nonblank_type = $last_nonblank_type;
5322
5323	29519					41735	$last_nonblank_prototype = $prototype;
5324	29519					39544	$last_nonblank_block_type = $block_type;
5325	29519					39689	$last_nonblank_container_type = $container_type;
5326	29519					39763	$last_nonblank_type_sequence = $type_sequence;
5327	29519					38589	$last_nonblank_i = $i_tok;
5328
5329							# Fix part #3 for git82: propagate type 'Z' though L-R pair
5330	29519	100	100			61846	if ( !( $type eq 'R' && $last_nonblank_type eq 'Z' ) ) {
5331	29518					40827	$last_nonblank_token = $tok;
5332	29518					40279	$last_nonblank_type = $type;
5333							}
5334							}
5335
5336							# Patch for c030: Fix things in case a '->' got separated from
5337							# the subsequent identifier by a side comment. We need the
5338							# last_nonblank_token to have a leading -> to avoid triggering
5339							# an operator expected error message at the next '('. See also
5340							# fix for git #63.
5341	35283	100				64498	if ( $last_last_nonblank_token eq '->' ) {
5342	885	100	66			5104	if ( $last_nonblank_type eq 'w'
5343							\|\| $last_nonblank_type eq 'i' )
5344							{
5345	674					1690	$last_nonblank_token = '->' . $last_nonblank_token;
5346	674					1334	$last_nonblank_type = 'i';
5347							}
5348							}
5349							}
5350
5351							# store previous token type
5352	50497	100				86378	if ( $i_tok >= 0 ) {
5353	44733					86656	$routput_token_type->[$i_tok] = $type;
5354	44733					73265	$routput_block_type->[$i_tok] = $block_type;
5355	44733					73133	$routput_container_type->[$i_tok] = $container_type;
5356	44733					70103	$routput_type_sequence->[$i_tok] = $type_sequence;
5357	44733					67550	$routput_indent_flag->[$i_tok] = $indent_flag;
5358							}
5359
5360							# get the next pre-token and type
5361							# $tok and $type will be modified to make the output token
5362	50497					79628	my $pre_tok = $tok = $rtokens->[$i]; # get the next pre-token
5363	50497					75952	my $pre_type = $type = $rtoken_type->[$i]; # and type
5364
5365							# remember the starting index of this token; we will be updating $i
5366	50497					67464	$i_tok = $i;
5367
5368							# re-initialize various flags for the next output token
5369							(
5370
5371	50497					89767	$block_type,
5372							$container_type,
5373							$type_sequence,
5374							$indent_flag,
5375							$prototype,
5376							)
5377							= (
5378
5379							EMPTY_STRING,
5380							EMPTY_STRING,
5381							EMPTY_STRING,
5382							0,
5383							EMPTY_STRING,
5384							);
5385
5386							# this pre-token will start an output token
5387	50497					62928	push( @{$routput_token_list}, $i_tok );
	50497					85383
5388
5389							# The search for the full token ends in one of 5 main END NODES:
5390
5391							#-----------------------
5392							# END NODE 1: whitespace
5393							#-----------------------
5394	50497	100				106731	next if ( $pre_type eq 'b' );
5395
5396							#----------------------
5397							# END NODE 2: a comment
5398							#----------------------
5399	35141	100				60791	last if ( $pre_type eq '#' );
5400
5401							# continue gathering identifier if necessary
5402	34813	100				61058	if ($id_scan_state) {
5403
5404	17	100	66			144	if ( $is_sub{$id_scan_state} \|\| $is_package{$id_scan_state} ) {
5405	10					56	$self->scan_id();
5406							}
5407							else {
5408	7					36	$self->scan_identifier();
5409							}
5410
5411	17	100				71	if ($id_scan_state) {
5412
5413							# Still scanning ...
5414							# Check for side comment between sub and prototype (c061)
5415
5416							# done if nothing left to scan on this line
5417	1	50				3	last if ( $i > $max_token_index );
5418
5419	1					5	my ( $next_nonblank_token, $i_next ) =
5420							find_next_nonblank_token_on_this_line( $i, $rtokens,
5421							$max_token_index );
5422
5423							# done if it was just some trailing space
5424	1	50				4	last if ( $i_next > $max_token_index );
5425
5426							# something remains on the line ... must be a side comment
5427	1					3	next;
5428							}
5429
5430	16	100	100			135	next if ( ( $i > 0 ) \|\| $type );
5431
5432							# didn't find any token; start over
5433	7					23	$type = $pre_type;
5434	7					17	$tok = $pre_tok;
5435							}
5436
5437							#-----------------------------------------------------------
5438							# Combine pre-tokens into digraphs and trigraphs if possible
5439							#-----------------------------------------------------------
5440
5441							# See if we can make a digraph...
5442							# The following tokens are excluded and handled specially:
5443							# '/=' is excluded because the / might start a pattern.
5444							# 'x=' is excluded since it might be $x=, with $ on previous line
5445							# '*' and = might be typeglobs of punctuation variables
5446							# I have allowed tokens starting with <, such as <=,
5447							# because I don't think these could be valid angle operators.
5448							# test file: storrs4.pl
5449	34803	100	100			108213	if ( $can_start_digraph{$tok}
			100
5450							&& $i < $max_token_index
5451							&& $is_digraph{ $tok . $rtokens->[ $i + 1 ] } )
5452							{
5453
5454	2559					4945	my $combine_ok = 1;
5455	2559					5337	my $test_tok = $tok . $rtokens->[ $i + 1 ];
5456
5457							# check for special cases which cannot be combined
5458
5459							# '//' must be defined_or operator if an operator is expected.
5460							# TODO: Code for other ambiguous digraphs (/=, x=, *, =)
5461							# could be migrated here for clarity
5462
5463							# Patch for RT#102371, misparsing a // in the following snippet:
5464							# state $b //= ccc();
5465							# The solution is to always accept the digraph (or trigraph)
5466							# after type 'Z' (possible file handle). The reason is that
5467							# sub operator_expected gives TERM expected here, which is
5468							# wrong in this case.
5469	2559	100	66			7033	if ( $test_tok eq '//' && $last_nonblank_type ne 'Z' ) {
5470
5471							# note that here $tok = '/' and the next tok and type is '/'
5472	16					33	my $blank_after_Z;
5473	16					56	$expecting =
5474							$self->operator_expected( $tok, '/', $blank_after_Z );
5475
5476							# Patched for RT#101547, was 'unless ($expecting==OPERATOR)'
5477	16	100				53	$combine_ok = 0 if ( $expecting == TERM );
5478							}
5479
5480							# Patch for RT #114359: mis-parsing of "print $x ** 0.5;
5481							# Accept the digraphs '**' only after type 'Z'
5482							# Otherwise postpone the decision.
5483	2559	100				6047	if ( $test_tok eq '**' ) {
5484	39	100				126	if ( $last_nonblank_type ne 'Z' ) { $combine_ok = 0 }
	37					79
5485							}
5486
5487	2559	50	66			16219	if (
			66
			33
5488
5489							# still ok to combine?
5490							$combine_ok
5491
5492							&& ( $test_tok ne '/=' ) # might be pattern
5493							&& ( $test_tok ne 'x=' ) # might be $x
5494							&& ( $test_tok ne '*=' ) # typeglob?
5495
5496							# Moved above as part of fix for
5497							# RT #114359: Missparsing of "print $x ** 0.5;
5498							# && ( $test_tok ne '**' ) # typeglob?
5499							)
5500							{
5501	2518					4014	$tok = $test_tok;
5502	2518					3621	$i++;
5503
5504							# Now try to assemble trigraphs. Note that all possible
5505							# perl trigraphs can be constructed by appending a character
5506							# to a digraph.
5507	2518					4293	$test_tok = $tok . $rtokens->[ $i + 1 ];
5508
5509	2518	100				6095	if ( $is_trigraph{$test_tok} ) {
5510	76					186	$tok = $test_tok;
5511	76					166	$i++;
5512							}
5513
5514							# The only current tetragraph is the double diamond operator
5515							# and its first three characters are NOT a trigraph, so
5516							# we do can do a special test for it
5517							else {
5518	2442	100				5773	if ( $test_tok eq '<<>' ) {
5519	1					4	$test_tok .= $rtokens->[ $i + 2 ];
5520	1	50				4	if ( $is_tetragraph{$test_tok} ) {
5521	1					2	$tok = $test_tok;
5522	1					5	$i += 2;
5523							}
5524							}
5525							}
5526							}
5527							}
5528
5529	34803					49416	$type = $tok;
5530	34803					57425	$next_tok = $rtokens->[ $i + 1 ];
5531	34803					51733	$next_type = $rtoken_type->[ $i + 1 ];
5532
5533							# expecting an operator here? first try table lookup, then function
5534	34803					64390	$expecting = $op_expected_table{$last_nonblank_type};
5535	34803	100				67575	if ( !defined($expecting) ) {
5536	8391		100			20217	my $blank_after_Z = $last_nonblank_type eq 'Z'
5537							&& ( $i == 0 \|\| $rtoken_type->[ $i - 1 ] eq 'b' );
5538	8391					22469	$expecting =
5539							$self->operator_expected( $tok, $next_type, $blank_after_Z );
5540							}
5541
5542	34803					45843	DEBUG_TOKENIZE && do {
5543							local $LIST_SEPARATOR = ')(';
5544							my @debug_list = (
5545							$last_nonblank_token, $tok,
5546							$next_tok, $brace_depth,
5547							$rbrace_type->[$brace_depth], $paren_depth,
5548							$rparen_type->[$paren_depth],
5549							);
5550							print {*STDOUT} "TOKENIZE:(@debug_list)\n";
5551							};
5552
5553							# We have the next token, $tok.
5554							# Now we have to examine this token and decide what it is
5555							# and define its $type
5556
5557							#------------------------
5558							# END NODE 3: a bare word
5559							#------------------------
5560	34803	100				64558	if ( $pre_type eq 'w' ) {
5561	5832					17547	my $is_last = $self->do_BAREWORD($is_END_or_DATA);
5562	5832	100				12147	last if ($is_last);
5563	5824					14552	next;
5564							}
5565
5566							# Turn off attribute list on first non-blank, non-bareword.
5567							# Added '#' to fix c038 (later moved above).
5568	28971		100			55711	$self->[_in_attribute_list_] &&= 0;
5569
5570							#-------------------------------
5571							# END NODE 4: a string of digits
5572							#-------------------------------
5573	28971	100				51696	if ( $pre_type eq 'd' ) {
5574	1934					7387	$self->do_DIGITS();
5575	1934					4437	next;
5576							}
5577
5578							#------------------------------------------
5579							# END NODE 5: everything else (punctuation)
5580							#------------------------------------------
5581	27037					54229	my $code = $tokenization_code->{$tok};
5582	27037	100				51112	if ($code) {
5583	25304					76515	$code->($self);
5584	25304	100				65819	redo if $in_quote;
5585							}
5586							} ## End main tokenizer loop
5587
5588							# Store the final token
5589	5902	100				13059	if ( $i_tok >= 0 ) {
5590	5764					12319	$routput_token_type->[$i_tok] = $type;
5591	5764					10529	$routput_block_type->[$i_tok] = $block_type;
5592	5764					10587	$routput_container_type->[$i_tok] = $container_type;
5593	5764					9909	$routput_type_sequence->[$i_tok] = $type_sequence;
5594	5764					10314	$routput_indent_flag->[$i_tok] = $indent_flag;
5595							}
5596
5597							# Remember last nonblank values
5598	5902	100	100			21637	if ( $type ne 'b' && $type ne '#' ) {
5599
5600	5429					8824	$last_last_nonblank_token = $last_nonblank_token;
5601	5429					8361	$last_last_nonblank_type = $last_nonblank_type;
5602
5603	5429					8198	$last_nonblank_prototype = $prototype;
5604	5429					8068	$last_nonblank_block_type = $block_type;
5605	5429					7625	$last_nonblank_container_type = $container_type;
5606	5429					8285	$last_nonblank_type_sequence = $type_sequence;
5607
5608	5429					7533	$last_nonblank_token = $tok;
5609	5429					8101	$last_nonblank_type = $type;
5610							}
5611
5612							# reset indentation level if necessary at a sub or package
5613							# in an attempt to recover from a nesting error
5614	5902	50				12236	if ( $level_in_tokenizer < 0 ) {
5615	0	0				0	if ( $input_line =~ /^\s*(sub\|package)\s+(\w+)/ ) {
5616	0					0	reset_indentation_level(0);
5617	0					0	$self->brace_warning("resetting level to 0 at $1 $2\n");
5618							}
5619							}
5620
5621	5902					9728	$self->[_in_quote_] = $in_quote;
5622	5902	100				13400	$self->[_quote_target_] =
5623							$in_quote ? matching_end_token($quote_character) : EMPTY_STRING;
5624	5902					11102	$self->[_rhere_target_list_] = $rhere_target_list;
5625
5626	5902					9941	return;
5627							} ## end sub tokenizer_main_loop
5628
5629							sub tokenizer_wrapup_line {
5630	5902			5902	0	11199	my ( $self, $line_of_tokens ) = @_;
5631
5632							#---------------------------------------------------------
5633							# Package a line of tokens for shipping back to the caller
5634							#---------------------------------------------------------
5635
5636							# Arrays to hold token values for this line:
5637	5902					10506	my ( @levels, @block_type, @type_sequence, @token_type, @tokens );
5638
5639	5902					14536	$line_of_tokens->{_nesting_tokens_0} = $nesting_token_string;
5640
5641							# Remember starting nesting block string
5642	5902					9907	my $nesting_block_string_0 = $nesting_block_string;
5643
5644							#-----------------
5645							# Loop over tokens
5646							#-----------------
5647							# $i is the index of the pretoken which starts this full token
5648	5902					8535	foreach my $i ( @{$routput_token_list} ) {
	5902					12720
5649
5650	50681					73141	my $type_i = $routput_token_type->[$i];
5651
5652							#----------------------------------------
5653							# Section 1. Handle a non-sequenced token
5654							#----------------------------------------
5655	50681	100				80911	if ( !$routput_type_sequence->[$i] ) {
5656
5657							#-------------------------------
5658							# Section 1.1. types ';' and 't'
5659							#-------------------------------
5660							# - output anonymous 'sub' as keyword (type 'k')
5661							# - output __END__, __DATA__, and format as type 'k' instead
5662							# of ';' to make html colors correct, etc.
5663	41533	100				90328	if ( $is_semicolon_or_t{$type_i} ) {
		50
5664	2674					6026	my $tok_i = $rtokens->[$i];
5665	2674	100				7668	if ( $is_END_DATA_format_sub{$tok_i} ) {
5666	172					461	$type_i = 'k';
5667							}
5668							}
5669
5670							#----------------------------------------------
5671							# Section 1.2. Check for an invalid token type.
5672							#----------------------------------------------
5673							# This can happen by running perltidy on non-scripts although
5674							# it could also be bug introduced by programming change. Perl
5675							# silently accepts a 032 (^Z) and takes it as the end
5676							elsif ( !$is_valid_token_type{$type_i} ) {
5677	0					0	my $val = ord($type_i);
5678	0					0	$self->warning(
5679							"unexpected character decimal $val ($type_i) in script\n"
5680							);
5681	0					0	$self->[_in_error_] = 1;
5682							}
5683							else {
5684							## ok - valid token type other than ; and t
5685							}
5686
5687							#----------------------------------------------------
5688							# Section 1.3. Store values for a non-sequenced token
5689							#----------------------------------------------------
5690	41533					66315	push( @levels, $level_in_tokenizer );
5691	41533					61537	push( @block_type, EMPTY_STRING );
5692	41533					60375	push( @type_sequence, EMPTY_STRING );
5693	41533					77073	push( @token_type, $type_i );
5694
5695							}
5696
5697							#------------------------------------
5698							# Section 2. Handle a sequenced token
5699							# One of { [ ( ? : ) ] }
5700							#------------------------------------
5701							else {
5702
5703							# $level_i is the level we will store. Levels of braces are
5704							# set so that the leading braces have a HIGHER level than their
5705							# CONTENTS, which is convenient for indentation.
5706	9148					13733	my $level_i = $level_in_tokenizer;
5707
5708							# $tok_i is the PRE-token. It only equals the token for symbols
5709	9148					14676	my $tok_i = $rtokens->[$i];
5710
5711							# $routput_indent_flag->[$i] indicates that we need a change
5712							# in level at a nested ternary, as follows
5713							# 1 => at a nested ternary ?
5714							# -1 => at a nested ternary :
5715							# 0 => otherwise
5716
5717							#--------------------------------------------
5718							# Section 2.1 Handle a level-increasing token
5719							#--------------------------------------------
5720	9148	100				24398	if ( $is_opening_or_ternary_type{$type_i} ) {
		50
5721
5722	4574	100				9431	if ( $type_i eq '?' ) {
5723
5724	187	100				1010	if ( $routput_indent_flag->[$i] > 0 ) {
5725	8					16	$level_in_tokenizer++;
5726
5727							# break BEFORE '?' in a nested ternary
5728	8					24	$level_i = $level_in_tokenizer;
5729	8					21	$nesting_block_string .= "$nesting_block_flag";
5730
5731							}
5732							}
5733							else {
5734
5735	4387					7363	$nesting_token_string .= $tok_i;
5736
5737	4387	100	100			12162	if ( $type_i eq '{' \|\| $type_i eq 'L' ) {
5738
5739	4080					5925	$level_in_tokenizer++;
5740
5741	4080	100				7935	if ( $routput_block_type->[$i] ) {
5742	972					1905	$nesting_block_flag = 1;
5743	972					1992	$nesting_block_string .= '1';
5744							}
5745							else {
5746	3108					4892	$nesting_block_flag = 0;
5747	3108					5303	$nesting_block_string .= '0';
5748							}
5749							}
5750							}
5751							}
5752
5753							#---------------------------------------------
5754							# Section 2.2. Handle a level-decreasing token
5755							#---------------------------------------------
5756							elsif ( $is_closing_or_ternary_type{$type_i} ) {
5757
5758	4574	100				10311	if ( $type_i ne ':' ) {
5759	4387					8285	my $char = chop $nesting_token_string;
5760	4387	50				11603	if ( $char ne $matching_start_token{$tok_i} ) {
5761	0					0	$nesting_token_string .= $char . $tok_i;
5762							}
5763							}
5764
5765	4574	100	100			14693	if (
			100
			100
5766							$type_i eq '}'
5767							\|\| $type_i eq 'R'
5768
5769							# only the second and higher ? : have levels
5770							\|\| $type_i eq ':' && $routput_indent_flag->[$i] < 0
5771							)
5772							{
5773
5774	4088					6339	$level_i = --$level_in_tokenizer;
5775
5776	4088	50				8200	if ( $level_in_tokenizer < 0 ) {
5777	0	0				0	if ( !$self->[_saw_negative_indentation_] ) {
5778	0					0	$self->[_saw_negative_indentation_] = 1;
5779	0					0	$self->warning(
5780							"Starting negative indentation\n");
5781							}
5782							}
5783
5784							# restore previous level values
5785	4088	50				8381	if ( length($nesting_block_string) > 1 )
5786							{ # true for valid script
5787	4088					6409	chop $nesting_block_string;
5788	4088					8150	$nesting_block_flag =
5789							substr( $nesting_block_string, -1 ) eq '1';
5790							}
5791
5792							}
5793							}
5794
5795							#-----------------------------------------------------
5796							# Section 2.3. Unexpected sequenced token type - error
5797							#-----------------------------------------------------
5798							else {
5799
5800							# The tokenizer should only be assigning sequence numbers
5801							# to types { [ ( ? ) ] } :
5802	0					0	DEVEL_MODE && $self->Fault(<<EOM);
5803							unexpected sequence number on token type $type_i with pre-tok=$tok_i
5804							EOM
5805							}
5806
5807							#------------------------------------------------
5808							# Section 2.4. Store values for a sequenced token
5809							#------------------------------------------------
5810
5811							# The starting nesting block string, which is used in any .LOG
5812							# output, should include the first token of the line
5813	9148	100				18805	if ( !@levels ) {
5814	1574					2941	$nesting_block_string_0 = $nesting_block_string;
5815							}
5816
5817							# Store values for a sequenced token
5818	9148					16779	push( @levels, $level_i );
5819	9148					17473	push( @block_type, $routput_block_type->[$i] );
5820	9148					15415	push( @type_sequence, $routput_type_sequence->[$i] );
5821	9148					19071	push( @token_type, $type_i );
5822
5823							}
5824							} ## End loop to over tokens
5825
5826							#---------------------
5827							# Post-loop operations
5828							#---------------------
5829
5830	5902					14720	$line_of_tokens->{_nesting_blocks_0} = $nesting_block_string_0;
5831
5832							# Form and store the tokens
5833	5902	50				13591	if (@levels) {
5834
5835	5902					8349	my $im = shift @{$routput_token_list};
	5902					10793
5836	5902					10121	my $offset = $rtoken_map->[$im];
5837	5902					8476	foreach my $i ( @{$routput_token_list} ) {
	5902					10506
5838	44779					61730	my $numc = $rtoken_map->[$i] - $offset;
5839	44779					79618	push( @tokens, substr( $input_line, $offset, $numc ) );
5840	44779					56961	$offset += $numc;
5841
5842	44779					60753	if ( DEVEL_MODE && $numc <= 0 ) {
5843
5844							# Should not happen unless @{$rtoken_map} is corrupted
5845							$self->Fault(
5846							"number of characters is '$numc' but should be >0\n");
5847							}
5848							}
5849
5850							# Form and store the final token of this line
5851	5902					11997	my $numc = length($input_line) - $offset;
5852	5902					12223	push( @tokens, substr( $input_line, $offset, $numc ) );
5853
5854	5902					9022	if ( DEVEL_MODE && $numc <= 0 ) {
5855							$self->Fault(
5856							"Number of Characters is '$numc' but should be >0\n");
5857							}
5858							}
5859
5860							# NOTE: This routine returns ci=0. Eventually '_rci_levels' can be
5861							# removed. The ci values are computed later by sub Formatter::set_ci.
5862	5902					19678	my @ci_levels = (0) x scalar(@levels);
5863
5864							# Wrap up this line of tokens for shipping to the Formatter
5865	5902					13822	$line_of_tokens->{_rtoken_type} = \@token_type;
5866	5902					11320	$line_of_tokens->{_rtokens} = \@tokens;
5867	5902					11345	$line_of_tokens->{_rblock_type} = \@block_type;
5868	5902					11386	$line_of_tokens->{_rtype_sequence} = \@type_sequence;
5869	5902					18923	$line_of_tokens->{_rlevels} = \@levels;
5870	5902					11777	$line_of_tokens->{_rci_levels} = \@ci_levels;
5871
5872	5902					15726	return;
5873							} ## end sub tokenizer_wrapup_line
5874
5875							} ## end tokenize_this_line
5876
5877							#######################################################################
5878							# Tokenizer routines which assist in identifying token types
5879							#######################################################################
5880
5881							# Define Global '%op_expected_table'
5882							# = hash table of operator expected values based on last nonblank token
5883
5884							# exceptions to perl's weird parsing rules after type 'Z'
5885							my %is_weird_parsing_rule_exception;
5886
5887							my %is_paren_dollar;
5888
5889							my %is_n_v;
5890
5891							BEGIN {
5892
5893							# Always expecting TERM following these types:
5894							# note: this is identical to '@value_requestor_type' defined later.
5895							# Fix for c250: add new type 'P' for package (expecting VERSION or {}
5896							# after package NAMESPACE, so expecting TERM)
5897							# Fix for c250: add new type 'S' for sub (not expecting operator)
5898	39			39		917	my @q = qw(
5899							; ! + x & ? F J - p / Y : % f U ~ A G j L P S * . \| ^ < = [ m { \ > t
5900							\|\| >= != mm = => .. !~ == && \|= .= pp -= =~ += <= %= ^= x= ~~ * << /=
5901							&= // >> ~. &. \|. ^.
5902							... **= <<= >>= &&= \|\|= //= <=> !~~ &.= \|.= ^.= <<~
5903							);
5904	39					4593	push @q, ',';
5905	39					133	push @q, '('; # for completeness, not currently a token type
5906	39					75	push @q, '->'; # was previously in UNKNOWN
5907	39					1325	@{op_expected_table}{@q} = (TERM) x scalar(@q);
5908
5909							# Always UNKNOWN following these types;
5910							# previously had '->' in this list for c030
5911	39					209	@q = qw( w );
5912	39					120	@{op_expected_table}{@q} = (UNKNOWN) x scalar(@q);
5913
5914							# Always expecting OPERATOR ...
5915							# 'n' and 'v' are currently excluded because they might be VERSION numbers
5916							# 'i' is currently excluded because it might be a package
5917							# 'q' is currently excluded because it might be a prototype
5918							# Fix for c030: removed '->' from this list:
5919							# Fix for c250: added 'i' because new type 'P' was added
5920	39					115	@q = qw( -- C h R ++ ] Q <> i ); ## n v q );
5921	39					75	push @q, ')';
5922	39					255	@{op_expected_table}{@q} = (OPERATOR) x scalar(@q);
5923
5924							# Fix for git #62: added '*' and '%'
5925	39					98	@q = qw( < ? * % );
5926	39					139	@{is_weird_parsing_rule_exception}{@q} = (1) x scalar(@q);
5927
5928	39					87	@q = qw<) $>;
5929	39					101	@{is_paren_dollar}{@q} = (1) x scalar(@q);
5930
5931	39					95	@q = qw( n v );
5932	39					1347	@{is_n_v}{@q} = (1) x scalar(@q);
5933
5934							} ## end BEGIN
5935
5936	39			39		334	use constant DEBUG_OPERATOR_EXPECTED => 0;
	39					125
	39					86972
5937
5938							sub operator_expected {
5939
5940							# Returns a parameter indicating what types of tokens can occur next
5941
5942							# Call format:
5943							# $op_expected =
5944							# $self->operator_expected( $tok, $next_type, $blank_after_Z );
5945							# where
5946							# $tok is the current token
5947							# $next_type is the type of the next token (blank or not)
5948							# $blank_after_Z = flag for guessing after a type 'Z':
5949							# true if $tok follows type 'Z' with intermediate blank
5950							# false if $tok follows type 'Z' with no intermediate blank
5951							# ignored if $tok does not follow type 'Z'
5952
5953							# Many perl symbols have two or more meanings. For example, '<<'
5954							# can be a shift operator or a here-doc operator. The
5955							# interpretation of these symbols depends on the current state of
5956							# the tokenizer, which may either be expecting a term or an
5957							# operator. For this example, a << would be a shift if an OPERATOR
5958							# is expected, and a here-doc if a TERM is expected. This routine
5959							# is called to make this decision for any current token. It returns
5960							# one of three possible values:
5961							#
5962							# OPERATOR - operator expected (or at least, not a term)
5963							# UNKNOWN - can't tell
5964							# TERM - a term is expected (or at least, not an operator)
5965							#
5966							# The decision is based on what has been seen so far. This
5967							# information is stored in the "$last_nonblank_type" and
5968							# "$last_nonblank_token" variables. For example, if the
5969							# $last_nonblank_type is '=~', then we are expecting a TERM, whereas
5970							# if $last_nonblank_type is 'n' (numeric), we are expecting an
5971							# OPERATOR.
5972							#
5973							# If a UNKNOWN is returned, the calling routine must guess. A major
5974							# goal of this tokenizer is to minimize the possibility of returning
5975							# UNKNOWN, because a wrong guess can spoil the formatting of a
5976							# script.
5977							#
5978							# Adding NEW_TOKENS: it is critically important that this routine be
5979							# updated to allow it to determine if an operator or term is to be
5980							# expected after the new token. Doing this simply involves adding
5981							# the new token character to one of the regexes in this routine or
5982							# to one of the hash lists
5983							# that it uses, which are initialized in the BEGIN section.
5984							# USES GLOBAL VARIABLES: $last_nonblank_type, $last_nonblank_token,
5985							# $statement_type
5986
5987							# When possible, token types should be selected such that we can determine
5988							# the 'operator_expected' value by a simple hash lookup. If there are
5989							# exceptions, that is an indication that a new type is needed.
5990
5991	8421			8421	0	17898	my ( $self, $tok, $next_type, $blank_after_Z ) = @_;
5992
5993							#--------------------------------------------
5994							# Section 1: Table lookup will get most cases
5995							#--------------------------------------------
5996
5997							# Many types are can be obtained by a table lookup. This typically handles
5998							# more than half of the calls. For speed, the caller may try table lookup
5999							# first before calling this sub.
6000	8421					13581	my $op_expected = $op_expected_table{$last_nonblank_type};
6001	8421	100				17183	if ( defined($op_expected) ) {
6002							DEBUG_OPERATOR_EXPECTED
6003	24					47	&& print {*STDOUT}
6004							"OPERATOR_EXPECTED: Table Lookup; returns $op_expected for last type $last_nonblank_type token $last_nonblank_token\n";
6005	24					89	return $op_expected;
6006							}
6007
6008							#---------------------------------------------
6009							# Section 2: Handle special cases if necessary
6010							#---------------------------------------------
6011
6012							# Types 'k', '}' and 'Z' depend on context
6013							# Types 'n', 'v', 'q' also depend on context.
6014
6015							# identifier...
6016							# Fix for c250: removed coding for type 'i' because 'i' and new type 'P'
6017							# are now done by hash table lookup
6018
6019							# keyword...
6020	8397	100				25394	if ( $last_nonblank_type eq 'k' ) {
		100
		100
		100
		100
6021
6022							# keywords expecting OPERATOR:
6023	2644	100				12562	if ( $expecting_operator_token{$last_nonblank_token} ) {
		100
6024	7					23	$op_expected = OPERATOR;
6025							}
6026
6027							# keywords expecting TERM:
6028							elsif ( $expecting_term_token{$last_nonblank_token} ) {
6029
6030							# Exceptions from TERM:
6031
6032							# // may follow perl functions which may be unary operators
6033							# see test file dor.t (defined or);
6034	2538	100	100			10982	if (
		50	100
			66
6035							$tok eq '/'
6036							&& $next_type eq '/'
6037							&& $is_keyword_rejecting_slash_as_pattern_delimiter{
6038							$last_nonblank_token}
6039							)
6040							{
6041	1					4	$op_expected = OPERATOR;
6042							}
6043
6044							# Patch to allow a ? following 'split' to be a deprecated pattern
6045							# delimiter. This patch is coordinated with the omission of split
6046							# from the list
6047							# %is_keyword_rejecting_question_as_pattern_delimiter. This patch
6048							# will force perltidy to guess.
6049							elsif ($tok eq '?'
6050							&& $last_nonblank_token eq 'split' )
6051							{
6052	0					0	$op_expected = UNKNOWN;
6053							}
6054							else {
6055	2537					4479	$op_expected = TERM;
6056							}
6057							}
6058							else {
6059	99					244	$op_expected = TERM;
6060							}
6061							} ## end type 'k'
6062
6063							# closing container token...
6064
6065							# Note that the actual token for type '}' may also be a ')'.
6066
6067							# Also note that $last_nonblank_token is not the token corresponding to
6068							# $last_nonblank_type when the type is a closing container. In that
6069							# case it is the token before the corresponding opening container token.
6070							# So for example, for this snippet
6071							# $a = do { BLOCK } / 2;
6072							# the $last_nonblank_token is 'do' when $last_nonblank_type eq '}'.
6073
6074							elsif ( $last_nonblank_type eq '}' ) {
6075	3590					6531	$op_expected = UNKNOWN;
6076
6077							# handle something after 'do' and 'eval'
6078	3590	100	66			19311	if ( $is_block_operator{$last_nonblank_token} ) {
		100
		100
6079
6080							# something like $a = do { BLOCK } / 2;
6081	82					246	$op_expected = OPERATOR; # block mode following }
6082							}
6083
6084							# $last_nonblank_token =~ /^(\)\|\$\|\-\>)/
6085							elsif ( $is_paren_dollar{ substr( $last_nonblank_token, 0, 1 ) }
6086							\|\| substr( $last_nonblank_token, 0, 2 ) eq '->' )
6087							{
6088	2122					3753	$op_expected = OPERATOR;
6089	2122	50				5117	if ( $last_nonblank_token eq '$' ) { $op_expected = UNKNOWN }
	0					0
6090							}
6091
6092							# Check for smartmatch operator before preceding brace or square
6093							# bracket. For example, at the ? after the ] in the following
6094							# expressions we are expecting an operator:
6095							#
6096							# qr/3/ ~~ ['1234'] ? 1 : 0;
6097							# map { $_ ~~ [ '0', '1' ] ? 'x' : 'o' } @a;
6098							elsif ( $last_nonblank_token eq '~~' ) {
6099	20					47	$op_expected = OPERATOR;
6100							}
6101
6102							# A right brace here indicates the end of a simple block. All
6103							# non-structural right braces have type 'R' all braces associated with
6104							# block operator keywords have been given those keywords as
6105							# "last_nonblank_token" and caught above. (This statement is order
6106							# dependent, and must come after checking $last_nonblank_token).
6107							else {
6108
6109							# patch for dor.t (defined or).
6110	1366	50	33			6277	if ( $tok eq '/'
		100	33
6111							&& $next_type eq '/'
6112							&& $last_nonblank_token eq ']' )
6113							{
6114	0					0	$op_expected = OPERATOR;
6115							}
6116
6117							# Patch for RT #116344: misparse a ternary operator after an
6118							# anonymous hash, like this:
6119							# return ref {} ? 1 : 0;
6120							# The right brace should really be marked type 'R' in this case,
6121							# and it is safest to return an UNKNOWN here. Expecting a TERM will
6122							# cause the '?' to always be interpreted as a pattern delimiter
6123							# rather than introducing a ternary operator.
6124							elsif ( $tok eq '?' ) {
6125	1					3	$op_expected = UNKNOWN;
6126							}
6127							else {
6128	1365					2320	$op_expected = TERM;
6129							}
6130							}
6131							} ## end type '}'
6132
6133							# number or v-string...
6134							# An exception is for VERSION numbers a 'use' statement. It has the format
6135							# use Module VERSION LIST
6136							# We could avoid this exception by writing a special sub to parse 'use'
6137							# statements and perhaps mark these numbers with a new type V (for VERSION)
6138							##elsif ( $last_nonblank_type =~ /^[nv]$/ ) {
6139							elsif ( $is_n_v{$last_nonblank_type} ) {
6140	1985					3257	$op_expected = OPERATOR;
6141	1985	100				4413	if ( $statement_type eq 'use' ) {
6142	11					43	$op_expected = UNKNOWN;
6143							}
6144							}
6145
6146							# quote...
6147							# TODO: labeled prototype words would better be given type 'A' or maybe
6148							# 'J'; not 'q'; or maybe mark as type 'Y'?
6149							elsif ( $last_nonblank_type eq 'q' ) {
6150	137	50				621	if ( $last_nonblank_token eq 'prototype' ) {
		100
6151	0					0	$op_expected = TERM;
6152							}
6153
6154							# update for --use-feature=class (rt145706):
6155							# Look for class VERSION after possible attribute, as in
6156							# class Example::Subclass : isa(Example::Base) 1.345 { ... }
6157							elsif ( $statement_type =~ /^package\b/ ) {
6158	3					6	$op_expected = TERM;
6159							}
6160
6161							# everything else
6162							else {
6163	134					343	$op_expected = OPERATOR;
6164							}
6165							}
6166
6167							# file handle or similar
6168							elsif ( $last_nonblank_type eq 'Z' ) {
6169
6170							# angle.t
6171	40	100	33			603	if ( $last_nonblank_token =~ /^\w/ ) {
		50	66
		100
		50
		100
6172	2					5	$op_expected = UNKNOWN;
6173							}
6174
6175							# Exception to weird parsing rules for 'x(' ... see case b1205:
6176							# In something like 'print $vv x(...' the x is an operator;
6177							# Likewise in 'print $vv x$ww' the x is an operator (case b1207)
6178							# otherwise x follows the weird parsing rules.
6179							elsif ( $tok eq 'x' && $next_type =~ /^[\(\$\@\%]$/ ) {
6180	0					0	$op_expected = OPERATOR;
6181							}
6182
6183							# The 'weird parsing rules' of next section do not work for '<' and '?'
6184							# It is best to mark them as unknown. Test case:
6185							# print $fh <DATA>;
6186							elsif ( $is_weird_parsing_rule_exception{$tok} ) {
6187	4					13	$op_expected = UNKNOWN;
6188							}
6189
6190							# For possible file handle like "$a", Perl uses weird parsing rules.
6191							# For example:
6192							# print $a/2,"/hi"; - division
6193							# print $a / 2,"/hi"; - division
6194							# print $a/ 2,"/hi"; - division
6195							# print $a /2,"/hi"; - pattern (and error)!
6196							# Some examples where this logic works okay, for '&','*','+':
6197							# print $fh &xsi_protos(@mods);
6198							# my $x = new $CompressClass *FH;
6199							# print $OUT +( $count % 15 ? ", " : "\n\t" );
6200							elsif ($blank_after_Z
6201							&& $next_type ne 'b' )
6202							{
6203	0					0	$op_expected = TERM;
6204							}
6205
6206							# Note that '?' and '<' have been moved above
6207							# ( $tok =~ /^([x\/\+\-\*\%\&\.\?\<]\|\>\>)$/ ) {
6208							elsif ( $tok =~ /^([x\/\+\-\*\%\&\.]\|\>\>)$/ ) {
6209
6210							# Do not complain in 'use' statements, which have special syntax.
6211							# For example, from RT#130344:
6212							# use lib $FindBin::Bin . '/lib';
6213	9	50				29	if ( $statement_type ne 'use' ) {
6214	9					37	$self->complain(
6215							"operator in possible indirect object location not recommended\n"
6216							);
6217							}
6218	9					19	$op_expected = OPERATOR;
6219							}
6220
6221							# all other cases
6222							else {
6223	25					77	$op_expected = UNKNOWN;
6224							}
6225							}
6226
6227							# anything else...
6228							else {
6229	1					2	$op_expected = UNKNOWN;
6230							}
6231
6232							DEBUG_OPERATOR_EXPECTED
6233	8397					11127	&& print {*STDOUT}
6234							"OPERATOR_EXPECTED: returns $op_expected for last type $last_nonblank_type token $last_nonblank_token\n";
6235
6236	8397					15924	return $op_expected;
6237
6238							} ## end sub operator_expected
6239
6240							sub new_statement_ok {
6241
6242							# Returns:
6243							# true if a new statement can begin here
6244							# false otherwise
6245
6246							# USES GLOBAL VARIABLES: $last_nonblank_token, $last_nonblank_type,
6247							# $brace_depth, $rbrace_type
6248
6249							# Uses:
6250							# - See if a 'class' statement can occur here
6251							# - See if a keyword begins at a new statement; i.e. is an 'if' a
6252							# block if or a trailing if? Also see if 'format' starts a statement.
6253							# - Decide if a ':' is part of a statement label (not a ternary)
6254
6255							# Curly braces are tricky because some small blocks do not get marked as
6256							# blocks..
6257
6258							# if it follows an opening curly brace..
6259	435	100	66	435	0	2321	if ( $last_nonblank_token eq '{' ) {
		100
6260
6261							# The safe thing is to return true in all cases because:
6262							# - a ternary ':' cannot occur here
6263							# - an 'if' here, for example, cannot be a trailing if
6264							# See test case c231 for an example.
6265							# This works but could be improved, if necessary, by returning
6266							# 'false' at obvious non-blocks.
6267	59					220	return 1;
6268							}
6269
6270							# if it follows a closing code block curly brace..
6271							elsif ($last_nonblank_token eq '}'
6272							&& $last_nonblank_type eq $last_nonblank_token )
6273							{
6274
6275							# a new statement can follow certain closing block braces ...
6276							# FIXME: The following has worked well but returns true in some cases
6277							# where it really should not. We could fix this by either excluding
6278							# certain blocks, like sort/map/grep/eval/asub or by just including
6279							# certain blocks.
6280	99					377	return $rbrace_type->[$brace_depth];
6281							}
6282
6283							# otherwise, it is a label if and only if it follows a ';' (real or fake)
6284							# or another label
6285							else {
6286	277		100			1718	return ( $last_nonblank_type eq ';' \|\| $last_nonblank_type eq 'J' );
6287							}
6288							} ## end sub new_statement_ok
6289
6290							sub code_block_type {
6291
6292							# Decide if this is a block of code, and its type.
6293							# Must be called only when $type = $token = '{'
6294							# The problem is to distinguish between the start of a block of code
6295							# and the start of an anonymous hash reference
6296							# Returns "" if not code block, otherwise returns 'last_nonblank_token'
6297							# to indicate the type of code block. (For example, 'last_nonblank_token'
6298							# might be 'if' for an if block, 'else' for an else block, etc).
6299							# USES GLOBAL VARIABLES: $last_nonblank_token, $last_nonblank_type,
6300							# $last_nonblank_block_type, $brace_depth, $rbrace_type
6301
6302							# handle case of multiple '{'s
6303
6304							# print "BLOCK_TYPE EXAMINING: type=$last_nonblank_type tok=$last_nonblank_token\n";
6305
6306	1301			1301	0	3333	my ( $self, $i, $rtokens, $rtoken_type, $max_token_index ) = @_;
6307	1301	100	66			16557	if ( $last_nonblank_token eq '{'
		100	66
		100	66
		100	100
		100	66
		100	66
		50
		50
		100
		100
		100
6308							&& $last_nonblank_type eq $last_nonblank_token )
6309							{
6310
6311							# opening brace where a statement may appear is probably
6312							# a code block but might be and anonymous hash reference
6313	90	50				342	if ( $rbrace_type->[$brace_depth] ) {
6314	90					314	return $self->decide_if_code_block( $i, $rtokens, $rtoken_type,
6315							$max_token_index );
6316							}
6317
6318							# cannot start a code block within an anonymous hash
6319							else {
6320	0					0	return EMPTY_STRING;
6321							}
6322							}
6323
6324							elsif ( $last_nonblank_token eq ';' ) {
6325
6326							# an opening brace where a statement may appear is probably
6327							# a code block but might be and anonymous hash reference
6328	48					276	return $self->decide_if_code_block( $i, $rtokens, $rtoken_type,
6329							$max_token_index );
6330							}
6331
6332							# handle case of '}{'
6333							elsif ($last_nonblank_token eq '}'
6334							&& $last_nonblank_type eq $last_nonblank_token )
6335							{
6336
6337							# a } { situation ...
6338							# could be hash reference after code block..(blktype1.t)
6339	9	50				28	if ($last_nonblank_block_type) {
6340	9					44	return $self->decide_if_code_block( $i, $rtokens, $rtoken_type,
6341							$max_token_index );
6342							}
6343
6344							# must be a block if it follows a closing hash reference
6345							else {
6346	0					0	return $last_nonblank_token;
6347							}
6348							}
6349
6350							#--------------------------------------------------------------
6351							# NOTE: braces after type characters start code blocks, but for
6352							# simplicity these are not identified as such. See also
6353							# sub is_non_structural_brace.
6354							#--------------------------------------------------------------
6355
6356							## elsif ( $last_nonblank_type eq 't' ) {
6357							## return $last_nonblank_token;
6358							## }
6359
6360							# brace after label:
6361							elsif ( $last_nonblank_type eq 'J' ) {
6362	34					113	return $last_nonblank_token;
6363							}
6364
6365							# otherwise, look at previous token. This must be a code block if
6366							# it follows any of these:
6367							# /^(BEGIN\|END\|CHECK\|INIT\|AUTOLOAD\|DESTROY\|UNITCHECK\|continue\|if\|elsif\|else\|unless\|do\|while\|until\|eval\|for\|foreach\|map\|grep\|sort)$/
6368							elsif ($is_code_block_token{$last_nonblank_token}
6369							\|\| $is_grep_alias{$last_nonblank_token} )
6370							{
6371
6372							# Bug Patch: Note that the opening brace after the 'if' in the following
6373							# snippet is an anonymous hash ref and not a code block!
6374							# print 'hi' if { x => 1, }->{x};
6375							# We can identify this situation because the last nonblank type
6376							# will be a keyword (instead of a closing paren)
6377	480	50	33			2515	if (
			66
6378							$last_nonblank_type eq 'k'
6379							&& ( $last_nonblank_token eq 'if'
6380							\|\| $last_nonblank_token eq 'unless' )
6381							)
6382							{
6383	0					0	return EMPTY_STRING;
6384							}
6385							else {
6386	480					1811	return $last_nonblank_token;
6387							}
6388							}
6389
6390							# or a sub or package BLOCK
6391							# Fixed for c250 to include new package type 'P', and change 'i' to 'S'
6392							elsif (
6393							$last_nonblank_type eq 'P'
6394							\|\| $last_nonblank_type eq 'S'
6395							\|\| ( $last_nonblank_type eq 't'
6396							&& substr( $last_nonblank_token, 0, 3 ) eq 'sub' )
6397							)
6398							{
6399	294					929	return $last_nonblank_token;
6400							}
6401
6402							elsif ( $statement_type =~ /^(sub\|package)\b/ ) {
6403	0					0	return $statement_type;
6404							}
6405
6406							# user-defined subs with block parameters (like grep/map/eval)
6407							elsif ( $last_nonblank_type eq 'G' ) {
6408	0					0	return $last_nonblank_token;
6409							}
6410
6411							# check bareword
6412							elsif ( $last_nonblank_type eq 'w' ) {
6413
6414							# check for syntax 'use MODULE LIST'
6415							# This fixes b1022 b1025 b1027 b1028 b1029 b1030 b1031
6416	22	100				89	return EMPTY_STRING if ( $statement_type eq 'use' );
6417
6418	21					122	return $self->decide_if_code_block( $i, $rtokens, $rtoken_type,
6419							$max_token_index );
6420							}
6421
6422							# Patch for bug # RT #94338 reported by Daniel Trizen
6423							# for-loop in a parenthesized block-map triggering an error message:
6424							# map( { foreach my $item ( '0', '1' ) { print $item} } qw(a b c) );
6425							# Check for a code block within a parenthesized function call
6426							elsif ( $last_nonblank_token eq '(' ) {
6427	81					215	my $paren_type = $rparen_type->[$paren_depth];
6428
6429							# /^(map\|grep\|sort)$/
6430	81	100	66			439	if ( $paren_type && $is_sort_map_grep{$paren_type} ) {
6431
6432							# We will mark this as a code block but use type 't' instead
6433							# of the name of the containing function. This will allow for
6434							# correct parsing but will usually produce better formatting.
6435							# Braces with block type 't' are not broken open automatically
6436							# in the formatter as are other code block types, and this usually
6437							# works best.
6438	1					5	return 't'; # (Not $paren_type)
6439							}
6440							else {
6441	80					264	return EMPTY_STRING;
6442							}
6443							}
6444
6445							# handle unknown syntax ') {'
6446							# we previously appended a '()' to mark this case
6447							elsif ( $last_nonblank_token =~ /$/ ) {
6448	14					117	return $last_nonblank_token;
6449							}
6450
6451							# anything else must be anonymous hash reference
6452							else {
6453	229					724	return EMPTY_STRING;
6454							}
6455							} ## end sub code_block_type
6456
6457							sub decide_if_code_block {
6458
6459							# USES GLOBAL VARIABLES: $last_nonblank_token
6460	168			168	0	481	my ( $self, $i, $rtokens, $rtoken_type, $max_token_index ) = @_;
6461
6462	168					580	my ( $next_nonblank_token, $i_next ) =
6463							$self->find_next_nonblank_token( $i, $rtokens, $max_token_index );
6464
6465							# we are at a '{' where a statement may appear.
6466							# We must decide if this brace starts an anonymous hash or a code
6467							# block.
6468							# return "" if anonymous hash, and $last_nonblank_token otherwise
6469
6470							# initialize to be code BLOCK
6471	168					498	my $code_block_type = $last_nonblank_token;
6472
6473							# Check for the common case of an empty anonymous hash reference:
6474							# Maybe something like sub { { } }
6475	168	100				607	if ( $next_nonblank_token eq '}' ) {
6476	5					11	$code_block_type = EMPTY_STRING;
6477							}
6478
6479							else {
6480
6481							# To guess if this '{' is an anonymous hash reference, look ahead
6482							# and test as follows:
6483							#
6484							# it is a hash reference if next come:
6485							# - a string or digit followed by a comma or =>
6486							# - bareword followed by =>
6487							# otherwise it is a code block
6488							#
6489							# Examples of anonymous hash ref:
6490							# {'aa',};
6491							# {1,2}
6492							#
6493							# Examples of code blocks:
6494							# {1; print "hello\n", 1;}
6495							# {$a,1};
6496
6497							# We are only going to look ahead one more (nonblank/comment) line.
6498							# Strange formatting could cause a bad guess, but that's unlikely.
6499	163					378	my @pre_types;
6500							my @pre_tokens;
6501
6502							# Ignore the rest of this line if it is a side comment
6503	163	100				493	if ( $next_nonblank_token ne '#' ) {
6504	139					585	@pre_types = @{$rtoken_type}[ $i + 1 .. $max_token_index ];
	139					806
6505	139					418	@pre_tokens = @{$rtokens}[ $i + 1 .. $max_token_index ];
	139					757
6506							}
6507
6508							# Here 20 is arbitrary but generous, and prevents wasting lots of time
6509							# in mangled files
6510	163					708	my ( $rpre_tokens, $rpre_types ) =
6511							$self->peek_ahead_for_n_nonblank_pre_tokens(20);
6512	163	100	66			619	if ( defined($rpre_types) && @{$rpre_types} ) {
	155					1554
6513	155					294	push @pre_types, @{$rpre_types};
	155					620
6514	155					333	push @pre_tokens, @{$rpre_tokens};
	155					731
6515							}
6516
6517							# put a sentinel token to simplify stopping the search
6518	163					439	push @pre_types, '}';
6519	163					368	push @pre_types, '}';
6520
6521	163					320	my $jbeg = 0;
6522	163	100				550	$jbeg = 1 if $pre_types[0] eq 'b';
6523
6524							# first look for one of these
6525							# - bareword
6526							# - bareword with leading -
6527							# - digit
6528							# - quoted string
6529	163					303	my $j = $jbeg;
6530	163	100	33			1165	if ( $pre_types[$j] =~ /^[\'\"]/ ) {
		100
		100
		50
6531
6532							# find the closing quote; don't worry about escapes
6533	1					3	my $quote_mark = $pre_types[$j];
6534	1					4	foreach my $k ( $j + 1 .. @pre_types - 2 ) {
6535	1	50				6	if ( $pre_types[$k] eq $quote_mark ) {
6536	1					3	$j = $k + 1;
6537							##my $next = $pre_types[$j];
6538	1					3	last;
6539							}
6540							}
6541							}
6542							elsif ( $pre_types[$j] eq 'd' ) {
6543	8					15	$j++;
6544							}
6545							elsif ( $pre_types[$j] eq 'w' ) {
6546	71					180	$j++;
6547							}
6548							elsif ( $pre_types[$j] eq '-' && $pre_types[ ++$j ] eq 'w' ) {
6549	0					0	$j++;
6550							}
6551							else {
6552							# none of the above
6553							}
6554	163	100				509	if ( $j > $jbeg ) {
6555
6556	80	100				329	$j++ if $pre_types[$j] eq 'b';
6557
6558							# Patched for RT #95708
6559	80	100	33			716	if (
			66
			66
6560
6561							# it is a comma which is not a pattern delimiter except for qw
6562							(
6563							$pre_types[$j] eq ','
6564							## !~ /^(s\|m\|y\|tr\|qr\|q\|qq\|qx)$/
6565							&& !$is_q_qq_qx_qr_s_y_tr_m{ $pre_tokens[$jbeg] }
6566							)
6567
6568							# or a =>
6569							\|\| ( $pre_types[$j] eq '=' && $pre_types[ ++$j ] eq '>' )
6570							)
6571							{
6572	18					45	$code_block_type = EMPTY_STRING;
6573							}
6574							}
6575
6576	163	100				541	if ($code_block_type) {
6577
6578							# Patch for cases b1085 b1128: It is uncertain if this is a block.
6579							# If this brace follows a bareword, then append a space as a signal
6580							# to the formatter that this may not be a block brace. To find the
6581							# corresponding code in Formatter.pm search for 'b1085'.
6582	145	100				1258	$code_block_type .= SPACE if ( $code_block_type =~ /^\w/ );
6583							}
6584							}
6585
6586	168					608	return $code_block_type;
6587							} ## end sub decide_if_code_block
6588
6589							sub report_unexpected {
6590
6591							# report unexpected token type and show where it is
6592							# USES GLOBAL VARIABLES: (none)
6593	0			0	0	0	my ( $self, $found, $expecting, $i_tok, $last_nonblank_i, $rpretoken_map,
6594							$rpretoken_type, $input_line )
6595							= @_;
6596
6597	0	0				0	if ( ++$self->[_unexpected_error_count_] <= MAX_NAG_MESSAGES ) {
6598	0					0	my $msg = "found $found where $expecting expected";
6599	0					0	my $pos = $rpretoken_map->[$i_tok];
6600	0					0	$self->interrupt_logfile();
6601	0					0	my $input_line_number = $self->[_last_line_number_];
6602	0					0	my ( $offset, $numbered_line, $underline ) =
6603							make_numbered_line( $input_line_number, $input_line, $pos );
6604	0					0	$underline = write_on_underline( $underline, $pos - $offset, '^' );
6605
6606	0					0	my $trailer = EMPTY_STRING;
6607	0	0	0			0	if ( ( $i_tok > 0 ) && ( $last_nonblank_i >= 0 ) ) {
6608	0					0	my $pos_prev = $rpretoken_map->[$last_nonblank_i];
6609	0					0	my $num;
6610	0	0				0	if ( $rpretoken_type->[ $i_tok - 1 ] eq 'b' ) {
6611	0					0	$num = $rpretoken_map->[ $i_tok - 1 ] - $pos_prev;
6612							}
6613							else {
6614	0					0	$num = $pos - $pos_prev;
6615							}
6616	0	0				0	if ( $num > 40 ) { $num = 40; $pos_prev = $pos - 40; }
	0					0
	0					0
6617
6618							$underline =
6619	0					0	write_on_underline( $underline, $pos_prev - $offset, '-' x $num );
6620	0					0	$trailer = " (previous token underlined)";
6621							}
6622	0					0	$underline =~ s/\s+$//;
6623	0					0	$self->warning( $numbered_line . "\n" );
6624	0					0	$self->warning( $underline . "\n" );
6625	0					0	$self->warning( $msg . $trailer . "\n" );
6626	0					0	$self->resume_logfile();
6627							}
6628	0					0	return;
6629							} ## end sub report_unexpected
6630
6631							my %is_sigil_or_paren;
6632							my %is_R_closing_sb;
6633
6634							BEGIN {
6635
6636	39			39		308	my @q = qw< $ & % * @ ) >;
6637	39					317	@{is_sigil_or_paren}{@q} = (1) x scalar(@q);
6638
6639	39					153	@q = qw(R ]);
6640	39					87832	@{is_R_closing_sb}{@q} = (1) x scalar(@q);
6641							} ## end BEGIN
6642
6643							sub is_non_structural_brace {
6644
6645							# Decide if a brace or bracket is structural or non-structural
6646							# by looking at the previous token and type
6647							# USES GLOBAL VARIABLES: $last_nonblank_type, $last_nonblank_token
6648
6649							# EXPERIMENTAL: Mark slices as structural; idea was to improve formatting.
6650							# Tentatively deactivated because it caused the wrong operator expectation
6651							# for this code:
6652							# $user = @vars[1] / 100;
6653							# Must update sub operator_expected before re-implementing.
6654							# if ( $last_nonblank_type eq 'i' && $last_nonblank_token =~ /^@/ ) {
6655							# return 0;
6656							# }
6657
6658							#--------------------------------------------------------------
6659							# NOTE: braces after type characters start code blocks, but for
6660							# simplicity these are not identified as such. See also
6661							# sub code_block_type
6662							#--------------------------------------------------------------
6663
6664							##if ($last_nonblank_type eq 't') {return 0}
6665
6666							# otherwise, it is non-structural if it is decorated
6667							# by type information.
6668							# For example, the '{' here is non-structural: ${xxx}
6669							# Removed '::' to fix c074
6670							## $last_nonblank_token =~ /^([\$\@\*\&\%\)]\|->\|::)/
6671							return (
6672							## $last_nonblank_token =~ /^([\$\@\*\&\%\)]\|->)/
6673							$is_sigil_or_paren{ substr( $last_nonblank_token, 0, 1 ) }
6674							\|\| substr( $last_nonblank_token, 0, 2 ) eq '->'
6675
6676							# or if we follow a hash or array closing curly brace or bracket
6677							# For example, the second '{' in this is non-structural: $a{'x'}{'y'}
6678							# because the first '}' would have been given type 'R'
6679							##\|\| $last_nonblank_type =~ /^([R\]])$/
6680	2262		66	2262	0	14710	\|\| $is_R_closing_sb{$last_nonblank_type}
6681							);
6682							} ## end sub is_non_structural_brace
6683
6684							#######################################################################
6685							# Tokenizer routines for tracking container nesting depths
6686							#######################################################################
6687
6688							# The following routines keep track of nesting depths of the nesting
6689							# types, ( [ { and ?. This is necessary for determining the indentation
6690							# level, and also for debugging programs. Not only do they keep track of
6691							# nesting depths of the individual brace types, but they check that each
6692							# of the other brace types is balanced within matching pairs. For
6693							# example, if the program sees this sequence:
6694							#
6695							# { ( ( ) }
6696							#
6697							# then it can determine that there is an extra left paren somewhere
6698							# between the { and the }. And so on with every other possible
6699							# combination of outer and inner brace types. For another
6700							# example:
6701							#
6702							# ( [ ..... ] ] )
6703							#
6704							# which has an extra ] within the parens.
6705							#
6706							# The brace types have indexes 0 .. 3 which are indexes into
6707							# the matrices.
6708							#
6709							# The pair ? : are treated as just another nesting type, with ? acting
6710							# as the opening brace and : acting as the closing brace.
6711							#
6712							# The matrix
6713							#
6714							# $rdepth_array->[$a][$b][ $rcurrent_depth->[$a] ] = $rcurrent_depth->[$b];
6715							#
6716							# saves the nesting depth of brace type $b (where $b is either of the other
6717							# nesting types) when brace type $a enters a new depth. When this depth
6718							# decreases, a check is made that the current depth of brace types $b is
6719							# unchanged, or otherwise there must have been an error. This can
6720							# be very useful for localizing errors, particularly when perl runs to
6721							# the end of a large file (such as this one) and announces that there
6722							# is a problem somewhere.
6723							#
6724							# A numerical sequence number is maintained for every nesting type,
6725							# so that each matching pair can be uniquely identified in a simple
6726							# way.
6727
6728							sub increase_nesting_depth {
6729	4574			4574	0	9142	my ( $self, $aa, $pos ) = @_;
6730
6731							# USES GLOBAL VARIABLES: $rcurrent_depth,
6732							# $rcurrent_sequence_number, $rdepth_array,
6733							# $rstarting_line_of_current_depth, $statement_type
6734	4574					8234	my $cd_aa = ++$rcurrent_depth->[$aa];
6735	4574					6591	$total_depth++;
6736	4574					9142	$rtotal_depth->[$aa][$cd_aa] = $total_depth;
6737	4574					7334	my $input_line_number = $self->[_last_line_number_];
6738	4574					7766	my $input_line = $self->[_line_of_text_];
6739
6740							# Sequence numbers increment by number of items. This keeps
6741							# a unique set of numbers but still allows the relative location
6742							# of any type to be determined.
6743
6744							# make a new unique sequence number
6745	4574					7684	my $seqno = $next_sequence_number++;
6746
6747	4574					8626	$rcurrent_sequence_number->[$aa][$cd_aa] = $seqno;
6748
6749	4574					14863	$rstarting_line_of_current_depth->[$aa][$cd_aa] =
6750							[ $input_line_number, $input_line, $pos ];
6751
6752	4574					14300	for my $bb ( 0 .. @closing_brace_names - 1 ) {
6753	18296	100				33421	next if ( $bb == $aa );
6754	13722					26829	$rdepth_array->[$aa][$bb][$cd_aa] = $rcurrent_depth->[$bb];
6755							}
6756
6757							# set a flag for indenting a nested ternary statement
6758	4574					8023	my $indent = 0;
6759	4574	100				10649	if ( $aa == QUESTION_COLON ) {
6760	187					718	$rnested_ternary_flag->[$cd_aa] = 0;
6761	187	100				723	if ( $cd_aa > 1 ) {
6762	17	100				151	if ( $rnested_ternary_flag->[ $cd_aa - 1 ] == 0 ) {
6763	16					66	my $pdepth = $rtotal_depth->[$aa][ $cd_aa - 1 ];
6764	16	100				68	if ( $pdepth == $total_depth - 1 ) {
6765	8					18	$indent = 1;
6766	8					22	$rnested_ternary_flag->[ $cd_aa - 1 ] = -1;
6767							}
6768							}
6769							}
6770							}
6771
6772							# Fix part #1 for git82: save last token type for propagation of type 'Z'
6773	4574					15681	$rnested_statement_type->[$aa][$cd_aa] =
6774							[ $statement_type, $last_nonblank_type, $last_nonblank_token ];
6775	4574					7742	$statement_type = EMPTY_STRING;
6776	4574					12467	return ( $seqno, $indent );
6777							} ## end sub increase_nesting_depth
6778
6779							sub is_balanced_closing_container {
6780
6781							# Return true if a closing container can go here without error
6782							# Return false if not
6783	47			47	0	150	my ($aa) = @_;
6784
6785							# cannot close if there was no opening
6786	47					90	my $cd_aa = $rcurrent_depth->[$aa];
6787	47	100				209	return if ( $cd_aa <= 0 );
6788
6789							# check that any other brace types $bb contained within would be balanced
6790	8					26	for my $bb ( 0 .. @closing_brace_names - 1 ) {
6791	8	50				25	next if ( $bb == $aa );
6792							return
6793	8	50				60	if ( $rdepth_array->[$aa][$bb][$cd_aa] != $rcurrent_depth->[$bb] );
6794							}
6795
6796							# OK, everything will be balanced
6797	0					0	return 1;
6798							} ## end sub is_balanced_closing_container
6799
6800							sub decrease_nesting_depth {
6801
6802	4574			4574	0	9174	my ( $self, $aa, $pos ) = @_;
6803
6804							# USES GLOBAL VARIABLES: $rcurrent_depth,
6805							# $rcurrent_sequence_number, $rdepth_array, $rstarting_line_of_current_depth
6806							# $statement_type
6807	4574					7043	my $seqno = 0;
6808	4574					7210	my $input_line_number = $self->[_last_line_number_];
6809	4574					7971	my $input_line = $self->[_line_of_text_];
6810
6811	4574					6950	my $outdent = 0;
6812	4574					6565	$total_depth--;
6813	4574					7947	my $cd_aa = $rcurrent_depth->[$aa];
6814	4574	50				9273	if ( $cd_aa > 0 ) {
6815
6816							# set a flag for un-indenting after seeing a nested ternary statement
6817	4574					8396	$seqno = $rcurrent_sequence_number->[$aa][$cd_aa];
6818	4574	100				9598	if ( $aa == QUESTION_COLON ) {
6819	187					608	$outdent = $rnested_ternary_flag->[$cd_aa];
6820							}
6821
6822							# Fix part #2 for git82: use saved type for propagation of type 'Z'
6823							# through type L-R braces. Perl seems to allow ${bareword}
6824							# as an indirect object, but nothing much more complex than that.
6825							( $statement_type, my $saved_type, my $saved_token ) =
6826	4574					6905	@{ $rnested_statement_type->[$aa][ $rcurrent_depth->[$aa] ] };
	4574					12872
6827	4574	50	100			16045	if ( $aa == BRACE
			66
			66
6828							&& $saved_type eq 'Z'
6829							&& $last_nonblank_type eq 'w'
6830							&& $rbrace_structural_type->[$brace_depth] eq 'L' )
6831							{
6832	1					3	$last_nonblank_type = $saved_type;
6833							}
6834
6835							# check that any brace types $bb contained within are balanced
6836	4574					12633	for my $bb ( 0 .. @closing_brace_names - 1 ) {
6837	18296	100				32271	next if ( $bb == $aa );
6838
6839	13722	50				31984	if ( $rdepth_array->[$aa][$bb][$cd_aa] != $rcurrent_depth->[$bb] ) {
6840	0					0	my $diff =
6841							$rcurrent_depth->[$bb] - $rdepth_array->[$aa][$bb][$cd_aa];
6842
6843							# don't whine too many times
6844	0					0	my $saw_brace_error = $self->get_saw_brace_error();
6845	0	0	0			0	if (
			0
6846							$saw_brace_error <= MAX_NAG_MESSAGES
6847
6848							# if too many closing types have occurred, we probably
6849							# already caught this error
6850							&& ( ( $diff > 0 ) \|\| ( $saw_brace_error <= 0 ) )
6851							)
6852							{
6853	0					0	$self->interrupt_logfile();
6854	0					0	my $rsl = $rstarting_line_of_current_depth->[$aa][$cd_aa];
6855	0					0	my $sl = $rsl->[0];
6856	0					0	my $rel = [ $input_line_number, $input_line, $pos ];
6857	0					0	my $el = $rel->[0];
6858	0					0	my ($ess);
6859
6860	0	0	0			0	if ( $diff == 1 \|\| $diff == -1 ) {
6861	0					0	$ess = EMPTY_STRING;
6862							}
6863							else {
6864	0					0	$ess = 's';
6865							}
6866	0	0				0	my $bname =
6867							( $diff > 0 )
6868							? $opening_brace_names[$bb]
6869							: $closing_brace_names[$bb];
6870	0					0	$self->write_error_indicator_pair( @{$rsl}, '^' );
	0					0
6871	0					0	my $msg = <<"EOM";
6872							Found $diff extra $bname$ess between $opening_brace_names[$aa] on line $sl and $closing_brace_names[$aa] on line $el
6873							EOM
6874
6875	0	0				0	if ( $diff > 0 ) {
6876	0					0	my $rml =
6877							$rstarting_line_of_current_depth->[$bb]
6878							[ $rcurrent_depth->[$bb] ];
6879	0					0	my $ml = $rml->[0];
6880	0					0	$msg .=
6881							" The most recent un-matched $bname is on line $ml\n";
6882	0					0	$self->write_error_indicator_pair( @{$rml}, '^' );
	0					0
6883							}
6884	0					0	$self->write_error_indicator_pair( @{$rel}, '^' );
	0					0
6885	0					0	$self->warning($msg);
6886	0					0	$self->resume_logfile();
6887							}
6888	0					0	$self->increment_brace_error();
6889							}
6890							}
6891	4574					8439	$rcurrent_depth->[$aa]--;
6892							}
6893							else {
6894
6895	0					0	my $saw_brace_error = $self->get_saw_brace_error();
6896	0	0				0	if ( $saw_brace_error <= MAX_NAG_MESSAGES ) {
6897	0					0	my $msg = <<"EOM";
6898							There is no previous $opening_brace_names[$aa] to match a $closing_brace_names[$aa] on line $input_line_number
6899							EOM
6900	0					0	$self->indicate_error( $msg, $input_line_number, $input_line, $pos,
6901							'^' );
6902							}
6903	0					0	$self->increment_brace_error();
6904
6905							# keep track of errors in braces alone (ignoring ternary nesting errors)
6906	0	0				0	$self->[_true_brace_error_count_]++
6907							if ( $closing_brace_names[$aa] ne "':'" );
6908							}
6909	4574					12458	return ( $seqno, $outdent );
6910							} ## end sub decrease_nesting_depth
6911
6912							sub check_final_nesting_depths {
6913
6914							# USES GLOBAL VARIABLES: $rcurrent_depth, $rstarting_line_of_current_depth
6915	561			561	0	1456	my $self = shift;
6916
6917	561					2504	for my $aa ( 0 .. @closing_brace_names - 1 ) {
6918
6919	2244					3897	my $cd_aa = $rcurrent_depth->[$aa];
6920	2244	50				5206	if ($cd_aa) {
6921	0					0	my $rsl = $rstarting_line_of_current_depth->[$aa][$cd_aa];
6922	0					0	my $sl = $rsl->[0];
6923	0					0	my $msg = <<"EOM";
6924							Final nesting depth of $opening_brace_names[$aa]s is $cd_aa
6925							The most recent un-matched $opening_brace_names[$aa] is on line $sl
6926							EOM
6927	0					0	$self->indicate_error( $msg, @{$rsl}, '^' );
	0					0
6928	0					0	$self->increment_brace_error();
6929							}
6930							}
6931	561					1613	return;
6932							} ## end sub check_final_nesting_depths
6933
6934							#######################################################################
6935							# Tokenizer routines for looking ahead in input stream
6936							#######################################################################
6937
6938							sub peek_ahead_for_n_nonblank_pre_tokens {
6939
6940							# returns next n pretokens if they exist
6941							# returns undef's if hits eof without seeing any pretokens
6942							# USES GLOBAL VARIABLES: (none)
6943	170			170	0	430	my ( $self, $max_pretokens ) = @_;
6944	170					321	my $line;
6945	170					310	my $i = 0;
6946	170					405	my ( $rpre_tokens, $rmap, $rpre_types );
6947
6948	170					562	while ( $line = $self->peek_ahead( $i++ ) ) {
6949	182					990	$line =~ s/^\s*//; # trim leading blanks
6950	182	100				705	next if ( length($line) <= 0 ); # skip blank
6951	176	100				642	next if ( $line =~ /^#/ ); # skip comment
6952	162					514	( $rpre_tokens, $rmap, $rpre_types ) =
6953							pre_tokenize( $line, $max_pretokens );
6954	162					469	last;
6955							}
6956	170					620	return ( $rpre_tokens, $rpre_types );
6957							} ## end sub peek_ahead_for_n_nonblank_pre_tokens
6958
6959							# look ahead for next non-blank, non-comment line of code
6960							sub peek_ahead_for_nonblank_token {
6961
6962							# USES GLOBAL VARIABLES: (none)
6963	125			125	0	386	my ( $self, $rtokens, $max_token_index ) = @_;
6964	125					246	my $line;
6965	125					262	my $i = 0;
6966
6967	125					511	while ( $line = $self->peek_ahead( $i++ ) ) {
6968	169					995	$line =~ s/^\s*//; # trim leading blanks
6969	169	100				660	next if ( length($line) <= 0 ); # skip blank
6970	144	100				682	next if ( $line =~ /^#/ ); # skip comment
6971
6972							# Updated from 2 to 3 to get trigraphs, added for case b1175
6973	123					466	my ( $rtok, $rmap, $rtype ) = pre_tokenize( $line, 3 );
6974	123					429	my $j = $max_token_index + 1;
6975
6976	123					297	foreach my $tok ( @{$rtok} ) {
	123					364
6977	355	100				1398	last if ( $tok =~ "\n" );
6978	320					835	$rtokens->[ ++$j ] = $tok;
6979							}
6980	123					491	last;
6981							}
6982	125					360	return;
6983							} ## end sub peek_ahead_for_nonblank_token
6984
6985							#######################################################################
6986							# Tokenizer guessing routines for ambiguous situations
6987							#######################################################################
6988
6989							sub guess_if_pattern_or_conditional {
6990
6991							# this routine is called when we have encountered a ? following an
6992							# unknown bareword, and we must decide if it starts a pattern or not
6993							# input parameters:
6994							# $i - token index of the ? starting possible pattern
6995							# output parameters:
6996							# $is_pattern = 0 if probably not pattern, =1 if probably a pattern
6997							# msg = a warning or diagnostic message
6998							# USES GLOBAL VARIABLES: $last_nonblank_token
6999
7000	11			11	0	51	my ( $self, $i, $rtokens, $rtoken_map, $max_token_index ) = @_;
7001	11					31	my $is_pattern = 0;
7002	11					55	my $msg = "guessing that ? after $last_nonblank_token starts a ";
7003
7004	11	50				46	if ( $i >= $max_token_index ) {
7005	0					0	$msg .= "conditional (no end to pattern found on the line)\n";
7006							}
7007							else {
7008	11					29	my $ibeg = $i;
7009	11					29	$i = $ibeg + 1;
7010	11					33	my $next_token = $rtokens->[$i]; # first token after ?
7011
7012							# look for a possible ending ? on this line..
7013	11					27	my $in_quote = 1;
7014	11					23	my $quote_depth = 0;
7015	11					23	my $quote_character = EMPTY_STRING;
7016	11					25	my $quote_pos = 0;
7017	11					25	my $quoted_string;
7018							(
7019
7020	11					57	$i,
7021							$in_quote,
7022							$quote_character,
7023							$quote_pos,
7024							$quote_depth,
7025							$quoted_string,
7026
7027							) = $self->follow_quoted_string(
7028
7029							$ibeg,
7030							$in_quote,
7031							$rtokens,
7032							$quote_character,
7033							$quote_pos,
7034							$quote_depth,
7035							$max_token_index,
7036
7037							);
7038
7039	11	50				71	if ($in_quote) {
7040
7041							# we didn't find an ending ? on this line,
7042							# so we bias towards conditional
7043	11					32	$is_pattern = 0;
7044	11					38	$msg .= "conditional (no ending ? on this line)\n";
7045
7046							# we found an ending ?, so we bias towards a pattern
7047							}
7048							else {
7049
7050							# Watch out for an ending ? in quotes, like this
7051							# my $case_flag = File::Spec->case_tolerant ? '(?i)' : '';
7052	0					0	my $s_quote = 0;
7053	0					0	my $d_quote = 0;
7054	0					0	my $colons = 0;
7055	0					0	foreach my $ii ( $ibeg + 1 .. $i - 1 ) {
7056	0					0	my $tok = $rtokens->[$ii];
7057	0	0				0	if ( $tok eq ":" ) { $colons++ }
	0					0
7058	0	0				0	if ( $tok eq "'" ) { $s_quote++ }
	0					0
7059	0	0				0	if ( $tok eq '"' ) { $d_quote++ }
	0					0
7060							}
7061	0	0	0			0	if ( $s_quote % 2 \|\| $d_quote % 2 \|\| $colons ) {
		0	0
7062	0					0	$is_pattern = 0;
7063	0					0	$msg .= "found ending ? but unbalanced quote chars\n";
7064							}
7065							elsif (
7066							$self->pattern_expected( $i, $rtokens, $max_token_index ) >= 0 )
7067							{
7068	0					0	$is_pattern = 1;
7069	0					0	$msg .= "pattern (found ending ? and pattern expected)\n";
7070							}
7071							else {
7072	0					0	$msg .= "pattern (uncertain, but found ending ?)\n";
7073							}
7074							}
7075							}
7076	11					41	return ( $is_pattern, $msg );
7077							} ## end sub guess_if_pattern_or_conditional
7078
7079							my %is_known_constant;
7080							my %is_known_function;
7081
7082							BEGIN {
7083
7084							# Constants like 'pi' in Trig.pm are common
7085	39			39		266	my @q = qw(pi pi2 pi4 pip2 pip4);
7086	39					265	@{is_known_constant}{@q} = (1) x scalar(@q);
7087
7088							# parenless calls of 'ok' are common
7089	39					140	@q = qw( ok );
7090	39					70432	@{is_known_function}{@q} = (1) x scalar(@q);
7091							} ## end BEGIN
7092
7093							sub guess_if_pattern_or_division {
7094
7095							# this routine is called when we have encountered a / following an
7096							# unknown bareword, and we must decide if it starts a pattern or is a
7097							# division
7098							# input parameters:
7099							# $i - token index of the / starting possible pattern
7100							# output parameters:
7101							# $is_pattern = 0 if probably division, =1 if probably a pattern
7102							# msg = a warning or diagnostic message
7103							# USES GLOBAL VARIABLES: $last_nonblank_token
7104	0			0	0	0	my ( $self, $i, $rtokens, $rtoken_map, $max_token_index ) = @_;
7105	0					0	my $is_pattern = 0;
7106	0					0	my $msg = "guessing that / after $last_nonblank_token starts a ";
7107
7108	0	0				0	if ( $i >= $max_token_index ) {
7109	0					0	$msg .= "division (no end to pattern found on the line)\n";
7110							}
7111							else {
7112	0					0	my $ibeg = $i;
7113	0					0	my $divide_possible =
7114							$self->is_possible_numerator( $i, $rtokens, $max_token_index );
7115
7116	0	0				0	if ( $divide_possible < 0 ) {
7117	0					0	$msg = "pattern (division not possible here)\n";
7118	0					0	$is_pattern = 1;
7119	0					0	return ( $is_pattern, $msg );
7120							}
7121
7122	0					0	$i = $ibeg + 1;
7123	0					0	my $next_token = $rtokens->[$i]; # first token after slash
7124
7125							# One of the things we can look at is the spacing around the slash.
7126							# There # are four possible spacings around the first slash:
7127							#
7128							# return pi/two;#/; -/-
7129							# return pi/ two;#/; -/+
7130							# return pi / two;#/; +/+
7131							# return pi /two;#/; +/- <-- possible pattern
7132							#
7133							# Spacing rule: a space before the slash but not after the slash
7134							# usually indicates a pattern. We can use this to break ties.
7135
7136	0		0			0	my $is_pattern_by_spacing =
7137							( $i > 1 && $next_token !~ m/^\s/ && $rtokens->[ $i - 2 ] =~ m/^\s/ );
7138
7139							# look for a possible ending / on this line..
7140	0					0	my $in_quote = 1;
7141	0					0	my $quote_depth = 0;
7142	0					0	my $quote_character = EMPTY_STRING;
7143	0					0	my $quote_pos = 0;
7144	0					0	my $quoted_string;
7145							(
7146	0					0	$i, $in_quote, $quote_character, $quote_pos, $quote_depth,
7147							$quoted_string
7148							)
7149							= $self->follow_quoted_string( $ibeg, $in_quote, $rtokens,
7150							$quote_character, $quote_pos, $quote_depth, $max_token_index );
7151
7152	0	0				0	if ($in_quote) {
7153
7154							# we didn't find an ending / on this line, so we bias towards
7155							# division
7156	0	0				0	if ( $divide_possible >= 0 ) {
7157	0					0	$is_pattern = 0;
7158	0					0	$msg .= "division (no ending / on this line)\n";
7159							}
7160							else {
7161
7162							# assuming a multi-line pattern ... this is risky, but division
7163							# does not seem possible. If this fails, it would either be due
7164							# to a syntax error in the code, or the division_expected logic
7165							# needs to be fixed.
7166	0					0	$msg = "multi-line pattern (division not possible)\n";
7167	0					0	$is_pattern = 1;
7168							}
7169							}
7170
7171							# we found an ending /, so we bias slightly towards a pattern
7172							else {
7173
7174	0					0	my $pattern_expected =
7175							$self->pattern_expected( $i, $rtokens, $max_token_index );
7176
7177	0	0				0	if ( $pattern_expected >= 0 ) {
7178
7179							# pattern looks possible...
7180	0	0				0	if ( $divide_possible >= 0 ) {
7181
7182							# Both pattern and divide can work here...
7183
7184							# Increase weight of divide if a pure number follows
7185	0					0	$divide_possible += $next_token =~ /^\d+$/;
7186
7187							# Check for known constants in the numerator, like 'pi'
7188	0	0				0	if ( $is_known_constant{$last_nonblank_token} ) {
		0
		0
		0
7189	0					0	$msg .=
7190							"division (pattern works too but saw known constant '$last_nonblank_token')\n";
7191	0					0	$is_pattern = 0;
7192							}
7193
7194							# A very common bare word in pattern expressions is 'ok'
7195							elsif ( $is_known_function{$last_nonblank_token} ) {
7196	0					0	$msg .=
7197							"pattern (division works too but saw '$last_nonblank_token')\n";
7198	0					0	$is_pattern = 1;
7199							}
7200
7201							# If one rule is more definite, use it
7202							elsif ( $divide_possible > $pattern_expected ) {
7203	0					0	$msg .=
7204							"division (more likely based on following tokens)\n";
7205	0					0	$is_pattern = 0;
7206							}
7207
7208							# otherwise, use the spacing rule
7209							elsif ($is_pattern_by_spacing) {
7210	0					0	$msg .=
7211							"pattern (guess on spacing, but division possible too)\n";
7212	0					0	$is_pattern = 1;
7213							}
7214							else {
7215	0					0	$msg .=
7216							"division (guess on spacing, but pattern is possible too)\n";
7217	0					0	$is_pattern = 0;
7218							}
7219							}
7220
7221							# divide_possible < 0 means divide can not work here
7222							else {
7223	0					0	$is_pattern = 1;
7224	0					0	$msg .= "pattern (division not possible)\n";
7225							}
7226							}
7227
7228							# pattern does not look possible...
7229							else {
7230
7231	0	0				0	if ( $divide_possible >= 0 ) {
7232	0					0	$is_pattern = 0;
7233	0					0	$msg .= "division (pattern not possible)\n";
7234							}
7235
7236							# Neither pattern nor divide look possible...go by spacing
7237							else {
7238	0	0				0	if ($is_pattern_by_spacing) {
7239	0					0	$msg .= "pattern (guess on spacing)\n";
7240	0					0	$is_pattern = 1;
7241							}
7242							else {
7243	0					0	$msg .= "division (guess on spacing)\n";
7244	0					0	$is_pattern = 0;
7245							}
7246							}
7247							}
7248							}
7249							}
7250	0					0	return ( $is_pattern, $msg );
7251							} ## end sub guess_if_pattern_or_division
7252
7253							# try to resolve here-doc vs. shift by looking ahead for
7254							# non-code or the end token (currently only looks for end token)
7255							# returns 1 if it is probably a here doc, 0 if not
7256							sub guess_if_here_doc {
7257
7258	0			0	0	0	my ( $self, $next_token ) = @_;
7259
7260							# This is how many lines we will search for a target as part of the
7261							# guessing strategy. It is a constant because there is probably
7262							# little reason to change it.
7263							# USES GLOBAL VARIABLES: $current_package $ris_constant,
7264	0					0	my $HERE_DOC_WINDOW = 40;
7265
7266	0					0	my $here_doc_expected = 0;
7267	0					0	my $line;
7268	0					0	my $k = 0;
7269	0					0	my $msg = "checking <<";
7270
7271	0					0	while ( $line = $self->peek_ahead( $k++ ) ) {
7272	0					0	chomp $line;
7273
7274	0	0				0	if ( $line =~ /^$next_token$/ ) {
7275	0					0	$msg .= " -- found target $next_token ahead $k lines\n";
7276	0					0	$here_doc_expected = 1; # got it
7277	0					0	last;
7278							}
7279	0	0				0	last if ( $k >= $HERE_DOC_WINDOW );
7280							}
7281
7282	0	0				0	if ( !$here_doc_expected ) {
7283
7284	0	0				0	if ( !defined($line) ) {
7285	0					0	$here_doc_expected = -1; # hit eof without seeing target
7286	0					0	$msg .= " -- must be shift; target $next_token not in file\n";
7287
7288							}
7289							else { # still unsure..taking a wild guess
7290
7291	0	0				0	if ( !$ris_constant->{$current_package}{$next_token} ) {
7292	0					0	$here_doc_expected = 1;
7293	0					0	$msg .=
7294							" -- guessing it's a here-doc ($next_token not a constant)\n";
7295							}
7296							else {
7297	0					0	$msg .=
7298							" -- guessing it's a shift ($next_token is a constant)\n";
7299							}
7300							}
7301							}
7302	0					0	$self->write_logfile_entry($msg);
7303	0					0	return $here_doc_expected;
7304							} ## end sub guess_if_here_doc
7305
7306							#######################################################################
7307							# Tokenizer Routines for scanning identifiers and related items
7308							#######################################################################
7309
7310							sub scan_bare_identifier_do {
7311
7312							# this routine is called to scan a token starting with an alphanumeric
7313							# variable or package separator, :: or '.
7314							# USES GLOBAL VARIABLES: $current_package, $last_nonblank_token,
7315							# $last_nonblank_type, $rparen_type, $paren_depth
7316
7317	1672			1672	0	4993	my ( $self, $input_line, $i, $tok, $type, $prototype, $rtoken_map,
7318							$max_token_index )
7319							= @_;
7320	1672					2747	my $i_begin = $i;
7321	1672					2911	my $package = undef;
7322
7323	1672					2599	my $i_beg = $i;
7324
7325							# we have to back up one pretoken at a :: since each : is one pretoken
7326	1672	100				4507	if ( $tok eq '::' ) { $i_beg-- }
	9					15
7327	1672	50				3941	if ( $tok eq '->' ) { $i_beg-- }
	0					0
7328	1672					3076	my $pos_beg = $rtoken_map->[$i_beg];
7329	1672					5048	pos($input_line) = $pos_beg;
7330
7331							# Examples:
7332							# A::B::C
7333							# A::
7334							# ::A
7335							# A'B
7336	1672	50				12109	if ( $input_line =~ m/\G\s((?:\w(?:'\|::)))*(?:(?:->)?(\w+))?/gc ) {
7337
7338	1672					3348	my $pos = pos($input_line);
7339	1672					2832	my $numc = $pos - $pos_beg;
7340	1672					3627	$tok = substr( $input_line, $pos_beg, $numc );
7341
7342							# type 'w' includes anything without leading type info
7343							# ($,%,@,*) including something like abc::def::ghi
7344	1672					2810	$type = 'w';
7345
7346	1672					2824	my $sub_name = EMPTY_STRING;
7347	1672	100				4796	if ( defined($2) ) { $sub_name = $2; }
	1667					3207
7348	1672	100				4253	if ( defined($1) ) {
7349	274					671	$package = $1;
7350
7351							# patch: don't allow isolated package name which just ends
7352							# in the old style package separator (single quote). Example:
7353							# use CGI':all';
7354	274	50	66			1093	if ( !($sub_name) && substr( $package, -1, 1 ) eq '\'' ) {
7355	0					0	$pos--;
7356							}
7357
7358	274					788	$package =~ s/\'/::/g;
7359	274	100				808	if ( $package =~ /^\:/ ) { $package = 'main' . $package }
	9					25
7360	274					1213	$package =~ s/::$//;
7361							}
7362							else {
7363	1398					2646	$package = $current_package;
7364
7365							# patched for c043, part 1: keyword does not follow '->'
7366	1398	50	66			5514	if ( $is_keyword{$tok} && $last_nonblank_type ne '->' ) {
7367	0					0	$type = 'k';
7368							}
7369							}
7370
7371							# if it is a bareword.. patched for c043, part 2: not following '->'
7372	1672	100	66			7677	if ( $type eq 'w' && $last_nonblank_type ne '->' ) {
7373
7374							# check for v-string with leading 'v' type character
7375							# (This seems to have precedence over filehandle, type 'Y')
7376	1003	100	66			14705	if ( $tok =~ /^v\d[_\d]*$/ ) {
		100	66
		100	66
		50
		50
		100
		100
7377
7378							# we only have the first part - something like 'v101' -
7379							# look for more
7380	2	50				17	if ( $input_line =~ m/\G(\.\d[_\d]*)+/gc ) {
7381	2					8	$pos = pos($input_line);
7382	2					4	$numc = $pos - $pos_beg;
7383	2					6	$tok = substr( $input_line, $pos_beg, $numc );
7384							}
7385	2					6	$type = 'v';
7386
7387							# warn if this version can't handle v-strings
7388	2					12	$self->report_v_string($tok);
7389							}
7390
7391							elsif ( $ris_constant->{$package}{$sub_name} ) {
7392	12					52	$type = 'C';
7393							}
7394
7395							# bareword after sort has implied empty prototype; for example:
7396							# @sorted = sort numerically ( 53, 29, 11, 32, 7 );
7397							# This has priority over whatever the user has specified.
7398							elsif ($last_nonblank_token eq 'sort'
7399							&& $last_nonblank_type eq 'k' )
7400							{
7401	1					4	$type = 'Z';
7402							}
7403
7404							# Note: strangely, perl does not seem to really let you create
7405							# functions which act like eval and do, in the sense that eval
7406							# and do may have operators following the final }, but any operators
7407							# that you create with prototype (&) apparently do not allow
7408							# trailing operators, only terms. This seems strange.
7409							# If this ever changes, here is the update
7410							# to make perltidy behave accordingly:
7411
7412							# elsif ( $ris_block_function->{$package}{$tok} ) {
7413							# $tok='eval'; # patch to do braces like eval - doesn't work
7414							# $type = 'k';
7415							#}
7416							# TODO: This could become a separate type to allow for different
7417							# future behavior:
7418							elsif ( $ris_block_function->{$package}{$sub_name} ) {
7419	0					0	$type = 'G';
7420							}
7421							elsif ( $ris_block_list_function->{$package}{$sub_name} ) {
7422	0					0	$type = 'G';
7423							}
7424							elsif ( $ris_user_function->{$package}{$sub_name} ) {
7425	6					16	$type = 'U';
7426	6					18	$prototype = $ruser_function_prototype->{$package}{$sub_name};
7427							}
7428
7429							# check for indirect object
7430							elsif (
7431
7432							# added 2001-03-27: must not be followed immediately by '('
7433							# see fhandle.t
7434							( $input_line !~ m/\G\(/gc )
7435
7436							# and
7437							&& (
7438
7439							# preceded by keyword like 'print', 'printf' and friends
7440							$is_indirect_object_taker{$last_nonblank_token}
7441
7442							# or preceded by something like 'print(' or 'printf('
7443							\|\| (
7444							( $last_nonblank_token eq '(' )
7445							&& $is_indirect_object_taker{
7446							$rparen_type->[$paren_depth]
7447							}
7448
7449							)
7450							)
7451							)
7452							{
7453
7454							# may not be indirect object unless followed by a space;
7455							# updated 2021-01-16 to consider newline to be a space.
7456							# updated for case b990 to look for either ';' or space
7457	4	50	33			130	if ( pos($input_line) == length($input_line)
7458							\|\| $input_line =~ m/\G[;\s]/gc )
7459							{
7460	4					16	$type = 'Y';
7461
7462							# Abandon Hope ...
7463							# Perl's indirect object notation is a very bad
7464							# thing and can cause subtle bugs, especially for
7465							# beginning programmers. And I haven't even been
7466							# able to figure out a sane warning scheme which
7467							# doesn't get in the way of good scripts.
7468
7469							# Complain if a filehandle has any lower case
7470							# letters. This is suggested good practice.
7471							# Use 'sub_name' because something like
7472							# main::MYHANDLE is ok for filehandle
7473	4	100				22	if ( $sub_name =~ /[a-z]/ ) {
7474
7475							# could be bug caused by older perltidy if
7476							# followed by '('
7477	1	50				5	if ( $input_line =~ m/\G\s*\(/gc ) {
7478	1					7	$self->complain(
7479							"Caution: unknown word '$tok' in indirect object slot\n"
7480							);
7481							}
7482							}
7483							}
7484
7485							# bareword not followed by a space -- may not be filehandle
7486							# (may be function call defined in a 'use' statement)
7487							else {
7488	0					0	$type = 'Z';
7489							}
7490							}
7491
7492							# none of the above special types
7493							else {
7494							}
7495							}
7496
7497							# Now we must convert back from character position
7498							# to pre_token index.
7499							# I don't think an error flag can occur here ..but who knows
7500	1672					2924	my $error;
7501	1672					5133	( $i, $error ) =
7502							inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );
7503	1672	50				4632	if ($error) {
7504	0					0	$self->warning(
7505							"scan_bare_identifier: Possibly invalid tokenization\n");
7506							}
7507							}
7508
7509							# no match but line not blank - could be syntax error
7510							# perl will take '::' alone without complaint
7511							else {
7512	0					0	$type = 'w';
7513
7514							# change this warning to log message if it becomes annoying
7515	0					0	$self->warning("didn't find identifier after leading ::\n");
7516							}
7517	1672					6874	return ( $i, $tok, $type, $prototype );
7518							} ## end sub scan_bare_identifier_do
7519
7520							sub scan_id_do {
7521
7522							# This is the new scanner and will eventually replace scan_identifier.
7523							# Only type 'sub' and 'package' are implemented.
7524							# Token types $ * % @ & -> are not yet implemented.
7525							#
7526							# Scan identifier following a type token.
7527							# The type of call depends on $id_scan_state: $id_scan_state = ''
7528							# for starting call, in which case $tok must be the token defining
7529							# the type.
7530							#
7531							# If the type token is the last nonblank token on the line, a value
7532							# of $id_scan_state = $tok is returned, indicating that further
7533							# calls must be made to get the identifier. If the type token is
7534							# not the last nonblank token on the line, the identifier is
7535							# scanned and handled and a value of '' is returned.
7536
7537	331			331	0	1229	my ( $self, $input_line, $i, $tok, $rtokens, $rtoken_map, $id_scan_state,
7538							$max_token_index )
7539							= @_;
7540	39			39		476	use constant DEBUG_NSCAN => 0;
	39					93
	39					48307
7541	331					626	my $type = EMPTY_STRING;
7542	331					725	my ( $i_beg, $pos_beg );
7543
7544							#print "NSCAN:entering i=$i, tok=$tok, type=$type, state=$id_scan_state\n";
7545							#my ($a,$b,$c) = caller;
7546							#print "NSCAN: scan_id called with tok=$tok $a $b $c\n";
7547
7548							# on re-entry, start scanning at first token on the line
7549	331	100				1002	if ($id_scan_state) {
7550	10					40	$i_beg = $i;
7551	10					26	$type = EMPTY_STRING;
7552							}
7553
7554							# on initial entry, start scanning just after type token
7555							else {
7556	321					606	$i_beg = $i + 1;
7557	321					599	$id_scan_state = $tok;
7558	321					653	$type = 't';
7559							}
7560
7561							# find $i_beg = index of next nonblank token,
7562							# and handle empty lines
7563	331					586	my $blank_line = 0;
7564	331					751	my $next_nonblank_token = $rtokens->[$i_beg];
7565	331	100				933	if ( $i_beg > $max_token_index ) {
7566	2					8	$blank_line = 1;
7567							}
7568							else {
7569
7570							# only a '#' immediately after a '$' is not a comment
7571	329	50				983	if ( $next_nonblank_token eq '#' ) {
7572	0	0				0	if ( $tok ne '$' ) {
7573	0					0	$blank_line = 1;
7574							}
7575							}
7576
7577	329	100				1505	if ( $next_nonblank_token =~ /^\s/ ) {
7578	309					1327	( $next_nonblank_token, $i_beg ) =
7579							find_next_nonblank_token_on_this_line( $i_beg, $rtokens,
7580							$max_token_index );
7581	309	100				1728	if ( $next_nonblank_token =~ /(^#\|^\s*$)/ ) {
7582	4					12	$blank_line = 1;
7583							}
7584							}
7585							}
7586
7587							# handle non-blank line; identifier, if any, must follow
7588	331	100				1041	if ( !$blank_line ) {
7589
7590	325	100				1019	if ( $is_sub{$id_scan_state} ) {
		50
7591	299					3299	( $i, $tok, $type, $id_scan_state ) = $self->do_scan_sub(
7592							{
7593							input_line => $input_line,
7594							i => $i,
7595							i_beg => $i_beg,
7596							tok => $tok,
7597							type => $type,
7598							rtokens => $rtokens,
7599							rtoken_map => $rtoken_map,
7600							id_scan_state => $id_scan_state,
7601							max_token_index => $max_token_index,
7602							}
7603							);
7604							}
7605
7606							elsif ( $is_package{$id_scan_state} ) {
7607	26					102	( $i, $tok, $type ) =
7608							$self->do_scan_package( $input_line, $i, $i_beg, $tok, $type,
7609							$rtokens, $rtoken_map, $max_token_index );
7610	26					64	$id_scan_state = EMPTY_STRING;
7611							}
7612
7613							else {
7614	0					0	$self->warning("invalid token in scan_id: $tok\n");
7615	0					0	$id_scan_state = EMPTY_STRING;
7616							}
7617							}
7618
7619	331	50	33			1921	if ( $id_scan_state && ( !defined($type) \|\| !$type ) ) {
			66
7620
7621							# shouldn't happen:
7622	0					0	if (DEVEL_MODE) {
7623							$self->Fault(<<EOM);
7624							Program bug in scan_id: undefined type but scan_state=$id_scan_state
7625							EOM
7626							}
7627							$self->warning(
7628	0					0	"Possible program bug in sub scan_id: undefined type but scan_state=$id_scan_state\n"
7629							);
7630	0					0	$self->report_definite_bug();
7631							}
7632
7633	331					541	DEBUG_NSCAN && do {
7634							print {*STDOUT}
7635							"NSCAN: returns i=$i, tok=$tok, type=$type, state=$id_scan_state\n";
7636							};
7637	331					1390	return ( $i, $tok, $type, $id_scan_state );
7638							} ## end sub scan_id_do
7639
7640							sub check_prototype {
7641	137			137	0	419	my ( $proto, $package, $subname ) = @_;
7642	137	50				451	return if ( !defined($package) );
7643	137	50				442	return if ( !defined($subname) );
7644	137	100				393	if ( defined($proto) ) {
7645	34					157	$proto =~ s/^\s\(\s//;
7646	34					131	$proto =~ s/\s*\)$//;
7647	34	100				96	if ($proto) {
7648	5					20	$ris_user_function->{$package}{$subname} = 1;
7649	5					18	$ruser_function_prototype->{$package}{$subname} = "($proto)";
7650
7651							# prototypes containing '&' must be treated specially..
7652	5	100				36	if ( $proto =~ /\&/ ) {
7653
7654							# right curly braces of prototypes ending in
7655							# '&' may be followed by an operator
7656	1	50				22	if ( $proto =~ /\&$/ ) {
7657	0					0	$ris_block_function->{$package}{$subname} = 1;
7658							}
7659
7660							# right curly braces of prototypes NOT ending in
7661							# '&' may NOT be followed by an operator
7662							else {
7663	1					4	$ris_block_list_function->{$package}{$subname} = 1;
7664							}
7665							}
7666							}
7667							else {
7668	29					107	$ris_constant->{$package}{$subname} = 1;
7669							}
7670							}
7671							else {
7672	103					392	$ris_user_function->{$package}{$subname} = 1;
7673							}
7674	137					324	return;
7675							} ## end sub check_prototype
7676
7677							sub do_scan_package {
7678
7679							# do_scan_package parses a package name
7680							# it is called with $i_beg equal to the index of the first nonblank
7681							# token following a 'package' token.
7682							# USES GLOBAL VARIABLES: $current_package,
7683
7684							# package NAMESPACE
7685							# package NAMESPACE VERSION
7686							# package NAMESPACE BLOCK
7687							# package NAMESPACE VERSION BLOCK
7688							#
7689							# If VERSION is provided, package sets the $VERSION variable in the given
7690							# namespace to a version object with the VERSION provided. VERSION must be
7691							# a "strict" style version number as defined by the version module: a
7692							# positive decimal number (integer or decimal-fraction) without
7693							# exponentiation or else a dotted-decimal v-string with a leading 'v'
7694							# character and at least three components.
7695							# reference http://perldoc.perl.org/functions/package.html
7696
7697							my (
7698	26			26	0	83	$self, $input_line, $i,
7699							$i_beg, $tok, $type,
7700							$rtokens, $rtoken_map, $max_token_index
7701							) = @_;
7702	26					54	my $package = undef;
7703	26					51	my $pos_beg = $rtoken_map->[$i_beg];
7704	26					78	pos($input_line) = $pos_beg;
7705
7706							# handle non-blank line; package name, if any, must follow
7707	26	50				193	if ( $input_line =~ m/\G\s((?:\w(?:'\|::))\w)/gc ) {
7708	26					73	$package = $1;
7709	26	50	33			161	$package = ( defined($1) && $1 ) ? $1 : 'main';
7710	26					70	$package =~ s/\'/::/g;
7711	26	50				78	if ( $package =~ /^\:/ ) { $package = 'main' . $package }
	0					0
7712	26					55	$package =~ s/::$//;
7713	26					50	my $pos = pos($input_line);
7714	26					54	my $numc = $pos - $pos_beg;
7715	26					72	$tok = 'package ' . substr( $input_line, $pos_beg, $numc );
7716	26					45	$type = 'P'; # Fix for c250, previously 'i'
7717
7718							# Now we must convert back from character position
7719							# to pre_token index.
7720							# I don't think an error flag can occur here ..but ?
7721	26					47	my $error;
7722	26					95	( $i, $error ) =
7723							inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );
7724	26	50				76	if ($error) { $self->warning("Possibly invalid package\n") }
	0					0
7725	26					64	$current_package = $package;
7726
7727							# we should now have package NAMESPACE
7728							# now expecting VERSION, BLOCK, or ; to follow ...
7729							# package NAMESPACE VERSION
7730							# package NAMESPACE BLOCK
7731							# package NAMESPACE VERSION BLOCK
7732	26					85	my ( $next_nonblank_token, $i_next ) =
7733							$self->find_next_nonblank_token( $i, $rtokens, $max_token_index );
7734
7735							# check that something recognizable follows, but do not parse.
7736							# A VERSION number will be parsed later as a number or v-string in the
7737							# normal way. What is important is to set the statement type if
7738							# everything looks okay so that the operator_expected() routine
7739							# knows that the number is in a package statement.
7740							# Examples of valid primitive tokens that might follow are:
7741							# 1235 . ; { } v3 v
7742							# FIX: added a '#' since a side comment may also follow
7743							# Added ':' for class attributes (for --use-feature=class, rt145706)
7744	26	50				111	if ( $next_nonblank_token =~ /^([v\.\d;\{\}\#\:])\|v\d\|\d+$/ ) {
7745	26					66	$statement_type = $tok;
7746							}
7747							else {
7748	0					0	$self->warning(
7749							"Unexpected '$next_nonblank_token' after package name '$tok'\n"
7750							);
7751							}
7752							}
7753
7754							# no match but line not blank --
7755							# could be a label with name package, like package: , for example.
7756							else {
7757	0					0	$type = 'k';
7758							}
7759
7760	26					96	return ( $i, $tok, $type );
7761							} ## end sub do_scan_package
7762
7763							{ ## begin closure for sub scan_complex_identifier
7764
7765	39			39		399	use constant DEBUG_SCAN_ID => 0;
	39					91
	39					5113
7766
7767							# Constant hash:
7768							my %is_special_variable_char;
7769
7770							BEGIN {
7771
7772							# These are the only characters which can (currently) form special
7773							# variables, like $^W: (issue c066).
7774	39			39		422	my @q =
7775							qw{ ? A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ };
7776	39					146390	@{is_special_variable_char}{@q} = (1) x scalar(@q);
7777							} ## end BEGIN
7778
7779							# These are the possible states for this scanner:
7780							my $scan_state_SIGIL = '$';
7781							my $scan_state_ALPHA = 'A';
7782							my $scan_state_COLON = ':';
7783							my $scan_state_LPAREN = '(';
7784							my $scan_state_RPAREN = ')';
7785							my $scan_state_AMPERSAND = '&';
7786							my $scan_state_SPLIT = '^';
7787
7788							# Only these non-blank states may be returned to caller:
7789							my %is_returnable_scan_state = (
7790							$scan_state_SIGIL => 1,
7791							$scan_state_AMPERSAND => 1,
7792							);
7793
7794							# USES GLOBAL VARIABLES:
7795							# $context, $last_nonblank_token, $last_nonblank_type
7796
7797							#-----------
7798							# call args:
7799							#-----------
7800							my ( $i, $id_scan_state, $identifier, $rtokens, $max_token_index,
7801							$expecting, $container_type );
7802
7803							#-------------------------------------------
7804							# my variables, re-initialized on each call:
7805							#-------------------------------------------
7806							my $i_begin; # starting index $i
7807							my $type; # returned identifier type
7808							my $tok_begin; # starting token
7809							my $tok; # returned token
7810							my $id_scan_state_begin; # starting scan state
7811							my $identifier_begin; # starting identifier
7812							my $i_save; # a last good index, in case of error
7813							my $message; # hold error message for log file
7814							my $tok_is_blank;
7815							my $last_tok_is_blank;
7816							my $in_prototype_or_signature;
7817							my $saw_alpha;
7818							my $saw_type;
7819							my $allow_tick;
7820
7821							sub initialize_my_scan_id_vars {
7822
7823							# Initialize all 'my' vars on entry
7824	486			486	0	873	$i_begin = $i;
7825	486					1894	$type = EMPTY_STRING;
7826	486					922	$tok_begin = $rtokens->[$i_begin];
7827	486					1126	$tok = $tok_begin;
7828	486	50				1611	if ( $tok_begin eq ':' ) { $tok_begin = '::' }
	0					0
7829	486					849	$id_scan_state_begin = $id_scan_state;
7830	486					821	$identifier_begin = $identifier;
7831	486					831	$i_save = undef;
7832
7833	486					855	$message = EMPTY_STRING;
7834	486					822	$tok_is_blank = undef; # a flag to speed things up
7835	486					738	$last_tok_is_blank = undef;
7836
7837	486		100			1850	$in_prototype_or_signature =
7838							$container_type && $container_type =~ /^sub\b/;
7839
7840							# these flags will be used to help figure out the type:
7841	486					755	$saw_alpha = undef;
7842	486					751	$saw_type = undef;
7843
7844							# allow old package separator (') except in 'use' statement
7845	486					929	$allow_tick = ( $last_nonblank_token ne 'use' );
7846	486					824	return;
7847							} ## end sub initialize_my_scan_id_vars
7848
7849							#----------------------------------
7850							# Routines for handling scan states
7851							#----------------------------------
7852							sub do_id_scan_state_dollar {
7853
7854	514			514	0	882	my $self = shift;
7855
7856							# We saw a sigil, now looking to start a variable name
7857	514	100	66			4189	if ( $tok eq '$' ) {
		100	33
		100
		50
		50
		100
		100
		100
		100
7858
7859	31					93	$identifier .= $tok;
7860
7861							# we've got a punctuation variable if end of line (punct.t)
7862	31	50				162	if ( $i == $max_token_index ) {
7863	0					0	$type = 'i';
7864	0					0	$id_scan_state = EMPTY_STRING;
7865							}
7866							}
7867							elsif ( $tok =~ /^\w/ ) { # alphanumeric ..
7868	253					454	$saw_alpha = 1;
7869	253					517	$identifier .= $tok;
7870
7871							# now need :: except for special digit vars like '$1' (c208)
7872	253	100				853	$id_scan_state = $tok =~ /^\d/ ? EMPTY_STRING : $scan_state_COLON;
7873							}
7874							elsif ( $tok eq '::' ) {
7875	16					51	$id_scan_state = $scan_state_ALPHA;
7876	16					40	$identifier .= $tok;
7877							}
7878
7879							# POSTDEFREF ->@ ->% ->& ->*
7880							elsif ( ( $tok =~ /^[\@\%\&\*]$/ ) && $identifier =~ /\-\>$/ ) {
7881	0					0	$identifier .= $tok;
7882							}
7883							elsif ( $tok eq "'" && $allow_tick ) { # alphanumeric ..
7884	0					0	$saw_alpha = 1;
7885	0					0	$id_scan_state = $scan_state_COLON; # now need ::
7886	0					0	$identifier .= $tok;
7887
7888							# Perl will accept leading digits in identifiers,
7889							# although they may not always produce useful results.
7890							# Something like $main::0 is ok. But this also works:
7891							#
7892							# sub howdy::123::bubba{ print "bubba $54321!\n" }
7893							# howdy::123::bubba();
7894							#
7895							}
7896							elsif ( $tok eq '#' ) {
7897
7898	99					259	my $is_punct_var = $identifier eq '$$';
7899
7900							# side comment or identifier?
7901	99	100	66			959	if (
			66
			66
			33
7902
7903							# A '#' starts a comment if it follows a space. For example,
7904							# the following is equivalent to $ans=40.
7905							# my $ #
7906							# ans = 40;
7907							!$last_tok_is_blank
7908
7909							# a # inside a prototype or signature can only start a
7910							# comment
7911							&& !$in_prototype_or_signature
7912
7913							# these are valid punctuation vars: *# %# @# $#
7914							# May also be '$#array' or POSTDEFREF ->$#
7915							&& ( $identifier =~ /^[\%\@\$\*]$/
7916							\|\| $identifier =~ /\$$/ )
7917
7918							# but a '#' after '$$' is a side comment; see c147
7919							&& !$is_punct_var
7920
7921							)
7922							{
7923	95					221	$identifier .= $tok; # keep same state, a $ could follow
7924							}
7925							else {
7926
7927							# otherwise it is a side comment
7928	4	50				20	if ( $identifier eq '->' ) { }
		50
		50
7929	0					0	elsif ($is_punct_var) { $type = 'i' }
7930	4					7	elsif ( $id_scan_state eq $scan_state_SIGIL ) { $type = 't' }
7931	0					0	else { $type = 'i' }
7932	4					7	$i = $i_save;
7933	4					8	$id_scan_state = EMPTY_STRING;
7934							}
7935							}
7936
7937							elsif ( $tok eq '{' ) {
7938
7939							# check for something like ${#} or ${?}, where ? is a special char
7940	38	100	100			596	if (
			66
			100
			100
7941							(
7942							$identifier eq '$'
7943							\|\| $identifier eq '@'
7944							\|\| $identifier eq '$#'
7945							)
7946							&& $i + 2 <= $max_token_index
7947							&& $rtokens->[ $i + 2 ] eq '}'
7948							&& $rtokens->[ $i + 1 ] !~ /[\s\w]/
7949							)
7950							{
7951	1					4	my $next2 = $rtokens->[ $i + 2 ];
7952	1					6	my $next1 = $rtokens->[ $i + 1 ];
7953	1					6	$identifier .= $tok . $next1 . $next2;
7954	1					3	$i += 2;
7955	1					2	$id_scan_state = EMPTY_STRING;
7956							}
7957							else {
7958
7959							# skip something like ${xxx} or ->{
7960	37					86	$id_scan_state = EMPTY_STRING;
7961
7962							# if this is the first token of a line, any tokens for this
7963							# identifier have already been accumulated
7964	37	100	66			163	if ( $identifier eq '$' \|\| $i == 0 ) {
7965	26					49	$identifier = EMPTY_STRING;
7966							}
7967	37					77	$i = $i_save;
7968							}
7969							}
7970
7971							# space ok after leading $ % * & @
7972							elsif ( $tok =~ /^\s*$/ ) {
7973
7974	20					58	$tok_is_blank = 1;
7975
7976							# note: an id with a leading '&' does not actually come this way
7977	20	50				80	if ( $identifier =~ /^[\$\%\*\&\@]/ ) {
		0
7978
7979	20	100				69	if ( length($identifier) > 1 ) {
7980	8					15	$id_scan_state = EMPTY_STRING;
7981	8					16	$i = $i_save;
7982	8					20	$type = 'i'; # probably punctuation variable
7983							}
7984							else {
7985
7986							# fix c139: trim line-ending type 't'
7987	12	100				60	if ( $i == $max_token_index ) {
		100
7988	1					3	$i = $i_save;
7989	1					3	$type = 't';
7990							}
7991
7992							# spaces after $'s are common, and space after @
7993							# is harmless, so only complain about space
7994							# after other type characters. Space after $ and
7995							# @ will be removed in formatting. Report space
7996							# after % and * because they might indicate a
7997							# parsing error. In other words '% ' might be a
7998							# modulo operator. Delete this warning if it
7999							# gets annoying.
8000							elsif ( $identifier !~ /^[\@\$]$/ ) {
8001	1					5	$message =
8002							"Space in identifier, following $identifier\n";
8003							}
8004							else {
8005							## ok: silently accept space after '$' and '@' sigils
8006							}
8007							}
8008							}
8009
8010							elsif ( $identifier eq '->' ) {
8011
8012							# space after '->' is ok except at line end ..
8013							# so trim line-ending in type '->' (fixes c139)
8014	0	0				0	if ( $i == $max_token_index ) {
8015	0					0	$i = $i_save;
8016	0					0	$type = '->';
8017							}
8018							}
8019
8020							# stop at space after something other than -> or sigil
8021							# Example of what can arrive here:
8022							# eval { $MyClass->$$ };
8023							else {
8024	0					0	$id_scan_state = EMPTY_STRING;
8025	0					0	$i = $i_save;
8026	0					0	$type = 'i';
8027							}
8028							}
8029							elsif ( $tok eq '^' ) {
8030
8031							# check for some special variables like $^ $^W
8032	11	50				50	if ( $identifier =~ /^[\$\*\@\%]$/ ) {
8033	11					32	$identifier .= $tok;
8034	11					23	$type = 'i';
8035
8036							# There may be one more character, not a space, after the ^
8037	11					31	my $next1 = $rtokens->[ $i + 1 ];
8038	11					29	my $chr = substr( $next1, 0, 1 );
8039	11	100				45	if ( $is_special_variable_char{$chr} ) {
8040
8041							# It is something like $^W
8042							# Test case (c066) : $^Oeq'linux'
8043	9					19	$i++;
8044	9					19	$identifier .= $next1;
8045
8046							# If pretoken $next1 is more than one character long,
8047							# set a flag indicating that it needs to be split.
8048	9	100				38	$id_scan_state =
8049							( length($next1) > 1 ) ? $scan_state_SPLIT : EMPTY_STRING;
8050							}
8051							else {
8052
8053							# it is just $^
8054							# Simple test case (c065): '$aa=$^if($bb)';
8055	2					4	$id_scan_state = EMPTY_STRING;
8056							}
8057							}
8058							else {
8059	0					0	$id_scan_state = EMPTY_STRING;
8060	0					0	$i = $i_save;
8061							}
8062							}
8063							else { # something else
8064
8065	46	100	66			418	if ( $in_prototype_or_signature && $tok =~ /^[\),=#]/ ) {
		100	66
		100
		50
		0
		0
8066
8067							# We might be in an extrusion of
8068							# sub foo2 ( $first, $, $third ) {
8069							# looking at a line starting with a comma, like
8070							# $
8071							# ,
8072							# in this case the comma ends the signature variable
8073							# '$' which will have been previously marked type 't'
8074							# rather than 'i'.
8075	3	100				12	if ( $i == $i_begin ) {
8076	1					4	$identifier = EMPTY_STRING;
8077	1					12	$type = EMPTY_STRING;
8078							}
8079
8080							# at a # we have to mark as type 't' because more may
8081							# follow, otherwise, in a signature we can let '$' be an
8082							# identifier here for better formatting.
8083							# See 'mangle4.in' for a test case.
8084							else {
8085	2					4	$type = 'i';
8086	2	50	33			14	if ( $id_scan_state eq $scan_state_SIGIL && $tok eq '#' ) {
8087	0					0	$type = 't';
8088							}
8089	2					42	$i = $i_save;
8090							}
8091	3					8	$id_scan_state = EMPTY_STRING;
8092							}
8093
8094							# check for various punctuation variables
8095							elsif ( $identifier =~ /^[\$\*\@\%]$/ ) {
8096	35					110	$identifier .= $tok;
8097							}
8098
8099							# POSTDEFREF: Postfix reference ->$* ->%* ->@* ->** ->&* ->$#*
8100							elsif ($tok eq '*'
8101							&& $identifier =~ /\-\>([\@\%\$\*\&]\|\$\#)$/ )
8102							{
8103	6					14	$identifier .= $tok;
8104							}
8105
8106							elsif ( $identifier eq '$#' ) {
8107
8108	2	50				12	if ( $tok eq '{' ) { $type = 'i'; $i = $i_save }
	0	50				0
	0					0
8109
8110							# perl seems to allow just these: $#: $#- $#+
8111							elsif ( $tok =~ /^[\:\-\+]$/ ) {
8112	0					0	$type = 'i';
8113	0					0	$identifier .= $tok;
8114							}
8115							else {
8116	2					6	$i = $i_save;
8117	2					10	$self->write_logfile_entry(
8118							'Use of $# is deprecated' . "\n" );
8119							}
8120							}
8121							elsif ( $identifier eq '$$' ) {
8122
8123							# perl does not allow references to punctuation
8124							# variables without braces. For example, this
8125							# won't work:
8126							# $:=\4;
8127							# $a = $$:;
8128							# You would have to use
8129							# $a = ${$:};
8130
8131							# '$$' alone is punctuation variable for PID
8132	0					0	$i = $i_save;
8133	0	0				0	if ( $tok eq '{' ) { $type = 't' }
	0					0
8134	0					0	else { $type = 'i' }
8135							}
8136							elsif ( $identifier eq '->' ) {
8137	0					0	$i = $i_save;
8138							}
8139							else {
8140	0					0	$i = $i_save;
8141	0	0				0	if ( length($identifier) == 1 ) {
8142	0					0	$identifier = EMPTY_STRING;
8143							}
8144							}
8145	46					109	$id_scan_state = EMPTY_STRING;
8146							}
8147	514					910	return;
8148							} ## end sub do_id_scan_state_dollar
8149
8150							sub do_id_scan_state_alpha {
8151
8152	113			113	0	226	my $self = shift;
8153
8154							# looking for alphanumeric after ::
8155	113					365	$tok_is_blank = $tok =~ /^\s*$/;
8156
8157	113	100	33			472	if ( $tok =~ /^\w/ ) { # found it
		50	66
		50	33
		50
8158	100					199	$identifier .= $tok;
8159	100					201	$id_scan_state = $scan_state_COLON; # now need ::
8160	100					165	$saw_alpha = 1;
8161							}
8162							elsif ( $tok eq "'" && $allow_tick ) {
8163	0					0	$identifier .= $tok;
8164	0					0	$id_scan_state = $scan_state_COLON; # now need ::
8165	0					0	$saw_alpha = 1;
8166							}
8167							elsif ( $tok_is_blank && $identifier =~ /^sub / ) {
8168	0					0	$id_scan_state = $scan_state_LPAREN;
8169	0					0	$identifier .= $tok;
8170							}
8171							elsif ( $tok eq '(' && $identifier =~ /^sub / ) {
8172	0					0	$id_scan_state = $scan_state_RPAREN;
8173	0					0	$identifier .= $tok;
8174							}
8175							else {
8176	13					22	$id_scan_state = EMPTY_STRING;
8177	13					25	$i = $i_save;
8178							}
8179	113					220	return;
8180							} ## end sub do_id_scan_state_alpha
8181
8182							sub do_id_scan_state_colon {
8183
8184	434			434	0	792	my $self = shift;
8185
8186							# looking for possible :: after alphanumeric
8187
8188	434					1508	$tok_is_blank = $tok =~ /^\s*$/;
8189
8190	434	100	66			3383	if ( $tok eq '::' ) { # got it
		100	66
		100	66
		50
		50
8191	97					173	$identifier .= $tok;
8192	97					196	$id_scan_state = $scan_state_ALPHA; # now require alpha
8193							}
8194							elsif ( $tok =~ /^\w/ ) { # more alphanumeric is ok here
8195	20					53	$identifier .= $tok;
8196	20					62	$id_scan_state = $scan_state_COLON; # now need ::
8197	20					38	$saw_alpha = 1;
8198							}
8199							elsif ( $tok eq "'" && $allow_tick ) { # tick
8200
8201	12	50				37	if ( $is_keyword{$identifier} ) {
8202	0					0	$id_scan_state = EMPTY_STRING; # that's all
8203	0					0	$i = $i_save;
8204							}
8205							else {
8206	12					22	$identifier .= $tok;
8207							}
8208							}
8209							elsif ( $tok_is_blank && $identifier =~ /^sub / ) {
8210	0					0	$id_scan_state = $scan_state_LPAREN;
8211	0					0	$identifier .= $tok;
8212							}
8213							elsif ( $tok eq '(' && $identifier =~ /^sub / ) {
8214	0					0	$id_scan_state = $scan_state_RPAREN;
8215	0					0	$identifier .= $tok;
8216							}
8217							else {
8218	305					579	$id_scan_state = EMPTY_STRING; # that's all
8219	305					526	$i = $i_save;
8220							}
8221	434					687	return;
8222							} ## end sub do_id_scan_state_colon
8223
8224							sub do_id_scan_state_left_paren {
8225
8226	0			0	0	0	my $self = shift;
8227
8228							# looking for possible '(' of a prototype
8229
8230	0	0				0	if ( $tok eq '(' ) { # got it
		0
8231	0					0	$identifier .= $tok;
8232	0					0	$id_scan_state = $scan_state_RPAREN; # now find the end of it
8233							}
8234							elsif ( $tok =~ /^\s*$/ ) { # blank - keep going
8235	0					0	$identifier .= $tok;
8236	0					0	$tok_is_blank = 1;
8237							}
8238							else {
8239	0					0	$id_scan_state = EMPTY_STRING; # that's all - no prototype
8240	0					0	$i = $i_save;
8241							}
8242	0					0	return;
8243							} ## end sub do_id_scan_state_left_paren
8244
8245							sub do_id_scan_state_right_paren {
8246
8247	0			0	0	0	my $self = shift;
8248
8249							# looking for a ')' of prototype to close a '('
8250
8251	0					0	$tok_is_blank = $tok =~ /^\s*$/;
8252
8253	0	0				0	if ( $tok eq ')' ) { # got it
		0
8254	0					0	$identifier .= $tok;
8255	0					0	$id_scan_state = EMPTY_STRING; # all done
8256							}
8257							elsif ( $tok =~ /^[\s\$\%\\\*\@\&\;]/ ) {
8258	0					0	$identifier .= $tok;
8259							}
8260							else { # probable error in script, but keep going
8261	0					0	warning("Unexpected '$tok' while seeking end of prototype\n");
8262	0					0	$identifier .= $tok;
8263							}
8264	0					0	return;
8265							} ## end sub do_id_scan_state_right_paren
8266
8267							sub do_id_scan_state_ampersand {
8268
8269	105			105	0	267	my $self = shift;
8270
8271							# Starting sub call after seeing an '&'
8272
8273	105	100	33			705	if ( $tok =~ /^[\$\w]/ ) { # alphanumeric ..
		50
		100
		50
		50
		0
8274	88					209	$id_scan_state = $scan_state_COLON; # now need ::
8275	88					164	$saw_alpha = 1;
8276	88					200	$identifier .= $tok;
8277							}
8278							elsif ( $tok eq "'" && $allow_tick ) { # alphanumeric ..
8279	0					0	$id_scan_state = $scan_state_COLON; # now need ::
8280	0					0	$saw_alpha = 1;
8281	0					0	$identifier .= $tok;
8282							}
8283							elsif ( $tok =~ /^\s*$/ ) { # allow space
8284	2					5	$tok_is_blank = 1;
8285
8286							# fix c139: trim line-ending type 't'
8287	2	50	33			14	if ( length($identifier) == 1 && $i == $max_token_index ) {
8288	2					3	$i = $i_save;
8289	2					7	$type = 't';
8290							}
8291							}
8292							elsif ( $tok eq '::' ) { # leading ::
8293	0					0	$id_scan_state = $scan_state_ALPHA; # accept alpha next
8294	0					0	$identifier .= $tok;
8295							}
8296							elsif ( $tok eq '{' ) {
8297	15	50	33			74	if ( $identifier eq '&' \|\| $i == 0 ) {
8298	15					33	$identifier = EMPTY_STRING;
8299							}
8300	15					39	$i = $i_save;
8301	15					28	$id_scan_state = EMPTY_STRING;
8302							}
8303							elsif ( $tok eq '^' ) {
8304	0	0				0	if ( $identifier eq '&' ) {
8305
8306							# Special variable (c066)
8307	0					0	$identifier .= $tok;
8308	0					0	$type = '&';
8309
8310							# There may be one more character, not a space, after the ^
8311	0					0	my $next1 = $rtokens->[ $i + 1 ];
8312	0					0	my $chr = substr( $next1, 0, 1 );
8313	0	0				0	if ( $is_special_variable_char{$chr} ) {
8314
8315							# It is something like &^O
8316	0					0	$i++;
8317	0					0	$identifier .= $next1;
8318
8319							# If pretoken $next1 is more than one character long,
8320							# set a flag indicating that it needs to be split.
8321	0	0				0	$id_scan_state =
8322							( length($next1) > 1 ) ? $scan_state_SPLIT : EMPTY_STRING;
8323							}
8324							else {
8325
8326							# it is &^
8327	0					0	$id_scan_state = EMPTY_STRING;
8328							}
8329							}
8330							else {
8331	0					0	$identifier = EMPTY_STRING;
8332	0					0	$i = $i_save;
8333							}
8334							}
8335							else {
8336
8337							# punctuation variable?
8338							# testfile: cunningham4.pl
8339							#
8340							# We have to be careful here. If we are in an unknown state,
8341							# we will reject the punctuation variable. In the following
8342							# example the '&' is a binary operator but we are in an unknown
8343							# state because there is no sigil on 'Prima', so we don't
8344							# know what it is. But it is a bad guess that
8345							# '&~' is a function variable.
8346							# $self->{text}->{colorMap}->[
8347							# Prima::PodView::COLOR_CODE_FOREGROUND
8348							# & ~tb::COLOR_INDEX ] =
8349							# $sec->{ColorCode}
8350
8351							# Fix for case c033: a '#' here starts a side comment
8352	0	0	0			0	if ( $identifier eq '&' && $expecting && $tok ne '#' ) {
			0
8353	0					0	$identifier .= $tok;
8354							}
8355							else {
8356	0					0	$identifier = EMPTY_STRING;
8357	0					0	$i = $i_save;
8358	0					0	$type = '&';
8359							}
8360	0					0	$id_scan_state = EMPTY_STRING;
8361							}
8362	105					197	return;
8363							} ## end sub do_id_scan_state_ampersand
8364
8365							#-------------------
8366							# hash of scanner subs
8367							#-------------------
8368							my $scan_identifier_code = {
8369							$scan_state_SIGIL => \&do_id_scan_state_dollar,
8370							$scan_state_ALPHA => \&do_id_scan_state_alpha,
8371							$scan_state_COLON => \&do_id_scan_state_colon,
8372							$scan_state_LPAREN => \&do_id_scan_state_left_paren,
8373							$scan_state_RPAREN => \&do_id_scan_state_right_paren,
8374							$scan_state_AMPERSAND => \&do_id_scan_state_ampersand,
8375							};
8376
8377							sub scan_complex_identifier {
8378
8379							# This routine assembles tokens into identifiers. It maintains a
8380							# scan state, id_scan_state. It updates id_scan_state based upon
8381							# current id_scan_state and token, and returns an updated
8382							# id_scan_state and the next index after the identifier.
8383
8384							# This routine now serves a a backup for sub scan_simple_identifier
8385							# which handles most identifiers.
8386
8387							# Note that $self must be a 'my' variable and not be a closure
8388							# variables like the other args. Otherwise it will not get
8389							# deleted by a DESTROY call at the end of a file. Then an
8390							# attempt to create multiple tokenizers can occur when multiple
8391							# files are processed, causing an error.
8392
8393							(
8394	486			486	0	1753	my $self, $i, $id_scan_state, $identifier, $rtokens,
8395							$max_token_index, $expecting, $container_type
8396							) = @_;
8397
8398							# return flag telling caller to split the pretoken
8399	486					2534	my $split_pretoken_flag;
8400
8401							#-------------------
8402							# Initialize my vars
8403							#-------------------
8404
8405	486					1742	initialize_my_scan_id_vars();
8406
8407							#--------------------------------------------------------
8408							# get started by defining a type and a state if necessary
8409							#--------------------------------------------------------
8410
8411	486	100				1177	if ( !$id_scan_state ) {
8412	479					776	$context = UNKNOWN_CONTEXT;
8413
8414							# fixup for digraph
8415	479	50				1314	if ( $tok eq '>' ) {
8416	0					0	$tok = '->';
8417	0					0	$tok_begin = $tok;
8418							}
8419	479					826	$identifier = $tok;
8420
8421	479	100	100			3072	if ( $last_nonblank_token eq '->' ) {
		100	100
		100	0
		50
		0
		0
		0
		0
8422	6					15	$identifier = '->' . $identifier;
8423	6					13	$id_scan_state = $scan_state_SIGIL;
8424							}
8425							elsif ( $tok eq '$' \|\| $tok eq '*' ) {
8426	293					535	$id_scan_state = $scan_state_SIGIL;
8427	293					571	$context = SCALAR_CONTEXT;
8428							}
8429							elsif ( $tok eq '%' \|\| $tok eq '@' ) {
8430	77					186	$id_scan_state = $scan_state_SIGIL;
8431	77					159	$context = LIST_CONTEXT;
8432							}
8433							elsif ( $tok eq '&' ) {
8434	103					241	$id_scan_state = $scan_state_AMPERSAND;
8435							}
8436							elsif ( $tok eq 'sub' or $tok eq 'package' ) {
8437	0					0	$saw_alpha = 0; # 'sub' is considered type info here
8438	0					0	$id_scan_state = $scan_state_SIGIL;
8439	0					0	$identifier .=
8440							SPACE; # need a space to separate sub from sub name
8441							}
8442							elsif ( $tok eq '::' ) {
8443	0					0	$id_scan_state = $scan_state_ALPHA;
8444							}
8445							elsif ( $tok =~ /^\w/ ) {
8446	0					0	$id_scan_state = $scan_state_COLON;
8447	0					0	$saw_alpha = 1;
8448							}
8449							elsif ( $tok eq '->' ) {
8450	0					0	$id_scan_state = $scan_state_SIGIL;
8451							}
8452							else {
8453
8454							# shouldn't happen: bad call parameter
8455	0					0	my $msg =
8456							"Program bug detected: scan_complex_identifier received bad starting token = '$tok'\n";
8457	0					0	if (DEVEL_MODE) { $self->Fault($msg) }
8458	0	0				0	if ( !$self->[_in_error_] ) {
8459	0					0	warning($msg);
8460	0					0	$self->[_in_error_] = 1;
8461							}
8462	0					0	$id_scan_state = EMPTY_STRING;
8463
8464							# emergency return
8465	0					0	goto RETURN;
8466							}
8467	479					908	$saw_type = !$saw_alpha;
8468							}
8469							else {
8470	7					19	$i--;
8471	7					34	$saw_alpha = ( $tok =~ /^\w/ );
8472	7					20	$saw_type = ( $tok =~ /([\$\%\@\*\&])/ );
8473
8474							# check for a valid starting state
8475	7					11	if ( DEVEL_MODE && !$is_returnable_scan_state{$id_scan_state} ) {
8476							$self->Fault(<<EOM);
8477							Unexpected starting scan state in sub scan_complex_identifier: '$id_scan_state'
8478							EOM
8479							}
8480							}
8481
8482							#------------------------------
8483							# loop to gather the identifier
8484							#------------------------------
8485
8486	486					844	$i_save = $i;
8487
8488	486		100			2118	while ( $i < $max_token_index && $id_scan_state ) {
8489
8490							# Be sure we have code to handle this state before we proceed
8491	1169					2392	my $code = $scan_identifier_code->{$id_scan_state};
8492	1169	100				2391	if ( !$code ) {
8493
8494	3	50				12	if ( $id_scan_state eq $scan_state_SPLIT ) {
8495							## OK: this is the signal to exit and split the pretoken
8496							}
8497
8498							# unknown state - should not happen
8499							else {
8500	0					0	if (DEVEL_MODE) {
8501							$self->Fault(<<EOM);
8502							Unknown scan state in sub scan_complex_identifier: '$id_scan_state'
8503							Scan state at sub entry was '$id_scan_state_begin'
8504							EOM
8505							}
8506	0					0	$id_scan_state = EMPTY_STRING;
8507	0					0	$i = $i_save;
8508							}
8509	3					5	last;
8510							}
8511
8512							# Remember the starting index for progress check below
8513	1166					1627	my $i_start_loop = $i;
8514
8515	1166					1613	$last_tok_is_blank = $tok_is_blank;
8516	1166	100				1987	if ($tok_is_blank) { $tok_is_blank = undef }
	11					24
8517	1155					1579	else { $i_save = $i }
8518
8519	1166					1947	$tok = $rtokens->[ ++$i ];
8520
8521							# patch to make digraph :: if necessary
8522	1166	100	100			2942	if ( ( $tok eq ':' ) && ( $rtokens->[ $i + 1 ] eq ':' ) ) {
8523	113					244	$tok = '::';
8524	113					203	$i++;
8525							}
8526
8527	1166					3480	$code->($self);
8528
8529							# check for forward progress: a decrease in the index $i
8530							# implies that scanning has finished
8531	1166	100				3807	last if ( $i <= $i_start_loop );
8532
8533							} ## end of main loop
8534
8535							#-------------
8536							# Check result
8537							#-------------
8538
8539							# Be sure a valid state is returned
8540	486	100				1367	if ($id_scan_state) {
8541
8542	20	100				89	if ( !$is_returnable_scan_state{$id_scan_state} ) {
8543
8544	13	100				56	if ( $id_scan_state eq $scan_state_SPLIT ) {
8545	3					7	$split_pretoken_flag = 1;
8546							}
8547
8548	13	50				56	if ( $id_scan_state eq $scan_state_RPAREN ) {
8549	0					0	warning(
8550							"Hit end of line while seeking ) to end prototype\n");
8551							}
8552
8553	13					29	$id_scan_state = EMPTY_STRING;
8554							}
8555
8556							# Patch: the deprecated variable $# does not combine with anything
8557							# on the next line.
8558	20	50				73	if ( $identifier eq '$#' ) { $id_scan_state = EMPTY_STRING }
	0					0
8559							}
8560
8561							# Be sure the token index is valid
8562	486	50				1331	if ( $i < 0 ) { $i = 0 }
	0					0
8563
8564							# Be sure a token type is defined
8565	486	100				1246	if ( !$type ) {
8566
8567	458	100				1088	if ($saw_type) {
		100
8568
8569	452	100	33			2118	if ($saw_alpha) {
		50	66
		100	66
			33
8570
8571							# The type without the -> should be the same as with the -> so
8572							# that if they get separated we get the same bond strengths,
8573							# etc. See b1234
8574	348	50	33			1359	if ( $identifier =~ /^->/
			33
8575							&& $last_nonblank_type eq 'w'
8576							&& substr( $identifier, 2, 1 ) =~ /^\w/ )
8577							{
8578	0					0	$type = 'w';
8579							}
8580	348					746	else { $type = 'i' }
8581							}
8582							elsif ( $identifier eq '->' ) {
8583	0					0	$type = '->';
8584							}
8585							elsif (
8586							( length($identifier) > 1 )
8587
8588							# In something like '@$=' we have an identifier '@$'
8589							# In something like '$${' we have type '$$' (and only
8590							# part of an identifier)
8591							&& !( $identifier =~ /\$$/ && $tok eq '{' )
8592
8593							## && ( $identifier !~ /^(sub \|package )$/ )
8594							&& $identifier ne 'sub '
8595							&& $identifier ne 'package '
8596							)
8597							{
8598	53					187	$type = 'i';
8599							}
8600	51					104	else { $type = 't' }
8601							}
8602							elsif ($saw_alpha) {
8603
8604							# type 'w' includes anything without leading type info
8605							# ($,%,@,*) including something like abc::def::ghi
8606	5					9	$type = 'w';
8607
8608							# Fix for b1337, if restarting scan after line break between
8609							# '->' or sigil and identifier name, use type 'i'
8610	5	50	33			35	if ( $id_scan_state_begin
8611							&& $identifier =~ /^([\$\%\@\*\&]\|->)/ )
8612							{
8613	5					10	$type = 'i';
8614							}
8615							}
8616							else {
8617	1					3	$type = EMPTY_STRING;
8618							} # this can happen on a restart
8619							}
8620
8621							# See if we formed an identifier...
8622	486	100				1171	if ($identifier) {
8623	444					796	$tok = $identifier;
8624	444	100				1113	if ($message) { $self->write_logfile_entry($message) }
	1					8
8625							}
8626
8627							# did not find an identifier, back up
8628							else {
8629	42					90	$tok = $tok_begin;
8630	42					87	$i = $i_begin;
8631							}
8632
8633							RETURN:
8634
8635	486					746	DEBUG_SCAN_ID && do {
8636							my ( $a, $b, $c ) = caller;
8637							print {*STDOUT}
8638							"SCANID: called from $a $b $c with tok, i, state, identifier =$tok_begin, $i_begin, $id_scan_state_begin, $identifier_begin\n";
8639							print {*STDOUT}
8640							"SCANID: returned with tok, i, state, identifier =$tok, $i, $id_scan_state, $identifier\n";
8641							};
8642	486					2283	return ( $i, $tok, $type, $id_scan_state, $identifier,
8643							$split_pretoken_flag );
8644							} ## end sub scan_complex_identifier
8645							} ## end closure for sub scan_complex_identifier
8646
8647							{ ## closure for sub do_scan_sub
8648
8649							my %warn_if_lexical;
8650
8651							BEGIN {
8652
8653							# lexical subs with these names can cause parsing errors in this version
8654	39			39		324	my @q = qw( m q qq qr qw qx s tr y );
8655	39					3504	@{warn_if_lexical}{@q} = (1) x scalar(@q);
8656							} ## end BEGIN
8657
8658							# saved package and subnames in case prototype is on separate line
8659							my ( $package_saved, $subname_saved );
8660
8661							# initialize subname each time a new 'sub' keyword is encountered
8662							sub initialize_subname {
8663	295			295	0	664	$package_saved = EMPTY_STRING;
8664	295					597	$subname_saved = EMPTY_STRING;
8665	295					521	return;
8666							}
8667
8668							use constant {
8669	39					92987	SUB_CALL => 1,
8670							PAREN_CALL => 2,
8671							PROTOTYPE_CALL => 3,
8672	39			39		461	};
	39					128
8673
8674							sub do_scan_sub {
8675
8676							# do_scan_sub parses a sub name and prototype.
8677
8678							# At present there are three basic CALL TYPES which are
8679							# distinguished by the starting value of '$tok':
8680							# 1. $tok='sub', id_scan_state='sub'
8681							# it is called with $i_beg equal to the index of the first nonblank
8682							# token following a 'sub' token.
8683							# 2. $tok='(', id_scan_state='sub',
8684							# it is called with $i_beg equal to the index of a '(' which may
8685							# start a prototype.
8686							# 3. $tok='prototype', id_scan_state='prototype'
8687							# it is called with $i_beg equal to the index of a '(' which is
8688							# preceded by ': prototype' and has $id_scan_state eq 'prototype'
8689
8690							# Examples:
8691
8692							# A single type 1 call will get both the sub and prototype
8693							# sub foo1 ( $$ ) { }
8694							# ^
8695
8696							# The subname will be obtained with a 'sub' call
8697							# The prototype on line 2 will be obtained with a '(' call
8698							# sub foo1
8699							# ^ <---call type 1
8700							# ( $$ ) { }
8701							# ^ <---call type 2
8702
8703							# The subname will be obtained with a 'sub' call
8704							# The prototype will be obtained with a 'prototype' call
8705							# sub foo1 ( $x, $y ) : prototype ( $$ ) { }
8706							# ^ <---type 1 ^ <---type 3
8707
8708							# TODO: add future error checks to be sure we have a valid
8709							# sub name. For example, 'sub &doit' is wrong. Also, be sure
8710							# a name is given if and only if a non-anonymous sub is
8711							# appropriate.
8712							# USES GLOBAL VARS: $current_package, $last_nonblank_token,
8713							# $rsaw_function_definition,
8714							# $statement_type
8715
8716	301			301	0	848	my ( $self, $rinput_hash ) = @_;
8717
8718	301					708	my $input_line = $rinput_hash->{input_line};
8719	301					645	my $i = $rinput_hash->{i};
8720	301					577	my $i_beg = $rinput_hash->{i_beg};
8721	301					635	my $tok = $rinput_hash->{tok};
8722	301					631	my $type = $rinput_hash->{type};
8723	301					587	my $rtokens = $rinput_hash->{rtokens};
8724	301					548	my $rtoken_map = $rinput_hash->{rtoken_map};
8725	301					1567	my $id_scan_state = $rinput_hash->{id_scan_state};
8726	301					614	my $max_token_index = $rinput_hash->{max_token_index};
8727
8728	301					518	my $i_entry = $i;
8729
8730							# Determine the CALL TYPE
8731							# 1=sub
8732							# 2=(
8733							# 3=prototype
8734	301	100				1085	my $call_type =
		100
8735							$tok eq 'prototype' ? PROTOTYPE_CALL
8736							: $tok eq '(' ? PAREN_CALL
8737							: SUB_CALL;
8738
8739	301					530	$id_scan_state = EMPTY_STRING; # normally we get everything in one call
8740	301					584	my $subname = $subname_saved;
8741	301					535	my $package = $package_saved;
8742	301					570	my $proto = undef;
8743	301					522	my $attrs = undef;
8744	301					518	my $match;
8745
8746	301					603	my $pos_beg = $rtoken_map->[$i_beg];
8747	301					972	pos($input_line) = $pos_beg;
8748
8749							# Look for the sub NAME if this is a SUB call
8750	301	100	100			2785	if (
8751							$call_type == SUB_CALL
8752							&& $input_line =~ m{\G\s*
8753							((?:\w(?:'\|::))) # package - something that ends in :: or '
8754							(\w+) # NAME - required
8755							}gcx
8756							)
8757							{
8758	122					307	$match = 1;
8759	122					336	$subname = $2;
8760
8761	122		33			479	my $is_lexical_sub =
8762							$last_nonblank_type eq 'k' && $last_nonblank_token eq 'my';
8763	122	0	33			434	if ( $is_lexical_sub && $1 ) {
8764	0					0	$self->warning("'my' sub $subname cannot be in package '$1'\n");
8765	0					0	$is_lexical_sub = 0;
8766							}
8767
8768	122	50				362	if ($is_lexical_sub) {
8769
8770							# lexical subs use the block sequence number as a package name
8771	0					0	my $seqno =
8772							$rcurrent_sequence_number->[BRACE]
8773							[ $rcurrent_depth->[BRACE] ];
8774	0	0				0	$seqno = 1 if ( !defined($seqno) );
8775	0					0	$package = $seqno;
8776	0	0				0	if ( $warn_if_lexical{$subname} ) {
8777	0					0	$self->warning(
8778							"'my' sub '$subname' matches a builtin name and may not be handled correctly in this perltidy version.\n"
8779							);
8780
8781							# This may end badly, it is safest to block formatting
8782							# For an example, see perl527/lexsub.t (issue c203)
8783	0					0	$self->[_in_trouble_] = 1;
8784							}
8785							}
8786							else {
8787	122	100	66			825	$package = ( defined($1) && $1 ) ? $1 : $current_package;
8788	122					400	$package =~ s/\'/::/g;
8789	122	50				440	if ( $package =~ /^\:/ ) { $package = 'main' . $package }
	0					0
8790	122					317	$package =~ s/::$//;
8791							}
8792
8793	122					284	my $pos = pos($input_line);
8794	122					287	my $numc = $pos - $pos_beg;
8795	122					403	$tok = 'sub ' . substr( $input_line, $pos_beg, $numc );
8796	122					300	$type = 'S'; ## Fix for c250, was 'i';
8797
8798							# remember the sub name in case another call is needed to
8799							# get the prototype
8800	122					251	$package_saved = $package;
8801	122					247	$subname_saved = $subname;
8802							}
8803
8804							# Now look for PROTO ATTRS for all call types
8805							# Look for prototype/attributes which are usually on the same
8806							# line as the sub name but which might be on a separate line.
8807							# For example, we might have an anonymous sub with attributes,
8808							# or a prototype on a separate line from its sub name
8809
8810							# NOTE: We only want to parse PROTOTYPES here. If we see anything that
8811							# does not look like a prototype, we assume it is a SIGNATURE and we
8812							# will stop and let the the standard tokenizer handle it. In
8813							# particular, we stop if we see any nested parens, braces, or commas.
8814							# Also note, a valid prototype cannot contain any alphabetic character
8815							# -- see https://perldoc.perl.org/perlsub
8816							# But it appears that an underscore is valid in a prototype, so the
8817							# regex below uses [A-Za-z] rather than \w
8818							# This is the old regex which has been replaced:
8819							# $input_line =~ m/\G(\s$[^$$\}\{\,#]$)? # PROTO
8820	301					1109	my $saw_opening_paren = $input_line =~ /\G\s*\(/;
8821	301	100	100			2946	if (
			66
8822							$input_line =~ m{\G(\s$[^$$\}\{\,#A-Za-z]$)? # PROTO
8823							(\s*:)? # ATTRS leading ':'
8824							}gcx
8825							&& ( $1 \|\| $2 )
8826							)
8827							{
8828	45					117	$proto = $1;
8829	45					89	$attrs = $2;
8830
8831							# Append the prototype to the starting token if it is 'sub' or
8832							# 'prototype'. This is not necessary but for compatibility with
8833							# previous versions when the -csc flag is used:
8834	45	100	100			326	if ( $proto && ( $match \|\| $call_type == PROTOTYPE_CALL ) ) {
		100	100
8835	24					64	$tok .= $proto;
8836							}
8837
8838							# If we just entered the sub at an opening paren on this call, not
8839							# a following :prototype, label it with the previous token. This is
8840							# necessary to propagate the sub name to its opening block.
8841							elsif ( $call_type == PAREN_CALL ) {
8842	2					5	$tok = $last_nonblank_token;
8843							}
8844							else {
8845							}
8846
8847	45		100			167	$match \|\|= 1;
8848
8849							# Patch part #1 to fixes cases b994 and b1053:
8850							# Mark an anonymous sub keyword without prototype as type 'k', i.e.
8851							# 'sub : lvalue { ...'
8852	45					94	$type = 'S'; ## C250, was 'i';
8853	45	100	100			209	if ( $tok eq 'sub' && !$proto ) { $type = 'k' }
	2					6
8854							}
8855
8856	301	100				866	if ($match) {
8857
8858							# ATTRS: if there are attributes, back up and let the ':' be
8859							# found later by the scanner.
8860	137					299	my $pos = pos($input_line);
8861	137	100				398	if ($attrs) {
8862	15					33	$pos -= length($attrs);
8863							}
8864
8865	137					303	my $next_nonblank_token = $tok;
8866
8867							# catch case of line with leading ATTR ':' after anonymous sub
8868	137	100	100			545	if ( $pos == $pos_beg && $tok eq ':' ) {
8869	1					2	$type = 'A';
8870	1					3	$self->[_in_attribute_list_] = 1;
8871							}
8872
8873							# Otherwise, if we found a match we must convert back from
8874							# string position to the pre_token index for continued parsing.
8875							else {
8876
8877							# I don't think an error flag can occur here ..but ?
8878	136					245	my $error;
8879	136					539	( $i, $error ) = inverse_pretoken_map( $i, $pos, $rtoken_map,
8880							$max_token_index );
8881	136	50				486	if ($error) { $self->warning("Possibly invalid sub\n") }
	0					0
8882
8883							# Patch part #2 to fixes cases b994 and b1053:
8884							# Do not let spaces be part of the token of an anonymous sub
8885							# keyword which we marked as type 'k' above...i.e. for
8886							# something like:
8887							# 'sub : lvalue { ...'
8888							# Back up and let it be parsed as a blank
8889	136	50	66			606	if ( $type eq 'k'
			66
			33
8890							&& $attrs
8891							&& $i > $i_entry
8892							&& substr( $rtokens->[$i], 0, 1 ) =~ m/\s/ )
8893							{
8894	2					6	$i--;
8895							}
8896
8897							# check for multiple definitions of a sub
8898	136					404	( $next_nonblank_token, my $i_next ) =
8899							find_next_nonblank_token_on_this_line( $i, $rtokens,
8900							$max_token_index );
8901							}
8902
8903	137	100				785	if ( $next_nonblank_token =~ /^(\s*\|#)$/ )
8904							{ # skip blank or side comment
8905	7					79	my ( $rpre_tokens, $rpre_types ) =
8906							$self->peek_ahead_for_n_nonblank_pre_tokens(1);
8907	7	50	33			45	if ( defined($rpre_tokens) && @{$rpre_tokens} ) {
	7					34
8908	7					37	$next_nonblank_token = $rpre_tokens->[0];
8909							}
8910							else {
8911	0					0	$next_nonblank_token = '}';
8912							}
8913							}
8914
8915							# See what's next...
8916	137	100				667	if ( $next_nonblank_token eq '{' ) {
		100
		50
		100
		50
		0
8917	105	100				325	if ($subname) {
8918
8919							# Check for multiple definitions of a sub, but
8920							# it is ok to have multiple sub BEGIN, etc,
8921							# so we do not complain if name is all caps
8922	95	50	33			556	if ( $rsaw_function_definition->{$subname}{$package}
8923							&& $subname !~ /^[A-Z]+$/ )
8924							{
8925							my $lno =
8926	0					0	$rsaw_function_definition->{$subname}{$package};
8927	0	0				0	if ( $package =~ /^\d/ ) {
8928	0					0	$self->warning(
8929							"already saw definition of lexical 'sub $subname' at line $lno\n"
8930							);
8931
8932							}
8933							else {
8934	0					0	if ( !DEVEL_MODE ) {
8935	0					0	$self->warning(
8936							"already saw definition of 'sub $subname' in package '$package' at line $lno\n"
8937							);
8938							}
8939							}
8940							}
8941	95					380	$rsaw_function_definition->{$subname}{$package} =
8942							$self->[_last_line_number_];
8943							}
8944							}
8945							elsif ( $next_nonblank_token eq ';' ) {
8946							}
8947							elsif ( $next_nonblank_token eq '}' ) {
8948							}
8949
8950							# ATTRS - if an attribute list follows, remember the name
8951							# of the sub so the next opening brace can be labeled.
8952							# Setting 'statement_type' causes any ':'s to introduce
8953							# attributes.
8954							elsif ( $next_nonblank_token eq ':' ) {
8955	16	100				60	if ( $call_type == SUB_CALL ) {
8956	14	100				70	$statement_type =
8957							substr( $tok, 0, 3 ) eq 'sub' ? $tok : 'sub';
8958							}
8959							}
8960
8961							# if we stopped before an open paren ...
8962							elsif ( $next_nonblank_token eq '(' ) {
8963
8964							# If we DID NOT see this paren above then it must be on the
8965							# next line so we will set a flag to come back here and see if
8966							# it is a PROTOTYPE
8967
8968							# Otherwise, we assume it is a SIGNATURE rather than a
8969							# PROTOTYPE and let the normal tokenizer handle it as a list
8970	15	100				53	if ( !$saw_opening_paren ) {
8971	4					966	$id_scan_state = 'sub'; # we must come back to get proto
8972							}
8973	15	50				60	if ( $call_type == SUB_CALL ) {
8974	15	50				64	$statement_type =
8975							substr( $tok, 0, 3 ) eq 'sub' ? $tok : 'sub';
8976							}
8977							}
8978
8979							# something else..
8980							elsif ($next_nonblank_token) {
8981
8982	0	0	0			0	if ( $rinput_hash->{tok} eq 'method' && $call_type == SUB_CALL )
8983							{
8984							# For a method call, silently ignore this error (rt145706)
8985							# to avoid needless warnings. Example which can produce it:
8986							# test(method Pack (), "method");
8987
8988							# TODO: scan for use feature 'class' and:
8989							# - if we saw 'use feature 'class' then issue the warning.
8990							# - if we did not see use feature 'class' then issue the
8991							# warning and suggest turning off --use-feature=class
8992							}
8993							else {
8994	0	0				0	$subname = EMPTY_STRING unless defined($subname);
8995	0					0	$self->warning(
8996							"expecting ':' or ';' or '{' after definition or declaration of sub '$subname' but saw '$next_nonblank_token'\n"
8997							);
8998							}
8999							}
9000
9001							# EOF technically ok
9002							else {
9003							}
9004
9005	137					478	check_prototype( $proto, $package, $subname );
9006							}
9007
9008							# no match to either sub name or prototype, but line not blank
9009							else {
9010
9011							}
9012	301					1857	return ( $i, $tok, $type, $id_scan_state );
9013							} ## end sub do_scan_sub
9014							}
9015
9016							#########################################################################
9017							# Tokenizer utility routines which may use CONSTANTS but no other GLOBALS
9018							#########################################################################
9019
9020							sub find_next_nonblank_token {
9021	6160			6160	0	12190	my ( $self, $i, $rtokens, $max_token_index ) = @_;
9022
9023							# Returns the next nonblank token after the token at index $i
9024							# To skip past a side comment, and any subsequent block comments
9025							# and blank lines, call with i=$max_token_index
9026
9027							# Skip any ending blank (fix c258). It would be cleaner if caller passed
9028							# $rtoken_map, so we could check for type 'b', and avoid a regex test, but
9029							# benchmarking shows that this test does not take significant time. So
9030							# that would be a nice update but not essential. Also note that ending
9031							# blanks will not occur for text previously processed by perltidy.
9032	6160	100	100			19364	if ( $i == $max_token_index - 1
9033							&& $rtokens->[$max_token_index] =~ /^\s+$/ )
9034							{
9035	9					26	$i++;
9036							}
9037
9038	6160	100				12811	if ( $i >= $max_token_index ) {
9039	127	100				714	if ( !peeked_ahead() ) {
9040	125					431	peeked_ahead(1);
9041	125					796	$self->peek_ahead_for_nonblank_token( $rtokens, $max_token_index );
9042							}
9043							}
9044
9045	6160					10772	my $next_nonblank_token = $rtokens->[ ++$i ];
9046	6160	50	33			21916	return ( SPACE, $i )
9047							if ( !defined($next_nonblank_token) \|\| !length($next_nonblank_token) );
9048
9049							# Quick test for nonblank ascii char. Note that we just have to
9050							# examine the first character here.
9051	6160					12367	my $ord = ord( substr( $next_nonblank_token, 0, 1 ) );
9052	6160	100	66			24256	if ( $ord >= ORD_PRINTABLE_MIN
		50	33
		0
9053							&& $ord <= ORD_PRINTABLE_MAX )
9054							{
9055	2353					8029	return ( $next_nonblank_token, $i );
9056							}
9057
9058							# Quick test to skip over an ascii space or tab
9059							elsif ( $ord == ORD_SPACE \|\| $ord == ORD_TAB ) {
9060	3807					7029	$next_nonblank_token = $rtokens->[ ++$i ];
9061	3807	50				8178	return ( SPACE, $i ) unless defined($next_nonblank_token);
9062							}
9063
9064							# Slow test to skip over something else identified as whitespace
9065							elsif ( $next_nonblank_token =~ /^\s*$/ ) {
9066	0					0	$next_nonblank_token = $rtokens->[ ++$i ];
9067	0	0				0	return ( SPACE, $i ) unless defined($next_nonblank_token);
9068							}
9069							else {
9070							## at nonblank
9071							}
9072
9073							# We should be at a nonblank now
9074	3807					11932	return ( $next_nonblank_token, $i );
9075							} ## end sub find_next_nonblank_token
9076
9077							sub find_next_noncomment_token {
9078	98			98	0	351	my ( $self, $i, $rtokens, $max_token_index ) = @_;
9079
9080							# Given the current character position, look ahead past any comments
9081							# and blank lines and return the next token, including digraphs and
9082							# trigraphs.
9083
9084	98					369	my ( $next_nonblank_token, $i_next ) =
9085							$self->find_next_nonblank_token( $i, $rtokens, $max_token_index );
9086
9087							# skip past any side comment
9088	98	50				580	if ( $next_nonblank_token eq '#' ) {
9089	0					0	( $next_nonblank_token, $i_next ) =
9090							$self->find_next_nonblank_token( $i_next, $rtokens,
9091							$max_token_index );
9092							}
9093
9094							# check for a digraph
9095	98	50	33			873	if ( $next_nonblank_token
			33
9096							&& $next_nonblank_token ne SPACE
9097							&& defined( $rtokens->[ $i_next + 1 ] ) )
9098							{
9099	98					307	my $test2 = $next_nonblank_token . $rtokens->[ $i_next + 1 ];
9100	98	100				425	if ( $is_digraph{$test2} ) {
9101	15					73	$next_nonblank_token = $test2;
9102	15					51	$i_next = $i_next + 1;
9103
9104							# check for a trigraph
9105	15	50				72	if ( defined( $rtokens->[ $i_next + 1 ] ) ) {
9106	15					52	my $test3 = $next_nonblank_token . $rtokens->[ $i_next + 1 ];
9107	15	50				69	if ( $is_trigraph{$test3} ) {
9108	0					0	$next_nonblank_token = $test3;
9109	0					0	$i_next = $i_next + 1;
9110							}
9111							}
9112							}
9113							}
9114
9115	98					327	return ( $next_nonblank_token, $i_next );
9116							} ## end sub find_next_noncomment_token
9117
9118							sub is_possible_numerator {
9119
9120							# Look at the next non-comment character and decide if it could be a
9121							# numerator. Return
9122							# 1 - yes
9123							# 0 - can't tell
9124							# -1 - no
9125
9126	0			0	0	0	my ( $self, $i, $rtokens, $max_token_index ) = @_;
9127	0					0	my $is_possible_numerator = 0;
9128
9129	0					0	my $next_token = $rtokens->[ $i + 1 ];
9130	0	0				0	if ( $next_token eq '=' ) { $i++; } # handle /=
	0					0
9131	0					0	my ( $next_nonblank_token, $i_next ) =
9132							$self->find_next_nonblank_token( $i, $rtokens, $max_token_index );
9133
9134	0	0				0	if ( $next_nonblank_token eq '#' ) {
9135	0					0	( $next_nonblank_token, $i_next ) =
9136							$self->find_next_nonblank_token( $max_token_index, $rtokens,
9137							$max_token_index );
9138							}
9139
9140	0	0				0	if ( $next_nonblank_token =~ / [ \( \$ \w \. \@ ] /x ) {
		0
9141	0					0	$is_possible_numerator = 1;
9142							}
9143							elsif ( $next_nonblank_token =~ /^\s*$/ ) {
9144	0					0	$is_possible_numerator = 0;
9145							}
9146							else {
9147	0					0	$is_possible_numerator = -1;
9148							}
9149
9150	0					0	return $is_possible_numerator;
9151							} ## end sub is_possible_numerator
9152
9153							{ ## closure for sub pattern_expected
9154							my %pattern_test;
9155
9156							BEGIN {
9157
9158							# List of tokens which may follow a pattern. Note that we will not
9159							# have formed digraphs at this point, so we will see '&' instead of
9160							# '&&' and '\|' instead of '\|\|'
9161
9162							# /(\)\|\}\|\;\|\&\&\|\\|\\|\|and\|or\|while\|if\|unless)/
9163	39			39		307	my @q = qw( & && \| \|\| ? : + - * and or while if unless);
9164	39					152	push @q, ')', '}', ']', '>', ',', ';';
9165	39					182620	@{pattern_test}{@q} = (1) x scalar(@q);
9166							} ## end BEGIN
9167
9168							sub pattern_expected {
9169
9170							# This a filter for a possible pattern.
9171							# It looks at the token after a possible pattern and tries to
9172							# determine if that token could end a pattern.
9173							# returns -
9174							# 1 - yes
9175							# 0 - can't tell
9176							# -1 - no
9177	0			0	0	0	my ( $self, $i, $rtokens, $max_token_index ) = @_;
9178	0					0	my $is_pattern = 0;
9179
9180	0					0	my $next_token = $rtokens->[ $i + 1 ];
9181	0	0				0	if ( $next_token =~ /^[msixpodualgc]/ ) {
9182	0					0	$i++;
9183							} # skip possible modifier
9184	0					0	my ( $next_nonblank_token, $i_next ) =
9185							$self->find_next_nonblank_token( $i, $rtokens, $max_token_index );
9186
9187	0	0				0	if ( $pattern_test{$next_nonblank_token} ) {
9188	0					0	$is_pattern = 1;
9189							}
9190							else {
9191
9192							# Added '#' to fix issue c044
9193	0	0	0			0	if ( $next_nonblank_token =~ /^\s*$/
9194							\|\| $next_nonblank_token eq '#' )
9195							{
9196	0					0	$is_pattern = 0;
9197							}
9198							else {
9199	0					0	$is_pattern = -1;
9200							}
9201							}
9202	0					0	return $is_pattern;
9203							} ## end sub pattern_expected
9204							}
9205
9206							sub find_next_nonblank_token_on_this_line {
9207	455			455	0	1081	my ( $i, $rtokens, $max_token_index ) = @_;
9208	455					820	my $next_nonblank_token;
9209
9210	455	100				1158	if ( $i < $max_token_index ) {
9211	447					1060	$next_nonblank_token = $rtokens->[ ++$i ];
9212
9213	447	100				1987	if ( $next_nonblank_token =~ /^\s*$/ ) {
9214
9215	120	100				522	if ( $i < $max_token_index ) {
9216	118					321	$next_nonblank_token = $rtokens->[ ++$i ];
9217							}
9218							}
9219							}
9220							else {
9221	8					33	$next_nonblank_token = EMPTY_STRING;
9222							}
9223	455					1449	return ( $next_nonblank_token, $i );
9224							} ## end sub find_next_nonblank_token_on_this_line
9225
9226							sub find_angle_operator_termination {
9227
9228							# We are looking at a '<' and want to know if it is an angle operator.
9229							# We are to return:
9230							# $i = pretoken index of ending '>' if found, current $i otherwise
9231							# $type = 'Q' if found, '>' otherwise
9232	8			8	0	34	my ( $self, $input_line, $i_beg, $rtoken_map, $expecting, $max_token_index )
9233							= @_;
9234	8					20	my $i = $i_beg;
9235	8					17	my $type = '<';
9236	8					39	pos($input_line) = 1 + $rtoken_map->[$i];
9237
9238	8					20	my $filter;
9239
9240							# we just have to find the next '>' if a term is expected
9241	8	100				33	if ( $expecting == TERM ) { $filter = '[\>]' }
	6	50				18
9242
9243							# we have to guess if we don't know what is expected
9244	2					5	elsif ( $expecting == UNKNOWN ) { $filter = '[\>\;\=\#\\|\<]' }
9245
9246							# shouldn't happen - we shouldn't be here if operator is expected
9247							else {
9248	0					0	if (DEVEL_MODE) {
9249							$self->Fault(<<EOM);
9250							Bad call to find_angle_operator_termination
9251							EOM
9252							}
9253	0					0	return ( $i, $type );
9254							}
9255
9256							# To illustrate what we might be looking at, in case we are
9257							# guessing, here are some examples of valid angle operators
9258							# (or file globs):
9259							# <tmp_imp/*>
9260							# <FH>
9261							# <$fh>
9262							# <.c .h>
9263							# <_>
9264							# <jskdfjskdfj* op/* jskdjfjkosvk*> ( glob.t)
9265							# <${PREFIX}img.$IMAGE_TYPE>
9266							# <img*.$IMAGE_TYPE>
9267							# <Timg*.$IMAGE_TYPE>
9268							# <$LATEX2HTMLVERSIONS${dd}html[1-9].[0-9].pl>
9269							#
9270							# Here are some examples of lines which do not have angle operators:
9271							# return unless $self->[2]++ < $#{$self->[1]};
9272							# < 2 \|\| @$t >
9273							#
9274							# the following line from dlister.pl caused trouble:
9275							# print'~'x79,"\n",$D<1024?"0.$D":$D>>10,"K, $C files\n\n\n";
9276							#
9277							# If the '<' starts an angle operator, it must end on this line and
9278							# it must not have certain characters like ';' and '=' in it. I use
9279							# this to limit the testing. This filter should be improved if
9280							# possible.
9281
9282	8	50				213	if ( $input_line =~ /($filter)/g ) {
9283
9284	8	50				44	if ( $1 eq '>' ) {
9285
9286							# We MAY have found an angle operator termination if we get
9287							# here, but we need to do more to be sure we haven't been
9288							# fooled.
9289	8					21	my $pos = pos($input_line);
9290
9291	8					24	my $pos_beg = $rtoken_map->[$i];
9292	8					33	my $str = substr( $input_line, $pos_beg, ( $pos - $pos_beg ) );
9293
9294							# Test for '<' after possible filehandle, issue c103
9295							# print $fh <>; # syntax error
9296							# print $fh <DATA>; # ok
9297							# print $fh < DATA>; # syntax error at '>'
9298							# print STDERR < DATA>; # ok, prints word 'DATA'
9299							# print BLABLA <DATA>; # ok; does nothing unless BLABLA is defined
9300	8	100				38	if ( $last_nonblank_type eq 'Z' ) {
9301
9302							# $str includes brackets; something like '<DATA>'
9303	1	0	33			6	if ( substr( $last_nonblank_token, 0, 1 ) !~ /[A-Za-z_]/
9304							&& substr( $str, 1, 1 ) !~ /[A-Za-z_]/ )
9305							{
9306	0					0	return ( $i, $type );
9307							}
9308							}
9309
9310							# Reject if the closing '>' follows a '-' as in:
9311							# if ( VERSION < 5.009 && $op-> name eq 'assign' ) { }
9312	8	100				43	if ( $expecting eq UNKNOWN ) {
9313	2					8	my $check = substr( $input_line, $pos - 2, 1 );
9314	2	100				7	if ( $check eq '-' ) {
9315	1					19	return ( $i, $type );
9316							}
9317							}
9318
9319							######################################debug#####
9320							#$self->write_diagnostics( "ANGLE? :$str\n");
9321							#print "ANGLE: found $1 at pos=$pos str=$str check=$check\n";
9322							######################################debug#####
9323	7					22	$type = 'Q';
9324	7					64	my $error;
9325	7					42	( $i, $error ) =
9326							inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );
9327
9328							# It may be possible that a quote ends midway in a pretoken.
9329							# If this happens, it may be necessary to split the pretoken.
9330	7	50				36	if ($error) {
9331	0					0	if (DEVEL_MODE) {
9332							$self->Fault(<<EOM);
9333							unexpected error condition returned by inverse_pretoken_map
9334							EOM
9335							}
9336							$self->warning(
9337	0					0	"Possible tokinization error..please check this line\n");
9338							}
9339
9340							# Check for accidental formatting of a markup language doc...
9341							# Formatting will be skipped if we set _html_tag_count_ and
9342							# also set a warning of any kind.
9343	7					20	my $is_html_tag;
9344	7		33			45	my $is_first_string =
9345							$i_beg == 0 && $self->[_last_line_number_] == 1;
9346
9347							# html comment '<!...' of any type
9348	7	50	33			94	if ( $str =~ /^<\s*!/ ) {
		50
		50
9349	0					0	$is_html_tag = 1;
9350	0	0				0	if ($is_first_string) {
9351	0					0	$self->warning(
9352							"looks like a markup language, continuing error checks\n"
9353							);
9354							}
9355							}
9356
9357							# html end tag, something like </h1>
9358							elsif ( $str =~ /^<\s\/\w+\s>$/ ) {
9359	0					0	$is_html_tag = 1;
9360							}
9361
9362							# xml prolog?
9363							elsif ( $str =~ /^<\?xml\s.*\?>$/i && $is_first_string ) {
9364	0					0	$is_html_tag = 1;
9365	0					0	$self->warning(
9366							"looks like a markup language, continuing error checks\n");
9367							}
9368							else {
9369							## doesn't look like a markup tag
9370							}
9371
9372	7	50				29	if ($is_html_tag) {
9373	0					0	$self->[_html_tag_count_]++;
9374							}
9375
9376							# count blanks on inside of brackets
9377	7					17	my $blank_count = 0;
9378	7	100				45	$blank_count++ if ( $str =~ /<\s+/ );
9379	7	100				39	$blank_count++ if ( $str =~ /\s+>/ );
9380
9381							# Now let's see where we stand....
9382							# OK if math op not possible
9383	7	100				35	if ( $expecting == TERM ) {
		50
		50
		0
9384							}
9385
9386							elsif ($is_html_tag) {
9387							}
9388
9389							# OK if there are no more than 2 non-blank pre-tokens inside
9390							# (not possible to write 2 token math between < and >)
9391							# This catches most common cases
9392							elsif ( $i <= $i_beg + 3 + $blank_count ) {
9393
9394							# No longer any need to document this common case
9395							## $self->write_diagnostics("ANGLE(1 or 2 tokens): $str\n");
9396							}
9397
9398							# OK if there is some kind of identifier inside
9399							# print $fh <tvg::INPUT>;
9400							elsif ( $str =~ /^<\s\$?(\w\|::\|\s)+\s>$/ ) {
9401	0					0	$self->write_diagnostics("ANGLE (contains identifier): $str\n");
9402							}
9403
9404							# Not sure..
9405							else {
9406
9407							# Let's try a Brace Test: any braces inside must balance
9408	0					0	my $br = 0;
9409	0					0	while ( $str =~ /\{/g ) { $br++ }
	0					0
9410	0					0	while ( $str =~ /\}/g ) { $br-- }
	0					0
9411	0					0	my $sb = 0;
9412	0					0	while ( $str =~ /\[/g ) { $sb++ }
	0					0
9413	0					0	while ( $str =~ /\]/g ) { $sb-- }
	0					0
9414	0					0	my $pr = 0;
9415	0					0	while ( $str =~ /\(/g ) { $pr++ }
	0					0
9416	0					0	while ( $str =~ /\)/g ) { $pr-- }
	0					0
9417
9418							# if braces do not balance - not angle operator
9419	0	0	0			0	if ( $br \|\| $sb \|\| $pr ) {
			0
9420	0					0	$i = $i_beg;
9421	0					0	$type = '<';
9422	0					0	$self->write_diagnostics(
9423							"NOT ANGLE (BRACE={$br ($pr [$sb ):$str\n");
9424							}
9425
9426							# we should keep doing more checks here...to be continued
9427							# Tentatively accepting this as a valid angle operator.
9428							# There are lots more things that can be checked.
9429							else {
9430	0					0	$self->write_diagnostics(
9431							"ANGLE-Guessing yes: $str expecting=$expecting\n");
9432	0					0	$self->write_logfile_entry(
9433							"Guessing angle operator here: $str\n");
9434							}
9435							}
9436							}
9437
9438							# didn't find ending >
9439							else {
9440	0	0				0	if ( $expecting == TERM ) {
9441	0					0	$self->warning("No ending > for angle operator\n");
9442							}
9443							}
9444							}
9445	7					37	return ( $i, $type );
9446							} ## end sub find_angle_operator_termination
9447
9448							sub scan_number_do {
9449
9450							# scan a number in any of the formats that Perl accepts
9451							# Underbars (_) are allowed in decimal numbers.
9452							# input parameters -
9453							# $input_line - the string to scan
9454							# $i - pre_token index to start scanning
9455							# $rtoken_map - reference to the pre_token map giving starting
9456							# character position in $input_line of token $i
9457							# output parameters -
9458							# $i - last pre_token index of the number just scanned
9459							# number - the number (characters); or undef if not a number
9460
9461	629			629	0	1994	my ( $self, $input_line, $i, $rtoken_map, $input_type, $max_token_index ) =
9462							@_;
9463	629					1157	my $pos_beg = $rtoken_map->[$i];
9464	629					996	my $pos;
9465	629					985	my $i_begin = $i;
9466	629					999	my $number = undef;
9467	629					999	my $type = $input_type;
9468
9469	629					2584	my $first_char = substr( $input_line, $pos_beg, 1 );
9470
9471							# Look for bad starting characters; Shouldn't happen..
9472	629	50				2759	if ( $first_char !~ /[\d\.\+\-Ee]/ ) {
9473	0					0	if (DEVEL_MODE) {
9474							$self->Fault(<<EOM);
9475							Program bug - scan_number given bad first character = '$first_char'
9476							EOM
9477							}
9478	0					0	return ( $i, $type, $number );
9479							}
9480
9481							# handle v-string without leading 'v' character ('Two Dot' rule)
9482							# (vstring.t)
9483							# Here is the format prior to including underscores:
9484							## if ( $input_line =~ /\G((\d+)?\.\d+(\.\d+)+)/g ) {
9485	629					1871	pos($input_line) = $pos_beg;
9486	629	50				3071	if ( $input_line =~ /\G((\d[_\d]*)?\.[\d_]+(\.[\d_]+)+)/g ) {
9487	0					0	$pos = pos($input_line);
9488	0					0	my $numc = $pos - $pos_beg;
9489	0					0	$number = substr( $input_line, $pos_beg, $numc );
9490	0					0	$type = 'v';
9491	0					0	$self->report_v_string($number);
9492							}
9493
9494							# handle octal, hex, binary
9495	629	50				1618	if ( !defined($number) ) {
9496	629					1266	pos($input_line) = $pos_beg;
9497
9498							# Perl 5.22 added floating point literals, like '0x0.b17217f7d1cf78p0'
9499							# For reference, the format prior to hex floating point is:
9500							# /\G[+-]?0(([xX][0-9a-fA-F_]+)\|([0-7_]+)\|([bB][01_]+))/g )
9501							# (hex) (octal) (binary)
9502	629	100				2375	if (
9503							$input_line =~ m{
9504
9505							\G[+-]?0( # leading [signed] 0
9506
9507							# a hex float, i.e. '0x0.b17217f7d1cf78p0'
9508							([xX][0-9a-fA-F_]* # X and optional leading digits
9509							(\.([0-9a-fA-F][0-9a-fA-F_]*)?)? # optional decimal and fraction
9510							[Pp][+-]?[0-9a-fA-F] # REQUIRED exponent with digit
9511							[0-9a-fA-F_]*) # optional Additional exponent digits
9512
9513							# or hex integer
9514							\|([xX][0-9a-fA-F_]+)
9515
9516							# or octal fraction
9517							\|([oO]?[0-7_]+ # string of octal digits
9518							(\.([0-7][0-7_]*)?)? # optional decimal and fraction
9519							[Pp][+-]?[0-7] # REQUIRED exponent, no underscore
9520							[0-7_]*) # Additional exponent digits with underscores
9521
9522							# or octal integer
9523							\|([oO]?[0-7_]+) # string of octal digits
9524
9525							# or a binary float
9526							\|([bB][01_]* # 'b' with string of binary digits
9527							(\.([01][01_]*)?)? # optional decimal and fraction
9528							[Pp][+-]?[01] # Required exponent indicator, no underscore
9529							[01_]*) # additional exponent bits
9530
9531							# or binary integer
9532							\|([bB][01_]+) # 'b' with string of binary digits
9533
9534							)}gx
9535							)
9536							{
9537	72					150	$pos = pos($input_line);
9538	72					120	my $numc = $pos - $pos_beg;
9539	72					151	$number = substr( $input_line, $pos_beg, $numc );
9540	72					133	$type = 'n';
9541							}
9542							}
9543
9544							# handle decimal
9545	629	100				1487	if ( !defined($number) ) {
9546	557					1037	pos($input_line) = $pos_beg;
9547
9548	557	50				2679	if ( $input_line =~ /\G([+-]?[\d_](\.[\d_])?([Ee][+-]?(\d+))?)/g ) {
9549	557					1032	$pos = pos($input_line);
9550
9551							# watch out for things like 0..40 which would give 0. by this;
9552	557	100	100			1934	if ( ( substr( $input_line, $pos - 1, 1 ) eq '.' )
9553							&& ( substr( $input_line, $pos, 1 ) eq '.' ) )
9554							{
9555	37					70	$pos--;
9556							}
9557	557					983	my $numc = $pos - $pos_beg;
9558	557					1041	$number = substr( $input_line, $pos_beg, $numc );
9559	557					981	$type = 'n';
9560							}
9561							}
9562
9563							# filter out non-numbers like e + - . e2 .e3 +e6
9564							# the rule: at least one digit, and any 'e' must be preceded by a digit
9565	629	100	66			3254	if (
			66
9566							$number !~ /\d/ # no digits
9567							\|\| ( $number =~ /^(.*)[eE]/
9568							&& $1 !~ /\d/ ) # or no digits before the 'e'
9569							)
9570							{
9571	303					515	$number = undef;
9572	303					526	$type = $input_type;
9573	303					1253	return ( $i, $type, $number );
9574							}
9575
9576							# Found a number; now we must convert back from character position
9577							# to pre_token index. An error here implies user syntax error.
9578							# An example would be an invalid octal number like '009'.
9579	326					610	my $error;
9580	326					872	( $i, $error ) =
9581							inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );
9582	326	50				916	if ($error) { $self->warning("Possibly invalid number\n") }
	0					0
9583
9584	326					1167	return ( $i, $type, $number );
9585							} ## end sub scan_number_do
9586
9587							sub inverse_pretoken_map {
9588
9589							# Starting with the current pre_token index $i, scan forward until
9590							# finding the index of the next pre_token whose position is $pos.
9591	2167			2167	0	5110	my ( $i, $pos, $rtoken_map, $max_token_index ) = @_;
9592	2167					3867	my $error = 0;
9593
9594	2167					5630	while ( ++$i <= $max_token_index ) {
9595
9596	4035	100				9661	if ( $pos <= $rtoken_map->[$i] ) {
9597
9598							# Let the calling routine handle errors in which we do not
9599							# land on a pre-token boundary. It can happen by running
9600							# perltidy on some non-perl scripts, for example.
9601	2132	50				5297	if ( $pos < $rtoken_map->[$i] ) { $error = 1 }
	0					0
9602	2132					3128	$i--;
9603	2132					3666	last;
9604							}
9605							}
9606	2167					5445	return ( $i, $error );
9607							} ## end sub inverse_pretoken_map
9608
9609							sub find_here_doc {
9610
9611							# find the target of a here document, if any
9612							# input parameters:
9613							# $i - token index of the second < of <<
9614							# ($i must be less than the last token index if this is called)
9615							# output parameters:
9616							# $found_target = 0 didn't find target; =1 found target
9617							# HERE_TARGET - the target string (may be empty string)
9618							# $i - unchanged if not here doc,
9619							# or index of the last token of the here target
9620							# $saw_error - flag noting unbalanced quote on here target
9621	9			9	0	42	my ( $self, $expecting, $i, $rtokens, $rtoken_map, $max_token_index ) = @_;
9622
9623	9					24	my $ibeg = $i;
9624	9					26	my $found_target = 0;
9625	9					26	my $here_doc_target = EMPTY_STRING;
9626	9					25	my $here_quote_character = EMPTY_STRING;
9627	9					21	my $saw_error = 0;
9628	9					22	my ( $next_nonblank_token, $i_next_nonblank, $next_token );
9629	9					29	$next_token = $rtokens->[ $i + 1 ];
9630
9631							# perl allows a backslash before the target string (heredoc.t)
9632	9					20	my $backslash = 0;
9633	9	50				42	if ( $next_token eq '\\' ) {
9634	0					0	$backslash = 1;
9635	0					0	$next_token = $rtokens->[ $i + 2 ];
9636							}
9637
9638	9					54	( $next_nonblank_token, $i_next_nonblank ) =
9639							find_next_nonblank_token_on_this_line( $i, $rtokens, $max_token_index );
9640
9641	9	100	33			78	if ( $next_nonblank_token =~ /[\'\"\`]/ ) {
		50
		50
9642
9643	6					18	my $in_quote = 1;
9644	6					12	my $quote_depth = 0;
9645	6					16	my $quote_pos = 0;
9646	6					10	my $quoted_string;
9647
9648							(
9649	6					36	$i, $in_quote, $here_quote_character, $quote_pos, $quote_depth,
9650							$quoted_string
9651							)
9652							= $self->follow_quoted_string( $i_next_nonblank, $in_quote, $rtokens,
9653							$here_quote_character, $quote_pos, $quote_depth, $max_token_index );
9654
9655	6	50				27	if ($in_quote) { # didn't find end of quote, so no target found
9656	0					0	$i = $ibeg;
9657	0	0				0	if ( $expecting == TERM ) {
9658	0					0	$self->warning(
9659							"Did not find here-doc string terminator ($here_quote_character) before end of line \n"
9660							);
9661	0					0	$saw_error = 1;
9662							}
9663							}
9664							else { # found ending quote
9665	6					14	$found_target = 1;
9666
9667	6					14	my $tokj;
9668	6					28	foreach my $j ( $i_next_nonblank + 1 .. $i - 1 ) {
9669	6					20	$tokj = $rtokens->[$j];
9670
9671							# we have to remove any backslash before the quote character
9672							# so that the here-doc-target exactly matches this string
9673							next
9674	6	0	33			35	if ( $tokj eq "\\"
			33
9675							&& $j < $i - 1
9676							&& $rtokens->[ $j + 1 ] eq $here_quote_character );
9677	6					27	$here_doc_target .= $tokj;
9678							}
9679							}
9680							}
9681
9682							elsif ( ( $next_token =~ /^\s*$/ ) and ( $expecting == TERM ) ) {
9683	0					0	$found_target = 1;
9684	0					0	$self->write_logfile_entry(
9685							"found blank here-target after <<; suggest using \"\"\n");
9686	0					0	$i = $ibeg;
9687							}
9688							elsif ( $next_token =~ /^\w/ ) { # simple bareword or integer after <<
9689
9690	3					7	my $here_doc_expected;
9691	3	50				16	if ( $expecting == UNKNOWN ) {
9692	0					0	$here_doc_expected = $self->guess_if_here_doc($next_token);
9693							}
9694							else {
9695	3					8	$here_doc_expected = 1;
9696							}
9697
9698	3	50				14	if ($here_doc_expected) {
9699	3					8	$found_target = 1;
9700	3					7	$here_doc_target = $next_token;
9701	3					7	$i = $ibeg + 1;
9702							}
9703
9704							}
9705							else {
9706
9707	0	0				0	if ( $expecting == TERM ) {
9708	0					0	$found_target = 1;
9709	0					0	$self->write_logfile_entry("Note: bare here-doc operator <<\n");
9710							}
9711							else {
9712	0					0	$i = $ibeg;
9713							}
9714							}
9715
9716							# patch to neglect any prepended backslash
9717	9	50	33			70	if ( $found_target && $backslash ) { $i++ }
	0					0
9718
9719	9					49	return ( $found_target, $here_doc_target, $here_quote_character, $i,
9720							$saw_error );
9721							} ## end sub find_here_doc
9722
9723							sub do_quote {
9724
9725							# follow (or continue following) quoted string(s)
9726							# $in_quote return code:
9727							# 0 - ok, found end
9728							# 1 - still must find end of quote whose target is $quote_character
9729							# 2 - still looking for end of first of two quotes
9730							#
9731							# Returns updated strings:
9732							# $quoted_string_1 = quoted string seen while in_quote=1
9733							# $quoted_string_2 = quoted string seen while in_quote=2
9734							my (
9735
9736	2768			2768	0	8691	$self,
9737							$i,
9738							$in_quote,
9739							$quote_character,
9740							$quote_pos,
9741							$quote_depth,
9742							$quoted_string_1,
9743							$quoted_string_2,
9744							$rtokens,
9745							$rtoken_map,
9746							$max_token_index,
9747
9748							) = @_;
9749
9750	2768					4242	my $quoted_string;
9751	2768	100				6470	if ( $in_quote == 2 ) { # two quotes/quoted_string_1s to follow
9752	29					70	my $ibeg = $i;
9753							(
9754	29					138	$i, $in_quote, $quote_character, $quote_pos, $quote_depth,
9755							$quoted_string
9756							)
9757							= $self->follow_quoted_string( $ibeg, $in_quote, $rtokens,
9758							$quote_character, $quote_pos, $quote_depth, $max_token_index );
9759	29					72	$quoted_string_2 .= $quoted_string;
9760	29	50				89	if ( $in_quote == 1 ) {
9761	29	50				124	if ( $quote_character =~ /[\{\[\<\(]/ ) { $i++; }
	0					0
9762	29					61	$quote_character = EMPTY_STRING;
9763							}
9764							else {
9765	0					0	$quoted_string_2 .= "\n";
9766							}
9767							}
9768
9769	2768	50				6124	if ( $in_quote == 1 ) { # one (more) quote to follow
9770	2768					4273	my $ibeg = $i;
9771							(
9772	2768					7826	$i, $in_quote, $quote_character, $quote_pos, $quote_depth,
9773							$quoted_string
9774							)
9775							= $self->follow_quoted_string( $ibeg, $in_quote, $rtokens,
9776							$quote_character, $quote_pos, $quote_depth, $max_token_index );
9777	2768					5949	$quoted_string_1 .= $quoted_string;
9778	2768	100				6305	if ( $in_quote == 1 ) {
9779	183					377	$quoted_string_1 .= "\n";
9780							}
9781							}
9782							return (
9783
9784	2768					9533	$i,
9785							$in_quote,
9786							$quote_character,
9787							$quote_pos,
9788							$quote_depth,
9789							$quoted_string_1,
9790							$quoted_string_2,
9791
9792							);
9793							} ## end sub do_quote
9794
9795							sub follow_quoted_string {
9796
9797							# scan for a specific token, skipping escaped characters
9798							# if the quote character is blank, use the first non-blank character
9799							# input parameters:
9800							# $rtokens = reference to the array of tokens
9801							# $i = the token index of the first character to search
9802							# $in_quote = number of quoted strings being followed
9803							# $beginning_tok = the starting quote character
9804							# $quote_pos = index to check next for alphanumeric delimiter
9805							# output parameters:
9806							# $i = the token index of the ending quote character
9807							# $in_quote = decremented if found end, unchanged if not
9808							# $beginning_tok = the starting quote character
9809							# $quote_pos = index to check next for alphanumeric delimiter
9810							# $quote_depth = nesting depth, since delimiters '{ ( [ <' can be nested.
9811							# $quoted_string = the text of the quote (without quotation tokens)
9812							my (
9813
9814	2814			2814	0	6525	$self,
9815							$i_beg,
9816							$in_quote,
9817							$rtokens,
9818							$beginning_tok,
9819							$quote_pos,
9820							$quote_depth,
9821							$max_token_index,
9822
9823							) = @_;
9824
9825	2814					4945	my ( $tok, $end_tok );
9826	2814					4730	my $i = $i_beg - 1;
9827	2814					4423	my $quoted_string = EMPTY_STRING;
9828
9829	2814					4031	0 && do {
9830							print {*STDOUT}
9831							"QUOTE entering with quote_pos = $quote_pos i=$i beginning_tok =$beginning_tok\n";
9832							};
9833
9834							# get the corresponding end token
9835	2814	100				13169	if ( $beginning_tok !~ /^\s*$/ ) {
9836	183					541	$end_tok = matching_end_token($beginning_tok);
9837							}
9838
9839							# a blank token means we must find and use the first non-blank one
9840							else {
9841	2631	100				6187	my $allow_quote_comments = ( $i < 0 ) ? 1 : 0; # i<0 means we saw a <cr>
9842
9843	2631					6049	while ( $i < $max_token_index ) {
9844	2631					5315	$tok = $rtokens->[ ++$i ];
9845
9846	2631	50				7998	if ( $tok !~ /^\s*$/ ) {
9847
9848	2631	50	66			7666	if ( ( $tok eq '#' ) && ($allow_quote_comments) ) {
9849	0					0	$i = $max_token_index;
9850							}
9851							else {
9852
9853	2631	100				5747	if ( length($tok) > 1 ) {
9854	1	50				5	if ( $quote_pos <= 0 ) { $quote_pos = 1 }
	1					4
9855	1					4	$beginning_tok = substr( $tok, $quote_pos - 1, 1 );
9856							}
9857							else {
9858	2630					4654	$beginning_tok = $tok;
9859	2630					4125	$quote_pos = 0;
9860							}
9861	2631					6377	$end_tok = matching_end_token($beginning_tok);
9862	2631					4615	$quote_depth = 1;
9863	2631					4750	last;
9864							}
9865							}
9866							else {
9867	0					0	$allow_quote_comments = 1;
9868							}
9869							}
9870							}
9871
9872							# There are two different loops which search for the ending quote
9873							# character. In the rare case of an alphanumeric quote delimiter, we
9874							# have to look through alphanumeric tokens character-by-character, since
9875							# the pre-tokenization process combines multiple alphanumeric
9876							# characters, whereas for a non-alphanumeric delimiter, only tokens of
9877							# length 1 can match.
9878
9879							#----------------------------------------------------------------
9880							# Case 1 (rare): loop for case of alphanumeric quote delimiter..
9881							# "quote_pos" is the position the current word to begin searching
9882							#----------------------------------------------------------------
9883	2814	100				7797	if ( $beginning_tok =~ /\w/ ) {
9884
9885							# Note this because it is not recommended practice except
9886							# for obfuscated perl contests
9887	1	50				4	if ( $in_quote == 1 ) {
9888	1					9	$self->write_logfile_entry(
9889							"Note: alphanumeric quote delimiter ($beginning_tok) \n");
9890							}
9891
9892							# Note: changed < to <= here to fix c109. Relying on extra end blanks.
9893	1					5	while ( $i <= $max_token_index ) {
9894
9895	4	100	66			17	if ( $quote_pos == 0 \|\| ( $i < 0 ) ) {
9896	3					7	$tok = $rtokens->[ ++$i ];
9897
9898	3	100				8	if ( $tok eq '\\' ) {
9899
9900							# retain backslash unless it hides the end token
9901	1	50				54	$quoted_string .= $tok
9902							unless $rtokens->[ $i + 1 ] eq $end_tok;
9903	1					8	$quote_pos++;
9904	1	50				6	last if ( $i >= $max_token_index );
9905	1					5	$tok = $rtokens->[ ++$i ];
9906							}
9907							}
9908	4					8	my $old_pos = $quote_pos;
9909
9910	4					8	$quote_pos = 1 + index( $tok, $end_tok, $quote_pos );
9911
9912	4	100				11	if ( $quote_pos > 0 ) {
9913
9914	1					16	$quoted_string .=
9915							substr( $tok, $old_pos, $quote_pos - $old_pos - 1 );
9916
9917							# NOTE: any quote modifiers will be at the end of '$tok'. If we
9918							# wanted to check them, this is the place to get them. But
9919							# this quote form is rarely used in practice, so it isn't
9920							# worthwhile.
9921
9922	1					3	$quote_depth--;
9923
9924	1	50				4	if ( $quote_depth == 0 ) {
9925	1					3	$in_quote--;
9926	1					3	last;
9927							}
9928							}
9929							else {
9930	3	50				7	if ( $old_pos <= length($tok) ) {
9931	3					9	$quoted_string .= substr( $tok, $old_pos );
9932							}
9933							}
9934							}
9935							}
9936
9937							#-----------------------------------------------------------------------
9938							# Case 2 (normal): loop for case of a non-alphanumeric quote delimiter..
9939							#-----------------------------------------------------------------------
9940							else {
9941
9942	2813					6490	while ( $i < $max_token_index ) {
9943	10798					16323	$tok = $rtokens->[ ++$i ];
9944
9945	10798	100				24279	if ( $tok eq $end_tok ) {
		100
		100
9946	2620					4208	$quote_depth--;
9947
9948	2620	100				5936	if ( $quote_depth == 0 ) {
9949	2619					3860	$in_quote--;
9950	2619					4131	last;
9951							}
9952							}
9953							elsif ( $tok eq $beginning_tok ) {
9954	1					3	$quote_depth++;
9955							}
9956							elsif ( $tok eq '\\' ) {
9957
9958							# retain backslash unless it hides the beginning or end token
9959	376					939	$tok = $rtokens->[ ++$i ];
9960	376	100	100			1960	$quoted_string .= '\\'
9961							if ( $tok ne $end_tok && $tok ne $beginning_tok );
9962							}
9963							else {
9964							## nothing special
9965							}
9966	8179					15255	$quoted_string .= $tok;
9967							}
9968							}
9969	2814	50				6171	if ( $i > $max_token_index ) { $i = $max_token_index }
	0					0
9970							return (
9971
9972	2814					10673	$i,
9973							$in_quote,
9974							$beginning_tok,
9975							$quote_pos,
9976							$quote_depth,
9977							$quoted_string,
9978
9979							);
9980							} ## end sub follow_quoted_string
9981
9982							sub indicate_error {
9983	0			0	0	0	my ( $self, $msg, $line_number, $input_line, $pos, $carrat ) = @_;
9984	0					0	$self->interrupt_logfile();
9985	0					0	$self->warning($msg);
9986	0					0	$self->write_error_indicator_pair( $line_number, $input_line, $pos,
9987							$carrat );
9988	0					0	$self->resume_logfile();
9989	0					0	return;
9990							} ## end sub indicate_error
9991
9992							sub write_error_indicator_pair {
9993	0			0	0	0	my ( $self, $line_number, $input_line, $pos, $carrat ) = @_;
9994	0					0	my ( $offset, $numbered_line, $underline ) =
9995							make_numbered_line( $line_number, $input_line, $pos );
9996	0					0	$underline = write_on_underline( $underline, $pos - $offset, $carrat );
9997	0					0	$self->warning( $numbered_line . "\n" );
9998	0					0	$underline =~ s/\s*$//;
9999	0					0	$self->warning( $underline . "\n" );
10000	0					0	return;
10001							} ## end sub write_error_indicator_pair
10002
10003							sub make_numbered_line {
10004
10005							# Given an input line, its line number, and a character position of
10006							# interest, create a string not longer than 80 characters of the form
10007							# $lineno: sub_string
10008							# such that the sub_string of $str contains the position of interest
10009							#
10010							# Here is an example of what we want, in this case we add trailing
10011							# '...' because the line is long.
10012							#
10013							# 2: (One of QAML 2.0's authors is a member of the World Wide Web Con ...
10014							#
10015							# Here is another example, this time in which we used leading '...'
10016							# because of excessive length:
10017							#
10018							# 2: ... er of the World Wide Web Consortium's
10019							#
10020							# input parameters are:
10021							# $lineno = line number
10022							# $str = the text of the line
10023							# $pos = position of interest (the error) : 0 = first character
10024							#
10025							# We return :
10026							# - $offset = an offset which corrects the position in case we only
10027							# display part of a line, such that $pos-$offset is the effective
10028							# position from the start of the displayed line.
10029							# - $numbered_line = the numbered line as above,
10030							# - $underline = a blank 'underline' which is all spaces with the same
10031							# number of characters as the numbered line.
10032
10033	0			0	0	0	my ( $lineno, $str, $pos ) = @_;
10034	0	0				0	my $offset = ( $pos < 60 ) ? 0 : $pos - 40;
10035	0					0	my $excess = length($str) - $offset - 68;
10036	0	0				0	my $numc = ( $excess > 0 ) ? 68 : undef;
10037
10038	0	0				0	if ( defined($numc) ) {
10039	0	0				0	if ( $offset == 0 ) {
10040	0					0	$str = substr( $str, $offset, $numc - 4 ) . " ...";
10041							}
10042							else {
10043	0					0	$str = "... " . substr( $str, $offset + 4, $numc - 4 ) . " ...";
10044							}
10045							}
10046							else {
10047
10048	0	0				0	if ( $offset == 0 ) {
10049							}
10050							else {
10051	0					0	$str = "... " . substr( $str, $offset + 4 );
10052							}
10053							}
10054
10055	0					0	my $numbered_line = sprintf( "%d: ", $lineno );
10056	0					0	$offset -= length($numbered_line);
10057	0					0	$numbered_line .= $str;
10058	0					0	my $underline = SPACE x length($numbered_line);
10059	0					0	return ( $offset, $numbered_line, $underline );
10060							} ## end sub make_numbered_line
10061
10062							sub write_on_underline {
10063
10064							# The "underline" is a string that shows where an error is; it starts
10065							# out as a string of blanks with the same length as the numbered line of
10066							# code above it, and we have to add marking to show where an error is.
10067							# In the example below, we want to write the string '--^' just below
10068							# the line of bad code:
10069							#
10070							# 2: (One of QAML 2.0's authors is a member of the World Wide Web Con ...
10071							# ---^
10072							# We are given the current underline string, plus a position and a
10073							# string to write on it.
10074							#
10075							# In the above example, there will be 2 calls to do this:
10076							# First call: $pos=19, pos_chr=^
10077							# Second call: $pos=16, pos_chr=---
10078							#
10079							# This is a trivial thing to do with substr, but there is some
10080							# checking to do.
10081
10082	0			0	0	0	my ( $underline, $pos, $pos_chr ) = @_;
10083
10084							# check for error..shouldn't happen
10085	0	0	0			0	if ( $pos < 0 \|\| $pos > length($underline) ) {
10086	0					0	return $underline;
10087							}
10088	0					0	my $excess = length($pos_chr) + $pos - length($underline);
10089	0	0				0	if ( $excess > 0 ) {
10090	0					0	$pos_chr = substr( $pos_chr, 0, length($pos_chr) - $excess );
10091							}
10092	0					0	substr( $underline, $pos, length($pos_chr) ) = $pos_chr;
10093	0					0	return ($underline);
10094							} ## end sub write_on_underline
10095
10096							sub pre_tokenize {
10097
10098	6187			6187	0	12313	my ( $str, $max_tokens_wanted ) = @_;
10099
10100							# Input parameters:
10101							# $str = string to be parsed
10102							# $max_tokens_wanted > 0 to stop on reaching this many tokens.
10103							# = undef or 0 means get all tokens
10104
10105							# Break a string, $str, into a sequence of preliminary tokens (pre-tokens).
10106							# We look for these types of tokens:
10107							# words (type='w'), example: 'max_tokens_wanted'
10108							# digits (type = 'd'), example: '0755'
10109							# whitespace (type = 'b'), example: ' '
10110							# single character punct (type = char) example: '='
10111
10112							# Later operations will combine one or more of these pre-tokens into final
10113							# tokens. We cannot do better than this yet because we might be in a
10114							# quoted string or pattern.
10115
10116							# An advantage of doing this pre-tokenization step is that it keeps almost
10117							# all of the regex parsing very simple and localized right here. A
10118							# disadvantage is that in some extremely rare instances we will have to go
10119							# back and split a pre-token.
10120
10121							# Return parameters:
10122	6187					10336	my @tokens = (); # array of the tokens themselves
10123	6187					12732	my @token_map = (0); # string position of start of each token
10124	6187					9681	my @type = (); # 'b'=whitespace, 'd'=digits, 'w'=alpha, or punct
10125
10126	6187	100				13303	if ( !$max_tokens_wanted ) { $max_tokens_wanted = -1 }
	5902					9192
10127
10128	6187					14169	while ( $max_tokens_wanted-- ) {
10129
10130	82185	100				227575	if (
10131							$str =~ m{
10132							\G(
10133							(\s+) # type 'b' = whitespace - this must come before \W
10134							\| (\W) # or type=char = single-character, non-whitespace punct
10135							\| (\d+) # or type 'd' = sequence of digits - must come before \w
10136							\| (\w+) # or type 'w' = words not starting with a digit
10137							)
10138							}gcx
10139							)
10140							{
10141	76142					159576	push @tokens, $1;
10142	76142	100				185396	push @type,
		100
		100
10143							defined($2) ? 'b' : defined($3) ? $1 : defined($4) ? 'd' : 'w';
10144	76142					147633	push @token_map, pos($str);
10145							}
10146
10147							# that's all..
10148							else {
10149	6043					36598	return ( \@tokens, \@token_map, \@type );
10150							}
10151							}
10152
10153	144					1041	return ( \@tokens, \@token_map, \@type );
10154							} ## end sub pre_tokenize
10155
10156							sub show_tokens {
10157
10158							# this is an old debug routine
10159							# not called, but saved for reference
10160	0			0	0	0	my ( $rtokens, $rtoken_map ) = @_;
10161	0					0	my $num = scalar( @{$rtokens} );
	0					0
10162
10163	0					0	foreach my $i ( 0 .. $num - 1 ) {
10164	0					0	my $len = length( $rtokens->[$i] );
10165	0					0	print {*STDOUT} "$i:$len:$rtoken_map->[$i]:$rtokens->[$i]:\n";
	0					0
10166							}
10167	0					0	return;
10168							} ## end sub show_tokens
10169
10170							{ ## closure for sub matching end token
10171							my %matching_end_token;
10172
10173							BEGIN {
10174	39			39		62257	%matching_end_token = (
10175							'{' => '}',
10176							'(' => ')',
10177							'[' => ']',
10178							'<' => '>',
10179							);
10180							} ## end BEGIN
10181
10182							sub matching_end_token {
10183
10184							# return closing character for a pattern
10185	2998			2998	0	5029	my $beginning_token = shift;
10186	2998	100				7663	if ( $matching_end_token{$beginning_token} ) {
10187	373					982	return $matching_end_token{$beginning_token};
10188							}
10189	2625					6068	return ($beginning_token);
10190							} ## end sub matching_end_token
10191							}
10192
10193							sub dump_token_types {
10194	0			0	0		my ( $class, $fh ) = @_;
10195
10196							# This should be the latest list of token types in use
10197							# adding NEW_TOKENS: add a comment here
10198	0						$fh->print(<<'END_OF_LIST');
10199
10200							Here is a list of the token types currently used for lines of type 'CODE'.
10201							For the following tokens, the "type" of a token is just the token itself.
10202
10203							.. :: << >> ** && .. \|\| // -> => += -= .= %= &= \|= ^= *= <>
10204							( ) <= >= == =~ !~ != ++ -- /= x=
10205							... **= <<= >>= &&= \|\|= //= <=>
10206							, + - / * \| % ! x ~ = \ ? : . < > ^ &
10207
10208							The following additional token types are defined:
10209
10210							type meaning
10211							b blank (white space)
10212							{ indent: opening structural curly brace or square bracket or paren
10213							(code block, anonymous hash reference, or anonymous array reference)
10214							} outdent: right structural curly brace or square bracket or paren
10215							[ left non-structural square bracket (enclosing an array index)
10216							] right non-structural square bracket
10217							( left non-structural paren (all but a list right of an =)
10218							) right non-structural paren
10219							L left non-structural curly brace (enclosing a key)
10220							R right non-structural curly brace
10221							; terminal semicolon
10222							f indicates a semicolon in a "for" statement
10223							h here_doc operator <<
10224							# a comment
10225							Q indicates a quote or pattern
10226							q indicates a qw quote block
10227							k a perl keyword
10228							C user-defined constant or constant function (with void prototype = ())
10229							U user-defined function taking parameters
10230							G user-defined function taking block parameter (like grep/map/eval)
10231							S sub definition (reported as type 'i' in older versions)
10232							P package definition (reported as type 'i' in older versions)
10233							t type indicater such as %,$,@,*,&,sub
10234							w bare word (perhaps a subroutine call)
10235							i identifier of some type (with leading %, $, @, *, &, sub, -> )
10236							n a number
10237							v a v-string
10238							F a file test operator (like -e)
10239							Y File handle
10240							Z identifier in indirect object slot: may be file handle, object
10241							J LABEL: code block label
10242							j LABEL after next, last, redo, goto
10243							p unary +
10244							m unary -
10245							pp pre-increment operator ++
10246							mm pre-decrement operator --
10247							A : used as attribute separator
10248
10249							Here are the '_line_type' codes used internally:
10250							SYSTEM - system-specific code before hash-bang line
10251							CODE - line of perl code (including comments)
10252							POD_START - line starting pod, such as '=head'
10253							POD - pod documentation text
10254							POD_END - last line of pod section, '=cut'
10255							HERE - text of here-document
10256							HERE_END - last line of here-doc (target word)
10257							FORMAT - format section
10258							FORMAT_END - last line of format section, '.'
10259							SKIP - code skipping section
10260							SKIP_END - last line of code skipping section, '#>>V'
10261							DATA_START - __DATA__ line
10262							DATA - unidentified text following __DATA__
10263							END_START - __END__ line
10264							END - unidentified text following __END__
10265							ERROR - we are in big trouble, probably not a perl script
10266							END_OF_LIST
10267
10268	0						return;
10269							} ## end sub dump_token_types
10270
10271							BEGIN {
10272
10273							# These names are used in error messages
10274	39			39		304	@opening_brace_names = qw# '{' '[' '(' '?' #;
10275	39					113	@closing_brace_names = qw# '}' ']' ')' ':' #;
10276
10277	39					106	my @q;
10278
10279	39					286	my @digraphs = qw(
10280							.. :: << >> ** && \|\| // -> => += -= .= %= &= \|= ^= *= <>
10281							<= >= == =~ !~ != ++ -- /= x= ~~ ~. \|. &. ^.
10282							);
10283	39					899	@is_digraph{@digraphs} = (1) x scalar(@digraphs);
10284
10285	39					428	@q = qw(
10286							. : < > * & \| / - = + - % ^ ! x ~
10287							);
10288	39					366	@can_start_digraph{@q} = (1) x scalar(@q);
10289
10290	39					151	my @trigraphs = qw( ... **= <<= >>= &&= \|\|= //= <=> !~~ &.= \|.= ^.= <<~);
10291	39					254	@is_trigraph{@trigraphs} = (1) x scalar(@trigraphs);
10292
10293	39					135	my @tetragraphs = qw( <<>> );
10294	39					138	@is_tetragraph{@tetragraphs} = (1) x scalar(@tetragraphs);
10295
10296							# make a hash of all valid token types for self-checking the tokenizer
10297							# (adding NEW_TOKENS : select a new character and add to this list)
10298							# fix for c250: added new token type 'P' and 'S'
10299	39					625	my @valid_token_types = qw#
10300							A b C G L R f h Q k t w i q n p m F pp mm U j J Y Z v P S
10301							{ } ( ) [ ] ; + - / * \| % ! x ~ = \ ? : . < > ^ &
10302							#;
10303	39					520	push( @valid_token_types, @digraphs );
10304	39					319	push( @valid_token_types, @trigraphs );
10305	39					105	push( @valid_token_types, @tetragraphs );
10306	39					116	push( @valid_token_types, ( '#', ',', 'CORE::' ) );
10307	39					1597	@is_valid_token_type{@valid_token_types} = (1) x scalar(@valid_token_types);
10308
10309							# a list of file test letters, as in -e (Table 3-4 of 'camel 3')
10310	39					285	my @file_test_operators =
10311							qw( A B C M O R S T W X b c d e f g k l o p r s t u w x z);
10312	39					807	@is_file_test_operator{@file_test_operators} =
10313							(1) x scalar(@file_test_operators);
10314
10315							# these functions have prototypes of the form (&), so when they are
10316							# followed by a block, that block MAY BE followed by an operator.
10317							# Smartmatch operator ~~ may be followed by anonymous hash or array ref
10318	39					167	@q = qw( do eval );
10319	39					177	@is_block_operator{@q} = (1) x scalar(@q);
10320
10321							# these functions allow an identifier in the indirect object slot
10322	39					111	@q = qw( print printf sort exec system say);
10323	39					242	@is_indirect_object_taker{@q} = (1) x scalar(@q);
10324
10325							# Note: 'field' will be added by sub check_options if --use-feature=class
10326	39					100	@q = qw(my our state);
10327	39					190	@is_my_our_state{@q} = (1) x scalar(@q);
10328
10329							# These tokens may precede a code block
10330							# patched for SWITCH/CASE/CATCH. Actually these could be removed
10331							# now and we could let the extended-syntax coding handle them.
10332							# Added 'default' for Switch::Plain.
10333							# Note: 'ADJUST' will be added by sub check_options if --use-feature=class
10334	39					233	@q =
10335							qw( BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else
10336							unless do while until eval for foreach map grep sort
10337							switch case given when default catch try finally);
10338	39					681	@is_code_block_token{@q} = (1) x scalar(@q);
10339
10340							# Note: this hash was formerly named '%is_not_zero_continuation_block_type'
10341							# to contrast it with the block types in '%is_zero_continuation_block_type'
10342	39					181	@q = qw( sort map grep eval do );
10343	39					131	@is_sort_map_grep_eval_do{@q} = (1) x scalar(@q);
10344
10345	39					124	@q = qw( sort map grep );
10346	39					132	@is_sort_map_grep{@q} = (1) x scalar(@q);
10347
10348	39					90	%is_grep_alias = ();
10349
10350							# I'll build the list of keywords incrementally
10351	39					92	my @Keywords = ();
10352
10353							# keywords and tokens after which a value or pattern is expected,
10354							# but not an operator. In other words, these should consume terms
10355							# to their right, or at least they are not expected to be followed
10356							# immediately by operators.
10357	39					1441	my @value_requestor = qw(
10358							AUTOLOAD
10359							BEGIN
10360							CHECK
10361							DESTROY
10362							END
10363							EQ
10364							GE
10365							GT
10366							INIT
10367							LE
10368							LT
10369							NE
10370							UNITCHECK
10371							abs
10372							accept
10373							alarm
10374							and
10375							atan2
10376							bind
10377							binmode
10378							bless
10379							break
10380							caller
10381							chdir
10382							chmod
10383							chomp
10384							chop
10385							chown
10386							chr
10387							chroot
10388							close
10389							closedir
10390							cmp
10391							connect
10392							continue
10393							cos
10394							crypt
10395							dbmclose
10396							dbmopen
10397							defined
10398							delete
10399							die
10400							dump
10401							each
10402							else
10403							elsif
10404							eof
10405							eq
10406							evalbytes
10407							exec
10408							exists
10409							exit
10410							exp
10411							fc
10412							fcntl
10413							fileno
10414							flock
10415							for
10416							foreach
10417							formline
10418							ge
10419							getc
10420							getgrgid
10421							getgrnam
10422							gethostbyaddr
10423							gethostbyname
10424							getnetbyaddr
10425							getnetbyname
10426							getpeername
10427							getpgrp
10428							getpriority
10429							getprotobyname
10430							getprotobynumber
10431							getpwnam
10432							getpwuid
10433							getservbyname
10434							getservbyport
10435							getsockname
10436							getsockopt
10437							glob
10438							gmtime
10439							goto
10440							grep
10441							gt
10442							hex
10443							if
10444							index
10445							int
10446							ioctl
10447							join
10448							keys
10449							kill
10450							last
10451							lc
10452							lcfirst
10453							le
10454							length
10455							link
10456							listen
10457							local
10458							localtime
10459							lock
10460							log
10461							lstat
10462							lt
10463							map
10464							mkdir
10465							msgctl
10466							msgget
10467							msgrcv
10468							msgsnd
10469							my
10470							ne
10471							next
10472							no
10473							not
10474							oct
10475							open
10476							opendir
10477							or
10478							ord
10479							our
10480							pack
10481							pipe
10482							pop
10483							pos
10484							print
10485							printf
10486							prototype
10487							push
10488							quotemeta
10489							rand
10490							read
10491							readdir
10492							readlink
10493							readline
10494							readpipe
10495							recv
10496							redo
10497							ref
10498							rename
10499							require
10500							reset
10501							return
10502							reverse
10503							rewinddir
10504							rindex
10505							rmdir
10506							scalar
10507							seek
10508							seekdir
10509							select
10510							semctl
10511							semget
10512							semop
10513							send
10514							sethostent
10515							setnetent
10516							setpgrp
10517							setpriority
10518							setprotoent
10519							setservent
10520							setsockopt
10521							shift
10522							shmctl
10523							shmget
10524							shmread
10525							shmwrite
10526							shutdown
10527							sin
10528							sleep
10529							socket
10530							socketpair
10531							sort
10532							splice
10533							split
10534							sprintf
10535							sqrt
10536							srand
10537							stat
10538							state
10539							study
10540							substr
10541							symlink
10542							syscall
10543							sysopen
10544							sysread
10545							sysseek
10546							system
10547							syswrite
10548							tell
10549							telldir
10550							tie
10551							tied
10552							truncate
10553							uc
10554							ucfirst
10555							umask
10556							undef
10557							unless
10558							unlink
10559							unpack
10560							unshift
10561							untie
10562							until
10563							use
10564							utime
10565							values
10566							vec
10567							waitpid
10568							warn
10569							while
10570							write
10571							xor
10572
10573							switch
10574							case
10575							default
10576							given
10577							when
10578							err
10579							say
10580							isa
10581
10582							catch
10583
10584							);
10585
10586							# Note: 'ADJUST', 'field' are added by sub check_options
10587							# if --use-feature=class
10588
10589							# patched above for SWITCH/CASE given/when err say
10590							# 'err' is a fairly safe addition.
10591							# Added 'default' for Switch::Plain. Note that we could also have
10592							# a separate set of keywords to include if we see 'use Switch::Plain'
10593	39					1984	push( @Keywords, @value_requestor );
10594
10595							# These are treated the same but are not keywords:
10596	39					166	my @extra_vr = qw(
10597							constant
10598							vars
10599							);
10600	39					327	push( @value_requestor, @extra_vr );
10601
10602	39					4733	@expecting_term_token{@value_requestor} = (1) x scalar(@value_requestor);
10603
10604							# this list contains keywords which do not look for arguments,
10605							# so that they might be followed by an operator, or at least
10606							# not a term.
10607	39					250	my @operator_requestor = qw(
10608							endgrent
10609							endhostent
10610							endnetent
10611							endprotoent
10612							endpwent
10613							endservent
10614							fork
10615							getgrent
10616							gethostent
10617							getlogin
10618							getnetent
10619							getppid
10620							getprotoent
10621							getpwent
10622							getservent
10623							setgrent
10624							setpwent
10625							time
10626							times
10627							wait
10628							wantarray
10629							);
10630
10631	39					174	push( @Keywords, @operator_requestor );
10632
10633							# These are treated the same but are not considered keywords:
10634	39					88	my @extra_or = qw(
10635							STDERR
10636							STDIN
10637							STDOUT
10638							);
10639
10640	39					111	push( @operator_requestor, @extra_or );
10641
10642	39					874	@expecting_operator_token{@operator_requestor} =
10643							(1) x scalar(@operator_requestor);
10644
10645							# these token TYPES expect trailing operator but not a term
10646							# note: ++ and -- are post-increment and decrement, 'C' = constant
10647	39					202	my @operator_requestor_types = qw( ++ -- C <> q );
10648
10649							# NOTE: This hash is available but not currently used
10650	39					184	@expecting_operator_types{@operator_requestor_types} =
10651							(1) x scalar(@operator_requestor_types);
10652
10653							# these token TYPES consume values (terms)
10654							# note: pp and mm are pre-increment and decrement
10655							# f=semicolon in for, F=file test operator
10656	39					713	my @value_requestor_type = qw#
10657							L { ( [ ~ !~ =~ ; . .. ... A : && ! \|\| // = + - x
10658							*= += -= .= /= = %= x= &= \|= ^= <<= >>= &&= \|\|= //=
10659							<= >= == != => \ > < % * / ? & \| ** <=> ~~ !~~ <<~
10660							f F pp mm Y p m U J G j >> << ^ t
10661							~. ^. \|. &. ^.= \|.= &.=
10662							#;
10663	39					196	push( @value_requestor_type, ',' )
10664							; # (perl doesn't like a ',' in a qw block)
10665
10666							# NOTE: This hash is available but not currently used
10667	39					1555	@expecting_term_types{@value_requestor_type} =
10668							(1) x scalar(@value_requestor_type);
10669
10670							# Note: the following valid token types are not assigned here to
10671							# hashes requesting to be followed by values or terms, but are
10672							# instead currently hard-coded into sub operator_expected:
10673							# ) -> :: Q R Z ] b h i k n v w } #
10674
10675							# For simple syntax checking, it is nice to have a list of operators which
10676							# will really be unhappy if not followed by a term. This includes most
10677							# of the above...
10678	39					952	@really_want_term{@value_requestor_type} =
10679							(1) x scalar(@value_requestor_type);
10680
10681							# with these exceptions...
10682	39					154	delete $really_want_term{'U'}; # user sub, depends on prototype
10683	39					95	delete $really_want_term{'F'}; # file test works on $_ if no following term
10684	39					77	delete $really_want_term{'Y'}; # indirect object, too risky to check syntax;
10685							# let perl do it
10686	39					138	@q = qw(q qq qx qr s y tr m);
10687	39					195	@is_q_qq_qx_qr_s_y_tr_m{@q} = (1) x scalar(@q);
10688
10689							# Note added 'qw' here
10690	39					150	@q = qw(q qq qw qx qr s y tr m);
10691	39					188	@is_q_qq_qw_qx_qr_s_y_tr_m{@q} = (1) x scalar(@q);
10692
10693							# Note: 'class' will be added by sub check_options if -use-feature=class
10694	39					129	@q = qw(package);
10695	39					129	@is_package{@q} = (1) x scalar(@q);
10696
10697	39					115	@q = qw( if elsif unless );
10698	39					150	@is_if_elsif_unless{@q} = (1) x scalar(@q);
10699
10700	39					99	@q = qw( ; t );
10701	39					117	@is_semicolon_or_t{@q} = (1) x scalar(@q);
10702
10703	39					135	@q = qw( if elsif unless case when );
10704	39					173	@is_if_elsif_unless_case_when{@q} = (1) x scalar(@q);
10705
10706							# Hash of other possible line endings which may occur.
10707							# Keep these coordinated with the regex where this is used.
10708							# Note: chr(13) = chr(015)="\r".
10709	39					120	@q = ( chr(13), chr(29), chr(26) );
10710	39					210	@other_line_endings{@q} = (1) x scalar(@q);
10711
10712							# These keywords are handled specially in the tokenizer code:
10713	39					307	my @special_keywords = qw(
10714							do
10715							eval
10716							format
10717							m
10718							package
10719							q
10720							qq
10721							qr
10722							qw
10723							qx
10724							s
10725							sub
10726							tr
10727							y
10728							);
10729	39					454	push( @Keywords, @special_keywords );
10730
10731							# Keywords after which list formatting may be used
10732							# WARNING: do not include \|map\|grep\|eval or perl may die on
10733							# syntax errors (map1.t).
10734	39					415	my @keyword_taking_list = qw(
10735							and
10736							chmod
10737							chomp
10738							chop
10739							chown
10740							dbmopen
10741							die
10742							elsif
10743							exec
10744							fcntl
10745							for
10746							foreach
10747							formline
10748							getsockopt
10749							if
10750							index
10751							ioctl
10752							join
10753							kill
10754							local
10755							msgctl
10756							msgrcv
10757							msgsnd
10758							my
10759							open
10760							or
10761							our
10762							pack
10763							print
10764							printf
10765							push
10766							read
10767							readpipe
10768							recv
10769							return
10770							reverse
10771							rindex
10772							seek
10773							select
10774							semctl
10775							semget
10776							send
10777							setpriority
10778							setsockopt
10779							shmctl
10780							shmget
10781							shmread
10782							shmwrite
10783							socket
10784							socketpair
10785							sort
10786							splice
10787							split
10788							sprintf
10789							state
10790							substr
10791							syscall
10792							sysopen
10793							sysread
10794							sysseek
10795							system
10796							syswrite
10797							tie
10798							unless
10799							unlink
10800							unpack
10801							unshift
10802							until
10803							vec
10804							warn
10805							while
10806							given
10807							when
10808							);
10809
10810							# NOTE: This hash is available but not currently used
10811	39					1437	@is_keyword_taking_list{@keyword_taking_list} =
10812							(1) x scalar(@keyword_taking_list);
10813
10814							# perl functions which may be unary operators.
10815
10816							# This list is used to decide if a pattern delimited by slashes, /pattern/,
10817							# can follow one of these keywords.
10818	39					281	@q = qw(
10819							chomp eof eval fc lc pop shift uc undef
10820							);
10821
10822	39					300	@is_keyword_rejecting_slash_as_pattern_delimiter{@q} =
10823							(1) x scalar(@q);
10824
10825							# These are keywords for which an arg may optionally be omitted. They are
10826							# currently only used to disambiguate a ? used as a ternary from one used
10827							# as a (deprecated) pattern delimiter. In the future, they might be used
10828							# to give a warning about ambiguous syntax before a /.
10829							# Note: split has been omitted (see note below).
10830	39					423	my @keywords_taking_optional_arg = qw(
10831							abs
10832							alarm
10833							caller
10834							chdir
10835							chomp
10836							chop
10837							chr
10838							chroot
10839							close
10840							cos
10841							defined
10842							die
10843							eof
10844							eval
10845							evalbytes
10846							exit
10847							exp
10848							fc
10849							getc
10850							glob
10851							gmtime
10852							hex
10853							int
10854							last
10855							lc
10856							lcfirst
10857							length
10858							localtime
10859							log
10860							lstat
10861							mkdir
10862							next
10863							oct
10864							ord
10865							pop
10866							pos
10867							print
10868							printf
10869							prototype
10870							quotemeta
10871							rand
10872							readline
10873							readlink
10874							readpipe
10875							redo
10876							ref
10877							require
10878							reset
10879							reverse
10880							rmdir
10881							say
10882							select
10883							shift
10884							sin
10885							sleep
10886							sqrt
10887							srand
10888							stat
10889							study
10890							tell
10891							uc
10892							ucfirst
10893							umask
10894							undef
10895							unlink
10896							warn
10897							write
10898							);
10899	39					975	@is_keyword_taking_optional_arg{@keywords_taking_optional_arg} =
10900							(1) x scalar(@keywords_taking_optional_arg);
10901
10902							# This list is used to decide if a pattern delimited by question marks,
10903							# ?pattern?, can follow one of these keywords. Note that from perl 5.22
10904							# on, a ?pattern? is not recognized, so we can be much more strict than
10905							# with a /pattern/. Note that 'split' is not in this list. In current
10906							# versions of perl a question following split must be a ternary, but
10907							# in older versions it could be a pattern. The guessing algorithm will
10908							# decide. We are combining two lists here to simplify the test.
10909	39					949	@q = ( @keywords_taking_optional_arg, @operator_requestor );
10910	39					1423	@is_keyword_rejecting_question_as_pattern_delimiter{@q} =
10911							(1) x scalar(@q);
10912
10913							# These are not used in any way yet
10914							# my @unused_keywords = qw(
10915							# __FILE__
10916							# __LINE__
10917							# __PACKAGE__
10918							# );
10919
10920							# The list of keywords was originally extracted from function 'keyword' in
10921							# perl file toke.c version 5.005.03, using this utility, plus a
10922							# little editing: (file getkwd.pl):
10923							# while (<>) { while (/\"(.*)\"/g) { print "$1\n"; } }
10924							# Add 'get' prefix where necessary, then split into the above lists.
10925							# This list should be updated as necessary.
10926							# The list should not contain these special variables:
10927							# ARGV DATA ENV SIG STDERR STDIN STDOUT
10928							# __DATA__ __END__
10929
10930	39					10120	@is_keyword{@Keywords} = (1) x scalar(@Keywords);
10931							} ## end BEGIN
10932							1;