File Coverage

blib/lib/Perl/Tidy/Tokenizer.pm

Criterion	Covered	Total	%
statement	2315	3295	70.2
branch	1098	1748	62.8
condition	523	948	55.1
subroutine	160	192	83.3
pod	0	147	0.0
total	4096	6330	64.7

line	stmt	bran	cond	sub	pod	time	code
1							#####################################################################
2							#
3							# The Perl::Tidy::Tokenizer package is essentially a filter which
4							# reads lines of perl source code from a source object and provides
5							# corresponding tokenized lines through its get_line() method. Lines
6							# flow from the source_object to the caller like this:
7							#
8							# source_object --> LineBuffer_object --> Tokenizer --> calling routine
9							# get_line() get_line() get_line() line_of_tokens
10							#
11							# The source object can be any object with a get_line() method which
12							# supplies one line (a character string) perl call.
13							# The LineBuffer object is created by the Tokenizer.
14							# The Tokenizer returns a reference to a data structure 'line_of_tokens'
15							# containing one tokenized line for each call to its get_line() method.
16							#
17							# WARNING: This is not a real class. Only one tokenizer my be used.
18							#
19							########################################################################
20
21							package Perl::Tidy::Tokenizer;
22	38			38		289	use strict;
	38					108
	38					1231
23	38			38		204	use warnings;
	38					90
	38					1379
24	38			38		238	use English qw( -no_match_vars );
	38					119
	38					234
25
26							our $VERSION = '20230701';
27
28	38			38		30891	use Perl::Tidy::LineBuffer;
	38					124
	38					1176
29	38			38		295	use Carp;
	38					112
	38					2242
30
31	38			38		257	use constant DEVEL_MODE => 0;
	38					114
	38					2142
32	38			38		254	use constant EMPTY_STRING => q{};
	38					104
	38					1796
33	38			38		325	use constant SPACE => q{ };
	38					120
	38					2281
34
35							# Decimal values of some ascii characters for quick checks
36	38			38		284	use constant ORD_TAB => 9;
	38					84
	38					2092
37	38			38		270	use constant ORD_SPACE => 32;
	38					97
	38					1957
38	38			38		268	use constant ORD_PRINTABLE_MIN => 33;
	38					103
	38					2012
39	38			38		291	use constant ORD_PRINTABLE_MAX => 126;
	38					98
	38					7241
40
41							# GLOBAL VARIABLES which change during tokenization:
42							# These could also be stored in $self but it is more convenient and
43							# efficient to make them global lexical variables.
44							# INITIALIZER: sub prepare_for_a_new_file
45							my (
46
47							$brace_depth,
48							$context,
49							$current_package,
50							$last_nonblank_block_type,
51							$last_nonblank_token,
52							$last_nonblank_type,
53							$next_sequence_number,
54							$paren_depth,
55							$rbrace_context,
56							$rbrace_package,
57							$rbrace_structural_type,
58							$rbrace_type,
59							$rcurrent_depth,
60							$rcurrent_sequence_number,
61							$rdepth_array,
62							$ris_block_function,
63							$ris_block_list_function,
64							$ris_constant,
65							$ris_user_function,
66							$rnested_statement_type,
67							$rnested_ternary_flag,
68							$rparen_semicolon_count,
69							$rparen_structural_type,
70							$rparen_type,
71							$rsaw_function_definition,
72							$rsaw_use_module,
73							$rsquare_bracket_structural_type,
74							$rsquare_bracket_type,
75							$rstarting_line_of_current_depth,
76							$rtotal_depth,
77							$ruser_function_prototype,
78							$square_bracket_depth,
79							$statement_type,
80							$total_depth,
81							);
82
83							my (
84
85							# GLOBAL CONSTANTS for routines in this package,
86							# INITIALIZER: BEGIN block.
87							%can_start_digraph,
88							%expecting_operator_token,
89							%expecting_operator_types,
90							%expecting_term_token,
91							%expecting_term_types,
92							%is_block_operator,
93							%is_comma_question_colon,
94							%is_digraph,
95							%is_file_test_operator,
96							%is_if_elsif_unless,
97							%is_if_elsif_unless_case_when,
98							%is_indirect_object_taker,
99							%is_keyword_rejecting_question_as_pattern_delimiter,
100							%is_keyword_rejecting_slash_as_pattern_delimiter,
101							%is_keyword_taking_list,
102							%is_keyword_taking_optional_arg,
103							%is_q_qq_qw_qx_qr_s_y_tr_m,
104							%is_q_qq_qx_qr_s_y_tr_m,
105							%is_semicolon_or_t,
106							%is_sort_map_grep,
107							%is_sort_map_grep_eval_do,
108							%is_tetragraph,
109							%is_trigraph,
110							%is_valid_token_type,
111							%other_line_endings,
112							%really_want_term,
113							@closing_brace_names,
114							@opening_brace_names,
115
116							# GLOBAL VARIABLES which are constant after being configured.
117							# INITIALIZER: BEGIN block and modified by sub check_options
118							%is_code_block_token,
119							%is_keyword,
120							%is_my_our_state,
121							%is_package,
122
123							# INITIALIZER: sub check_options
124							$code_skipping_pattern_begin,
125							$code_skipping_pattern_end,
126							$rOpts_code_skipping,
127							%is_END_DATA_format_sub,
128							%is_grep_alias,
129							%is_sub,
130							);
131
132							# possible values of operator_expected()
133	38			38		321	use constant TERM => -1;
	38					145
	38					2163
134	38			38		274	use constant UNKNOWN => 0;
	38					104
	38					1986
135	38			38		316	use constant OPERATOR => 1;
	38					84
	38					2046
136
137							# possible values of context
138	38			38		246	use constant SCALAR_CONTEXT => -1;
	38					127
	38					5617
139	38			38		274	use constant UNKNOWN_CONTEXT => 0;
	38					82
	38					2882
140	38			38		278	use constant LIST_CONTEXT => 1;
	38					112
	38					1896
141
142							# Maximum number of little messages; probably need not be changed.
143	38			38		266	use constant MAX_NAG_MESSAGES => 6;
	38					115
	38					8286
144
145	0					0	BEGIN {
146
147							# Array index names for $self.
148							# Do not combine with other BEGIN blocks (c101).
149	38			38		249989	my $i = 0;
150							use constant {
151	38					23422	_rhere_target_list_ => $i++,
152							_in_here_doc_ => $i++,
153							_here_doc_target_ => $i++,
154							_here_quote_character_ => $i++,
155							_in_data_ => $i++,
156							_in_end_ => $i++,
157							_in_format_ => $i++,
158							_in_error_ => $i++,
159							_in_trouble_ => $i++,
160							_warning_count_ => $i++,
161							_html_tag_count_ => $i++,
162							_in_pod_ => $i++,
163							_in_skipped_ => $i++,
164							_in_attribute_list_ => $i++,
165							_in_quote_ => $i++,
166							_quote_target_ => $i++,
167							_line_start_quote_ => $i++,
168							_starting_level_ => $i++,
169							_know_starting_level_ => $i++,
170							_tabsize_ => $i++,
171							_indent_columns_ => $i++,
172							_look_for_hash_bang_ => $i++,
173							_trim_qw_ => $i++,
174							_continuation_indentation_ => $i++,
175							_outdent_labels_ => $i++,
176							_last_line_number_ => $i++,
177							_saw_perl_dash_P_ => $i++,
178							_saw_perl_dash_w_ => $i++,
179							_saw_use_strict_ => $i++,
180							_saw_v_string_ => $i++,
181							_saw_brace_error_ => $i++,
182							_hit_bug_ => $i++,
183							_look_for_autoloader_ => $i++,
184							_look_for_selfloader_ => $i++,
185							_saw_autoloader_ => $i++,
186							_saw_selfloader_ => $i++,
187							_saw_hash_bang_ => $i++,
188							_saw_end_ => $i++,
189							_saw_data_ => $i++,
190							_saw_negative_indentation_ => $i++,
191							_started_tokenizing_ => $i++,
192							_line_buffer_object_ => $i++,
193							_debugger_object_ => $i++,
194							_diagnostics_object_ => $i++,
195							_logger_object_ => $i++,
196							_unexpected_error_count_ => $i++,
197							_started_looking_for_here_target_at_ => $i++,
198							_nearly_matched_here_target_at_ => $i++,
199							_line_of_text_ => $i++,
200							_rlower_case_labels_at_ => $i++,
201							_extended_syntax_ => $i++,
202							_maximum_level_ => $i++,
203							_true_brace_error_count_ => $i++,
204							_rOpts_maximum_level_errors_ => $i++,
205							_rOpts_maximum_unexpected_errors_ => $i++,
206							_rOpts_logfile_ => $i++,
207							_rOpts_ => $i++,
208							_calculate_ci_ => $i++,
209	38			38		335	};
	38					113
210							} ## end BEGIN
211
212							{ ## closure for subs to count instances
213
214							# methods to count instances
215							my $_count = 0;
216	0			0	0	0	sub get_count { return $_count; }
217	556			556		2540	sub _increment_count { return ++$_count }
218	556			556		1311	sub _decrement_count { return --$_count }
219							}
220
221							sub DESTROY {
222	556			556		1412	my $self = shift;
223	556					2961	$self->_decrement_count();
224	556					7795	return;
225							}
226
227							sub AUTOLOAD {
228
229							# Catch any undefined sub calls so that we are sure to get
230							# some diagnostic information. This sub should never be called
231							# except for a programming error.
232	0			0		0	our $AUTOLOAD;
233	0	0				0	return if ( $AUTOLOAD =~ /\bDESTROY$/ );
234	0					0	my ( $pkg, $fname, $lno ) = caller();
235	0					0	my $my_package = __PACKAGE__;
236	0					0	print STDERR <<EOM;
237							======================================================================
238							Error detected in package '$my_package', version $VERSION
239							Received unexpected AUTOLOAD call for sub '$AUTOLOAD'
240							Called from package: '$pkg'
241							Called from File '$fname' at line '$lno'
242							This error is probably due to a recent programming change
243							======================================================================
244							EOM
245	0					0	exit 1;
246							} ## end sub AUTOLOAD
247
248							sub Die {
249	0			0	0	0	my ($msg) = @_;
250	0					0	Perl::Tidy::Die($msg);
251	0					0	croak "unexpected return from Perl::Tidy::Die";
252							}
253
254							sub Fault {
255	0			0	0	0	my ( $self, $msg ) = @_;
256
257							# This routine is called for errors that really should not occur
258							# except if there has been a bug introduced by a recent program change.
259							# Please add comments at calls to Fault to explain why the call
260							# should not occur, and where to look to fix it.
261	0					0	my ( $package0, $filename0, $line0, $subroutine0 ) = caller(0);
262	0					0	my ( $package1, $filename1, $line1, $subroutine1 ) = caller(1);
263	0					0	my ( $package2, $filename2, $line2, $subroutine2 ) = caller(2);
264	0					0	my $pkg = __PACKAGE__;
265
266							# Catch potential error of Fault not called as a method
267	0					0	my $input_stream_name;
268	0	0				0	if ( !ref($self) ) {
269	0					0	$msg = "Fault not called as a method - please fix\n";
270	0	0	0			0	if ( $self && length($self) < 200 ) { $msg .= $self }
	0					0
271	0					0	$self = undef;
272	0					0	$input_stream_name = "(UNKNOWN)";
273							}
274							else {
275	0					0	$input_stream_name = $self->get_input_stream_name();
276							}
277
278	0					0	Die(<<EOM);
279							==============================================================================
280							While operating on input stream with name: '$input_stream_name'
281							A fault was detected at line $line0 of sub '$subroutine1'
282							in file '$filename1'
283							which was called from line $line1 of sub '$subroutine2'
284							Message: '$msg'
285							This is probably an error introduced by a recent programming change.
286							$pkg reports VERSION='$VERSION'.
287							==============================================================================
288							EOM
289
290							# We shouldn't get here, but this return is to keep Perl-Critic from
291							# complaining.
292	0					0	return;
293							} ## end sub Fault
294
295							sub bad_pattern {
296
297							# See if a pattern will compile. We have to use a string eval here,
298							# but it should be safe because the pattern has been constructed
299							# by this program.
300	1108			1108	0	2598	my ($pattern) = @_;
301	1108					82436	my $ok = eval "'##'=~/$pattern/";
302	1108		33			10116	return !defined($ok) \|\| $EVAL_ERROR;
303							} ## end sub bad_pattern
304
305							sub make_code_skipping_pattern {
306	1108			1108	0	3389	my ( $rOpts, $opt_name, $default ) = @_;
307	1108					2756	my $param = $rOpts->{$opt_name};
308	1108	100				2973	unless ($param) { $param = $default }
	1106					2203
309	1108					4601	$param =~ s/^\s*//; # allow leading spaces to be like format-skipping
310	1108	50				4777	if ( $param !~ /^#/ ) {
311	0					0	Die("ERROR: the $opt_name parameter '$param' must begin with '#'\n");
312							}
313	1108					3863	my $pattern = '^\s*' . $param . '\b';
314	1108	50				3357	if ( bad_pattern($pattern) ) {
315	0					0	Die(
316							"ERROR: the $opt_name parameter '$param' causes the invalid regex '$pattern'\n"
317							);
318							}
319	1108					3945	return $pattern;
320							} ## end sub make_code_skipping_pattern
321
322							sub check_options {
323
324							# Check Tokenizer parameters
325	554			554	0	1543	my $rOpts = shift;
326
327	554					2188	%is_sub = ();
328	554					1834	$is_sub{'sub'} = 1;
329
330	554					4129	%is_END_DATA_format_sub = (
331							'__END__' => 1,
332							'__DATA__' => 1,
333							'format' => 1,
334							'sub' => 1,
335							);
336
337							# Install any aliases to 'sub'
338	554	50				2205	if ( $rOpts->{'sub-alias-list'} ) {
339
340							# Note that any 'sub-alias-list' has been preprocessed to
341							# be a trimmed, space-separated list which includes 'sub'
342							# for example, it might be 'sub method fun'
343	554					4373	my @sub_alias_list = split /\s+/, $rOpts->{'sub-alias-list'};
344	554					2355	foreach my $word (@sub_alias_list) {
345	1114					2421	$is_sub{$word} = 1;
346	1114					2684	$is_END_DATA_format_sub{$word} = 1;
347							}
348							}
349
350							#------------------------------------------------
351							# Update hash values for any -use-feature options
352							#------------------------------------------------
353	554					4419	my $use_feature_class = $rOpts->{'use-feature'} =~ /\bclass\b/;
354
355							# These are the main updates for this option. There are additional
356							# changes elsewhere, usually indicated with a comment 'rt145706'
357
358							# Update hash values for use_feature=class, added for rt145706
359							# see 'perlclass.pod'
360
361							# IMPORTANT: We are changing global hash values initially set in a BEGIN
362							# block. Values must be defined (true or false) for each of these new
363							# words whether true or false. Otherwise, programs using the module which
364							# change options between runs (such as test code) will have
365							# incorrect settings and fail.
366
367							# There are 4 new keywords:
368
369							# 'class' - treated specially as generalization of 'package'
370							# Note: we must not set 'class' to be a keyword to avoid problems
371							# with older uses.
372	554					2475	$is_package{'class'} = $use_feature_class;
373
374							# 'method' - treated like sub using the sub-alias-list option
375							# Note: we must not set 'method' to be a keyword to avoid problems
376							# with older uses.
377
378							# 'field' - added as a keyword, and works like 'my'
379	554					1764	$is_keyword{'field'} = $use_feature_class;
380	554					1511	$is_my_our_state{'field'} = $use_feature_class;
381
382							# 'ADJUST' - added as a keyword and works like 'BEGIN'
383							# TODO: if ADJUST gets a paren list, this will need to be updated
384	554					1587	$is_keyword{'ADJUST'} = $use_feature_class;
385	554					1587	$is_code_block_token{'ADJUST'} = $use_feature_class;
386
387	554					2142	%is_grep_alias = ();
388	554	50				1964	if ( $rOpts->{'grep-alias-list'} ) {
389
390							# Note that 'grep-alias-list' has been preprocessed to be a trimmed,
391							# space-separated list
392	554					3797	my @q = split /\s+/, $rOpts->{'grep-alias-list'};
393	554					4306	@{is_grep_alias}{@q} = (1) x scalar(@q);
394							}
395
396	554					1678	$rOpts_code_skipping = $rOpts->{'code-skipping'};
397	554					2823	$code_skipping_pattern_begin =
398							make_code_skipping_pattern( $rOpts, 'code-skipping-begin', '#<<V' );
399	554					2105	$code_skipping_pattern_end =
400							make_code_skipping_pattern( $rOpts, 'code-skipping-end', '#>>V' );
401
402	554					2752	return;
403							} ## end sub check_options
404
405							sub new {
406
407	556			556	0	5789	my ( $class, @args ) = @_;
408
409							# Note: 'tabs' and 'indent_columns' are temporary and should be
410							# removed asap
411	556					8851	my %defaults = (
412							source_object => undef,
413							debugger_object => undef,
414							diagnostics_object => undef,
415							logger_object => undef,
416							starting_level => undef,
417							indent_columns => 4,
418							tabsize => 8,
419							look_for_hash_bang => 0,
420							trim_qw => 1,
421							look_for_autoloader => 1,
422							look_for_selfloader => 1,
423							starting_line_number => 1,
424							extended_syntax => 0,
425							rOpts => {},
426							);
427	556					7890	my %args = ( %defaults, @args );
428
429							# we are given an object with a get_line() method to supply source lines
430	556					2259	my $source_object = $args{source_object};
431	556					1627	my $rOpts = $args{rOpts};
432
433							# we create another object with a get_line() and peek_ahead() method
434	556					4097	my $line_buffer_object = Perl::Tidy::LineBuffer->new($source_object);
435
436							# Tokenizer state data is as follows:
437							# _rhere_target_list_ reference to list of here-doc targets
438							# _here_doc_target_ the target string for a here document
439							# _here_quote_character_ the type of here-doc quoting (" ' ` or none)
440							# to determine if interpolation is done
441							# _quote_target_ character we seek if chasing a quote
442							# _line_start_quote_ line where we started looking for a long quote
443							# _in_here_doc_ flag indicating if we are in a here-doc
444							# _in_pod_ flag set if we are in pod documentation
445							# _in_skipped_ flag set if we are in a skipped section
446							# _in_error_ flag set if we saw severe error (binary in script)
447							# _in_trouble_ set if we saw a troublesome lexical like 'my sub s'
448							# _warning_count_ number of calls to logger sub warning
449							# _html_tag_count_ number of apparent html tags seen (indicates html)
450							# _in_data_ flag set if we are in __DATA__ section
451							# _in_end_ flag set if we are in __END__ section
452							# _in_format_ flag set if we are in a format description
453							# _in_attribute_list_ flag telling if we are looking for attributes
454							# _in_quote_ flag telling if we are chasing a quote
455							# _starting_level_ indentation level of first line
456							# _line_buffer_object_ object with get_line() method to supply source code
457							# _diagnostics_object_ place to write debugging information
458							# _unexpected_error_count_ error count used to limit output
459							# _lower_case_labels_at_ line numbers where lower case labels seen
460							# _hit_bug_ program bug detected
461
462	556					1621	my $self = [];
463	556					1726	$self->[_rhere_target_list_] = [];
464	556					1298	$self->[_in_here_doc_] = 0;
465	556					1545	$self->[_here_doc_target_] = EMPTY_STRING;
466	556					1449	$self->[_here_quote_character_] = EMPTY_STRING;
467	556					1718	$self->[_in_data_] = 0;
468	556					1645	$self->[_in_end_] = 0;
469	556					1413	$self->[_in_format_] = 0;
470	556					1334	$self->[_in_error_] = 0;
471	556					1334	$self->[_in_trouble_] = 0;
472	556					1345	$self->[_warning_count_] = 0;
473	556					1363	$self->[_html_tag_count_] = 0;
474	556					1346	$self->[_in_pod_] = 0;
475	556					1336	$self->[_in_skipped_] = 0;
476	556					1215	$self->[_in_attribute_list_] = 0;
477	556					1313	$self->[_in_quote_] = 0;
478	556					1446	$self->[_quote_target_] = EMPTY_STRING;
479	556					1249	$self->[_line_start_quote_] = -1;
480	556					1302	$self->[_starting_level_] = $args{starting_level};
481	556					1658	$self->[_know_starting_level_] = defined( $args{starting_level} );
482	556					1484	$self->[_tabsize_] = $args{tabsize};
483	556					1547	$self->[_indent_columns_] = $args{indent_columns};
484	556					1506	$self->[_look_for_hash_bang_] = $args{look_for_hash_bang};
485	556					1387	$self->[_trim_qw_] = $args{trim_qw};
486	556					1423	$self->[_continuation_indentation_] = $args{continuation_indentation};
487	556					1385	$self->[_outdent_labels_] = $args{outdent_labels};
488	556					1622	$self->[_last_line_number_] = $args{starting_line_number} - 1;
489	556					1163	$self->[_saw_perl_dash_P_] = 0;
490	556					1372	$self->[_saw_perl_dash_w_] = 0;
491	556					1205	$self->[_saw_use_strict_] = 0;
492	556					1280	$self->[_saw_v_string_] = 0;
493	556					1311	$self->[_saw_brace_error_] = 0;
494	556					1179	$self->[_hit_bug_] = 0;
495	556					1254	$self->[_look_for_autoloader_] = $args{look_for_autoloader};
496	556					1559	$self->[_look_for_selfloader_] = $args{look_for_selfloader};
497	556					1322	$self->[_saw_autoloader_] = 0;
498	556					1196	$self->[_saw_selfloader_] = 0;
499	556					1163	$self->[_saw_hash_bang_] = 0;
500	556					1200	$self->[_saw_end_] = 0;
501	556					1238	$self->[_saw_data_] = 0;
502	556					1250	$self->[_saw_negative_indentation_] = 0;
503	556					1408	$self->[_started_tokenizing_] = 0;
504	556					1270	$self->[_line_buffer_object_] = $line_buffer_object;
505	556					1283	$self->[_debugger_object_] = $args{debugger_object};
506	556					1477	$self->[_diagnostics_object_] = $args{diagnostics_object};
507	556					1297	$self->[_logger_object_] = $args{logger_object};
508	556					1275	$self->[_unexpected_error_count_] = 0;
509	556					1220	$self->[_started_looking_for_here_target_at_] = 0;
510	556					1286	$self->[_nearly_matched_here_target_at_] = undef;
511	556					1600	$self->[_line_of_text_] = EMPTY_STRING;
512	556					1351	$self->[_rlower_case_labels_at_] = undef;
513	556					1284	$self->[_extended_syntax_] = $args{extended_syntax};
514	556					1414	$self->[_maximum_level_] = 0;
515	556					1335	$self->[_true_brace_error_count_] = 0;
516	556					1364	$self->[_rOpts_maximum_level_errors_] = $rOpts->{'maximum-level-errors'};
517							$self->[_rOpts_maximum_unexpected_errors_] =
518	556					1500	$rOpts->{'maximum-unexpected-errors'};
519	556					1449	$self->[_rOpts_logfile_] = $rOpts->{'logfile'};
520	556					1302	$self->[_rOpts_] = $rOpts;
521
522							# -exp=ci0 and -exp=ci1 turn on the tokenizer ci calculation for testing.
523							# See comments in sub Perl::Tidy::Formatter::set_ci.
524	556					1305	my $calculate_ci = 0; # current default
525	556	50	33			2581	if ( $rOpts->{'experimental'} && $rOpts->{'experimental'} =~ /\bci(\d+)\b/ )
526							{
527	0		0			0	$calculate_ci = ( $1 == 0 \|\| $1 == 1 );
528							}
529	556					1320	$self->[_calculate_ci_] = $calculate_ci;
530
531							# These vars are used for guessing indentation and must be positive
532	556	50				2202	$self->[_tabsize_] = 8 if ( !$self->[_tabsize_] );
533	556	100				1933	$self->[_indent_columns_] = 4 if ( !$self->[_indent_columns_] );
534
535	556					1375	bless $self, $class;
536
537	556					3387	$self->prepare_for_a_new_file();
538	556					3241	$self->find_starting_indentation_level();
539
540							# This is not a full class yet, so die if an attempt is made to
541							# create more than one object.
542
543	556	50				2876	if ( _increment_count() > 1 ) {
544	0					0	confess
545							"Attempt to create more than 1 object in $class, which is not a true class yet\n";
546							}
547
548	556					6064	return $self;
549
550							} ## end sub new
551
552							# Called externally
553							sub get_unexpected_error_count {
554	4			4	0	16	my ($self) = @_;
555	4					17	return $self->[_unexpected_error_count_];
556							}
557
558							# Called externally
559							sub is_keyword {
560	2766			2766	0	4959	my ($str) = @_;
561	2766					9828	return $is_keyword{$str};
562							}
563
564							#-----------------------------------------
565							# interface to Perl::Tidy::Logger routines
566							#-----------------------------------------
567							sub warning {
568
569	0			0	0	0	my ( $self, $msg ) = @_;
570
571	0					0	my $logger_object = $self->[_logger_object_];
572	0					0	$self->[_warning_count_]++;
573	0	0				0	if ($logger_object) {
574	0					0	my $msg_line_number = $self->[_last_line_number_];
575	0					0	$logger_object->warning( $msg, $msg_line_number );
576							}
577	0					0	return;
578							} ## end sub warning
579
580							sub get_input_stream_name {
581
582	0			0	0	0	my $self = shift;
583
584	0					0	my $input_stream_name = EMPTY_STRING;
585	0					0	my $logger_object = $self->[_logger_object_];
586	0	0				0	if ($logger_object) {
587	0					0	$input_stream_name = $logger_object->get_input_stream_name();
588							}
589	0					0	return $input_stream_name;
590							} ## end sub get_input_stream_name
591
592							sub complain {
593
594	32			32	0	99	my ( $self, $msg ) = @_;
595
596	32					72	my $logger_object = $self->[_logger_object_];
597	32	50				101	if ($logger_object) {
598	32					85	my $input_line_number = $self->[_last_line_number_];
599	32					166	$logger_object->complain( $msg, $input_line_number );
600							}
601	32					81	return;
602							} ## end sub complain
603
604							sub write_logfile_entry {
605
606	1840			1840	0	4254	my ( $self, $msg ) = @_;
607
608	1840					3502	my $logger_object = $self->[_logger_object_];
609	1840	100				4597	if ($logger_object) {
610	1834					5296	$logger_object->write_logfile_entry($msg);
611							}
612	1840					5323	return;
613							} ## end sub write_logfile_entry
614
615							sub interrupt_logfile {
616
617	0			0	0	0	my $self = shift;
618
619	0					0	my $logger_object = $self->[_logger_object_];
620	0	0				0	if ($logger_object) {
621	0					0	$logger_object->interrupt_logfile();
622							}
623	0					0	return;
624							} ## end sub interrupt_logfile
625
626							sub resume_logfile {
627
628	0			0	0	0	my $self = shift;
629
630	0					0	my $logger_object = $self->[_logger_object_];
631	0	0				0	if ($logger_object) {
632	0					0	$logger_object->resume_logfile();
633							}
634	0					0	return;
635							} ## end sub resume_logfile
636
637							sub brace_warning {
638	0			0	0	0	my ( $self, $msg ) = @_;
639	0					0	$self->[_saw_brace_error_]++;
640
641	0					0	my $logger_object = $self->[_logger_object_];
642	0	0				0	if ($logger_object) {
643	0					0	my $msg_line_number = $self->[_last_line_number_];
644	0					0	$logger_object->brace_warning( $msg, $msg_line_number );
645							}
646	0					0	return;
647							} ## end sub brace_warning
648
649							sub increment_brace_error {
650
651							# This is same as sub brace_warning but without a message
652	0			0	0	0	my $self = shift;
653	0					0	$self->[_saw_brace_error_]++;
654
655	0					0	my $logger_object = $self->[_logger_object_];
656	0	0				0	if ($logger_object) {
657	0					0	$logger_object->increment_brace_error();
658							}
659	0					0	return;
660							} ## end sub increment_brace_error
661
662							sub get_saw_brace_error {
663	0			0	0	0	my $self = shift;
664	0					0	return $self->[_saw_brace_error_];
665							} ## end sub get_saw_brace_error
666
667							sub report_definite_bug {
668	0			0	0	0	my $self = shift;
669	0					0	$self->[_hit_bug_] = 1;
670	0					0	my $logger_object = $self->[_logger_object_];
671	0	0				0	if ($logger_object) {
672	0					0	$logger_object->report_definite_bug();
673							}
674	0					0	return;
675							} ## end sub report_definite_bug
676
677							#-------------------------------------
678							# Interface to Perl::Tidy::Diagnostics
679							#-------------------------------------
680							sub write_diagnostics {
681	0			0	0	0	my ( $self, $msg ) = @_;
682	0					0	my $input_line_number = $self->[_last_line_number_];
683	0					0	my $diagnostics_object = $self->[_diagnostics_object_];
684	0	0				0	if ($diagnostics_object) {
685	0					0	$diagnostics_object->write_diagnostics( $msg, $input_line_number );
686							}
687	0					0	return;
688							} ## end sub write_diagnostics
689
690							sub report_tokenization_errors {
691
692	556			556	0	1891	my ($self) = @_;
693
694							# Report any tokenization errors and return a flag '$severe_error'.
695							# Set $severe_error = 1 if the tokenization errors are so severe that
696							# the formatter should not attempt to format the file. Instead, it will
697							# just output the file verbatim.
698
699							# set severe error flag if tokenizer has encountered file reading problems
700							# (i.e. unexpected binary characters)
701							# or code which may not be formatted correctly (such as 'my sub q')
702							# The difference between _in_error_ and _in_trouble_ is that
703							# _in_error_ stops the tokenizer immediately whereas
704							# _in_trouble_ lets the tokenizer finish so that all errors are seen
705							# Both block formatting and cause the input stream to be output verbatim.
706	556		33			3617	my $severe_error = $self->[_in_error_] \|\| $self->[_in_trouble_];
707
708							# And do not format if it looks like an html file (c209)
709	556		33			4142	$severe_error \|\|= $self->[_html_tag_count_] && $self->[_warning_count_];
			33
710
711							# Inform the logger object on length of input stream
712	556					1467	my $logger_object = $self->[_logger_object_];
713	556	100				2044	if ($logger_object) {
714	554					1469	my $last_line_number = $self->[_last_line_number_];
715	554					2873	$logger_object->set_last_input_line_number($last_line_number);
716							}
717
718	556					1507	my $maxle = $self->[_rOpts_maximum_level_errors_];
719	556					1569	my $maxue = $self->[_rOpts_maximum_unexpected_errors_];
720	556	50				1783	$maxle = 1 unless defined($maxle);
721	556	50				1761	$maxue = 0 unless defined($maxue);
722
723	556					2386	my $level = get_indentation_level();
724	556	50				2174	if ( $level != $self->[_starting_level_] ) {
725	0					0	$self->warning("final indentation level: $level\n");
726	0					0	my $level_diff = $self->[_starting_level_] - $level;
727	0	0				0	if ( $level_diff < 0 ) { $level_diff = -$level_diff }
	0					0
728
729							# Set severe error flag if the level error is greater than 1.
730							# The formatter can function for any level error but it is probably
731							# best not to attempt formatting for a high level error.
732	0	0	0			0	if ( $maxle >= 0 && $level_diff > $maxle ) {
733	0					0	$severe_error = 1;
734	0					0	$self->warning(<<EOM);
735							Formatting will be skipped because level error '$level_diff' exceeds -maxle=$maxle; use -maxle=-1 to force formatting
736							EOM
737							}
738							}
739
740	556					3163	$self->check_final_nesting_depths();
741
742							# Likewise, large numbers of brace errors usually indicate non-perl
743							# scripts, so set the severe error flag at a low number. This is similar
744							# to the level check, but different because braces may balance but be
745							# incorrectly interlaced.
746	556	50				2824	if ( $self->[_true_brace_error_count_] > 2 ) {
747	0					0	$severe_error = 1;
748							}
749
750	556	50	66			2634	if ( $self->[_look_for_hash_bang_]
751							&& !$self->[_saw_hash_bang_] )
752							{
753	0					0	$self->warning(
754							"hit EOF without seeing hash-bang line; maybe don't need -x?\n");
755							}
756
757	556	50				1904	if ( $self->[_in_format_] ) {
758	0					0	$self->warning("hit EOF while in format description\n");
759							}
760
761	556	50				1997	if ( $self->[_in_skipped_] ) {
762	0					0	$self->write_logfile_entry(
763							"hit EOF while in lines skipped with --code-skipping\n");
764							}
765
766	556	50				1995	if ( $self->[_in_pod_] ) {
767
768							# Just write log entry if this is after __END__ or __DATA__
769							# because this happens to often, and it is not likely to be
770							# a parsing error.
771	0	0	0			0	if ( $self->[_saw_data_] \|\| $self->[_saw_end_] ) {
772	0					0	$self->write_logfile_entry(
773							"hit eof while in pod documentation (no =cut seen)\n\tthis can cause trouble with some pod utilities\n"
774							);
775							}
776
777							else {
778	0					0	$self->complain(
779							"hit eof while in pod documentation (no =cut seen)\n\tthis can cause trouble with some pod utilities\n"
780							);
781							}
782
783							}
784
785	556	50				2027	if ( $self->[_in_here_doc_] ) {
786	0					0	$severe_error = 1;
787	0					0	my $here_doc_target = $self->[_here_doc_target_];
788	0					0	my $started_looking_for_here_target_at =
789							$self->[_started_looking_for_here_target_at_];
790	0	0				0	if ($here_doc_target) {
791	0					0	$self->warning(
792							"hit EOF in here document starting at line $started_looking_for_here_target_at with target: $here_doc_target\n"
793							);
794							}
795							else {
796	0					0	$self->warning(<<EOM);
797							Hit EOF in here document starting at line $started_looking_for_here_target_at with empty target string.
798							(Perl will match to the end of file but this may not be intended).
799							EOM
800							}
801	0					0	my $nearly_matched_here_target_at =
802							$self->[_nearly_matched_here_target_at_];
803	0	0				0	if ($nearly_matched_here_target_at) {
804	0					0	$self->warning(
805							"NOTE: almost matched at input line $nearly_matched_here_target_at except for whitespace\n"
806							);
807							}
808							}
809
810							# Something is seriously wrong if we ended inside a quote
811	556	50				2020	if ( $self->[_in_quote_] ) {
812	0					0	$severe_error = 1;
813	0					0	my $line_start_quote = $self->[_line_start_quote_];
814	0					0	my $quote_target = $self->[_quote_target_];
815	0	0				0	my $what =
816							( $self->[_in_attribute_list_] )
817							? "attribute list"
818							: "quote/pattern";
819	0					0	$self->warning(
820							"hit EOF seeking end of $what starting at line $line_start_quote ending in $quote_target\n"
821							);
822							}
823
824	556	50				2289	if ( $self->[_hit_bug_] ) {
825	0					0	$severe_error = 1;
826							}
827
828							# Multiple "unexpected" type tokenization errors usually indicate parsing
829							# non-perl scripts, or that something is seriously wrong, so we should
830							# avoid formatting them. This can happen for example if we run perltidy on
831							# a shell script or an html file. But unfortunately this check can
832							# interfere with some extended syntaxes, such as RPerl, so it has to be off
833							# by default.
834	556					1420	my $ue_count = $self->[_unexpected_error_count_];
835	556	50	33			2447	if ( $maxue > 0 && $ue_count > $maxue ) {
836	0					0	$self->warning(<<EOM);
837							Formatting will be skipped since unexpected token count = $ue_count > -maxue=$maxue; use -maxue=0 to force formatting
838							EOM
839	0					0	$severe_error = 1;
840							}
841
842	556	100				1997	unless ( $self->[_saw_perl_dash_w_] ) {
843	540	50				2199	if ( $] < 5.006 ) {
844	0					0	$self->write_logfile_entry("Suggest including '-w parameter'\n");
845							}
846							else {
847	540					1905	$self->write_logfile_entry("Suggest including 'use warnings;'\n");
848							}
849							}
850
851	556	50				3412	if ( $self->[_saw_perl_dash_P_] ) {
852	0					0	$self->write_logfile_entry(
853							"Use of -P parameter for defines is discouraged\n");
854							}
855
856	556	100				2241	unless ( $self->[_saw_use_strict_] ) {
857	542					1592	$self->write_logfile_entry("Suggest including 'use strict;'\n");
858							}
859
860							# it is suggested that labels have at least one upper case character
861							# for legibility and to avoid code breakage as new keywords are introduced
862	556	100				3352	if ( $self->[_rlower_case_labels_at_] ) {
863	12					33	my @lower_case_labels_at = @{ $self->[_rlower_case_labels_at_] };
	12					36
864	12					51	$self->write_logfile_entry(
865							"Suggest using upper case characters in label(s)\n");
866	12					94	local $LIST_SEPARATOR = ')(';
867	12					81	$self->write_logfile_entry(
868							" defined at line(s): (@lower_case_labels_at)\n");
869							}
870	556					2154	return $severe_error;
871							} ## end sub report_tokenization_errors
872
873							sub report_v_string {
874
875							# warn if this version can't handle v-strings
876	2			2	0	10	my ( $self, $tok ) = @_;
877	2	50				10	unless ( $self->[_saw_v_string_] ) {
878	2					6	$self->[_saw_v_string_] = $self->[_last_line_number_];
879							}
880	2	50				8	if ( $] < 5.006 ) {
881	0					0	$self->warning(
882							"Found v-string '$tok' but v-strings are not implemented in your version of perl; see Camel 3 book ch 2\n"
883							);
884							}
885	2					6	return;
886							} ## end sub report_v_string
887
888							sub is_valid_token_type {
889	3			3	0	7	my ($type) = @_;
890	3					13	return $is_valid_token_type{$type};
891							}
892
893							sub log_numbered_msg {
894	165			165	0	817	my ( $self, $msg ) = @_;
895
896							# write input line number + message to logfile
897	165					364	my $input_line_number = $self->[_last_line_number_];
898	165					780	$self->write_logfile_entry("Line $input_line_number: $msg");
899	165					407	return;
900							} ## end sub log_numbered_msg
901
902							# returns the next tokenized line
903							sub get_line {
904
905	8205			8205	0	15740	my $self = shift;
906
907							# USES GLOBAL VARIABLES:
908							# $brace_depth, $square_bracket_depth, $paren_depth
909
910	8205					25794	my $input_line = $self->[_line_buffer_object_]->get_line();
911	8205					16028	$self->[_line_of_text_] = $input_line;
912
913	8205	100				19855	return unless ($input_line);
914
915	7649					12591	my $input_line_number = ++$self->[_last_line_number_];
916
917							# Find and remove what characters terminate this line, including any
918							# control r
919	7649					11649	my $input_line_separator = EMPTY_STRING;
920	7649	50				21229	if ( chomp($input_line) ) {
921	7649					17473	$input_line_separator = $INPUT_RECORD_SEPARATOR;
922							}
923
924							# The first test here very significantly speeds things up, but be sure to
925							# keep the regex and hash %other_line_endings the same.
926	7649	100				23927	if ( $other_line_endings{ substr( $input_line, -1 ) } ) {
927	24	50				357	if ( $input_line =~ s/((\r\|\035\|\032)+)$// ) {
928	24					82	$input_line_separator = $2 . $input_line_separator;
929							}
930							}
931
932							# for backwards compatibility we keep the line text terminated with
933							# a newline character
934	7649					15528	$input_line .= "\n";
935	7649					13520	$self->[_line_of_text_] = $input_line;
936
937							# create a data structure describing this line which will be
938							# returned to the caller.
939
940							# _line_type codes are:
941							# SYSTEM - system-specific code before hash-bang line
942							# CODE - line of perl code (including comments)
943							# POD_START - line starting pod, such as '=head'
944							# POD - pod documentation text
945							# POD_END - last line of pod section, '=cut'
946							# HERE - text of here-document
947							# HERE_END - last line of here-doc (target word)
948							# FORMAT - format section
949							# FORMAT_END - last line of format section, '.'
950							# SKIP - code skipping section
951							# SKIP_END - last line of code skipping section, '#>>V'
952							# DATA_START - __DATA__ line
953							# DATA - unidentified text following __DATA__
954							# END_START - __END__ line
955							# END - unidentified text following __END__
956							# ERROR - we are in big trouble, probably not a perl script
957
958							# Other variables:
959							# _curly_brace_depth - depth of curly braces at start of line
960							# _square_bracket_depth - depth of square brackets at start of line
961							# _paren_depth - depth of parens at start of line
962							# _starting_in_quote - this line continues a multi-line quote
963							# (so don't trim leading blanks!)
964							# _ending_in_quote - this line ends in a multi-line quote
965							# (so don't trim trailing blanks!)
966	7649					42598	my $line_of_tokens = {
967							_line_type => 'EOF',
968							_line_text => $input_line,
969							_line_number => $input_line_number,
970							_guessed_indentation_level => 0,
971							_curly_brace_depth => $brace_depth,
972							_square_bracket_depth => $square_bracket_depth,
973							_paren_depth => $paren_depth,
974							_quote_character => EMPTY_STRING,
975							## Skip these needless initializations for efficiency:
976							## _rtoken_type => undef,
977							## _rtokens => undef,
978							## _rlevels => undef,
979							## _rblock_type => undef,
980							## _rtype_sequence => undef,
981							## _rci_levels => undef,
982							## _starting_in_quote => 0,
983							## _ending_in_quote => 0,
984							};
985
986							# must print line unchanged if we are in a here document
987	7649	100				38786	if ( $self->[_in_here_doc_] ) {
		100
		100
		100
		50
		100
		100
988
989	24					82	$line_of_tokens->{_line_type} = 'HERE';
990	24					56	my $here_doc_target = $self->[_here_doc_target_];
991	24					63	my $here_quote_character = $self->[_here_quote_character_];
992	24					62	my $candidate_target = $input_line;
993	24					45	chomp $candidate_target;
994
995							# Handle <<~ targets, which are indicated here by a leading space on
996							# the here quote character
997	24	100				111	if ( $here_quote_character =~ /^\s/ ) {
998	4					16	$candidate_target =~ s/^\s*//;
999							}
1000	24	100				87	if ( $candidate_target eq $here_doc_target ) {
1001	9					52	$self->[_nearly_matched_here_target_at_] = undef;
1002	9					43	$line_of_tokens->{_line_type} = 'HERE_END';
1003	9					65	$self->log_numbered_msg("Exiting HERE document $here_doc_target\n");
1004
1005	9					45	my $rhere_target_list = $self->[_rhere_target_list_];
1006	9	50				42	if ( @{$rhere_target_list} ) { # there can be multiple here targets
	9					41
1007							( $here_doc_target, $here_quote_character ) =
1008	0					0	@{ shift @{$rhere_target_list} };
	0					0
	0					0
1009	0					0	$self->[_here_doc_target_] = $here_doc_target;
1010	0					0	$self->[_here_quote_character_] = $here_quote_character;
1011	0					0	$self->log_numbered_msg(
1012							"Entering HERE document $here_doc_target\n");
1013	0					0	$self->[_nearly_matched_here_target_at_] = undef;
1014	0					0	$self->[_started_looking_for_here_target_at_] =
1015							$input_line_number;
1016							}
1017							else {
1018	9					32	$self->[_in_here_doc_] = 0;
1019	9					31	$self->[_here_doc_target_] = EMPTY_STRING;
1020	9					31	$self->[_here_quote_character_] = EMPTY_STRING;
1021							}
1022							}
1023
1024							# check for error of extra whitespace
1025							# note for PERL6: leading whitespace is allowed
1026							else {
1027	15					147	$candidate_target =~ s/\s*$//;
1028	15					72	$candidate_target =~ s/^\s*//;
1029	15	50				64	if ( $candidate_target eq $here_doc_target ) {
1030	0					0	$self->[_nearly_matched_here_target_at_] = $input_line_number;
1031							}
1032							}
1033	24					103	return $line_of_tokens;
1034							}
1035
1036							# Print line unchanged if we are in a format section
1037							elsif ( $self->[_in_format_] ) {
1038
1039	3	100				15	if ( $input_line =~ /^\.[\s#]*$/ ) {
1040
1041							# Decrement format depth count at a '.' after a 'format'
1042	1					5	$self->[_in_format_]--;
1043
1044							# This is the end when count reaches 0
1045	1	50				5	if ( !$self->[_in_format_] ) {
1046	1					4	$self->log_numbered_msg("Exiting format section\n");
1047	1					3	$line_of_tokens->{_line_type} = 'FORMAT_END';
1048
1049							# Make the tokenizer mark an opening brace which follows
1050							# as a code block. Fixes issue c202/t032.
1051	1					3	$last_nonblank_token = ';';
1052	1					3	$last_nonblank_type = ';';
1053							}
1054							}
1055							else {
1056	2					4	$line_of_tokens->{_line_type} = 'FORMAT';
1057	2	50				9	if ( $input_line =~ /^\s*format\s+\w+/ ) {
1058
1059							# Increment format depth count at a 'format' within a 'format'
1060							# This is a simple way to handle nested formats (issue c019).
1061	0					0	$self->[_in_format_]++;
1062							}
1063							}
1064	3					11	return $line_of_tokens;
1065							}
1066
1067							# must print line unchanged if we are in pod documentation
1068							elsif ( $self->[_in_pod_] ) {
1069
1070	44					117	$line_of_tokens->{_line_type} = 'POD';
1071	44	100				267	if ( $input_line =~ /^=cut/ ) {
1072	19					61	$line_of_tokens->{_line_type} = 'POD_END';
1073	19					68	$self->log_numbered_msg("Exiting POD section\n");
1074	19					51	$self->[_in_pod_] = 0;
1075							}
1076	44	50	33			144	if ( $input_line =~ /^\#\!.*perl\b/ && !$self->[_in_end_] ) {
1077	0					0	$self->warning(
1078							"Hash-bang in pod can cause older versions of perl to fail! \n"
1079							);
1080							}
1081
1082	44					152	return $line_of_tokens;
1083							}
1084
1085							# print line unchanged if in skipped section
1086							elsif ( $self->[_in_skipped_] ) {
1087
1088	8					27	$line_of_tokens->{_line_type} = 'SKIP';
1089	8	100				102	if ( $input_line =~ /$code_skipping_pattern_end/ ) {
		50
1090	2					10	$line_of_tokens->{_line_type} = 'SKIP_END';
1091	2					10	$self->log_numbered_msg("Exiting code-skipping section\n");
1092	2					10	$self->[_in_skipped_] = 0;
1093							}
1094							elsif ( $input_line =~ /$code_skipping_pattern_begin/ ) {
1095
1096							# warn of duplicate starting comment lines, git #118
1097	0					0	my $lno = $self->[_in_skipped_];
1098	0					0	$self->warning(
1099							"Already in code-skipping section which started at line $lno\n"
1100							);
1101							}
1102	8					32	return $line_of_tokens;
1103							}
1104
1105							# must print line unchanged if we have seen a severe error (i.e., we
1106							# are seeing illegal tokens and cannot continue. Syntax errors do
1107							# not pass this route). Calling routine can decide what to do, but
1108							# the default can be to just pass all lines as if they were after __END__
1109							elsif ( $self->[_in_error_] ) {
1110	0					0	$line_of_tokens->{_line_type} = 'ERROR';
1111	0					0	return $line_of_tokens;
1112							}
1113
1114							# print line unchanged if we are __DATA__ section
1115							elsif ( $self->[_in_data_] ) {
1116
1117							# ...but look for POD
1118							# Note that the _in_data and _in_end flags remain set
1119							# so that we return to that state after seeing the
1120							# end of a pod section
1121	1	50	33			10	if ( $input_line =~ /^=(\w+)\b/ && $1 ne 'cut' ) {
1122	0					0	$line_of_tokens->{_line_type} = 'POD_START';
1123	0					0	$self->log_numbered_msg("Entering POD section\n");
1124	0					0	$self->[_in_pod_] = 1;
1125	0					0	return $line_of_tokens;
1126							}
1127							else {
1128	1					3	$line_of_tokens->{_line_type} = 'DATA';
1129	1					5	return $line_of_tokens;
1130							}
1131							}
1132
1133							# print line unchanged if we are in __END__ section
1134							elsif ( $self->[_in_end_] ) {
1135
1136							# ...but look for POD
1137							# Note that the _in_data and _in_end flags remain set
1138							# so that we return to that state after seeing the
1139							# end of a pod section
1140	48	100	66			273	if ( $input_line =~ /^=(\w+)\b/ && $1 ne 'cut' ) {
1141	6					29	$line_of_tokens->{_line_type} = 'POD_START';
1142	6					23	$self->log_numbered_msg("Entering POD section\n");
1143	6					42	$self->[_in_pod_] = 1;
1144	6					30	return $line_of_tokens;
1145							}
1146							else {
1147	42					85	$line_of_tokens->{_line_type} = 'END';
1148	42					132	return $line_of_tokens;
1149							}
1150							}
1151
1152							# check for a hash-bang line if we haven't seen one
1153	7521	100				16727	if ( !$self->[_saw_hash_bang_] ) {
1154	6973	100				20727	if ( $input_line =~ /^\#\!.*perl\b/ ) {
1155	15					55	$self->[_saw_hash_bang_] = $input_line_number;
1156
1157							# check for -w and -P flags
1158	15	50				86	if ( $input_line =~ /^\#\!.perl\s.-.*P/ ) {
1159	0					0	$self->[_saw_perl_dash_P_] = 1;
1160							}
1161
1162	15	100				94	if ( $input_line =~ /^\#\!.perl\s.-.*w/ ) {
1163	8					27	$self->[_saw_perl_dash_w_] = 1;
1164							}
1165
1166	15	100	33			123	if (
			66
			100
			66
1167							$input_line_number > 1
1168
1169							# leave any hash bang in a BEGIN block alone
1170							# i.e. see 'debugger-duck_type.t'
1171							&& !(
1172							$last_nonblank_block_type
1173							&& $last_nonblank_block_type eq 'BEGIN'
1174							)
1175							&& !$self->[_look_for_hash_bang_]
1176
1177							# Try to avoid giving a false alarm at a simple comment.
1178							# These look like valid hash-bang lines:
1179
1180							#!/usr/bin/perl -w
1181							#! /usr/bin/perl -w
1182							#!c:\perl\bin\perl.exe
1183
1184							# These are comments:
1185							#! I love perl
1186							#! sunos does not yet provide a /usr/bin/perl
1187
1188							# Comments typically have multiple spaces, which suggests
1189							# the filter
1190							&& $input_line =~ /^\#\!(\s+)?(\S+)?perl/
1191							)
1192							{
1193
1194							# this is helpful for VMS systems; we may have accidentally
1195							# tokenized some DCL commands
1196	1	50				4	if ( $self->[_started_tokenizing_] ) {
1197	0					0	$self->warning(
1198							"There seems to be a hash-bang after line 1; do you need to run with -x ?\n"
1199							);
1200							}
1201							else {
1202	1					6	$self->complain("Useless hash-bang after line 1\n");
1203							}
1204							}
1205
1206							# Report the leading hash-bang as a system line
1207							# This will prevent -dac from deleting it
1208							else {
1209	14					50	$line_of_tokens->{_line_type} = 'SYSTEM';
1210	14					131	return $line_of_tokens;
1211							}
1212							}
1213							}
1214
1215							# wait for a hash-bang before parsing if the user invoked us with -x
1216	7507	100	100			18409	if ( $self->[_look_for_hash_bang_]
1217							&& !$self->[_saw_hash_bang_] )
1218							{
1219	5					9	$line_of_tokens->{_line_type} = 'SYSTEM';
1220	5					17	return $line_of_tokens;
1221							}
1222
1223							# a first line of the form ': #' will be marked as SYSTEM
1224							# since lines of this form may be used by tcsh
1225	7502	50	66			19912	if ( $input_line_number == 1 && $input_line =~ /^\s\:\s\#/ ) {
1226	0					0	$line_of_tokens->{_line_type} = 'SYSTEM';
1227	0					0	return $line_of_tokens;
1228							}
1229
1230							# now we know that it is ok to tokenize the line...
1231							# the line tokenizer will modify any of these private variables:
1232							# _rhere_target_list_
1233							# _in_data_
1234							# _in_end_
1235							# _in_format_
1236							# _in_error_
1237							# _in_skipped_
1238							# _in_pod_
1239							# _in_quote_
1240	7502					24455	$self->tokenize_this_line($line_of_tokens);
1241
1242							# Now finish defining the return structure and return it
1243	7502					15635	$line_of_tokens->{_ending_in_quote} = $self->[_in_quote_];
1244
1245							# handle severe error (binary data in script)
1246	7502	50				17287	if ( $self->[_in_error_] ) {
1247	0					0	$self->[_in_quote_] = 0; # to avoid any more messages
1248	0					0	$self->warning("Giving up after error\n");
1249	0					0	$line_of_tokens->{_line_type} = 'ERROR';
1250	0					0	reset_indentation_level(0); # avoid error messages
1251	0					0	return $line_of_tokens;
1252							}
1253
1254							# handle start of pod documentation
1255	7502	100				15698	if ( $self->[_in_pod_] ) {
1256
1257							# This gets tricky..above a __DATA__ or __END__ section, perl
1258							# accepts '=cut' as the start of pod section. But afterwards,
1259							# only pod utilities see it and they may ignore an =cut without
1260							# leading =head. In any case, this isn't good.
1261	13	50				58	if ( $input_line =~ /^=cut\b/ ) {
1262	0	0	0			0	if ( $self->[_saw_data_] \|\| $self->[_saw_end_] ) {
1263	0					0	$self->complain("=cut while not in pod ignored\n");
1264	0					0	$self->[_in_pod_] = 0;
1265	0					0	$line_of_tokens->{_line_type} = 'POD_END';
1266							}
1267							else {
1268	0					0	$line_of_tokens->{_line_type} = 'POD_START';
1269	0					0	$self->warning(
1270							"=cut starts a pod section .. this can fool pod utilities.\n"
1271							) unless (DEVEL_MODE);
1272	0					0	$self->log_numbered_msg("Entering POD section\n");
1273							}
1274							}
1275
1276							else {
1277	13					38	$line_of_tokens->{_line_type} = 'POD_START';
1278	13					58	$self->log_numbered_msg("Entering POD section\n");
1279							}
1280
1281	13					62	return $line_of_tokens;
1282							}
1283
1284							# handle start of skipped section
1285	7489	100				15155	if ( $self->[_in_skipped_] ) {
1286
1287	2					6	$line_of_tokens->{_line_type} = 'SKIP';
1288	2					12	$self->log_numbered_msg("Entering code-skipping section\n");
1289	2					8	return $line_of_tokens;
1290							}
1291
1292							# see if this line contains here doc targets
1293	7487					11942	my $rhere_target_list = $self->[_rhere_target_list_];
1294	7487	100				10474	if ( @{$rhere_target_list} ) {
	7487					17069
1295
1296							my ( $here_doc_target, $here_quote_character ) =
1297	9					39	@{ shift @{$rhere_target_list} };
	9					24
	9					43
1298	9					38	$self->[_in_here_doc_] = 1;
1299	9					36	$self->[_here_doc_target_] = $here_doc_target;
1300	9					30	$self->[_here_quote_character_] = $here_quote_character;
1301	9					70	$self->log_numbered_msg("Entering HERE document $here_doc_target\n");
1302	9					36	$self->[_started_looking_for_here_target_at_] = $input_line_number;
1303							}
1304
1305							# NOTE: __END__ and __DATA__ statements are written unformatted
1306							# because they can theoretically contain additional characters
1307							# which are not tokenized (and cannot be read with <DATA> either!).
1308	7487	100				20623	if ( $self->[_in_data_] ) {
		100
1309	1					3	$line_of_tokens->{_line_type} = 'DATA_START';
1310	1					6	$self->log_numbered_msg("Starting __DATA__ section\n");
1311	1					3	$self->[_saw_data_] = 1;
1312
1313							# keep parsing after __DATA__ if use SelfLoader was seen
1314	1	50				3	if ( $self->[_saw_selfloader_] ) {
1315	0					0	$self->[_in_data_] = 0;
1316	0					0	$self->log_numbered_msg(
1317							"SelfLoader seen, continuing; -nlsl deactivates\n");
1318							}
1319
1320	1					6	return $line_of_tokens;
1321							}
1322
1323							elsif ( $self->[_in_end_] ) {
1324	6					31	$line_of_tokens->{_line_type} = 'END_START';
1325	6					32	$self->log_numbered_msg("Starting __END__ section\n");
1326	6					15	$self->[_saw_end_] = 1;
1327
1328							# keep parsing after __END__ if use AutoLoader was seen
1329	6	50				23	if ( $self->[_saw_autoloader_] ) {
1330	0					0	$self->[_in_end_] = 0;
1331	0					0	$self->log_numbered_msg(
1332							"AutoLoader seen, continuing; -nlal deactivates\n");
1333							}
1334	6					29	return $line_of_tokens;
1335							}
1336
1337							# now, finally, we know that this line is type 'CODE'
1338	7480					13980	$line_of_tokens->{_line_type} = 'CODE';
1339
1340							# remember if we have seen any real code
1341	7480	100	100			23649	if ( !$self->[_started_tokenizing_]
			100
1342							&& $input_line !~ /^\s*$/
1343							&& $input_line !~ /^\s*#/ )
1344							{
1345	553					2078	$self->[_started_tokenizing_] = 1;
1346							}
1347
1348	7480	100				16071	if ( $self->[_debugger_object_] ) {
1349	7					33	$self->[_debugger_object_]->write_debug_entry($line_of_tokens);
1350							}
1351
1352							# Note: if keyword 'format' occurs in this line code, it is still CODE
1353							# (keyword 'format' need not start a line)
1354	7480	100				15466	if ( $self->[_in_format_] ) {
1355	1					7	$self->log_numbered_msg("Entering format section\n");
1356							}
1357
1358	7480	100	100			29097	if ( $self->[_in_quote_]
		100	100
1359							and ( $self->[_line_start_quote_] < 0 ) )
1360							{
1361
1362							#if ( ( my $quote_target = get_quote_target() ) !~ /^\s*$/ ) {
1363	49	100				413	if ( ( my $quote_target = $self->[_quote_target_] ) !~ /^\s*$/ ) {
1364	48					132	$self->[_line_start_quote_] = $input_line_number;
1365	48					328	$self->log_numbered_msg(
1366							"Start multi-line quote or pattern ending in $quote_target\n");
1367							}
1368							}
1369							elsif ( ( $self->[_line_start_quote_] >= 0 )
1370							&& !$self->[_in_quote_] )
1371							{
1372	48					209	$self->[_line_start_quote_] = -1;
1373	48					252	$self->log_numbered_msg("End of multi-line quote or pattern\n");
1374							}
1375
1376							# we are returning a line of CODE
1377	7480					29692	return $line_of_tokens;
1378							} ## end sub get_line
1379
1380							sub find_starting_indentation_level {
1381
1382							# We need to find the indentation level of the first line of the
1383							# script being formatted. Often it will be zero for an entire file,
1384							# but if we are formatting a local block of code (within an editor for
1385							# example) it may not be zero. The user may specify this with the
1386							# -sil=n parameter but normally doesn't so we have to guess.
1387							#
1388	556			556	0	1740	my ($self) = @_;
1389	556					1355	my $starting_level = 0;
1390
1391							# use value if given as parameter
1392	556	100				2861	if ( $self->[_know_starting_level_] ) {
		100
1393	1					2	$starting_level = $self->[_starting_level_];
1394							}
1395
1396							# if we know there is a hash_bang line, the level must be zero
1397							elsif ( $self->[_look_for_hash_bang_] ) {
1398	1					3	$self->[_know_starting_level_] = 1;
1399							}
1400
1401							# otherwise figure it out from the input file
1402							else {
1403	554					1189	my $line;
1404	554					1171	my $i = 0;
1405
1406							# keep looking at lines until we find a hash bang or piece of code
1407	554					1258	my $msg = EMPTY_STRING;
1408	554					3560	while ( $line = $self->[_line_buffer_object_]->peek_ahead( $i++ ) ) {
1409
1410							# if first line is #! then assume starting level is zero
1411	866	100	100			4973	if ( $i == 1 && $line =~ /^\#\!/ ) {
1412	13					51	$starting_level = 0;
1413	13					33	last;
1414							}
1415	853	100				4654	next if ( $line =~ /^\s*#/ ); # skip past comments
1416	557	100				3847	next if ( $line =~ /^\s*$/ ); # skip past blank lines
1417	539					2658	$starting_level = $self->guess_old_indentation_level($line);
1418	539					1369	last;
1419							}
1420	554					2620	$msg = "Line $i implies starting-indentation-level = $starting_level\n";
1421	554					2980	$self->write_logfile_entry("$msg");
1422							}
1423	556					2236	$self->[_starting_level_] = $starting_level;
1424	556					3524	reset_indentation_level($starting_level);
1425	556					1186	return;
1426							} ## end sub find_starting_indentation_level
1427
1428							sub guess_old_indentation_level {
1429	539			539	0	1870	my ( $self, $line ) = @_;
1430
1431							# Guess the indentation level of an input line.
1432							#
1433							# For the first line of code this result will define the starting
1434							# indentation level. It will mainly be non-zero when perltidy is applied
1435							# within an editor to a local block of code.
1436							#
1437							# This is an impossible task in general because we can't know what tabs
1438							# meant for the old script and how many spaces were used for one
1439							# indentation level in the given input script. For example it may have
1440							# been previously formatted with -i=7 -et=3. But we can at least try to
1441							# make sure that perltidy guesses correctly if it is applied repeatedly to
1442							# a block of code within an editor, so that the block stays at the same
1443							# level when perltidy is applied repeatedly.
1444							#
1445							# USES GLOBAL VARIABLES: (none)
1446	539					1257	my $level = 0;
1447
1448							# find leading tabs, spaces, and any statement label
1449	539					1146	my $spaces = 0;
1450	539	50				4304	if ( $line =~ /^(\t+)?(\s+)?(\w+:[^:])?/ ) {
1451
1452							# If there are leading tabs, we use the tab scheme for this run, if
1453							# any, so that the code will remain stable when editing.
1454	539	100				2602	if ($1) { $spaces += length($1) * $self->[_tabsize_] }
	2					10
1455
1456	539	100				2040	if ($2) { $spaces += length($2) }
	77					292
1457
1458							# correct for outdented labels
1459	539	50	66			2419	if ( $3 && $self->[_outdent_labels_] ) {
1460	1					6	$spaces += $self->[_continuation_indentation_];
1461							}
1462							}
1463
1464							# compute indentation using the value of -i for this run.
1465							# If -i=0 is used for this run (which is possible) it doesn't matter
1466							# what we do here but we'll guess that the old run used 4 spaces per level.
1467	539					1651	my $indent_columns = $self->[_indent_columns_];
1468	539	50				1730	$indent_columns = 4 if ( !$indent_columns );
1469	539					2206	$level = int( $spaces / $indent_columns );
1470	539					1449	return ($level);
1471							} ## end sub guess_old_indentation_level
1472
1473							# This is a currently unused debug routine
1474							sub dump_functions {
1475
1476	0			0	0	0	my $fh = *STDOUT;
1477	0					0	foreach my $pkg ( keys %{$ris_user_function} ) {
	0					0
1478	0					0	$fh->print("\nnon-constant subs in package $pkg\n");
1479
1480	0					0	foreach my $sub ( keys %{ $ris_user_function->{$pkg} } ) {
	0					0
1481	0					0	my $msg = EMPTY_STRING;
1482	0	0				0	if ( $ris_block_list_function->{$pkg}{$sub} ) {
1483	0					0	$msg = 'block_list';
1484							}
1485
1486	0	0				0	if ( $ris_block_function->{$pkg}{$sub} ) {
1487	0					0	$msg = 'block';
1488							}
1489	0					0	$fh->print("$sub $msg\n");
1490							}
1491							}
1492
1493	0					0	foreach my $pkg ( keys %{$ris_constant} ) {
	0					0
1494	0					0	$fh->print("\nconstants and constant subs in package $pkg\n");
1495
1496	0					0	foreach my $sub ( keys %{ $ris_constant->{$pkg} } ) {
	0					0
1497	0					0	$fh->print("$sub\n");
1498							}
1499							}
1500	0					0	return;
1501							} ## end sub dump_functions
1502
1503							sub prepare_for_a_new_file {
1504
1505	556			556	0	1359	my $self = shift;
1506
1507							# previous tokens needed to determine what to expect next
1508	556					1662	$last_nonblank_token = ';'; # the only possible starting state which
1509	556					1438	$last_nonblank_type = ';'; # will make a leading brace a code block
1510	556					1353	$last_nonblank_block_type = EMPTY_STRING;
1511
1512							# scalars for remembering statement types across multiple lines
1513	556					1409	$statement_type = EMPTY_STRING; # '' or 'use' or 'sub..' or 'case..'
1514
1515							# scalars for remembering where we are in the file
1516	556					1343	$current_package = "main";
1517	556					1212	$context = UNKNOWN_CONTEXT;
1518
1519							# hashes used to remember function information
1520	556					1915	$ris_constant = {}; # user-defined constants
1521	556					1613	$ris_user_function = {}; # user-defined functions
1522	556					1550	$ruser_function_prototype = {}; # their prototypes
1523	556					1706	$ris_block_function = {};
1524	556					1687	$ris_block_list_function = {};
1525	556					1405	$rsaw_function_definition = {};
1526	556					1418	$rsaw_use_module = {};
1527
1528							# variables used to track depths of various containers
1529							# and report nesting errors
1530	556					1283	$paren_depth = 0;
1531	556					1124	$brace_depth = 0;
1532	556					1033	$square_bracket_depth = 0;
1533	556					2434	$rcurrent_depth = [ (0) x scalar @closing_brace_names ];
1534	556					1179	$total_depth = 0;
1535	556					2172	$rtotal_depth = [];
1536	556					1942	$rcurrent_sequence_number = [];
1537	556					1347	$next_sequence_number = 2; # The value 1 is reserved for SEQ_ROOT
1538
1539	556					1973	$rparen_type = [];
1540	556					1635	$rparen_semicolon_count = [];
1541	556					1847	$rparen_structural_type = [];
1542	556					1744	$rbrace_type = [];
1543	556					1706	$rbrace_structural_type = [];
1544	556					1462	$rbrace_context = [];
1545	556					1623	$rbrace_package = [];
1546	556					1491	$rsquare_bracket_type = [];
1547	556					1430	$rsquare_bracket_structural_type = [];
1548	556					3467	$rdepth_array = [];
1549	556					1200	$rnested_ternary_flag = [];
1550	556					3879	$rnested_statement_type = [];
1551	556					3130	$rstarting_line_of_current_depth = [];
1552
1553	556					1744	$rparen_type->[$paren_depth] = EMPTY_STRING;
1554	556					1388	$rparen_semicolon_count->[$paren_depth] = 0;
1555	556					1379	$rparen_structural_type->[$brace_depth] = EMPTY_STRING;
1556	556					1400	$rbrace_type->[$brace_depth] = ';'; # identify opening brace as code block
1557	556					1360	$rbrace_structural_type->[$brace_depth] = EMPTY_STRING;
1558	556					1270	$rbrace_context->[$brace_depth] = UNKNOWN_CONTEXT;
1559	556					1413	$rbrace_package->[$paren_depth] = $current_package;
1560	556					1426	$rsquare_bracket_type->[$square_bracket_depth] = EMPTY_STRING;
1561	556					1296	$rsquare_bracket_structural_type->[$square_bracket_depth] = EMPTY_STRING;
1562
1563	556					2790	initialize_tokenizer_state();
1564	556					1158	return;
1565							} ## end sub prepare_for_a_new_file
1566
1567							{ ## closure for sub tokenize_this_line
1568
1569	38			38		375	use constant BRACE => 0;
	38					88
	38					2682
1570	38			38		293	use constant SQUARE_BRACKET => 1;
	38					81
	38					2187
1571	38			38		352	use constant PAREN => 2;
	38					132
	38					2409
1572	38			38		307	use constant QUESTION_COLON => 3;
	38					127
	38					87603
1573
1574							# TV1: scalars for processing one LINE.
1575							# Re-initialized on each entry to sub tokenize_this_line.
1576							my (
1577							$block_type, $container_type, $expecting,
1578							$i, $i_tok, $input_line,
1579							$input_line_number, $last_nonblank_i, $max_token_index,
1580							$next_tok, $next_type, $peeked_ahead,
1581							$prototype, $rhere_target_list, $rtoken_map,
1582							$rtoken_type, $rtokens, $tok,
1583							$type, $type_sequence, $indent_flag,
1584							);
1585
1586							# TV2: refs to ARRAYS for processing one LINE
1587							# Re-initialized on each call.
1588							my $routput_token_list = []; # stack of output token indexes
1589							my $routput_token_type = []; # token types
1590							my $routput_block_type = []; # types of code block
1591							my $routput_container_type = []; # paren types, such as if, elsif, ..
1592							my $routput_type_sequence = []; # nesting sequential number
1593							my $routput_indent_flag = []; #
1594
1595							# TV3: SCALARS for quote variables. These are initialized with a
1596							# subroutine call and continually updated as lines are processed.
1597							my ( $in_quote, $quote_type, $quote_character, $quote_pos, $quote_depth,
1598							$quoted_string_1, $quoted_string_2, $allowed_quote_modifiers, );
1599
1600							# TV4: SCALARS for multi-line identifiers and
1601							# statements. These are initialized with a subroutine call
1602							# and continually updated as lines are processed.
1603							my ( $id_scan_state, $identifier, $want_paren );
1604
1605							# TV5: SCALARS for tracking indentation level.
1606							# Initialized once and continually updated as lines are
1607							# processed.
1608							my (
1609							$nesting_token_string, $nesting_type_string,
1610							$nesting_block_string, $nesting_block_flag,
1611							$nesting_list_string, $nesting_list_flag,
1612							$ci_string_in_tokenizer, $continuation_string_in_tokenizer,
1613							$in_statement_continuation, $level_in_tokenizer,
1614							$slevel_in_tokenizer, $rslevel_stack,
1615							);
1616
1617							# TV6: SCALARS for remembering several previous
1618							# tokens. Initialized once and continually updated as
1619							# lines are processed.
1620							my (
1621							$last_nonblank_container_type, $last_nonblank_type_sequence,
1622							$last_last_nonblank_token, $last_last_nonblank_type,
1623							$last_last_nonblank_block_type, $last_last_nonblank_container_type,
1624							$last_last_nonblank_type_sequence, $last_nonblank_prototype,
1625							);
1626
1627							# ----------------------------------------------------------------
1628							# beginning of tokenizer variable access and manipulation routines
1629							# ----------------------------------------------------------------
1630
1631							sub initialize_tokenizer_state {
1632
1633							# GV1: initialized once
1634							# TV1: initialized on each call
1635							# TV2: initialized on each call
1636							# TV3:
1637	556			556	0	1264	$in_quote = 0;
1638	556					1335	$quote_type = 'Q';
1639	556					1140	$quote_character = EMPTY_STRING;
1640	556					1101	$quote_pos = 0;
1641	556					1072	$quote_depth = 0;
1642	556					1163	$quoted_string_1 = EMPTY_STRING;
1643	556					1144	$quoted_string_2 = EMPTY_STRING;
1644	556					1171	$allowed_quote_modifiers = EMPTY_STRING;
1645
1646							# TV4:
1647	556					1065	$id_scan_state = EMPTY_STRING;
1648	556					1196	$identifier = EMPTY_STRING;
1649	556					1299	$want_paren = EMPTY_STRING;
1650
1651							# TV5:
1652	556					1303	$nesting_token_string = EMPTY_STRING;
1653	556					1199	$nesting_type_string = EMPTY_STRING;
1654	556					1219	$nesting_block_string = '1'; # initially in a block
1655	556					1008	$nesting_block_flag = 1;
1656	556					1094	$nesting_list_string = '0'; # initially not in a list
1657	556					1058	$nesting_list_flag = 0; # initially not in a list
1658	556					1098	$ci_string_in_tokenizer = EMPTY_STRING;
1659	556					1164	$continuation_string_in_tokenizer = "0";
1660	556					1039	$in_statement_continuation = 0;
1661	556					1087	$level_in_tokenizer = 0;
1662	556					1082	$slevel_in_tokenizer = 0;
1663	556					1676	$rslevel_stack = [];
1664
1665							# TV6:
1666	556					1285	$last_nonblank_container_type = EMPTY_STRING;
1667	556					1281	$last_nonblank_type_sequence = EMPTY_STRING;
1668	556					1342	$last_last_nonblank_token = ';';
1669	556					1194	$last_last_nonblank_type = ';';
1670	556					1154	$last_last_nonblank_block_type = EMPTY_STRING;
1671	556					1087	$last_last_nonblank_container_type = EMPTY_STRING;
1672	556					1093	$last_last_nonblank_type_sequence = EMPTY_STRING;
1673	556					1045	$last_nonblank_prototype = EMPTY_STRING;
1674	556					1106	return;
1675							} ## end sub initialize_tokenizer_state
1676
1677							sub save_tokenizer_state {
1678
1679							# Global variables:
1680	0			0	0	0	my $rGV1 = [
1681							$brace_depth,
1682							$context,
1683							$current_package,
1684							$last_nonblank_block_type,
1685							$last_nonblank_token,
1686							$last_nonblank_type,
1687							$next_sequence_number,
1688							$paren_depth,
1689							$rbrace_context,
1690							$rbrace_package,
1691							$rbrace_structural_type,
1692							$rbrace_type,
1693							$rcurrent_depth,
1694							$rcurrent_sequence_number,
1695							$rdepth_array,
1696							$ris_block_function,
1697							$ris_block_list_function,
1698							$ris_constant,
1699							$ris_user_function,
1700							$rnested_statement_type,
1701							$rnested_ternary_flag,
1702							$rparen_semicolon_count,
1703							$rparen_structural_type,
1704							$rparen_type,
1705							$rsaw_function_definition,
1706							$rsaw_use_module,
1707							$rsquare_bracket_structural_type,
1708							$rsquare_bracket_type,
1709							$rstarting_line_of_current_depth,
1710							$rtotal_depth,
1711							$ruser_function_prototype,
1712							$square_bracket_depth,
1713							$statement_type,
1714							$total_depth,
1715
1716							];
1717
1718							# Tokenizer closure variables:
1719	0					0	my $rTV1 = [
1720							$block_type, $container_type, $expecting,
1721							$i, $i_tok, $input_line,
1722							$input_line_number, $last_nonblank_i, $max_token_index,
1723							$next_tok, $next_type, $peeked_ahead,
1724							$prototype, $rhere_target_list, $rtoken_map,
1725							$rtoken_type, $rtokens, $tok,
1726							$type, $type_sequence, $indent_flag,
1727							];
1728
1729	0					0	my $rTV2 = [
1730							$routput_token_list, $routput_token_type,
1731							$routput_block_type, $routput_container_type,
1732							$routput_type_sequence, $routput_indent_flag,
1733							];
1734
1735	0					0	my $rTV3 = [
1736							$in_quote, $quote_type,
1737							$quote_character, $quote_pos,
1738							$quote_depth, $quoted_string_1,
1739							$quoted_string_2, $allowed_quote_modifiers,
1740							];
1741
1742	0					0	my $rTV4 = [ $id_scan_state, $identifier, $want_paren ];
1743
1744	0					0	my $rTV5 = [
1745							$nesting_token_string, $nesting_type_string,
1746							$nesting_block_string, $nesting_block_flag,
1747							$nesting_list_string, $nesting_list_flag,
1748							$ci_string_in_tokenizer, $continuation_string_in_tokenizer,
1749							$in_statement_continuation, $level_in_tokenizer,
1750							$slevel_in_tokenizer, $rslevel_stack,
1751							];
1752
1753	0					0	my $rTV6 = [
1754							$last_nonblank_container_type,
1755							$last_nonblank_type_sequence,
1756							$last_last_nonblank_token,
1757							$last_last_nonblank_type,
1758							$last_last_nonblank_block_type,
1759							$last_last_nonblank_container_type,
1760							$last_last_nonblank_type_sequence,
1761							$last_nonblank_prototype,
1762							];
1763	0					0	return [ $rGV1, $rTV1, $rTV2, $rTV3, $rTV4, $rTV5, $rTV6 ];
1764							} ## end sub save_tokenizer_state
1765
1766							sub restore_tokenizer_state {
1767	0			0	0	0	my ($rstate) = @_;
1768	0					0	my ( $rGV1, $rTV1, $rTV2, $rTV3, $rTV4, $rTV5, $rTV6 ) = @{$rstate};
	0					0
1769
1770							(
1771							$brace_depth,
1772							$context,
1773							$current_package,
1774							$last_nonblank_block_type,
1775							$last_nonblank_token,
1776							$last_nonblank_type,
1777							$next_sequence_number,
1778							$paren_depth,
1779							$rbrace_context,
1780							$rbrace_package,
1781							$rbrace_structural_type,
1782							$rbrace_type,
1783							$rcurrent_depth,
1784							$rcurrent_sequence_number,
1785							$rdepth_array,
1786							$ris_block_function,
1787							$ris_block_list_function,
1788							$ris_constant,
1789							$ris_user_function,
1790							$rnested_statement_type,
1791							$rnested_ternary_flag,
1792							$rparen_semicolon_count,
1793							$rparen_structural_type,
1794							$rparen_type,
1795							$rsaw_function_definition,
1796							$rsaw_use_module,
1797							$rsquare_bracket_structural_type,
1798							$rsquare_bracket_type,
1799							$rstarting_line_of_current_depth,
1800							$rtotal_depth,
1801							$ruser_function_prototype,
1802							$square_bracket_depth,
1803							$statement_type,
1804							$total_depth,
1805
1806	0					0	) = @{$rGV1};
	0					0
1807
1808							(
1809							$block_type, $container_type, $expecting,
1810							$i, $i_tok, $input_line,
1811							$input_line_number, $last_nonblank_i, $max_token_index,
1812							$next_tok, $next_type, $peeked_ahead,
1813							$prototype, $rhere_target_list, $rtoken_map,
1814							$rtoken_type, $rtokens, $tok,
1815							$type, $type_sequence, $indent_flag,
1816	0					0	) = @{$rTV1};
	0					0
1817
1818							(
1819							$routput_token_list, $routput_token_type,
1820							$routput_block_type, $routput_container_type,
1821							$routput_type_sequence, $routput_indent_flag,
1822	0					0	) = @{$rTV2};
	0					0
1823
1824							(
1825							$in_quote, $quote_type, $quote_character, $quote_pos, $quote_depth,
1826							$quoted_string_1, $quoted_string_2, $allowed_quote_modifiers,
1827	0					0	) = @{$rTV3};
	0					0
1828
1829	0					0	( $id_scan_state, $identifier, $want_paren ) = @{$rTV4};
	0					0
1830
1831							(
1832							$nesting_token_string, $nesting_type_string,
1833							$nesting_block_string, $nesting_block_flag,
1834							$nesting_list_string, $nesting_list_flag,
1835							$ci_string_in_tokenizer, $continuation_string_in_tokenizer,
1836							$in_statement_continuation, $level_in_tokenizer,
1837							$slevel_in_tokenizer, $rslevel_stack,
1838	0					0	) = @{$rTV5};
	0					0
1839
1840							(
1841							$last_nonblank_container_type,
1842							$last_nonblank_type_sequence,
1843							$last_last_nonblank_token,
1844							$last_last_nonblank_type,
1845							$last_last_nonblank_block_type,
1846							$last_last_nonblank_container_type,
1847							$last_last_nonblank_type_sequence,
1848							$last_nonblank_prototype,
1849	0					0	) = @{$rTV6};
	0					0
1850	0					0	return;
1851							} ## end sub restore_tokenizer_state
1852
1853							sub split_pretoken {
1854
1855	8			8	0	18	my ( $self, $numc ) = @_;
1856
1857							# Split the leading $numc characters from the current token (at index=$i)
1858							# which is pre-type 'w' and insert the remainder back into the pretoken
1859							# stream with appropriate settings. Since we are splitting a pre-type 'w',
1860							# there are three cases, depending on if the remainder starts with a digit:
1861							# Case 1: remainder is type 'd', all digits
1862							# Case 2: remainder is type 'd' and type 'w': digits and other characters
1863							# Case 3: remainder is type 'w'
1864
1865							# Examples, for $numc=1:
1866							# $tok => $tok_0 $tok_1 $tok_2
1867							# 'x10' => 'x' '10' # case 1
1868							# 'x10if' => 'x' '10' 'if' # case 2
1869							# '0ne => 'O' 'ne' # case 3
1870
1871							# where:
1872							# $tok_1 is a possible string of digits (pre-type 'd')
1873							# $tok_2 is a possible word (pre-type 'w')
1874
1875							# return 1 if successful
1876							# return undef if error (shouldn't happen)
1877
1878							# Calling routine should update '$type' and '$tok' if successful.
1879
1880	8					21	my $pretoken = $rtokens->[$i];
1881	8	50	33			75	if ( $pretoken
			33
1882							&& length($pretoken) > $numc
1883							&& substr( $pretoken, $numc ) =~ /^(\d)(.)$/ )
1884							{
1885
1886							# Split $tok into up to 3 tokens:
1887	8					22	my $tok_0 = substr( $pretoken, 0, $numc );
1888	8	50				29	my $tok_1 = defined($1) ? $1 : EMPTY_STRING;
1889	8	50				27	my $tok_2 = defined($2) ? $2 : EMPTY_STRING;
1890
1891	8					17	my $len_0 = length($tok_0);
1892	8					14	my $len_1 = length($tok_1);
1893	8					13	my $len_2 = length($tok_2);
1894
1895	8					14	my $pre_type_0 = 'w';
1896	8					14	my $pre_type_1 = 'd';
1897	8					12	my $pre_type_2 = 'w';
1898
1899	8					16	my $pos_0 = $rtoken_map->[$i];
1900	8					25	my $pos_1 = $pos_0 + $len_0;
1901	8					19	my $pos_2 = $pos_1 + $len_1;
1902
1903	8					15	my $isplice = $i + 1;
1904
1905							# Splice in any digits
1906	8	100				20	if ($len_1) {
1907	5					10	splice @{$rtoken_map}, $isplice, 0, $pos_1;
	5					14
1908	5					10	splice @{$rtokens}, $isplice, 0, $tok_1;
	5					13
1909	5					11	splice @{$rtoken_type}, $isplice, 0, $pre_type_1;
	5					10
1910	5					10	$max_token_index++;
1911	5					8	$isplice++;
1912							}
1913
1914							# Splice in any trailing word
1915	8	100				22	if ($len_2) {
1916	4					7	splice @{$rtoken_map}, $isplice, 0, $pos_2;
	4					11
1917	4					21	splice @{$rtokens}, $isplice, 0, $tok_2;
	4					9
1918	4					8	splice @{$rtoken_type}, $isplice, 0, $pre_type_2;
	4					8
1919	4					6	$max_token_index++;
1920							}
1921
1922	8					18	$rtokens->[$i] = $tok_0;
1923	8					31	return 1;
1924							}
1925							else {
1926
1927							# Shouldn't get here
1928	0					0	if (DEVEL_MODE) {
1929							$self->Fault(<<EOM);
1930							While working near line number $input_line_number, bad arg '$tok' passed to sub split_pretoken()
1931							EOM
1932							}
1933							}
1934	0					0	return;
1935							} ## end sub split_pretoken
1936
1937							sub get_indentation_level {
1938	556			556	0	1528	return $level_in_tokenizer;
1939							}
1940
1941							sub reset_indentation_level {
1942	556			556	0	1553	$level_in_tokenizer = $slevel_in_tokenizer = shift;
1943	556					1062	push @{$rslevel_stack}, $slevel_in_tokenizer;
	556					1478
1944	556					1056	return;
1945							}
1946
1947							sub peeked_ahead {
1948	232			232	0	429	my $flag = shift;
1949	232	100				588	$peeked_ahead = defined($flag) ? $flag : $peeked_ahead;
1950	232					534	return $peeked_ahead;
1951							}
1952
1953							# ------------------------------------------------------------
1954							# end of tokenizer variable access and manipulation routines
1955							# ------------------------------------------------------------
1956
1957							#------------------------------
1958							# beginning of tokenizer hashes
1959							#------------------------------
1960
1961							my %matching_start_token = ( '}' => '{', ']' => '[', ')' => '(' );
1962
1963							# These block types terminate statements and do not need a trailing
1964							# semicolon
1965							# patched for SWITCH/CASE/
1966							my %is_zero_continuation_block_type;
1967							my @q;
1968							@q = qw( } { BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue ;
1969							if elsif else unless while until for foreach switch case given when);
1970							@is_zero_continuation_block_type{@q} = (1) x scalar(@q);
1971
1972							my %is_logical_container;
1973							@q = qw(if elsif unless while and or err not && ! \|\| for foreach);
1974							@is_logical_container{@q} = (1) x scalar(@q);
1975
1976							my %is_binary_type;
1977							@q = qw(\|\| &&);
1978							@is_binary_type{@q} = (1) x scalar(@q);
1979
1980							my %is_binary_keyword;
1981							@q = qw(and or err eq ne cmp);
1982							@is_binary_keyword{@q} = (1) x scalar(@q);
1983
1984							# 'L' is token for opening { at hash key
1985							my %is_opening_type;
1986							@q = qw< L { ( [ >;
1987							@is_opening_type{@q} = (1) x scalar(@q);
1988
1989							my %is_opening_or_ternary_type;
1990							push @q, '?';
1991							@is_opening_or_ternary_type{@q} = (1) x scalar(@q);
1992
1993							# 'R' is token for closing } at hash key
1994							my %is_closing_type;
1995							@q = qw< R } ) ] >;
1996							@is_closing_type{@q} = (1) x scalar(@q);
1997
1998							my %is_closing_or_ternary_type;
1999							push @q, ':';
2000							@is_closing_or_ternary_type{@q} = (1) x scalar(@q);
2001
2002							my %is_redo_last_next_goto;
2003							@q = qw(redo last next goto);
2004							@is_redo_last_next_goto{@q} = (1) x scalar(@q);
2005
2006							my %is_use_require;
2007							@q = qw(use require);
2008							@is_use_require{@q} = (1) x scalar(@q);
2009
2010							# This hash holds the array index in $self for these keywords:
2011							# Fix for issue c035: removed 'format' from this hash
2012							my %is_END_DATA = (
2013							'__END__' => _in_end_,
2014							'__DATA__' => _in_data_,
2015							);
2016
2017							my %is_list_end_type;
2018							@q = qw( ; { } );
2019							push @q, ',';
2020							@is_list_end_type{@q} = (1) x scalar(@q);
2021
2022							# original ref: camel 3 p 147,
2023							# but perl may accept undocumented flags
2024							# perl 5.10 adds 'p' (preserve)
2025							# Perl version 5.22 added 'n'
2026							# From http://perldoc.perl.org/perlop.html we have
2027							# /PATTERN/msixpodualngc or m?PATTERN?msixpodualngc
2028							# s/PATTERN/REPLACEMENT/msixpodualngcer
2029							# y/SEARCHLIST/REPLACEMENTLIST/cdsr
2030							# tr/SEARCHLIST/REPLACEMENTLIST/cdsr
2031							# qr/STRING/msixpodualn
2032							my %quote_modifiers = (
2033							's' => '[msixpodualngcer]',
2034							'y' => '[cdsr]',
2035							'tr' => '[cdsr]',
2036							'm' => '[msixpodualngc]',
2037							'qr' => '[msixpodualn]',
2038							'q' => EMPTY_STRING,
2039							'qq' => EMPTY_STRING,
2040							'qw' => EMPTY_STRING,
2041							'qx' => EMPTY_STRING,
2042							);
2043
2044							# table showing how many quoted things to look for after quote operator..
2045							# s, y, tr have 2 (pattern and replacement)
2046							# others have 1 (pattern only)
2047							my %quote_items = (
2048							's' => 2,
2049							'y' => 2,
2050							'tr' => 2,
2051							'm' => 1,
2052							'qr' => 1,
2053							'q' => 1,
2054							'qq' => 1,
2055							'qw' => 1,
2056							'qx' => 1,
2057							);
2058
2059							my %is_for_foreach;
2060							@q = qw(for foreach);
2061							@is_for_foreach{@q} = (1) x scalar(@q);
2062
2063							# These keywords may introduce blocks after parenthesized expressions,
2064							# in the form:
2065							# keyword ( .... ) { BLOCK }
2066							# patch for SWITCH/CASE: added 'switch' 'case' 'given' 'when'
2067							# NOTE for --use-feature=class: if ADJUST blocks eventually take a
2068							# parameter list, then ADJUST might need to be added to this list (see
2069							# perlclass.pod)
2070							my %is_blocktype_with_paren;
2071							@q =
2072							qw(if elsif unless while until for foreach switch case given when catch);
2073							@is_blocktype_with_paren{@q} = (1) x scalar(@q);
2074
2075							my %is_case_default;
2076							@q = qw(case default);
2077							@is_case_default{@q} = (1) x scalar(@q);
2078
2079							#------------------------
2080							# end of tokenizer hashes
2081							#------------------------
2082
2083							# ------------------------------------------------------------
2084							# beginning of various scanner interface routines
2085							# ------------------------------------------------------------
2086							sub scan_replacement_text {
2087
2088							# check for here-docs in replacement text invoked by
2089							# a substitution operator with executable modifier 'e'.
2090							#
2091							# given:
2092							# $replacement_text
2093							# return:
2094							# $rht = reference to any here-doc targets
2095	0			0	0	0	my ( $self, $replacement_text ) = @_;
2096
2097							# quick check
2098	0	0				0	return unless ( $replacement_text =~ /<</ );
2099
2100	0					0	$self->write_logfile_entry(
2101							"scanning replacement text for here-doc targets\n");
2102
2103							# save the logger object for error messages
2104	0					0	my $logger_object = $self->[_logger_object_];
2105
2106							# save all lexical variables
2107	0					0	my $rstate = save_tokenizer_state();
2108	0					0	_decrement_count(); # avoid error check for multiple tokenizers
2109
2110							# make a new tokenizer
2111	0					0	my $rOpts = {};
2112	0					0	my $source_object = Perl::Tidy::LineSource->new(
2113							input_file => \$replacement_text,
2114							rOpts => $rOpts,
2115							);
2116	0					0	my $tokenizer = Perl::Tidy::Tokenizer->new(
2117							source_object => $source_object,
2118							logger_object => $logger_object,
2119							starting_line_number => $input_line_number,
2120							);
2121
2122							# scan the replacement text
2123	0					0	1 while ( $tokenizer->get_line() );
2124
2125							# remove any here doc targets
2126	0					0	my $rht = undef;
2127	0	0				0	if ( $tokenizer->[_in_here_doc_] ) {
2128	0					0	$rht = [];
2129	0					0	push @{$rht},
	0					0
2130							[
2131							$tokenizer->[_here_doc_target_],
2132							$tokenizer->[_here_quote_character_]
2133							];
2134	0	0				0	if ( $tokenizer->[_rhere_target_list_] ) {
2135	0					0	push @{$rht}, @{ $tokenizer->[_rhere_target_list_] };
	0					0
	0					0
2136	0					0	$tokenizer->[_rhere_target_list_] = undef;
2137							}
2138	0					0	$tokenizer->[_in_here_doc_] = undef;
2139							}
2140
2141							# now its safe to report errors
2142	0					0	my $severe_error = $tokenizer->report_tokenization_errors();
2143
2144							# TODO: Could propagate a severe error up
2145
2146							# restore all tokenizer lexical variables
2147	0					0	restore_tokenizer_state($rstate);
2148
2149							# return the here doc targets
2150	0					0	return $rht;
2151							} ## end sub scan_replacement_text
2152
2153							sub scan_bare_identifier {
2154	1672			1672	0	3074	my $self = shift;
2155	1672					5135	( $i, $tok, $type, $prototype ) =
2156							$self->scan_bare_identifier_do( $input_line, $i, $tok, $type,
2157							$prototype, $rtoken_map, $max_token_index );
2158	1672					3496	return;
2159							} ## end sub scan_bare_identifier
2160
2161							sub scan_identifier {
2162
2163	486			486	0	923	my $self = shift;
2164
2165							(
2166	486					2936	$i, $tok, $type, $id_scan_state, $identifier,
2167							my $split_pretoken_flag
2168							)
2169							= $self->scan_complex_identifier( $i, $id_scan_state, $identifier,
2170							$rtokens, $max_token_index, $expecting,
2171							$rparen_type->[$paren_depth] );
2172
2173							# Check for signal to fix a special variable adjacent to a keyword,
2174							# such as '$^One$0'.
2175	486	100				1587	if ($split_pretoken_flag) {
2176
2177							# Try to fix it by splitting the pretoken
2178	3	50	33			34	if ( $i > 0
			33
2179							&& $rtokens->[ $i - 1 ] eq '^'
2180							&& $self->split_pretoken(1) )
2181							{
2182	3					8	$identifier = substr( $identifier, 0, 3 );
2183	3					6	$tok = $identifier;
2184							}
2185							else {
2186
2187							# This shouldn't happen ...
2188	0					0	my $var = substr( $tok, 0, 3 );
2189	0					0	my $excess = substr( $tok, 3 );
2190	0					0	$self->interrupt_logfile();
2191	0					0	$self->warning(<<EOM);
2192							$input_line_number: Trouble parsing at characters '$excess' after special variable '$var'.
2193							A space may be needed after '$var'.
2194							EOM
2195	0					0	$self->resume_logfile();
2196							}
2197							}
2198	486					908	return;
2199							} ## end sub scan_identifier
2200
2201	38			38		484	use constant VERIFY_FASTSCAN => 0;
	38					95
	38					3780
2202							my %fast_scan_context;
2203
2204							BEGIN {
2205	38			38		45804	%fast_scan_context = (
2206							'$' => SCALAR_CONTEXT,
2207							'*' => SCALAR_CONTEXT,
2208							'@' => LIST_CONTEXT,
2209							'%' => LIST_CONTEXT,
2210							'&' => UNKNOWN_CONTEXT,
2211							);
2212							} ## end BEGIN
2213
2214							sub scan_simple_identifier {
2215
2216							# This is a wrapper for sub scan_identifier. It does a fast preliminary
2217							# scan for certain common identifiers:
2218							# '$var', '@var', %var, *var, &var, '@{...}', '%{...}'
2219							# If it does not find one of these, or this is a restart, it calls the
2220							# original scanner directly.
2221
2222							# This gives the same results as the full scanner in about 1/4 the
2223							# total runtime for a typical input stream.
2224
2225							# Notation:
2226							# $var * 2
2227							# ^^ ^
2228							# \|\| \|
2229							# \|\| ---- $i_next [= next nonblank pretoken ]
2230							# \|----$i_plus_1 [= a bareword ]
2231							# ---$i_begin [= a sigil]
2232
2233	4779			4779	0	7198	my $self = shift;
2234
2235	4779					6976	my $i_begin = $i;
2236	4779					7512	my $tok_begin = $tok;
2237	4779					7187	my $i_plus_1 = $i + 1;
2238	4779					6847	my $fast_scan_type;
2239
2240							#-------------------------------------------------------
2241							# Do full scan for anything following a pointer, such as
2242							# $cref->&*; # a postderef
2243							#-------------------------------------------------------
2244	4779	100	66			27116	if ( $last_nonblank_token eq '->' ) {
		100	66
		50	33
			0
			33
2245
2246							}
2247
2248							#------------------------------
2249							# quick scan with leading sigil
2250							#------------------------------
2251							elsif ( !$id_scan_state
2252							&& $i_plus_1 <= $max_token_index
2253							&& $fast_scan_context{$tok} )
2254							{
2255	4666					8780	$context = $fast_scan_context{$tok};
2256
2257							# look for $var, @var, ...
2258	4666	100	100			11470	if ( $rtoken_type->[$i_plus_1] eq 'w' ) {
		100	66
2259	4378					7355	my $pretype_next = EMPTY_STRING;
2260	4378	100				9429	if ( $i_plus_1 < $max_token_index ) {
2261	4262					7113	my $i_next = $i_plus_1 + 1;
2262	4262	100	100			13989	if ( $rtoken_type->[$i_next] eq 'b'
2263							&& $i_next < $max_token_index )
2264							{
2265	1699					2902	$i_next += 1;
2266							}
2267	4262					7283	$pretype_next = $rtoken_type->[$i_next];
2268							}
2269	4378	100	100			15813	if ( $pretype_next ne ':' && $pretype_next ne "'" ) {
2270
2271							# Found type 'i' like '$var', '@var', or '%var'
2272	4270					8548	$identifier = $tok . $rtokens->[$i_plus_1];
2273	4270					6820	$tok = $identifier;
2274	4270					6984	$type = 'i';
2275	4270					6107	$i = $i_plus_1;
2276	4270					7201	$fast_scan_type = $type;
2277							}
2278							}
2279
2280							# Look for @{ or %{ .
2281							# But we must let the full scanner handle things ${ because it may
2282							# keep going to get a complete identifier like '${#}' .
2283							elsif (
2284							$rtoken_type->[$i_plus_1] eq '{'
2285							&& ( $tok_begin eq '@'
2286							\|\| $tok_begin eq '%' )
2287							)
2288							{
2289
2290	30					81	$identifier = $tok;
2291	30					63	$type = 't';
2292	30					62	$fast_scan_type = $type;
2293							}
2294							}
2295
2296							#---------------------------
2297							# Quick scan with leading ->
2298							# Look for ->[ and ->{
2299							#---------------------------
2300							elsif (
2301							$tok eq '->'
2302							&& $i < $max_token_index
2303							&& ( $rtokens->[$i_plus_1] eq '{'
2304							\|\| $rtokens->[$i_plus_1] eq '[' )
2305							)
2306							{
2307	0					0	$type = $tok;
2308	0					0	$fast_scan_type = $type;
2309	0					0	$identifier = $tok;
2310	0					0	$context = UNKNOWN_CONTEXT;
2311							}
2312
2313							#--------------------------------------
2314							# Verify correctness during development
2315							#--------------------------------------
2316	4779					6730	if ( VERIFY_FASTSCAN && $fast_scan_type ) {
2317
2318							# We will call the full method
2319							my $identifier_simple = $identifier;
2320							my $tok_simple = $tok;
2321							my $i_simple = $i;
2322							my $context_simple = $context;
2323
2324							$tok = $tok_begin;
2325							$i = $i_begin;
2326							$self->scan_identifier();
2327
2328							if ( $tok ne $tok_simple
2329							\|\| $type ne $fast_scan_type
2330							\|\| $i != $i_simple
2331							\|\| $identifier ne $identifier_simple
2332							\|\| $id_scan_state
2333							\|\| $context ne $context_simple )
2334							{
2335							print STDERR <<EOM;
2336							scan_simple_identifier differs from scan_identifier:
2337							simple: i=$i_simple, tok=$tok_simple, type=$fast_scan_type, ident=$identifier_simple, context='$context_simple
2338							full: i=$i, tok=$tok, type=$type, ident=$identifier, context='$context state=$id_scan_state
2339							EOM
2340							}
2341							}
2342
2343							#-------------------------------------------------
2344							# call full scanner if fast method did not succeed
2345							#-------------------------------------------------
2346	4779	100				10150	if ( !$fast_scan_type ) {
2347	479					1798	$self->scan_identifier();
2348							}
2349	4779					7961	return;
2350							} ## end sub scan_simple_identifier
2351
2352							sub method_ok_here {
2353
2354							# Return:
2355							# false if this is definitely an invalid method declaration
2356							# true otherwise (even if not sure)
2357
2358							# We are trying to avoid problems with old uses of 'method'
2359							# when --use-feature=class is set (rt145706).
2360							# For example, this should cause a return of 'false':
2361
2362							# method paint => sub {
2363							# return;
2364							# };
2365
2366	8			8	0	21	my $self = shift;
2367
2368							# from do_scan_sub:
2369	8					18	my $i_beg = $i + 1;
2370	8					22	my $pos_beg = $rtoken_map->[$i_beg];
2371	8					24	pos($input_line) = $pos_beg;
2372
2373							# TEST 1: look a valid sub NAME
2374	8	50				54	if (
2375							$input_line =~ m/\G\s*
2376							((?:\w(?:'\|::))) # package - something that ends in :: or '
2377							(\w+) # NAME - required
2378							/gcx
2379							)
2380							{
2381							# For possible future use..
2382	8					25	my $subname = $2;
2383	8	50				33	my $package = $1 ? $1 : EMPTY_STRING;
2384							}
2385							else {
2386	0					0	return;
2387							}
2388
2389							# TEST 2: look for invalid characters after name, such as here:
2390							# method paint => sub {
2391							# ...
2392							# }
2393	8					19	my $next_char = EMPTY_STRING;
2394	8	100				38	if ( $input_line =~ m/\s*(\S)/gcx ) { $next_char = $1 }
	7					21
2395	8	100	66			48	if ( !$next_char \|\| $next_char eq '#' ) {
2396	1					14	( $next_char, my $i_next ) =
2397							$self->find_next_nonblank_token( $max_token_index,
2398							$rtokens, $max_token_index );
2399							}
2400
2401	8	50				27	if ( !$next_char ) {
2402
2403							# out of characters - give up
2404	0					0	return;
2405							}
2406
2407							# Possibly valid next token types:
2408							# '(' could start prototype or signature
2409							# ':' could start ATTRIBUTE
2410							# '{' cold start BLOCK
2411							# ';' or '}' could end a statement
2412	8	100				30	if ( $next_char !~ /^[\(\:\{\;\}]/ ) {
2413
2414							# This does not match use feature 'class' syntax
2415	3					13	return;
2416							}
2417
2418							# We will stop here and assume that this is valid syntax for
2419							# use feature 'class'.
2420	5					25	return 1;
2421							} ## end sub method_ok_here
2422
2423							sub class_ok_here {
2424
2425							# Return:
2426							# false if this is definitely an invalid class declaration
2427							# true otherwise (even if not sure)
2428
2429							# We are trying to avoid problems with old uses of 'class'
2430							# when --use-feature=class is set (rt145706). We look ahead
2431							# see if this use of 'class' is obviously inconsistent with
2432							# the syntax of use feature 'class'. This allows the default
2433							# setting --use-feature=class to work for old syntax too.
2434
2435							# Valid class declarations look like
2436							# class NAME ?ATTRS ?VERSION ?BLOCK
2437							# where ATTRS VERSION and BLOCK are optional
2438
2439							# For example, this should produce a return of 'false':
2440							#
2441							# class ExtendsBasicAttributes is BasicAttributes{
2442
2443	6			6	0	12	my $self = shift;
2444
2445							# TEST 1: class stmt can only go where a new statment can start
2446	6	50				18	if ( !new_statement_ok() ) { return }
	0					0
2447
2448	6					12	my $i_beg = $i + 1;
2449	6					15	my $pos_beg = $rtoken_map->[$i_beg];
2450	6					19	pos($input_line) = $pos_beg;
2451
2452							# TEST 2: look for a valid NAME
2453	6	50				35	if (
2454							$input_line =~ m/\G\s*
2455							((?:\w(?:'\|::))) # package - something that ends in :: or '
2456							(\w+) # NAME - required
2457							/gcx
2458							)
2459							{
2460							# For possible future use..
2461	6					20	my $subname = $2;
2462	6	100				20	my $package = $1 ? $1 : EMPTY_STRING;
2463							}
2464							else {
2465	0					0	return;
2466							}
2467
2468							# TEST 3: look for valid characters after NAME
2469	6					10	my $next_char = EMPTY_STRING;
2470	6	100				20	if ( $input_line =~ m/\s*(\S)/gcx ) { $next_char = $1 }
	5					18
2471	6	100	66			27	if ( !$next_char \|\| $next_char eq '#' ) {
2472	1					4	( $next_char, my $i_next ) =
2473							$self->find_next_nonblank_token( $max_token_index,
2474							$rtokens, $max_token_index );
2475							}
2476	6	50				17	if ( !$next_char ) {
2477
2478							# out of characters - give up
2479	0					0	return;
2480							}
2481
2482							# Must see one of: ATTRIBUTE, VERSION, BLOCK, or end stmt
2483
2484							# Possibly valid next token types:
2485							# ':' could start ATTRIBUTE
2486							# '\d' could start VERSION
2487							# '{' cold start BLOCK
2488							# ';' could end a statement
2489							# '}' could end statement but would be strange
2490
2491	6	100				22	if ( $next_char !~ /^[\:\d\{\;\}]/ ) {
2492
2493							# This does not match use feature 'class' syntax
2494	2					9	return;
2495							}
2496
2497							# We will stop here and assume that this is valid syntax for
2498							# use feature 'class'.
2499	4					17	return 1;
2500							} ## end sub class_ok_here
2501
2502							sub scan_id {
2503	330			330	0	706	my $self = shift;
2504	330					1299	( $i, $tok, $type, $id_scan_state ) =
2505							$self->scan_id_do( $input_line, $i, $tok, $rtokens, $rtoken_map,
2506							$id_scan_state, $max_token_index );
2507	330					818	return;
2508							} ## end sub scan_id
2509
2510							sub scan_number {
2511	629			629	0	1134	my $self = shift;
2512	629					1021	my $number;
2513	629					1848	( $i, $type, $number ) =
2514							$self->scan_number_do( $input_line, $i, $rtoken_map, $type,
2515							$max_token_index );
2516	629					1530	return $number;
2517							} ## end sub scan_number
2518
2519	38			38		1630	use constant VERIFY_FASTNUM => 0;
	38					98
	38					351150
2520
2521							sub scan_number_fast {
2522
2523							# This is a wrapper for sub scan_number. It does a fast preliminary
2524							# scan for a simple integer. It calls the original scan_number if it
2525							# does not find one.
2526
2527	2272			2272	0	3565	my $self = shift;
2528	2272					3440	my $i_begin = $i;
2529	2272					3499	my $tok_begin = $tok;
2530	2272					3248	my $number;
2531
2532							#---------------------------------
2533							# Quick check for (signed) integer
2534							#---------------------------------
2535
2536							# This will be the string of digits:
2537	2272					3355	my $i_d = $i;
2538	2272					3453	my $tok_d = $tok;
2539	2272					4254	my $typ_d = $rtoken_type->[$i_d];
2540
2541							# check for signed integer
2542	2272					3666	my $sign = EMPTY_STRING;
2543	2272	50	66			6962	if ( $typ_d ne 'd'
			66
			33
2544							&& ( $typ_d eq '+' \|\| $typ_d eq '-' )
2545							&& $i_d < $max_token_index )
2546							{
2547	343					595	$sign = $tok_d;
2548	343					532	$i_d++;
2549	343					592	$tok_d = $rtokens->[$i_d];
2550	343					614	$typ_d = $rtoken_type->[$i_d];
2551							}
2552
2553							# Handle integers
2554	2272	100	100			16419	if (
			100
2555							$typ_d eq 'd'
2556							&& (
2557							$i_d == $max_token_index
2558							\|\| ( $i_d < $max_token_index
2559							&& $rtoken_type->[ $i_d + 1 ] ne '.'
2560							&& $rtoken_type->[ $i_d + 1 ] ne 'w' )
2561							)
2562							)
2563							{
2564							# Let let full scanner handle multi-digit integers beginning with
2565							# '0' because there could be error messages. For example, '009' is
2566							# not a valid number.
2567
2568	1710	100	100			7451	if ( $tok_d eq '0' \|\| substr( $tok_d, 0, 1 ) ne '0' ) {
2569	1653					3136	$number = $sign . $tok_d;
2570	1653					2652	$type = 'n';
2571	1653					2682	$i = $i_d;
2572							}
2573							}
2574
2575							#--------------------------------------
2576							# Verify correctness during development
2577							#--------------------------------------
2578	2272					3278	if ( VERIFY_FASTNUM && defined($number) ) {
2579
2580							# We will call the full method
2581							my $type_simple = $type;
2582							my $i_simple = $i;
2583							my $number_simple = $number;
2584
2585							$tok = $tok_begin;
2586							$i = $i_begin;
2587							$number = $self->scan_number();
2588
2589							if ( $type ne $type_simple
2590							\|\| ( $i != $i_simple && $i <= $max_token_index )
2591							\|\| $number ne $number_simple )
2592							{
2593							print STDERR <<EOM;
2594							scan_number_fast differs from scan_number:
2595							simple: i=$i_simple, type=$type_simple, number=$number_simple
2596							full: i=$i, type=$type, number=$number
2597							EOM
2598							}
2599							}
2600
2601							#----------------------------------------
2602							# call full scanner if may not be integer
2603							#----------------------------------------
2604	2272	100				4978	if ( !defined($number) ) {
2605	619					1585	$number = $self->scan_number();
2606							}
2607	2272					5263	return $number;
2608							} ## end sub scan_number_fast
2609
2610							# a sub to warn if token found where term expected
2611							sub error_if_expecting_TERM {
2612	9			9	0	19	my $self = shift;
2613	9	50				29	if ( $expecting == TERM ) {
2614	9	50				30	if ( $really_want_term{$last_nonblank_type} ) {
2615	0					0	$self->report_unexpected( $tok, "term", $i_tok,
2616							$last_nonblank_i, $rtoken_map, $rtoken_type, $input_line );
2617	0					0	return 1;
2618							}
2619							}
2620	9					18	return;
2621							} ## end sub error_if_expecting_TERM
2622
2623							# a sub to warn if token found where operator expected
2624							sub error_if_expecting_OPERATOR {
2625	769			769	0	1666	my ( $self, $thing ) = @_;
2626	769	50				1833	if ( $expecting == OPERATOR ) {
2627	0	0				0	if ( !defined($thing) ) { $thing = $tok }
	0					0
2628	0					0	$self->report_unexpected( $thing, "operator", $i_tok,
2629							$last_nonblank_i, $rtoken_map, $rtoken_type, $input_line );
2630	0	0				0	if ( $i_tok == 0 ) {
2631	0					0	$self->interrupt_logfile();
2632	0					0	$self->warning("Missing ';' or ',' above?\n");
2633	0					0	$self->resume_logfile();
2634							}
2635	0					0	return 1;
2636							}
2637	769					1517	return;
2638							} ## end sub error_if_expecting_OPERATOR
2639
2640							# ------------------------------------------------------------
2641							# end scanner interfaces
2642							# ------------------------------------------------------------
2643
2644							#------------------
2645							# Tokenization subs
2646							#------------------
2647							sub do_GREATER_THAN_SIGN {
2648
2649	31			31	0	84	my $self = shift;
2650
2651							# '>'
2652	31	50				118	$self->error_if_expecting_TERM()
2653							if ( $expecting == TERM );
2654	31					71	return;
2655							} ## end sub do_GREATER_THAN_SIGN
2656
2657							sub do_VERTICAL_LINE {
2658
2659	4			4	0	10	my $self = shift;
2660
2661							# '\|'
2662	4	50				12	$self->error_if_expecting_TERM()
2663							if ( $expecting == TERM );
2664	4					7	return;
2665							} ## end sub do_VERTICAL_LINE
2666
2667							sub do_DOLLAR_SIGN {
2668
2669	4024			4024	0	7182	my $self = shift;
2670
2671							# '$'
2672							# start looking for a scalar
2673	4024	50				9035	$self->error_if_expecting_OPERATOR("Scalar")
2674							if ( $expecting == OPERATOR );
2675	4024					12174	$self->scan_simple_identifier();
2676
2677	4024	100				9213	if ( $identifier eq '$^W' ) {
2678	1					4	$self->[_saw_perl_dash_w_] = 1;
2679							}
2680
2681							# Check for identifier in indirect object slot
2682							# (vorboard.pl, sort.t). Something like:
2683							# /^(print\|printf\|sort\|exec\|system)$/
2684	4024	100	66			31930	if (
			100
			100
			66
			66
2685							$is_indirect_object_taker{$last_nonblank_token}
2686							&& $last_nonblank_type eq 'k'
2687							\|\| ( ( $last_nonblank_token eq '(' )
2688							&& $is_indirect_object_taker{ $rparen_type->[$paren_depth] } )
2689							\|\| ( $last_nonblank_type eq 'w'
2690							\|\| $last_nonblank_type eq 'U' ) # possible object
2691							)
2692							{
2693
2694							# An identifier followed by '->' is not indirect object;
2695							# fixes b1175, b1176
2696	98					647	my ( $next_nonblank_type, $i_next ) =
2697							$self->find_next_noncomment_type( $i, $rtokens,
2698							$max_token_index );
2699	98	100				379	$type = 'Z' if ( $next_nonblank_type ne '->' );
2700							}
2701	4024					6532	return;
2702							} ## end sub do_DOLLAR_SIGN
2703
2704							sub do_LEFT_PARENTHESIS {
2705
2706	2121			2121	0	4408	my $self = shift;
2707
2708							# '('
2709	2121					3533	++$paren_depth;
2710	2121					4350	$rparen_semicolon_count->[$paren_depth] = 0;
2711	2121	100				6387	if ($want_paren) {
		100
2712	68					193	$container_type = $want_paren;
2713	68					164	$want_paren = EMPTY_STRING;
2714							}
2715							elsif ( $statement_type =~ /^sub\b/ ) {
2716	14					42	$container_type = $statement_type;
2717							}
2718							else {
2719	2039					3570	$container_type = $last_nonblank_token;
2720
2721							# We can check for a syntax error here of unexpected '(',
2722							# but this is going to get messy...
2723	2039	100	100			7969	if (
2724							$expecting == OPERATOR
2725
2726							# Be sure this is not a method call of the form
2727							# &method(...), $method->(..), &{method}(...),
2728							# $ref[2](list) is ok & short for $ref[2]->(list)
2729							# NOTE: at present, braces in something like &{ xxx }
2730							# are not marked as a block, we might have a method call.
2731							# Added ')' to fix case c017, something like ()()()
2732							&& $last_nonblank_token !~ /^([\]\}\)\&]\|\-\>)/
2733							)
2734							{
2735
2736							# ref: camel 3 p 703.
2737	3	50				11	if ( $last_last_nonblank_token eq 'do' ) {
2738	0					0	$self->complain(
2739							"do SUBROUTINE is deprecated; consider & or -> notation\n"
2740							);
2741							}
2742							else {
2743
2744							# if this is an empty list, (), then it is not an
2745							# error; for example, we might have a constant pi and
2746							# invoke it with pi() or just pi;
2747	3					10	my ( $next_nonblank_token, $i_next ) =
2748							$self->find_next_nonblank_token( $i, $rtokens,
2749							$max_token_index );
2750
2751							# Patch for c029: give up error check if
2752							# a side comment follows
2753	3	50	33			20	if ( $next_nonblank_token ne ')'
2754							&& $next_nonblank_token ne '#' )
2755							{
2756	0					0	my $hint;
2757
2758	0					0	$self->error_if_expecting_OPERATOR('(');
2759
2760	0	0				0	if ( $last_nonblank_type eq 'C' ) {
		0
2761	0					0	$hint =
2762							"$last_nonblank_token has a void prototype\n";
2763							}
2764							elsif ( $last_nonblank_type eq 'i' ) {
2765	0	0	0			0	if ( $i_tok > 0
2766							&& $last_nonblank_token =~ /^\$/ )
2767							{
2768	0					0	$hint =
2769							"Do you mean '$last_nonblank_token->(' ?\n";
2770							}
2771							}
2772	0	0				0	if ($hint) {
2773	0					0	$self->interrupt_logfile();
2774	0					0	$self->warning($hint);
2775	0					0	$self->resume_logfile();
2776							}
2777							} ## end if ( $next_nonblank_token...
2778							} ## end else [ if ( $last_last_nonblank_token...
2779							} ## end if ( $expecting == OPERATOR...
2780							}
2781
2782							# Do not update container type at ') ('; fix for git #105. This will
2783							# propagate the container type onward so that any subsequent brace gets
2784							# correctly marked. I have implemented this as a general rule, which
2785							# should be safe, but if necessary it could be restricted to certain
2786							# container statement types such as 'for'.
2787	2121	100				6272	$rparen_type->[$paren_depth] = $container_type
2788							if ( $last_nonblank_token ne ')' );
2789
2790	2121					6422	( $type_sequence, $indent_flag ) =
2791							$self->increase_nesting_depth( PAREN, $rtoken_map->[$i_tok] );
2792
2793							# propagate types down through nested parens
2794							# for example: the second paren in 'if ((' would be structural
2795							# since the first is.
2796
2797	2121	100				5635	if ( $last_nonblank_token eq '(' ) {
2798	61					233	$type = $last_nonblank_type;
2799							}
2800
2801							# We exclude parens as structural after a ',' because it
2802							# causes subtle problems with continuation indentation for
2803							# something like this, where the first 'or' will not get
2804							# indented.
2805							#
2806							# assert(
2807							# __LINE__,
2808							# ( not defined $check )
2809							# or ref $check
2810							# or $check eq "new"
2811							# or $check eq "old",
2812							# );
2813							#
2814							# Likewise, we exclude parens where a statement can start
2815							# because of problems with continuation indentation, like
2816							# these:
2817							#
2818							# ($firstline =~ /^#\!.*perl/)
2819							# and (print $File::Find::name, "\n")
2820							# and (return 1);
2821							#
2822							# (ref($usage_fref) =~ /CODE/)
2823							# ? &$usage_fref
2824							# : (&blast_usage, &blast_params, &blast_general_params);
2825
2826							else {
2827	2060					3641	$type = '{';
2828							}
2829
2830	2121	50				5061	if ( $last_nonblank_type eq ')' ) {
2831	0					0	$self->warning(
2832							"Syntax error? found token '$last_nonblank_type' then '('\n");
2833							}
2834	2121					4115	$rparen_structural_type->[$paren_depth] = $type;
2835	2121					3477	return;
2836
2837							} ## end sub do_LEFT_PARENTHESIS
2838
2839							sub do_RIGHT_PARENTHESIS {
2840
2841	2121			2121	0	4395	my $self = shift;
2842
2843							# ')'
2844	2121					6662	( $type_sequence, $indent_flag ) =
2845							$self->decrease_nesting_depth( PAREN, $rtoken_map->[$i_tok] );
2846
2847	2121	50				6339	if ( $rparen_structural_type->[$paren_depth] eq '{' ) {
2848	2121					3736	$type = '}';
2849							}
2850
2851	2121					4074	$container_type = $rparen_type->[$paren_depth];
2852
2853							# restore statement type as 'sub' at closing paren of a signature
2854							# so that a subsequent ':' is identified as an attribute
2855	2121	100				6167	if ( $container_type =~ /^sub\b/ ) {
2856	24					70	$statement_type = $container_type;
2857							}
2858
2859							# /^(for\|foreach)$/
2860	2121	100				5771	if ( $is_for_foreach{ $rparen_type->[$paren_depth] } ) {
2861	69					238	my $num_sc = $rparen_semicolon_count->[$paren_depth];
2862	69	50	66			486	if ( $num_sc > 0 && $num_sc != 2 ) {
2863	0					0	$self->warning("Expected 2 ';' in 'for(;;)' but saw $num_sc\n");
2864							}
2865							}
2866
2867	2121	50				4920	if ( $paren_depth > 0 ) { $paren_depth-- }
	2121					3241
2868	2121					3548	return;
2869							} ## end sub do_RIGHT_PARENTHESIS
2870
2871							sub do_COMMA {
2872
2873	3075			3075	0	5166	my $self = shift;
2874
2875							# ','
2876	3075	100	33			11169	if ( $last_nonblank_type eq ',' ) {
		50
2877	10					62	$self->complain("Repeated ','s \n");
2878							}
2879
2880							# Note that we have to check both token and type here because a
2881							# comma following a qw list can have last token='(' but type = 'q'
2882							elsif ( $last_nonblank_token eq '(' && $last_nonblank_type eq '{' ) {
2883	0					0	$self->warning("Unexpected leading ',' after a '('\n");
2884							}
2885
2886							# patch for operator_expected: note if we are in the list (use.t)
2887	3075	100				6430	if ( $statement_type eq 'use' ) { $statement_type = '_use' }
	6					15
2888	3075					4684	return;
2889
2890							} ## end sub do_COMMA
2891
2892							sub do_SEMICOLON {
2893
2894	2448			2448	0	4848	my $self = shift;
2895
2896							# ';'
2897	2448					3965	$context = UNKNOWN_CONTEXT;
2898	2448					4033	$statement_type = EMPTY_STRING;
2899	2448					4028	$want_paren = EMPTY_STRING;
2900
2901							# /^(for\|foreach)$/
2902	2448	100				7086	if ( $is_for_foreach{ $rparen_type->[$paren_depth] } )
2903							{ # mark ; in for loop
2904
2905							# Be careful: we do not want a semicolon such as the
2906							# following to be included:
2907							#
2908							# for (sort {strcoll($a,$b);} keys %investments) {
2909
2910	35	100	66			240	if ( $brace_depth == $rdepth_array->[PAREN][BRACE][$paren_depth]
2911							&& $square_bracket_depth ==
2912							$rdepth_array->[PAREN][SQUARE_BRACKET][$paren_depth] )
2913							{
2914
2915	34					68	$type = 'f';
2916	34					76	$rparen_semicolon_count->[$paren_depth]++;
2917							}
2918							}
2919	2448					4654	return;
2920							} ## end sub do_SEMICOLON
2921
2922							sub do_QUOTATION_MARK {
2923
2924	1124			1124	0	2276	my $self = shift;
2925
2926							# '"'
2927	1124	50				2799	$self->error_if_expecting_OPERATOR("String")
2928							if ( $expecting == OPERATOR );
2929	1124					1838	$in_quote = 1;
2930	1124					1934	$type = 'Q';
2931	1124					1877	$allowed_quote_modifiers = EMPTY_STRING;
2932	1124					1824	return;
2933							} ## end sub do_QUOTATION_MARK
2934
2935							sub do_APOSTROPHE {
2936
2937	1160			1160	0	2243	my $self = shift;
2938
2939							# "'"
2940	1160	50				2861	$self->error_if_expecting_OPERATOR("String")
2941							if ( $expecting == OPERATOR );
2942	1160					1944	$in_quote = 1;
2943	1160					1968	$type = 'Q';
2944	1160					1881	$allowed_quote_modifiers = EMPTY_STRING;
2945	1160					1879	return;
2946							} ## end sub do_APOSTROPHE
2947
2948							sub do_BACKTICK {
2949
2950	0			0	0	0	my $self = shift;
2951
2952							# '`'
2953	0	0				0	$self->error_if_expecting_OPERATOR("String")
2954							if ( $expecting == OPERATOR );
2955	0					0	$in_quote = 1;
2956	0					0	$type = 'Q';
2957	0					0	$allowed_quote_modifiers = EMPTY_STRING;
2958	0					0	return;
2959							} ## end sub do_BACKTICK
2960
2961							sub do_SLASH {
2962
2963	207			207	0	477	my $self = shift;
2964
2965							# '/'
2966	207					384	my $is_pattern;
2967
2968							# a pattern cannot follow certain keywords which take optional
2969							# arguments, like 'shift' and 'pop'. See also '?'.
2970	207	50	66			1127	if (
		50
2971							$last_nonblank_type eq 'k'
2972							&& $is_keyword_rejecting_slash_as_pattern_delimiter{
2973							$last_nonblank_token}
2974							)
2975							{
2976	0					0	$is_pattern = 0;
2977							}
2978							elsif ( $expecting == UNKNOWN ) { # indeterminate, must guess..
2979	0					0	my $msg;
2980	0					0	( $is_pattern, $msg ) =
2981							$self->guess_if_pattern_or_division( $i, $rtokens, $rtoken_map,
2982							$max_token_index );
2983
2984	0	0				0	if ($msg) {
2985	0					0	$self->write_diagnostics("DIVIDE:$msg\n");
2986	0					0	$self->write_logfile_entry($msg);
2987							}
2988							}
2989	207					711	else { $is_pattern = ( $expecting == TERM ) }
2990
2991	207	100				498	if ($is_pattern) {
2992	78					152	$in_quote = 1;
2993	78					151	$type = 'Q';
2994	78					156	$allowed_quote_modifiers = '[msixpodualngc]';
2995							}
2996							else { # not a pattern; check for a /= token
2997
2998	129	50				409	if ( $rtokens->[ $i + 1 ] eq '=' ) { # form token /=
2999	0					0	$i++;
3000	0					0	$tok = '/=';
3001	0					0	$type = $tok;
3002							}
3003
3004							#DEBUG - collecting info on what tokens follow a divide
3005							# for development of guessing algorithm
3006							## if (
3007							## $self->is_possible_numerator( $i, $rtokens,
3008							## $max_token_index ) < 0
3009							## )
3010							## {
3011							## $self->write_diagnostics("DIVIDE? $input_line\n");
3012							## }
3013							}
3014	207					429	return;
3015							} ## end sub do_SLASH
3016
3017							sub do_LEFT_CURLY_BRACKET {
3018
3019	1659			1659	0	3236	my $self = shift;
3020
3021							# '{'
3022							# if we just saw a ')', we will label this block with
3023							# its type. We need to do this to allow sub
3024							# code_block_type to determine if this brace starts a
3025							# code block or anonymous hash. (The type of a paren
3026							# pair is the preceding token, such as 'if', 'else',
3027							# etc).
3028	1659					3006	$container_type = EMPTY_STRING;
3029
3030							# ATTRS: for a '{' following an attribute list, reset
3031							# things to look like we just saw the sub name
3032							# Added 'package' (can be 'class') for --use-feature=class (rt145706)
3033	1659	100	100			13756	if ( $statement_type =~ /^(sub\|package)\b/ ) {
		50	66
		100	33
		50
3034	38					94	$last_nonblank_token = $statement_type;
3035	38					81	$last_nonblank_type = 'i';
3036	38					70	$statement_type = EMPTY_STRING;
3037							}
3038
3039							# patch for SWITCH/CASE: hide these keywords from an immediately
3040							# following opening brace
3041							elsif ( ( $statement_type eq 'case' \|\| $statement_type eq 'when' )
3042							&& $statement_type eq $last_nonblank_token )
3043							{
3044	0					0	$last_nonblank_token = ";";
3045							}
3046
3047							elsif ( $last_nonblank_token eq ')' ) {
3048	237					763	$last_nonblank_token = $rparen_type->[ $paren_depth + 1 ];
3049
3050							# defensive move in case of a nesting error (pbug.t)
3051							# in which this ')' had no previous '('
3052							# this nesting error will have been caught
3053	237	50				770	if ( !defined($last_nonblank_token) ) {
3054	0					0	$last_nonblank_token = 'if';
3055							}
3056
3057							# check for syntax error here;
3058	237	100				1041	unless ( $is_blocktype_with_paren{$last_nonblank_token} ) {
3059	14	50				76	if ( $self->[_extended_syntax_] ) {
3060
3061							# we append a trailing () to mark this as an unknown
3062							# block type. This allows perltidy to format some
3063							# common extensions of perl syntax.
3064							# This is used by sub code_block_type
3065	14					48	$last_nonblank_token .= '()';
3066							}
3067							else {
3068	0					0	my $list =
3069							join( SPACE, sort keys %is_blocktype_with_paren );
3070	0					0	$self->warning(
3071							"syntax error at ') {', didn't see one of: <<$list>>; If this code is okay try using the -xs flag\n"
3072							);
3073							}
3074							}
3075							}
3076
3077							# patch for paren-less for/foreach glitch, part 2.
3078							# see note below under 'qw'
3079							elsif ($last_nonblank_token eq 'qw'
3080							&& $is_for_foreach{$want_paren} )
3081							{
3082	0					0	$last_nonblank_token = $want_paren;
3083	0	0				0	if ( $last_last_nonblank_token eq $want_paren ) {
3084	0					0	$self->warning(
3085							"syntax error at '$want_paren .. {' -- missing \$ loop variable\n"
3086							);
3087
3088							}
3089	0					0	$want_paren = EMPTY_STRING;
3090							}
3091
3092							# now identify which of the three possible types of
3093							# curly braces we have: hash index container, anonymous
3094							# hash reference, or code block.
3095
3096							# non-structural (hash index) curly brace pair
3097							# get marked 'L' and 'R'
3098	1659	100				4549	if ( is_non_structural_brace() ) {
3099	363					865	$type = 'L';
3100
3101							# patch for SWITCH/CASE:
3102							# allow paren-less identifier after 'when'
3103							# if the brace is preceded by a space
3104	363	0	33			1444	if ( $statement_type eq 'when'
			33
			0
			0
3105							&& $last_nonblank_type eq 'i'
3106							&& $last_last_nonblank_type eq 'k'
3107							&& ( $i_tok == 0 \|\| $rtoken_type->[ $i_tok - 1 ] eq 'b' ) )
3108							{
3109	0					0	$type = '{';
3110	0					0	$block_type = $statement_type;
3111							}
3112							}
3113
3114							# code and anonymous hash have the same type, '{', but are
3115							# distinguished by 'block_type',
3116							# which will be blank for an anonymous hash
3117							else {
3118
3119	1296					4418	$block_type =
3120							$self->code_block_type( $i_tok, $rtokens, $rtoken_type,
3121							$max_token_index );
3122
3123							# patch to promote bareword type to function taking block
3124	1296	100	100			6063	if ( $block_type
			66
3125							&& $last_nonblank_type eq 'w'
3126							&& $last_nonblank_i >= 0 )
3127							{
3128	34	50				181	if ( $routput_token_type->[$last_nonblank_i] eq 'w' ) {
3129							$routput_token_type->[$last_nonblank_i] =
3130	34	100				220	$is_grep_alias{$block_type} ? 'k' : 'G';
3131							}
3132							}
3133
3134							# patch for SWITCH/CASE: if we find a stray opening block brace
3135							# where we might accept a 'case' or 'when' block, then take it
3136	1296	100	100			5549	if ( $statement_type eq 'case'
3137							\|\| $statement_type eq 'when' )
3138							{
3139	38	100	66			219	if ( !$block_type \|\| $block_type eq '}' ) {
3140	4					6	$block_type = $statement_type;
3141							}
3142							}
3143							}
3144
3145	1659					3781	$rbrace_type->[ ++$brace_depth ] = $block_type;
3146
3147							# Patch for CLASS BLOCK definitions: do not update the package for the
3148							# current depth if this is a BLOCK type definition.
3149							# TODO: should make 'class' separate from 'package' and only do
3150							# this for 'class'
3151	1659	100				5504	$rbrace_package->[$brace_depth] = $current_package
3152							if ( substr( $block_type, 0, 8 ) ne 'package ' );
3153
3154	1659					3422	$rbrace_structural_type->[$brace_depth] = $type;
3155	1659					3263	$rbrace_context->[$brace_depth] = $context;
3156	1659					4768	( $type_sequence, $indent_flag ) =
3157							$self->increase_nesting_depth( BRACE, $rtoken_map->[$i_tok] );
3158	1659					3342	return;
3159							} ## end sub do_LEFT_CURLY_BRACKET
3160
3161							sub do_RIGHT_CURLY_BRACKET {
3162
3163	1659			1659	0	3697	my $self = shift;
3164
3165							# '}'
3166	1659					3560	$block_type = $rbrace_type->[$brace_depth];
3167	1659	100				4267	if ($block_type) { $statement_type = EMPTY_STRING }
	967					1999
3168	1659	100				3848	if ( defined( $rbrace_package->[$brace_depth] ) ) {
3169	1655					3187	$current_package = $rbrace_package->[$brace_depth];
3170							}
3171
3172							# can happen on brace error (caught elsewhere)
3173							else {
3174							}
3175	1659					5050	( $type_sequence, $indent_flag ) =
3176							$self->decrease_nesting_depth( BRACE, $rtoken_map->[$i_tok] );
3177
3178	1659	100				5435	if ( $rbrace_structural_type->[$brace_depth] eq 'L' ) {
3179	363					776	$type = 'R';
3180							}
3181
3182							# propagate type information for 'do' and 'eval' blocks, and also
3183							# for smartmatch operator. This is necessary to enable us to know
3184							# if an operator or term is expected next.
3185	1659	100				4701	if ( $is_block_operator{$block_type} ) {
3186	83					224	$tok = $block_type;
3187							}
3188
3189	1659					2914	$context = $rbrace_context->[$brace_depth];
3190	1659	50				3927	if ( $brace_depth > 0 ) { $brace_depth--; }
	1659					3304
3191	1659					2764	return;
3192							} ## end sub do_RIGHT_CURLY_BRACKET
3193
3194							sub do_AMPERSAND {
3195
3196	126			126	0	326	my $self = shift;
3197
3198							# '&' = maybe sub call? start looking
3199							# We have to check for sub call unless we are sure we
3200							# are expecting an operator. This example from s2p
3201							# got mistaken as a q operator in an early version:
3202							# print BODY &q(<<'EOT');
3203	126	100				380	if ( $expecting != OPERATOR ) {
3204
3205							# But only look for a sub call if we are expecting a term or
3206							# if there is no existing space after the &.
3207							# For example we probably don't want & as sub call here:
3208							# Fcntl::S_IRUSR & $mode;
3209	107	100	66			487	if ( $expecting == TERM \|\| $next_type ne 'b' ) {
3210	104					321	$self->scan_simple_identifier();
3211							}
3212							}
3213							else {
3214							}
3215	126					289	return;
3216							} ## end sub do_AMPERSAND
3217
3218							sub do_LESS_THAN_SIGN {
3219
3220	29			29	0	94	my $self = shift;
3221
3222							# '<' - angle operator or less than?
3223	29	100				121	if ( $expecting != OPERATOR ) {
3224	8					55	( $i, $type ) =
3225							$self->find_angle_operator_termination( $input_line, $i,
3226							$rtoken_map, $expecting, $max_token_index );
3227
3228							## This message is not very helpful and quite confusing if the above
3229							## routine decided not to write a message with the line number.
3230							## if ( $type eq '<' && $expecting == TERM ) {
3231							## $self->error_if_expecting_TERM();
3232							## $self->interrupt_logfile();
3233							## $self->warning("Unterminated <> operator?\n");
3234							## $self->resume_logfile();
3235							## }
3236
3237							}
3238							else {
3239							}
3240	29					340	return;
3241							} ## end sub do_LESS_THAN_SIGN
3242
3243							sub do_QUESTION_MARK {
3244
3245	187			187	0	553	my $self = shift;
3246
3247							# '?' = conditional or starting pattern?
3248	187					380	my $is_pattern;
3249
3250							# Patch for rt #126965
3251							# a pattern cannot follow certain keywords which take optional
3252							# arguments, like 'shift' and 'pop'. See also '/'.
3253	187	100	66			1522	if (
		100
		100
3254							$last_nonblank_type eq 'k'
3255							&& $is_keyword_rejecting_question_as_pattern_delimiter{
3256							$last_nonblank_token}
3257							)
3258							{
3259	1					3	$is_pattern = 0;
3260							}
3261
3262							# patch for RT#131288, user constant function without prototype
3263							# last type is 'U' followed by ?.
3264							elsif ( $last_nonblank_type =~ /^[FUY]$/ ) {
3265	1					3	$is_pattern = 0;
3266							}
3267							elsif ( $expecting == UNKNOWN ) {
3268
3269							# In older versions of Perl, a bare ? can be a pattern
3270							# delimiter. In perl version 5.22 this was
3271							# dropped, but we have to support it in order to format
3272							# older programs. See:
3273							## https://perl.developpez.com/documentations/en/5.22.0/perl5211delta.html
3274							# For example, the following line worked
3275							# at one time:
3276							# ?(.*)? && (print $1,"\n");
3277							# In current versions it would have to be written with slashes:
3278							# /(.*)/ && (print $1,"\n");
3279	11					32	my $msg;
3280	11					69	( $is_pattern, $msg ) =
3281							$self->guess_if_pattern_or_conditional( $i, $rtokens, $rtoken_map,
3282							$max_token_index );
3283
3284	11	50				51	if ($msg) { $self->write_logfile_entry($msg) }
	11					48
3285							}
3286	174					442	else { $is_pattern = ( $expecting == TERM ) }
3287
3288	187	50				548	if ($is_pattern) {
3289	0					0	$in_quote = 1;
3290	0					0	$type = 'Q';
3291	0					0	$allowed_quote_modifiers = '[msixpodualngc]';
3292							}
3293							else {
3294	187					729	( $type_sequence, $indent_flag ) =
3295							$self->increase_nesting_depth( QUESTION_COLON,
3296							$rtoken_map->[$i_tok] );
3297							}
3298	187					443	return;
3299							} ## end sub do_QUESTION_MARK
3300
3301							sub do_STAR {
3302
3303	238			238	0	484	my $self = shift;
3304
3305							# '*' = typeglob, or multiply?
3306	238	50	66			841	if ( $expecting == UNKNOWN && $last_nonblank_type eq 'Z' ) {
3307	0	0	0			0	if ( $next_type ne 'b'
			0
3308							&& $next_type ne '('
3309							&& $next_type ne '#' ) # Fix c036
3310							{
3311	0					0	$expecting = TERM;
3312							}
3313							}
3314	238	100				622	if ( $expecting == TERM ) {
3315	21					89	$self->scan_simple_identifier();
3316							}
3317							else {
3318
3319	217	50				877	if ( $rtokens->[ $i + 1 ] eq '=' ) {
		100
3320	0					0	$tok = '*=';
3321	0					0	$type = $tok;
3322	0					0	$i++;
3323							}
3324							elsif ( $rtokens->[ $i + 1 ] eq '*' ) {
3325	36					102	$tok = '**';
3326	36					67	$type = $tok;
3327	36					61	$i++;
3328	36	50				123	if ( $rtokens->[ $i + 1 ] eq '=' ) {
3329	0					0	$tok = '**=';
3330	0					0	$type = $tok;
3331	0					0	$i++;
3332							}
3333							}
3334							}
3335	238					420	return;
3336							} ## end sub do_STAR
3337
3338							sub do_DOT {
3339
3340	150			150	0	329	my $self = shift;
3341
3342							# '.' = what kind of . ?
3343	150	100				448	if ( $expecting != OPERATOR ) {
3344	10					39	$self->scan_number();
3345	10	100				29	if ( $type eq '.' ) {
3346	2	50				7	$self->error_if_expecting_TERM()
3347							if ( $expecting == TERM );
3348							}
3349							}
3350							else {
3351							}
3352	150					299	return;
3353							} ## end sub do_DOT
3354
3355							sub do_COLON {
3356
3357	271			271	0	718	my $self = shift;
3358
3359							# ':' = label, ternary, attribute, ?
3360
3361							# if this is the first nonblank character, call it a label
3362							# since perl seems to just swallow it
3363	271	50	66			3765	if ( $input_line_number == 1 && $last_nonblank_i == -1 ) {
		100	66
		100	66
		100	66
		100
3364	0					0	$type = 'J';
3365							}
3366
3367							# ATTRS: check for a ':' which introduces an attribute list
3368							# either after a 'sub' keyword or within a paren list
3369							# Added 'package' (can be 'class') for --use-feature=class (rt145706)
3370							elsif ( $statement_type =~ /^(sub\|package)\b/ ) {
3371	22					60	$type = 'A';
3372	22					62	$self->[_in_attribute_list_] = 1;
3373							}
3374
3375							# Within a signature, unless we are in a ternary. For example,
3376							# from 't/filter_example.t':
3377							# method foo4 ( $class: $bar ) { $class->bar($bar) }
3378							elsif ( $rparen_type->[$paren_depth] =~ /^sub\b/
3379							&& !is_balanced_closing_container(QUESTION_COLON) )
3380							{
3381	1					3	$type = 'A';
3382	1					2	$self->[_in_attribute_list_] = 1;
3383							}
3384
3385							# check for scalar attribute, such as
3386							# my $foo : shared = 1;
3387							elsif ($is_my_our_state{$statement_type}
3388							&& $rcurrent_depth->[QUESTION_COLON] == 0 )
3389							{
3390	15					31	$type = 'A';
3391	15					30	$self->[_in_attribute_list_] = 1;
3392							}
3393
3394							# Look for Switch::Plain syntax if an error would otherwise occur
3395							# here. Note that we do not need to check if the extended syntax
3396							# flag is set because otherwise an error would occur, and we would
3397							# then have to output a message telling the user to set the
3398							# extended syntax flag to avoid the error.
3399							# case 1: {
3400							# default: {
3401							# default:
3402							# Note that the line 'default:' will be parsed as a label elsewhere.
3403							elsif ( $is_case_default{$statement_type}
3404							&& !is_balanced_closing_container(QUESTION_COLON) )
3405							{
3406							# mark it as a perltidy label type
3407	46					106	$type = 'J';
3408							}
3409
3410							# otherwise, it should be part of a ?/: operator
3411							else {
3412	187					764	( $type_sequence, $indent_flag ) =
3413							$self->decrease_nesting_depth( QUESTION_COLON,
3414							$rtoken_map->[$i_tok] );
3415	187	50				962	if ( $last_nonblank_token eq '?' ) {
3416	0					0	$self->warning("Syntax error near ? :\n");
3417							}
3418							}
3419	271					519	return;
3420							} ## end sub do_COLON
3421
3422							sub do_PLUS_SIGN {
3423
3424	227			227	0	499	my $self = shift;
3425
3426							# '+' = what kind of plus?
3427	227	100				943	if ( $expecting == TERM ) {
		100
3428	13					54	my $number = $self->scan_number_fast();
3429
3430							# unary plus is safest assumption if not a number
3431	13	50				54	if ( !defined($number) ) { $type = 'p'; }
	13					26
3432							}
3433							elsif ( $expecting == OPERATOR ) {
3434							}
3435							else {
3436	3	100				12	if ( $next_type eq 'w' ) { $type = 'p' }
	2					6
3437							}
3438	227					429	return;
3439							} ## end sub do_PLUS_SIGN
3440
3441							sub do_AT_SIGN {
3442
3443	438			438	0	1211	my $self = shift;
3444
3445							# '@' = sigil for array?
3446	438	50				1436	$self->error_if_expecting_OPERATOR("Array")
3447							if ( $expecting == OPERATOR );
3448	438					1616	$self->scan_simple_identifier();
3449	438					835	return;
3450							} ## end sub do_AT_SIGN
3451
3452							sub do_PERCENT_SIGN {
3453
3454	202			202	0	670	my $self = shift;
3455
3456							# '%' = hash or modulo?
3457							# first guess is hash if no following blank or paren
3458	202	50				724	if ( $expecting == UNKNOWN ) {
3459	0	0	0			0	if ( $next_type ne 'b' && $next_type ne '(' ) {
3460	0					0	$expecting = TERM;
3461							}
3462							}
3463	202	100				660	if ( $expecting == TERM ) {
3464	192					710	$self->scan_simple_identifier();
3465							}
3466	202					494	return;
3467							} ## end sub do_PERCENT_SIGN
3468
3469							sub do_LEFT_SQUARE_BRACKET {
3470
3471	594			594	0	1257	my $self = shift;
3472
3473							# '['
3474	594					1392	$rsquare_bracket_type->[ ++$square_bracket_depth ] =
3475							$last_nonblank_token;
3476	594					2025	( $type_sequence, $indent_flag ) =
3477							$self->increase_nesting_depth( SQUARE_BRACKET,
3478							$rtoken_map->[$i_tok] );
3479
3480							# It may seem odd, but structural square brackets have
3481							# type '{' and '}'. This simplifies the indentation logic.
3482	594	100				1900	if ( !is_non_structural_brace() ) {
3483	287					673	$type = '{';
3484							}
3485	594					1315	$rsquare_bracket_structural_type->[$square_bracket_depth] = $type;
3486	594					1076	return;
3487							} ## end sub do_LEFT_SQUARE_BRACKET
3488
3489							sub do_RIGHT_SQUARE_BRACKET {
3490
3491	594			594	0	1311	my $self = shift;
3492
3493							# ']'
3494	594					2052	( $type_sequence, $indent_flag ) =
3495							$self->decrease_nesting_depth( SQUARE_BRACKET,
3496							$rtoken_map->[$i_tok] );
3497
3498	594	100				2119	if ( $rsquare_bracket_structural_type->[$square_bracket_depth] eq '{' )
3499							{
3500	287					642	$type = '}';
3501							}
3502
3503							# propagate type information for smartmatch operator. This is
3504							# necessary to enable us to know if an operator or term is expected
3505							# next.
3506	594	100				1693	if ( $rsquare_bracket_type->[$square_bracket_depth] eq '~~' ) {
3507	20					43	$tok = $rsquare_bracket_type->[$square_bracket_depth];
3508							}
3509
3510	594	50				1573	if ( $square_bracket_depth > 0 ) { $square_bracket_depth--; }
	594					963
3511	594					1024	return;
3512							} ## end sub do_RIGHT_SQUARE_BRACKET
3513
3514							sub do_MINUS_SIGN {
3515
3516	441			441	0	916	my $self = shift;
3517
3518							# '-' = what kind of minus?
3519	441	100	100			2991	if ( ( $expecting != OPERATOR )
		100
		100
3520							&& $is_file_test_operator{$next_tok} )
3521							{
3522	10					52	my ( $next_nonblank_token, $i_next ) =
3523							$self->find_next_nonblank_token( $i + 1, $rtokens,
3524							$max_token_index );
3525
3526							# check for a quoted word like "-w=>xx";
3527							# it is sufficient to just check for a following '='
3528	10	50				81	if ( $next_nonblank_token eq '=' ) {
3529	0					0	$type = 'm';
3530							}
3531							else {
3532	10					28	$i++;
3533	10					26	$tok .= $next_tok;
3534	10					37	$type = 'F';
3535							}
3536							}
3537							elsif ( $expecting == TERM ) {
3538	330					908	my $number = $self->scan_number_fast();
3539
3540							# maybe part of bareword token? unary is safest
3541	330	100				907	if ( !defined($number) ) { $type = 'm'; }
	288					542
3542
3543							}
3544							elsif ( $expecting == OPERATOR ) {
3545							}
3546							else {
3547
3548	4	50				17	if ( $next_type eq 'w' ) {
3549	4					10	$type = 'm';
3550							}
3551							}
3552	441					791	return;
3553							} ## end sub do_MINUS_SIGN
3554
3555							sub do_CARAT_SIGN {
3556
3557	12			12	0	25	my $self = shift;
3558
3559							# '^'
3560							# check for special variables like ${^WARNING_BITS}
3561	12	100				35	if ( $expecting == TERM ) {
3562
3563	5	50	33			52	if ( $last_nonblank_token eq '{'
			33
3564							&& ( $next_tok !~ /^\d/ )
3565							&& ( $next_tok =~ /^\w/ ) )
3566							{
3567
3568	5	100				25	if ( $next_tok eq 'W' ) {
3569	1					3	$self->[_saw_perl_dash_w_] = 1;
3570							}
3571	5					13	$tok = $tok . $next_tok;
3572	5					9	$i = $i + 1;
3573	5					12	$type = 'w';
3574
3575							# Optional coding to try to catch syntax errors. This can
3576							# be removed if it ever causes incorrect warning messages.
3577							# The '{^' should be preceded by either by a type or '$#'
3578							# Examples:
3579							# $#{^CAPTURE} ok
3580							# *${^LAST_FH}{NAME} ok
3581							# @{^HOWDY} ok
3582							# $hash{^HOWDY} error
3583
3584							# Note that a type sigil '$' may be tokenized as 'Z'
3585							# after something like 'print', so allow type 'Z'
3586	5	0	33			20	if ( $last_last_nonblank_type ne 't'
			33
3587							&& $last_last_nonblank_type ne 'Z'
3588							&& $last_last_nonblank_token ne '$#' )
3589							{
3590	0					0	$self->warning("Possible syntax error near '{^'\n");
3591							}
3592							}
3593
3594							else {
3595	0	0				0	unless ( $self->error_if_expecting_TERM() ) {
3596
3597							# Something like this is valid but strange:
3598							# undef ^I;
3599	0					0	$self->complain("The '^' seems unusual here\n");
3600							}
3601							}
3602							}
3603	12					24	return;
3604							} ## end sub do_CARAT_SIGN
3605
3606							sub do_DOUBLE_COLON {
3607
3608	9			9	0	16	my $self = shift;
3609
3610							# '::' = probably a sub call
3611	9					28	$self->scan_bare_identifier();
3612	9					17	return;
3613							} ## end sub do_DOUBLE_COLON
3614
3615							sub do_LEFT_SHIFT {
3616
3617	7			7	0	27	my $self = shift;
3618
3619							# '<<' = maybe a here-doc?
3620
3621							## This check removed because it could be a deprecated here-doc with
3622							## no specified target. See example in log 16 Sep 2020.
3623							## return
3624							## unless ( $i < $max_token_index )
3625							## ; # here-doc not possible if end of line
3626
3627	7	50				38	if ( $expecting != OPERATOR ) {
3628	7					23	my ( $found_target, $here_doc_target, $here_quote_character,
3629							$saw_error );
3630							(
3631	7					55	$found_target, $here_doc_target, $here_quote_character, $i,
3632							$saw_error
3633							)
3634							= $self->find_here_doc( $expecting, $i, $rtokens, $rtoken_map,
3635							$max_token_index );
3636
3637	7	50				35	if ($found_target) {
		0
3638	7					17	push @{$rhere_target_list},
	7					27
3639							[ $here_doc_target, $here_quote_character ];
3640	7					53	$type = 'h';
3641	7	50				87	if ( length($here_doc_target) > 80 ) {
		50
		100
3642	0					0	my $truncated = substr( $here_doc_target, 0, 80 );
3643	0					0	$self->complain("Long here-target: '$truncated' ...\n");
3644							}
3645							elsif ( !$here_doc_target ) {
3646	0	0				0	$self->warning(
3647							'Use of bare << to mean <<"" is deprecated' . "\n" )
3648							unless ($here_quote_character);
3649							}
3650							elsif ( $here_doc_target !~ /^[A-Z_]\w+$/ ) {
3651	2					12	$self->complain(
3652							"Unconventional here-target: '$here_doc_target'\n");
3653							}
3654							}
3655							elsif ( $expecting == TERM ) {
3656	0	0				0	unless ($saw_error) {
3657
3658							# shouldn't happen..arriving here implies an error in
3659							# the logic in sub 'find_here_doc'
3660	0					0	if (DEVEL_MODE) {
3661							$self->Fault(<<EOM);
3662							Program bug; didn't find here doc target
3663							EOM
3664							}
3665							$self->warning(
3666	0					0	"Possible program error: didn't find here doc target\n"
3667							);
3668	0					0	$self->report_definite_bug();
3669							}
3670							}
3671							}
3672							else {
3673							}
3674	7					24	return;
3675							} ## end sub do_LEFT_SHIFT
3676
3677							sub do_NEW_HERE_DOC {
3678
3679							# '<<~' = a here-doc, new type added in v26
3680
3681	2			2	0	9	my $self = shift;
3682
3683							return
3684	2	50				12	unless ( $i < $max_token_index )
3685							; # here-doc not possible if end of line
3686	2	50				11	if ( $expecting != OPERATOR ) {
3687	2					14	my ( $found_target, $here_doc_target, $here_quote_character,
3688							$saw_error );
3689							(
3690	2					12	$found_target, $here_doc_target, $here_quote_character, $i,
3691							$saw_error
3692							)
3693							= $self->find_here_doc( $expecting, $i, $rtokens, $rtoken_map,
3694							$max_token_index );
3695
3696	2	50				8	if ($found_target) {
		0
3697
3698	2	50				19	if ( length($here_doc_target) > 80 ) {
		50
3699	0					0	my $truncated = substr( $here_doc_target, 0, 80 );
3700	0					0	$self->complain("Long here-target: '$truncated' ...\n");
3701							}
3702							elsif ( $here_doc_target !~ /^[A-Z_]\w+$/ ) {
3703	0					0	$self->complain(
3704							"Unconventional here-target: '$here_doc_target'\n");
3705							}
3706
3707							# Note that we put a leading space on the here quote
3708							# character indicate that it may be preceded by spaces
3709	2					6	$here_quote_character = SPACE . $here_quote_character;
3710	2					6	push @{$rhere_target_list},
	2					7
3711							[ $here_doc_target, $here_quote_character ];
3712	2					7	$type = 'h';
3713							}
3714							elsif ( $expecting == TERM ) {
3715	0	0				0	unless ($saw_error) {
3716
3717							# shouldn't happen..arriving here implies an error in
3718							# the logic in sub 'find_here_doc'
3719	0					0	if (DEVEL_MODE) {
3720							$self->Fault(<<EOM);
3721							Program bug; didn't find here doc target
3722							EOM
3723							}
3724							$self->warning(
3725	0					0	"Possible program error: didn't find here doc target\n"
3726							);
3727	0					0	$self->report_definite_bug();
3728							}
3729							}
3730							}
3731							else {
3732	0					0	$self->error_if_expecting_OPERATOR();
3733							}
3734	2					6	return;
3735							} ## end sub do_NEW_HERE_DOC
3736
3737							sub do_POINTER {
3738
3739							# '->'
3740	886			886	0	1594	return;
3741							}
3742
3743							sub do_PLUS_PLUS {
3744
3745	46			46	0	156	my $self = shift;
3746
3747							# '++'
3748							# type = 'pp' for pre-increment, '++' for post-increment
3749	46	100				260	if ( $expecting == TERM ) { $type = 'pp' }
	7	100				18
3750							elsif ( $expecting == UNKNOWN ) {
3751
3752	2					9	my ( $next_nonblank_token, $i_next ) =
3753							$self->find_next_nonblank_token( $i, $rtokens, $max_token_index );
3754
3755							# Fix for c042: look past a side comment
3756	2	50				11	if ( $next_nonblank_token eq '#' ) {
3757	0					0	( $next_nonblank_token, $i_next ) =
3758							$self->find_next_nonblank_token( $max_token_index,
3759							$rtokens, $max_token_index );
3760							}
3761
3762	2	50				8	if ( $next_nonblank_token eq '$' ) { $type = 'pp' }
	0					0
3763							}
3764	46					137	return;
3765							} ## end sub do_PLUS_PLUS
3766
3767							sub do_FAT_COMMA {
3768
3769	1025			1025	0	1857	my $self = shift;
3770
3771							# '=>'
3772	1025	50				2555	if ( $last_nonblank_type eq $tok ) {
3773	0					0	$self->complain("Repeated '=>'s \n");
3774							}
3775
3776							# patch for operator_expected: note if we are in the list (use.t)
3777							# TODO: make version numbers a new token type
3778	1025	100				2448	if ( $statement_type eq 'use' ) { $statement_type = '_use' }
	18					45
3779	1025					1655	return;
3780							} ## end sub do_FAT_COMMA
3781
3782							sub do_MINUS_MINUS {
3783
3784	2			2	0	8	my $self = shift;
3785
3786							# '--'
3787							# type = 'mm' for pre-decrement, '--' for post-decrement
3788
3789	2	50				8	if ( $expecting == TERM ) { $type = 'mm' }
	2	0				9
3790							elsif ( $expecting == UNKNOWN ) {
3791	0					0	my ( $next_nonblank_token, $i_next ) =
3792							$self->find_next_nonblank_token( $i, $rtokens, $max_token_index );
3793
3794							# Fix for c042: look past a side comment
3795	0	0				0	if ( $next_nonblank_token eq '#' ) {
3796	0					0	( $next_nonblank_token, $i_next ) =
3797							$self->find_next_nonblank_token( $max_token_index,
3798							$rtokens, $max_token_index );
3799							}
3800
3801	0	0				0	if ( $next_nonblank_token eq '$' ) { $type = 'mm' }
	0					0
3802							}
3803	2					7	return;
3804							} ## end sub do_MINUS_MINUS
3805
3806							sub do_LOGICAL_AND {
3807
3808	58			58	0	137	my $self = shift;
3809
3810							# '&&'
3811	58	50	33			236	$self->error_if_expecting_TERM()
3812							if ( $expecting == TERM && $last_nonblank_token ne ',' ); #c015
3813	58					113	return;
3814							} ## end sub do_LOGICAL_AND
3815
3816							sub do_LOGICAL_OR {
3817
3818	74			74	0	198	my $self = shift;
3819
3820							# '\|\|'
3821	74	100	66			339	$self->error_if_expecting_TERM()
3822							if ( $expecting == TERM && $last_nonblank_token ne ',' ); #c015
3823	74					149	return;
3824							} ## end sub do_LOGICAL_OR
3825
3826							sub do_SLASH_SLASH {
3827
3828	10			10	0	22	my $self = shift;
3829
3830							# '//'
3831	10	100				35	$self->error_if_expecting_TERM()
3832							if ( $expecting == TERM );
3833	10					17	return;
3834							} ## end sub do_SLASH_SLASH
3835
3836							sub do_DIGITS {
3837
3838	1929			1929	0	3241	my $self = shift;
3839
3840							# 'd' = string of digits
3841	1929	50				4362	$self->error_if_expecting_OPERATOR("Number")
3842							if ( $expecting == OPERATOR );
3843
3844	1929					4889	my $number = $self->scan_number_fast();
3845	1929	50				4792	if ( !defined($number) ) {
3846
3847							# shouldn't happen - we should always get a number
3848	0					0	if (DEVEL_MODE) {
3849							$self->Fault(<<EOM);
3850							non-number beginning with digit--program bug
3851							EOM
3852							}
3853							$self->warning(
3854	0					0	"Unexpected error condition: non-number beginning with digit\n"
3855							);
3856	0					0	$self->report_definite_bug();
3857							}
3858	1929					4636	return;
3859							} ## end sub do_DIGITS
3860
3861							sub do_ATTRIBUTE_LIST {
3862
3863	39			39	0	109	my ( $self, $next_nonblank_token ) = @_;
3864
3865							# Called at a bareword encountered while in an attribute list
3866							# returns 'is_attribute':
3867							# true if attribute found
3868							# false if an attribute (continue parsing bareword)
3869
3870							# treat bare word followed by open paren like qw(
3871	39	100				130	if ( $next_nonblank_token eq '(' ) {
3872
3873							# For something like:
3874							# : prototype($$)
3875							# we should let do_scan_sub see it so that it can see
3876							# the prototype. All other attributes get parsed as a
3877							# quoted string.
3878	18	100				62	if ( $tok eq 'prototype' ) {
3879	2					7	$id_scan_state = 'prototype';
3880
3881							# start just after the word 'prototype'
3882	2					5	my $i_beg = $i + 1;
3883	2					21	( $i, $tok, $type, $id_scan_state ) = $self->do_scan_sub(
3884							{
3885							input_line => $input_line,
3886							i => $i,
3887							i_beg => $i_beg,
3888							tok => $tok,
3889							type => $type,
3890							rtokens => $rtokens,
3891							rtoken_map => $rtoken_map,
3892							id_scan_state => $id_scan_state,
3893							max_token_index => $max_token_index,
3894							}
3895							);
3896
3897							# If successful, mark as type 'q' to be consistent
3898							# with other attributes. Type 'w' would also work.
3899	2	50				20	if ( $i > $i_beg ) {
3900	2					5	$type = 'q';
3901	2					8	return 1;
3902							}
3903
3904							# If not successful, continue and parse as a quote.
3905							}
3906
3907							# All other attribute lists must be parsed as quotes
3908							# (see 'signatures.t' for good examples)
3909	16					50	$in_quote = $quote_items{'q'};
3910	16					43	$allowed_quote_modifiers = $quote_modifiers{'q'};
3911	16					32	$type = 'q';
3912	16					31	$quote_type = 'q';
3913	16					36	return 1;
3914							}
3915
3916							# handle bareword not followed by open paren
3917							else {
3918	21					44	$type = 'w';
3919	21					52	return 1;
3920							}
3921
3922							# attribute not found
3923	0					0	return;
3924							} ## end sub do_ATTRIBUTE_LIST
3925
3926							sub do_QUOTED_BAREWORD {
3927
3928	786			786	0	1424	my $self = shift;
3929
3930							# find type of a bareword followed by a '=>'
3931	786	100				4006	if ( $ris_constant->{$current_package}{$tok} ) {
		50
		50
3932	14					34	$type = 'C';
3933							}
3934							elsif ( $ris_user_function->{$current_package}{$tok} ) {
3935	0					0	$type = 'U';
3936	0					0	$prototype = $ruser_function_prototype->{$current_package}{$tok};
3937							}
3938							elsif ( $tok =~ /^v\d+$/ ) {
3939	0					0	$type = 'v';
3940	0					0	$self->report_v_string($tok);
3941							}
3942							else {
3943
3944							# Bareword followed by a fat comma - see 'git18.in'
3945							# If tok is something like 'x17' then it could
3946							# actually be operator x followed by number 17.
3947							# For example, here:
3948							# 123x17 => [ 792, 1224 ],
3949							# (a key of 123 repeated 17 times, perhaps not
3950							# what was intended). We will mark x17 as type
3951							# 'n' and it will be split. If the previous token
3952							# was also a bareword then it is not very clear is
3953							# going on. In this case we will not be sure that
3954							# an operator is expected, so we just mark it as a
3955							# bareword. Perl is a little murky in what it does
3956							# with stuff like this, and its behavior can change
3957							# over time. Something like
3958							# a x18 => [792, 1224], will compile as
3959							# a key with 18 a's. But something like
3960							# push @array, a x18;
3961							# is a syntax error.
3962	772	100	66			2739	if (
			33
			66
3963							$expecting == OPERATOR
3964							&& substr( $tok, 0, 1 ) eq 'x'
3965							&& ( length($tok) == 1
3966							\|\| substr( $tok, 1, 1 ) =~ /^\d/ )
3967							)
3968							{
3969	3					5	$type = 'n';
3970	3	50				12	if ( $self->split_pretoken(1) ) {
3971	3					5	$type = 'x';
3972	3					7	$tok = 'x';
3973							}
3974							}
3975							else {
3976
3977							# git #18
3978	769					1331	$type = 'w';
3979	769					1981	$self->error_if_expecting_OPERATOR();
3980							}
3981							}
3982	786					1257	return;
3983							} ## end sub do_QUOTED_BAREWORD
3984
3985							sub do_X_OPERATOR {
3986
3987	17			17	0	46	my $self = shift;
3988
3989	17	100				64	if ( $tok eq 'x' ) {
3990	15	50				80	if ( $rtokens->[ $i + 1 ] eq '=' ) { # x=
3991	0					0	$tok = 'x=';
3992	0					0	$type = $tok;
3993	0					0	$i++;
3994							}
3995							else {
3996	15					39	$type = 'x';
3997							}
3998							}
3999							else {
4000
4001							# Split a pretoken like 'x10' into 'x' and '10'.
4002							# Note: In previous versions of perltidy it was marked
4003							# as a number, $type = 'n', and fixed downstream by the
4004							# Formatter.
4005	2					6	$type = 'n';
4006	2	50				6	if ( $self->split_pretoken(1) ) {
4007	2					5	$type = 'x';
4008	2					4	$tok = 'x';
4009							}
4010							}
4011	17					32	return;
4012							} ## end sub do_X_OPERATOR
4013
4014							sub do_USE_CONSTANT {
4015
4016	16			16	0	36	my $self = shift;
4017
4018	16					61	$self->scan_bare_identifier();
4019	16					92	my ( $next_nonblank_tok2, $i_next2 ) =
4020							$self->find_next_nonblank_token( $i, $rtokens, $max_token_index );
4021
4022	16	50				74	if ($next_nonblank_tok2) {
4023
4024	16	100				98	if ( $is_keyword{$next_nonblank_tok2} ) {
4025
4026							# Assume qw is used as a quote and okay, as in:
4027							# use constant qw{ DEBUG 0 };
4028							# Not worth trying to parse for just a warning
4029
4030							# NOTE: This warning is deactivated because recent
4031							# versions of perl do not complain here, but
4032							# the coding is retained for reference.
4033	1					2	if ( 0 && $next_nonblank_tok2 ne 'qw' ) {
4034							$self->warning(
4035							"Attempting to define constant '$next_nonblank_tok2' which is a perl keyword\n"
4036							);
4037							}
4038							}
4039
4040							else {
4041	15					54	$ris_constant->{$current_package}{$next_nonblank_tok2} = 1;
4042							}
4043							}
4044	16					65	return;
4045							} ## end sub do_USE_CONSTANT
4046
4047							sub do_KEYWORD {
4048
4049	2636			2636	0	4855	my $self = shift;
4050
4051							# found a keyword - set any associated flags
4052	2636					4500	$type = 'k';
4053
4054							# Since for and foreach may not be followed immediately
4055							# by an opening paren, we have to remember which keyword
4056							# is associated with the next '('
4057	2636	100	100			20838	if ( $is_for_foreach{$tok} ) {
		100	66
		100
		100
		100
		100
		100
4058	74	100				328	if ( new_statement_ok() ) {
4059	72					243	$want_paren = $tok;
4060							}
4061							}
4062
4063							# recognize 'use' statements, which are special
4064							elsif ( $is_use_require{$tok} ) {
4065	175					419	$statement_type = $tok;
4066	175	50				529	$self->error_if_expecting_OPERATOR()
4067							if ( $expecting == OPERATOR );
4068							}
4069
4070							# remember my and our to check for trailing ": shared"
4071							elsif ( $is_my_our_state{$tok} ) {
4072	628					1404	$statement_type = $tok;
4073							}
4074
4075							# Check for misplaced 'elsif' and 'else', but allow isolated
4076							# else or elsif blocks to be formatted. This is indicated
4077							# by a last noblank token of ';'
4078							elsif ( $tok eq 'elsif' ) {
4079	27	50	66			211	if (
4080							$last_nonblank_token ne ';'
4081
4082							## !~ /^(if\|elsif\|unless)$/
4083							&& !$is_if_elsif_unless{$last_nonblank_block_type}
4084							)
4085							{
4086	0					0	$self->warning(
4087							"expecting '$tok' to follow one of 'if\|elsif\|unless'\n");
4088							}
4089							}
4090							elsif ( $tok eq 'else' ) {
4091
4092							# patched for SWITCH/CASE
4093	44	50	66			535	if (
			66
4094							$last_nonblank_token ne ';'
4095
4096							## !~ /^(if\|elsif\|unless\|case\|when)$/
4097							&& !$is_if_elsif_unless_case_when{$last_nonblank_block_type}
4098
4099							# patch to avoid an unwanted error message for
4100							# the case of a parenless 'case' (RT 105484):
4101							# switch ( 1 ) { case x { 2 } else { } }
4102							## !~ /^(if\|elsif\|unless\|case\|when)$/
4103							&& !$is_if_elsif_unless_case_when{$statement_type}
4104							)
4105							{
4106	0					0	$self->warning(
4107							"expecting '$tok' to follow one of 'if\|elsif\|unless\|case\|when'\n"
4108							);
4109							}
4110							}
4111
4112							# patch for SWITCH/CASE if 'case' and 'when are
4113							# treated as keywords. Also 'default' for Switch::Plain
4114							elsif ($tok eq 'when'
4115							\|\| $tok eq 'case'
4116							\|\| $tok eq 'default' )
4117							{
4118	56					129	$statement_type = $tok; # next '{' is block
4119							}
4120
4121							# feature 'err' was removed in Perl 5.10. So mark this as
4122							# a bareword unless an operator is expected (see c158).
4123							elsif ( $tok eq 'err' ) {
4124	1	50				9	if ( $expecting != OPERATOR ) { $type = 'w' }
	1					2
4125							}
4126
4127	2636					4711	return;
4128							} ## end sub do_KEYWORD
4129
4130							sub do_QUOTE_OPERATOR {
4131
4132	202			202	0	457	my $self = shift;
4133
4134	202	50				638	if ( $expecting == OPERATOR ) {
4135
4136							# Be careful not to call an error for a qw quote
4137							# where a parenthesized list is allowed. For example,
4138							# it could also be a for/foreach construct such as
4139							#
4140							# foreach my $key qw\Uno Due Tres Quadro\ {
4141							# print "Set $key\n";
4142							# }
4143							#
4144
4145							# Or it could be a function call.
4146							# NOTE: Braces in something like &{ xxx } are not
4147							# marked as a block, we might have a method call.
4148							# &method(...), $method->(..), &{method}(...),
4149							# $ref[2](list) is ok & short for $ref[2]->(list)
4150							#
4151							# See notes in 'sub code_block_type' and
4152							# 'sub is_non_structural_brace'
4153
4154	0	0	0			0	unless (
			0
4155							$tok eq 'qw'
4156							&& ( $last_nonblank_token =~ /^([\]\}\&]\|\-\>)/
4157							\|\| $is_for_foreach{$want_paren} )
4158							)
4159							{
4160	0					0	$self->error_if_expecting_OPERATOR();
4161							}
4162							}
4163	202					556	$in_quote = $quote_items{$tok};
4164	202					554	$allowed_quote_modifiers = $quote_modifiers{$tok};
4165
4166							# All quote types are 'Q' except possibly qw quotes.
4167							# qw quotes are special in that they may generally be trimmed
4168							# of leading and trailing whitespace. So they are given a
4169							# separate type, 'q', unless requested otherwise.
4170	202	100	66			987	$type =
4171							( $tok eq 'qw' && $self->[_trim_qw_] )
4172							? 'q'
4173							: 'Q';
4174	202					380	$quote_type = $type;
4175	202					374	return;
4176							} ## end sub do_QUOTE_OPERATOR
4177
4178							sub do_UNKNOWN_BAREWORD {
4179
4180	957			957	0	3120	my ( $self, $next_nonblank_token ) = @_;
4181
4182	957					3149	$self->scan_bare_identifier();
4183
4184	957	100	100			3494	if ( $statement_type eq 'use'
4185							&& $last_nonblank_token eq 'use' )
4186							{
4187	108					422	$rsaw_use_module->{$current_package}->{$tok} = 1;
4188							}
4189
4190	957	100				2602	if ( $type eq 'w' ) {
4191
4192	932	50				2428	if ( $expecting == OPERATOR ) {
4193
4194							# Patch to avoid error message for RPerl overloaded
4195							# operator functions: use overload
4196							# '+' => \&sse_add,
4197							# '-' => \&sse_sub,
4198							# '*' => \&sse_mul,
4199							# '/' => \&sse_div;
4200							# TODO: this could eventually be generalized
4201	0	0	0			0	if ( $rsaw_use_module->{$current_package}->{'RPerl'}
		0
		0
4202							&& $tok =~ /^sse_(mul\|div\|add\|sub)$/ )
4203							{
4204
4205							}
4206
4207							# Fix part 1 for git #63 in which a comment falls
4208							# between an -> and the following word. An
4209							# alternate fix would be to change operator_expected
4210							# to return an UNKNOWN for this type.
4211							elsif ( $last_nonblank_type eq '->' ) {
4212
4213							}
4214
4215							# don't complain about possible indirect object
4216							# notation.
4217							# For example:
4218							# package main;
4219							# sub new($) { ... }
4220							# $b = new A::; # calls A::new
4221							# $c = new A; # same thing but suspicious
4222							# This will call A::new but we have a 'new' in
4223							# main:: which looks like a constant.
4224							#
4225							elsif ( $last_nonblank_type eq 'C' ) {
4226	0	0				0	if ( $tok !~ /::$/ ) {
4227	0					0	$self->complain(<<EOM);
4228							Expecting operator after '$last_nonblank_token' but found bare word '$tok'
4229							Maybe indirectet object notation?
4230							EOM
4231							}
4232							}
4233							else {
4234	0					0	$self->error_if_expecting_OPERATOR("bareword");
4235							}
4236							}
4237
4238							# mark bare words immediately followed by a paren as
4239							# functions
4240	932					2242	$next_tok = $rtokens->[ $i + 1 ];
4241	932	100				2535	if ( $next_tok eq '(' ) {
4242
4243							# Patch for issue c151, where we are processing a snippet and
4244							# have not seen that SPACE is a constant. In this case 'x' is
4245							# probably an operator. The only disadvantage with an incorrect
4246							# guess is that the space after it may be incorrect. For example
4247							# $str .= SPACE x ( 16 - length($str) ); See also b1410.
4248	276	50	33			1583	if ( $tok eq 'x' && $last_nonblank_type eq 'w' ) { $type = 'x' }
	0	50				0
4249
4250							# Fix part 2 for git #63. Leave type as 'w' to keep
4251							# the type the same as if the -> were not separated
4252	276					567	elsif ( $last_nonblank_type ne '->' ) { $type = 'U' }
4253
4254							}
4255
4256							# underscore after file test operator is file handle
4257	932	50	66			2757	if ( $tok eq '_' && $last_nonblank_type eq 'F' ) {
4258	0					0	$type = 'Z';
4259							}
4260
4261							# patch for SWITCH/CASE if 'case' and 'when are
4262							# not treated as keywords:
4263	932	50	33			4691	if (
			33
			33
4264							( $tok eq 'case' && $rbrace_type->[$brace_depth] eq 'switch' )
4265							\|\| ( $tok eq 'when'
4266							&& $rbrace_type->[$brace_depth] eq 'given' )
4267							)
4268							{
4269	0					0	$statement_type = $tok; # next '{' is block
4270	0					0	$type = 'k'; # for keyword syntax coloring
4271							}
4272	932	100				2334	if ( $next_nonblank_token eq '(' ) {
4273
4274							# patch for SWITCH/CASE if switch and given not keywords
4275							# Switch is not a perl 5 keyword, but we will gamble
4276							# and mark switch followed by paren as a keyword. This
4277							# is only necessary to get html syntax coloring nice,
4278							# and does not commit this as being a switch/case.
4279	241	50	33			1941	if ( $tok eq 'switch' \|\| $tok eq 'given' ) {
		50	33
4280	0					0	$type = 'k'; # for keyword syntax coloring
4281							}
4282
4283							# mark 'x' as operator for something like this (see b1410)
4284							# my $line = join( LD_X, map { LD_H x ( $_ + 2 ) } @$widths );
4285							elsif ( $tok eq 'x' && $last_nonblank_type eq 'w' ) {
4286	0					0	$type = 'x';
4287							}
4288							}
4289							}
4290	957					1764	return;
4291							} ## end sub do_UNKNOWN_BAREWORD
4292
4293							sub sub_attribute_ok_here {
4294
4295	35			35	0	163	my ( $self, $tok_kw, $next_nonblank_token, $i_next ) = @_;
4296
4297							# Decide if 'sub :' can be the start of a sub attribute list.
4298							# We will decide based on if the colon is followed by a
4299							# bareword which is not a keyword.
4300							# Changed inext+1 to inext to fixed case b1190.
4301	35					65	my $sub_attribute_ok_here;
4302	35	50	66			161	if ( $is_sub{$tok_kw}
			66
4303							&& $expecting != OPERATOR
4304							&& $next_nonblank_token eq ':' )
4305							{
4306	3					12	my ( $nn_nonblank_token, $i_nn ) =
4307							$self->find_next_nonblank_token( $i_next, $rtokens,
4308							$max_token_index );
4309							$sub_attribute_ok_here =
4310							$nn_nonblank_token =~ /^\w/
4311							&& $nn_nonblank_token !~ /^\d/
4312	3		66			29	&& !$is_keyword{$nn_nonblank_token};
4313							}
4314	35					300	return $sub_attribute_ok_here;
4315							} ## end sub sub_attribute_ok_here
4316
4317							sub do_BAREWORD {
4318
4319	5826			5826	0	11547	my ( $self, $is_END_or_DATA ) = @_;
4320
4321							# handle a bareword token:
4322							# returns
4323							# true if this token ends the current line
4324							# false otherwise
4325
4326	5826					16287	my ( $next_nonblank_token, $i_next ) =
4327							$self->find_next_nonblank_token( $i, $rtokens, $max_token_index );
4328
4329							# a bare word immediately followed by :: is not a keyword;
4330							# use $tok_kw when testing for keywords to avoid a mistake
4331	5826					11407	my $tok_kw = $tok;
4332	5826	100	100			16815	if ( $rtokens->[ $i + 1 ] eq ':'
4333							&& $rtokens->[ $i + 2 ] eq ':' )
4334							{
4335	266					613	$tok_kw .= '::';
4336							}
4337
4338	5826	100				13224	if ( $self->[_in_attribute_list_] ) {
4339	39					187	my $is_attribute = $self->do_ATTRIBUTE_LIST($next_nonblank_token);
4340	39	50				119	return if ($is_attribute);
4341							}
4342
4343							#----------------------------------------
4344							# Starting final if-elsif- chain of tests
4345							#----------------------------------------
4346
4347							# This is the return flag:
4348							# true => this is the last token on the line
4349							# false => keep tokenizing the line
4350	5787					8778	my $is_last;
4351
4352							# The following blocks of code must update these vars:
4353							# $type - the final token type, must always be set
4354
4355							# In addition, if additional pretokens are added:
4356							# $tok - the final token
4357							# $i - the index of the last pretoken
4358
4359							# They may also need to check and set various flags
4360
4361							# Scan a bare word following a -> as an identifier; it could
4362							# have a long package name. Fixes c037, c041.
4363	5787	100	100			93750	if ( $last_nonblank_token eq '->' ) {
		100	100
		100	66
		100	66
		100	100
		100	66
		100	66
		50	100
		50	100
		100	33
		100	0
		100	0
		100	33
		100	0
		100	0
		100	66
		100	100
		100	100
			100
			66
			66
			66
4364	669					2362	$self->scan_bare_identifier();
4365
4366							# a bareward after '->' gets type 'i'
4367	669					1240	$type = 'i';
4368							}
4369
4370							# Quote a word followed by => operator
4371							# unless the word __END__ or __DATA__ and the only word on
4372							# the line.
4373							elsif ( !$is_END_or_DATA
4374							&& $next_nonblank_token eq '='
4375							&& $rtokens->[ $i_next + 1 ] eq '>' )
4376							{
4377	786					2366	$self->do_QUOTED_BAREWORD();
4378							}
4379
4380							# quote a bare word within braces..like xxx->{s}; note that we
4381							# must be sure this is not a structural brace, to avoid
4382							# mistaking {s} in the following for a quoted bare word:
4383							# for(@[){s}bla}BLA}
4384							# Also treat q in something like var{-q} as a bare word, not
4385							# a quote operator
4386							elsif (
4387							$next_nonblank_token eq '}'
4388							&& (
4389							$last_nonblank_type eq 'L'
4390							\|\| ( $last_nonblank_type eq 'm'
4391							&& $last_last_nonblank_type eq 'L' )
4392							)
4393							)
4394							{
4395	100					242	$type = 'w';
4396							}
4397
4398							# handle operator x (now we know it isn't $x=)
4399							elsif (
4400							$expecting == OPERATOR
4401							&& substr( $tok, 0, 1 ) eq 'x'
4402							&& ( length($tok) == 1
4403							\|\| substr( $tok, 1, 1 ) =~ /^\d/ )
4404							)
4405							{
4406	17					108	$self->do_X_OPERATOR();
4407							}
4408							elsif ( $tok_kw eq 'CORE::' ) {
4409	3					25	$type = $tok = $tok_kw;
4410	3					6	$i += 2;
4411							}
4412							elsif ( ( $tok eq 'strict' )
4413							and ( $last_nonblank_token eq 'use' ) )
4414							{
4415	14					33	$self->[_saw_use_strict_] = 1;
4416	14					73	$self->scan_bare_identifier();
4417							}
4418
4419							elsif ( ( $tok eq 'warnings' )
4420							and ( $last_nonblank_token eq 'use' ) )
4421							{
4422	7					24	$self->[_saw_perl_dash_w_] = 1;
4423
4424							# scan as identifier, so that we pick up something like:
4425							# use warnings::register
4426	7					27	$self->scan_bare_identifier();
4427							}
4428
4429							elsif (
4430							$tok eq 'AutoLoader'
4431							&& $self->[_look_for_autoloader_]
4432							&& (
4433							$last_nonblank_token eq 'use'
4434
4435							# these regexes are from AutoSplit.pm, which we want
4436							# to mimic
4437							\|\| $input_line =~ /^\s*(use\|require)\s+AutoLoader\b/
4438							\|\| $input_line =~ /\bISA\s=.\bAutoLoader\b/
4439							)
4440							)
4441							{
4442	0					0	$self->write_logfile_entry("AutoLoader seen, -nlal deactivates\n");
4443	0					0	$self->[_saw_autoloader_] = 1;
4444	0					0	$self->[_look_for_autoloader_] = 0;
4445	0					0	$self->scan_bare_identifier();
4446							}
4447
4448							elsif (
4449							$tok eq 'SelfLoader'
4450							&& $self->[_look_for_selfloader_]
4451							&& ( $last_nonblank_token eq 'use'
4452							\|\| $input_line =~ /^\s*(use\|require)\s+SelfLoader\b/
4453							\|\| $input_line =~ /\bISA\s=.\bSelfLoader\b/ )
4454							)
4455							{
4456	0					0	$self->write_logfile_entry("SelfLoader seen, -nlsl deactivates\n");
4457	0					0	$self->[_saw_selfloader_] = 1;
4458	0					0	$self->[_look_for_selfloader_] = 0;
4459	0					0	$self->scan_bare_identifier();
4460							}
4461
4462							elsif ( ( $tok eq 'constant' )
4463							and ( $last_nonblank_token eq 'use' ) )
4464							{
4465	16					73	$self->do_USE_CONSTANT();
4466							}
4467
4468							# various quote operators
4469							elsif ( $is_q_qq_qw_qx_qr_s_y_tr_m{$tok} ) {
4470	202					910	$self->do_QUOTE_OPERATOR();
4471							}
4472
4473							# check for a statement label
4474							elsif (
4475							( $next_nonblank_token eq ':' )
4476							&& ( $rtokens->[ $i_next + 1 ] ne ':' )
4477							&& ( $i_next <= $max_token_index ) # colon on same line
4478
4479							# like 'sub : lvalue' ?
4480							&& !$self->sub_attribute_ok_here( $tok_kw, $next_nonblank_token,
4481							$i_next )
4482							&& label_ok()
4483							)
4484							{
4485	33	100				205	if ( $tok !~ /[A-Z]/ ) {
4486	15					35	push @{ $self->[_rlower_case_labels_at_] }, $input_line_number;
	15					52
4487							}
4488	33					82	$type = 'J';
4489	33					87	$tok .= ':';
4490	33					67	$i = $i_next;
4491							}
4492
4493							# 'sub' or other sub alias
4494							elsif ( $is_sub{$tok_kw} ) {
4495
4496							# Update for --use-feature=class (rt145706):
4497							# We have to be extra careful to avoid misparsing other uses of
4498							# 'method' in older scripts.
4499	301	100				1105	if ( $tok_kw eq 'method' ) {
4500	12	100	66			132	if ( $expecting == OPERATOR
			100
4501							\|\| $next_nonblank_token !~ /^(\w\|\:)/
4502							\|\| !$self->method_ok_here() )
4503							{
4504	7					24	$self->do_UNKNOWN_BAREWORD($next_nonblank_token);
4505							}
4506							else {
4507	5					23	initialize_subname();
4508	5					20	$self->scan_id();
4509							}
4510							}
4511							else {
4512	289	50				886	$self->error_if_expecting_OPERATOR()
4513							if ( $expecting == OPERATOR );
4514	289					1067	initialize_subname();
4515	289					1111	$self->scan_id();
4516							}
4517							}
4518
4519							# 'package'
4520							elsif ( $is_package{$tok_kw} ) {
4521
4522							# Update for --use-feature=class (rt145706):
4523							# We have to be extra careful because 'class' may be used for other
4524							# purposes on older code; i.e.
4525							# class($x) - valid sub call
4526							# package($x) - error
4527	30	100				101	if ( $tok_kw eq 'class' ) {
4528	8	100	66			75	if ( $expecting == OPERATOR
			100
4529							\|\| $next_nonblank_token !~ /^(\w\|\:)/
4530							\|\| !$self->class_ok_here() )
4531							{
4532	4					13	$self->do_UNKNOWN_BAREWORD($next_nonblank_token);
4533							}
4534	4					15	else { $self->scan_id() }
4535							}
4536							else {
4537	22	50				63	$self->error_if_expecting_OPERATOR()
4538							if ( $expecting == OPERATOR );
4539	22					79	$self->scan_id();
4540							}
4541							}
4542
4543							# Fix for c035: split 'format' from 'is_format_END_DATA' to be
4544							# more restrictive. Require a new statement to be ok here.
4545							elsif ( $tok_kw eq 'format' && new_statement_ok() ) {
4546	1					3	$type = ';'; # make tokenizer look for TERM next
4547	1					3	$self->[_in_format_] = 1;
4548	1					2	$is_last = 1; ## is last token on this line
4549							}
4550
4551							# Note on token types for format, __DATA__, __END__:
4552							# It simplifies things to give these type ';', so that when we
4553							# start rescanning we will be expecting a token of type TERM.
4554							# We will switch to type 'k' before outputting the tokens.
4555							elsif ( $is_END_DATA{$tok_kw} ) {
4556	7					16	$type = ';'; # make tokenizer look for TERM next
4557
4558							# Remember that we are in one of these three sections
4559	7					21	$self->[ $is_END_DATA{$tok_kw} ] = 1;
4560	7					14	$is_last = 1; ## is last token on this line
4561							}
4562
4563							elsif ( $is_keyword{$tok_kw} ) {
4564	2636					7923	$self->do_KEYWORD();
4565							}
4566
4567							# check for inline label following
4568							# /^(redo\|last\|next\|goto)$/
4569							elsif (( $last_nonblank_type eq 'k' )
4570							&& ( $is_redo_last_next_goto{$last_nonblank_token} ) )
4571							{
4572	19					49	$type = 'j';
4573							}
4574
4575							# something else --
4576							else {
4577	946					3129	$self->do_UNKNOWN_BAREWORD($next_nonblank_token);
4578							}
4579
4580	5787					11267	return $is_last;
4581
4582							} ## end sub do_BAREWORD
4583
4584							sub do_FOLLOW_QUOTE {
4585
4586	2763			2763	0	4475	my $self = shift;
4587
4588							# Continue following a quote on a new line
4589	2763					4432	$type = $quote_type;
4590
4591	2763	100				4041	unless ( @{$routput_token_list} ) { # initialize if continuation line
	2763					6809
4592	184					470	push( @{$routput_token_list}, $i );
	184					431
4593	184					436	$routput_token_type->[$i] = $type;
4594
4595							}
4596
4597							# scan for the end of the quote or pattern
4598							(
4599	2763					8883	$i,
4600							$in_quote,
4601							$quote_character,
4602							$quote_pos,
4603							$quote_depth,
4604							$quoted_string_1,
4605							$quoted_string_2,
4606
4607							) = $self->do_quote(
4608
4609							$i,
4610							$in_quote,
4611							$quote_character,
4612							$quote_pos,
4613							$quote_depth,
4614							$quoted_string_1,
4615							$quoted_string_2,
4616							$rtokens,
4617							$rtoken_map,
4618							$max_token_index,
4619
4620							);
4621
4622							# all done if we didn't find it
4623	2763	100				6953	if ($in_quote) { return }
	183					367
4624
4625							# save pattern and replacement text for rescanning
4626	2580					4349	my $qs1 = $quoted_string_1;
4627
4628							# re-initialize for next search
4629	2580					4075	$quote_character = EMPTY_STRING;
4630	2580					3820	$quote_pos = 0;
4631	2580					3844	$quote_type = 'Q';
4632	2580					3818	$quoted_string_1 = EMPTY_STRING;
4633	2580					3798	$quoted_string_2 = EMPTY_STRING;
4634	2580	100				5651	if ( ++$i > $max_token_index ) { return }
	116					320
4635
4636							# look for any modifiers
4637	2464	100				5334	if ($allowed_quote_modifiers) {
4638
4639							# check for exact quote modifiers
4640	144	100				740	if ( $rtokens->[$i] =~ /^[A-Za-z_]/ ) {
4641	30					89	my $str = $rtokens->[$i];
4642	30					59	my $saw_modifier_e;
4643	30					501	while ( $str =~ /\G$allowed_quote_modifiers/gc ) {
4644	47					118	my $pos = pos($str);
4645	47					111	my $char = substr( $str, $pos - 1, 1 );
4646	47		66			307	$saw_modifier_e \|\|= ( $char eq 'e' );
4647							}
4648
4649							# For an 'e' quote modifier we must scan the replacement
4650							# text for here-doc targets...
4651							# but if the modifier starts a new line we can skip
4652							# this because either the here doc will be fully
4653							# contained in the replacement text (so we can
4654							# ignore it) or Perl will not find it.
4655							# See test 'here2.in'.
4656	30	50	66			161	if ( $saw_modifier_e && $i_tok >= 0 ) {
4657
4658	0					0	my $rht = $self->scan_replacement_text($qs1);
4659
4660							# Change type from 'Q' to 'h' for quotes with
4661							# here-doc targets so that the formatter (see sub
4662							# process_line_of_CODE) will not make any line
4663							# breaks after this point.
4664	0	0				0	if ($rht) {
4665	0					0	push @{$rhere_target_list}, @{$rht};
	0					0
	0					0
4666	0					0	$type = 'h';
4667	0	0				0	if ( $i_tok < 0 ) {
4668	0					0	my $ilast = $routput_token_list->[-1];
4669	0					0	$routput_token_type->[$ilast] = $type;
4670							}
4671							}
4672							}
4673
4674	30	50				96	if ( defined( pos($str) ) ) {
4675
4676							# matched
4677	30	50				100	if ( pos($str) == length($str) ) {
4678	30	50				128	if ( ++$i > $max_token_index ) { return }
	0					0
4679							}
4680
4681							# Looks like a joined quote modifier
4682							# and keyword, maybe something like
4683							# s/xxx/yyy/gefor @k=...
4684							# Example is "galgen.pl". Would have to split
4685							# the word and insert a new token in the
4686							# pre-token list. This is so rare that I haven't
4687							# done it. Will just issue a warning citation.
4688
4689							# This error might also be triggered if my quote
4690							# modifier characters are incomplete
4691							else {
4692	0					0	$self->warning(<<EOM);
4693
4694							Partial match to quote modifier $allowed_quote_modifiers at word: '$str'
4695							Please put a space between quote modifiers and trailing keywords.
4696							EOM
4697
4698							# print "token $rtokens->[$i]\n";
4699							# my $num = length($str) - pos($str);
4700							# $rtokens->[$i]=substr($rtokens->[$i],pos($str),$num);
4701							# print "continuing with new token $rtokens->[$i]\n";
4702
4703							# skipping past this token does least damage
4704	0	0				0	if ( ++$i > $max_token_index ) { return }
	0					0
4705							}
4706							}
4707							else {
4708
4709							# example file: rokicki4.pl
4710							# This error might also be triggered if my quote
4711							# modifier characters are incomplete
4712	0					0	$self->write_logfile_entry(
4713							"Note: found word $str at quote modifier location\n");
4714							}
4715							}
4716
4717							# re-initialize
4718	144					283	$allowed_quote_modifiers = EMPTY_STRING;
4719							}
4720	2464					4526	return;
4721							} ## end sub do_FOLLOW_QUOTE
4722
4723							# ------------------------------------------------------------
4724							# begin hash of code for handling most token types
4725							# ------------------------------------------------------------
4726							my $tokenization_code = {
4727
4728							'>' => \&do_GREATER_THAN_SIGN,
4729							'\|' => \&do_VERTICAL_LINE,
4730							'$' => \&do_DOLLAR_SIGN,
4731							'(' => \&do_LEFT_PARENTHESIS,
4732							')' => \&do_RIGHT_PARENTHESIS,
4733							',' => \&do_COMMA,
4734							';' => \&do_SEMICOLON,
4735							'"' => \&do_QUOTATION_MARK,
4736							"'" => \&do_APOSTROPHE,
4737							'`' => \&do_BACKTICK,
4738							'/' => \&do_SLASH,
4739							'{' => \&do_LEFT_CURLY_BRACKET,
4740							'}' => \&do_RIGHT_CURLY_BRACKET,
4741							'&' => \&do_AMPERSAND,
4742							'<' => \&do_LESS_THAN_SIGN,
4743							'?' => \&do_QUESTION_MARK,
4744							'*' => \&do_STAR,
4745							'.' => \&do_DOT,
4746							':' => \&do_COLON,
4747							'+' => \&do_PLUS_SIGN,
4748							'@' => \&do_AT_SIGN,
4749							'%' => \&do_PERCENT_SIGN,
4750							'[' => \&do_LEFT_SQUARE_BRACKET,
4751							']' => \&do_RIGHT_SQUARE_BRACKET,
4752							'-' => \&do_MINUS_SIGN,
4753							'^' => \&do_CARAT_SIGN,
4754							'::' => \&do_DOUBLE_COLON,
4755							'<<' => \&do_LEFT_SHIFT,
4756							'<<~' => \&do_NEW_HERE_DOC,
4757							'->' => \&do_POINTER,
4758							'++' => \&do_PLUS_PLUS,
4759							'=>' => \&do_FAT_COMMA,
4760							'--' => \&do_MINUS_MINUS,
4761							'&&' => \&do_LOGICAL_AND,
4762							'\|\|' => \&do_LOGICAL_OR,
4763							'//' => \&do_SLASH_SLASH,
4764
4765							# No special code for these types yet, but syntax checks
4766							# could be added.
4767							## '!' => undef,
4768							## '!=' => undef,
4769							## '!~' => undef,
4770							## '%=' => undef,
4771							## '&&=' => undef,
4772							## '&=' => undef,
4773							## '+=' => undef,
4774							## '-=' => undef,
4775							## '..' => undef,
4776							## '..' => undef,
4777							## '...' => undef,
4778							## '.=' => undef,
4779							## '<<=' => undef,
4780							## '<=' => undef,
4781							## '<=>' => undef,
4782							## '<>' => undef,
4783							## '=' => undef,
4784							## '==' => undef,
4785							## '=~' => undef,
4786							## '>=' => undef,
4787							## '>>' => undef,
4788							## '>>=' => undef,
4789							## '\\' => undef,
4790							## '^=' => undef,
4791							## '\|=' => undef,
4792							## '\|\|=' => undef,
4793							## '//=' => undef,
4794							## '~' => undef,
4795							## '~~' => undef,
4796							## '!~~' => undef,
4797
4798							};
4799
4800							# ------------------------------------------------------------
4801							# end hash of code for handling individual token types
4802							# ------------------------------------------------------------
4803
4804	38			38		442	use constant DEBUG_TOKENIZE => 0;
	38					92
	38					186316
4805
4806							sub tokenize_this_line {
4807
4808							# This routine breaks a line of perl code into tokens which are of use in
4809							# indentation and reformatting. One of my goals has been to define tokens
4810							# such that a newline may be inserted between any pair of tokens without
4811							# changing or invalidating the program. This version comes close to this,
4812							# although there are necessarily a few exceptions which must be caught by
4813							# the formatter. Many of these involve the treatment of bare words.
4814							#
4815							# The tokens and their types are returned in arrays. See previous
4816							# routine for their names.
4817							#
4818							# See also the array "valid_token_types" in the BEGIN section for an
4819							# up-to-date list.
4820							#
4821							# To simplify things, token types are either a single character, or they
4822							# are identical to the tokens themselves.
4823							#
4824							# As a debugging aid, the -D flag creates a file containing a side-by-side
4825							# comparison of the input string and its tokenization for each line of a file.
4826							# This is an invaluable debugging aid.
4827							#
4828							# In addition to tokens, and some associated quantities, the tokenizer
4829							# also returns flags indication any special line types. These include
4830							# quotes, here_docs, formats.
4831							#
4832							# -----------------------------------------------------------------------
4833							#
4834							# How to add NEW_TOKENS:
4835							#
4836							# New token types will undoubtedly be needed in the future both to keep up
4837							# with changes in perl and to help adapt the tokenizer to other applications.
4838							#
4839							# Here are some notes on the minimal steps. I wrote these notes while
4840							# adding the 'v' token type for v-strings, which are things like version
4841							# numbers 5.6.0, and ip addresses, and will use that as an example. ( You
4842							# can use your editor to search for the string "NEW_TOKENS" to find the
4843							# appropriate sections to change):
4844							#
4845							# *. Try to talk somebody else into doing it! If not, ..
4846							#
4847							# *. Make a backup of your current version in case things don't work out!
4848							#
4849							# *. Think of a new, unused character for the token type, and add to
4850							# the array @valid_token_types in the BEGIN section of this package.
4851							# For example, I used 'v' for v-strings.
4852							#
4853							# *. Implement coding to recognize the $type of the token in this routine.
4854							# This is the hardest part, and is best done by imitating or modifying
4855							# some of the existing coding. For example, to recognize v-strings, I
4856							# patched 'sub scan_bare_identifier' to recognize v-strings beginning with
4857							# 'v' and 'sub scan_number' to recognize v-strings without the leading 'v'.
4858							#
4859							# *. Update sub operator_expected. This update is critically important but
4860							# the coding is trivial. Look at the comments in that routine for help.
4861							# For v-strings, which should behave like numbers, I just added 'v' to the
4862							# regex used to handle numbers and strings (types 'n' and 'Q').
4863							#
4864							# *. Implement a 'bond strength' rule in sub set_bond_strengths in
4865							# Perl::Tidy::Formatter for breaking lines around this token type. You can
4866							# skip this step and take the default at first, then adjust later to get
4867							# desired results. For adding type 'v', I looked at sub bond_strength and
4868							# saw that number type 'n' was using default strengths, so I didn't do
4869							# anything. I may tune it up someday if I don't like the way line
4870							# breaks with v-strings look.
4871							#
4872							# *. Implement a 'whitespace' rule in sub set_whitespace_flags in
4873							# Perl::Tidy::Formatter. For adding type 'v', I looked at this routine
4874							# and saw that type 'n' used spaces on both sides, so I just added 'v'
4875							# to the array @spaces_both_sides.
4876							#
4877							# *. Update HtmlWriter package so that users can colorize the token as
4878							# desired. This is quite easy; see comments identified by 'NEW_TOKENS' in
4879							# that package. For v-strings, I initially chose to use a default color
4880							# equal to the default for numbers, but it might be nice to change that
4881							# eventually.
4882							#
4883							# *. Update comments in Perl::Tidy::Tokenizer::dump_token_types.
4884							#
4885							# *. Run lots and lots of debug tests. Start with special files designed
4886							# to test the new token type. Run with the -D flag to create a .DEBUG
4887							# file which shows the tokenization. When these work ok, test as many old
4888							# scripts as possible. Start with all of the '.t' files in the 'test'
4889							# directory of the distribution file. Compare .tdy output with previous
4890							# version and updated version to see the differences. Then include as
4891							# many more files as possible. My own technique has been to collect a huge
4892							# number of perl scripts (thousands!) into one directory and run perltidy
4893							# *, then run diff between the output of the previous version and the
4894							# current version.
4895							#
4896							# *. For another example, search for the smartmatch operator '~~'
4897							# with your editor to see where updates were made for it.
4898							#
4899							# -----------------------------------------------------------------------
4900
4901	7502			7502	0	14957	my ( $self, $line_of_tokens ) = @_;
4902	7502					14326	my ($untrimmed_input_line) = $line_of_tokens->{_line_text};
4903
4904							# Extract line number for use in error messages
4905	7502					12476	$input_line_number = $line_of_tokens->{_line_number};
4906
4907							# Check for pod documentation
4908	7502	100	66			21313	if ( substr( $untrimmed_input_line, 0, 1 ) eq '='
4909							&& $untrimmed_input_line =~ /^=[A-Za-z_]/ )
4910							{
4911
4912							# Must not be in multi-line quote
4913							# and must not be in an equation
4914	13	50	33			97	if ( !$in_quote
4915							&& ( $self->operator_expected( [ 'b', '=', 'b' ] ) == TERM ) )
4916							{
4917	13					35	$self->[_in_pod_] = 1;
4918	13					27	return;
4919							}
4920							}
4921
4922	7489					14352	$input_line = $untrimmed_input_line;
4923
4924	7489					14361	chomp $input_line;
4925
4926							# Set a flag to indicate if we might be at an __END__ or __DATA__ line
4927							# This will be used below to avoid quoting a bare word followed by
4928							# a fat comma.
4929	7489					11267	my $is_END_or_DATA;
4930
4931							# Reinitialize the multi-line quote flag
4932	7489	100	100			18373	if ( $in_quote && $quote_type eq 'Q' ) {
4933	47					149	$line_of_tokens->{_starting_in_quote} = 1;
4934							}
4935							else {
4936	7442					13293	$line_of_tokens->{_starting_in_quote} = 0;
4937
4938							# Trim start of this line unless we are continuing a quoted line.
4939							# Do not trim end because we might end in a quote (test: deken4.pl)
4940							# Perl::Tidy::Formatter will delete needless trailing blanks
4941	7442	100				33972	if ( !length($input_line) ) {
		100
4942
4943							# line is empty
4944							}
4945							elsif ( $input_line =~ m/\S/g ) {
4946
4947							# There are $spaces blank characters before a nonblank character
4948	6636					13687	my $spaces = pos($input_line) - 1;
4949	6636	100				14945	if ( $spaces > 0 ) {
4950
4951							# Trim the leading spaces
4952	3537					9417	$input_line = substr( $input_line, $spaces );
4953
4954							# Find actual space count if there are leading tabs
4955	3537	100	66			12674	if (
4956							ord( substr( $untrimmed_input_line, 0, 1 ) ) == ORD_TAB
4957							&& $untrimmed_input_line =~ /^(\t+)/ )
4958							{
4959	213					519	my $tabsize = $self->[_tabsize_];
4960	213					710	$spaces += length($1) * ( $tabsize - 1 );
4961							}
4962
4963							# Calculate a guessed level for nonblank lines to avoid
4964							# calls to sub guess_old_indentation_level()
4965	3537					7773	my $indent_columns = $self->[_indent_columns_];
4966							$line_of_tokens->{_guessed_indentation_level} =
4967	3537					10094	int( $spaces / $indent_columns );
4968							}
4969							}
4970							else {
4971
4972							# line has all blank characters
4973	9					43	$input_line = EMPTY_STRING;
4974							}
4975
4976	7442		100			19810	$is_END_or_DATA = substr( $input_line, 0, 1 ) eq '_'
4977							&& $input_line =~ /^__(END\|DATA)__\s*$/;
4978							}
4979
4980							# Optimize for a full-line comment.
4981	7489	100				15613	if ( !$in_quote ) {
4982	7305	100				16514	if ( substr( $input_line, 0, 1 ) eq '#' ) {
4983
4984							# and check for skipped section
4985	788	100	66			6098	if ( $rOpts_code_skipping
4986							&& $input_line =~ /$code_skipping_pattern_begin/ )
4987							{
4988	2					9	$self->[_in_skipped_] = $self->[_last_line_number_];
4989	2					6	return;
4990							}
4991
4992							# Optional fast processing of a block comment
4993	786					2251	my $ci_string_sum =
4994							( my $str = $ci_string_in_tokenizer ) =~ tr/1/0/;
4995	786					1611	my $ci_string_i = $ci_string_sum + $in_statement_continuation;
4996	786					1669	$line_of_tokens->{_line_type} = 'CODE';
4997	786					2380	$line_of_tokens->{_rtokens} = [$input_line];
4998	786					2100	$line_of_tokens->{_rtoken_type} = ['#'];
4999	786					2067	$line_of_tokens->{_rlevels} = [$level_in_tokenizer];
5000	786					1948	$line_of_tokens->{_rci_levels} = [$ci_string_i];
5001	786					2104	$line_of_tokens->{_rblock_type} = [EMPTY_STRING];
5002	786					1868	$line_of_tokens->{_nesting_tokens_0} = $nesting_token_string;
5003	786					3789	$line_of_tokens->{_nesting_blocks_0} = $nesting_block_string;
5004	786					1930	return;
5005							}
5006
5007							# Optimize handling of a blank line
5008	6517	100				15214	if ( !length($input_line) ) {
5009	806					2186	$line_of_tokens->{_line_type} = 'CODE';
5010	806					1964	$line_of_tokens->{_rtokens} = [];
5011	806					1827	$line_of_tokens->{_rtoken_type} = [];
5012	806					1686	$line_of_tokens->{_rlevels} = [];
5013	806					1664	$line_of_tokens->{_rci_levels} = [];
5014	806					1646	$line_of_tokens->{_rblock_type} = [];
5015	806					1724	$line_of_tokens->{_nesting_tokens_0} = $nesting_token_string;
5016	806					2440	$line_of_tokens->{_nesting_blocks_0} = $nesting_block_string;
5017	806					1826	return;
5018							}
5019							}
5020
5021							# update the copy of the line for use in error messages
5022							# This must be exactly what we give the pre_tokenizer
5023	5895					10694	$self->[_line_of_text_] = $input_line;
5024
5025							# re-initialize for the main loop
5026	5895					14798	$routput_token_list = []; # stack of output token indexes
5027	5895					18180	$routput_token_type = []; # token types
5028	5895					15995	$routput_block_type = []; # types of code block
5029	5895					15340	$routput_container_type = []; # paren types, such as if, elsif, ..
5030	5895					13778	$routput_type_sequence = []; # nesting sequential number
5031
5032	5895					9353	$rhere_target_list = [];
5033
5034	5895					9768	$tok = $last_nonblank_token;
5035	5895					9253	$type = $last_nonblank_type;
5036	5895					9186	$prototype = $last_nonblank_prototype;
5037	5895					8683	$last_nonblank_i = -1;
5038	5895					9044	$block_type = $last_nonblank_block_type;
5039	5895					8700	$container_type = $last_nonblank_container_type;
5040	5895					8661	$type_sequence = $last_nonblank_type_sequence;
5041	5895					7959	$indent_flag = 0;
5042	5895					8202	$peeked_ahead = 0;
5043
5044	5895					18362	$self->tokenizer_main_loop($is_END_or_DATA);
5045
5046							#-----------------------------------------------
5047							# all done tokenizing this line ...
5048							# now prepare the final list of tokens and types
5049							#-----------------------------------------------
5050	5895	50				11704	if ( $self->[_calculate_ci_] ) {
5051	0					0	$self->OLD_tokenizer_wrapup_line($line_of_tokens);
5052							}
5053							else {
5054	5895					16321	$self->tokenizer_wrapup_line($line_of_tokens);
5055							}
5056
5057	5895					11390	return;
5058							} ## end sub tokenize_this_line
5059
5060							sub tokenizer_main_loop {
5061
5062	5895			5895	0	12160	my ( $self, $is_END_or_DATA ) = @_;
5063
5064							#---------------------------------
5065							# Break one input line into tokens
5066							#---------------------------------
5067
5068							# Input parameter:
5069							# $is_END_or_DATA is true for a __END__ or __DATA__ line
5070
5071							# start by breaking the line into pre-tokens
5072	5895					8758	my $max_tokens_wanted = 0; # this signals pre_tokenize to get all tokens
5073	5895					13955	( $rtokens, $rtoken_map, $rtoken_type ) =
5074							pre_tokenize( $input_line, $max_tokens_wanted );
5075
5076	5895					23371	$max_token_index = scalar( @{$rtokens} ) - 1;
	5895					11233
5077	5895					9064	push( @{$rtokens}, SPACE, SPACE, SPACE )
	5895					15096
5078							; # extra whitespace simplifies logic
5079	5895					8930	push( @{$rtoken_map}, 0, 0, 0 ); # shouldn't be referenced
	5895					12648
5080	5895					8783	push( @{$rtoken_type}, 'b', 'b', 'b' );
	5895					12945
5081
5082							# initialize for main loop
5083	5895					8486	if (0) { #<<< this is not necessary
5084							foreach my $ii ( 0 .. $max_token_index + 3 ) {
5085							$routput_token_type->[$ii] = EMPTY_STRING;
5086							$routput_block_type->[$ii] = EMPTY_STRING;
5087							$routput_container_type->[$ii] = EMPTY_STRING;
5088							$routput_type_sequence->[$ii] = EMPTY_STRING;
5089							$routput_indent_flag->[$ii] = 0;
5090							}
5091							}
5092
5093	5895					8928	$i = -1;
5094	5895					8732	$i_tok = -1;
5095
5096							#-----------------------------
5097							# begin main tokenization loop
5098							#-----------------------------
5099
5100							# we are looking at each pre-token of one line and combining them
5101							# into tokens
5102	5895					13764	while ( ++$i <= $max_token_index ) {
5103
5104							# continue looking for the end of a quote
5105	50693	100				90154	if ($in_quote) {
5106	2763					8506	$self->do_FOLLOW_QUOTE();
5107	2763	100	100			10572	last if ( $in_quote \|\| $i > $max_token_index );
5108							}
5109
5110	50394	100	100			138777	if ( $type ne 'b' && $tok ne 'CORE::' ) {
5111
5112							# try to catch some common errors
5113	35221	100	100			76896	if ( ( $type eq 'n' ) && ( $tok ne '0' ) ) {
5114
5115	1588	100				4938	if ( $last_nonblank_token eq 'eq' ) {
		50
5116	9					71	$self->complain("Should 'eq' be '==' here ?\n");
5117							}
5118							elsif ( $last_nonblank_token eq 'ne' ) {
5119	0					0	$self->complain("Should 'ne' be '!=' here ?\n");
5120							}
5121							}
5122
5123							# fix c090, only rotate vars if a new token will be stored
5124	35221	100				65611	if ( $i_tok >= 0 ) {
5125	29464					43857	$last_last_nonblank_token = $last_nonblank_token;
5126	29464					39870	$last_last_nonblank_type = $last_nonblank_type;
5127	29464					40270	$last_last_nonblank_block_type = $last_nonblank_block_type;
5128	29464					40287	$last_last_nonblank_container_type =
5129							$last_nonblank_container_type;
5130	29464					41364	$last_last_nonblank_type_sequence =
5131							$last_nonblank_type_sequence;
5132
5133							# Fix part #3 for git82: propagate type 'Z' though L-R pair
5134	29464	100	100			62616	unless ( $type eq 'R' && $last_nonblank_type eq 'Z' ) {
5135	29461					40596	$last_nonblank_token = $tok;
5136	29461					39359	$last_nonblank_type = $type;
5137							}
5138	29464					41125	$last_nonblank_prototype = $prototype;
5139	29464					40938	$last_nonblank_block_type = $block_type;
5140	29464					40168	$last_nonblank_container_type = $container_type;
5141	29464					40860	$last_nonblank_type_sequence = $type_sequence;
5142	29464					39077	$last_nonblank_i = $i_tok;
5143							}
5144
5145							# Patch for c030: Fix things in case a '->' got separated from
5146							# the subsequent identifier by a side comment. We need the
5147							# last_nonblank_token to have a leading -> to avoid triggering
5148							# an operator expected error message at the next '('. See also
5149							# fix for git #63.
5150	35221	100				65070	if ( $last_last_nonblank_token eq '->' ) {
5151	885	100	66			4976	if ( $last_nonblank_type eq 'w'
5152							\|\| $last_nonblank_type eq 'i' )
5153							{
5154	674					2647	$last_nonblank_token = '->' . $last_nonblank_token;
5155	674					1395	$last_nonblank_type = 'i';
5156							}
5157							}
5158							}
5159
5160							# store previous token type
5161	50394	100				86480	if ( $i_tok >= 0 ) {
5162	44637					85008	$routput_token_type->[$i_tok] = $type;
5163	44637					72322	$routput_block_type->[$i_tok] = $block_type;
5164	44637					72244	$routput_container_type->[$i_tok] = $container_type;
5165	44637					70708	$routput_type_sequence->[$i_tok] = $type_sequence;
5166	44637					69279	$routput_indent_flag->[$i_tok] = $indent_flag;
5167							}
5168
5169							# get the next pre-token and type
5170							# $tok and $type will be modified to make the output token
5171	50394					81888	my $pre_tok = $tok = $rtokens->[$i]; # get the next pre-token
5172	50394					76515	my $pre_type = $type = $rtoken_type->[$i]; # and type
5173
5174							# remember the starting index of this token; we will be updating $i
5175	50394					67556	$i_tok = $i;
5176
5177							# re-initialize various flags for the next output token
5178	50394		100			90928	$block_type &&= EMPTY_STRING;
5179	50394		100			92555	$container_type &&= EMPTY_STRING;
5180	50394		100			98745	$type_sequence &&= EMPTY_STRING;
5181	50394		100			84653	$indent_flag &&= 0;
5182	50394		100			81402	$prototype &&= EMPTY_STRING;
5183
5184							# this pre-token will start an output token
5185	50394					65002	push( @{$routput_token_list}, $i_tok );
	50394					87335
5186
5187							#--------------------------
5188							# handle a whitespace token
5189							#--------------------------
5190	50394	100				105920	next if ( $pre_type eq 'b' );
5191
5192							#-----------------
5193							# handle a comment
5194							#-----------------
5195	35079	100				60510	last if ( $pre_type eq '#' );
5196
5197							# continue gathering identifier if necessary
5198	34751	100				59634	if ($id_scan_state) {
5199
5200	17	100	66			145	if ( $is_sub{$id_scan_state} \|\| $is_package{$id_scan_state} ) {
5201	10					38	$self->scan_id();
5202							}
5203							else {
5204	7					30	$self->scan_identifier();
5205							}
5206
5207	17	100				80	if ($id_scan_state) {
5208
5209							# Still scanning ...
5210							# Check for side comment between sub and prototype (c061)
5211
5212							# done if nothing left to scan on this line
5213	1	50				8	last if ( $i > $max_token_index );
5214
5215	1					6	my ( $next_nonblank_token, $i_next ) =
5216							find_next_nonblank_token_on_this_line( $i, $rtokens,
5217							$max_token_index );
5218
5219							# done if it was just some trailing space
5220	1	50				4	last if ( $i_next > $max_token_index );
5221
5222							# something remains on the line ... must be a side comment
5223	1					7	next;
5224							}
5225
5226	16	100	100			125	next if ( ( $i > 0 ) \|\| $type );
5227
5228							# didn't find any token; start over
5229	7					26	$type = $pre_type;
5230	7					16	$tok = $pre_tok;
5231							}
5232
5233							## my $prev_tok = $i > 0 ? $rtokens->[ $i - 1 ] : SPACE;
5234	34741	100				75058	my $prev_type = $i > 0 ? $rtoken_type->[ $i - 1 ] : 'b';
5235
5236							#-----------------------------------------------------------
5237							# Combine pre-tokens into digraphs and trigraphs if possible
5238							#-----------------------------------------------------------
5239
5240							# See if we can make a digraph...
5241							# The following tokens are excluded and handled specially:
5242							# '/=' is excluded because the / might start a pattern.
5243							# 'x=' is excluded since it might be $x=, with $ on previous line
5244							# '*' and = might be typeglobs of punctuation variables
5245							# I have allowed tokens starting with <, such as <=,
5246							# because I don't think these could be valid angle operators.
5247							# test file: storrs4.pl
5248	34741	100	100			108014	if ( $can_start_digraph{$tok}
			100
5249							&& $i < $max_token_index
5250							&& $is_digraph{ $tok . $rtokens->[ $i + 1 ] } )
5251							{
5252
5253	2555					4793	my $combine_ok = 1;
5254	2555					5222	my $test_tok = $tok . $rtokens->[ $i + 1 ];
5255
5256							# check for special cases which cannot be combined
5257
5258							# '//' must be defined_or operator if an operator is expected.
5259							# TODO: Code for other ambiguous digraphs (/=, x=, *, =)
5260							# could be migrated here for clarity
5261
5262							# Patch for RT#102371, misparsing a // in the following snippet:
5263							# state $b //= ccc();
5264							# The solution is to always accept the digraph (or trigraph)
5265							# after type 'Z' (possible file handle). The reason is that
5266							# sub operator_expected gives TERM expected here, which is
5267							# wrong in this case.
5268	2555	100	66			7252	if ( $test_tok eq '//' && $last_nonblank_type ne 'Z' ) {
5269
5270							# note that here $tok = '/' and the next tok and type is '/'
5271	16					69	$expecting =
5272							$self->operator_expected( [ $prev_type, $tok, '/' ] );
5273
5274							# Patched for RT#101547, was 'unless ($expecting==OPERATOR)'
5275	16	100				69	$combine_ok = 0 if ( $expecting == TERM );
5276							}
5277
5278							# Patch for RT #114359: mis-parsing of "print $x ** 0.5;
5279							# Accept the digraphs '**' only after type 'Z'
5280							# Otherwise postpone the decision.
5281	2555	100				5581	if ( $test_tok eq '**' ) {
5282	39	100				131	if ( $last_nonblank_type ne 'Z' ) { $combine_ok = 0 }
	37					79
5283							}
5284
5285	2555	50	66			16304	if (
			66
			33
5286
5287							# still ok to combine?
5288							$combine_ok
5289
5290							&& ( $test_tok ne '/=' ) # might be pattern
5291							&& ( $test_tok ne 'x=' ) # might be $x
5292							&& ( $test_tok ne '*=' ) # typeglob?
5293
5294							# Moved above as part of fix for
5295							# RT #114359: Missparsing of "print $x ** 0.5;
5296							# && ( $test_tok ne '**' ) # typeglob?
5297							)
5298							{
5299	2514					4087	$tok = $test_tok;
5300	2514					3618	$i++;
5301
5302							# Now try to assemble trigraphs. Note that all possible
5303							# perl trigraphs can be constructed by appending a character
5304							# to a digraph.
5305	2514					4327	$test_tok = $tok . $rtokens->[ $i + 1 ];
5306
5307	2514	100				8726	if ( $is_trigraph{$test_tok} ) {
		100
5308	76					183	$tok = $test_tok;
5309	76					181	$i++;
5310							}
5311
5312							# The only current tetragraph is the double diamond operator
5313							# and its first three characters are not a trigraph, so
5314							# we do can do a special test for it
5315							elsif ( $test_tok eq '<<>' ) {
5316	1					3	$test_tok .= $rtokens->[ $i + 2 ];
5317	1	50				5	if ( $is_tetragraph{$test_tok} ) {
5318	1					2	$tok = $test_tok;
5319	1					3	$i += 2;
5320							}
5321							}
5322							}
5323							}
5324
5325	34741					49705	$type = $tok;
5326	34741					57544	$next_tok = $rtokens->[ $i + 1 ];
5327	34741					53018	$next_type = $rtoken_type->[ $i + 1 ];
5328
5329	34741					43615	DEBUG_TOKENIZE && do {
5330							local $LIST_SEPARATOR = ')(';
5331							my @debug_list = (
5332							$last_nonblank_token, $tok,
5333							$next_tok, $brace_depth,
5334							$rbrace_type->[$brace_depth], $paren_depth,
5335							$rparen_type->[$paren_depth],
5336							);
5337							print STDOUT "TOKENIZE:(@debug_list)\n";
5338							};
5339
5340							# Turn off attribute list on first non-blank, non-bareword.
5341							# Added '#' to fix c038 (later moved above).
5342	34741	100	100			103304	if ( $pre_type ne 'w' && $self->[_in_attribute_list_] ) {
5343	39					90	$self->[_in_attribute_list_] = 0;
5344							}
5345
5346							#--------------------------------------------------------
5347							# We have the next token, $tok.
5348							# Now we have to examine this token and decide what it is
5349							# and define its $type
5350							#
5351							# section 1: bare words
5352							#--------------------------------------------------------
5353
5354	34741	100				72622	if ( $pre_type eq 'w' ) {
		100
5355	5826					21455	$expecting =
5356							$self->operator_expected( [ $prev_type, $tok, $next_type ] );
5357	5826					18321	my $is_last = $self->do_BAREWORD($is_END_or_DATA);
5358	5826	100				17438	last if ($is_last);
5359							}
5360
5361							#-----------------------------
5362							# section 2: strings of digits
5363							#-----------------------------
5364							elsif ( $pre_type eq 'd' ) {
5365	1929					7459	$expecting =
5366							$self->operator_expected( [ $prev_type, $tok, $next_type ] );
5367	1929					6483	$self->do_DIGITS();
5368							}
5369
5370							#----------------------------
5371							# section 3: all other tokens
5372							#----------------------------
5373							else {
5374	26986					55688	my $code = $tokenization_code->{$tok};
5375	26986	100				51207	if ($code) {
5376	25261					86658	$expecting =
5377							$self->operator_expected(
5378							[ $prev_type, $tok, $next_type ] );
5379	25261					89188	$code->($self);
5380	25261	100				68527	redo if $in_quote;
5381							}
5382							}
5383							}
5384
5385							# -----------------------------
5386							# end of main tokenization loop
5387							# -----------------------------
5388
5389							# Store the final token
5390	5895	100				12912	if ( $i_tok >= 0 ) {
5391	5757					12608	$routput_token_type->[$i_tok] = $type;
5392	5757					10434	$routput_block_type->[$i_tok] = $block_type;
5393	5757					10051	$routput_container_type->[$i_tok] = $container_type;
5394	5757					10329	$routput_type_sequence->[$i_tok] = $type_sequence;
5395	5757					9833	$routput_indent_flag->[$i_tok] = $indent_flag;
5396							}
5397
5398							# Remember last nonblank values
5399	5895	100	100			21242	if ( $type ne 'b' && $type ne '#' ) {
5400	5422					9194	$last_last_nonblank_token = $last_nonblank_token;
5401	5422					7841	$last_last_nonblank_type = $last_nonblank_type;
5402	5422					8268	$last_last_nonblank_block_type = $last_nonblank_block_type;
5403	5422					8269	$last_last_nonblank_container_type = $last_nonblank_container_type;
5404	5422					8162	$last_last_nonblank_type_sequence = $last_nonblank_type_sequence;
5405	5422					7766	$last_nonblank_token = $tok;
5406	5422					7625	$last_nonblank_type = $type;
5407	5422					7866	$last_nonblank_block_type = $block_type;
5408	5422					7660	$last_nonblank_container_type = $container_type;
5409	5422					8001	$last_nonblank_type_sequence = $type_sequence;
5410	5422					8327	$last_nonblank_prototype = $prototype;
5411							}
5412
5413							# reset indentation level if necessary at a sub or package
5414							# in an attempt to recover from a nesting error
5415	5895	50				12329	if ( $level_in_tokenizer < 0 ) {
5416	0	0				0	if ( $input_line =~ /^\s*(sub\|package)\s+(\w+)/ ) {
5417	0					0	reset_indentation_level(0);
5418	0					0	$self->brace_warning("resetting level to 0 at $1 $2\n");
5419							}
5420							}
5421
5422	5895					9895	$self->[_in_quote_] = $in_quote;
5423	5895	100				13080	$self->[_quote_target_] =
5424							$in_quote ? matching_end_token($quote_character) : EMPTY_STRING;
5425	5895					10444	$self->[_rhere_target_list_] = $rhere_target_list;
5426
5427	5895					10019	return;
5428							} ## end sub tokenizer_main_loop
5429
5430							sub OLD_tokenizer_wrapup_line {
5431	0			0	0	0	my ( $self, $line_of_tokens ) = @_;
5432
5433							#---------------------------------------------------------
5434							# Package a line of tokens for shipping back to the caller
5435							#---------------------------------------------------------
5436
5437							# NOTE: This routine is retained for testing purposes only; it should
5438							# be removed by about 2025. Until then, it can be called for testing
5439							# with -exp=ci0 or -exp=ci1.
5440
5441							# Most of the remaining work involves defining the two indentation
5442							# parameters that the formatter needs for each token:
5443							# - $level = structural indentation level and
5444							# - $ci_level = continuation indentation level
5445
5446							# The method for setting the indentation level is straightforward.
5447							# But the method used to define the continuation indentation is
5448							# complicated because it has evolved over a long time by trial and
5449							# error. It could undoubtedly be simplified but it works okay as is.
5450
5451							# Here is a brief description of how indentation is computed.
5452							# Perl::Tidy computes indentation as the sum of 2 terms:
5453							#
5454							# (1) structural indentation, such as if/else/elsif blocks
5455							# (2) continuation indentation, such as long parameter call lists.
5456							#
5457							# These are occasionally called primary and secondary indentation.
5458							#
5459							# Structural indentation is introduced by tokens of type '{',
5460							# although the actual tokens might be '{', '(', or '['. Structural
5461							# indentation is of two types: BLOCK and non-BLOCK. Default
5462							# structural indentation is 4 characters if the standard indentation
5463							# scheme is used.
5464							#
5465							# Continuation indentation is introduced whenever a line at BLOCK
5466							# level is broken before its termination. Default continuation
5467							# indentation is 2 characters in the standard indentation scheme.
5468							#
5469							# Both types of indentation may be nested arbitrarily deep and
5470							# interlaced. The distinction between the two is somewhat arbitrary.
5471							#
5472							# For each token, we will define two variables which would apply if
5473							# the current statement were broken just before that token, so that
5474							# that token started a new line:
5475							#
5476							# $level = the structural indentation level,
5477							# $ci_level = the continuation indentation level
5478							#
5479							# The total indentation will be $level * (4 spaces) + $ci_level * (2
5480							# spaces), assuming defaults. However, in some special cases it is
5481							# customary to modify $ci_level from this strict value.
5482							#
5483							# The total structural indentation is easy to compute by adding and
5484							# subtracting 1 from a saved value as types '{' and '}' are seen.
5485							# The running value of this variable is $level_in_tokenizer.
5486							#
5487							# The total continuation is much more difficult to compute, and
5488							# requires several variables. These variables are:
5489							#
5490							# $ci_string_in_tokenizer = a string of 1's and 0's indicating, for
5491							# each indentation level, if there are intervening open secondary
5492							# structures just prior to that level.
5493							# $continuation_string_in_tokenizer = a string of 1's and 0's
5494							# indicating if the last token at that level is "continued", meaning
5495							# that it is not the first token of an expression.
5496							# $nesting_block_string = a string of 1's and 0's indicating, for each
5497							# indentation level, if the level is of type BLOCK or not.
5498							# $nesting_block_flag = the most recent 1 or 0 of $nesting_block_string
5499							# $nesting_list_string = a string of 1's and 0's indicating, for each
5500							# indentation level, if it is appropriate for list formatting.
5501							# If so, continuation indentation is used to indent long list items.
5502							# $nesting_list_flag = the most recent 1 or 0 of $nesting_list_string
5503							# @{$rslevel_stack} = a stack of total nesting depths at each
5504							# structural indentation level, where "total nesting depth" means
5505							# the nesting depth that would occur if every nesting token
5506							# -- '{', '[', # and '(' -- , regardless of context, is used to
5507							# compute a nesting depth.
5508
5509							# Notes on the Continuation Indentation
5510							#
5511							# There is a sort of chicken-and-egg problem with continuation
5512							# indentation. The formatter can't make decisions on line breaks
5513							# without knowing what 'ci' will be at arbitrary locations.
5514							#
5515							# But a problem with setting the continuation indentation (ci) here
5516							# in the tokenizer is that we do not know where line breaks will
5517							# actually be. As a result, we don't know if we should propagate
5518							# continuation indentation to higher levels of structure.
5519							#
5520							# For nesting of only structural indentation, we never need to do
5521							# this. For example, in a long if statement, like this
5522							#
5523							# if ( !$output_block_type[$i]
5524							# && ($in_statement_continuation) )
5525							# { <--outdented
5526							# do_something();
5527							# }
5528							#
5529							# the second line has ci but we do normally give the lines within
5530							# the BLOCK any ci. This would be true if we had blocks nested
5531							# arbitrarily deeply.
5532							#
5533							# But consider something like this, where we have created a break
5534							# after an opening paren on line 1, and the paren is not (currently)
5535							# a structural indentation token:
5536							#
5537							# my $file = $menubar->Menubutton(
5538							# qw/-text File -underline 0 -menuitems/ => [
5539							# [
5540							# Cascade => '~View',
5541							# -menuitems => [
5542							# ...
5543							#
5544							# The second line has ci, so it would seem reasonable to propagate
5545							# it down, giving the third line 1 ci + 1 indentation. This
5546							# suggests the following rule, which is currently used to
5547							# propagating ci down: if there are any non-structural opening
5548							# parens (or brackets, or braces), before an opening structural
5549							# brace, then ci is propagated down, and otherwise
5550							# not. The variable $intervening_secondary_structure contains this
5551							# information for the current token, and the string
5552							# "$ci_string_in_tokenizer" is a stack of previous values of this
5553							# variable.
5554
5555	0					0	my @token_type = (); # stack of output token types
5556	0					0	my @block_type = (); # stack of output code block types
5557	0					0	my @type_sequence = (); # stack of output type sequence numbers
5558	0					0	my @tokens = (); # output tokens
5559	0					0	my @levels = (); # structural brace levels of output tokens
5560	0					0	my @ci_string = (); # string needed to compute continuation indentation
5561
5562							# Count the number of '1's in the string (previously sub ones_count)
5563	0					0	my $ci_string_sum = ( my $str = $ci_string_in_tokenizer ) =~ tr/1/0/;
5564
5565	0					0	$line_of_tokens->{_nesting_tokens_0} = $nesting_token_string;
5566
5567	0					0	my ( $ci_string_i, $level_i );
5568
5569							#-----------------
5570							# Loop over tokens
5571							#-----------------
5572	0					0	my $rtoken_map_im;
5573	0					0	foreach my $i ( @{$routput_token_list} ) {
	0					0
5574
5575	0					0	my $type_i = $routput_token_type->[$i];
5576	0					0	$level_i = $level_in_tokenizer;
5577
5578							# Quick handling of indentation levels for blanks and comments
5579	0	0	0			0	if ( $type_i eq 'b' \|\| $type_i eq '#' ) {
5580	0					0	$ci_string_i = $ci_string_sum + $in_statement_continuation;
5581							}
5582
5583							# All other types
5584							else {
5585
5586							# $tok_i is the PRE-token. It only equals the token for symbols
5587	0					0	my $tok_i = $rtokens->[$i];
5588
5589							# Check for an invalid token type..
5590							# This can happen by running perltidy on non-scripts although
5591							# it could also be bug introduced by programming change. Perl
5592							# silently accepts a 032 (^Z) and takes it as the end
5593	0	0				0	if ( !$is_valid_token_type{$type_i} ) {
5594	0					0	my $val = ord($type_i);
5595	0					0	$self->warning(
5596							"unexpected character decimal $val ($type_i) in script\n"
5597							);
5598	0					0	$self->[_in_error_] = 1;
5599							}
5600
5601							# $ternary_indentation_flag indicates that we need a change
5602							# in level at a nested ternary, as follows
5603							# 1 => at a nested ternary ?
5604							# -1 => at a nested ternary :
5605							# 0 => otherwise
5606	0					0	my $ternary_indentation_flag = $routput_indent_flag->[$i];
5607
5608							#-------------------------------------------
5609							# Section 1: handle a level-increasing token
5610							#-------------------------------------------
5611							# set primary indentation levels based on structural braces
5612							# Note: these are set so that the leading braces have a HIGHER
5613							# level than their CONTENTS, which is convenient for indentation
5614							# Also, define continuation indentation for each token.
5615	0	0	0			0	if ( $type_i eq '{'
		0	0
			0
			0
5616							\|\| $type_i eq 'L'
5617							\|\| $ternary_indentation_flag > 0 )
5618							{
5619
5620							# if the difference between total nesting levels is not 1,
5621							# there are intervening non-structural nesting types between
5622							# this '{' and the previous unclosed '{'
5623	0					0	my $intervening_secondary_structure = 0;
5624	0	0				0	if ( @{$rslevel_stack} ) {
	0					0
5625	0					0	$intervening_secondary_structure =
5626							$slevel_in_tokenizer - $rslevel_stack->[-1];
5627							}
5628
5629							# save the current states
5630	0					0	push( @{$rslevel_stack}, 1 + $slevel_in_tokenizer );
	0					0
5631	0					0	$level_in_tokenizer++;
5632
5633							##NOTE: _maximum_level_ does not seem to be needed now
5634	0	0				0	if ( $level_in_tokenizer > $self->[_maximum_level_] ) {
5635	0					0	$self->[_maximum_level_] = $level_in_tokenizer;
5636							}
5637
5638	0	0				0	if ($ternary_indentation_flag) {
5639
5640							# break BEFORE '?' in a nested ternary
5641	0	0				0	if ( $type_i eq '?' ) {
5642	0					0	$level_i = $level_in_tokenizer;
5643							}
5644
5645	0					0	$nesting_block_string .= "$nesting_block_flag";
5646							} ## end if ($ternary_indentation_flag)
5647							else {
5648
5649	0	0				0	if ( $routput_block_type->[$i] ) {
5650	0					0	$nesting_block_flag = 1;
5651	0					0	$nesting_block_string .= '1';
5652							}
5653							else {
5654	0					0	$nesting_block_flag = 0;
5655	0					0	$nesting_block_string .= '0';
5656							}
5657							}
5658
5659							# we will use continuation indentation within containers
5660							# which are not blocks and not logical expressions
5661	0					0	my $bit = 0;
5662	0	0				0	if ( !$routput_block_type->[$i] ) {
5663
5664							# propagate flag down at nested open parens
5665	0	0				0	if ( $routput_container_type->[$i] eq '(' ) {
5666	0	0				0	$bit = 1 if $nesting_list_flag;
5667							}
5668
5669							# use list continuation if not a logical grouping
5670							# /^(if\|elsif\|unless\|while\|and\|or\|not\|&&\|!\|\\|\\|\|for\|foreach)$/
5671							else {
5672							$bit = 1
5673							unless
5674	0	0				0	$is_logical_container{ $routput_container_type
5675							->[$i] };
5676							}
5677							}
5678	0					0	$nesting_list_string .= $bit;
5679	0					0	$nesting_list_flag = $bit;
5680
5681	0	0				0	$ci_string_in_tokenizer .=
5682							( $intervening_secondary_structure != 0 ) ? '1' : '0';
5683	0					0	$ci_string_sum =
5684							( my $str = $ci_string_in_tokenizer ) =~ tr/1/0/;
5685	0	0				0	$continuation_string_in_tokenizer .=
5686							( $in_statement_continuation > 0 ) ? '1' : '0';
5687
5688							# Sometimes we want to give an opening brace
5689							# continuation indentation, and sometimes not. For code
5690							# blocks, we don't do it, so that the leading '{' gets
5691							# outdented, like this:
5692							#
5693							# if ( !$output_block_type[$i]
5694							# && ($in_statement_continuation) )
5695							# { <--outdented
5696							#
5697							# For other types, we will give them continuation
5698							# indentation. For example, here is how a list looks
5699							# with the opening paren indented:
5700							#
5701							# @LoL =
5702							# ( [ "fred", "barney" ], [ "george", "jane", "elroy" ],
5703							# [ "homer", "marge", "bart" ], );
5704							#
5705							# This looks best when 'ci' is one-half of the
5706							# indentation (i.e., 2 and 4)
5707
5708	0					0	my $total_ci = $ci_string_sum;
5709	0	0	0			0	if (
			0
			0
5710							!$routput_block_type->[$i] # patch: skip for BLOCK
5711							&& ($in_statement_continuation)
5712							&& !( $ternary_indentation_flag && $type_i eq ':' )
5713							)
5714							{
5715	0	0				0	$total_ci += $in_statement_continuation
5716							unless (
5717							substr( $ci_string_in_tokenizer, -1 ) eq '1' );
5718							}
5719
5720	0					0	$ci_string_i = $total_ci;
5721	0					0	$in_statement_continuation = 0;
5722							} ## end if ( $type_i eq '{' \|\|...})
5723
5724							#-------------------------------------------
5725							# Section 2: handle a level-decreasing token
5726							#-------------------------------------------
5727							elsif ($type_i eq '}'
5728							\|\| $type_i eq 'R'
5729							\|\| $ternary_indentation_flag < 0 )
5730							{
5731
5732							# only a nesting error in the script would prevent
5733							# popping here
5734	0	0				0	if ( @{$rslevel_stack} > 1 ) { pop( @{$rslevel_stack} ); }
	0					0
	0					0
	0					0
5735
5736	0					0	$level_i = --$level_in_tokenizer;
5737
5738	0	0				0	if ( $level_in_tokenizer < 0 ) {
5739	0	0				0	unless ( $self->[_saw_negative_indentation_] ) {
5740	0					0	$self->[_saw_negative_indentation_] = 1;
5741	0					0	$self->warning("Starting negative indentation\n");
5742							}
5743							}
5744
5745							# restore previous level values
5746	0	0				0	if ( length($nesting_block_string) > 1 )
5747							{ # true for valid script
5748	0					0	chop $nesting_block_string;
5749	0					0	$nesting_block_flag =
5750							substr( $nesting_block_string, -1 ) eq '1';
5751	0					0	chop $nesting_list_string;
5752	0					0	$nesting_list_flag =
5753							substr( $nesting_list_string, -1 ) eq '1';
5754
5755	0					0	chop $ci_string_in_tokenizer;
5756	0					0	$ci_string_sum =
5757							( my $str = $ci_string_in_tokenizer ) =~ tr/1/0/;
5758
5759	0					0	$in_statement_continuation =
5760							chop $continuation_string_in_tokenizer;
5761
5762							# zero continuation flag at terminal BLOCK '}' which
5763							# ends a statement.
5764	0					0	my $block_type_i = $routput_block_type->[$i];
5765	0	0				0	if ($block_type_i) {
		0
5766
5767							# ...These include non-anonymous subs
5768							# note: could be sub ::abc { or sub 'abc
5769	0	0	0			0	if ( substr( $block_type_i, 0, 3 ) eq 'sub'
		0	0
		0
		0
5770							&& $block_type_i =~ m/^sub\s*/gc )
5771							{
5772
5773							# note: older versions of perl require the /gc
5774							# modifier here or else the \G does not work.
5775	0	0				0	$in_statement_continuation = 0
5776							if ( $block_type_i =~ /\G('\|::\|\w)/gc );
5777							}
5778
5779							# ...and include all block types except user subs
5780							# with block prototypes and these:
5781							# (sort\|grep\|map\|do\|eval)
5782							elsif (
5783							$is_zero_continuation_block_type{$block_type_i}
5784							)
5785							{
5786	0					0	$in_statement_continuation = 0;
5787							}
5788
5789							# ..but these are not terminal types:
5790							# /^(sort\|grep\|map\|do\|eval)$/ )
5791							elsif ($is_sort_map_grep_eval_do{$block_type_i}
5792							\|\| $is_grep_alias{$block_type_i} )
5793							{
5794							}
5795
5796							# ..and a block introduced by a label
5797							# /^\w+\s*:$/gc ) {
5798							elsif ( $block_type_i =~ /:$/ ) {
5799	0					0	$in_statement_continuation = 0;
5800							}
5801
5802							# user function with block prototype
5803							else {
5804	0					0	$in_statement_continuation = 0;
5805							}
5806							} ## end if ($block_type_i)
5807
5808							# If we are in a list, then
5809							# we must set continuation indentation at the closing
5810							# paren of something like this (paren after $check):
5811							# assert(
5812							# __LINE__,
5813							# ( not defined $check )
5814							# or ref $check
5815							# or $check eq "new"
5816							# or $check eq "old",
5817							# );
5818							elsif ( $tok_i eq ')' ) {
5819							$in_statement_continuation = 1
5820							if (
5821							$is_list_end_type{
5822	0	0				0	$routput_container_type->[$i]
5823							}
5824							);
5825							##if $routput_container_type->[$i] =~ /^[;,\{\}]$/;
5826							}
5827							} ## end if ( length($nesting_block_string...))
5828
5829	0					0	$ci_string_i = $ci_string_sum + $in_statement_continuation;
5830							} ## end elsif ( $type_i eq '}' \|\|...{)
5831
5832							#-----------------------------------------
5833							# Section 3: handle a constant level token
5834							#-----------------------------------------
5835							else {
5836
5837							# zero the continuation indentation at certain tokens so
5838							# that they will be at the same level as its container. For
5839							# commas, this simplifies the -lp indentation logic, which
5840							# counts commas. For ?: it makes them stand out.
5841	0	0	0			0	if (
5842							$nesting_list_flag
5843							## $type_i =~ /^[,\?\:]$/
5844							&& $is_comma_question_colon{$type_i}
5845							)
5846							{
5847	0					0	$in_statement_continuation = 0;
5848							}
5849
5850							# Be sure binary operators get continuation indentation.
5851							# Note: the check on $nesting_block_flag is only needed
5852							# to add ci to binary operators following a 'try' block,
5853							# or similar extended syntax block operator (see c158).
5854	0	0	0			0	if (
			0
			0
			0
5855							!$in_statement_continuation
5856							&& ( $nesting_block_flag \|\| $nesting_list_flag )
5857							&& ( $type_i eq 'k' && $is_binary_keyword{$tok_i}
5858							\|\| $is_binary_type{$type_i} )
5859							)
5860							{
5861	0					0	$in_statement_continuation = 1;
5862							}
5863
5864							# continuation indentation is sum of any open ci from
5865							# previous levels plus the current level
5866	0					0	$ci_string_i = $ci_string_sum + $in_statement_continuation;
5867
5868							# update continuation flag ...
5869
5870							# if we are in a BLOCK
5871	0	0				0	if ($nesting_block_flag) {
5872
5873							# the next token after a ';' and label starts a new stmt
5874	0	0	0			0	if ( $type_i eq ';' \|\| $type_i eq 'J' ) {
5875	0					0	$in_statement_continuation = 0;
5876							}
5877
5878							# otherwise, we are continuing the current statement
5879							else {
5880	0					0	$in_statement_continuation = 1;
5881							}
5882							}
5883
5884							# if we are not in a BLOCK..
5885							else {
5886
5887							# do not use continuation indentation if not list
5888							# environment (could be within if/elsif clause)
5889	0	0	0			0	if ( !$nesting_list_flag ) {
		0
5890	0					0	$in_statement_continuation = 0;
5891							}
5892
5893							# otherwise, the token after a ',' starts a new term
5894
5895							# Patch FOR RT#99961; no continuation after a ';'
5896							# This is needed because perltidy currently marks
5897							# a block preceded by a type character like % or @
5898							# as a non block, to simplify formatting. But these
5899							# are actually blocks and can have semicolons.
5900							# See code_block_type() and is_non_structural_brace().
5901							elsif ( $type_i eq ',' \|\| $type_i eq ';' ) {
5902	0					0	$in_statement_continuation = 0;
5903							}
5904
5905							# otherwise, we are continuing the current term
5906							else {
5907	0					0	$in_statement_continuation = 1;
5908							}
5909							} ## end else [ if ($nesting_block_flag)]
5910
5911							} ## end else [ if ( $type_i eq '{' \|\|...})]
5912
5913							#-------------------------------------------
5914							# Section 4: operations common to all levels
5915							#-------------------------------------------
5916
5917							# set secondary nesting levels based on all containment token
5918							# types Note: these are set so that the nesting depth is the
5919							# depth of the PREVIOUS TOKEN, which is convenient for setting
5920							# the strength of token bonds
5921
5922							# /^[L\{\(\[]$/
5923	0	0				0	if ( $is_opening_type{$type_i} ) {
		0
5924	0					0	$slevel_in_tokenizer++;
5925	0					0	$nesting_token_string .= $tok_i;
5926	0					0	$nesting_type_string .= $type_i;
5927							}
5928
5929							# /^[R\}\)\]]$/
5930							elsif ( $is_closing_type{$type_i} ) {
5931	0					0	$slevel_in_tokenizer--;
5932	0					0	my $char = chop $nesting_token_string;
5933
5934	0	0				0	if ( $char ne $matching_start_token{$tok_i} ) {
5935	0					0	$nesting_token_string .= $char . $tok_i;
5936	0					0	$nesting_type_string .= $type_i;
5937							}
5938							else {
5939	0					0	chop $nesting_type_string;
5940							}
5941							}
5942
5943							# apply token type patch:
5944							# - output anonymous 'sub' as keyword (type 'k')
5945							# - output __END__, __DATA__, and format as type 'k' instead
5946							# of ';' to make html colors correct, etc.
5947							# The following hash tests are equivalent to these older tests:
5948							# if ( $type_i eq 't' && $is_sub{$tok_i} ) { $fix_type = 'k' }
5949							# if ( $type_i eq ';' && $tok_i =~ /\w/ ) { $fix_type = 'k' }
5950	0	0	0			0	if ( $is_END_DATA_format_sub{$tok_i}
5951							&& $is_semicolon_or_t{$type_i} )
5952							{
5953	0					0	$type_i = 'k';
5954							}
5955							} ## end else [ if ( $type_i eq 'b' \|\|...)]
5956
5957							#--------------------------------
5958							# Store the values for this token
5959							#--------------------------------
5960	0	0				0	push( @ci_string, $ci_string_i ? 1 : 0 ); # clip ci to 1
5961	0					0	push( @levels, $level_i );
5962	0					0	push( @block_type, $routput_block_type->[$i] );
5963	0					0	push( @type_sequence, $routput_type_sequence->[$i] );
5964	0					0	push( @token_type, $type_i );
5965
5966							# Form and store the PREVIOUS token
5967	0	0				0	if ( defined($rtoken_map_im) ) {
5968	0					0	my $numc =
5969							$rtoken_map->[$i] - $rtoken_map_im; # how many characters
5970
5971	0	0				0	if ( $numc > 0 ) {
5972	0					0	push( @tokens,
5973							substr( $input_line, $rtoken_map_im, $numc ) );
5974							}
5975							else {
5976
5977							# Should not happen unless @{$rtoken_map} is corrupted
5978	0					0	DEVEL_MODE
5979							&& $self->Fault(
5980							"number of characters is '$numc' but should be >0\n");
5981							}
5982							}
5983
5984							# or grab some values for the leading token (needed for log output)
5985							else {
5986	0					0	$line_of_tokens->{_nesting_blocks_0} = $nesting_block_string;
5987							}
5988
5989	0					0	$rtoken_map_im = $rtoken_map->[$i];
5990							} ## end foreach my $i ( @{$routput_token_list...})
5991
5992							#------------------------
5993							# End loop to over tokens
5994							#------------------------
5995
5996							# Form and store the final token of this line
5997	0	0				0	if ( defined($rtoken_map_im) ) {
5998	0					0	my $numc = length($input_line) - $rtoken_map_im;
5999	0	0				0	if ( $numc > 0 ) {
6000	0					0	push( @tokens, substr( $input_line, $rtoken_map_im, $numc ) );
6001							}
6002							else {
6003
6004							# Should not happen unless @{$rtoken_map} is corrupted
6005	0					0	DEVEL_MODE
6006							&& $self->Fault(
6007							"Number of Characters is '$numc' but should be >0\n");
6008							}
6009							}
6010
6011							#----------------------------------------------------------
6012							# Wrap up this line of tokens for shipping to the Formatter
6013							#----------------------------------------------------------
6014	0					0	$line_of_tokens->{_rtoken_type} = \@token_type;
6015	0					0	$line_of_tokens->{_rtokens} = \@tokens;
6016	0					0	$line_of_tokens->{_rblock_type} = \@block_type;
6017	0					0	$line_of_tokens->{_rtype_sequence} = \@type_sequence;
6018	0					0	$line_of_tokens->{_rlevels} = \@levels;
6019	0					0	$line_of_tokens->{_rci_levels} = \@ci_string;
6020
6021	0					0	return;
6022							} ## end sub OLD_tokenizer_wrapup_line
6023
6024							sub tokenizer_wrapup_line {
6025	5895			5895	0	11401	my ( $self, $line_of_tokens ) = @_;
6026
6027							#---------------------------------------------------------
6028							# Package a line of tokens for shipping back to the caller
6029							#---------------------------------------------------------
6030
6031							# Note: This is the new version of this routine. It does not compute
6032							# continuation indentation; it returns values ci=0. The ci values
6033							# are computed later by sub Formatter::set_ci.
6034
6035							# Arrays to hold token values for this line:
6036	5895					10447	my @levels = (); # structural brace levels of output tokens
6037	5895					8740	my @block_type = (); # stack of output code block types
6038	5895					9314	my @type_sequence = (); # stack of output type sequence numbers
6039	5895					8435	my @token_type = (); # stack of output token types
6040	5895					8485	my @tokens = (); # output tokens
6041
6042	5895					13816	$line_of_tokens->{_nesting_tokens_0} = $nesting_token_string;
6043
6044							# Remember starting nesting block string
6045	5895					9470	my $nesting_block_string_0 = $nesting_block_string;
6046
6047							#-----------------
6048							# Loop over tokens
6049							#-----------------
6050	5895					8279	my $rtoken_map_im;
6051
6052							# $i is the index of the pretoken which starts this full token
6053	5895					9164	foreach my $i ( @{$routput_token_list} ) {
	5895					12814
6054
6055	50578					73883	my $type_i = $routput_token_type->[$i];
6056
6057							#--------------------------------
6058							# 1. Handle a non-sequenced token
6059							#--------------------------------
6060	50578	100				79824	if ( !$routput_type_sequence->[$i] ) {
6061
6062							# 1.1 types ';' and 't'
6063							# - output anonymous 'sub' as keyword (type 'k')
6064							# - output __END__, __DATA__, and format as type 'k' instead
6065							# of ';' to make html colors correct, etc.
6066	41456	100				96097	if ( $is_semicolon_or_t{$type_i} ) {
		50
6067	2674					6262	my $tok_i = $rtokens->[$i];
6068	2674	100				7504	if ( $is_END_DATA_format_sub{$tok_i} ) {
6069	172					536	$type_i = 'k';
6070							}
6071							}
6072
6073							# 1.2 Check for an invalid token type..
6074							# This can happen by running perltidy on non-scripts although
6075							# it could also be bug introduced by programming change. Perl
6076							# silently accepts a 032 (^Z) and takes it as the end
6077							elsif ( !$is_valid_token_type{$type_i} ) {
6078	0					0	my $val = ord($type_i);
6079	0					0	$self->warning(
6080							"unexpected character decimal $val ($type_i) in script\n"
6081							);
6082	0					0	$self->[_in_error_] = 1;
6083							}
6084
6085							# Store values for a non-sequenced token
6086	41456					67147	push( @levels, $level_in_tokenizer );
6087	41456					62770	push( @block_type, EMPTY_STRING );
6088	41456					60594	push( @type_sequence, EMPTY_STRING );
6089	41456					78946	push( @token_type, $type_i );
6090
6091							}
6092
6093							#----------------------------
6094							# 2. Handle a sequenced token
6095							# One of { [ ( ? ) ] } :
6096							#----------------------------
6097							else {
6098
6099							# $level_i is the level we will store. Levels of braces are
6100							# set so that the leading braces have a HIGHER level than their
6101							# CONTENTS, which is convenient for indentation.
6102	9122					13706	my $level_i = $level_in_tokenizer;
6103
6104							# $tok_i is the PRE-token. It only equals the token for symbols
6105	9122					14365	my $tok_i = $rtokens->[$i];
6106
6107							# $routput_indent_flag->[$i] indicates that we need a change
6108							# in level at a nested ternary, as follows
6109							# 1 => at a nested ternary ?
6110							# -1 => at a nested ternary :
6111							# 0 => otherwise
6112
6113							#------------------------------------
6114							# 2.1 handle a level-increasing token
6115							#------------------------------------
6116	9122	100				24292	if ( $is_opening_or_ternary_type{$type_i} ) {
		50
6117
6118	4561	100				9089	if ( $type_i eq '?' ) {
6119
6120	187	100				927	if ( $routput_indent_flag->[$i] > 0 ) {
6121	8					19	$level_in_tokenizer++;
6122
6123							# break BEFORE '?' in a nested ternary
6124	8					14	$level_i = $level_in_tokenizer;
6125	8					21	$nesting_block_string .= "$nesting_block_flag";
6126
6127							}
6128							}
6129							else {
6130
6131	4374					7406	$nesting_token_string .= $tok_i;
6132
6133	4374	100	100			12255	if ( $type_i eq '{' \|\| $type_i eq 'L' ) {
6134
6135	4067					5926	$level_in_tokenizer++;
6136
6137	4067	100				8046	if ( $routput_block_type->[$i] ) {
6138	967					1964	$nesting_block_flag = 1;
6139	967					1939	$nesting_block_string .= '1';
6140							}
6141							else {
6142	3100					5260	$nesting_block_flag = 0;
6143	3100					5535	$nesting_block_string .= '0';
6144							}
6145							}
6146							}
6147							}
6148
6149							#------------------------------------
6150							# 2.2 handle a level-decreasing token
6151							#------------------------------------
6152							elsif ( $is_closing_or_ternary_type{$type_i} ) {
6153
6154	4561	100				10264	if ( $type_i ne ':' ) {
6155	4374					8216	my $char = chop $nesting_token_string;
6156	4374	50				11282	if ( $char ne $matching_start_token{$tok_i} ) {
6157	0					0	$nesting_token_string .= $char . $tok_i;
6158							}
6159							}
6160
6161	4561	100	100			15286	if (
			100
			100
6162							$type_i eq '}'
6163							\|\| $type_i eq 'R'
6164
6165							# only the second and higher ? : have levels
6166							\|\| $type_i eq ':' && $routput_indent_flag->[$i] < 0
6167							)
6168							{
6169
6170	4075					6309	$level_i = --$level_in_tokenizer;
6171
6172	4075	50				7994	if ( $level_in_tokenizer < 0 ) {
6173	0	0				0	unless ( $self->[_saw_negative_indentation_] ) {
6174	0					0	$self->[_saw_negative_indentation_] = 1;
6175	0					0	$self->warning(
6176							"Starting negative indentation\n");
6177							}
6178							}
6179
6180							# restore previous level values
6181	4075	50				8824	if ( length($nesting_block_string) > 1 )
6182							{ # true for valid script
6183	4075					5882	chop $nesting_block_string;
6184	4075					7973	$nesting_block_flag =
6185							substr( $nesting_block_string, -1 ) eq '1';
6186							}
6187
6188							}
6189							}
6190
6191							#-------------------------------------------------------
6192							# 2.3 Unexpected sequenced token type - shouldn't happen
6193							#-------------------------------------------------------
6194							else {
6195
6196							# The tokenizer should only be assigning sequence numbers
6197							# to types { [ ( ? ) ] } :
6198	0					0	DEVEL_MODE && $self->Fault(<<EOM);
6199							unexpected sequence number on token type $type_i with pre-tok=$tok_i
6200							EOM
6201							}
6202
6203							# The starting nesting block string, which is used in any .LOG
6204							# output, should include the first token of the line
6205	9122	100				18290	if ( !@levels ) {
6206	1574					2860	$nesting_block_string_0 = $nesting_block_string;
6207							}
6208
6209							# Store values for a sequenced token
6210	9122					17077	push( @levels, $level_i );
6211	9122					17407	push( @block_type, $routput_block_type->[$i] );
6212	9122					15666	push( @type_sequence, $routput_type_sequence->[$i] );
6213	9122					19385	push( @token_type, $type_i );
6214
6215							}
6216
6217							}
6218
6219							# End loop to over tokens
6220
6221	5895					14565	$line_of_tokens->{_nesting_blocks_0} = $nesting_block_string_0;
6222
6223							#--------------------------
6224							# Form and store the tokens
6225							#--------------------------
6226	5895	50				13742	if (@levels) {
6227
6228	5895					8211	my $im = shift @{$routput_token_list};
	5895					11201
6229	5895					10941	my $offset = $rtoken_map->[$im];
6230	5895					8588	foreach my $i ( @{$routput_token_list} ) {
	5895					11000
6231	44683					58561	my $numc = $rtoken_map->[$i] - $offset;
6232	44683					80435	push( @tokens, substr( $input_line, $offset, $numc ) );
6233
6234	44683					53844	if ( DEVEL_MODE && $numc <= 0 ) {
6235
6236							# Should not happen unless @{$rtoken_map} is corrupted
6237							$self->Fault(
6238							"number of characters is '$numc' but should be >0\n");
6239							}
6240	44683					65556	$offset = $rtoken_map->[$i];
6241							}
6242
6243							# Form and store the final token of this line
6244	5895					11594	my $numc = length($input_line) - $offset;
6245	5895					12837	push( @tokens, substr( $input_line, $offset, $numc ) );
6246
6247	5895					9321	if ( DEVEL_MODE && $numc <= 0 ) {
6248							$self->Fault(
6249							"Number of Characters is '$numc' but should be >0\n");
6250							}
6251							}
6252
6253							# This sub returns zero ci values
6254	5895					20735	my @ci_levels = (0) x scalar(@levels);
6255
6256							#----------------------------------------------------------
6257							# Wrap up this line of tokens for shipping to the Formatter
6258							#----------------------------------------------------------
6259	5895					13743	$line_of_tokens->{_rtoken_type} = \@token_type;
6260	5895					11461	$line_of_tokens->{_rtokens} = \@tokens;
6261	5895					12090	$line_of_tokens->{_rblock_type} = \@block_type;
6262	5895					11047	$line_of_tokens->{_rtype_sequence} = \@type_sequence;
6263	5895					18987	$line_of_tokens->{_rlevels} = \@levels;
6264	5895					11569	$line_of_tokens->{_rci_levels} = \@ci_levels;
6265
6266	5895					16354	return;
6267							} ## end sub tokenizer_wrapup_line
6268
6269							} ## end tokenize_this_line
6270
6271							#######################################################################
6272							# Tokenizer routines which assist in identifying token types
6273							#######################################################################
6274
6275							# hash lookup table of operator expected values
6276							my %op_expected_table;
6277
6278							# exceptions to perl's weird parsing rules after type 'Z'
6279							my %is_weird_parsing_rule_exception;
6280
6281							my %is_paren_dollar;
6282
6283							my %is_n_v;
6284
6285							BEGIN {
6286
6287							# Always expecting TERM following these types:
6288							# note: this is identical to '@value_requestor_type' defined later.
6289	38			38		582	my @q = qw(
6290							; ! + x & ? F J - p / Y : % f U ~ A G j L * . \| ^ < = [ m { \ > t
6291							\|\| >= != mm = => .. !~ == && \|= .= pp -= =~ += <= %= ^= x= ~~ * << /=
6292							&= // >> ~. &. \|. ^.
6293							... **= <<= >>= &&= \|\|= //= <=> !~~ &.= \|.= ^.= <<~
6294							);
6295	38					161	push @q, ',';
6296	38					77	push @q, '('; # for completeness, not currently a token type
6297	38					71	push @q, '->'; # was previously in UNKNOWN
6298	38					2024	@{op_expected_table}{@q} = (TERM) x scalar(@q);
6299
6300							# Always UNKNOWN following these types;
6301							# previously had '->' in this list for c030
6302	38					241	@q = qw( w );
6303	38					104	@{op_expected_table}{@q} = (UNKNOWN) x scalar(@q);
6304
6305							# Always expecting OPERATOR ...
6306							# 'n' and 'v' are currently excluded because they might be VERSION numbers
6307							# 'i' is currently excluded because it might be a package
6308							# 'q' is currently excluded because it might be a prototype
6309							# Fix for c030: removed '->' from this list:
6310	38					128	@q = qw( -- C h R ++ ] Q <> ); ## n v q i );
6311	38					94	push @q, ')';
6312	38					229	@{op_expected_table}{@q} = (OPERATOR) x scalar(@q);
6313
6314							# Fix for git #62: added '*' and '%'
6315	38					109	@q = qw( < ? * % );
6316	38					112	@{is_weird_parsing_rule_exception}{@q} = (1) x scalar(@q);
6317
6318	38					90	@q = qw<) $>;
6319	38					110	@{is_paren_dollar}{@q} = (1) x scalar(@q);
6320
6321	38					87	@q = qw( n v );
6322	38					1338	@{is_n_v}{@q} = (1) x scalar(@q);
6323
6324							} ## end BEGIN
6325
6326	38			38		320	use constant DEBUG_OPERATOR_EXPECTED => 0;
	38					114
	38					88134
6327
6328							sub operator_expected {
6329
6330							# Returns a parameter indicating what types of tokens can occur next
6331
6332							# Call format:
6333							# $op_expected =
6334							# $self->operator_expected( [ $prev_type, $tok, $next_type ] );
6335							# where
6336							# $prev_type is the type of the previous token (blank or not)
6337							# $tok is the current token
6338							# $next_type is the type of the next token (blank or not)
6339
6340							# Many perl symbols have two or more meanings. For example, '<<'
6341							# can be a shift operator or a here-doc operator. The
6342							# interpretation of these symbols depends on the current state of
6343							# the tokenizer, which may either be expecting a term or an
6344							# operator. For this example, a << would be a shift if an OPERATOR
6345							# is expected, and a here-doc if a TERM is expected. This routine
6346							# is called to make this decision for any current token. It returns
6347							# one of three possible values:
6348							#
6349							# OPERATOR - operator expected (or at least, not a term)
6350							# UNKNOWN - can't tell
6351							# TERM - a term is expected (or at least, not an operator)
6352							#
6353							# The decision is based on what has been seen so far. This
6354							# information is stored in the "$last_nonblank_type" and
6355							# "$last_nonblank_token" variables. For example, if the
6356							# $last_nonblank_type is '=~', then we are expecting a TERM, whereas
6357							# if $last_nonblank_type is 'n' (numeric), we are expecting an
6358							# OPERATOR.
6359							#
6360							# If a UNKNOWN is returned, the calling routine must guess. A major
6361							# goal of this tokenizer is to minimize the possibility of returning
6362							# UNKNOWN, because a wrong guess can spoil the formatting of a
6363							# script.
6364							#
6365							# Adding NEW_TOKENS: it is critically important that this routine be
6366							# updated to allow it to determine if an operator or term is to be
6367							# expected after the new token. Doing this simply involves adding
6368							# the new token character to one of the regexes in this routine or
6369							# to one of the hash lists
6370							# that it uses, which are initialized in the BEGIN section.
6371							# USES GLOBAL VARIABLES: $last_nonblank_type, $last_nonblank_token,
6372							# $statement_type
6373
6374							# When possible, token types should be selected such that we can determine
6375							# the 'operator_expected' value by a simple hash lookup. If there are
6376							# exceptions, that is an indication that a new type is needed.
6377
6378	33045			33045	0	60074	my ( $self, $rarg ) = @_;
6379
6380							#-------------
6381							# Table lookup
6382							#-------------
6383
6384							# Many types are can be obtained by a table lookup given the previous type.
6385							# This typically handles half or more of the calls.
6386	33045					62265	my $op_expected = $op_expected_table{$last_nonblank_type};
6387	33045	100				62062	if ( defined($op_expected) ) {
6388	20534					25564	DEBUG_OPERATOR_EXPECTED
6389							&& print STDOUT
6390							"OPERATOR_EXPECTED: Table Lookup; returns $op_expected for last type $last_nonblank_type token $last_nonblank_token\n";
6391	20534					40456	return $op_expected;
6392							}
6393
6394							#---------------------
6395							# Handle special cases
6396							#---------------------
6397
6398	12511					18342	$op_expected = UNKNOWN;
6399	12511					16963	my ( $prev_type, $tok, $next_type ) = @{$rarg};
	12511					25718
6400
6401							# Types 'k', '}' and 'Z' depend on context
6402							# Types 'i', 'n', 'v', 'q' currently also temporarily depend on context.
6403
6404							# identifier...
6405	12511	100				39000	if ( $last_nonblank_type eq 'i' ) {
		100
		100
		100
		100
		100
6406	4290					7486	$op_expected = OPERATOR;
6407
6408							# TODO: it would be cleaner to make this a special type
6409							# expecting VERSION or {} after package NAMESPACE;
6410							# maybe mark these words as type 'Y'?
6411	4290	50	66			13231	if ( substr( $last_nonblank_token, 0, 7 ) eq 'package'
			66
6412							&& $statement_type =~ /^package\b/
6413							&& $last_nonblank_token =~ /^package\b/ )
6414							{
6415	26					62	$op_expected = TERM;
6416							}
6417							}
6418
6419							# keyword...
6420							elsif ( $last_nonblank_type eq 'k' ) {
6421	2629					4337	$op_expected = TERM;
6422	2629	100				12214	if ( $expecting_operator_token{$last_nonblank_token} ) {
		100
6423	7					16	$op_expected = OPERATOR;
6424							}
6425							elsif ( $expecting_term_token{$last_nonblank_token} ) {
6426
6427							# Exceptions from TERM:
6428
6429							# // may follow perl functions which may be unary operators
6430							# see test file dor.t (defined or);
6431	2523	100	100			11981	if (
		50	100
			66
6432							$tok eq '/'
6433							&& $next_type eq '/'
6434							&& $is_keyword_rejecting_slash_as_pattern_delimiter{
6435							$last_nonblank_token}
6436							)
6437							{
6438	1					3	$op_expected = OPERATOR;
6439							}
6440
6441							# Patch to allow a ? following 'split' to be a deprecated pattern
6442							# delimiter. This patch is coordinated with the omission of split
6443							# from the list
6444							# %is_keyword_rejecting_question_as_pattern_delimiter. This patch
6445							# will force perltidy to guess.
6446							elsif ($tok eq '?'
6447							&& $last_nonblank_token eq 'split' )
6448							{
6449	0					0	$op_expected = UNKNOWN;
6450							}
6451							}
6452							} ## end type 'k'
6453
6454							# closing container token...
6455
6456							# Note that the actual token for type '}' may also be a ')'.
6457
6458							# Also note that $last_nonblank_token is not the token corresponding to
6459							# $last_nonblank_type when the type is a closing container. In that
6460							# case it is the token before the corresponding opening container token.
6461							# So for example, for this snippet
6462							# $a = do { BLOCK } / 2;
6463							# the $last_nonblank_token is 'do' when $last_nonblank_type eq '}'.
6464
6465							elsif ( $last_nonblank_type eq '}' ) {
6466	3448					5718	$op_expected = UNKNOWN;
6467
6468							# handle something after 'do' and 'eval'
6469	3448	100	66			18814	if ( $is_block_operator{$last_nonblank_token} ) {
		100
		100
6470
6471							# something like $a = do { BLOCK } / 2;
6472	82					241	$op_expected = OPERATOR; # block mode following }
6473							}
6474
6475							# $last_nonblank_token =~ /^(\)\|\$\|\-\>)/
6476							elsif ( $is_paren_dollar{ substr( $last_nonblank_token, 0, 1 ) }
6477							\|\| substr( $last_nonblank_token, 0, 2 ) eq '->' )
6478							{
6479	2021					3375	$op_expected = OPERATOR;
6480	2021	50				4952	if ( $last_nonblank_token eq '$' ) { $op_expected = UNKNOWN }
	0					0
6481							}
6482
6483							# Check for smartmatch operator before preceding brace or square
6484							# bracket. For example, at the ? after the ] in the following
6485							# expressions we are expecting an operator:
6486							#
6487							# qr/3/ ~~ ['1234'] ? 1 : 0;
6488							# map { $_ ~~ [ '0', '1' ] ? 'x' : 'o' } @a;
6489							elsif ( $last_nonblank_token eq '~~' ) {
6490	20					48	$op_expected = OPERATOR;
6491							}
6492
6493							# A right brace here indicates the end of a simple block. All
6494							# non-structural right braces have type 'R' all braces associated with
6495							# block operator keywords have been given those keywords as
6496							# "last_nonblank_token" and caught above. (This statement is order
6497							# dependent, and must come after checking $last_nonblank_token).
6498							else {
6499
6500							# patch for dor.t (defined or).
6501	1325	50	33			6006	if ( $tok eq '/'
		100	33
6502							&& $next_type eq '/'
6503							&& $last_nonblank_token eq ']' )
6504							{
6505	0					0	$op_expected = OPERATOR;
6506							}
6507
6508							# Patch for RT #116344: misparse a ternary operator after an
6509							# anonymous hash, like this:
6510							# return ref {} ? 1 : 0;
6511							# The right brace should really be marked type 'R' in this case,
6512							# and it is safest to return an UNKNOWN here. Expecting a TERM will
6513							# cause the '?' to always be interpreted as a pattern delimiter
6514							# rather than introducing a ternary operator.
6515							elsif ( $tok eq '?' ) {
6516	1					4	$op_expected = UNKNOWN;
6517							}
6518							else {
6519	1324					2469	$op_expected = TERM;
6520							}
6521							}
6522							} ## end type '}'
6523
6524							# number or v-string...
6525							# An exception is for VERSION numbers a 'use' statement. It has the format
6526							# use Module VERSION LIST
6527							# We could avoid this exception by writing a special sub to parse 'use'
6528							# statements and perhaps mark these numbers with a new type V (for VERSION)
6529							##elsif ( $last_nonblank_type =~ /^[nv]$/ ) {
6530							elsif ( $is_n_v{$last_nonblank_type} ) {
6531	1916					3114	$op_expected = OPERATOR;
6532	1916	100				4350	if ( $statement_type eq 'use' ) {
6533	11					27	$op_expected = UNKNOWN;
6534							}
6535							}
6536
6537							# quote...
6538							# TODO: labeled prototype words would better be given type 'A' or maybe
6539							# 'J'; not 'q'; or maybe mark as type 'Y'?
6540							elsif ( $last_nonblank_type eq 'q' ) {
6541	137					296	$op_expected = OPERATOR;
6542	137	50				631	if ( $last_nonblank_token eq 'prototype' ) {
		100
6543	0					0	$op_expected = TERM;
6544							}
6545
6546							# update for --use-feature=class (rt145706):
6547							# Look for class VERSION after possible attribute, as in
6548							# class Example::Subclass : isa(Example::Base) 1.345 { ... }
6549							elsif ( $statement_type =~ /^package\b/ ) {
6550	3					6	$op_expected = TERM;
6551							}
6552							}
6553
6554							# file handle or similar
6555							elsif ( $last_nonblank_type eq 'Z' ) {
6556
6557	90					193	$op_expected = UNKNOWN;
6558
6559							# angle.t
6560	90	100	33			1199	if ( $last_nonblank_token =~ /^\w/ ) {
		50	100
		100
		100
		100
6561	2					7	$op_expected = UNKNOWN;
6562							}
6563
6564							# Exception to weird parsing rules for 'x(' ... see case b1205:
6565							# In something like 'print $vv x(...' the x is an operator;
6566							# Likewise in 'print $vv x$ww' the x is an operator (case b1207)
6567							# otherwise x follows the weird parsing rules.
6568							elsif ( $tok eq 'x' && $next_type =~ /^[\(\$\@\%]$/ ) {
6569	0					0	$op_expected = OPERATOR;
6570							}
6571
6572							# The 'weird parsing rules' of next section do not work for '<' and '?'
6573							# It is best to mark them as unknown. Test case:
6574							# print $fh <DATA>;
6575							elsif ( $is_weird_parsing_rule_exception{$tok} ) {
6576	4					10	$op_expected = UNKNOWN;
6577							}
6578
6579							# For possible file handle like "$a", Perl uses weird parsing rules.
6580							# For example:
6581							# print $a/2,"/hi"; - division
6582							# print $a / 2,"/hi"; - division
6583							# print $a/ 2,"/hi"; - division
6584							# print $a /2,"/hi"; - pattern (and error)!
6585							# Some examples where this logic works okay, for '&','*','+':
6586							# print $fh &xsi_protos(@mods);
6587							# my $x = new $CompressClass *FH;
6588							# print $OUT +( $count % 15 ? ", " : "\n\t" );
6589							elsif ($prev_type eq 'b'
6590							&& $next_type ne 'b' )
6591							{
6592	9					15	$op_expected = TERM;
6593							}
6594
6595							# Note that '?' and '<' have been moved above
6596							# ( $tok =~ /^([x\/\+\-\*\%\&\.\?\<]\|\>\>)$/ ) {
6597							elsif ( $tok =~ /^([x\/\+\-\*\%\&\.]\|\>\>)$/ ) {
6598
6599							# Do not complain in 'use' statements, which have special syntax.
6600							# For example, from RT#130344:
6601							# use lib $FindBin::Bin . '/lib';
6602	9	50				34	if ( $statement_type ne 'use' ) {
6603	9					40	$self->complain(
6604							"operator in possible indirect object location not recommended\n"
6605							);
6606							}
6607	9					28	$op_expected = OPERATOR;
6608							}
6609							}
6610
6611							# anything else...
6612							else {
6613	1					3	$op_expected = UNKNOWN;
6614							}
6615
6616	12511					16242	DEBUG_OPERATOR_EXPECTED
6617							&& print STDOUT
6618							"OPERATOR_EXPECTED: returns $op_expected for last type $last_nonblank_type token $last_nonblank_token\n";
6619
6620	12511					22751	return $op_expected;
6621
6622							} ## end sub operator_expected
6623
6624							sub new_statement_ok {
6625
6626							# return true if the current token can start a new statement
6627							# USES GLOBAL VARIABLES: $last_nonblank_type
6628
6629	81		66	81	0	310	return label_ok() # a label would be ok here
6630
6631							\|\| $last_nonblank_type eq 'J'; # or we follow a label
6632
6633							} ## end sub new_statement_ok
6634
6635							sub label_ok {
6636
6637							# Decide if a bare word followed by a colon here is a label
6638							# USES GLOBAL VARIABLES: $last_nonblank_token, $last_nonblank_type,
6639							# $brace_depth, $rbrace_type
6640
6641							# if it follows an opening or closing code block curly brace..
6642	114	100	100	114	0	1090	if ( ( $last_nonblank_token eq '{' \|\| $last_nonblank_token eq '}' )
			66
6643							&& $last_nonblank_type eq $last_nonblank_token )
6644							{
6645
6646							# it is a label if and only if the curly encloses a code block
6647	47					294	return $rbrace_type->[$brace_depth];
6648							}
6649
6650							# otherwise, it is a label if and only if it follows a ';' (real or fake)
6651							# or another label
6652							else {
6653	67		100			511	return ( $last_nonblank_type eq ';' \|\| $last_nonblank_type eq 'J' );
6654							}
6655							} ## end sub label_ok
6656
6657							sub code_block_type {
6658
6659							# Decide if this is a block of code, and its type.
6660							# Must be called only when $type = $token = '{'
6661							# The problem is to distinguish between the start of a block of code
6662							# and the start of an anonymous hash reference
6663							# Returns "" if not code block, otherwise returns 'last_nonblank_token'
6664							# to indicate the type of code block. (For example, 'last_nonblank_token'
6665							# might be 'if' for an if block, 'else' for an else block, etc).
6666							# USES GLOBAL VARIABLES: $last_nonblank_token, $last_nonblank_type,
6667							# $last_nonblank_block_type, $brace_depth, $rbrace_type
6668
6669							# handle case of multiple '{'s
6670
6671							# print "BLOCK_TYPE EXAMINING: type=$last_nonblank_type tok=$last_nonblank_token\n";
6672
6673	1296			1296	0	3300	my ( $self, $i, $rtokens, $rtoken_type, $max_token_index ) = @_;
6674	1296	100	66			18038	if ( $last_nonblank_token eq '{'
		100	66
		100	66
		100	100
		100	66
		100	33
		50	33
		50
		50
		100
		100
		100
6675							&& $last_nonblank_type eq $last_nonblank_token )
6676							{
6677
6678							# opening brace where a statement may appear is probably
6679							# a code block but might be and anonymous hash reference
6680	90	50				326	if ( $rbrace_type->[$brace_depth] ) {
6681	90					326	return $self->decide_if_code_block( $i, $rtokens, $rtoken_type,
6682							$max_token_index );
6683							}
6684
6685							# cannot start a code block within an anonymous hash
6686							else {
6687	0					0	return EMPTY_STRING;
6688							}
6689							}
6690
6691							elsif ( $last_nonblank_token eq ';' ) {
6692
6693							# an opening brace where a statement may appear is probably
6694							# a code block but might be and anonymous hash reference
6695	48					297	return $self->decide_if_code_block( $i, $rtokens, $rtoken_type,
6696							$max_token_index );
6697							}
6698
6699							# handle case of '}{'
6700							elsif ($last_nonblank_token eq '}'
6701							&& $last_nonblank_type eq $last_nonblank_token )
6702							{
6703
6704							# a } { situation ...
6705							# could be hash reference after code block..(blktype1.t)
6706	9	50				39	if ($last_nonblank_block_type) {
6707	9					33	return $self->decide_if_code_block( $i, $rtokens, $rtoken_type,
6708							$max_token_index );
6709							}
6710
6711							# must be a block if it follows a closing hash reference
6712							else {
6713	0					0	return $last_nonblank_token;
6714							}
6715							}
6716
6717							#--------------------------------------------------------------
6718							# NOTE: braces after type characters start code blocks, but for
6719							# simplicity these are not identified as such. See also
6720							# sub is_non_structural_brace.
6721							#--------------------------------------------------------------
6722
6723							## elsif ( $last_nonblank_type eq 't' ) {
6724							## return $last_nonblank_token;
6725							## }
6726
6727							# brace after label:
6728							elsif ( $last_nonblank_type eq 'J' ) {
6729	34					117	return $last_nonblank_token;
6730							}
6731
6732							# otherwise, look at previous token. This must be a code block if
6733							# it follows any of these:
6734							# /^(BEGIN\|END\|CHECK\|INIT\|AUTOLOAD\|DESTROY\|UNITCHECK\|continue\|if\|elsif\|else\|unless\|do\|while\|until\|eval\|for\|foreach\|map\|grep\|sort)$/
6735							elsif ($is_code_block_token{$last_nonblank_token}
6736							\|\| $is_grep_alias{$last_nonblank_token} )
6737							{
6738
6739							# Bug Patch: Note that the opening brace after the 'if' in the following
6740							# snippet is an anonymous hash ref and not a code block!
6741							# print 'hi' if { x => 1, }->{x};
6742							# We can identify this situation because the last nonblank type
6743							# will be a keyword (instead of a closing paren)
6744	476	50	33			2515	if (
			66
6745							$last_nonblank_type eq 'k'
6746							&& ( $last_nonblank_token eq 'if'
6747							\|\| $last_nonblank_token eq 'unless' )
6748							)
6749							{
6750	0					0	return EMPTY_STRING;
6751							}
6752							else {
6753	476					1472	return $last_nonblank_token;
6754							}
6755							}
6756
6757							# or a sub or package BLOCK
6758							elsif ( ( $last_nonblank_type eq 'i' \|\| $last_nonblank_type eq 't' )
6759							&& $last_nonblank_token =~ /^(sub\|package)\b/ )
6760							{
6761	293					1036	return $last_nonblank_token;
6762							}
6763
6764							# or a sub alias
6765							elsif (( $last_nonblank_type eq 'i' \|\| $last_nonblank_type eq 't' )
6766							&& ( $is_sub{$last_nonblank_token} ) )
6767							{
6768	0					0	return 'sub';
6769							}
6770
6771							elsif ( $statement_type =~ /^(sub\|package)\b/ ) {
6772	0					0	return $statement_type;
6773							}
6774
6775							# user-defined subs with block parameters (like grep/map/eval)
6776							elsif ( $last_nonblank_type eq 'G' ) {
6777	0					0	return $last_nonblank_token;
6778							}
6779
6780							# check bareword
6781							elsif ( $last_nonblank_type eq 'w' ) {
6782
6783							# check for syntax 'use MODULE LIST'
6784							# This fixes b1022 b1025 b1027 b1028 b1029 b1030 b1031
6785	22	100				105	return EMPTY_STRING if ( $statement_type eq 'use' );
6786
6787	21					110	return $self->decide_if_code_block( $i, $rtokens, $rtoken_type,
6788							$max_token_index );
6789							}
6790
6791							# Patch for bug # RT #94338 reported by Daniel Trizen
6792							# for-loop in a parenthesized block-map triggering an error message:
6793							# map( { foreach my $item ( '0', '1' ) { print $item} } qw(a b c) );
6794							# Check for a code block within a parenthesized function call
6795							elsif ( $last_nonblank_token eq '(' ) {
6796	81					213	my $paren_type = $rparen_type->[$paren_depth];
6797
6798							# /^(map\|grep\|sort)$/
6799	81	100	66			419	if ( $paren_type && $is_sort_map_grep{$paren_type} ) {
6800
6801							# We will mark this as a code block but use type 't' instead
6802							# of the name of the containing function. This will allow for
6803							# correct parsing but will usually produce better formatting.
6804							# Braces with block type 't' are not broken open automatically
6805							# in the formatter as are other code block types, and this usually
6806							# works best.
6807	1					4	return 't'; # (Not $paren_type)
6808							}
6809							else {
6810	80					233	return EMPTY_STRING;
6811							}
6812							}
6813
6814							# handle unknown syntax ') {'
6815							# we previously appended a '()' to mark this case
6816							elsif ( $last_nonblank_token =~ /$/ ) {
6817	14					53	return $last_nonblank_token;
6818							}
6819
6820							# anything else must be anonymous hash reference
6821							else {
6822	229					663	return EMPTY_STRING;
6823							}
6824							} ## end sub code_block_type
6825
6826							sub decide_if_code_block {
6827
6828							# USES GLOBAL VARIABLES: $last_nonblank_token
6829	168			168	0	450	my ( $self, $i, $rtokens, $rtoken_type, $max_token_index ) = @_;
6830
6831	168					596	my ( $next_nonblank_token, $i_next ) =
6832							$self->find_next_nonblank_token( $i, $rtokens, $max_token_index );
6833
6834							# we are at a '{' where a statement may appear.
6835							# We must decide if this brace starts an anonymous hash or a code
6836							# block.
6837							# return "" if anonymous hash, and $last_nonblank_token otherwise
6838
6839							# initialize to be code BLOCK
6840	168					448	my $code_block_type = $last_nonblank_token;
6841
6842							# Check for the common case of an empty anonymous hash reference:
6843							# Maybe something like sub { { } }
6844	168	100				557	if ( $next_nonblank_token eq '}' ) {
6845	5					14	$code_block_type = EMPTY_STRING;
6846							}
6847
6848							else {
6849
6850							# To guess if this '{' is an anonymous hash reference, look ahead
6851							# and test as follows:
6852							#
6853							# it is a hash reference if next come:
6854							# - a string or digit followed by a comma or =>
6855							# - bareword followed by =>
6856							# otherwise it is a code block
6857							#
6858							# Examples of anonymous hash ref:
6859							# {'aa',};
6860							# {1,2}
6861							#
6862							# Examples of code blocks:
6863							# {1; print "hello\n", 1;}
6864							# {$a,1};
6865
6866							# We are only going to look ahead one more (nonblank/comment) line.
6867							# Strange formatting could cause a bad guess, but that's unlikely.
6868	163					386	my @pre_types;
6869							my @pre_tokens;
6870
6871							# Ignore the rest of this line if it is a side comment
6872	163	100				459	if ( $next_nonblank_token ne '#' ) {
6873	139					487	@pre_types = @{$rtoken_type}[ $i + 1 .. $max_token_index ];
	139					725
6874	139					448	@pre_tokens = @{$rtokens}[ $i + 1 .. $max_token_index ];
	139					696
6875							}
6876
6877							# Here 20 is arbitrary but generous, and prevents wasting lots of time
6878							# in mangled files
6879	163					699	my ( $rpre_tokens, $rpre_types ) =
6880							$self->peek_ahead_for_n_nonblank_pre_tokens(20);
6881	163	100	66			601	if ( defined($rpre_types) && @{$rpre_types} ) {
	155					587
6882	155					288	push @pre_types, @{$rpre_types};
	155					648
6883	155					329	push @pre_tokens, @{$rpre_tokens};
	155					718
6884							}
6885
6886							# put a sentinel token to simplify stopping the search
6887	163					397	push @pre_types, '}';
6888	163					357	push @pre_types, '}';
6889
6890	163					301	my $jbeg = 0;
6891	163	100				490	$jbeg = 1 if $pre_types[0] eq 'b';
6892
6893							# first look for one of these
6894							# - bareword
6895							# - bareword with leading -
6896							# - digit
6897							# - quoted string
6898	163					288	my $j = $jbeg;
6899	163	100	33			1233	if ( $pre_types[$j] =~ /^[\'\"]/ ) {
		100
		100
		50
6900
6901							# find the closing quote; don't worry about escapes
6902	1					3	my $quote_mark = $pre_types[$j];
6903	1					5	foreach my $k ( $j + 1 .. @pre_types - 2 ) {
6904	1	50				6	if ( $pre_types[$k] eq $quote_mark ) {
6905	1					3	$j = $k + 1;
6906							##my $next = $pre_types[$j];
6907	1					2	last;
6908							}
6909							}
6910							}
6911							elsif ( $pre_types[$j] eq 'd' ) {
6912	8					15	$j++;
6913							}
6914							elsif ( $pre_types[$j] eq 'w' ) {
6915	71					168	$j++;
6916							}
6917							elsif ( $pre_types[$j] eq '-' && $pre_types[ ++$j ] eq 'w' ) {
6918	0					0	$j++;
6919							}
6920	163	100				487	if ( $j > $jbeg ) {
6921
6922	80	100				332	$j++ if $pre_types[$j] eq 'b';
6923
6924							# Patched for RT #95708
6925	80	100	33			665	if (
			66
			66
6926
6927							# it is a comma which is not a pattern delimiter except for qw
6928							(
6929							$pre_types[$j] eq ','
6930							## !~ /^(s\|m\|y\|tr\|qr\|q\|qq\|qx)$/
6931							&& !$is_q_qq_qx_qr_s_y_tr_m{ $pre_tokens[$jbeg] }
6932							)
6933
6934							# or a =>
6935							\|\| ( $pre_types[$j] eq '=' && $pre_types[ ++$j ] eq '>' )
6936							)
6937							{
6938	18					37	$code_block_type = EMPTY_STRING;
6939							}
6940							}
6941
6942	163	100				524	if ($code_block_type) {
6943
6944							# Patch for cases b1085 b1128: It is uncertain if this is a block.
6945							# If this brace follows a bareword, then append a space as a signal
6946							# to the formatter that this may not be a block brace. To find the
6947							# corresponding code in Formatter.pm search for 'b1085'.
6948	145	100				1181	$code_block_type .= SPACE if ( $code_block_type =~ /^\w/ );
6949							}
6950							}
6951
6952	168					595	return $code_block_type;
6953							} ## end sub decide_if_code_block
6954
6955							sub report_unexpected {
6956
6957							# report unexpected token type and show where it is
6958							# USES GLOBAL VARIABLES: (none)
6959	0			0	0	0	my ( $self, $found, $expecting, $i_tok, $last_nonblank_i, $rpretoken_map,
6960							$rpretoken_type, $input_line )
6961							= @_;
6962
6963	0	0				0	if ( ++$self->[_unexpected_error_count_] <= MAX_NAG_MESSAGES ) {
6964	0					0	my $msg = "found $found where $expecting expected";
6965	0					0	my $pos = $rpretoken_map->[$i_tok];
6966	0					0	$self->interrupt_logfile();
6967	0					0	my $input_line_number = $self->[_last_line_number_];
6968	0					0	my ( $offset, $numbered_line, $underline ) =
6969							make_numbered_line( $input_line_number, $input_line, $pos );
6970	0					0	$underline = write_on_underline( $underline, $pos - $offset, '^' );
6971
6972	0					0	my $trailer = EMPTY_STRING;
6973	0	0	0			0	if ( ( $i_tok > 0 ) && ( $last_nonblank_i >= 0 ) ) {
6974	0					0	my $pos_prev = $rpretoken_map->[$last_nonblank_i];
6975	0					0	my $num;
6976	0	0				0	if ( $rpretoken_type->[ $i_tok - 1 ] eq 'b' ) {
6977	0					0	$num = $rpretoken_map->[ $i_tok - 1 ] - $pos_prev;
6978							}
6979							else {
6980	0					0	$num = $pos - $pos_prev;
6981							}
6982	0	0				0	if ( $num > 40 ) { $num = 40; $pos_prev = $pos - 40; }
	0					0
	0					0
6983
6984							$underline =
6985	0					0	write_on_underline( $underline, $pos_prev - $offset, '-' x $num );
6986	0					0	$trailer = " (previous token underlined)";
6987							}
6988	0					0	$underline =~ s/\s+$//;
6989	0					0	$self->warning( $numbered_line . "\n" );
6990	0					0	$self->warning( $underline . "\n" );
6991	0					0	$self->warning( $msg . $trailer . "\n" );
6992	0					0	$self->resume_logfile();
6993							}
6994	0					0	return;
6995							} ## end sub report_unexpected
6996
6997							my %is_sigil_or_paren;
6998							my %is_R_closing_sb;
6999
7000							BEGIN {
7001
7002	38			38		294	my @q = qw< $ & % * @ ) >;
7003	38					324	@{is_sigil_or_paren}{@q} = (1) x scalar(@q);
7004
7005	38					173	@q = qw(R ]);
7006	38					84484	@{is_R_closing_sb}{@q} = (1) x scalar(@q);
7007							} ## end BEGIN
7008
7009							sub is_non_structural_brace {
7010
7011							# Decide if a brace or bracket is structural or non-structural
7012							# by looking at the previous token and type
7013							# USES GLOBAL VARIABLES: $last_nonblank_type, $last_nonblank_token
7014
7015							# EXPERIMENTAL: Mark slices as structural; idea was to improve formatting.
7016							# Tentatively deactivated because it caused the wrong operator expectation
7017							# for this code:
7018							# $user = @vars[1] / 100;
7019							# Must update sub operator_expected before re-implementing.
7020							# if ( $last_nonblank_type eq 'i' && $last_nonblank_token =~ /^@/ ) {
7021							# return 0;
7022							# }
7023
7024							#--------------------------------------------------------------
7025							# NOTE: braces after type characters start code blocks, but for
7026							# simplicity these are not identified as such. See also
7027							# sub code_block_type
7028							#--------------------------------------------------------------
7029
7030							##if ($last_nonblank_type eq 't') {return 0}
7031
7032							# otherwise, it is non-structural if it is decorated
7033							# by type information.
7034							# For example, the '{' here is non-structural: ${xxx}
7035							# Removed '::' to fix c074
7036							## $last_nonblank_token =~ /^([\$\@\*\&\%\)]\|->\|::)/
7037							return (
7038							## $last_nonblank_token =~ /^([\$\@\*\&\%\)]\|->)/
7039							$is_sigil_or_paren{ substr( $last_nonblank_token, 0, 1 ) }
7040							\|\| substr( $last_nonblank_token, 0, 2 ) eq '->'
7041
7042							# or if we follow a hash or array closing curly brace or bracket
7043							# For example, the second '{' in this is non-structural: $a{'x'}{'y'}
7044							# because the first '}' would have been given type 'R'
7045							##\|\| $last_nonblank_type =~ /^([R\]])$/
7046	2253		66	2253	0	14696	\|\| $is_R_closing_sb{$last_nonblank_type}
7047							);
7048							} ## end sub is_non_structural_brace
7049
7050							#######################################################################
7051							# Tokenizer routines for tracking container nesting depths
7052							#######################################################################
7053
7054							# The following routines keep track of nesting depths of the nesting
7055							# types, ( [ { and ?. This is necessary for determining the indentation
7056							# level, and also for debugging programs. Not only do they keep track of
7057							# nesting depths of the individual brace types, but they check that each
7058							# of the other brace types is balanced within matching pairs. For
7059							# example, if the program sees this sequence:
7060							#
7061							# { ( ( ) }
7062							#
7063							# then it can determine that there is an extra left paren somewhere
7064							# between the { and the }. And so on with every other possible
7065							# combination of outer and inner brace types. For another
7066							# example:
7067							#
7068							# ( [ ..... ] ] )
7069							#
7070							# which has an extra ] within the parens.
7071							#
7072							# The brace types have indexes 0 .. 3 which are indexes into
7073							# the matrices.
7074							#
7075							# The pair ? : are treated as just another nesting type, with ? acting
7076							# as the opening brace and : acting as the closing brace.
7077							#
7078							# The matrix
7079							#
7080							# $rdepth_array->[$a][$b][ $rcurrent_depth->[$a] ] = $rcurrent_depth->[$b];
7081							#
7082							# saves the nesting depth of brace type $b (where $b is either of the other
7083							# nesting types) when brace type $a enters a new depth. When this depth
7084							# decreases, a check is made that the current depth of brace types $b is
7085							# unchanged, or otherwise there must have been an error. This can
7086							# be very useful for localizing errors, particularly when perl runs to
7087							# the end of a large file (such as this one) and announces that there
7088							# is a problem somewhere.
7089							#
7090							# A numerical sequence number is maintained for every nesting type,
7091							# so that each matching pair can be uniquely identified in a simple
7092							# way.
7093
7094							sub increase_nesting_depth {
7095	4561			4561	0	9081	my ( $self, $aa, $pos ) = @_;
7096
7097							# USES GLOBAL VARIABLES: $rcurrent_depth,
7098							# $rcurrent_sequence_number, $rdepth_array,
7099							# $rstarting_line_of_current_depth, $statement_type
7100	4561					8055	my $cd_aa = ++$rcurrent_depth->[$aa];
7101	4561					6715	$total_depth++;
7102	4561					8791	$rtotal_depth->[$aa][$cd_aa] = $total_depth;
7103	4561					7465	my $input_line_number = $self->[_last_line_number_];
7104	4561					7598	my $input_line = $self->[_line_of_text_];
7105
7106							# Sequence numbers increment by number of items. This keeps
7107							# a unique set of numbers but still allows the relative location
7108							# of any type to be determined.
7109
7110							# make a new unique sequence number
7111	4561					7776	my $seqno = $next_sequence_number++;
7112
7113	4561					8216	$rcurrent_sequence_number->[$aa][$cd_aa] = $seqno;
7114
7115	4561					13446	$rstarting_line_of_current_depth->[$aa][$cd_aa] =
7116							[ $input_line_number, $input_line, $pos ];
7117
7118	4561					13763	for my $bb ( 0 .. @closing_brace_names - 1 ) {
7119	18244	100				33322	next if ( $bb == $aa );
7120	13683					26753	$rdepth_array->[$aa][$bb][$cd_aa] = $rcurrent_depth->[$bb];
7121							}
7122
7123							# set a flag for indenting a nested ternary statement
7124	4561					8333	my $indent = 0;
7125	4561	100				10328	if ( $aa == QUESTION_COLON ) {
7126	187					614	$rnested_ternary_flag->[$cd_aa] = 0;
7127	187	100				629	if ( $cd_aa > 1 ) {
7128	17	100				98	if ( $rnested_ternary_flag->[ $cd_aa - 1 ] == 0 ) {
7129	16					66	my $pdepth = $rtotal_depth->[$aa][ $cd_aa - 1 ];
7130	16	100				68	if ( $pdepth == $total_depth - 1 ) {
7131	8					15	$indent = 1;
7132	8					26	$rnested_ternary_flag->[ $cd_aa - 1 ] = -1;
7133							}
7134							}
7135							}
7136							}
7137
7138							# Fix part #1 for git82: save last token type for propagation of type 'Z'
7139	4561					15208	$rnested_statement_type->[$aa][$cd_aa] =
7140							[ $statement_type, $last_nonblank_type, $last_nonblank_token ];
7141	4561					7928	$statement_type = EMPTY_STRING;
7142	4561					12693	return ( $seqno, $indent );
7143							} ## end sub increase_nesting_depth
7144
7145							sub is_balanced_closing_container {
7146
7147							# Return true if a closing container can go here without error
7148							# Return false if not
7149	47			47	0	125	my ($aa) = @_;
7150
7151							# cannot close if there was no opening
7152	47					95	my $cd_aa = $rcurrent_depth->[$aa];
7153	47	100				187	return unless ( $cd_aa > 0 );
7154
7155							# check that any other brace types $bb contained within would be balanced
7156	8					29	for my $bb ( 0 .. @closing_brace_names - 1 ) {
7157	8	50				24	next if ( $bb == $aa );
7158							return
7159							unless (
7160	8	50				41	$rdepth_array->[$aa][$bb][$cd_aa] == $rcurrent_depth->[$bb] );
7161							}
7162
7163							# OK, everything will be balanced
7164	0					0	return 1;
7165							} ## end sub is_balanced_closing_container
7166
7167							sub decrease_nesting_depth {
7168
7169	4561			4561	0	9104	my ( $self, $aa, $pos ) = @_;
7170
7171							# USES GLOBAL VARIABLES: $rcurrent_depth,
7172							# $rcurrent_sequence_number, $rdepth_array, $rstarting_line_of_current_depth
7173							# $statement_type
7174	4561					7024	my $seqno = 0;
7175	4561					7249	my $input_line_number = $self->[_last_line_number_];
7176	4561					7721	my $input_line = $self->[_line_of_text_];
7177
7178	4561					6882	my $outdent = 0;
7179	4561					6766	$total_depth--;
7180	4561					7878	my $cd_aa = $rcurrent_depth->[$aa];
7181	4561	50				9255	if ( $cd_aa > 0 ) {
7182
7183							# set a flag for un-indenting after seeing a nested ternary statement
7184	4561					8338	$seqno = $rcurrent_sequence_number->[$aa][$cd_aa];
7185	4561	100				9757	if ( $aa == QUESTION_COLON ) {
7186	187					556	$outdent = $rnested_ternary_flag->[$cd_aa];
7187							}
7188
7189							# Fix part #2 for git82: use saved type for propagation of type 'Z'
7190							# through type L-R braces. Perl seems to allow ${bareword}
7191							# as an indirect object, but nothing much more complex than that.
7192							( $statement_type, my $saved_type, my $saved_token ) =
7193	4561					6634	@{ $rnested_statement_type->[$aa][ $rcurrent_depth->[$aa] ] };
	4561					12980
7194	4561	50	100			16127	if ( $aa == BRACE
			66
			66
7195							&& $saved_type eq 'Z'
7196							&& $last_nonblank_type eq 'w'
7197							&& $rbrace_structural_type->[$brace_depth] eq 'L' )
7198							{
7199	1					4	$last_nonblank_type = $saved_type;
7200							}
7201
7202							# check that any brace types $bb contained within are balanced
7203	4561					12842	for my $bb ( 0 .. @closing_brace_names - 1 ) {
7204	18244	100				32516	next if ( $bb == $aa );
7205
7206	13683	50				31970	unless (
7207							$rdepth_array->[$aa][$bb][$cd_aa] == $rcurrent_depth->[$bb] )
7208							{
7209	0					0	my $diff =
7210							$rcurrent_depth->[$bb] - $rdepth_array->[$aa][$bb][$cd_aa];
7211
7212							# don't whine too many times
7213	0					0	my $saw_brace_error = $self->get_saw_brace_error();
7214	0	0	0			0	if (
			0
7215							$saw_brace_error <= MAX_NAG_MESSAGES
7216
7217							# if too many closing types have occurred, we probably
7218							# already caught this error
7219							&& ( ( $diff > 0 ) \|\| ( $saw_brace_error <= 0 ) )
7220							)
7221							{
7222	0					0	$self->interrupt_logfile();
7223	0					0	my $rsl = $rstarting_line_of_current_depth->[$aa][$cd_aa];
7224	0					0	my $sl = $rsl->[0];
7225	0					0	my $rel = [ $input_line_number, $input_line, $pos ];
7226	0					0	my $el = $rel->[0];
7227	0					0	my ($ess);
7228
7229	0	0	0			0	if ( $diff == 1 \|\| $diff == -1 ) {
7230	0					0	$ess = EMPTY_STRING;
7231							}
7232							else {
7233	0					0	$ess = 's';
7234							}
7235	0	0				0	my $bname =
7236							( $diff > 0 )
7237							? $opening_brace_names[$bb]
7238							: $closing_brace_names[$bb];
7239	0					0	$self->write_error_indicator_pair( @{$rsl}, '^' );
	0					0
7240	0					0	my $msg = <<"EOM";
7241							Found $diff extra $bname$ess between $opening_brace_names[$aa] on line $sl and $closing_brace_names[$aa] on line $el
7242							EOM
7243
7244	0	0				0	if ( $diff > 0 ) {
7245	0					0	my $rml =
7246							$rstarting_line_of_current_depth->[$bb]
7247							[ $rcurrent_depth->[$bb] ];
7248	0					0	my $ml = $rml->[0];
7249	0					0	$msg .=
7250							" The most recent un-matched $bname is on line $ml\n";
7251	0					0	$self->write_error_indicator_pair( @{$rml}, '^' );
	0					0
7252							}
7253	0					0	$self->write_error_indicator_pair( @{$rel}, '^' );
	0					0
7254	0					0	$self->warning($msg);
7255	0					0	$self->resume_logfile();
7256							}
7257	0					0	$self->increment_brace_error();
7258							}
7259							}
7260	4561					8410	$rcurrent_depth->[$aa]--;
7261							}
7262							else {
7263
7264	0					0	my $saw_brace_error = $self->get_saw_brace_error();
7265	0	0				0	if ( $saw_brace_error <= MAX_NAG_MESSAGES ) {
7266	0					0	my $msg = <<"EOM";
7267							There is no previous $opening_brace_names[$aa] to match a $closing_brace_names[$aa] on line $input_line_number
7268							EOM
7269	0					0	$self->indicate_error( $msg, $input_line_number, $input_line, $pos,
7270							'^' );
7271							}
7272	0					0	$self->increment_brace_error();
7273
7274							# keep track of errors in braces alone (ignoring ternary nesting errors)
7275	0	0				0	$self->[_true_brace_error_count_]++
7276							if ( $closing_brace_names[$aa] ne "':'" );
7277							}
7278	4561					12189	return ( $seqno, $outdent );
7279							} ## end sub decrease_nesting_depth
7280
7281							sub check_final_nesting_depths {
7282
7283							# USES GLOBAL VARIABLES: $rcurrent_depth, $rstarting_line_of_current_depth
7284	556			556	0	1346	my $self = shift;
7285
7286	556					2342	for my $aa ( 0 .. @closing_brace_names - 1 ) {
7287
7288	2224					3752	my $cd_aa = $rcurrent_depth->[$aa];
7289	2224	50				5343	if ($cd_aa) {
7290	0					0	my $rsl = $rstarting_line_of_current_depth->[$aa][$cd_aa];
7291	0					0	my $sl = $rsl->[0];
7292	0					0	my $msg = <<"EOM";
7293							Final nesting depth of $opening_brace_names[$aa]s is $cd_aa
7294							The most recent un-matched $opening_brace_names[$aa] is on line $sl
7295							EOM
7296	0					0	$self->indicate_error( $msg, @{$rsl}, '^' );
	0					0
7297	0					0	$self->increment_brace_error();
7298							}
7299							}
7300	556					1513	return;
7301							} ## end sub check_final_nesting_depths
7302
7303							#######################################################################
7304							# Tokenizer routines for looking ahead in input stream
7305							#######################################################################
7306
7307							sub peek_ahead_for_n_nonblank_pre_tokens {
7308
7309							# returns next n pretokens if they exist
7310							# returns undef's if hits eof without seeing any pretokens
7311							# USES GLOBAL VARIABLES: (none)
7312	170			170	0	454	my ( $self, $max_pretokens ) = @_;
7313	170					307	my $line;
7314	170					289	my $i = 0;
7315	170					588	my ( $rpre_tokens, $rmap, $rpre_types );
7316
7317	170					749	while ( $line = $self->[_line_buffer_object_]->peek_ahead( $i++ ) ) {
7318	182					975	$line =~ s/^\s*//; # trim leading blanks
7319	182	100				642	next if ( length($line) <= 0 ); # skip blank
7320	176	100				601	next if ( $line =~ /^#/ ); # skip comment
7321	162					433	( $rpre_tokens, $rmap, $rpre_types ) =
7322							pre_tokenize( $line, $max_pretokens );
7323	162					464	last;
7324							}
7325	170					656	return ( $rpre_tokens, $rpre_types );
7326							} ## end sub peek_ahead_for_n_nonblank_pre_tokens
7327
7328							# look ahead for next non-blank, non-comment line of code
7329							sub peek_ahead_for_nonblank_token {
7330
7331							# USES GLOBAL VARIABLES: (none)
7332	115			115	0	374	my ( $self, $rtokens, $max_token_index ) = @_;
7333	115					216	my $line;
7334	115					235	my $i = 0;
7335
7336	115					586	while ( $line = $self->[_line_buffer_object_]->peek_ahead( $i++ ) ) {
7337	159					863	$line =~ s/^\s*//; # trim leading blanks
7338	159	100				619	next if ( length($line) <= 0 ); # skip blank
7339	134	100				551	next if ( $line =~ /^#/ ); # skip comment
7340
7341							# Updated from 2 to 3 to get trigraphs, added for case b1175
7342	113					415	my ( $rtok, $rmap, $rtype ) = pre_tokenize( $line, 3 );
7343	113					423	my $j = $max_token_index + 1;
7344
7345	113					265	foreach my $tok ( @{$rtok} ) {
	113					368
7346	327	100				901	last if ( $tok =~ "\n" );
7347	294					740	$rtokens->[ ++$j ] = $tok;
7348							}
7349	113					452	last;
7350							}
7351	115					364	return;
7352							} ## end sub peek_ahead_for_nonblank_token
7353
7354							#######################################################################
7355							# Tokenizer guessing routines for ambiguous situations
7356							#######################################################################
7357
7358							sub guess_if_pattern_or_conditional {
7359
7360							# this routine is called when we have encountered a ? following an
7361							# unknown bareword, and we must decide if it starts a pattern or not
7362							# input parameters:
7363							# $i - token index of the ? starting possible pattern
7364							# output parameters:
7365							# $is_pattern = 0 if probably not pattern, =1 if probably a pattern
7366							# msg = a warning or diagnostic message
7367							# USES GLOBAL VARIABLES: $last_nonblank_token
7368
7369	11			11	0	43	my ( $self, $i, $rtokens, $rtoken_map, $max_token_index ) = @_;
7370	11					28	my $is_pattern = 0;
7371	11					48	my $msg = "guessing that ? after $last_nonblank_token starts a ";
7372
7373	11	50				44	if ( $i >= $max_token_index ) {
7374	0					0	$msg .= "conditional (no end to pattern found on the line)\n";
7375							}
7376							else {
7377	11					30	my $ibeg = $i;
7378	11					27	$i = $ibeg + 1;
7379	11					34	my $next_token = $rtokens->[$i]; # first token after ?
7380
7381							# look for a possible ending ? on this line..
7382	11					35	my $in_quote = 1;
7383	11					25	my $quote_depth = 0;
7384	11					28	my $quote_character = EMPTY_STRING;
7385	11					20	my $quote_pos = 0;
7386	11					31	my $quoted_string;
7387							(
7388
7389	11					54	$i,
7390							$in_quote,
7391							$quote_character,
7392							$quote_pos,
7393							$quote_depth,
7394							$quoted_string,
7395
7396							) = $self->follow_quoted_string(
7397
7398							$ibeg,
7399							$in_quote,
7400							$rtokens,
7401							$quote_character,
7402							$quote_pos,
7403							$quote_depth,
7404							$max_token_index,
7405
7406							);
7407
7408	11	50				70	if ($in_quote) {
7409
7410							# we didn't find an ending ? on this line,
7411							# so we bias towards conditional
7412	11					33	$is_pattern = 0;
7413	11					44	$msg .= "conditional (no ending ? on this line)\n";
7414
7415							# we found an ending ?, so we bias towards a pattern
7416							}
7417							else {
7418
7419							# Watch out for an ending ? in quotes, like this
7420							# my $case_flag = File::Spec->case_tolerant ? '(?i)' : '';
7421	0					0	my $s_quote = 0;
7422	0					0	my $d_quote = 0;
7423	0					0	my $colons = 0;
7424	0					0	foreach my $ii ( $ibeg + 1 .. $i - 1 ) {
7425	0					0	my $tok = $rtokens->[$ii];
7426	0	0				0	if ( $tok eq ":" ) { $colons++ }
	0					0
7427	0	0				0	if ( $tok eq "'" ) { $s_quote++ }
	0					0
7428	0	0				0	if ( $tok eq '"' ) { $d_quote++ }
	0					0
7429							}
7430	0	0	0			0	if ( $s_quote % 2 \|\| $d_quote % 2 \|\| $colons ) {
		0	0
7431	0					0	$is_pattern = 0;
7432	0					0	$msg .= "found ending ? but unbalanced quote chars\n";
7433							}
7434							elsif (
7435							$self->pattern_expected( $i, $rtokens, $max_token_index ) >= 0 )
7436							{
7437	0					0	$is_pattern = 1;
7438	0					0	$msg .= "pattern (found ending ? and pattern expected)\n";
7439							}
7440							else {
7441	0					0	$msg .= "pattern (uncertain, but found ending ?)\n";
7442							}
7443							}
7444							}
7445	11					43	return ( $is_pattern, $msg );
7446							} ## end sub guess_if_pattern_or_conditional
7447
7448							my %is_known_constant;
7449							my %is_known_function;
7450
7451							BEGIN {
7452
7453							# Constants like 'pi' in Trig.pm are common
7454	38			38		283	my @q = qw(pi pi2 pi4 pip2 pip4);
7455	38					297	@{is_known_constant}{@q} = (1) x scalar(@q);
7456
7457							# parenless calls of 'ok' are common
7458	38					139	@q = qw( ok );
7459	38					67026	@{is_known_function}{@q} = (1) x scalar(@q);
7460							} ## end BEGIN
7461
7462							sub guess_if_pattern_or_division {
7463
7464							# this routine is called when we have encountered a / following an
7465							# unknown bareword, and we must decide if it starts a pattern or is a
7466							# division
7467							# input parameters:
7468							# $i - token index of the / starting possible pattern
7469							# output parameters:
7470							# $is_pattern = 0 if probably division, =1 if probably a pattern
7471							# msg = a warning or diagnostic message
7472							# USES GLOBAL VARIABLES: $last_nonblank_token
7473	0			0	0	0	my ( $self, $i, $rtokens, $rtoken_map, $max_token_index ) = @_;
7474	0					0	my $is_pattern = 0;
7475	0					0	my $msg = "guessing that / after $last_nonblank_token starts a ";
7476
7477	0	0				0	if ( $i >= $max_token_index ) {
7478	0					0	$msg .= "division (no end to pattern found on the line)\n";
7479							}
7480							else {
7481	0					0	my $ibeg = $i;
7482	0					0	my $divide_possible =
7483							$self->is_possible_numerator( $i, $rtokens, $max_token_index );
7484
7485	0	0				0	if ( $divide_possible < 0 ) {
7486	0					0	$msg = "pattern (division not possible here)\n";
7487	0					0	$is_pattern = 1;
7488	0					0	return ( $is_pattern, $msg );
7489							}
7490
7491	0					0	$i = $ibeg + 1;
7492	0					0	my $next_token = $rtokens->[$i]; # first token after slash
7493
7494							# One of the things we can look at is the spacing around the slash.
7495							# There # are four possible spacings around the first slash:
7496							#
7497							# return pi/two;#/; -/-
7498							# return pi/ two;#/; -/+
7499							# return pi / two;#/; +/+
7500							# return pi /two;#/; +/- <-- possible pattern
7501							#
7502							# Spacing rule: a space before the slash but not after the slash
7503							# usually indicates a pattern. We can use this to break ties.
7504
7505	0		0			0	my $is_pattern_by_spacing =
7506							( $i > 1 && $next_token !~ m/^\s/ && $rtokens->[ $i - 2 ] =~ m/^\s/ );
7507
7508							# look for a possible ending / on this line..
7509	0					0	my $in_quote = 1;
7510	0					0	my $quote_depth = 0;
7511	0					0	my $quote_character = EMPTY_STRING;
7512	0					0	my $quote_pos = 0;
7513	0					0	my $quoted_string;
7514							(
7515	0					0	$i, $in_quote, $quote_character, $quote_pos, $quote_depth,
7516							$quoted_string
7517							)
7518							= $self->follow_quoted_string( $ibeg, $in_quote, $rtokens,
7519							$quote_character, $quote_pos, $quote_depth, $max_token_index );
7520
7521	0	0				0	if ($in_quote) {
7522
7523							# we didn't find an ending / on this line, so we bias towards
7524							# division
7525	0	0				0	if ( $divide_possible >= 0 ) {
7526	0					0	$is_pattern = 0;
7527	0					0	$msg .= "division (no ending / on this line)\n";
7528							}
7529							else {
7530
7531							# assuming a multi-line pattern ... this is risky, but division
7532							# does not seem possible. If this fails, it would either be due
7533							# to a syntax error in the code, or the division_expected logic
7534							# needs to be fixed.
7535	0					0	$msg = "multi-line pattern (division not possible)\n";
7536	0					0	$is_pattern = 1;
7537							}
7538							}
7539
7540							# we found an ending /, so we bias slightly towards a pattern
7541							else {
7542
7543	0					0	my $pattern_expected =
7544							$self->pattern_expected( $i, $rtokens, $max_token_index );
7545
7546	0	0				0	if ( $pattern_expected >= 0 ) {
7547
7548							# pattern looks possible...
7549	0	0				0	if ( $divide_possible >= 0 ) {
7550
7551							# Both pattern and divide can work here...
7552
7553							# Increase weight of divide if a pure number follows
7554	0					0	$divide_possible += $next_token =~ /^\d+$/;
7555
7556							# Check for known constants in the numerator, like 'pi'
7557	0	0				0	if ( $is_known_constant{$last_nonblank_token} ) {
		0
		0
		0
7558	0					0	$msg .=
7559							"division (pattern works too but saw known constant '$last_nonblank_token')\n";
7560	0					0	$is_pattern = 0;
7561							}
7562
7563							# A very common bare word in pattern expressions is 'ok'
7564							elsif ( $is_known_function{$last_nonblank_token} ) {
7565	0					0	$msg .=
7566							"pattern (division works too but saw '$last_nonblank_token')\n";
7567	0					0	$is_pattern = 1;
7568							}
7569
7570							# If one rule is more definite, use it
7571							elsif ( $divide_possible > $pattern_expected ) {
7572	0					0	$msg .=
7573							"division (more likely based on following tokens)\n";
7574	0					0	$is_pattern = 0;
7575							}
7576
7577							# otherwise, use the spacing rule
7578							elsif ($is_pattern_by_spacing) {
7579	0					0	$msg .=
7580							"pattern (guess on spacing, but division possible too)\n";
7581	0					0	$is_pattern = 1;
7582							}
7583							else {
7584	0					0	$msg .=
7585							"division (guess on spacing, but pattern is possible too)\n";
7586	0					0	$is_pattern = 0;
7587							}
7588							}
7589
7590							# divide_possible < 0 means divide can not work here
7591							else {
7592	0					0	$is_pattern = 1;
7593	0					0	$msg .= "pattern (division not possible)\n";
7594							}
7595							}
7596
7597							# pattern does not look possible...
7598							else {
7599
7600	0	0				0	if ( $divide_possible >= 0 ) {
7601	0					0	$is_pattern = 0;
7602	0					0	$msg .= "division (pattern not possible)\n";
7603							}
7604
7605							# Neither pattern nor divide look possible...go by spacing
7606							else {
7607	0	0				0	if ($is_pattern_by_spacing) {
7608	0					0	$msg .= "pattern (guess on spacing)\n";
7609	0					0	$is_pattern = 1;
7610							}
7611							else {
7612	0					0	$msg .= "division (guess on spacing)\n";
7613	0					0	$is_pattern = 0;
7614							}
7615							}
7616							}
7617							}
7618							}
7619	0					0	return ( $is_pattern, $msg );
7620							} ## end sub guess_if_pattern_or_division
7621
7622							# try to resolve here-doc vs. shift by looking ahead for
7623							# non-code or the end token (currently only looks for end token)
7624							# returns 1 if it is probably a here doc, 0 if not
7625							sub guess_if_here_doc {
7626
7627	0			0	0	0	my ( $self, $next_token ) = @_;
7628
7629							# This is how many lines we will search for a target as part of the
7630							# guessing strategy. It is a constant because there is probably
7631							# little reason to change it.
7632							# USES GLOBAL VARIABLES: $current_package $ris_constant,
7633	0					0	my $HERE_DOC_WINDOW = 40;
7634
7635	0					0	my $here_doc_expected = 0;
7636	0					0	my $line;
7637	0					0	my $k = 0;
7638	0					0	my $msg = "checking <<";
7639
7640	0					0	while ( $line = $self->[_line_buffer_object_]->peek_ahead( $k++ ) ) {
7641	0					0	chomp $line;
7642
7643	0	0				0	if ( $line =~ /^$next_token$/ ) {
7644	0					0	$msg .= " -- found target $next_token ahead $k lines\n";
7645	0					0	$here_doc_expected = 1; # got it
7646	0					0	last;
7647							}
7648	0	0				0	last if ( $k >= $HERE_DOC_WINDOW );
7649							}
7650
7651	0	0				0	unless ($here_doc_expected) {
7652
7653	0	0				0	if ( !defined($line) ) {
7654	0					0	$here_doc_expected = -1; # hit eof without seeing target
7655	0					0	$msg .= " -- must be shift; target $next_token not in file\n";
7656
7657							}
7658							else { # still unsure..taking a wild guess
7659
7660	0	0				0	if ( !$ris_constant->{$current_package}{$next_token} ) {
7661	0					0	$here_doc_expected = 1;
7662	0					0	$msg .=
7663							" -- guessing it's a here-doc ($next_token not a constant)\n";
7664							}
7665							else {
7666	0					0	$msg .=
7667							" -- guessing it's a shift ($next_token is a constant)\n";
7668							}
7669							}
7670							}
7671	0					0	$self->write_logfile_entry($msg);
7672	0					0	return $here_doc_expected;
7673							} ## end sub guess_if_here_doc
7674
7675							#######################################################################
7676							# Tokenizer Routines for scanning identifiers and related items
7677							#######################################################################
7678
7679							sub scan_bare_identifier_do {
7680
7681							# this routine is called to scan a token starting with an alphanumeric
7682							# variable or package separator, :: or '.
7683							# USES GLOBAL VARIABLES: $current_package, $last_nonblank_token,
7684							# $last_nonblank_type, $rparen_type, $paren_depth
7685
7686	1672			1672	0	6003	my ( $self, $input_line, $i, $tok, $type, $prototype, $rtoken_map,
7687							$max_token_index )
7688							= @_;
7689	1672					2654	my $i_begin = $i;
7690	1672					2806	my $package = undef;
7691
7692	1672					2487	my $i_beg = $i;
7693
7694							# we have to back up one pretoken at a :: since each : is one pretoken
7695	1672	100				4142	if ( $tok eq '::' ) { $i_beg-- }
	9					19
7696	1672	50				3817	if ( $tok eq '->' ) { $i_beg-- }
	0					0
7697	1672					2995	my $pos_beg = $rtoken_map->[$i_beg];
7698	1672					4921	pos($input_line) = $pos_beg;
7699
7700							# Examples:
7701							# A::B::C
7702							# A::
7703							# ::A
7704							# A'B
7705	1672	50				12484	if ( $input_line =~ m/\G\s((?:\w(?:'\|::)))*(?:(?:->)?(\w+))?/gc ) {
7706
7707	1672					3345	my $pos = pos($input_line);
7708	1672					2842	my $numc = $pos - $pos_beg;
7709	1672					3597	$tok = substr( $input_line, $pos_beg, $numc );
7710
7711							# type 'w' includes anything without leading type info
7712							# ($,%,@,*) including something like abc::def::ghi
7713	1672					2837	$type = 'w';
7714
7715	1672					2784	my $sub_name = EMPTY_STRING;
7716	1672	100				4756	if ( defined($2) ) { $sub_name = $2; }
	1667					3264
7717	1672	100				4212	if ( defined($1) ) {
7718	274					639	$package = $1;
7719
7720							# patch: don't allow isolated package name which just ends
7721							# in the old style package separator (single quote). Example:
7722							# use CGI':all';
7723	274	50	66			1097	if ( !($sub_name) && substr( $package, -1, 1 ) eq '\'' ) {
7724	0					0	$pos--;
7725							}
7726
7727	274					783	$package =~ s/\'/::/g;
7728	274	100				828	if ( $package =~ /^\:/ ) { $package = 'main' . $package }
	9					23
7729	274					1224	$package =~ s/::$//;
7730							}
7731							else {
7732	1398					2695	$package = $current_package;
7733
7734							# patched for c043, part 1: keyword does not follow '->'
7735	1398	50	66			5374	if ( $is_keyword{$tok} && $last_nonblank_type ne '->' ) {
7736	0					0	$type = 'k';
7737							}
7738							}
7739
7740							# if it is a bareword.. patched for c043, part 2: not following '->'
7741	1672	100	66			8080	if ( $type eq 'w' && $last_nonblank_type ne '->' ) {
7742
7743							# check for v-string with leading 'v' type character
7744							# (This seems to have precedence over filehandle, type 'Y')
7745	1003	100	66			14264	if ( $tok =~ /^v\d[_\d]*$/ ) {
		100	66
		100	66
		50
		50
		100
		100
7746
7747							# we only have the first part - something like 'v101' -
7748							# look for more
7749	2	50				16	if ( $input_line =~ m/\G(\.\d[_\d]*)+/gc ) {
7750	2					8	$pos = pos($input_line);
7751	2					4	$numc = $pos - $pos_beg;
7752	2					5	$tok = substr( $input_line, $pos_beg, $numc );
7753							}
7754	2					8	$type = 'v';
7755
7756							# warn if this version can't handle v-strings
7757	2					16	$self->report_v_string($tok);
7758							}
7759
7760							elsif ( $ris_constant->{$package}{$sub_name} ) {
7761	12					39	$type = 'C';
7762							}
7763
7764							# bareword after sort has implied empty prototype; for example:
7765							# @sorted = sort numerically ( 53, 29, 11, 32, 7 );
7766							# This has priority over whatever the user has specified.
7767							elsif ($last_nonblank_token eq 'sort'
7768							&& $last_nonblank_type eq 'k' )
7769							{
7770	1					3	$type = 'Z';
7771							}
7772
7773							# Note: strangely, perl does not seem to really let you create
7774							# functions which act like eval and do, in the sense that eval
7775							# and do may have operators following the final }, but any operators
7776							# that you create with prototype (&) apparently do not allow
7777							# trailing operators, only terms. This seems strange.
7778							# If this ever changes, here is the update
7779							# to make perltidy behave accordingly:
7780
7781							# elsif ( $ris_block_function->{$package}{$tok} ) {
7782							# $tok='eval'; # patch to do braces like eval - doesn't work
7783							# $type = 'k';
7784							#}
7785							# TODO: This could become a separate type to allow for different
7786							# future behavior:
7787							elsif ( $ris_block_function->{$package}{$sub_name} ) {
7788	0					0	$type = 'G';
7789							}
7790							elsif ( $ris_block_list_function->{$package}{$sub_name} ) {
7791	0					0	$type = 'G';
7792							}
7793							elsif ( $ris_user_function->{$package}{$sub_name} ) {
7794	6					14	$type = 'U';
7795	6					433	$prototype = $ruser_function_prototype->{$package}{$sub_name};
7796							}
7797
7798							# check for indirect object
7799							elsif (
7800
7801							# added 2001-03-27: must not be followed immediately by '('
7802							# see fhandle.t
7803							( $input_line !~ m/\G\(/gc )
7804
7805							# and
7806							&& (
7807
7808							# preceded by keyword like 'print', 'printf' and friends
7809							$is_indirect_object_taker{$last_nonblank_token}
7810
7811							# or preceded by something like 'print(' or 'printf('
7812							\|\| (
7813							( $last_nonblank_token eq '(' )
7814							&& $is_indirect_object_taker{
7815							$rparen_type->[$paren_depth]
7816							}
7817
7818							)
7819							)
7820							)
7821							{
7822
7823							# may not be indirect object unless followed by a space;
7824							# updated 2021-01-16 to consider newline to be a space.
7825							# updated for case b990 to look for either ';' or space
7826	4	50	33			63	if ( pos($input_line) == length($input_line)
7827							\|\| $input_line =~ m/\G[;\s]/gc )
7828							{
7829	4					14	$type = 'Y';
7830
7831							# Abandon Hope ...
7832							# Perl's indirect object notation is a very bad
7833							# thing and can cause subtle bugs, especially for
7834							# beginning programmers. And I haven't even been
7835							# able to figure out a sane warning scheme which
7836							# doesn't get in the way of good scripts.
7837
7838							# Complain if a filehandle has any lower case
7839							# letters. This is suggested good practice.
7840							# Use 'sub_name' because something like
7841							# main::MYHANDLE is ok for filehandle
7842	4	100				23	if ( $sub_name =~ /[a-z]/ ) {
7843
7844							# could be bug caused by older perltidy if
7845							# followed by '('
7846	1	50				12	if ( $input_line =~ m/\G\s*\(/gc ) {
7847	1					9	$self->complain(
7848							"Caution: unknown word '$tok' in indirect object slot\n"
7849							);
7850							}
7851							}
7852							}
7853
7854							# bareword not followed by a space -- may not be filehandle
7855							# (may be function call defined in a 'use' statement)
7856							else {
7857	0					0	$type = 'Z';
7858							}
7859							}
7860							}
7861
7862							# Now we must convert back from character position
7863							# to pre_token index.
7864							# I don't think an error flag can occur here ..but who knows
7865	1672					2854	my $error;
7866	1672					4811	( $i, $error ) =
7867							inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );
7868	1672	50				4629	if ($error) {
7869	0					0	$self->warning(
7870							"scan_bare_identifier: Possibly invalid tokenization\n");
7871							}
7872							}
7873
7874							# no match but line not blank - could be syntax error
7875							# perl will take '::' alone without complaint
7876							else {
7877	0					0	$type = 'w';
7878
7879							# change this warning to log message if it becomes annoying
7880	0					0	$self->warning("didn't find identifier after leading ::\n");
7881							}
7882	1672					7089	return ( $i, $tok, $type, $prototype );
7883							} ## end sub scan_bare_identifier_do
7884
7885							sub scan_id_do {
7886
7887							# This is the new scanner and will eventually replace scan_identifier.
7888							# Only type 'sub' and 'package' are implemented.
7889							# Token types $ * % @ & -> are not yet implemented.
7890							#
7891							# Scan identifier following a type token.
7892							# The type of call depends on $id_scan_state: $id_scan_state = ''
7893							# for starting call, in which case $tok must be the token defining
7894							# the type.
7895							#
7896							# If the type token is the last nonblank token on the line, a value
7897							# of $id_scan_state = $tok is returned, indicating that further
7898							# calls must be made to get the identifier. If the type token is
7899							# not the last nonblank token on the line, the identifier is
7900							# scanned and handled and a value of '' is returned.
7901
7902	330			330	0	1189	my ( $self, $input_line, $i, $tok, $rtokens, $rtoken_map, $id_scan_state,
7903							$max_token_index )
7904							= @_;
7905	38			38		410	use constant DEBUG_NSCAN => 0;
	38					133
	38					46761
7906	330					673	my $type = EMPTY_STRING;
7907	330					669	my ( $i_beg, $pos_beg );
7908
7909							#print "NSCAN:entering i=$i, tok=$tok, type=$type, state=$id_scan_state\n";
7910							#my ($a,$b,$c) = caller;
7911							#print "NSCAN: scan_id called with tok=$tok $a $b $c\n";
7912
7913							# on re-entry, start scanning at first token on the line
7914	330	100				825	if ($id_scan_state) {
7915	10					39	$i_beg = $i;
7916	10					40	$type = EMPTY_STRING;
7917							}
7918
7919							# on initial entry, start scanning just after type token
7920							else {
7921	320					594	$i_beg = $i + 1;
7922	320					547	$id_scan_state = $tok;
7923	320					693	$type = 't';
7924							}
7925
7926							# find $i_beg = index of next nonblank token,
7927							# and handle empty lines
7928	330					573	my $blank_line = 0;
7929	330					736	my $next_nonblank_token = $rtokens->[$i_beg];
7930	330	100				950	if ( $i_beg > $max_token_index ) {
7931	2					5	$blank_line = 1;
7932							}
7933							else {
7934
7935							# only a '#' immediately after a '$' is not a comment
7936	328	50				1004	if ( $next_nonblank_token eq '#' ) {
7937	0	0				0	unless ( $tok eq '$' ) {
7938	0					0	$blank_line = 1;
7939							}
7940							}
7941
7942	328	100				1545	if ( $next_nonblank_token =~ /^\s/ ) {
7943	308					1152	( $next_nonblank_token, $i_beg ) =
7944							find_next_nonblank_token_on_this_line( $i_beg, $rtokens,
7945							$max_token_index );
7946	308	100				1714	if ( $next_nonblank_token =~ /(^#\|^\s*$)/ ) {
7947	4					16	$blank_line = 1;
7948							}
7949							}
7950							}
7951
7952							# handle non-blank line; identifier, if any, must follow
7953	330	100				1006	unless ($blank_line) {
7954
7955	324	100				906	if ( $is_sub{$id_scan_state} ) {
		50
7956	298					3324	( $i, $tok, $type, $id_scan_state ) = $self->do_scan_sub(
7957							{
7958							input_line => $input_line,
7959							i => $i,
7960							i_beg => $i_beg,
7961							tok => $tok,
7962							type => $type,
7963							rtokens => $rtokens,
7964							rtoken_map => $rtoken_map,
7965							id_scan_state => $id_scan_state,
7966							max_token_index => $max_token_index,
7967							}
7968							);
7969							}
7970
7971							elsif ( $is_package{$id_scan_state} ) {
7972	26					104	( $i, $tok, $type ) =
7973							$self->do_scan_package( $input_line, $i, $i_beg, $tok, $type,
7974							$rtokens, $rtoken_map, $max_token_index );
7975	26					61	$id_scan_state = EMPTY_STRING;
7976							}
7977
7978							else {
7979	0					0	$self->warning("invalid token in scan_id: $tok\n");
7980	0					0	$id_scan_state = EMPTY_STRING;
7981							}
7982							}
7983
7984	330	50	33			1964	if ( $id_scan_state && ( !defined($type) \|\| !$type ) ) {
			66
7985
7986							# shouldn't happen:
7987	0					0	if (DEVEL_MODE) {
7988							$self->Fault(<<EOM);
7989							Program bug in scan_id: undefined type but scan_state=$id_scan_state
7990							EOM
7991							}
7992							$self->warning(
7993	0					0	"Possible program bug in sub scan_id: undefined type but scan_state=$id_scan_state\n"
7994							);
7995	0					0	$self->report_definite_bug();
7996							}
7997
7998	330					545	DEBUG_NSCAN && do {
7999							print STDOUT
8000							"NSCAN: returns i=$i, tok=$tok, type=$type, state=$id_scan_state\n";
8001							};
8002	330					1414	return ( $i, $tok, $type, $id_scan_state );
8003							} ## end sub scan_id_do
8004
8005							sub check_prototype {
8006	136			136	0	434	my ( $proto, $package, $subname ) = @_;
8007	136	50	33			794	return unless ( defined($package) && defined($subname) );
8008	136	100				423	if ( defined($proto) ) {
8009	34					202	$proto =~ s/^\s\(\s//;
8010	34					157	$proto =~ s/\s*\)$//;
8011	34	100				85	if ($proto) {
8012	5					24	$ris_user_function->{$package}{$subname} = 1;
8013	5					26	$ruser_function_prototype->{$package}{$subname} = "($proto)";
8014
8015							# prototypes containing '&' must be treated specially..
8016	5	100				23	if ( $proto =~ /\&/ ) {
8017
8018							# right curly braces of prototypes ending in
8019							# '&' may be followed by an operator
8020	1	50				10	if ( $proto =~ /\&$/ ) {
		50
8021	0					0	$ris_block_function->{$package}{$subname} = 1;
8022							}
8023
8024							# right curly braces of prototypes NOT ending in
8025							# '&' may NOT be followed by an operator
8026							elsif ( $proto !~ /\&$/ ) {
8027	1					4	$ris_block_list_function->{$package}{$subname} = 1;
8028							}
8029							}
8030							}
8031							else {
8032	29					113	$ris_constant->{$package}{$subname} = 1;
8033							}
8034							}
8035							else {
8036	102					325	$ris_user_function->{$package}{$subname} = 1;
8037							}
8038	136					304	return;
8039							} ## end sub check_prototype
8040
8041							sub do_scan_package {
8042
8043							# do_scan_package parses a package name
8044							# it is called with $i_beg equal to the index of the first nonblank
8045							# token following a 'package' token.
8046							# USES GLOBAL VARIABLES: $current_package,
8047
8048							# package NAMESPACE
8049							# package NAMESPACE VERSION
8050							# package NAMESPACE BLOCK
8051							# package NAMESPACE VERSION BLOCK
8052							#
8053							# If VERSION is provided, package sets the $VERSION variable in the given
8054							# namespace to a version object with the VERSION provided. VERSION must be
8055							# a "strict" style version number as defined by the version module: a
8056							# positive decimal number (integer or decimal-fraction) without
8057							# exponentiation or else a dotted-decimal v-string with a leading 'v'
8058							# character and at least three components.
8059							# reference http://perldoc.perl.org/functions/package.html
8060
8061							my (
8062	26			26	0	90	$self, $input_line, $i,
8063							$i_beg, $tok, $type,
8064							$rtokens, $rtoken_map, $max_token_index
8065							) = @_;
8066	26					52	my $package = undef;
8067	26					52	my $pos_beg = $rtoken_map->[$i_beg];
8068	26					87	pos($input_line) = $pos_beg;
8069
8070							# handle non-blank line; package name, if any, must follow
8071	26	50				221	if ( $input_line =~ m/\G\s((?:\w(?:'\|::))\w)/gc ) {
8072	26					69	$package = $1;
8073	26	50	33			157	$package = ( defined($1) && $1 ) ? $1 : 'main';
8074	26					75	$package =~ s/\'/::/g;
8075	26	50				82	if ( $package =~ /^\:/ ) { $package = 'main' . $package }
	0					0
8076	26					58	$package =~ s/::$//;
8077	26					56	my $pos = pos($input_line);
8078	26					57	my $numc = $pos - $pos_beg;
8079	26					82	$tok = 'package ' . substr( $input_line, $pos_beg, $numc );
8080	26					55	$type = 'i';
8081
8082							# Now we must convert back from character position
8083							# to pre_token index.
8084							# I don't think an error flag can occur here ..but ?
8085	26					44	my $error;
8086	26					97	( $i, $error ) =
8087							inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );
8088	26	50				87	if ($error) { $self->warning("Possibly invalid package\n") }
	0					0
8089	26					70	$current_package = $package;
8090
8091							# we should now have package NAMESPACE
8092							# now expecting VERSION, BLOCK, or ; to follow ...
8093							# package NAMESPACE VERSION
8094							# package NAMESPACE BLOCK
8095							# package NAMESPACE VERSION BLOCK
8096	26					81	my ( $next_nonblank_token, $i_next ) =
8097							$self->find_next_nonblank_token( $i, $rtokens, $max_token_index );
8098
8099							# check that something recognizable follows, but do not parse.
8100							# A VERSION number will be parsed later as a number or v-string in the
8101							# normal way. What is important is to set the statement type if
8102							# everything looks okay so that the operator_expected() routine
8103							# knows that the number is in a package statement.
8104							# Examples of valid primitive tokens that might follow are:
8105							# 1235 . ; { } v3 v
8106							# FIX: added a '#' since a side comment may also follow
8107							# Added ':' for class attributes (for --use-feature=class, rt145706)
8108	26	50				120	if ( $next_nonblank_token =~ /^([v\.\d;\{\}\#\:])\|v\d\|\d+$/ ) {
8109	26					72	$statement_type = $tok;
8110							}
8111							else {
8112	0					0	$self->warning(
8113							"Unexpected '$next_nonblank_token' after package name '$tok'\n"
8114							);
8115							}
8116							}
8117
8118							# no match but line not blank --
8119							# could be a label with name package, like package: , for example.
8120							else {
8121	0					0	$type = 'k';
8122							}
8123
8124	26					103	return ( $i, $tok, $type );
8125							} ## end sub do_scan_package
8126
8127							{ ## begin closure for sub scan_complex_identifier
8128
8129	38			38		371	use constant DEBUG_SCAN_ID => 0;
	38					141
	38					5293
8130
8131							# Constant hash:
8132							my %is_special_variable_char;
8133
8134							BEGIN {
8135
8136							# These are the only characters which can (currently) form special
8137							# variables, like $^W: (issue c066).
8138	38			38		369	my @q =
8139							qw{ ? A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ };
8140	38					137681	@{is_special_variable_char}{@q} = (1) x scalar(@q);
8141							} ## end BEGIN
8142
8143							# These are the possible states for this scanner:
8144							my $scan_state_SIGIL = '$';
8145							my $scan_state_ALPHA = 'A';
8146							my $scan_state_COLON = ':';
8147							my $scan_state_LPAREN = '(';
8148							my $scan_state_RPAREN = ')';
8149							my $scan_state_AMPERSAND = '&';
8150							my $scan_state_SPLIT = '^';
8151
8152							# Only these non-blank states may be returned to caller:
8153							my %is_returnable_scan_state = (
8154							$scan_state_SIGIL => 1,
8155							$scan_state_AMPERSAND => 1,
8156							);
8157
8158							# USES GLOBAL VARIABLES:
8159							# $context, $last_nonblank_token, $last_nonblank_type
8160
8161							#-----------
8162							# call args:
8163							#-----------
8164							my ( $i, $id_scan_state, $identifier, $rtokens, $max_token_index,
8165							$expecting, $container_type );
8166
8167							#-------------------------------------------
8168							# my variables, re-initialized on each call:
8169							#-------------------------------------------
8170							my $i_begin; # starting index $i
8171							my $type; # returned identifier type
8172							my $tok_begin; # starting token
8173							my $tok; # returned token
8174							my $id_scan_state_begin; # starting scan state
8175							my $identifier_begin; # starting identifier
8176							my $i_save; # a last good index, in case of error
8177							my $message; # hold error message for log file
8178							my $tok_is_blank;
8179							my $last_tok_is_blank;
8180							my $in_prototype_or_signature;
8181							my $saw_alpha;
8182							my $saw_type;
8183							my $allow_tick;
8184
8185							sub initialize_my_scan_id_vars {
8186
8187							# Initialize all 'my' vars on entry
8188	486			486	0	883	$i_begin = $i;
8189	486					871	$type = EMPTY_STRING;
8190	486					995	$tok_begin = $rtokens->[$i_begin];
8191	486					854	$tok = $tok_begin;
8192	486	50				1369	if ( $tok_begin eq ':' ) { $tok_begin = '::' }
	0					0
8193	486					890	$id_scan_state_begin = $id_scan_state;
8194	486					812	$identifier_begin = $identifier;
8195	486					816	$i_save = undef;
8196
8197	486					888	$message = EMPTY_STRING;
8198	486					864	$tok_is_blank = undef; # a flag to speed things up
8199	486					802	$last_tok_is_blank = undef;
8200
8201	486		100			1702	$in_prototype_or_signature =
8202							$container_type && $container_type =~ /^sub\b/;
8203
8204							# these flags will be used to help figure out the type:
8205	486					794	$saw_alpha = undef;
8206	486					762	$saw_type = undef;
8207
8208							# allow old package separator (') except in 'use' statement
8209	486					904	$allow_tick = ( $last_nonblank_token ne 'use' );
8210	486					842	return;
8211							} ## end sub initialize_my_scan_id_vars
8212
8213							#----------------------------------
8214							# Routines for handling scan states
8215							#----------------------------------
8216							sub do_id_scan_state_dollar {
8217
8218	514			514	0	889	my $self = shift;
8219
8220							# We saw a sigil, now looking to start a variable name
8221	514	100	66			4149	if ( $tok eq '$' ) {
		100	33
		100
		50
		50
		100
		100
		100
		100
8222
8223	31					144	$identifier .= $tok;
8224
8225							# we've got a punctuation variable if end of line (punct.t)
8226	31	50				161	if ( $i == $max_token_index ) {
8227	0					0	$type = 'i';
8228	0					0	$id_scan_state = EMPTY_STRING;
8229							}
8230							}
8231							elsif ( $tok =~ /^\w/ ) { # alphanumeric ..
8232	253					489	$saw_alpha = 1;
8233	253					495	$identifier .= $tok;
8234
8235							# now need :: except for special digit vars like '$1' (c208)
8236	253	100				862	$id_scan_state = $tok =~ /^\d/ ? EMPTY_STRING : $scan_state_COLON;
8237							}
8238							elsif ( $tok eq '::' ) {
8239	16					92	$id_scan_state = $scan_state_ALPHA;
8240	16					61	$identifier .= $tok;
8241							}
8242
8243							# POSTDEFREF ->@ ->% ->& ->*
8244							elsif ( ( $tok =~ /^[\@\%\&\*]$/ ) && $identifier =~ /\-\>$/ ) {
8245	0					0	$identifier .= $tok;
8246							}
8247							elsif ( $tok eq "'" && $allow_tick ) { # alphanumeric ..
8248	0					0	$saw_alpha = 1;
8249	0					0	$id_scan_state = $scan_state_COLON; # now need ::
8250	0					0	$identifier .= $tok;
8251
8252							# Perl will accept leading digits in identifiers,
8253							# although they may not always produce useful results.
8254							# Something like $main::0 is ok. But this also works:
8255							#
8256							# sub howdy::123::bubba{ print "bubba $54321!\n" }
8257							# howdy::123::bubba();
8258							#
8259							}
8260							elsif ( $tok eq '#' ) {
8261
8262	99					256	my $is_punct_var = $identifier eq '$$';
8263
8264							# side comment or identifier?
8265	99	100	66			2034	if (
			66
			66
			33
8266
8267							# A '#' starts a comment if it follows a space. For example,
8268							# the following is equivalent to $ans=40.
8269							# my $ #
8270							# ans = 40;
8271							!$last_tok_is_blank
8272
8273							# a # inside a prototype or signature can only start a
8274							# comment
8275							&& !$in_prototype_or_signature
8276
8277							# these are valid punctuation vars: *# %# @# $#
8278							# May also be '$#array' or POSTDEFREF ->$#
8279							&& ( $identifier =~ /^[\%\@\$\*]$/
8280							\|\| $identifier =~ /\$$/ )
8281
8282							# but a '#' after '$$' is a side comment; see c147
8283							&& !$is_punct_var
8284
8285							)
8286							{
8287	95					275	$identifier .= $tok; # keep same state, a $ could follow
8288							}
8289							else {
8290
8291							# otherwise it is a side comment
8292	4	50				21	if ( $identifier eq '->' ) { }
		50
		50
8293	0					0	elsif ($is_punct_var) { $type = 'i' }
8294	4					6	elsif ( $id_scan_state eq $scan_state_SIGIL ) { $type = 't' }
8295	0					0	else { $type = 'i' }
8296	4					7	$i = $i_save;
8297	4					10	$id_scan_state = EMPTY_STRING;
8298							}
8299							}
8300
8301							elsif ( $tok eq '{' ) {
8302
8303							# check for something like ${#} or ${?}, where ? is a special char
8304	38	100	100			539	if (
			66
			100
			100
8305							(
8306							$identifier eq '$'
8307							\|\| $identifier eq '@'
8308							\|\| $identifier eq '$#'
8309							)
8310							&& $i + 2 <= $max_token_index
8311							&& $rtokens->[ $i + 2 ] eq '}'
8312							&& $rtokens->[ $i + 1 ] !~ /[\s\w]/
8313							)
8314							{
8315	1					4	my $next2 = $rtokens->[ $i + 2 ];
8316	1					3	my $next1 = $rtokens->[ $i + 1 ];
8317	1					3	$identifier .= $tok . $next1 . $next2;
8318	1					3	$i += 2;
8319	1					2	$id_scan_state = EMPTY_STRING;
8320							}
8321							else {
8322
8323							# skip something like ${xxx} or ->{
8324	37					92	$id_scan_state = EMPTY_STRING;
8325
8326							# if this is the first token of a line, any tokens for this
8327							# identifier have already been accumulated
8328	37	100	66			180	if ( $identifier eq '$' \|\| $i == 0 ) {
8329	26					52	$identifier = EMPTY_STRING;
8330							}
8331	37					80	$i = $i_save;
8332							}
8333							}
8334
8335							# space ok after leading $ % * & @
8336							elsif ( $tok =~ /^\s*$/ ) {
8337
8338	20					65	$tok_is_blank = 1;
8339
8340							# note: an id with a leading '&' does not actually come this way
8341	20	50				106	if ( $identifier =~ /^[\$\%\*\&\@]/ ) {
		0
8342
8343	20	100				70	if ( length($identifier) > 1 ) {
8344	8					17	$id_scan_state = EMPTY_STRING;
8345	8					18	$i = $i_save;
8346	8					18	$type = 'i'; # probably punctuation variable
8347							}
8348							else {
8349
8350							# fix c139: trim line-ending type 't'
8351	12	100				59	if ( $i == $max_token_index ) {
		100
8352	1					3	$i = $i_save;
8353	1					3	$type = 't';
8354							}
8355
8356							# spaces after $'s are common, and space after @
8357							# is harmless, so only complain about space
8358							# after other type characters. Space after $ and
8359							# @ will be removed in formatting. Report space
8360							# after % and * because they might indicate a
8361							# parsing error. In other words '% ' might be a
8362							# modulo operator. Delete this warning if it
8363							# gets annoying.
8364							elsif ( $identifier !~ /^[\@\$]$/ ) {
8365	1					5	$message =
8366							"Space in identifier, following $identifier\n";
8367							}
8368							else {
8369							## ok: silently accept space after '$' and '@' sigils
8370							}
8371							}
8372							}
8373
8374							elsif ( $identifier eq '->' ) {
8375
8376							# space after '->' is ok except at line end ..
8377							# so trim line-ending in type '->' (fixes c139)
8378	0	0				0	if ( $i == $max_token_index ) {
8379	0					0	$i = $i_save;
8380	0					0	$type = '->';
8381							}
8382							}
8383
8384							# stop at space after something other than -> or sigil
8385							# Example of what can arrive here:
8386							# eval { $MyClass->$$ };
8387							else {
8388	0					0	$id_scan_state = EMPTY_STRING;
8389	0					0	$i = $i_save;
8390	0					0	$type = 'i';
8391							}
8392							}
8393							elsif ( $tok eq '^' ) {
8394
8395							# check for some special variables like $^ $^W
8396	11	50				47	if ( $identifier =~ /^[\$\*\@\%]$/ ) {
8397	11					34	$identifier .= $tok;
8398	11					25	$type = 'i';
8399
8400							# There may be one more character, not a space, after the ^
8401	11					31	my $next1 = $rtokens->[ $i + 1 ];
8402	11					28	my $chr = substr( $next1, 0, 1 );
8403	11	100				47	if ( $is_special_variable_char{$chr} ) {
8404
8405							# It is something like $^W
8406							# Test case (c066) : $^Oeq'linux'
8407	9					20	$i++;
8408	9					19	$identifier .= $next1;
8409
8410							# If pretoken $next1 is more than one character long,
8411							# set a flag indicating that it needs to be split.
8412	9	100				39	$id_scan_state =
8413							( length($next1) > 1 ) ? $scan_state_SPLIT : EMPTY_STRING;
8414							}
8415							else {
8416
8417							# it is just $^
8418							# Simple test case (c065): '$aa=$^if($bb)';
8419	2					4	$id_scan_state = EMPTY_STRING;
8420							}
8421							}
8422							else {
8423	0					0	$id_scan_state = EMPTY_STRING;
8424	0					0	$i = $i_save;
8425							}
8426							}
8427							else { # something else
8428
8429	46	100	66			428	if ( $in_prototype_or_signature && $tok =~ /^[\),=#]/ ) {
		100	66
		100
		50
		0
		0
8430
8431							# We might be in an extrusion of
8432							# sub foo2 ( $first, $, $third ) {
8433							# looking at a line starting with a comma, like
8434							# $
8435							# ,
8436							# in this case the comma ends the signature variable
8437							# '$' which will have been previously marked type 't'
8438							# rather than 'i'.
8439	3	100				10	if ( $i == $i_begin ) {
8440	1					10	$identifier = EMPTY_STRING;
8441	1					4	$type = EMPTY_STRING;
8442							}
8443
8444							# at a # we have to mark as type 't' because more may
8445							# follow, otherwise, in a signature we can let '$' be an
8446							# identifier here for better formatting.
8447							# See 'mangle4.in' for a test case.
8448							else {
8449	2					4	$type = 'i';
8450	2	50	33			12	if ( $id_scan_state eq $scan_state_SIGIL && $tok eq '#' ) {
8451	0					0	$type = 't';
8452							}
8453	2					6	$i = $i_save;
8454							}
8455	3					5	$id_scan_state = EMPTY_STRING;
8456							}
8457
8458							# check for various punctuation variables
8459							elsif ( $identifier =~ /^[\$\*\@\%]$/ ) {
8460	35					117	$identifier .= $tok;
8461							}
8462
8463							# POSTDEFREF: Postfix reference ->$* ->%* ->@* ->** ->&* ->$#*
8464							elsif ($tok eq '*'
8465							&& $identifier =~ /\-\>([\@\%\$\*\&]\|\$\#)$/ )
8466							{
8467	6					13	$identifier .= $tok;
8468							}
8469
8470							elsif ( $identifier eq '$#' ) {
8471
8472	2	50				12	if ( $tok eq '{' ) { $type = 'i'; $i = $i_save }
	0	50				0
	0					0
8473
8474							# perl seems to allow just these: $#: $#- $#+
8475							elsif ( $tok =~ /^[\:\-\+]$/ ) {
8476	0					0	$type = 'i';
8477	0					0	$identifier .= $tok;
8478							}
8479							else {
8480	2					6	$i = $i_save;
8481	2					28	$self->write_logfile_entry(
8482							'Use of $# is deprecated' . "\n" );
8483							}
8484							}
8485							elsif ( $identifier eq '$$' ) {
8486
8487							# perl does not allow references to punctuation
8488							# variables without braces. For example, this
8489							# won't work:
8490							# $:=\4;
8491							# $a = $$:;
8492							# You would have to use
8493							# $a = ${$:};
8494
8495							# '$$' alone is punctuation variable for PID
8496	0					0	$i = $i_save;
8497	0	0				0	if ( $tok eq '{' ) { $type = 't' }
	0					0
8498	0					0	else { $type = 'i' }
8499							}
8500							elsif ( $identifier eq '->' ) {
8501	0					0	$i = $i_save;
8502							}
8503							else {
8504	0					0	$i = $i_save;
8505	0	0				0	if ( length($identifier) == 1 ) {
8506	0					0	$identifier = EMPTY_STRING;
8507							}
8508							}
8509	46					109	$id_scan_state = EMPTY_STRING;
8510							}
8511	514					973	return;
8512							} ## end sub do_id_scan_state_dollar
8513
8514							sub do_id_scan_state_alpha {
8515
8516	113			113	0	260	my $self = shift;
8517
8518							# looking for alphanumeric after ::
8519	113					418	$tok_is_blank = $tok =~ /^\s*$/;
8520
8521	113	100	33			520	if ( $tok =~ /^\w/ ) { # found it
		50	66
		50	33
		50
8522	100					201	$identifier .= $tok;
8523	100					174	$id_scan_state = $scan_state_COLON; # now need ::
8524	100					173	$saw_alpha = 1;
8525							}
8526							elsif ( $tok eq "'" && $allow_tick ) {
8527	0					0	$identifier .= $tok;
8528	0					0	$id_scan_state = $scan_state_COLON; # now need ::
8529	0					0	$saw_alpha = 1;
8530							}
8531							elsif ( $tok_is_blank && $identifier =~ /^sub / ) {
8532	0					0	$id_scan_state = $scan_state_LPAREN;
8533	0					0	$identifier .= $tok;
8534							}
8535							elsif ( $tok eq '(' && $identifier =~ /^sub / ) {
8536	0					0	$id_scan_state = $scan_state_RPAREN;
8537	0					0	$identifier .= $tok;
8538							}
8539							else {
8540	13					26	$id_scan_state = EMPTY_STRING;
8541	13					21	$i = $i_save;
8542							}
8543	113					191	return;
8544							} ## end sub do_id_scan_state_alpha
8545
8546							sub do_id_scan_state_colon {
8547
8548	434			434	0	816	my $self = shift;
8549
8550							# looking for possible :: after alphanumeric
8551
8552	434					1517	$tok_is_blank = $tok =~ /^\s*$/;
8553
8554	434	100	66			3722	if ( $tok eq '::' ) { # got it
		100	66
		100	66
		50
		50
8555	97					184	$identifier .= $tok;
8556	97					164	$id_scan_state = $scan_state_ALPHA; # now require alpha
8557							}
8558							elsif ( $tok =~ /^\w/ ) { # more alphanumeric is ok here
8559	20					66	$identifier .= $tok;
8560	20					46	$id_scan_state = $scan_state_COLON; # now need ::
8561	20					36	$saw_alpha = 1;
8562							}
8563							elsif ( $tok eq "'" && $allow_tick ) { # tick
8564
8565	12	50				30	if ( $is_keyword{$identifier} ) {
8566	0					0	$id_scan_state = EMPTY_STRING; # that's all
8567	0					0	$i = $i_save;
8568							}
8569							else {
8570	12					23	$identifier .= $tok;
8571							}
8572							}
8573							elsif ( $tok_is_blank && $identifier =~ /^sub / ) {
8574	0					0	$id_scan_state = $scan_state_LPAREN;
8575	0					0	$identifier .= $tok;
8576							}
8577							elsif ( $tok eq '(' && $identifier =~ /^sub / ) {
8578	0					0	$id_scan_state = $scan_state_RPAREN;
8579	0					0	$identifier .= $tok;
8580							}
8581							else {
8582	305					609	$id_scan_state = EMPTY_STRING; # that's all
8583	305					511	$i = $i_save;
8584							}
8585	434					747	return;
8586							} ## end sub do_id_scan_state_colon
8587
8588							sub do_id_scan_state_left_paren {
8589
8590	0			0	0	0	my $self = shift;
8591
8592							# looking for possible '(' of a prototype
8593
8594	0	0				0	if ( $tok eq '(' ) { # got it
		0
8595	0					0	$identifier .= $tok;
8596	0					0	$id_scan_state = $scan_state_RPAREN; # now find the end of it
8597							}
8598							elsif ( $tok =~ /^\s*$/ ) { # blank - keep going
8599	0					0	$identifier .= $tok;
8600	0					0	$tok_is_blank = 1;
8601							}
8602							else {
8603	0					0	$id_scan_state = EMPTY_STRING; # that's all - no prototype
8604	0					0	$i = $i_save;
8605							}
8606	0					0	return;
8607							} ## end sub do_id_scan_state_left_paren
8608
8609							sub do_id_scan_state_right_paren {
8610
8611	0			0	0	0	my $self = shift;
8612
8613							# looking for a ')' of prototype to close a '('
8614
8615	0					0	$tok_is_blank = $tok =~ /^\s*$/;
8616
8617	0	0				0	if ( $tok eq ')' ) { # got it
		0
8618	0					0	$identifier .= $tok;
8619	0					0	$id_scan_state = EMPTY_STRING; # all done
8620							}
8621							elsif ( $tok =~ /^[\s\$\%\\\*\@\&\;]/ ) {
8622	0					0	$identifier .= $tok;
8623							}
8624							else { # probable error in script, but keep going
8625	0					0	warning("Unexpected '$tok' while seeking end of prototype\n");
8626	0					0	$identifier .= $tok;
8627							}
8628	0					0	return;
8629							} ## end sub do_id_scan_state_right_paren
8630
8631							sub do_id_scan_state_ampersand {
8632
8633	105			105	0	277	my $self = shift;
8634
8635							# Starting sub call after seeing an '&'
8636
8637	105	100	33			673	if ( $tok =~ /^[\$\w]/ ) { # alphanumeric ..
		50
		100
		50
		50
		0
8638	88					200	$id_scan_state = $scan_state_COLON; # now need ::
8639	88					169	$saw_alpha = 1;
8640	88					193	$identifier .= $tok;
8641							}
8642							elsif ( $tok eq "'" && $allow_tick ) { # alphanumeric ..
8643	0					0	$id_scan_state = $scan_state_COLON; # now need ::
8644	0					0	$saw_alpha = 1;
8645	0					0	$identifier .= $tok;
8646							}
8647							elsif ( $tok =~ /^\s*$/ ) { # allow space
8648	2					5	$tok_is_blank = 1;
8649
8650							# fix c139: trim line-ending type 't'
8651	2	50	33			12	if ( length($identifier) == 1 && $i == $max_token_index ) {
8652	2					3	$i = $i_save;
8653	2					5	$type = 't';
8654							}
8655							}
8656							elsif ( $tok eq '::' ) { # leading ::
8657	0					0	$id_scan_state = $scan_state_ALPHA; # accept alpha next
8658	0					0	$identifier .= $tok;
8659							}
8660							elsif ( $tok eq '{' ) {
8661	15	50	33			66	if ( $identifier eq '&' \|\| $i == 0 ) {
8662	15					42	$identifier = EMPTY_STRING;
8663							}
8664	15					34	$i = $i_save;
8665	15					31	$id_scan_state = EMPTY_STRING;
8666							}
8667							elsif ( $tok eq '^' ) {
8668	0	0				0	if ( $identifier eq '&' ) {
8669
8670							# Special variable (c066)
8671	0					0	$identifier .= $tok;
8672	0					0	$type = '&';
8673
8674							# There may be one more character, not a space, after the ^
8675	0					0	my $next1 = $rtokens->[ $i + 1 ];
8676	0					0	my $chr = substr( $next1, 0, 1 );
8677	0	0				0	if ( $is_special_variable_char{$chr} ) {
8678
8679							# It is something like &^O
8680	0					0	$i++;
8681	0					0	$identifier .= $next1;
8682
8683							# If pretoken $next1 is more than one character long,
8684							# set a flag indicating that it needs to be split.
8685	0	0				0	$id_scan_state =
8686							( length($next1) > 1 ) ? $scan_state_SPLIT : EMPTY_STRING;
8687							}
8688							else {
8689
8690							# it is &^
8691	0					0	$id_scan_state = EMPTY_STRING;
8692							}
8693							}
8694							else {
8695	0					0	$identifier = EMPTY_STRING;
8696	0					0	$i = $i_save;
8697							}
8698							}
8699							else {
8700
8701							# punctuation variable?
8702							# testfile: cunningham4.pl
8703							#
8704							# We have to be careful here. If we are in an unknown state,
8705							# we will reject the punctuation variable. In the following
8706							# example the '&' is a binary operator but we are in an unknown
8707							# state because there is no sigil on 'Prima', so we don't
8708							# know what it is. But it is a bad guess that
8709							# '&~' is a function variable.
8710							# $self->{text}->{colorMap}->[
8711							# Prima::PodView::COLOR_CODE_FOREGROUND
8712							# & ~tb::COLOR_INDEX ] =
8713							# $sec->{ColorCode}
8714
8715							# Fix for case c033: a '#' here starts a side comment
8716	0	0	0			0	if ( $identifier eq '&' && $expecting && $tok ne '#' ) {
			0
8717	0					0	$identifier .= $tok;
8718							}
8719							else {
8720	0					0	$identifier = EMPTY_STRING;
8721	0					0	$i = $i_save;
8722	0					0	$type = '&';
8723							}
8724	0					0	$id_scan_state = EMPTY_STRING;
8725							}
8726	105					203	return;
8727							} ## end sub do_id_scan_state_ampersand
8728
8729							#-------------------
8730							# hash of scanner subs
8731							#-------------------
8732							my $scan_identifier_code = {
8733							$scan_state_SIGIL => \&do_id_scan_state_dollar,
8734							$scan_state_ALPHA => \&do_id_scan_state_alpha,
8735							$scan_state_COLON => \&do_id_scan_state_colon,
8736							$scan_state_LPAREN => \&do_id_scan_state_left_paren,
8737							$scan_state_RPAREN => \&do_id_scan_state_right_paren,
8738							$scan_state_AMPERSAND => \&do_id_scan_state_ampersand,
8739							};
8740
8741							sub scan_complex_identifier {
8742
8743							# This routine assembles tokens into identifiers. It maintains a
8744							# scan state, id_scan_state. It updates id_scan_state based upon
8745							# current id_scan_state and token, and returns an updated
8746							# id_scan_state and the next index after the identifier.
8747
8748							# This routine now serves a a backup for sub scan_simple_identifier
8749							# which handles most identifiers.
8750
8751							# Note that $self must be a 'my' variable and not be a closure
8752							# variables like the other args. Otherwise it will not get
8753							# deleted by a DESTROY call at the end of a file. Then an
8754							# attempt to create multiple tokenizers can occur when multiple
8755							# files are processed, causing an error.
8756
8757							(
8758	486			486	0	2689	my $self, $i, $id_scan_state, $identifier, $rtokens,
8759							$max_token_index, $expecting, $container_type
8760							) = @_;
8761
8762							# return flag telling caller to split the pretoken
8763	486					2485	my $split_pretoken_flag;
8764
8765							#-------------------
8766							# Initialize my vars
8767							#-------------------
8768
8769	486					1628	initialize_my_scan_id_vars();
8770
8771							#--------------------------------------------------------
8772							# get started by defining a type and a state if necessary
8773							#--------------------------------------------------------
8774
8775	486	100				1286	if ( !$id_scan_state ) {
8776	479					843	$context = UNKNOWN_CONTEXT;
8777
8778							# fixup for digraph
8779	479	50				1273	if ( $tok eq '>' ) {
8780	0					0	$tok = '->';
8781	0					0	$tok_begin = $tok;
8782							}
8783	479					858	$identifier = $tok;
8784
8785	479	100	100			3030	if ( $last_nonblank_token eq '->' ) {
		100	100
		100	0
		50
		0
		0
		0
		0
8786	6					14	$identifier = '->' . $identifier;
8787	6					12	$id_scan_state = $scan_state_SIGIL;
8788							}
8789							elsif ( $tok eq '$' \|\| $tok eq '*' ) {
8790	293					564	$id_scan_state = $scan_state_SIGIL;
8791	293					529	$context = SCALAR_CONTEXT;
8792							}
8793							elsif ( $tok eq '%' \|\| $tok eq '@' ) {
8794	77					159	$id_scan_state = $scan_state_SIGIL;
8795	77					147	$context = LIST_CONTEXT;
8796							}
8797							elsif ( $tok eq '&' ) {
8798	103					220	$id_scan_state = $scan_state_AMPERSAND;
8799							}
8800							elsif ( $tok eq 'sub' or $tok eq 'package' ) {
8801	0					0	$saw_alpha = 0; # 'sub' is considered type info here
8802	0					0	$id_scan_state = $scan_state_SIGIL;
8803	0					0	$identifier .=
8804							SPACE; # need a space to separate sub from sub name
8805							}
8806							elsif ( $tok eq '::' ) {
8807	0					0	$id_scan_state = $scan_state_ALPHA;
8808							}
8809							elsif ( $tok =~ /^\w/ ) {
8810	0					0	$id_scan_state = $scan_state_COLON;
8811	0					0	$saw_alpha = 1;
8812							}
8813							elsif ( $tok eq '->' ) {
8814	0					0	$id_scan_state = $scan_state_SIGIL;
8815							}
8816							else {
8817
8818							# shouldn't happen: bad call parameter
8819	0					0	my $msg =
8820							"Program bug detected: scan_complex_identifier received bad starting token = '$tok'\n";
8821	0					0	if (DEVEL_MODE) { $self->Fault($msg) }
8822	0	0				0	if ( !$self->[_in_error_] ) {
8823	0					0	warning($msg);
8824	0					0	$self->[_in_error_] = 1;
8825							}
8826	0					0	$id_scan_state = EMPTY_STRING;
8827
8828							# emergency return
8829	0					0	goto RETURN;
8830							}
8831	479					912	$saw_type = !$saw_alpha;
8832							}
8833							else {
8834	7					22	$i--;
8835	7					32	$saw_alpha = ( $tok =~ /^\w/ );
8836	7					18	$saw_type = ( $tok =~ /([\$\%\@\*\&])/ );
8837
8838							# check for a valid starting state
8839	7					16	if ( DEVEL_MODE && !$is_returnable_scan_state{$id_scan_state} ) {
8840							$self->Fault(<<EOM);
8841							Unexpected starting scan state in sub scan_complex_identifier: '$id_scan_state'
8842							EOM
8843							}
8844							}
8845
8846							#------------------------------
8847							# loop to gather the identifier
8848							#------------------------------
8849
8850	486					861	$i_save = $i;
8851
8852	486		100			2188	while ( $i < $max_token_index && $id_scan_state ) {
8853
8854							# Be sure we have code to handle this state before we proceed
8855	1169					2648	my $code = $scan_identifier_code->{$id_scan_state};
8856	1169	100				2381	if ( !$code ) {
8857
8858	3	50				14	if ( $id_scan_state eq $scan_state_SPLIT ) {
8859							## OK: this is the signal to exit and split the pretoken
8860							}
8861
8862							# unknown state - should not happen
8863							else {
8864	0					0	if (DEVEL_MODE) {
8865							$self->Fault(<<EOM);
8866							Unknown scan state in sub scan_complex_identifier: '$id_scan_state'
8867							Scan state at sub entry was '$id_scan_state_begin'
8868							EOM
8869							}
8870	0					0	$id_scan_state = EMPTY_STRING;
8871	0					0	$i = $i_save;
8872							}
8873	3					6	last;
8874							}
8875
8876							# Remember the starting index for progress check below
8877	1166					1804	my $i_start_loop = $i;
8878
8879	1166					1941	$last_tok_is_blank = $tok_is_blank;
8880	1166	100				2092	if ($tok_is_blank) { $tok_is_blank = undef }
	11					29
8881	1155					1674	else { $i_save = $i }
8882
8883	1166					2117	$tok = $rtokens->[ ++$i ];
8884
8885							# patch to make digraph :: if necessary
8886	1166	100	100			3119	if ( ( $tok eq ':' ) && ( $rtokens->[ $i + 1 ] eq ':' ) ) {
8887	113					261	$tok = '::';
8888	113					172	$i++;
8889							}
8890
8891	1166					3438	$code->($self);
8892
8893							# check for forward progress: a decrease in the index $i
8894							# implies that scanning has finished
8895	1166	100				3979	last if ( $i <= $i_start_loop );
8896
8897							} ## end of main loop
8898
8899							#-------------
8900							# Check result
8901							#-------------
8902
8903							# Be sure a valid state is returned
8904	486	100				1293	if ($id_scan_state) {
8905
8906	20	100				86	if ( !$is_returnable_scan_state{$id_scan_state} ) {
8907
8908	13	100				57	if ( $id_scan_state eq $scan_state_SPLIT ) {
8909	3					5	$split_pretoken_flag = 1;
8910							}
8911
8912	13	50				53	if ( $id_scan_state eq $scan_state_RPAREN ) {
8913	0					0	warning(
8914							"Hit end of line while seeking ) to end prototype\n");
8915							}
8916
8917	13					28	$id_scan_state = EMPTY_STRING;
8918							}
8919
8920							# Patch: the deprecated variable $# does not combine with anything
8921							# on the next line.
8922	20	50				65	if ( $identifier eq '$#' ) { $id_scan_state = EMPTY_STRING }
	0					0
8923							}
8924
8925							# Be sure the token index is valid
8926	486	50				1303	if ( $i < 0 ) { $i = 0 }
	0					0
8927
8928							# Be sure a token type is defined
8929	486	100				1229	if ( !$type ) {
8930
8931	458	100				1049	if ($saw_type) {
		100
8932
8933	452	100	33			3562	if ($saw_alpha) {
		50	66
		100	66
			33
8934
8935							# The type without the -> should be the same as with the -> so
8936							# that if they get separated we get the same bond strengths,
8937							# etc. See b1234
8938	348	50	33			1387	if ( $identifier =~ /^->/
			33
8939							&& $last_nonblank_type eq 'w'
8940							&& substr( $identifier, 2, 1 ) =~ /^\w/ )
8941							{
8942	0					0	$type = 'w';
8943							}
8944	348					732	else { $type = 'i' }
8945							}
8946							elsif ( $identifier eq '->' ) {
8947	0					0	$type = '->';
8948							}
8949							elsif (
8950							( length($identifier) > 1 )
8951
8952							# In something like '@$=' we have an identifier '@$'
8953							# In something like '$${' we have type '$$' (and only
8954							# part of an identifier)
8955							&& !( $identifier =~ /\$$/ && $tok eq '{' )
8956
8957							## && ( $identifier !~ /^(sub \|package )$/ )
8958							&& $identifier ne 'sub '
8959							&& $identifier ne 'package '
8960							)
8961							{
8962	53					1730	$type = 'i';
8963							}
8964	51					119	else { $type = 't' }
8965							}
8966							elsif ($saw_alpha) {
8967
8968							# type 'w' includes anything without leading type info
8969							# ($,%,@,*) including something like abc::def::ghi
8970	5					10	$type = 'w';
8971
8972							# Fix for b1337, if restarting scan after line break between
8973							# '->' or sigil and identifier name, use type 'i'
8974	5	50	33			32	if ( $id_scan_state_begin
8975							&& $identifier =~ /^([\$\%\@\*\&]\|->)/ )
8976							{
8977	5					12	$type = 'i';
8978							}
8979							}
8980							else {
8981	1					13	$type = EMPTY_STRING;
8982							} # this can happen on a restart
8983							}
8984
8985							# See if we formed an identifier...
8986	486	100				2053	if ($identifier) {
8987	444					827	$tok = $identifier;
8988	444	100				1057	if ($message) { $self->write_logfile_entry($message) }
	1					7
8989							}
8990
8991							# did not find an identifier, back up
8992							else {
8993	42					85	$tok = $tok_begin;
8994	42					75	$i = $i_begin;
8995							}
8996
8997							RETURN:
8998
8999	486					1124	DEBUG_SCAN_ID && do {
9000							my ( $a, $b, $c ) = caller;
9001							print STDOUT
9002							"SCANID: called from $a $b $c with tok, i, state, identifier =$tok_begin, $i_begin, $id_scan_state_begin, $identifier_begin\n";
9003							print STDOUT
9004							"SCANID: returned with tok, i, state, identifier =$tok, $i, $id_scan_state, $identifier\n";
9005							};
9006	486					2326	return ( $i, $tok, $type, $id_scan_state, $identifier,
9007							$split_pretoken_flag );
9008							} ## end sub scan_complex_identifier
9009							} ## end closure for sub scan_complex_identifier
9010
9011							{ ## closure for sub do_scan_sub
9012
9013							my %warn_if_lexical;
9014
9015							BEGIN {
9016
9017							# lexical subs with these names can cause parsing errors in this version
9018	38			38		279	my @q = qw( m q qq qr qw qx s tr y );
9019	38					3277	@{warn_if_lexical}{@q} = (1) x scalar(@q);
9020							} ## end BEGIN
9021
9022							# saved package and subnames in case prototype is on separate line
9023							my ( $package_saved, $subname_saved );
9024
9025							# initialize subname each time a new 'sub' keyword is encountered
9026							sub initialize_subname {
9027	294			294	0	615	$package_saved = EMPTY_STRING;
9028	294					576	$subname_saved = EMPTY_STRING;
9029	294					511	return;
9030							}
9031
9032							use constant {
9033	38					87204	SUB_CALL => 1,
9034							PAREN_CALL => 2,
9035							PROTOTYPE_CALL => 3,
9036	38			38		321	};
	38					116
9037
9038							sub do_scan_sub {
9039
9040							# do_scan_sub parses a sub name and prototype.
9041
9042							# At present there are three basic CALL TYPES which are
9043							# distinguished by the starting value of '$tok':
9044							# 1. $tok='sub', id_scan_state='sub'
9045							# it is called with $i_beg equal to the index of the first nonblank
9046							# token following a 'sub' token.
9047							# 2. $tok='(', id_scan_state='sub',
9048							# it is called with $i_beg equal to the index of a '(' which may
9049							# start a prototype.
9050							# 3. $tok='prototype', id_scan_state='prototype'
9051							# it is called with $i_beg equal to the index of a '(' which is
9052							# preceded by ': prototype' and has $id_scan_state eq 'prototype'
9053
9054							# Examples:
9055
9056							# A single type 1 call will get both the sub and prototype
9057							# sub foo1 ( $$ ) { }
9058							# ^
9059
9060							# The subname will be obtained with a 'sub' call
9061							# The prototype on line 2 will be obtained with a '(' call
9062							# sub foo1
9063							# ^ <---call type 1
9064							# ( $$ ) { }
9065							# ^ <---call type 2
9066
9067							# The subname will be obtained with a 'sub' call
9068							# The prototype will be obtained with a 'prototype' call
9069							# sub foo1 ( $x, $y ) : prototype ( $$ ) { }
9070							# ^ <---type 1 ^ <---type 3
9071
9072							# TODO: add future error checks to be sure we have a valid
9073							# sub name. For example, 'sub &doit' is wrong. Also, be sure
9074							# a name is given if and only if a non-anonymous sub is
9075							# appropriate.
9076							# USES GLOBAL VARS: $current_package, $last_nonblank_token,
9077							# $rsaw_function_definition,
9078							# $statement_type
9079
9080	300			300	0	760	my ( $self, $rinput_hash ) = @_;
9081
9082	300					705	my $input_line = $rinput_hash->{input_line};
9083	300					600	my $i = $rinput_hash->{i};
9084	300					824	my $i_beg = $rinput_hash->{i_beg};
9085	300					652	my $tok = $rinput_hash->{tok};
9086	300					571	my $type = $rinput_hash->{type};
9087	300					600	my $rtokens = $rinput_hash->{rtokens};
9088	300					564	my $rtoken_map = $rinput_hash->{rtoken_map};
9089	300					568	my $id_scan_state = $rinput_hash->{id_scan_state};
9090	300					547	my $max_token_index = $rinput_hash->{max_token_index};
9091
9092	300					540	my $i_entry = $i;
9093
9094							# Determine the CALL TYPE
9095							# 1=sub
9096							# 2=(
9097							# 3=prototype
9098	300	100				1019	my $call_type =
		100
9099							$tok eq 'prototype' ? PROTOTYPE_CALL
9100							: $tok eq '(' ? PAREN_CALL
9101							: SUB_CALL;
9102
9103	300					509	$id_scan_state = EMPTY_STRING; # normally we get everything in one call
9104	300					559	my $subname = $subname_saved;
9105	300					558	my $package = $package_saved;
9106	300					623	my $proto = undef;
9107	300					547	my $attrs = undef;
9108	300					493	my $match;
9109
9110	300					568	my $pos_beg = $rtoken_map->[$i_beg];
9111	300					1034	pos($input_line) = $pos_beg;
9112
9113							# Look for the sub NAME if this is a SUB call
9114	300	100	100			2774	if (
9115							$call_type == SUB_CALL
9116							&& $input_line =~ m/\G\s*
9117							((?:\w(?:'\|::))) # package - something that ends in :: or '
9118							(\w+) # NAME - required
9119							/gcx
9120							)
9121							{
9122	121					313	$match = 1;
9123	121					337	$subname = $2;
9124
9125	121		33			452	my $is_lexical_sub =
9126							$last_nonblank_type eq 'k' && $last_nonblank_token eq 'my';
9127	121	0	33			383	if ( $is_lexical_sub && $1 ) {
9128	0					0	$self->warning("'my' sub $subname cannot be in package '$1'\n");
9129	0					0	$is_lexical_sub = 0;
9130							}
9131
9132	121	50				360	if ($is_lexical_sub) {
9133
9134							# lexical subs use the block sequence number as a package name
9135	0					0	my $seqno =
9136							$rcurrent_sequence_number->[BRACE]
9137							[ $rcurrent_depth->[BRACE] ];
9138	0	0				0	$seqno = 1 unless ( defined($seqno) );
9139	0					0	$package = $seqno;
9140	0	0				0	if ( $warn_if_lexical{$subname} ) {
9141	0					0	$self->warning(
9142							"'my' sub '$subname' matches a builtin name and may not be handled correctly in this perltidy version.\n"
9143							);
9144
9145							# This may end badly, it is safest to block formatting
9146							# For an example, see perl527/lexsub.t (issue c203)
9147	0					0	$self->[_in_trouble_] = 1;
9148							}
9149							}
9150							else {
9151	121	100	66			813	$package = ( defined($1) && $1 ) ? $1 : $current_package;
9152	121					438	$package =~ s/\'/::/g;
9153	121	50				457	if ( $package =~ /^\:/ ) { $package = 'main' . $package }
	0					0
9154	121					308	$package =~ s/::$//;
9155							}
9156
9157	121					261	my $pos = pos($input_line);
9158	121					260	my $numc = $pos - $pos_beg;
9159	121					376	$tok = 'sub ' . substr( $input_line, $pos_beg, $numc );
9160	121					253	$type = 'i';
9161
9162							# remember the sub name in case another call is needed to
9163							# get the prototype
9164	121					267	$package_saved = $package;
9165	121					286	$subname_saved = $subname;
9166							}
9167
9168							# Now look for PROTO ATTRS for all call types
9169							# Look for prototype/attributes which are usually on the same
9170							# line as the sub name but which might be on a separate line.
9171							# For example, we might have an anonymous sub with attributes,
9172							# or a prototype on a separate line from its sub name
9173
9174							# NOTE: We only want to parse PROTOTYPES here. If we see anything that
9175							# does not look like a prototype, we assume it is a SIGNATURE and we
9176							# will stop and let the the standard tokenizer handle it. In
9177							# particular, we stop if we see any nested parens, braces, or commas.
9178							# Also note, a valid prototype cannot contain any alphabetic character
9179							# -- see https://perldoc.perl.org/perlsub
9180							# But it appears that an underscore is valid in a prototype, so the
9181							# regex below uses [A-Za-z] rather than \w
9182							# This is the old regex which has been replaced:
9183							# $input_line =~ m/\G(\s$[^$$\}\{\,#]$)? # PROTO
9184	300					1085	my $saw_opening_paren = $input_line =~ /\G\s*\(/;
9185	300	100	100			2896	if (
			66
9186							$input_line =~ m/\G(\s$[^$$\}\{\,#A-Za-z]$)? # PROTO
9187							(\s*:)? # ATTRS leading ':'
9188							/gcx
9189							&& ( $1 \|\| $2 )
9190							)
9191							{
9192	45					119	$proto = $1;
9193	45					100	$attrs = $2;
9194
9195							# Append the prototype to the starting token if it is 'sub' or
9196							# 'prototype'. This is not necessary but for compatibility with
9197							# previous versions when the -csc flag is used:
9198	45	100	100			298	if ( $proto && ( $match \|\| $call_type == PROTOTYPE_CALL ) ) {
		100	100
9199	24					64	$tok .= $proto;
9200							}
9201
9202							# If we just entered the sub at an opening paren on this call, not
9203							# a following :prototype, label it with the previous token. This is
9204							# necessary to propagate the sub name to its opening block.
9205							elsif ( $call_type == PAREN_CALL ) {
9206	2					8	$tok = $last_nonblank_token;
9207							}
9208
9209	45		100			161	$match \|\|= 1;
9210
9211							# Patch part #1 to fixes cases b994 and b1053:
9212							# Mark an anonymous sub keyword without prototype as type 'k', i.e.
9213							# 'sub : lvalue { ...'
9214	45					95	$type = 'i';
9215	45	100	100			202	if ( $tok eq 'sub' && !$proto ) { $type = 'k' }
	2					5
9216							}
9217
9218	300	100				902	if ($match) {
9219
9220							# ATTRS: if there are attributes, back up and let the ':' be
9221							# found later by the scanner.
9222	136					309	my $pos = pos($input_line);
9223	136	100				406	if ($attrs) {
9224	15					33	$pos -= length($attrs);
9225							}
9226
9227	136					312	my $next_nonblank_token = $tok;
9228
9229							# catch case of line with leading ATTR ':' after anonymous sub
9230	136	100	100			557	if ( $pos == $pos_beg && $tok eq ':' ) {
9231	1					2	$type = 'A';
9232	1					5	$self->[_in_attribute_list_] = 1;
9233							}
9234
9235							# Otherwise, if we found a match we must convert back from
9236							# string position to the pre_token index for continued parsing.
9237							else {
9238
9239							# I don't think an error flag can occur here ..but ?
9240	135					247	my $error;
9241	135					544	( $i, $error ) = inverse_pretoken_map( $i, $pos, $rtoken_map,
9242							$max_token_index );
9243	135	50				448	if ($error) { $self->warning("Possibly invalid sub\n") }
	0					0
9244
9245							# Patch part #2 to fixes cases b994 and b1053:
9246							# Do not let spaces be part of the token of an anonymous sub
9247							# keyword which we marked as type 'k' above...i.e. for
9248							# something like:
9249							# 'sub : lvalue { ...'
9250							# Back up and let it be parsed as a blank
9251	135	50	66			621	if ( $type eq 'k'
			66
			33
9252							&& $attrs
9253							&& $i > $i_entry
9254							&& substr( $rtokens->[$i], 0, 1 ) =~ m/\s/ )
9255							{
9256	2					5	$i--;
9257							}
9258
9259							# check for multiple definitions of a sub
9260	135					388	( $next_nonblank_token, my $i_next ) =
9261							find_next_nonblank_token_on_this_line( $i, $rtokens,
9262							$max_token_index );
9263							}
9264
9265	136	100				759	if ( $next_nonblank_token =~ /^(\s*\|#)$/ )
9266							{ # skip blank or side comment
9267	7					49	my ( $rpre_tokens, $rpre_types ) =
9268							$self->peek_ahead_for_n_nonblank_pre_tokens(1);
9269	7	50	33			39	if ( defined($rpre_tokens) && @{$rpre_tokens} ) {
	7					31
9270	7					26	$next_nonblank_token = $rpre_tokens->[0];
9271							}
9272							else {
9273	0					0	$next_nonblank_token = '}';
9274							}
9275							}
9276
9277							# See what's next...
9278	136	100				570	if ( $next_nonblank_token eq '{' ) {
		100
		50
		100
		50
		0
9279	104	100				364	if ($subname) {
9280
9281							# Check for multiple definitions of a sub, but
9282							# it is ok to have multiple sub BEGIN, etc,
9283							# so we do not complain if name is all caps
9284	94	50	33			616	if ( $rsaw_function_definition->{$subname}{$package}
9285							&& $subname !~ /^[A-Z]+$/ )
9286							{
9287							my $lno =
9288	0					0	$rsaw_function_definition->{$subname}{$package};
9289	0	0				0	if ( $package =~ /^\d/ ) {
9290	0					0	$self->warning(
9291							"already saw definition of lexical 'sub $subname' at line $lno\n"
9292							);
9293
9294							}
9295							else {
9296	0					0	$self->warning(
9297							"already saw definition of 'sub $subname' in package '$package' at line $lno\n"
9298							) unless (DEVEL_MODE);
9299							}
9300							}
9301	94					368	$rsaw_function_definition->{$subname}{$package} =
9302							$self->[_last_line_number_];
9303							}
9304							}
9305							elsif ( $next_nonblank_token eq ';' ) {
9306							}
9307							elsif ( $next_nonblank_token eq '}' ) {
9308							}
9309
9310							# ATTRS - if an attribute list follows, remember the name
9311							# of the sub so the next opening brace can be labeled.
9312							# Setting 'statement_type' causes any ':'s to introduce
9313							# attributes.
9314							elsif ( $next_nonblank_token eq ':' ) {
9315	16	100				62	if ( $call_type == SUB_CALL ) {
9316	14	100				53	$statement_type =
9317							substr( $tok, 0, 3 ) eq 'sub' ? $tok : 'sub';
9318							}
9319							}
9320
9321							# if we stopped before an open paren ...
9322							elsif ( $next_nonblank_token eq '(' ) {
9323
9324							# If we DID NOT see this paren above then it must be on the
9325							# next line so we will set a flag to come back here and see if
9326							# it is a PROTOTYPE
9327
9328							# Otherwise, we assume it is a SIGNATURE rather than a
9329							# PROTOTYPE and let the normal tokenizer handle it as a list
9330	15	100				49	if ( !$saw_opening_paren ) {
9331	4					11	$id_scan_state = 'sub'; # we must come back to get proto
9332							}
9333	15	50				73	if ( $call_type == SUB_CALL ) {
9334	15	50				71	$statement_type =
9335							substr( $tok, 0, 3 ) eq 'sub' ? $tok : 'sub';
9336							}
9337							}
9338							elsif ($next_nonblank_token) { # EOF technically ok
9339
9340	0	0	0			0	if ( $rinput_hash->{tok} eq 'method' && $call_type == SUB_CALL )
9341							{
9342							# For a method call, silently ignore this error (rt145706)
9343							# to avoid needless warnings. Example which can produce it:
9344							# test(method Pack (), "method");
9345
9346							# TODO: scan for use feature 'class' and:
9347							# - if we saw 'use feature 'class' then issue the warning.
9348							# - if we did not see use feature 'class' then issue the
9349							# warning and suggest turning off --use-feature=class
9350							}
9351							else {
9352	0	0				0	$subname = EMPTY_STRING unless defined($subname);
9353	0					0	$self->warning(
9354							"expecting ':' or ';' or '{' after definition or declaration of sub '$subname' but saw '$next_nonblank_token'\n"
9355							);
9356							}
9357							}
9358	136					678	check_prototype( $proto, $package, $subname );
9359							}
9360
9361							# no match to either sub name or prototype, but line not blank
9362							else {
9363
9364							}
9365	300					1585	return ( $i, $tok, $type, $id_scan_state );
9366							} ## end sub do_scan_sub
9367							}
9368
9369							#########################################################################
9370							# Tokenizer utility routines which may use CONSTANTS but no other GLOBALS
9371							#########################################################################
9372
9373							sub find_next_nonblank_token {
9374	6154			6154	0	12052	my ( $self, $i, $rtokens, $max_token_index ) = @_;
9375
9376							# Returns the next nonblank token after the token at index $i
9377							# To skip past a side comment, and any subsequent block comments
9378							# and blank lines, call with i=$max_token_index
9379
9380	6154	100				13259	if ( $i >= $max_token_index ) {
9381	117	100				643	if ( !peeked_ahead() ) {
9382	115					382	peeked_ahead(1);
9383	115					600	$self->peek_ahead_for_nonblank_token( $rtokens, $max_token_index );
9384							}
9385							}
9386
9387	6154					10894	my $next_nonblank_token = $rtokens->[ ++$i ];
9388	6154	50	33			22890	return ( SPACE, $i )
9389							unless ( defined($next_nonblank_token) && length($next_nonblank_token) );
9390
9391							# Quick test for nonblank ascii char. Note that we just have to
9392							# examine the first character here.
9393	6154					12510	my $ord = ord( substr( $next_nonblank_token, 0, 1 ) );
9394	6154	100	66			23682	if ( $ord >= ORD_PRINTABLE_MIN
		50	33
		0
9395							&& $ord <= ORD_PRINTABLE_MAX )
9396							{
9397	2353					7058	return ( $next_nonblank_token, $i );
9398							}
9399
9400							# Quick test to skip over an ascii space or tab
9401							elsif ( $ord == ORD_SPACE \|\| $ord == ORD_TAB ) {
9402	3801					6783	$next_nonblank_token = $rtokens->[ ++$i ];
9403	3801	50				8444	return ( SPACE, $i ) unless defined($next_nonblank_token);
9404							}
9405
9406							# Slow test to skip over something else identified as whitespace
9407							elsif ( $next_nonblank_token =~ /^\s*$/ ) {
9408	0					0	$next_nonblank_token = $rtokens->[ ++$i ];
9409	0	0				0	return ( SPACE, $i ) unless defined($next_nonblank_token);
9410							}
9411
9412							# We should be at a nonblank now
9413	3801					10821	return ( $next_nonblank_token, $i );
9414							} ## end sub find_next_nonblank_token
9415
9416							sub find_next_noncomment_type {
9417	98			98	0	358	my ( $self, $i, $rtokens, $max_token_index ) = @_;
9418
9419							# Given the current character position, look ahead past any comments
9420							# and blank lines and return the next token, including digraphs and
9421							# trigraphs.
9422
9423	98					515	my ( $next_nonblank_token, $i_next ) =
9424							$self->find_next_nonblank_token( $i, $rtokens, $max_token_index );
9425
9426							# skip past any side comment
9427	98	50				551	if ( $next_nonblank_token eq '#' ) {
9428	0					0	( $next_nonblank_token, $i_next ) =
9429							$self->find_next_nonblank_token( $i_next, $rtokens,
9430							$max_token_index );
9431							}
9432
9433							# check for a digraph
9434	98	50	33			927	if ( $next_nonblank_token
			33
9435							&& $next_nonblank_token ne SPACE
9436							&& defined( $rtokens->[ $i_next + 1 ] ) )
9437							{
9438	98					305	my $test2 = $next_nonblank_token . $rtokens->[ $i_next + 1 ];
9439	98	100				408	if ( $is_digraph{$test2} ) {
9440	15					48	$next_nonblank_token = $test2;
9441	15					67	$i_next = $i_next + 1;
9442
9443							# check for a trigraph
9444	15	50				67	if ( defined( $rtokens->[ $i_next + 1 ] ) ) {
9445	15					54	my $test3 = $next_nonblank_token . $rtokens->[ $i_next + 1 ];
9446	15	50				83	if ( $is_trigraph{$test3} ) {
9447	0					0	$next_nonblank_token = $test3;
9448	0					0	$i_next = $i_next + 1;
9449							}
9450							}
9451							}
9452							}
9453
9454	98					330	return ( $next_nonblank_token, $i_next );
9455							} ## end sub find_next_noncomment_type
9456
9457							sub is_possible_numerator {
9458
9459							# Look at the next non-comment character and decide if it could be a
9460							# numerator. Return
9461							# 1 - yes
9462							# 0 - can't tell
9463							# -1 - no
9464
9465	0			0	0	0	my ( $self, $i, $rtokens, $max_token_index ) = @_;
9466	0					0	my $is_possible_numerator = 0;
9467
9468	0					0	my $next_token = $rtokens->[ $i + 1 ];
9469	0	0				0	if ( $next_token eq '=' ) { $i++; } # handle /=
	0					0
9470	0					0	my ( $next_nonblank_token, $i_next ) =
9471							$self->find_next_nonblank_token( $i, $rtokens, $max_token_index );
9472
9473	0	0				0	if ( $next_nonblank_token eq '#' ) {
9474	0					0	( $next_nonblank_token, $i_next ) =
9475							$self->find_next_nonblank_token( $max_token_index, $rtokens,
9476							$max_token_index );
9477							}
9478
9479	0	0				0	if ( $next_nonblank_token =~ /(\(\|\$\|\w\|\.\|\@)/ ) {
		0
9480	0					0	$is_possible_numerator = 1;
9481							}
9482							elsif ( $next_nonblank_token =~ /^\s*$/ ) {
9483	0					0	$is_possible_numerator = 0;
9484							}
9485							else {
9486	0					0	$is_possible_numerator = -1;
9487							}
9488
9489	0					0	return $is_possible_numerator;
9490							} ## end sub is_possible_numerator
9491
9492							{ ## closure for sub pattern_expected
9493							my %pattern_test;
9494
9495							BEGIN {
9496
9497							# List of tokens which may follow a pattern. Note that we will not
9498							# have formed digraphs at this point, so we will see '&' instead of
9499							# '&&' and '\|' instead of '\|\|'
9500
9501							# /(\)\|\}\|\;\|\&\&\|\\|\\|\|and\|or\|while\|if\|unless)/
9502	38			38		303	my @q = qw( & && \| \|\| ? : + - * and or while if unless);
9503	38					155	push @q, ')', '}', ']', '>', ',', ';';
9504	38					175624	@{pattern_test}{@q} = (1) x scalar(@q);
9505							} ## end BEGIN
9506
9507							sub pattern_expected {
9508
9509							# This a filter for a possible pattern.
9510							# It looks at the token after a possible pattern and tries to
9511							# determine if that token could end a pattern.
9512							# returns -
9513							# 1 - yes
9514							# 0 - can't tell
9515							# -1 - no
9516	0			0	0	0	my ( $self, $i, $rtokens, $max_token_index ) = @_;
9517	0					0	my $is_pattern = 0;
9518
9519	0					0	my $next_token = $rtokens->[ $i + 1 ];
9520	0	0				0	if ( $next_token =~ /^[msixpodualgc]/ ) {
9521	0					0	$i++;
9522							} # skip possible modifier
9523	0					0	my ( $next_nonblank_token, $i_next ) =
9524							$self->find_next_nonblank_token( $i, $rtokens, $max_token_index );
9525
9526	0	0				0	if ( $pattern_test{$next_nonblank_token} ) {
9527	0					0	$is_pattern = 1;
9528							}
9529							else {
9530
9531							# Added '#' to fix issue c044
9532	0	0	0			0	if ( $next_nonblank_token =~ /^\s*$/
9533							\|\| $next_nonblank_token eq '#' )
9534							{
9535	0					0	$is_pattern = 0;
9536							}
9537							else {
9538	0					0	$is_pattern = -1;
9539							}
9540							}
9541	0					0	return $is_pattern;
9542							} ## end sub pattern_expected
9543							}
9544
9545							sub find_next_nonblank_token_on_this_line {
9546	453			453	0	1049	my ( $i, $rtokens, $max_token_index ) = @_;
9547	453					754	my $next_nonblank_token;
9548
9549	453	100				1111	if ( $i < $max_token_index ) {
9550	445					953	$next_nonblank_token = $rtokens->[ ++$i ];
9551
9552	445	100				1913	if ( $next_nonblank_token =~ /^\s*$/ ) {
9553
9554	120	100				480	if ( $i < $max_token_index ) {
9555	118					369	$next_nonblank_token = $rtokens->[ ++$i ];
9556							}
9557							}
9558							}
9559							else {
9560	8					26	$next_nonblank_token = EMPTY_STRING;
9561							}
9562	453					1435	return ( $next_nonblank_token, $i );
9563							} ## end sub find_next_nonblank_token_on_this_line
9564
9565							sub find_angle_operator_termination {
9566
9567							# We are looking at a '<' and want to know if it is an angle operator.
9568							# We are to return:
9569							# $i = pretoken index of ending '>' if found, current $i otherwise
9570							# $type = 'Q' if found, '>' otherwise
9571	8			8	0	42	my ( $self, $input_line, $i_beg, $rtoken_map, $expecting, $max_token_index )
9572							= @_;
9573	8					17	my $i = $i_beg;
9574	8					21	my $type = '<';
9575	8					33	pos($input_line) = 1 + $rtoken_map->[$i];
9576
9577	8					19	my $filter;
9578
9579							# we just have to find the next '>' if a term is expected
9580	8	100				48	if ( $expecting == TERM ) { $filter = '[\>]' }
	6	50				19
9581
9582							# we have to guess if we don't know what is expected
9583	2					4	elsif ( $expecting == UNKNOWN ) { $filter = '[\>\;\=\#\\|\<]' }
9584
9585							# shouldn't happen - we shouldn't be here if operator is expected
9586							else {
9587	0					0	if (DEVEL_MODE) {
9588							$self->Fault(<<EOM);
9589							Bad call to find_angle_operator_termination
9590							EOM
9591							}
9592	0					0	return ( $i, $type );
9593							}
9594
9595							# To illustrate what we might be looking at, in case we are
9596							# guessing, here are some examples of valid angle operators
9597							# (or file globs):
9598							# <tmp_imp/*>
9599							# <FH>
9600							# <$fh>
9601							# <.c .h>
9602							# <_>
9603							# <jskdfjskdfj* op/* jskdjfjkosvk*> ( glob.t)
9604							# <${PREFIX}img.$IMAGE_TYPE>
9605							# <img*.$IMAGE_TYPE>
9606							# <Timg*.$IMAGE_TYPE>
9607							# <$LATEX2HTMLVERSIONS${dd}html[1-9].[0-9].pl>
9608							#
9609							# Here are some examples of lines which do not have angle operators:
9610							# return unless $self->[2]++ < $#{$self->[1]};
9611							# < 2 \|\| @$t >
9612							#
9613							# the following line from dlister.pl caused trouble:
9614							# print'~'x79,"\n",$D<1024?"0.$D":$D>>10,"K, $C files\n\n\n";
9615							#
9616							# If the '<' starts an angle operator, it must end on this line and
9617							# it must not have certain characters like ';' and '=' in it. I use
9618							# this to limit the testing. This filter should be improved if
9619							# possible.
9620
9621	8	50				171	if ( $input_line =~ /($filter)/g ) {
9622
9623	8	50				48	if ( $1 eq '>' ) {
9624
9625							# We MAY have found an angle operator termination if we get
9626							# here, but we need to do more to be sure we haven't been
9627							# fooled.
9628	8					34	my $pos = pos($input_line);
9629
9630	8					20	my $pos_beg = $rtoken_map->[$i];
9631	8					28	my $str = substr( $input_line, $pos_beg, ( $pos - $pos_beg ) );
9632
9633							# Test for '<' after possible filehandle, issue c103
9634							# print $fh <>; # syntax error
9635							# print $fh <DATA>; # ok
9636							# print $fh < DATA>; # syntax error at '>'
9637							# print STDERR < DATA>; # ok, prints word 'DATA'
9638							# print BLABLA <DATA>; # ok; does nothing unless BLABLA is defined
9639	8	100				32	if ( $last_nonblank_type eq 'Z' ) {
9640
9641							# $str includes brackets; something like '<DATA>'
9642	1	0	33			22	if ( substr( $last_nonblank_token, 0, 1 ) !~ /[A-Za-z_]/
9643							&& substr( $str, 1, 1 ) !~ /[A-Za-z_]/ )
9644							{
9645	0					0	return ( $i, $type );
9646							}
9647							}
9648
9649							# Reject if the closing '>' follows a '-' as in:
9650							# if ( VERSION < 5.009 && $op-> name eq 'assign' ) { }
9651	8	100				36	if ( $expecting eq UNKNOWN ) {
9652	2					4	my $check = substr( $input_line, $pos - 2, 1 );
9653	2	100				12	if ( $check eq '-' ) {
9654	1					5	return ( $i, $type );
9655							}
9656							}
9657
9658							######################################debug#####
9659							#$self->write_diagnostics( "ANGLE? :$str\n");
9660							#print "ANGLE: found $1 at pos=$pos str=$str check=$check\n";
9661							######################################debug#####
9662	7					16	$type = 'Q';
9663	7					13	my $error;
9664	7					33	( $i, $error ) =
9665							inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );
9666
9667							# It may be possible that a quote ends midway in a pretoken.
9668							# If this happens, it may be necessary to split the pretoken.
9669	7	50				34	if ($error) {
9670	0					0	if (DEVEL_MODE) {
9671							$self->Fault(<<EOM);
9672							unexpected error condition returned by inverse_pretoken_map
9673							EOM
9674							}
9675							$self->warning(
9676	0					0	"Possible tokinization error..please check this line\n");
9677							}
9678
9679							# Check for accidental formatting of a markup language doc...
9680							# Formatting will be skipped if we set _html_tag_count_ and
9681							# also set a warning of any kind.
9682	7					26	my $is_html_tag;
9683	7		33			33	my $is_first_string =
9684							$i_beg == 0 && $self->[_last_line_number_] == 1;
9685
9686							# html comment '<!...' of any type
9687	7	50	33			85	if ( $str =~ /^<\s*!/ ) {
		50
		50
9688	0					0	$is_html_tag = 1;
9689	0	0				0	if ($is_first_string) {
9690	0					0	$self->warning(
9691							"looks like a markup language, continuing error checks\n"
9692							);
9693							}
9694							}
9695
9696							# html end tag, something like </h1>
9697							elsif ( $str =~ /^<\s\/\w+\s>$/ ) {
9698	0					0	$is_html_tag = 1;
9699							}
9700
9701							# xml prolog?
9702							elsif ( $str =~ /^<\?xml\s.*\?>$/i && $is_first_string ) {
9703	0					0	$is_html_tag = 1;
9704	0					0	$self->warning(
9705							"looks like a markup language, continuing error checks\n");
9706							}
9707
9708	7	50				32	if ($is_html_tag) {
9709	0					0	$self->[_html_tag_count_]++;
9710							}
9711
9712							# count blanks on inside of brackets
9713	7					35	my $blank_count = 0;
9714	7	100				47	$blank_count++ if ( $str =~ /<\s+/ );
9715	7	100				37	$blank_count++ if ( $str =~ /\s+>/ );
9716
9717							# Now let's see where we stand....
9718							# OK if math op not possible
9719	7	100				38	if ( $expecting == TERM ) {
		50
		50
		0
9720							}
9721
9722							elsif ($is_html_tag) {
9723							}
9724
9725							# OK if there are no more than 2 non-blank pre-tokens inside
9726							# (not possible to write 2 token math between < and >)
9727							# This catches most common cases
9728							elsif ( $i <= $i_beg + 3 + $blank_count ) {
9729
9730							# No longer any need to document this common case
9731							## $self->write_diagnostics("ANGLE(1 or 2 tokens): $str\n");
9732							}
9733
9734							# OK if there is some kind of identifier inside
9735							# print $fh <tvg::INPUT>;
9736							elsif ( $str =~ /^<\s\$?(\w\|::\|\s)+\s>$/ ) {
9737	0					0	$self->write_diagnostics("ANGLE (contains identifier): $str\n");
9738							}
9739
9740							# Not sure..
9741							else {
9742
9743							# Let's try a Brace Test: any braces inside must balance
9744	0					0	my $br = 0;
9745	0					0	while ( $str =~ /\{/g ) { $br++ }
	0					0
9746	0					0	while ( $str =~ /\}/g ) { $br-- }
	0					0
9747	0					0	my $sb = 0;
9748	0					0	while ( $str =~ /\[/g ) { $sb++ }
	0					0
9749	0					0	while ( $str =~ /\]/g ) { $sb-- }
	0					0
9750	0					0	my $pr = 0;
9751	0					0	while ( $str =~ /\(/g ) { $pr++ }
	0					0
9752	0					0	while ( $str =~ /\)/g ) { $pr-- }
	0					0
9753
9754							# if braces do not balance - not angle operator
9755	0	0	0			0	if ( $br \|\| $sb \|\| $pr ) {
			0
9756	0					0	$i = $i_beg;
9757	0					0	$type = '<';
9758	0					0	$self->write_diagnostics(
9759							"NOT ANGLE (BRACE={$br ($pr [$sb ):$str\n");
9760							}
9761
9762							# we should keep doing more checks here...to be continued
9763							# Tentatively accepting this as a valid angle operator.
9764							# There are lots more things that can be checked.
9765							else {
9766	0					0	$self->write_diagnostics(
9767							"ANGLE-Guessing yes: $str expecting=$expecting\n");
9768	0					0	$self->write_logfile_entry(
9769							"Guessing angle operator here: $str\n");
9770							}
9771							}
9772							}
9773
9774							# didn't find ending >
9775							else {
9776	0	0				0	if ( $expecting == TERM ) {
9777	0					0	$self->warning("No ending > for angle operator\n");
9778							}
9779							}
9780							}
9781	7					48	return ( $i, $type );
9782							} ## end sub find_angle_operator_termination
9783
9784							sub scan_number_do {
9785
9786							# scan a number in any of the formats that Perl accepts
9787							# Underbars (_) are allowed in decimal numbers.
9788							# input parameters -
9789							# $input_line - the string to scan
9790							# $i - pre_token index to start scanning
9791							# $rtoken_map - reference to the pre_token map giving starting
9792							# character position in $input_line of token $i
9793							# output parameters -
9794							# $i - last pre_token index of the number just scanned
9795							# number - the number (characters); or undef if not a number
9796
9797	629			629	0	1620	my ( $self, $input_line, $i, $rtoken_map, $input_type, $max_token_index ) =
9798							@_;
9799	629					1133	my $pos_beg = $rtoken_map->[$i];
9800	629					919	my $pos;
9801	629					987	my $i_begin = $i;
9802	629					1001	my $number = undef;
9803	629					1338	my $type = $input_type;
9804
9805	629					1382	my $first_char = substr( $input_line, $pos_beg, 1 );
9806
9807							# Look for bad starting characters; Shouldn't happen..
9808	629	50				2814	if ( $first_char !~ /[\d\.\+\-Ee]/ ) {
9809	0					0	if (DEVEL_MODE) {
9810							$self->Fault(<<EOM);
9811							Program bug - scan_number given bad first character = '$first_char'
9812							EOM
9813							}
9814	0					0	return ( $i, $type, $number );
9815							}
9816
9817							# handle v-string without leading 'v' character ('Two Dot' rule)
9818							# (vstring.t)
9819							# Here is the format prior to including underscores:
9820							## if ( $input_line =~ /\G((\d+)?\.\d+(\.\d+)+)/g ) {
9821	629					1866	pos($input_line) = $pos_beg;
9822	629	50				3017	if ( $input_line =~ /\G((\d[_\d]*)?\.[\d_]+(\.[\d_]+)+)/g ) {
9823	0					0	$pos = pos($input_line);
9824	0					0	my $numc = $pos - $pos_beg;
9825	0					0	$number = substr( $input_line, $pos_beg, $numc );
9826	0					0	$type = 'v';
9827	0					0	$self->report_v_string($number);
9828							}
9829
9830							# handle octal, hex, binary
9831	629	50				1631	if ( !defined($number) ) {
9832	629					1380	pos($input_line) = $pos_beg;
9833
9834							# Perl 5.22 added floating point literals, like '0x0.b17217f7d1cf78p0'
9835							# For reference, the format prior to hex floating point is:
9836							# /\G[+-]?0(([xX][0-9a-fA-F_]+)\|([0-7_]+)\|([bB][01_]+))/g )
9837							# (hex) (octal) (binary)
9838	629	100				2467	if (
9839							$input_line =~
9840
9841							/\G[+-]?0( # leading [signed] 0
9842
9843							# a hex float, i.e. '0x0.b17217f7d1cf78p0'
9844							([xX][0-9a-fA-F_]* # X and optional leading digits
9845							(\.([0-9a-fA-F][0-9a-fA-F_]*)?)? # optional decimal and fraction
9846							[Pp][+-]?[0-9a-fA-F] # REQUIRED exponent with digit
9847							[0-9a-fA-F_]*) # optional Additional exponent digits
9848
9849							# or hex integer
9850							\|([xX][0-9a-fA-F_]+)
9851
9852							# or octal fraction
9853							\|([oO]?[0-7_]+ # string of octal digits
9854							(\.([0-7][0-7_]*)?)? # optional decimal and fraction
9855							[Pp][+-]?[0-7] # REQUIRED exponent, no underscore
9856							[0-7_]*) # Additional exponent digits with underscores
9857
9858							# or octal integer
9859							\|([oO]?[0-7_]+) # string of octal digits
9860
9861							# or a binary float
9862							\|([bB][01_]* # 'b' with string of binary digits
9863							(\.([01][01_]*)?)? # optional decimal and fraction
9864							[Pp][+-]?[01] # Required exponent indicator, no underscore
9865							[01_]*) # additional exponent bits
9866
9867							# or binary integer
9868							\|([bB][01_]+) # 'b' with string of binary digits
9869
9870							)/gx
9871							)
9872							{
9873	72					149	$pos = pos($input_line);
9874	72					129	my $numc = $pos - $pos_beg;
9875	72					157	$number = substr( $input_line, $pos_beg, $numc );
9876	72					132	$type = 'n';
9877							}
9878							}
9879
9880							# handle decimal
9881	629	100				1481	if ( !defined($number) ) {
9882	557					1137	pos($input_line) = $pos_beg;
9883
9884	557	50				2728	if ( $input_line =~ /\G([+-]?[\d_](\.[\d_])?([Ee][+-]?(\d+))?)/g ) {
9885	557					1067	$pos = pos($input_line);
9886
9887							# watch out for things like 0..40 which would give 0. by this;
9888	557	100	100			2067	if ( ( substr( $input_line, $pos - 1, 1 ) eq '.' )
9889							&& ( substr( $input_line, $pos, 1 ) eq '.' ) )
9890							{
9891	37					58	$pos--;
9892							}
9893	557					929	my $numc = $pos - $pos_beg;
9894	557					1089	$number = substr( $input_line, $pos_beg, $numc );
9895	557					1052	$type = 'n';
9896							}
9897							}
9898
9899							# filter out non-numbers like e + - . e2 .e3 +e6
9900							# the rule: at least one digit, and any 'e' must be preceded by a digit
9901	629	100	66			3205	if (
			66
9902							$number !~ /\d/ # no digits
9903							\|\| ( $number =~ /^(.*)[eE]/
9904							&& $1 !~ /\d/ ) # or no digits before the 'e'
9905							)
9906							{
9907	303					479	$number = undef;
9908	303					530	$type = $input_type;
9909	303					1195	return ( $i, $type, $number );
9910							}
9911
9912							# Found a number; now we must convert back from character position
9913							# to pre_token index. An error here implies user syntax error.
9914							# An example would be an invalid octal number like '009'.
9915	326					581	my $error;
9916	326					905	( $i, $error ) =
9917							inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );
9918	326	50				918	if ($error) { $self->warning("Possibly invalid number\n") }
	0					0
9919
9920	326					1239	return ( $i, $type, $number );
9921							} ## end sub scan_number_do
9922
9923							sub inverse_pretoken_map {
9924
9925							# Starting with the current pre_token index $i, scan forward until
9926							# finding the index of the next pre_token whose position is $pos.
9927	2166			2166	0	5131	my ( $i, $pos, $rtoken_map, $max_token_index ) = @_;
9928	2166					3656	my $error = 0;
9929
9930	2166					5526	while ( ++$i <= $max_token_index ) {
9931
9932	4032	100				9339	if ( $pos <= $rtoken_map->[$i] ) {
9933
9934							# Let the calling routine handle errors in which we do not
9935							# land on a pre-token boundary. It can happen by running
9936							# perltidy on some non-perl scripts, for example.
9937	2131	50				5285	if ( $pos < $rtoken_map->[$i] ) { $error = 1 }
	0					0
9938	2131					3442	$i--;
9939	2131					3690	last;
9940							}
9941							}
9942	2166					5461	return ( $i, $error );
9943							} ## end sub inverse_pretoken_map
9944
9945							sub find_here_doc {
9946
9947							# find the target of a here document, if any
9948							# input parameters:
9949							# $i - token index of the second < of <<
9950							# ($i must be less than the last token index if this is called)
9951							# output parameters:
9952							# $found_target = 0 didn't find target; =1 found target
9953							# HERE_TARGET - the target string (may be empty string)
9954							# $i - unchanged if not here doc,
9955							# or index of the last token of the here target
9956							# $saw_error - flag noting unbalanced quote on here target
9957	9			9	0	40	my ( $self, $expecting, $i, $rtokens, $rtoken_map, $max_token_index ) = @_;
9958
9959	9					23	my $ibeg = $i;
9960	9					25	my $found_target = 0;
9961	9					23	my $here_doc_target = EMPTY_STRING;
9962	9					30	my $here_quote_character = EMPTY_STRING;
9963	9					26	my $saw_error = 0;
9964	9					30	my ( $next_nonblank_token, $i_next_nonblank, $next_token );
9965	9					55	$next_token = $rtokens->[ $i + 1 ];
9966
9967							# perl allows a backslash before the target string (heredoc.t)
9968	9					26	my $backslash = 0;
9969	9	50				51	if ( $next_token eq '\\' ) {
9970	0					0	$backslash = 1;
9971	0					0	$next_token = $rtokens->[ $i + 2 ];
9972							}
9973
9974	9					55	( $next_nonblank_token, $i_next_nonblank ) =
9975							find_next_nonblank_token_on_this_line( $i, $rtokens, $max_token_index );
9976
9977	9	100	33			73	if ( $next_nonblank_token =~ /[\'\"\`]/ ) {
		50
		50
9978
9979	6					21	my $in_quote = 1;
9980	6					15	my $quote_depth = 0;
9981	6					19	my $quote_pos = 0;
9982	6					15	my $quoted_string;
9983
9984							(
9985	6					41	$i, $in_quote, $here_quote_character, $quote_pos, $quote_depth,
9986							$quoted_string
9987							)
9988							= $self->follow_quoted_string( $i_next_nonblank, $in_quote, $rtokens,
9989							$here_quote_character, $quote_pos, $quote_depth, $max_token_index );
9990
9991	6	50				44	if ($in_quote) { # didn't find end of quote, so no target found
9992	0					0	$i = $ibeg;
9993	0	0				0	if ( $expecting == TERM ) {
9994	0					0	$self->warning(
9995							"Did not find here-doc string terminator ($here_quote_character) before end of line \n"
9996							);
9997	0					0	$saw_error = 1;
9998							}
9999							}
10000							else { # found ending quote
10001	6					19	$found_target = 1;
10002
10003	6					14	my $tokj;
10004	6					958	foreach my $j ( $i_next_nonblank + 1 .. $i - 1 ) {
10005	6					24	$tokj = $rtokens->[$j];
10006
10007							# we have to remove any backslash before the quote character
10008							# so that the here-doc-target exactly matches this string
10009							next
10010	6	0	33			39	if ( $tokj eq "\\"
			33
10011							&& $j < $i - 1
10012							&& $rtokens->[ $j + 1 ] eq $here_quote_character );
10013	6					27	$here_doc_target .= $tokj;
10014							}
10015							}
10016							}
10017
10018							elsif ( ( $next_token =~ /^\s*$/ ) and ( $expecting == TERM ) ) {
10019	0					0	$found_target = 1;
10020	0					0	$self->write_logfile_entry(
10021							"found blank here-target after <<; suggest using \"\"\n");
10022	0					0	$i = $ibeg;
10023							}
10024							elsif ( $next_token =~ /^\w/ ) { # simple bareword or integer after <<
10025
10026	3					12	my $here_doc_expected;
10027	3	50				12	if ( $expecting == UNKNOWN ) {
10028	0					0	$here_doc_expected = $self->guess_if_here_doc($next_token);
10029							}
10030							else {
10031	3					11	$here_doc_expected = 1;
10032							}
10033
10034	3	50				11	if ($here_doc_expected) {
10035	3					8	$found_target = 1;
10036	3					7	$here_doc_target = $next_token;
10037	3					9	$i = $ibeg + 1;
10038							}
10039
10040							}
10041							else {
10042
10043	0	0				0	if ( $expecting == TERM ) {
10044	0					0	$found_target = 1;
10045	0					0	$self->write_logfile_entry("Note: bare here-doc operator <<\n");
10046							}
10047							else {
10048	0					0	$i = $ibeg;
10049							}
10050							}
10051
10052							# patch to neglect any prepended backslash
10053	9	50	33			80	if ( $found_target && $backslash ) { $i++ }
	0					0
10054
10055	9					54	return ( $found_target, $here_doc_target, $here_quote_character, $i,
10056							$saw_error );
10057							} ## end sub find_here_doc
10058
10059							sub do_quote {
10060
10061							# follow (or continue following) quoted string(s)
10062							# $in_quote return code:
10063							# 0 - ok, found end
10064							# 1 - still must find end of quote whose target is $quote_character
10065							# 2 - still looking for end of first of two quotes
10066							#
10067							# Returns updated strings:
10068							# $quoted_string_1 = quoted string seen while in_quote=1
10069							# $quoted_string_2 = quoted string seen while in_quote=2
10070							my (
10071
10072	2763			2763	0	8255	$self,
10073							$i,
10074							$in_quote,
10075							$quote_character,
10076							$quote_pos,
10077							$quote_depth,
10078							$quoted_string_1,
10079							$quoted_string_2,
10080							$rtokens,
10081							$rtoken_map,
10082							$max_token_index,
10083
10084							) = @_;
10085
10086	2763					4362	my $quoted_string;
10087	2763	100				6388	if ( $in_quote == 2 ) { # two quotes/quoted_string_1s to follow
10088	29					67	my $ibeg = $i;
10089							(
10090	29					126	$i, $in_quote, $quote_character, $quote_pos, $quote_depth,
10091							$quoted_string
10092							)
10093							= $self->follow_quoted_string( $ibeg, $in_quote, $rtokens,
10094							$quote_character, $quote_pos, $quote_depth, $max_token_index );
10095	29					88	$quoted_string_2 .= $quoted_string;
10096	29	50				225	if ( $in_quote == 1 ) {
10097	29	50				114	if ( $quote_character =~ /[\{\[\<\(]/ ) { $i++; }
	0					0
10098	29					76	$quote_character = EMPTY_STRING;
10099							}
10100							else {
10101	0					0	$quoted_string_2 .= "\n";
10102							}
10103							}
10104
10105	2763	50				6007	if ( $in_quote == 1 ) { # one (more) quote to follow
10106	2763					4181	my $ibeg = $i;
10107							(
10108	2763					7635	$i, $in_quote, $quote_character, $quote_pos, $quote_depth,
10109							$quoted_string
10110							)
10111							= $self->follow_quoted_string( $ibeg, $in_quote, $rtokens,
10112							$quote_character, $quote_pos, $quote_depth, $max_token_index );
10113	2763					5860	$quoted_string_1 .= $quoted_string;
10114	2763	100				6719	if ( $in_quote == 1 ) {
10115	183					404	$quoted_string_1 .= "\n";
10116							}
10117							}
10118							return (
10119
10120	2763					9482	$i,
10121							$in_quote,
10122							$quote_character,
10123							$quote_pos,
10124							$quote_depth,
10125							$quoted_string_1,
10126							$quoted_string_2,
10127
10128							);
10129							} ## end sub do_quote
10130
10131							sub follow_quoted_string {
10132
10133							# scan for a specific token, skipping escaped characters
10134							# if the quote character is blank, use the first non-blank character
10135							# input parameters:
10136							# $rtokens = reference to the array of tokens
10137							# $i = the token index of the first character to search
10138							# $in_quote = number of quoted strings being followed
10139							# $beginning_tok = the starting quote character
10140							# $quote_pos = index to check next for alphanumeric delimiter
10141							# output parameters:
10142							# $i = the token index of the ending quote character
10143							# $in_quote = decremented if found end, unchanged if not
10144							# $beginning_tok = the starting quote character
10145							# $quote_pos = index to check next for alphanumeric delimiter
10146							# $quote_depth = nesting depth, since delimiters '{ ( [ <' can be nested.
10147							# $quoted_string = the text of the quote (without quotation tokens)
10148							my (
10149
10150	2809			2809	0	6366	$self,
10151							$i_beg,
10152							$in_quote,
10153							$rtokens,
10154							$beginning_tok,
10155							$quote_pos,
10156							$quote_depth,
10157							$max_token_index,
10158
10159							) = @_;
10160
10161	2809					4314	my ( $tok, $end_tok );
10162	2809					4629	my $i = $i_beg - 1;
10163	2809					4779	my $quoted_string = EMPTY_STRING;
10164
10165	2809					3947	0 && do {
10166							print STDOUT
10167							"QUOTE entering with quote_pos = $quote_pos i=$i beginning_tok =$beginning_tok\n";
10168							};
10169
10170							# get the corresponding end token
10171	2809	100				13063	if ( $beginning_tok !~ /^\s*$/ ) {
10172	183					908	$end_tok = matching_end_token($beginning_tok);
10173							}
10174
10175							# a blank token means we must find and use the first non-blank one
10176							else {
10177	2626	100				6550	my $allow_quote_comments = ( $i < 0 ) ? 1 : 0; # i<0 means we saw a <cr>
10178
10179	2626					6261	while ( $i < $max_token_index ) {
10180	2626					5089	$tok = $rtokens->[ ++$i ];
10181
10182	2626	50				7867	if ( $tok !~ /^\s*$/ ) {
10183
10184	2626	50	66			7596	if ( ( $tok eq '#' ) && ($allow_quote_comments) ) {
10185	0					0	$i = $max_token_index;
10186							}
10187							else {
10188
10189	2626	100				5522	if ( length($tok) > 1 ) {
10190	1	50				5	if ( $quote_pos <= 0 ) { $quote_pos = 1 }
	1					3
10191	1					10	$beginning_tok = substr( $tok, $quote_pos - 1, 1 );
10192							}
10193							else {
10194	2625					4624	$beginning_tok = $tok;
10195	2625					4394	$quote_pos = 0;
10196							}
10197	2626					6392	$end_tok = matching_end_token($beginning_tok);
10198	2626					4481	$quote_depth = 1;
10199	2626					4626	last;
10200							}
10201							}
10202							else {
10203	0					0	$allow_quote_comments = 1;
10204							}
10205							}
10206							}
10207
10208							# There are two different loops which search for the ending quote
10209							# character. In the rare case of an alphanumeric quote delimiter, we
10210							# have to look through alphanumeric tokens character-by-character, since
10211							# the pre-tokenization process combines multiple alphanumeric
10212							# characters, whereas for a non-alphanumeric delimiter, only tokens of
10213							# length 1 can match.
10214
10215							#----------------------------------------------------------------
10216							# Case 1 (rare): loop for case of alphanumeric quote delimiter..
10217							# "quote_pos" is the position the current word to begin searching
10218							#----------------------------------------------------------------
10219	2809	100				7408	if ( $beginning_tok =~ /\w/ ) {
10220
10221							# Note this because it is not recommended practice except
10222							# for obfuscated perl contests
10223	1	50				4	if ( $in_quote == 1 ) {
10224	1					6	$self->write_logfile_entry(
10225							"Note: alphanumeric quote delimiter ($beginning_tok) \n");
10226							}
10227
10228							# Note: changed < to <= here to fix c109. Relying on extra end blanks.
10229	1					7	while ( $i <= $max_token_index ) {
10230
10231	4	100	66			14	if ( $quote_pos == 0 \|\| ( $i < 0 ) ) {
10232	3					9	$tok = $rtokens->[ ++$i ];
10233
10234	3	100				9	if ( $tok eq '\\' ) {
10235
10236							# retain backslash unless it hides the end token
10237	1	50				6	$quoted_string .= $tok
10238							unless $rtokens->[ $i + 1 ] eq $end_tok;
10239	1					4	$quote_pos++;
10240	1	50				7	last if ( $i >= $max_token_index );
10241	1					6	$tok = $rtokens->[ ++$i ];
10242							}
10243							}
10244	4					8	my $old_pos = $quote_pos;
10245
10246	4	50	33			20	unless ( defined($tok) && defined($end_tok) && defined($quote_pos) )
			33
10247							{
10248
10249							}
10250	4					8	$quote_pos = 1 + index( $tok, $end_tok, $quote_pos );
10251
10252	4	100				11	if ( $quote_pos > 0 ) {
10253
10254	1					3	$quoted_string .=
10255							substr( $tok, $old_pos, $quote_pos - $old_pos - 1 );
10256
10257							# NOTE: any quote modifiers will be at the end of '$tok'. If we
10258							# wanted to check them, this is the place to get them. But
10259							# this quote form is rarely used in practice, so it isn't
10260							# worthwhile.
10261
10262	1					3	$quote_depth--;
10263
10264	1	50				5	if ( $quote_depth == 0 ) {
10265	1					2	$in_quote--;
10266	1					4	last;
10267							}
10268							}
10269							else {
10270	3	50				8	if ( $old_pos <= length($tok) ) {
10271	3					8	$quoted_string .= substr( $tok, $old_pos );
10272							}
10273							}
10274							}
10275							}
10276
10277							#-----------------------------------------------------------------------
10278							# Case 2 (normal): loop for case of a non-alphanumeric quote delimiter..
10279							#-----------------------------------------------------------------------
10280							else {
10281
10282	2808					6515	while ( $i < $max_token_index ) {
10283	10785					17025	$tok = $rtokens->[ ++$i ];
10284
10285	10785	100				25425	if ( $tok eq $end_tok ) {
		100
		100
10286	2615					4015	$quote_depth--;
10287
10288	2615	100				5979	if ( $quote_depth == 0 ) {
10289	2614					3734	$in_quote--;
10290	2614					4189	last;
10291							}
10292							}
10293							elsif ( $tok eq $beginning_tok ) {
10294	1					3	$quote_depth++;
10295							}
10296							elsif ( $tok eq '\\' ) {
10297
10298							# retain backslash unless it hides the beginning or end token
10299	376					901	$tok = $rtokens->[ ++$i ];
10300	376	100	100			2224	$quoted_string .= '\\'
10301							unless ( $tok eq $end_tok \|\| $tok eq $beginning_tok );
10302							}
10303	8171					15304	$quoted_string .= $tok;
10304							}
10305							}
10306	2809	50				6316	if ( $i > $max_token_index ) { $i = $max_token_index }
	0					0
10307							return (
10308
10309	2809					10143	$i,
10310							$in_quote,
10311							$beginning_tok,
10312							$quote_pos,
10313							$quote_depth,
10314							$quoted_string,
10315
10316							);
10317							} ## end sub follow_quoted_string
10318
10319							sub indicate_error {
10320	0			0	0	0	my ( $self, $msg, $line_number, $input_line, $pos, $carrat ) = @_;
10321	0					0	$self->interrupt_logfile();
10322	0					0	$self->warning($msg);
10323	0					0	$self->write_error_indicator_pair( $line_number, $input_line, $pos,
10324							$carrat );
10325	0					0	$self->resume_logfile();
10326	0					0	return;
10327							} ## end sub indicate_error
10328
10329							sub write_error_indicator_pair {
10330	0			0	0	0	my ( $self, $line_number, $input_line, $pos, $carrat ) = @_;
10331	0					0	my ( $offset, $numbered_line, $underline ) =
10332							make_numbered_line( $line_number, $input_line, $pos );
10333	0					0	$underline = write_on_underline( $underline, $pos - $offset, $carrat );
10334	0					0	$self->warning( $numbered_line . "\n" );
10335	0					0	$underline =~ s/\s*$//;
10336	0					0	$self->warning( $underline . "\n" );
10337	0					0	return;
10338							} ## end sub write_error_indicator_pair
10339
10340							sub make_numbered_line {
10341
10342							# Given an input line, its line number, and a character position of
10343							# interest, create a string not longer than 80 characters of the form
10344							# $lineno: sub_string
10345							# such that the sub_string of $str contains the position of interest
10346							#
10347							# Here is an example of what we want, in this case we add trailing
10348							# '...' because the line is long.
10349							#
10350							# 2: (One of QAML 2.0's authors is a member of the World Wide Web Con ...
10351							#
10352							# Here is another example, this time in which we used leading '...'
10353							# because of excessive length:
10354							#
10355							# 2: ... er of the World Wide Web Consortium's
10356							#
10357							# input parameters are:
10358							# $lineno = line number
10359							# $str = the text of the line
10360							# $pos = position of interest (the error) : 0 = first character
10361							#
10362							# We return :
10363							# - $offset = an offset which corrects the position in case we only
10364							# display part of a line, such that $pos-$offset is the effective
10365							# position from the start of the displayed line.
10366							# - $numbered_line = the numbered line as above,
10367							# - $underline = a blank 'underline' which is all spaces with the same
10368							# number of characters as the numbered line.
10369
10370	0			0	0	0	my ( $lineno, $str, $pos ) = @_;
10371	0	0				0	my $offset = ( $pos < 60 ) ? 0 : $pos - 40;
10372	0					0	my $excess = length($str) - $offset - 68;
10373	0	0				0	my $numc = ( $excess > 0 ) ? 68 : undef;
10374
10375	0	0				0	if ( defined($numc) ) {
10376	0	0				0	if ( $offset == 0 ) {
10377	0					0	$str = substr( $str, $offset, $numc - 4 ) . " ...";
10378							}
10379							else {
10380	0					0	$str = "... " . substr( $str, $offset + 4, $numc - 4 ) . " ...";
10381							}
10382							}
10383							else {
10384
10385	0	0				0	if ( $offset == 0 ) {
10386							}
10387							else {
10388	0					0	$str = "... " . substr( $str, $offset + 4 );
10389							}
10390							}
10391
10392	0					0	my $numbered_line = sprintf( "%d: ", $lineno );
10393	0					0	$offset -= length($numbered_line);
10394	0					0	$numbered_line .= $str;
10395	0					0	my $underline = SPACE x length($numbered_line);
10396	0					0	return ( $offset, $numbered_line, $underline );
10397							} ## end sub make_numbered_line
10398
10399							sub write_on_underline {
10400
10401							# The "underline" is a string that shows where an error is; it starts
10402							# out as a string of blanks with the same length as the numbered line of
10403							# code above it, and we have to add marking to show where an error is.
10404							# In the example below, we want to write the string '--^' just below
10405							# the line of bad code:
10406							#
10407							# 2: (One of QAML 2.0's authors is a member of the World Wide Web Con ...
10408							# ---^
10409							# We are given the current underline string, plus a position and a
10410							# string to write on it.
10411							#
10412							# In the above example, there will be 2 calls to do this:
10413							# First call: $pos=19, pos_chr=^
10414							# Second call: $pos=16, pos_chr=---
10415							#
10416							# This is a trivial thing to do with substr, but there is some
10417							# checking to do.
10418
10419	0			0	0	0	my ( $underline, $pos, $pos_chr ) = @_;
10420
10421							# check for error..shouldn't happen
10422	0	0	0			0	unless ( ( $pos >= 0 ) && ( $pos <= length($underline) ) ) {
10423	0					0	return $underline;
10424							}
10425	0					0	my $excess = length($pos_chr) + $pos - length($underline);
10426	0	0				0	if ( $excess > 0 ) {
10427	0					0	$pos_chr = substr( $pos_chr, 0, length($pos_chr) - $excess );
10428							}
10429	0					0	substr( $underline, $pos, length($pos_chr) ) = $pos_chr;
10430	0					0	return ($underline);
10431							} ## end sub write_on_underline
10432
10433							sub pre_tokenize {
10434
10435	6170			6170	0	12148	my ( $str, $max_tokens_wanted ) = @_;
10436
10437							# Input parameter:
10438							# $max_tokens_wanted > 0 to stop on reaching this many tokens.
10439							# = 0 means get all tokens
10440
10441							# Break a string, $str, into a sequence of preliminary tokens. We
10442							# are interested in these types of tokens:
10443							# words (type='w'), example: 'max_tokens_wanted'
10444							# digits (type = 'd'), example: '0755'
10445							# whitespace (type = 'b'), example: ' '
10446							# any other single character (i.e. punct; type = the character itself).
10447							# We cannot do better than this yet because we might be in a quoted
10448							# string or pattern. Caller sets $max_tokens_wanted to 0 to get all
10449							# tokens.
10450
10451							# An advantage of doing this pre-tokenization step is that it keeps almost
10452							# all of the regex work highly localized. A disadvantage is that in some
10453							# very rare instances we will have to go back and split a pre-token.
10454
10455							# Return parameters:
10456	6170					10414	my @tokens = (); # array of the tokens themselves
10457	6170					12611	my @token_map = (0); # string position of start of each token
10458	6170					9807	my @type = (); # 'b'=whitespace, 'd'=digits, 'w'=alpha, or punct
10459
10460	6170					9638	do {
10461
10462							# whitespace
10463	82014	100				267997	if ( $str =~ /\G(\s+)/gc ) { push @type, 'b'; }
	18684	100				32629
		100
		100
10464
10465							# numbers
10466							# note that this must come before words!
10467	2593					5527	elsif ( $str =~ /\G(\d+)/gc ) { push @type, 'd'; }
10468
10469							# words
10470	16628					30404	elsif ( $str =~ /\G(\w+)/gc ) { push @type, 'w'; }
10471
10472							# single-character punctuation
10473	38075					80674	elsif ( $str =~ /\G(\W)/gc ) { push @type, $1; }
10474
10475							# that's all..
10476							else {
10477	6034					37446	return ( \@tokens, \@token_map, \@type );
10478							}
10479
10480	75980					135096	push @tokens, $1;
10481	75980					157063	push @token_map, pos($str);
10482
10483							} while ( --$max_tokens_wanted != 0 );
10484
10485	136					814	return ( \@tokens, \@token_map, \@type );
10486							} ## end sub pre_tokenize
10487
10488							sub show_tokens {
10489
10490							# this is an old debug routine
10491							# not called, but saved for reference
10492	0			0	0	0	my ( $rtokens, $rtoken_map ) = @_;
10493	0					0	my $num = scalar( @{$rtokens} );
	0					0
10494
10495	0					0	foreach my $i ( 0 .. $num - 1 ) {
10496	0					0	my $len = length( $rtokens->[$i] );
10497	0					0	print STDOUT "$i:$len:$rtoken_map->[$i]:$rtokens->[$i]:\n";
10498							}
10499	0					0	return;
10500							} ## end sub show_tokens
10501
10502							{ ## closure for sub matching end token
10503							my %matching_end_token;
10504
10505							BEGIN {
10506	38			38		60305	%matching_end_token = (
10507							'{' => '}',
10508							'(' => ')',
10509							'[' => ']',
10510							'<' => '>',
10511							);
10512							} ## end BEGIN
10513
10514							sub matching_end_token {
10515
10516							# return closing character for a pattern
10517	2993			2993	0	4834	my $beginning_token = shift;
10518	2993	100				7936	if ( $matching_end_token{$beginning_token} ) {
10519	373					961	return $matching_end_token{$beginning_token};
10520							}
10521	2620					5860	return ($beginning_token);
10522							} ## end sub matching_end_token
10523							}
10524
10525							sub dump_token_types {
10526	0			0	0		my ( $class, $fh ) = @_;
10527
10528							# This should be the latest list of token types in use
10529							# adding NEW_TOKENS: add a comment here
10530	0						$fh->print(<<'END_OF_LIST');
10531
10532							Here is a list of the token types currently used for lines of type 'CODE'.
10533							For the following tokens, the "type" of a token is just the token itself.
10534
10535							.. :: << >> ** && .. \|\| // -> => += -= .= %= &= \|= ^= *= <>
10536							( ) <= >= == =~ !~ != ++ -- /= x=
10537							... **= <<= >>= &&= \|\|= //= <=>
10538							, + - / * \| % ! x ~ = \ ? : . < > ^ &
10539
10540							The following additional token types are defined:
10541
10542							type meaning
10543							b blank (white space)
10544							{ indent: opening structural curly brace or square bracket or paren
10545							(code block, anonymous hash reference, or anonymous array reference)
10546							} outdent: right structural curly brace or square bracket or paren
10547							[ left non-structural square bracket (enclosing an array index)
10548							] right non-structural square bracket
10549							( left non-structural paren (all but a list right of an =)
10550							) right non-structural paren
10551							L left non-structural curly brace (enclosing a key)
10552							R right non-structural curly brace
10553							; terminal semicolon
10554							f indicates a semicolon in a "for" statement
10555							h here_doc operator <<
10556							# a comment
10557							Q indicates a quote or pattern
10558							q indicates a qw quote block
10559							k a perl keyword
10560							C user-defined constant or constant function (with void prototype = ())
10561							U user-defined function taking parameters
10562							G user-defined function taking block parameter (like grep/map/eval)
10563							M (unused, but reserved for subroutine definition name)
10564							P (unused, but -html uses it to label pod text)
10565							t type indicater such as %,$,@,*,&,sub
10566							w bare word (perhaps a subroutine call)
10567							i identifier of some type (with leading %, $, @, *, &, sub, -> )
10568							n a number
10569							v a v-string
10570							F a file test operator (like -e)
10571							Y File handle
10572							Z identifier in indirect object slot: may be file handle, object
10573							J LABEL: code block label
10574							j LABEL after next, last, redo, goto
10575							p unary +
10576							m unary -
10577							pp pre-increment operator ++
10578							mm pre-decrement operator --
10579							A : used as attribute separator
10580
10581							Here are the '_line_type' codes used internally:
10582							SYSTEM - system-specific code before hash-bang line
10583							CODE - line of perl code (including comments)
10584							POD_START - line starting pod, such as '=head'
10585							POD - pod documentation text
10586							POD_END - last line of pod section, '=cut'
10587							HERE - text of here-document
10588							HERE_END - last line of here-doc (target word)
10589							FORMAT - format section
10590							FORMAT_END - last line of format section, '.'
10591							SKIP - code skipping section
10592							SKIP_END - last line of code skipping section, '#>>V'
10593							DATA_START - __DATA__ line
10594							DATA - unidentified text following __DATA__
10595							END_START - __END__ line
10596							END - unidentified text following __END__
10597							ERROR - we are in big trouble, probably not a perl script
10598							END_OF_LIST
10599
10600	0						return;
10601							} ## end sub dump_token_types
10602
10603							BEGIN {
10604
10605							# These names are used in error messages
10606	38			38		371	@opening_brace_names = qw# '{' '[' '(' '?' #;
10607	38					163	@closing_brace_names = qw# '}' ']' ')' ':' #;
10608
10609	38					89	my @q;
10610
10611	38					227	my @digraphs = qw(
10612							.. :: << >> ** && \|\| // -> => += -= .= %= &= \|= ^= *= <>
10613							<= >= == =~ !~ != ++ -- /= x= ~~ ~. \|. &. ^.
10614							);
10615	38					807	@is_digraph{@digraphs} = (1) x scalar(@digraphs);
10616
10617	38					212	@q = qw(
10618							. : < > * & \| / - = + - % ^ ! x ~
10619							);
10620	38					462	@can_start_digraph{@q} = (1) x scalar(@q);
10621
10622	38					218	my @trigraphs = qw( ... **= <<= >>= &&= \|\|= //= <=> !~~ &.= \|.= ^.= <<~);
10623	38					317	@is_trigraph{@trigraphs} = (1) x scalar(@trigraphs);
10624
10625	38					158	my @tetragraphs = qw( <<>> );
10626	38					170	@is_tetragraph{@tetragraphs} = (1) x scalar(@tetragraphs);
10627
10628							# make a hash of all valid token types for self-checking the tokenizer
10629							# (adding NEW_TOKENS : select a new character and add to this list)
10630	38					641	my @valid_token_types = qw#
10631							A b C G L R f h Q k t w i q n p m F pp mm U j J Y Z v
10632							{ } ( ) [ ] ; + - / * \| % ! x ~ = \ ? : . < > ^ &
10633							#;
10634	38					355	push( @valid_token_types, @digraphs );
10635	38					183	push( @valid_token_types, @trigraphs );
10636	38					98	push( @valid_token_types, @tetragraphs );
10637	38					92	push( @valid_token_types, ( '#', ',', 'CORE::' ) );
10638	38					1474	@is_valid_token_type{@valid_token_types} = (1) x scalar(@valid_token_types);
10639
10640							# a list of file test letters, as in -e (Table 3-4 of 'camel 3')
10641	38					264	my @file_test_operators =
10642							qw( A B C M O R S T W X b c d e f g k l o p r s t u w x z);
10643	38					808	@is_file_test_operator{@file_test_operators} =
10644							(1) x scalar(@file_test_operators);
10645
10646							# these functions have prototypes of the form (&), so when they are
10647							# followed by a block, that block MAY BE followed by an operator.
10648							# Smartmatch operator ~~ may be followed by anonymous hash or array ref
10649	38					211	@q = qw( do eval );
10650	38					148	@is_block_operator{@q} = (1) x scalar(@q);
10651
10652							# these functions allow an identifier in the indirect object slot
10653	38					117	@q = qw( print printf sort exec system say);
10654	38					237	@is_indirect_object_taker{@q} = (1) x scalar(@q);
10655
10656							# Note: 'field' will be added by sub check_options if --use-feature=class
10657	38					115	@q = qw(my our state);
10658	38					162	@is_my_our_state{@q} = (1) x scalar(@q);
10659
10660							# These tokens may precede a code block
10661							# patched for SWITCH/CASE/CATCH. Actually these could be removed
10662							# now and we could let the extended-syntax coding handle them.
10663							# Added 'default' for Switch::Plain.
10664							# Note: 'ADJUST' will be added by sub check_options if --use-feature=class
10665	38					211	@q =
10666							qw( BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else
10667							unless do while until eval for foreach map grep sort
10668							switch case given when default catch try finally);
10669	38					863	@is_code_block_token{@q} = (1) x scalar(@q);
10670
10671							# Note: this hash was formerly named '%is_not_zero_continuation_block_type'
10672							# to contrast it with the block types in '%is_zero_continuation_block_type'
10673	38					210	@q = qw( sort map grep eval do );
10674	38					155	@is_sort_map_grep_eval_do{@q} = (1) x scalar(@q);
10675
10676	38					119	@q = qw( sort map grep );
10677	38					124	@is_sort_map_grep{@q} = (1) x scalar(@q);
10678
10679	38					97	%is_grep_alias = ();
10680
10681							# I'll build the list of keywords incrementally
10682	38					99	my @Keywords = ();
10683
10684							# keywords and tokens after which a value or pattern is expected,
10685							# but not an operator. In other words, these should consume terms
10686							# to their right, or at least they are not expected to be followed
10687							# immediately by operators.
10688	38					1492	my @value_requestor = qw(
10689							AUTOLOAD
10690							BEGIN
10691							CHECK
10692							DESTROY
10693							END
10694							EQ
10695							GE
10696							GT
10697							INIT
10698							LE
10699							LT
10700							NE
10701							UNITCHECK
10702							abs
10703							accept
10704							alarm
10705							and
10706							atan2
10707							bind
10708							binmode
10709							bless
10710							break
10711							caller
10712							chdir
10713							chmod
10714							chomp
10715							chop
10716							chown
10717							chr
10718							chroot
10719							close
10720							closedir
10721							cmp
10722							connect
10723							continue
10724							cos
10725							crypt
10726							dbmclose
10727							dbmopen
10728							defined
10729							delete
10730							die
10731							dump
10732							each
10733							else
10734							elsif
10735							eof
10736							eq
10737							evalbytes
10738							exec
10739							exists
10740							exit
10741							exp
10742							fc
10743							fcntl
10744							fileno
10745							flock
10746							for
10747							foreach
10748							formline
10749							ge
10750							getc
10751							getgrgid
10752							getgrnam
10753							gethostbyaddr
10754							gethostbyname
10755							getnetbyaddr
10756							getnetbyname
10757							getpeername
10758							getpgrp
10759							getpriority
10760							getprotobyname
10761							getprotobynumber
10762							getpwnam
10763							getpwuid
10764							getservbyname
10765							getservbyport
10766							getsockname
10767							getsockopt
10768							glob
10769							gmtime
10770							goto
10771							grep
10772							gt
10773							hex
10774							if
10775							index
10776							int
10777							ioctl
10778							join
10779							keys
10780							kill
10781							last
10782							lc
10783							lcfirst
10784							le
10785							length
10786							link
10787							listen
10788							local
10789							localtime
10790							lock
10791							log
10792							lstat
10793							lt
10794							map
10795							mkdir
10796							msgctl
10797							msgget
10798							msgrcv
10799							msgsnd
10800							my
10801							ne
10802							next
10803							no
10804							not
10805							oct
10806							open
10807							opendir
10808							or
10809							ord
10810							our
10811							pack
10812							pipe
10813							pop
10814							pos
10815							print
10816							printf
10817							prototype
10818							push
10819							quotemeta
10820							rand
10821							read
10822							readdir
10823							readlink
10824							readline
10825							readpipe
10826							recv
10827							redo
10828							ref
10829							rename
10830							require
10831							reset
10832							return
10833							reverse
10834							rewinddir
10835							rindex
10836							rmdir
10837							scalar
10838							seek
10839							seekdir
10840							select
10841							semctl
10842							semget
10843							semop
10844							send
10845							sethostent
10846							setnetent
10847							setpgrp
10848							setpriority
10849							setprotoent
10850							setservent
10851							setsockopt
10852							shift
10853							shmctl
10854							shmget
10855							shmread
10856							shmwrite
10857							shutdown
10858							sin
10859							sleep
10860							socket
10861							socketpair
10862							sort
10863							splice
10864							split
10865							sprintf
10866							sqrt
10867							srand
10868							stat
10869							state
10870							study
10871							substr
10872							symlink
10873							syscall
10874							sysopen
10875							sysread
10876							sysseek
10877							system
10878							syswrite
10879							tell
10880							telldir
10881							tie
10882							tied
10883							truncate
10884							uc
10885							ucfirst
10886							umask
10887							undef
10888							unless
10889							unlink
10890							unpack
10891							unshift
10892							untie
10893							until
10894							use
10895							utime
10896							values
10897							vec
10898							waitpid
10899							warn
10900							while
10901							write
10902							xor
10903
10904							switch
10905							case
10906							default
10907							given
10908							when
10909							err
10910							say
10911							isa
10912
10913							catch
10914
10915							);
10916
10917							# Note: 'ADJUST', 'field' are added by sub check_options
10918							# if --use-feature=class
10919
10920							# patched above for SWITCH/CASE given/when err say
10921							# 'err' is a fairly safe addition.
10922							# Added 'default' for Switch::Plain. Note that we could also have
10923							# a separate set of keywords to include if we see 'use Switch::Plain'
10924	38					1551	push( @Keywords, @value_requestor );
10925
10926							# These are treated the same but are not keywords:
10927	38					174	my @extra_vr = qw(
10928							constant
10929							vars
10930							);
10931	38					290	push( @value_requestor, @extra_vr );
10932
10933	38					4778	@expecting_term_token{@value_requestor} = (1) x scalar(@value_requestor);
10934
10935							# this list contains keywords which do not look for arguments,
10936							# so that they might be followed by an operator, or at least
10937							# not a term.
10938	38					344	my @operator_requestor = qw(
10939							endgrent
10940							endhostent
10941							endnetent
10942							endprotoent
10943							endpwent
10944							endservent
10945							fork
10946							getgrent
10947							gethostent
10948							getlogin
10949							getnetent
10950							getppid
10951							getprotoent
10952							getpwent
10953							getservent
10954							setgrent
10955							setpwent
10956							time
10957							times
10958							wait
10959							wantarray
10960							);
10961
10962	38					175	push( @Keywords, @operator_requestor );
10963
10964							# These are treated the same but are not considered keywords:
10965	38					129	my @extra_or = qw(
10966							STDERR
10967							STDIN
10968							STDOUT
10969							);
10970
10971	38					169	push( @operator_requestor, @extra_or );
10972
10973	38					887	@expecting_operator_token{@operator_requestor} =
10974							(1) x scalar(@operator_requestor);
10975
10976							# these token TYPES expect trailing operator but not a term
10977							# note: ++ and -- are post-increment and decrement, 'C' = constant
10978	38					248	my @operator_requestor_types = qw( ++ -- C <> q );
10979
10980							# NOTE: This hash is available but not currently used
10981	38					152	@expecting_operator_types{@operator_requestor_types} =
10982							(1) x scalar(@operator_requestor_types);
10983
10984							# these token TYPES consume values (terms)
10985							# note: pp and mm are pre-increment and decrement
10986							# f=semicolon in for, F=file test operator
10987	38					920	my @value_requestor_type = qw#
10988							L { ( [ ~ !~ =~ ; . .. ... A : && ! \|\| // = + - x
10989							*= += -= .= /= = %= x= &= \|= ^= <<= >>= &&= \|\|= //=
10990							<= >= == != => \ > < % * / ? & \| ** <=> ~~ !~~ <<~
10991							f F pp mm Y p m U J G j >> << ^ t
10992							~. ^. \|. &. ^.= \|.= &.=
10993							#;
10994	38					216	push( @value_requestor_type, ',' )
10995							; # (perl doesn't like a ',' in a qw block)
10996
10997							# NOTE: This hash is available but not currently used
10998	38					1008	@expecting_term_types{@value_requestor_type} =
10999							(1) x scalar(@value_requestor_type);
11000
11001							# Note: the following valid token types are not assigned here to
11002							# hashes requesting to be followed by values or terms, but are
11003							# instead currently hard-coded into sub operator_expected:
11004							# ) -> :: Q R Z ] b h i k n v w } #
11005
11006							# For simple syntax checking, it is nice to have a list of operators which
11007							# will really be unhappy if not followed by a term. This includes most
11008							# of the above...
11009	38					1304	@really_want_term{@value_requestor_type} =
11010							(1) x scalar(@value_requestor_type);
11011
11012							# with these exceptions...
11013	38					194	delete $really_want_term{'U'}; # user sub, depends on prototype
11014	38					86	delete $really_want_term{'F'}; # file test works on $_ if no following term
11015	38					91	delete $really_want_term{'Y'}; # indirect object, too risky to check syntax;
11016							# let perl do it
11017	38					177	@q = qw(q qq qx qr s y tr m);
11018	38					227	@is_q_qq_qx_qr_s_y_tr_m{@q} = (1) x scalar(@q);
11019
11020							# Note added 'qw' here
11021	38					165	@q = qw(q qq qw qx qr s y tr m);
11022	38					3963	@is_q_qq_qw_qx_qr_s_y_tr_m{@q} = (1) x scalar(@q);
11023
11024							# Note: 'class' will be added by sub check_options if -use-feature=class
11025	38					265	@q = qw(package);
11026	38					139	@is_package{@q} = (1) x scalar(@q);
11027
11028	38					86	@q = qw( ? : );
11029	38					98	push @q, ',';
11030	38					107	@is_comma_question_colon{@q} = (1) x scalar(@q);
11031
11032	38					157	@q = qw( if elsif unless );
11033	38					105	@is_if_elsif_unless{@q} = (1) x scalar(@q);
11034
11035	38					83	@q = qw( ; t );
11036	38					95	@is_semicolon_or_t{@q} = (1) x scalar(@q);
11037
11038	38					104	@q = qw( if elsif unless case when );
11039	38					111	@is_if_elsif_unless_case_when{@q} = (1) x scalar(@q);
11040
11041							# Hash of other possible line endings which may occur.
11042							# Keep these coordinated with the regex where this is used.
11043							# Note: chr(13) = chr(015)="\r".
11044	38					119	@q = ( chr(13), chr(29), chr(26) );
11045	38					209	@other_line_endings{@q} = (1) x scalar(@q);
11046
11047							# These keywords are handled specially in the tokenizer code:
11048	38					226	my @special_keywords = qw(
11049							do
11050							eval
11051							format
11052							m
11053							package
11054							q
11055							qq
11056							qr
11057							qw
11058							qx
11059							s
11060							sub
11061							tr
11062							y
11063							);
11064	38					185	push( @Keywords, @special_keywords );
11065
11066							# Keywords after which list formatting may be used
11067							# WARNING: do not include \|map\|grep\|eval or perl may die on
11068							# syntax errors (map1.t).
11069	38					685	my @keyword_taking_list = qw(
11070							and
11071							chmod
11072							chomp
11073							chop
11074							chown
11075							dbmopen
11076							die
11077							elsif
11078							exec
11079							fcntl
11080							for
11081							foreach
11082							formline
11083							getsockopt
11084							if
11085							index
11086							ioctl
11087							join
11088							kill
11089							local
11090							msgctl
11091							msgrcv
11092							msgsnd
11093							my
11094							open
11095							or
11096							our
11097							pack
11098							print
11099							printf
11100							push
11101							read
11102							readpipe
11103							recv
11104							return
11105							reverse
11106							rindex
11107							seek
11108							select
11109							semctl
11110							semget
11111							send
11112							setpriority
11113							setsockopt
11114							shmctl
11115							shmget
11116							shmread
11117							shmwrite
11118							socket
11119							socketpair
11120							sort
11121							splice
11122							split
11123							sprintf
11124							state
11125							substr
11126							syscall
11127							sysopen
11128							sysread
11129							sysseek
11130							system
11131							syswrite
11132							tie
11133							unless
11134							unlink
11135							unpack
11136							unshift
11137							until
11138							vec
11139							warn
11140							while
11141							given
11142							when
11143							);
11144
11145							# NOTE: This hash is available but not currently used
11146	38					1214	@is_keyword_taking_list{@keyword_taking_list} =
11147							(1) x scalar(@keyword_taking_list);
11148
11149							# perl functions which may be unary operators.
11150
11151							# This list is used to decide if a pattern delimited by slashes, /pattern/,
11152							# can follow one of these keywords.
11153	38					191	@q = qw(
11154							chomp eof eval fc lc pop shift uc undef
11155							);
11156
11157	38					238	@is_keyword_rejecting_slash_as_pattern_delimiter{@q} =
11158							(1) x scalar(@q);
11159
11160							# These are keywords for which an arg may optionally be omitted. They are
11161							# currently only used to disambiguate a ? used as a ternary from one used
11162							# as a (deprecated) pattern delimiter. In the future, they might be used
11163							# to give a warning about ambiguous syntax before a /.
11164							# Note: split has been omitted (see note below).
11165	38					592	my @keywords_taking_optional_arg = qw(
11166							abs
11167							alarm
11168							caller
11169							chdir
11170							chomp
11171							chop
11172							chr
11173							chroot
11174							close
11175							cos
11176							defined
11177							die
11178							eof
11179							eval
11180							evalbytes
11181							exit
11182							exp
11183							fc
11184							getc
11185							glob
11186							gmtime
11187							hex
11188							int
11189							last
11190							lc
11191							lcfirst
11192							length
11193							localtime
11194							log
11195							lstat
11196							mkdir
11197							next
11198							oct
11199							ord
11200							pop
11201							pos
11202							print
11203							printf
11204							prototype
11205							quotemeta
11206							rand
11207							readline
11208							readlink
11209							readpipe
11210							redo
11211							ref
11212							require
11213							reset
11214							reverse
11215							rmdir
11216							say
11217							select
11218							shift
11219							sin
11220							sleep
11221							sqrt
11222							srand
11223							stat
11224							study
11225							tell
11226							uc
11227							ucfirst
11228							umask
11229							undef
11230							unlink
11231							warn
11232							write
11233							);
11234	38					887	@is_keyword_taking_optional_arg{@keywords_taking_optional_arg} =
11235							(1) x scalar(@keywords_taking_optional_arg);
11236
11237							# This list is used to decide if a pattern delimited by question marks,
11238							# ?pattern?, can follow one of these keywords. Note that from perl 5.22
11239							# on, a ?pattern? is not recognized, so we can be much more strict than
11240							# with a /pattern/. Note that 'split' is not in this list. In current
11241							# versions of perl a question following split must be a ternary, but
11242							# in older versions it could be a pattern. The guessing algorithm will
11243							# decide. We are combining two lists here to simplify the test.
11244	38					810	@q = ( @keywords_taking_optional_arg, @operator_requestor );
11245	38					1437	@is_keyword_rejecting_question_as_pattern_delimiter{@q} =
11246							(1) x scalar(@q);
11247
11248							# These are not used in any way yet
11249							# my @unused_keywords = qw(
11250							# __FILE__
11251							# __LINE__
11252							# __PACKAGE__
11253							# );
11254
11255							# The list of keywords was originally extracted from function 'keyword' in
11256							# perl file toke.c version 5.005.03, using this utility, plus a
11257							# little editing: (file getkwd.pl):
11258							# while (<>) { while (/\"(.*)\"/g) { print "$1\n"; } }
11259							# Add 'get' prefix where necessary, then split into the above lists.
11260							# This list should be updated as necessary.
11261							# The list should not contain these special variables:
11262							# ARGV DATA ENV SIG STDERR STDIN STDOUT
11263							# __DATA__ __END__
11264
11265	38					8823	@is_keyword{@Keywords} = (1) x scalar(@Keywords);
11266							} ## end BEGIN
11267							1;